mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-13 05:48:56 +00:00
fix: resolve precision issues in SDXL VAE under fp16
This commit is contained in:
parent
2e9242e37f
commit
1d13041aa2
@ -17,7 +17,6 @@ API and command-line option may change frequently.***
|
|||||||
- Image Models
|
- Image Models
|
||||||
- SD1.x, SD2.x, [SD-Turbo](https://huggingface.co/stabilityai/sd-turbo)
|
- SD1.x, SD2.x, [SD-Turbo](https://huggingface.co/stabilityai/sd-turbo)
|
||||||
- SDXL, [SDXL-Turbo](https://huggingface.co/stabilityai/sdxl-turbo)
|
- SDXL, [SDXL-Turbo](https://huggingface.co/stabilityai/sdxl-turbo)
|
||||||
- !!!The VAE in SDXL encounters NaN issues under FP16, but unfortunately ggml_conv_2d only operates under FP16. Hence, a parameter is needed to specify a VAE that has the FP16 NaN issue fixed. You can find it here: [SDXL VAE FP16 Fix](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors).
|
|
||||||
- [SD3/SD3.5](./docs/sd3.md)
|
- [SD3/SD3.5](./docs/sd3.md)
|
||||||
- [Flux-dev/Flux-schnell](./docs/flux.md)
|
- [Flux-dev/Flux-schnell](./docs/flux.md)
|
||||||
- [Chroma](./docs/chroma.md)
|
- [Chroma](./docs/chroma.md)
|
||||||
|
|||||||
@ -1457,7 +1457,7 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
|
|||||||
const ConditionerParams& conditioner_params) {
|
const ConditionerParams& conditioner_params) {
|
||||||
std::string prompt;
|
std::string prompt;
|
||||||
std::vector<std::pair<int, ggml_tensor*>> image_embeds;
|
std::vector<std::pair<int, ggml_tensor*>> image_embeds;
|
||||||
size_t system_prompt_length = 0;
|
size_t system_prompt_length = 0;
|
||||||
int prompt_template_encode_start_idx = 34;
|
int prompt_template_encode_start_idx = 34;
|
||||||
if (qwenvl->enable_vision && conditioner_params.ref_images.size() > 0) {
|
if (qwenvl->enable_vision && conditioner_params.ref_images.size() > 0) {
|
||||||
LOG_INFO("QwenImageEditPlusPipeline");
|
LOG_INFO("QwenImageEditPlusPipeline");
|
||||||
|
|||||||
@ -975,38 +975,28 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d(struct ggml_context* ctx,
|
|||||||
struct ggml_tensor* x,
|
struct ggml_tensor* x,
|
||||||
struct ggml_tensor* w,
|
struct ggml_tensor* w,
|
||||||
struct ggml_tensor* b,
|
struct ggml_tensor* b,
|
||||||
int s0 = 1,
|
int s0 = 1,
|
||||||
int s1 = 1,
|
int s1 = 1,
|
||||||
int p0 = 0,
|
int p0 = 0,
|
||||||
int p1 = 0,
|
int p1 = 0,
|
||||||
int d0 = 1,
|
int d0 = 1,
|
||||||
int d1 = 1) {
|
int d1 = 1,
|
||||||
x = ggml_conv_2d(ctx, w, x, s0, s1, p0, p1, d0, d1);
|
bool direct = false,
|
||||||
if (b != NULL) {
|
float scale = 1.f) {
|
||||||
b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
|
if (scale != 1.f) {
|
||||||
// b = ggml_repeat(ctx, b, x);
|
x = ggml_scale(ctx, x, scale);
|
||||||
x = ggml_add_inplace(ctx, x, b);
|
}
|
||||||
|
if (direct) {
|
||||||
|
x = ggml_conv_2d_direct(ctx, w, x, s0, s1, p0, p1, d0, d1);
|
||||||
|
} else {
|
||||||
|
x = ggml_conv_2d(ctx, w, x, s0, s1, p0, p1, d0, d1);
|
||||||
|
}
|
||||||
|
if (scale != 1.f) {
|
||||||
|
x = ggml_scale(ctx, x, 1.f / scale);
|
||||||
}
|
}
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
// w: [OC*IC, KD, KH, KW]
|
|
||||||
// x: [N*IC, ID, IH, IW]
|
|
||||||
__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d_direct(struct ggml_context* ctx,
|
|
||||||
struct ggml_tensor* x,
|
|
||||||
struct ggml_tensor* w,
|
|
||||||
struct ggml_tensor* b,
|
|
||||||
int s0 = 1,
|
|
||||||
int s1 = 1,
|
|
||||||
int p0 = 0,
|
|
||||||
int p1 = 0,
|
|
||||||
int d0 = 1,
|
|
||||||
int d1 = 1) {
|
|
||||||
x = ggml_conv_2d_direct(ctx, w, x, s0, s1, p0, p1, d0, d1);
|
|
||||||
if (b != NULL) {
|
if (b != NULL) {
|
||||||
b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
|
b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
|
||||||
// b = ggml_repeat(ctx, b, x);
|
x = ggml_add_inplace(ctx, x, b);
|
||||||
x = ggml_add(ctx, x, b);
|
|
||||||
}
|
}
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
@ -2067,6 +2057,7 @@ protected:
|
|||||||
std::pair<int, int> dilation;
|
std::pair<int, int> dilation;
|
||||||
bool bias;
|
bool bias;
|
||||||
bool direct = false;
|
bool direct = false;
|
||||||
|
float scale = 1.f;
|
||||||
|
|
||||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
|
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
|
||||||
enum ggml_type wtype = GGML_TYPE_F16;
|
enum ggml_type wtype = GGML_TYPE_F16;
|
||||||
@ -2097,6 +2088,10 @@ public:
|
|||||||
direct = true;
|
direct = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void set_scale(float scale_value) {
|
||||||
|
scale = scale_value;
|
||||||
|
}
|
||||||
|
|
||||||
std::string get_desc() {
|
std::string get_desc() {
|
||||||
return "Conv2d";
|
return "Conv2d";
|
||||||
}
|
}
|
||||||
@ -2107,11 +2102,18 @@ public:
|
|||||||
if (bias) {
|
if (bias) {
|
||||||
b = params["bias"];
|
b = params["bias"];
|
||||||
}
|
}
|
||||||
if (direct) {
|
return ggml_nn_conv_2d(ctx,
|
||||||
return ggml_nn_conv_2d_direct(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
|
x,
|
||||||
} else {
|
w,
|
||||||
return ggml_nn_conv_2d(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
|
b,
|
||||||
}
|
stride.second,
|
||||||
|
stride.first,
|
||||||
|
padding.second,
|
||||||
|
padding.first,
|
||||||
|
dilation.second,
|
||||||
|
dilation.first,
|
||||||
|
direct,
|
||||||
|
scale);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -535,7 +535,7 @@ namespace Qwen {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOG_ERROR("qwen_image_params.num_layers: %ld", qwen_image_params.num_layers);
|
LOG_ERROR("qwen_image_params.num_layers: %ld", qwen_image_params.num_layers);
|
||||||
qwen_image = QwenImageModel(qwen_image_params);
|
qwen_image = QwenImageModel(qwen_image_params);
|
||||||
qwen_image.init(params_ctx, tensor_types, prefix);
|
qwen_image.init(params_ctx, tensor_types, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -330,13 +330,6 @@ public:
|
|||||||
|
|
||||||
if (sd_version_is_sdxl(version)) {
|
if (sd_version_is_sdxl(version)) {
|
||||||
scale_factor = 0.13025f;
|
scale_factor = 0.13025f;
|
||||||
if (strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0 && strlen(SAFE_STR(sd_ctx_params->taesd_path)) == 0) {
|
|
||||||
LOG_WARN(
|
|
||||||
"!!!It looks like you are using SDXL model. "
|
|
||||||
"If you find that the generated images are completely black, "
|
|
||||||
"try specifying SDXL VAE FP16 Fix with the --vae parameter. "
|
|
||||||
"You can find it here: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors");
|
|
||||||
}
|
|
||||||
} else if (sd_version_is_sd3(version)) {
|
} else if (sd_version_is_sd3(version)) {
|
||||||
scale_factor = 1.5305f;
|
scale_factor = 1.5305f;
|
||||||
} else if (sd_version_is_flux(version)) {
|
} else if (sd_version_is_flux(version)) {
|
||||||
@ -517,6 +510,11 @@ public:
|
|||||||
LOG_INFO("Using Conv2d direct in the vae model");
|
LOG_INFO("Using Conv2d direct in the vae model");
|
||||||
first_stage_model->enable_conv2d_direct();
|
first_stage_model->enable_conv2d_direct();
|
||||||
}
|
}
|
||||||
|
if (version == VERSION_SDXL && strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0) {
|
||||||
|
float vae_conv_2d_scale = 1.f / 32.f;
|
||||||
|
LOG_WARN("No VAE specified with --vae, using Conv2D scale %.3f", vae_conv_2d_scale);
|
||||||
|
first_stage_model->set_conv2d_scale(vae_conv_2d_scale);
|
||||||
|
}
|
||||||
first_stage_model->alloc_params_buffer();
|
first_stage_model->alloc_params_buffer();
|
||||||
first_stage_model->get_param_tensors(tensors, "first_stage_model");
|
first_stage_model->get_param_tensors(tensors, "first_stage_model");
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
12
vae.hpp
12
vae.hpp
@ -530,6 +530,7 @@ struct VAE : public GGMLRunner {
|
|||||||
struct ggml_context* output_ctx) = 0;
|
struct ggml_context* output_ctx) = 0;
|
||||||
virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) = 0;
|
virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) = 0;
|
||||||
virtual void enable_conv2d_direct(){};
|
virtual void enable_conv2d_direct(){};
|
||||||
|
virtual void set_conv2d_scale(float scale) { SD_UNUSED(scale); };
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AutoEncoderKL : public VAE {
|
struct AutoEncoderKL : public VAE {
|
||||||
@ -558,6 +559,17 @@ struct AutoEncoderKL : public VAE {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void set_conv2d_scale(float scale) {
|
||||||
|
std::vector<GGMLBlock*> blocks;
|
||||||
|
ae.get_all_blocks(blocks);
|
||||||
|
for (auto block : blocks) {
|
||||||
|
if (block->get_desc() == "Conv2d") {
|
||||||
|
auto conv_block = (Conv2d*)block;
|
||||||
|
conv_block->set_scale(scale);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string get_desc() {
|
std::string get_desc() {
|
||||||
return "vae";
|
return "vae";
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user