From 1d13041aa231bc29faa0577927bb784bffc0c017 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Tue, 14 Oct 2025 23:12:39 +0800
Subject: [PATCH] fix: resolve precision issues in SDXL VAE under fp16

---
 README.md            |  1 -
 conditioner.hpp      |  2 +-
 ggml_extend.hpp      | 70 +++++++++++++++++++++++---------------------
 qwen_image.hpp       |  2 +-
 stable-diffusion.cpp | 12 ++++----
 vae.hpp              | 12 ++++++++
 6 files changed, 55 insertions(+), 44 deletions(-)

diff --git a/README.md b/README.md
index 0a27bc1..516b719 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,6 @@ API and command-line option may change frequently.***
   - Image Models
     - SD1.x, SD2.x, [SD-Turbo](https://huggingface.co/stabilityai/sd-turbo)
     - SDXL, [SDXL-Turbo](https://huggingface.co/stabilityai/sdxl-turbo)
-      - !!!The VAE in SDXL encounters NaN issues under FP16, but unfortunately, the ggml_conv_2d only operates under FP16. Hence, a parameter is needed to specify the VAE that has fixed the FP16 NaN issue. You can find it here: [SDXL VAE FP16 Fix](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors).
     - [SD3/SD3.5](./docs/sd3.md)
     - [Flux-dev/Flux-schnell](./docs/flux.md)
     - [Chroma](./docs/chroma.md)
diff --git a/conditioner.hpp b/conditioner.hpp
index abd6dbc..4f9efb8 100644
--- a/conditioner.hpp
+++ b/conditioner.hpp
@@ -1457,7 +1457,7 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
                                       const ConditionerParams& conditioner_params) {
         std::string prompt;
         std::vector<std::pair<int, ggml_tensor*>> image_embeds;
-        size_t system_prompt_length = 0;
+        size_t system_prompt_length          = 0;
         int prompt_template_encode_start_idx = 34;
         if (qwenvl->enable_vision && conditioner_params.ref_images.size() > 0) {
             LOG_INFO("QwenImageEditPlusPipeline");
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
index ca91121..d8df0d8 100644
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -975,38 +975,28 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d(struct ggml_context* ctx,
                                                       struct ggml_tensor* x,
                                                       struct ggml_tensor* w,
                                                       struct ggml_tensor* b,
-                                                      int s0 = 1,
-                                                      int s1 = 1,
-                                                      int p0 = 0,
-                                                      int p1 = 0,
-                                                      int d0 = 1,
-                                                      int d1 = 1) {
-    x = ggml_conv_2d(ctx, w, x, s0, s1, p0, p1, d0, d1);
-    if (b != NULL) {
-        b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
-        // b = ggml_repeat(ctx, b, x);
-        x = ggml_add_inplace(ctx, x, b);
+                                                      int s0      = 1,
+                                                      int s1      = 1,
+                                                      int p0      = 0,
+                                                      int p1      = 0,
+                                                      int d0      = 1,
+                                                      int d1      = 1,
+                                                      bool direct = false,
+                                                      float scale = 1.f) {
+    if (scale != 1.f) {
+        x = ggml_scale(ctx, x, scale);
+    }
+    if (direct) {
+        x = ggml_conv_2d_direct(ctx, w, x, s0, s1, p0, p1, d0, d1);
+    } else {
+        x = ggml_conv_2d(ctx, w, x, s0, s1, p0, p1, d0, d1);
+    }
+    if (scale != 1.f) {
+        x = ggml_scale(ctx, x, 1.f / scale);
     }
-    return x;
-}
-
-// w: [OC*IC, KD, KH, KW]
-// x: [N*IC, ID, IH, IW]
-__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d_direct(struct ggml_context* ctx,
-                                                             struct ggml_tensor* x,
-                                                             struct ggml_tensor* w,
-                                                             struct ggml_tensor* b,
-                                                             int s0 = 1,
-                                                             int s1 = 1,
-                                                             int p0 = 0,
-                                                             int p1 = 0,
-                                                             int d0 = 1,
-                                                             int d1 = 1) {
-    x = ggml_conv_2d_direct(ctx, w, x, s0, s1, p0, p1, d0, d1);
     if (b != NULL) {
         b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
-        // b = ggml_repeat(ctx, b, x);
-        x = ggml_add(ctx, x, b);
+        x = ggml_add_inplace(ctx, x, b);
     }
     return x;
 }
@@ -2067,6 +2057,7 @@ protected:
     std::pair<int, int> dilation;
     bool bias;
     bool direct = false;
+    float scale = 1.f;
 
     void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
         enum ggml_type wtype = GGML_TYPE_F16;
@@ -2097,6 +2088,10 @@ public:
         direct = true;
     }
 
+    void set_scale(float scale_value) {  // scale_value must be non-zero: ggml_nn_conv_2d multiplies the conv result by 1.f / scale
+        scale = scale_value;             // forward() passes this to ggml_nn_conv_2d; the default 1.f skips both scaling ops
+    }
+
     std::string get_desc() {
         return "Conv2d";
     }
@@ -2107,11 +2102,18 @@ public:
         if (bias) {
             b = params["bias"];
         }
-        if (direct) {
-            return ggml_nn_conv_2d_direct(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
-        } else {
-            return ggml_nn_conv_2d(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
-        }
+        return ggml_nn_conv_2d(ctx,
+                               x,
+                               w,
+                               b,
+                               stride.second,
+                               stride.first,
+                               padding.second,
+                               padding.first,
+                               dilation.second,
+                               dilation.first,
+                               direct,
+                               scale);
     }
 };
 
diff --git a/qwen_image.hpp b/qwen_image.hpp
index 630e553..ce4e62d 100644
--- a/qwen_image.hpp
+++ b/qwen_image.hpp
@@ -535,7 +535,7 @@ namespace Qwen {
                 }
             }
             LOG_ERROR("qwen_image_params.num_layers: %ld", qwen_image_params.num_layers);
-            qwen_image                   = QwenImageModel(qwen_image_params);
+            qwen_image = QwenImageModel(qwen_image_params);
             qwen_image.init(params_ctx, tensor_types, prefix);
         }
 
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 4291280..3de9314 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -330,13 +330,6 @@ public:
 
         if (sd_version_is_sdxl(version)) {
             scale_factor = 0.13025f;
-            if (strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0 && strlen(SAFE_STR(sd_ctx_params->taesd_path)) == 0) {
-                LOG_WARN(
-                    "!!!It looks like you are using SDXL model. "
-                    "If you find that the generated images are completely black, "
-                    "try specifying SDXL VAE FP16 Fix with the --vae parameter. "
-                    "You can find it here: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors");
-            }
         } else if (sd_version_is_sd3(version)) {
             scale_factor = 1.5305f;
         } else if (sd_version_is_flux(version)) {
@@ -517,6 +510,11 @@ public:
                     LOG_INFO("Using Conv2d direct in the vae model");
                     first_stage_model->enable_conv2d_direct();
                 }
+                if (sd_version_is_sdxl(version) && strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0) {  // same SDXL-family check that selects scale_factor above
+                    float vae_conv_2d_scale = 1.f / 32.f;  // conv inputs are multiplied by this and the conv result divided by it (see ggml_nn_conv_2d)
+                    LOG_WARN("No VAE specified with --vae, using Conv2D scale %.3f", vae_conv_2d_scale);
+                    first_stage_model->set_conv2d_scale(vae_conv_2d_scale);
+                }
                 first_stage_model->alloc_params_buffer();
                 first_stage_model->get_param_tensors(tensors, "first_stage_model");
             } else {
diff --git a/vae.hpp b/vae.hpp
index 622b8bb..20d97a2 100644
--- a/vae.hpp
+++ b/vae.hpp
@@ -530,6 +530,7 @@ struct VAE : public GGMLRunner {
                          struct ggml_context* output_ctx)                                                         = 0;
     virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) = 0;
     virtual void enable_conv2d_direct(){};
+    virtual void set_conv2d_scale(float scale) { SD_UNUSED(scale); };  // default ignores the value; AutoEncoderKL overrides it to configure its Conv2d blocks
 };
 
 struct AutoEncoderKL : public VAE {
@@ -558,6 +559,17 @@ struct AutoEncoderKL : public VAE {
         }
     }
 
+    void set_conv2d_scale(float scale) {  // applies `scale` to each block from ae.get_all_blocks() that describes itself as "Conv2d"
+        std::vector<GGMLBlock*> all_blocks;
+        ae.get_all_blocks(all_blocks);
+        for (GGMLBlock* blk : all_blocks) {
+            if (blk->get_desc() != "Conv2d") {
+                continue;  // blocks are recognised by their description string
+            }
+            ((Conv2d*)blk)->set_scale(scale);
+        }
+    }
+
     std::string get_desc() {
         return "vae";
     }