From 1c32fa03bc81cbc242d64923746d27f465897549 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Mon, 13 Oct 2025 00:01:06 +0800
Subject: [PATCH] fix: avoid generating black images when running T5 on the GPU
 (#882)

---
 stable-diffusion.cpp | 12 +-----------
 t5.hpp               |  4 +++-
 2 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 51f8cbe..62b40c6 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -338,17 +338,7 @@ public:
         bool clip_on_cpu = sd_ctx_params->keep_clip_on_cpu;
 
         {
-            clip_backend   = backend;
-            bool use_t5xxl = false;
-            if (sd_version_is_dit(version) && !sd_version_is_qwen_image(version)) {
-                use_t5xxl = true;
-            }
-            if (!clip_on_cpu && !ggml_backend_is_cpu(backend) && use_t5xxl) {
-                LOG_WARN(
-                    "!!!It appears that you are using the T5 model. Some backends may encounter issues with it."
-                    "If you notice that the generated images are completely black,"
-                    "try running the T5 model on the CPU using the --clip-on-cpu parameter.");
-            }
+            clip_backend = backend;
             if (clip_on_cpu && !ggml_backend_is_cpu(backend)) {
                 LOG_INFO("CLIP: Using CPU backend");
                 clip_backend = ggml_backend_cpu_init();
diff --git a/t5.hpp b/t5.hpp
index 062e37b..15f7af8 100644
--- a/t5.hpp
+++ b/t5.hpp
@@ -504,7 +504,9 @@ public:
     T5DenseGatedActDense(int64_t model_dim, int64_t ff_dim) {
         blocks["wi_0"] = std::shared_ptr<GGMLBlock>(new Linear(model_dim, ff_dim, false));
         blocks["wi_1"] = std::shared_ptr<GGMLBlock>(new Linear(model_dim, ff_dim, false));
-        blocks["wo"]   = std::shared_ptr<GGMLBlock>(new Linear(ff_dim, model_dim, false));
+        float scale    = 1.f / 32.f;
+        // A 1/32 scale is passed to the "wo" Linear below; its purpose is to prevent NaN issues on some backends (CUDA, ...).
+        blocks["wo"] = std::shared_ptr<GGMLBlock>(new Linear(ff_dim, model_dim, false, false, false, scale));
     }
 
     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {