From 4edc3ad2ad89feb8b0cac427b2f945535fb01fcb Mon Sep 17 00:00:00 2001
From: leejet
Date: Mon, 13 Oct 2025 23:02:24 +0800
Subject: [PATCH] to_out.0 precision fix

---
 qwen_image.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/qwen_image.hpp b/qwen_image.hpp
index 3ac32de..630e553 100644
--- a/qwen_image.hpp
+++ b/qwen_image.hpp
@@ -94,12 +94,12 @@ namespace Qwen {
             blocks["norm_added_q"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
             blocks["norm_added_k"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
 
-            blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias));
-            // to_out.1 is nn.Dropout
-
             float scale = 1.f / 32.f;
             // The purpose of the scale here is to prevent NaN issues in certain situations.
             // For example when using CUDA but the weights are k-quants (not all prompts).
+            blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, false, scale));
+            // to_out.1 is nn.Dropout
+
             blocks["to_add_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_context_dim, out_bias, false, false, scale));
         }
 
@@ -159,7 +159,7 @@ namespace Qwen {
             auto k = ggml_concat(ctx, txt_k, img_k, 2);  // [N, n_txt_token + n_img_token, n_head, d_head]
             auto v = ggml_concat(ctx, txt_v, img_v, 2);  // [N, n_txt_token + n_img_token, n_head, d_head]
 
-            auto attn = Rope::attention(ctx, backend, q, k, v, pe, mask, flash_attn, (1.0f / 256.f));  // [N, n_txt_token + n_img_token, n_head*d_head]
+            auto attn = Rope::attention(ctx, backend, q, k, v, pe, mask, flash_attn, (1.0f / 128.f));  // [N, n_txt_token + n_img_token, n_head*d_head]
 
             attn = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3));  // [n_txt_token + n_img_token, N, hidden_size]
             auto txt_attn_out = ggml_view_3d(ctx, attn,