From 1798ec02ba21540172d552b098586f79f109262a Mon Sep 17 00:00:00 2001 From: leejet Date: Sun, 30 Nov 2025 22:54:13 +0800 Subject: [PATCH] fix nan issue that occurs when using CUDA with k-quants weights --- z_image.hpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/z_image.hpp b/z_image.hpp index 55c6125..b692a14 100644 --- a/z_image.hpp +++ b/z_image.hpp @@ -85,7 +85,15 @@ namespace ZImage { } hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) / multiple_of); blocks["w1"] = std::make_shared(dim, hidden_dim, false); - blocks["w2"] = std::make_shared(hidden_dim, dim, false); + + bool force_prec_f32 = false; + float scale = 1.f / 128.f; +#ifdef SD_USE_VULKAN + force_prec_f32 = true; +#endif + // The purpose of the scale here is to prevent NaN issues in certain situations. + // For example, when using CUDA but the weights are k-quants. + blocks["w2"] = std::make_shared(hidden_dim, dim, false, false, force_prec_f32, 1.f / 128.f); blocks["w3"] = std::make_shared(dim, hidden_dim, false); }