fix the issue that occurs when using CUDA with k-quants weights

leejet 2025-10-12 15:41:40 +08:00
parent 6ea2a75929
commit 98d6e71492
2 changed files with 13 additions and 3 deletions

View File

@@ -245,7 +245,7 @@ public:
                 Activation activation = Activation::GEGLU,
                 bool force_prec_f32   = false) {
         int64_t inner_dim = dim * mult;
+        SD_UNUSED(force_prec_f32);
         if (activation == Activation::GELU) {
             blocks["net.0"] = std::shared_ptr<GGMLBlock>(new GELU(dim, inner_dim));
         } else {
@@ -253,7 +253,7 @@ public:
         }
         // net_1 is nn.Dropout(), skip for inference
-        blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, force_prec_f32));
+        blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out));
     }
     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
@@ -264,7 +264,13 @@ public:
         auto net_2 = std::dynamic_pointer_cast<Linear>(blocks["net.2"]);
         x = net_0->forward(ctx, x);  // [ne3, ne2, ne1, inner_dim]
-        x = net_2->forward(ctx, x);  // [ne3, ne2, ne1, dim_out]
+        // The purpose of the scale here is to prevent NaN issues in certain situations.
+        // For example, when using Vulkan without enabling force_prec_f32,
+        // or when using CUDA but the weights are k-quants.
+        float scale = 1.f / 128.f;
+        x = ggml_scale(ctx, x, scale);
+        x = net_2->forward(ctx, x);  // [ne3, ne2, ne1, dim_out]
+        x = ggml_scale(ctx, x, 1.f / scale);
         return x;
     }
 };
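
The scale/unscale pair is mathematically a no-op, but it keeps the intermediates of the net.2 matmul small enough that backends which accumulate in fp16 (Vulkan without force_prec_f32, or the CUDA k-quant kernels) do not overflow past the fp16 maximum of 65504 and turn into inf/NaN. Below is a minimal CPU-only ggml sketch of the same pre-scale / matmul / post-scale pattern; the tensor shapes, fill values, and the standalone program around it are illustrative assumptions, not part of this commit.

// Sketch only: the pre-scale / matmul / post-scale pattern from this commit,
// reproduced with plain ggml on the CPU backend. Shapes and values are made up.
#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 64 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context* ctx = ggml_init(params);

    // Stand-ins for the net.2 weight and the incoming activations. Values this
    // large can push an fp16 dot-product accumulator past the fp16 max (65504).
    struct ggml_tensor* w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1024, 16);  // [in, out]
    struct ggml_tensor* x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1024, 1);   // [in, batch]
    ggml_set_f32(w, 8.0f);
    ggml_set_f32(x, 16.0f);

    // Same trick as FeedForward::forward(): scale down before the matmul,
    // undo the scale afterwards. Mathematically the result is unchanged.
    const float scale = 1.f / 128.f;
    struct ggml_tensor* cur = ggml_scale(ctx, x, scale);
    cur = ggml_mul_mat(ctx, w, cur);  // [out, batch] = [16, 1]
    cur = ggml_scale(ctx, cur, 1.f / scale);

    struct ggml_cgraph* gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, cur);
    ggml_graph_compute_with_ctx(ctx, gf, 1);

    // Each output element is 1024 * 8 * 16 = 131072, which is above the fp16
    // range; the f32 CPU path prints it fine, and the scaling is what keeps
    // fp16-accumulating backends from hitting inf/NaN on the way there.
    printf("out[0] = %f\n", ggml_get_f32_1d(cur, 0));

    ggml_free(ctx);
    return 0;
}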

View File

@@ -56,6 +56,10 @@
 #define __STATIC_INLINE__ static inline
 #endif
+#ifndef SD_UNUSED
+#define SD_UNUSED(x) (void)(x)
+#endif
 __STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const char* text, void*) {
     switch (level) {
         case GGML_LOG_LEVEL_DEBUG:
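
For reference, the SD_UNUSED macro added in this second file is the usual cast-to-void idiom for marking a parameter as deliberately unused, which keeps unused-parameter warnings quiet now that force_prec_f32 no longer reaches the net.2 Linear. A small self-contained sketch of the idiom follows; the feed_forward_inner_dim helper is made up for illustration.

// Illustration only: how a cast-to-void macro like SD_UNUSED silences
// unused-parameter warnings while keeping the parameter in the signature.
#include <cstdio>

#ifndef SD_UNUSED
#define SD_UNUSED(x) (void)(x)
#endif

// Hypothetical helper: force_prec_f32 stays in the API for compatibility
// but no longer affects the result.
static int feed_forward_inner_dim(int dim, int mult, bool force_prec_f32) {
    SD_UNUSED(force_prec_f32);  // expands to (void)(force_prec_f32);
    return dim * mult;
}

int main() {
    std::printf("inner_dim = %d\n", feed_forward_inner_dim(320, 4, false));
    return 0;
}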