workaround: avoid ggml cuda error

2026-06-25 15:46:40 +00:00 · 2025-10-23 22:44:09 +08:00 · 2025-10-23 22:44:09 +08:00 · 27272efbd2
commit 27272efbd2
parent 18a2804e16
1 changed file with 10 additions and 1 deletion
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -954,7 +954,16 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_linear(struct ggml_context* ctx,
    if (scale != 1.f) {
        x = ggml_scale(ctx, x, scale);
    }
-    x = ggml_mul_mat(ctx, w, x);
+    if (x->ne[2] * x->ne[3] > 1024) {
+        // workaround: avoid ggml cuda error
+        int64_t ne2 = x->ne[2];
+        int64_t ne3 = x->ne[3];
+        x = ggml_reshape_2d(ctx, x, x->ne[0], x->ne[1]*x->ne[2]*x->ne[3]);
+        x = ggml_mul_mat(ctx, w, x);
+        x = ggml_reshape_4d(ctx, x, x->ne[0], x->ne[1]/ne2/ne3, ne2, ne3);
+    } else {
+        x = ggml_mul_mat(ctx, w, x);
+    }
    if (force_prec_f32) {
        ggml_mul_mat_set_prec(x, GGML_PREC_F32);
    }