From 27272efbd26aca9d469de94c6b68f1c24992f7cc Mon Sep 17 00:00:00 2001 From: leejet Date: Thu, 23 Oct 2025 22:44:09 +0800 Subject: [PATCH] workaround: avoid ggml cuda error --- ggml_extend.hpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 02d82bc..c0e81c3 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -954,7 +954,16 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_linear(struct ggml_context* ctx, if (scale != 1.f) { x = ggml_scale(ctx, x, scale); } - x = ggml_mul_mat(ctx, w, x); + if (x->ne[2] * x->ne[3] > 1024) { + // workaround: avoid ggml cuda error + int64_t ne2 = x->ne[2]; + int64_t ne3 = x->ne[3]; + x = ggml_reshape_2d(ctx, x, x->ne[0], x->ne[1]*x->ne[2]*x->ne[3]); + x = ggml_mul_mat(ctx, w, x); + x = ggml_reshape_4d(ctx, x, x->ne[0], x->ne[1]/ne2/ne3, ne2, ne3); + } else { + x = ggml_mul_mat(ctx, w, x); + } if (force_prec_f32) { ggml_mul_mat_set_prec(x, GGML_PREC_F32); }