diff --git a/src/ggml_extend.hpp b/src/ggml_extend.hpp index 69059c06..2f82af0d 100644 --- a/src/ggml_extend.hpp +++ b/src/ggml_extend.hpp @@ -3380,10 +3380,10 @@ public: forward_params.linear.scale = scale; out = ctx->weight_adapter->forward_with_lora(ctx->ggml_ctx, ctx->backend, x, w, linear_bias, prefix, forward_params); } else { - out = ggml_ext_linear(ctx->ggml_ctx, x, w, linear_bias, force_prec_f32, 1 / 128.f); + out = ggml_ext_linear(ctx->ggml_ctx, x, w, linear_bias, force_prec_f32, scale); } if (has_weight_scale) { - out = ggml_mul(ctx->ggml_ctx, out, params["weight_scale"]); + out = ggml_mul(ctx->ggml_ctx, out, params["weight_scale"]); if (b != nullptr) { out = ggml_add_inplace(ctx->ggml_ctx, out, b); } diff --git a/src/ideogram4.hpp b/src/ideogram4.hpp index 4f2a32e9..58cd7638 100644 --- a/src/ideogram4.hpp +++ b/src/ideogram4.hpp @@ -181,7 +181,7 @@ namespace Ideogram4 { q = norm_q->forward(ctx, q); k = norm_k->forward(ctx, k); - x = Rope::attention(ctx, q, k, v, pe, mask, 1.f / std::sqrt(static_cast<float>(head_dim)), false); + x = Rope::attention(ctx, q, k, v, pe, mask, 1.f / 128.f, false); x = out_proj->forward(ctx, x); return x; }