diff --git a/src/ggml_extend.hpp b/src/ggml_extend.hpp
index 69059c06..2f82af0d 100644
--- a/src/ggml_extend.hpp
+++ b/src/ggml_extend.hpp
@@ -3380,10 +3380,10 @@ public:
             forward_params.linear.scale          = scale;
             out                                  = ctx->weight_adapter->forward_with_lora(ctx->ggml_ctx, ctx->backend, x, w, linear_bias, prefix, forward_params);
         } else {
-            out = ggml_ext_linear(ctx->ggml_ctx, x, w, linear_bias, force_prec_f32, 1 / 128.f);
+            out = ggml_ext_linear(ctx->ggml_ctx, x, w, linear_bias, force_prec_f32, scale);
         }
         if (has_weight_scale) {
-            out               = ggml_mul(ctx->ggml_ctx, out, params["weight_scale"]);
+            out = ggml_mul(ctx->ggml_ctx, out, params["weight_scale"]);
             if (b != nullptr) {
                 out = ggml_add_inplace(ctx->ggml_ctx, out, b);
             }
diff --git a/src/ideogram4.hpp b/src/ideogram4.hpp
index 4f2a32e9..58cd7638 100644
--- a/src/ideogram4.hpp
+++ b/src/ideogram4.hpp
@@ -181,7 +181,7 @@ namespace Ideogram4 {
             q = norm_q->forward(ctx, q);
             k = norm_k->forward(ctx, k);
 
-            x = Rope::attention(ctx, q, k, v, pe, mask, 1.f / std::sqrt(static_cast<float>(head_dim)), false);
+            x = Rope::attention(ctx, q, k, v, pe, mask, 1.f / 128.f, false);
             x = out_proj->forward(ctx, x);
             return x;
         }