diff --git a/ggml_extend.hpp b/ggml_extend.hpp index bf95e33..9d5ea31 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -1228,7 +1228,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_attention_ext(struct ggml_context bool diag_mask_inf = false, bool skip_reshape = false, bool flash_attn = false, - float kv_scale = 1.0f / 128.f) { // avoid overflow + float kv_scale = 1.0f) { // avoid overflow int64_t L_q; int64_t L_k; int64_t C; @@ -2184,7 +2184,7 @@ public: bool bias = true, bool force_f32 = false, bool force_prec_f32 = false, - float scale = 1.f / 128.f) + float scale = 1.f) : in_features(in_features), out_features(out_features), bias(bias),