From 3973015ed7303fcb329358159ca3a7e899b42035 Mon Sep 17 00:00:00 2001 From: Daniele <57776841+daniandtheweb@users.noreply.github.com> Date: Fri, 26 Jun 2026 18:52:32 +0200 Subject: [PATCH] sync: update ggml and revert vulkan workarounds for Anima and Ernie (#1710) --- ggml | 2 +- src/model/diffusion/anima.hpp | 7 ++----- src/model/diffusion/ernie_image.hpp | 4 +--- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/ggml b/ggml index 3af5f576..eced84c8 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit 3af5f5760e19a96427f5f7a93b79cbdf3d4b265b +Subproject commit eced84c86f8b012c752c016f7fe789adea168e1e diff --git a/src/model/diffusion/anima.hpp b/src/model/diffusion/anima.hpp index 504904d4..6042516a 100644 --- a/src/model/diffusion/anima.hpp +++ b/src/model/diffusion/anima.hpp @@ -227,7 +227,6 @@ namespace Anima { k4 = k_norm->forward(ctx, k4); ggml_tensor* attn_out = nullptr; - float scale = (sd_backend_is(ctx->backend, "Vulkan") && ctx->flash_attn_enabled) ? 1.0f / 32.0f : 1.0f; if (pe_q != nullptr || pe_k != nullptr) { if (pe_q == nullptr) { pe_q = pe_k; @@ -245,8 +244,7 @@ namespace Anima { num_heads, nullptr, true, - ctx->flash_attn_enabled, - scale); + ctx->flash_attn_enabled); } else { auto q_flat = ggml_reshape_3d(ctx->ggml_ctx, q4, head_dim * num_heads, L_q, N); auto k_flat = ggml_reshape_3d(ctx->ggml_ctx, k4, head_dim * num_heads, L_k, N); @@ -258,8 +256,7 @@ namespace Anima { num_heads, nullptr, false, - ctx->flash_attn_enabled, - scale); + ctx->flash_attn_enabled); } return out_proj->forward(ctx, attn_out); diff --git a/src/model/diffusion/ernie_image.hpp b/src/model/diffusion/ernie_image.hpp index 0427b3b3..12fcada5 100644 --- a/src/model/diffusion/ernie_image.hpp +++ b/src/model/diffusion/ernie_image.hpp @@ -162,8 +162,6 @@ namespace ErnieImage { int64_t S = x->ne[1]; int64_t N = x->ne[2]; - float scale = (sd_backend_is(ctx->backend, "Vulkan") && ctx->flash_attn_enabled) ? 1.0f / 32.0f : 1.0f; - auto q = to_q->forward(ctx, x); auto k = to_k->forward(ctx, x); auto v = to_v->forward(ctx, x); @@ -184,7 +182,7 @@ namespace ErnieImage { k = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, k, 0, 2, 1, 3)); // [N, heads, S, head_dim] k = ggml_reshape_3d(ctx->ggml_ctx, k, k->ne[0], k->ne[1], k->ne[2] * k->ne[3]); - x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, num_heads, attention_mask, true, ctx->flash_attn_enabled, scale); // [N, S, hidden_size] + x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, num_heads, attention_mask, true, ctx->flash_attn_enabled); // [N, S, hidden_size] x = to_out_0->forward(ctx, x); return x; }