mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-27 00:26:38 +00:00
sync: update ggml and revert vulkan workarounds for Anima and Ernie (#1710)
This commit is contained in:
parent
9ee77fc227
commit
3973015ed7
2
ggml
2
ggml
@ -1 +1 @@
|
|||||||
Subproject commit 3af5f5760e19a96427f5f7a93b79cbdf3d4b265b
|
Subproject commit eced84c86f8b012c752c016f7fe789adea168e1e
|
||||||
@ -227,7 +227,6 @@ namespace Anima {
|
|||||||
k4 = k_norm->forward(ctx, k4);
|
k4 = k_norm->forward(ctx, k4);
|
||||||
|
|
||||||
ggml_tensor* attn_out = nullptr;
|
ggml_tensor* attn_out = nullptr;
|
||||||
float scale = (sd_backend_is(ctx->backend, "Vulkan") && ctx->flash_attn_enabled) ? 1.0f / 32.0f : 1.0f;
|
|
||||||
if (pe_q != nullptr || pe_k != nullptr) {
|
if (pe_q != nullptr || pe_k != nullptr) {
|
||||||
if (pe_q == nullptr) {
|
if (pe_q == nullptr) {
|
||||||
pe_q = pe_k;
|
pe_q = pe_k;
|
||||||
@ -245,8 +244,7 @@ namespace Anima {
|
|||||||
num_heads,
|
num_heads,
|
||||||
nullptr,
|
nullptr,
|
||||||
true,
|
true,
|
||||||
ctx->flash_attn_enabled,
|
ctx->flash_attn_enabled);
|
||||||
scale);
|
|
||||||
} else {
|
} else {
|
||||||
auto q_flat = ggml_reshape_3d(ctx->ggml_ctx, q4, head_dim * num_heads, L_q, N);
|
auto q_flat = ggml_reshape_3d(ctx->ggml_ctx, q4, head_dim * num_heads, L_q, N);
|
||||||
auto k_flat = ggml_reshape_3d(ctx->ggml_ctx, k4, head_dim * num_heads, L_k, N);
|
auto k_flat = ggml_reshape_3d(ctx->ggml_ctx, k4, head_dim * num_heads, L_k, N);
|
||||||
@ -258,8 +256,7 @@ namespace Anima {
|
|||||||
num_heads,
|
num_heads,
|
||||||
nullptr,
|
nullptr,
|
||||||
false,
|
false,
|
||||||
ctx->flash_attn_enabled,
|
ctx->flash_attn_enabled);
|
||||||
scale);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return out_proj->forward(ctx, attn_out);
|
return out_proj->forward(ctx, attn_out);
|
||||||
|
|||||||
@ -162,8 +162,6 @@ namespace ErnieImage {
|
|||||||
int64_t S = x->ne[1];
|
int64_t S = x->ne[1];
|
||||||
int64_t N = x->ne[2];
|
int64_t N = x->ne[2];
|
||||||
|
|
||||||
float scale = (sd_backend_is(ctx->backend, "Vulkan") && ctx->flash_attn_enabled) ? 1.0f / 32.0f : 1.0f;
|
|
||||||
|
|
||||||
auto q = to_q->forward(ctx, x);
|
auto q = to_q->forward(ctx, x);
|
||||||
auto k = to_k->forward(ctx, x);
|
auto k = to_k->forward(ctx, x);
|
||||||
auto v = to_v->forward(ctx, x);
|
auto v = to_v->forward(ctx, x);
|
||||||
@ -184,7 +182,7 @@ namespace ErnieImage {
|
|||||||
k = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, k, 0, 2, 1, 3)); // [N, heads, S, head_dim]
|
k = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, k, 0, 2, 1, 3)); // [N, heads, S, head_dim]
|
||||||
k = ggml_reshape_3d(ctx->ggml_ctx, k, k->ne[0], k->ne[1], k->ne[2] * k->ne[3]);
|
k = ggml_reshape_3d(ctx->ggml_ctx, k, k->ne[0], k->ne[1], k->ne[2] * k->ne[3]);
|
||||||
|
|
||||||
x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, num_heads, attention_mask, true, ctx->flash_attn_enabled, scale); // [N, S, hidden_size]
|
x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, num_heads, attention_mask, true, ctx->flash_attn_enabled); // [N, S, hidden_size]
|
||||||
x = to_out_0->forward(ctx, x);
|
x = to_out_0->forward(ctx, x);
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user