mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-03-24 02:08:51 +00:00
make qwen image a little faster
This commit is contained in:
parent
6f4b49239c
commit
e2600bd442
@ -690,6 +690,18 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_slice(struct ggml_context* ctx,
|
||||
int64_t end,
|
||||
bool cont = true) {
|
||||
GGML_ASSERT(dim >= 0 && dim < 4);
|
||||
if (x->ne[dim] == 1) {
|
||||
return x;
|
||||
}
|
||||
while (start < 0) {
|
||||
start = x->ne[dim] + start;
|
||||
}
|
||||
while (end < 0) {
|
||||
end = x->ne[dim] + end;
|
||||
}
|
||||
GGML_ASSERT(end > start);
|
||||
GGML_ASSERT(start >= 0 && start < x->ne[dim]);
|
||||
GGML_ASSERT(end > start && end <= x->ne[dim]);
|
||||
|
||||
int64_t slice_size = end - start;
|
||||
int64_t slice_ne[4] = {x->ne[0], x->ne[1], x->ne[2], x->ne[3]};
|
||||
@ -944,6 +956,9 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_linear(struct ggml_context* ctx,
|
||||
bool force_prec_f32 = false,
|
||||
float scale = 1.f) {
|
||||
if (scale != 1.f) {
|
||||
if (!ggml_is_contiguous(x)) {
|
||||
x = ggml_cont(ctx, x);
|
||||
}
|
||||
x = ggml_scale(ctx, x, scale);
|
||||
}
|
||||
if (x->ne[2] * x->ne[3] > 1024) {
|
||||
|
||||
@ -162,26 +162,25 @@ namespace Qwen {
|
||||
auto k = ggml_concat(ctx->ggml_ctx, txt_k, img_k, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
|
||||
auto v = ggml_concat(ctx->ggml_ctx, txt_v, img_v, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
|
||||
|
||||
auto attn = Rope::attention(ctx, q, k, v, pe, mask, (1.0f / 128.f)); // [N, n_txt_token + n_img_token, n_head*d_head]
|
||||
attn = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, attn, 0, 2, 1, 3)); // [n_txt_token + n_img_token, N, hidden_size]
|
||||
auto attn = Rope::attention(ctx, q, k, v, pe, mask, (1.0f / 128.f)); // [N, n_txt_token + n_img_token, n_head*d_head]
|
||||
auto txt_attn_out = ggml_view_3d(ctx->ggml_ctx,
|
||||
attn,
|
||||
attn->ne[0],
|
||||
attn->ne[1],
|
||||
txt->ne[1],
|
||||
attn->ne[2],
|
||||
attn->nb[1],
|
||||
attn->nb[2],
|
||||
0); // [n_txt_token, N, hidden_size]
|
||||
txt_attn_out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, txt_attn_out, 0, 2, 1, 3)); // [N, n_txt_token, hidden_size]
|
||||
0); // [N, n_txt_token, n_head*d_head]
|
||||
auto img_attn_out = ggml_view_3d(ctx->ggml_ctx,
|
||||
attn,
|
||||
attn->ne[0],
|
||||
attn->ne[1],
|
||||
img->ne[1],
|
||||
attn->ne[2],
|
||||
attn->nb[1],
|
||||
attn->nb[2],
|
||||
attn->nb[2] * txt->ne[1]); // [n_img_token, N, hidden_size]
|
||||
img_attn_out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, img_attn_out, 0, 2, 1, 3)); // [N, n_img_token, hidden_size]
|
||||
txt->ne[1] * attn->nb[1]); // [N, n_img_token, n_head*d_head]
|
||||
img_attn_out = ggml_cont(ctx->ggml_ctx, img_attn_out);
|
||||
txt_attn_out = ggml_cont(ctx->ggml_ctx, txt_attn_out);
|
||||
|
||||
img_attn_out = to_out_0->forward(ctx, img_attn_out);
|
||||
txt_attn_out = to_add_out->forward(ctx, txt_attn_out);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user