make qwen image a litter faster

This commit is contained in:
leejet 2026-01-25 00:25:06 +08:00
parent 6f4b49239c
commit e2600bd442
2 changed files with 22 additions and 8 deletions

View File

@ -690,6 +690,18 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_slice(struct ggml_context* ctx,
int64_t end,
bool cont = true) {
GGML_ASSERT(dim >= 0 && dim < 4);
if (x->ne[dim] == 1) {
return x;
}
while (start < 0) {
start = x->ne[dim] + start;
}
while (end < 0) {
end = x->ne[dim] + end;
}
GGML_ASSERT(end > start);
GGML_ASSERT(start >= 0 && start < x->ne[dim]);
GGML_ASSERT(end > start && end <= x->ne[dim]);
int64_t slice_size = end - start;
int64_t slice_ne[4] = {x->ne[0], x->ne[1], x->ne[2], x->ne[3]};
@ -944,6 +956,9 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_linear(struct ggml_context* ctx,
bool force_prec_f32 = false,
float scale = 1.f) {
if (scale != 1.f) {
if (!ggml_is_contiguous(x)) {
x = ggml_cont(ctx, x);
}
x = ggml_scale(ctx, x, scale);
}
if (x->ne[2] * x->ne[3] > 1024) {

View File

@ -163,25 +163,24 @@ namespace Qwen {
auto v = ggml_concat(ctx->ggml_ctx, txt_v, img_v, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
auto attn = Rope::attention(ctx, q, k, v, pe, mask, (1.0f / 128.f)); // [N, n_txt_token + n_img_token, n_head*d_head]
attn = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, attn, 0, 2, 1, 3)); // [n_txt_token + n_img_token, N, hidden_size]
auto txt_attn_out = ggml_view_3d(ctx->ggml_ctx,
attn,
attn->ne[0],
attn->ne[1],
txt->ne[1],
attn->ne[2],
attn->nb[1],
attn->nb[2],
0); // [n_txt_token, N, hidden_size]
txt_attn_out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, txt_attn_out, 0, 2, 1, 3)); // [N, n_txt_token, hidden_size]
0); // [N, n_txt_token, n_head*d_head]
auto img_attn_out = ggml_view_3d(ctx->ggml_ctx,
attn,
attn->ne[0],
attn->ne[1],
img->ne[1],
attn->ne[2],
attn->nb[1],
attn->nb[2],
attn->nb[2] * txt->ne[1]); // [n_img_token, N, hidden_size]
img_attn_out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, img_attn_out, 0, 2, 1, 3)); // [N, n_img_token, hidden_size]
txt->ne[1] * attn->nb[1]); // [N, n_img_token, n_head*d_head]
img_attn_out = ggml_cont(ctx->ggml_ctx, img_attn_out);
txt_attn_out = ggml_cont(ctx->ggml_ctx, txt_attn_out);
img_attn_out = to_out_0->forward(ctx, img_attn_out);
txt_attn_out = to_add_out->forward(ctx, txt_attn_out);