From e2600bd442bf6c588b5e03e15dcc3df3e32f3bf0 Mon Sep 17 00:00:00 2001 From: leejet Date: Sun, 25 Jan 2026 00:25:06 +0800 Subject: [PATCH] make qwen image a little faster --- ggml_extend.hpp | 15 +++++++++++++++ qwen_image.hpp | 15 +++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 692ba85..f61f7c6 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -690,6 +690,18 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_slice(struct ggml_context* ctx, int64_t end, bool cont = true) { GGML_ASSERT(dim >= 0 && dim < 4); + if (x->ne[dim] == 1) { + return x; + } + while (start < 0) { + start = x->ne[dim] + start; + } + while (end < 0) { + end = x->ne[dim] + end; + } + GGML_ASSERT(end > start); + GGML_ASSERT(start >= 0 && start < x->ne[dim]); + GGML_ASSERT(end > start && end <= x->ne[dim]); int64_t slice_size = end - start; int64_t slice_ne[4] = {x->ne[0], x->ne[1], x->ne[2], x->ne[3]}; @@ -944,6 +956,9 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_linear(struct ggml_context* ctx, bool force_prec_f32 = false, float scale = 1.f) { if (scale != 1.f) { + if (!ggml_is_contiguous(x)) { + x = ggml_cont(ctx, x); + } x = ggml_scale(ctx, x, scale); } if (x->ne[2] * x->ne[3] > 1024) { diff --git a/qwen_image.hpp b/qwen_image.hpp index ec2231b..dfa5397 100644 --- a/qwen_image.hpp +++ b/qwen_image.hpp @@ -162,26 +162,25 @@ namespace Qwen { auto k = ggml_concat(ctx->ggml_ctx, txt_k, img_k, 2); // [N, n_txt_token + n_img_token, n_head, d_head] auto v = ggml_concat(ctx->ggml_ctx, txt_v, img_v, 2); // [N, n_txt_token + n_img_token, n_head, d_head] - auto attn = Rope::attention(ctx, q, k, v, pe, mask, (1.0f / 128.f)); // [N, n_txt_token + n_img_token, n_head*d_head] - attn = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, attn, 0, 2, 1, 3)); // [n_txt_token + n_img_token, N, hidden_size] + auto attn = Rope::attention(ctx, q, k, v, pe, mask, (1.0f / 128.f)); // [N, n_txt_token + n_img_token, n_head*d_head] auto 
txt_attn_out = ggml_view_3d(ctx->ggml_ctx, attn, attn->ne[0], - attn->ne[1], txt->ne[1], + attn->ne[2], attn->nb[1], attn->nb[2], - 0); // [n_txt_token, N, hidden_size] - txt_attn_out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, txt_attn_out, 0, 2, 1, 3)); // [N, n_txt_token, hidden_size] + 0); // [N, n_txt_token, n_head*d_head] auto img_attn_out = ggml_view_3d(ctx->ggml_ctx, attn, attn->ne[0], - attn->ne[1], img->ne[1], + attn->ne[2], attn->nb[1], attn->nb[2], - attn->nb[2] * txt->ne[1]); // [n_img_token, N, hidden_size] - img_attn_out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, img_attn_out, 0, 2, 1, 3)); // [N, n_img_token, hidden_size] + txt->ne[1] * attn->nb[1]); // [N, n_img_token, n_head*d_head] + img_attn_out = ggml_cont(ctx->ggml_ctx, img_attn_out); + txt_attn_out = ggml_cont(ctx->ggml_ctx, txt_attn_out); img_attn_out = to_out_0->forward(ctx, img_attn_out); txt_attn_out = to_add_out->forward(ctx, txt_attn_out);