Support a dynamic number of Qwen image transformer blocks

2025-12-13 05:48:56 +00:00 · 2025-10-10 21:49:09 +08:00 · 2025-10-10 21:49:09 +08:00 · a123e25ae8
commit a123e25ae8
parent 9fa817f014
2 changed files with 20 additions and 0 deletions
--- a/qwen_image.hpp
+++ b/qwen_image.hpp
@ -513,6 +513,25 @@ namespace Qwen {
                        bool flash_attn                     = false)
            : GGMLRunner(backend, offload_params_to_cpu) {
            qwen_image_params.flash_attn = flash_attn;
+            qwen_image_params.num_layers = 0;
+            for (auto pair : tensor_types) {
+                std::string tensor_name = pair.first;
+                if (tensor_name.find(prefix) == std::string::npos)
+                    continue;
+                size_t pos = tensor_name.find("transformer_blocks.");
+                if (pos != std::string::npos) {
+                    tensor_name = tensor_name.substr(pos);  // remove prefix
+                    auto items  = split_string(tensor_name, '.');
+                    if (items.size() > 1) {
+                        int block_index = atoi(items[1].c_str());
+                        if (block_index + 1 > qwen_image_params.num_layers) {
+                            qwen_image_params.num_layers = block_index + 1;
+                        }
+                    }
+                    continue;
+                }
+            }
+            LOG_ERROR("qwen_image_params.num_layers: %ld", qwen_image_params.num_layers);
            qwen_image                   = QwenImageModel(qwen_image_params);
            qwen_image.init(params_ctx, tensor_types, prefix);
        }
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@ -615,6 +615,7 @@ public:

        if (vae_decode_only) {
            ignore_tensors.insert("first_stage_model.encoder");
+            ignore_tensors.insert("first_stage_model.conv1");
            ignore_tensors.insert("first_stage_model.quant");
            ignore_tensors.insert("text_encoders.qwen2vl.visual.");
        }