Add detection and wiring for the Qwen Image Layered model variant.

* model.cpp: get_sd_version() returns VERSION_QWEN_IMAGE_LAYERED when the
  Qwen Image marker tensor is found and the tensor map also contains
  model.diffusion_model.time_text_embed.addition_t_embedding.weight;
  otherwise it still returns VERSION_QWEN_IMAGE.
* model.h: adds the VERSION_QWEN_IMAGE_LAYERED enumerator directly after
  VERSION_QWEN_IMAGE and makes sd_version_is_qwen_image() accept it. Both
  operands of || compare against `version`; a bare non-zero enumerator as the
  right-hand operand would be tested for truthiness on its own and make the
  predicate true for every version.
* qwen_image.hpp: builds the optional addition_t_embedding block with
  std::shared_ptr (same form as timestep_embedder) and sets
  use_additional_t_cond when the version is VERSION_QWEN_IMAGE_LAYERED.
* stable-diffusion.cpp: adds the "Qwen Image Layered" display string directly
  after "Qwen Image", mirroring the enumerator order.

NOTE(review): this copy of the patch was received with line breaks,
indentation and template argument lists stripped. Whitespace on context lines
and the <GGMLBlock>/<Embedding> template arguments were reconstructed; confirm
them against the target tree (or apply with whitespace tolerance) before use.

diff --git a/model.cpp b/model.cpp
index 01a8c45..58c485f 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1051,6 +1051,9 @@ SDVersion ModelLoader::get_sd_version() {
             return VERSION_SD3;
         }
         if (tensor_storage.name.find("model.diffusion_model.transformer_blocks.0.img_mod.1.weight") != std::string::npos) {
+            if (tensor_storage_map.find("model.diffusion_model.time_text_embed.addition_t_embedding.weight") != tensor_storage_map.end()) {
+                return VERSION_QWEN_IMAGE_LAYERED;
+            }
             return VERSION_QWEN_IMAGE;
         }
         if (tensor_storage.name.find("model.diffusion_model.double_stream_modulation_img.lin.weight") != std::string::npos) {
diff --git a/model.h b/model.h
index d38aee1..5df07a2 100644
--- a/model.h
+++ b/model.h
@@ -43,6 +43,7 @@ enum SDVersion {
     VERSION_WAN2_2_I2V,
     VERSION_WAN2_2_TI2V,
     VERSION_QWEN_IMAGE,
+    VERSION_QWEN_IMAGE_LAYERED,
     VERSION_FLUX2,
     VERSION_Z_IMAGE,
     VERSION_OVIS_IMAGE,
@@ -113,7 +114,7 @@ static inline bool sd_version_is_wan(SDVersion version) {
 }
 
 static inline bool sd_version_is_qwen_image(SDVersion version) {
-    if (version == VERSION_QWEN_IMAGE) {
+    if (version == VERSION_QWEN_IMAGE || version == VERSION_QWEN_IMAGE_LAYERED) {
         return true;
     }
     return false;
diff --git a/qwen_image.hpp b/qwen_image.hpp
index 5ace82d..b1abffb 100644
--- a/qwen_image.hpp
+++ b/qwen_image.hpp
@@ -47,12 +47,13 @@ namespace Qwen {
     struct QwenTimestepProjEmbeddings : public GGMLBlock {
     protected:
         bool use_additional_t_cond;
+
     public:
-        QwenTimestepProjEmbeddings(int64_t embedding_dim, bool use_additional_t_cond = false) :
-        use_additional_t_cond(use_additional_t_cond) {
+        QwenTimestepProjEmbeddings(int64_t embedding_dim, bool use_additional_t_cond = false)
+            : use_additional_t_cond(use_additional_t_cond) {
             blocks["timestep_embedder"] = std::shared_ptr<GGMLBlock>(new TimestepEmbedding(256, embedding_dim));
             if (use_additional_t_cond) {
-                blocks["addition_t_embedding"] = std::make_shared<GGMLBlock>(new Embedding(2, embedding_dim));
+                blocks["addition_t_embedding"] = std::shared_ptr<GGMLBlock>(new Embedding(2, embedding_dim));
             }
         }
 
@@ -69,7 +70,7 @@ namespace Qwen {
                 auto addition_t_embedding = std::dynamic_pointer_cast<Embedding>(blocks["addition_t_embedding"]);
 
                 auto addition_t_emb = addition_t_embedding->forward(ctx, addition_t_cond);
-                timesteps_emb = ggml_add(ctx->ggml_ctx, timesteps_emb, addition_t_emb);
+                timesteps_emb       = ggml_add(ctx->ggml_ctx, timesteps_emb, addition_t_emb);
             }
             return timesteps_emb;
         }
@@ -542,6 +543,9 @@ namespace Qwen {
                     continue;
                 }
             }
+            if (version == VERSION_QWEN_IMAGE_LAYERED) {
+                qwen_image_params.use_additional_t_cond = true;
+            }
             LOG_INFO("qwen_image_params.num_layers: %ld", qwen_image_params.num_layers);
             qwen_image = QwenImageModel(qwen_image_params);
             qwen_image.init(params_ctx, tensor_storage_map, prefix);
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index e3f49aa..2bc46d1 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -44,6 +44,7 @@ const char* model_version_to_str[] = {
     "Wan 2.2 I2V",
     "Wan 2.2 TI2V",
     "Qwen Image",
+    "Qwen Image Layered",
     "Flux.2",
     "Z-Image",
     "Ovis Image",