From 5ea20c114872bd0a6530fcf905a248fd20f1f7d0 Mon Sep 17 00:00:00 2001 From: leejet Date: Tue, 27 Jan 2026 00:56:04 +0800 Subject: [PATCH] auto detect z-image-omni --- model.cpp | 14 +++++++++++++- model.h | 3 ++- stable-diffusion.cpp | 7 +++++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/model.cpp b/model.cpp index 253dd25..4a6fad8 100644 --- a/model.cpp +++ b/model.cpp @@ -1039,6 +1039,8 @@ SDVersion ModelLoader::get_sd_version() { bool is_xl = false; bool is_flux = false; bool is_flux2 = false; + bool is_z_image = false; + bool is_z_image_omni = false; bool has_single_block_47 = false; bool is_wan = false; int64_t patch_embedding_channels = 0; @@ -1071,7 +1073,10 @@ SDVersion ModelLoader::get_sd_version() { return VERSION_OVIS_IMAGE; } if (tensor_storage.name.find("model.diffusion_model.cap_embedder.0.weight") != std::string::npos) { - return VERSION_Z_IMAGE; + is_z_image = true; + } + if (tensor_storage.name.find("model.diffusion_model.siglip_embedder.0.weight") != std::string::npos) { + is_z_image_omni = true; } if (tensor_storage.name.find("model.diffusion_model.blocks.0.cross_attn.norm_k.weight") != std::string::npos) { is_wan = true; @@ -1174,6 +1179,13 @@ SDVersion ModelLoader::get_sd_version() { return VERSION_FLUX2_KLEIN; } + if (is_z_image) { + if (is_z_image_omni) { + return VERSION_Z_IMAGE_OMNI; + } + return VERSION_Z_IMAGE; + } + if (token_embedding_weight.ne[0] == 768) { if (is_inpaint) { return VERSION_SD1_INPAINT; diff --git a/model.h b/model.h index e16ac3a..553e159 100644 --- a/model.h +++ b/model.h @@ -48,6 +48,7 @@ enum SDVersion { VERSION_FLUX2, VERSION_FLUX2_KLEIN, VERSION_Z_IMAGE, + VERSION_Z_IMAGE_OMNI, VERSION_OVIS_IMAGE, VERSION_COUNT, }; @@ -123,7 +124,7 @@ static inline bool sd_version_is_qwen_image(SDVersion version) { } static inline bool sd_version_is_z_image(SDVersion version) { - if (version == VERSION_Z_IMAGE) { + if (version == VERSION_Z_IMAGE || version == VERSION_Z_IMAGE_OMNI) { return true; } return false; diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index ea5d5e1..1eb1c80 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -51,6 +51,7 @@ const char* model_version_to_str[] = { "Flux.2", "Flux.2 klein", "Z-Image", + "Z-Image-Omni", "Ovis Image", }; @@ -3713,8 +3714,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g ggml_tensor* latent = sd_ctx->sd->encode_first_stage(work_ctx, img); ref_latents.push_back(latent); - auto clip_vision_output = sd_ctx->sd->get_clip_vision_output(work_ctx, *ref_images[i], false, -2); - ref_clip_feats.push_back(clip_vision_output); + if (sd_ctx->sd->version == VERSION_Z_IMAGE_OMNI) { + auto clip_vision_output = sd_ctx->sd->get_clip_vision_output(work_ctx, *ref_images[i], false, -2); + ref_clip_feats.push_back(clip_vision_output); + } } if (sd_img_gen_params->init_image.data != nullptr || sd_img_gen_params->ref_images_count > 0) {