auto detect z-image-omni

This commit is contained in:
leejet 2026-01-27 00:56:04 +08:00
parent 7a34440eff
commit 5ea20c1148
3 changed files with 20 additions and 4 deletions

View File

@ -1039,6 +1039,8 @@ SDVersion ModelLoader::get_sd_version() {
bool is_xl = false; bool is_xl = false;
bool is_flux = false; bool is_flux = false;
bool is_flux2 = false; bool is_flux2 = false;
bool is_z_image = false;
bool is_z_image_omni = false;
bool has_single_block_47 = false; bool has_single_block_47 = false;
bool is_wan = false; bool is_wan = false;
int64_t patch_embedding_channels = 0; int64_t patch_embedding_channels = 0;
@ -1071,7 +1073,10 @@ SDVersion ModelLoader::get_sd_version() {
return VERSION_OVIS_IMAGE; return VERSION_OVIS_IMAGE;
} }
if (tensor_storage.name.find("model.diffusion_model.cap_embedder.0.weight") != std::string::npos) { if (tensor_storage.name.find("model.diffusion_model.cap_embedder.0.weight") != std::string::npos) {
return VERSION_Z_IMAGE; is_z_image = true;
}
if (tensor_storage.name.find("model.diffusion_model.siglip_embedder.0.weight") != std::string::npos) {
is_z_image_omni = true;
} }
if (tensor_storage.name.find("model.diffusion_model.blocks.0.cross_attn.norm_k.weight") != std::string::npos) { if (tensor_storage.name.find("model.diffusion_model.blocks.0.cross_attn.norm_k.weight") != std::string::npos) {
is_wan = true; is_wan = true;
@ -1174,6 +1179,13 @@ SDVersion ModelLoader::get_sd_version() {
return VERSION_FLUX2_KLEIN; return VERSION_FLUX2_KLEIN;
} }
if (is_z_image) {
if (is_z_image_omni) {
return VERSION_Z_IMAGE_OMNI;
}
return VERSION_Z_IMAGE;
}
if (token_embedding_weight.ne[0] == 768) { if (token_embedding_weight.ne[0] == 768) {
if (is_inpaint) { if (is_inpaint) {
return VERSION_SD1_INPAINT; return VERSION_SD1_INPAINT;

View File

@ -48,6 +48,7 @@ enum SDVersion {
VERSION_FLUX2, VERSION_FLUX2,
VERSION_FLUX2_KLEIN, VERSION_FLUX2_KLEIN,
VERSION_Z_IMAGE, VERSION_Z_IMAGE,
VERSION_Z_IMAGE_OMNI,
VERSION_OVIS_IMAGE, VERSION_OVIS_IMAGE,
VERSION_COUNT, VERSION_COUNT,
}; };
@ -123,7 +124,7 @@ static inline bool sd_version_is_qwen_image(SDVersion version) {
} }
static inline bool sd_version_is_z_image(SDVersion version) { static inline bool sd_version_is_z_image(SDVersion version) {
if (version == VERSION_Z_IMAGE) { if (version == VERSION_Z_IMAGE || version == VERSION_Z_IMAGE_OMNI) {
return true; return true;
} }
return false; return false;

View File

@ -51,6 +51,7 @@ const char* model_version_to_str[] = {
"Flux.2", "Flux.2",
"Flux.2 klein", "Flux.2 klein",
"Z-Image", "Z-Image",
"Z-Image-Omni",
"Ovis Image", "Ovis Image",
}; };
@ -3713,8 +3714,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
ggml_tensor* latent = sd_ctx->sd->encode_first_stage(work_ctx, img); ggml_tensor* latent = sd_ctx->sd->encode_first_stage(work_ctx, img);
ref_latents.push_back(latent); ref_latents.push_back(latent);
auto clip_vision_output = sd_ctx->sd->get_clip_vision_output(work_ctx, *ref_images[i], false, -2); if (sd_ctx->sd->version == VERSION_Z_IMAGE_OMNI) {
ref_clip_feats.push_back(clip_vision_output); auto clip_vision_output = sd_ctx->sd->get_clip_vision_output(work_ctx, *ref_images[i], false, -2);
ref_clip_feats.push_back(clip_vision_output);
}
} }
if (sd_img_gen_params->init_image.data != nullptr || sd_img_gen_params->ref_images_count > 0) { if (sd_img_gen_params->init_image.data != nullptr || sd_img_gen_params->ref_images_count > 0) {