Compare commits

...

3 Commits

Author  SHA1        Message                                                           Date
leejet  1798ec02ba  fix nan issue that occurs when using CUDA with k-quants weights  2025-11-30 22:54:13 +08:00
leejet  2fec01d2b3  add support for diffusers format lora                            2025-11-30 21:47:10 +08:00
leejet  c736364a28  add support for qwen3 4b gguf                                    2025-11-30 21:18:49 +08:00
2 changed files with 51 additions and 1 deletion

View File

@@ -133,6 +133,8 @@ std::string convert_cond_stage_model_name(std::string name, std::string prefix)
        {"attn_q.", "self_attn.q_proj."},
        {"attn_k.", "self_attn.k_proj."},
        {"attn_v.", "self_attn.v_proj."},
        {"attn_q_norm.", "self_attn.q_norm."},
        {"attn_k_norm.", "self_attn.k_norm."},
        {"attn_output.", "self_attn.o_proj."},
        {"attn_norm.", "input_layernorm."},
        {"ffn_down.", "mlp.down_proj."},
@@ -613,6 +615,44 @@ std::string convert_diffusers_dit_to_original_flux(std::string name) {
    return name;
}
std::string convert_diffusers_dit_to_original_lumina2(std::string name) {
    int num_layers = 30;
    int num_refiner_layers = 2;
    static std::unordered_map<std::string, std::string> z_image_name_map;
    if (z_image_name_map.empty()) {
        z_image_name_map["all_x_embedder.2-1."] = "x_embedder.";
        z_image_name_map["all_final_layer.2-1."] = "final_layer.";
        // --- transformer blocks ---
        auto add_attention_map = [&](const std::string& prefix, int num) {
            for (int i = 0; i < num; ++i) {
                std::string block_prefix = prefix + std::to_string(i) + ".";
                std::string dst_prefix = prefix + std::to_string(i) + ".";
                z_image_name_map[block_prefix + "attention.norm_q."] = dst_prefix + "attention.q_norm.";
                z_image_name_map[block_prefix + "attention.norm_k."] = dst_prefix + "attention.k_norm.";
                z_image_name_map[block_prefix + "attention.to_out.0."] = dst_prefix + "attention.out.";
                z_image_name_map[block_prefix + "attention.to_q.weight"] = dst_prefix + "attention.qkv.weight";
                z_image_name_map[block_prefix + "attention.to_q.bias"] = dst_prefix + "attention.qkv.bias";
                z_image_name_map[block_prefix + "attention.to_k.weight"] = dst_prefix + "attention.qkv.weight.1";
                z_image_name_map[block_prefix + "attention.to_k.bias"] = dst_prefix + "attention.qkv.bias.1";
                z_image_name_map[block_prefix + "attention.to_v.weight"] = dst_prefix + "attention.qkv.weight.2";
                z_image_name_map[block_prefix + "attention.to_v.bias"] = dst_prefix + "attention.qkv.bias.2";
            }
        };
        add_attention_map("noise_refiner.", num_refiner_layers);
        add_attention_map("context_refiner.", num_refiner_layers);
        add_attention_map("layers.", num_layers);
    }
    replace_with_prefix_map(name, z_image_name_map);
    return name;
}
std::string convert_diffusion_model_name(std::string name, std::string prefix, SDVersion version) {
    if (sd_version_is_sd1(version) || sd_version_is_sd2(version)) {
        name = convert_diffusers_unet_to_original_sd1(name);
@@ -622,6 +662,8 @@ std::string convert_diffusion_model_name(std::string name, std::string prefix, SDVersion version) {
        name = convert_diffusers_dit_to_original_sd3(name);
    } else if (sd_version_is_flux(version) || sd_version_is_flux2(version)) {
        name = convert_diffusers_dit_to_original_flux(name);
    } else if (sd_version_is_z_image(version)) {
        name = convert_diffusers_dit_to_original_lumina2(name);
    }
    return name;
}
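
Note on the name conversion above: convert_diffusers_dit_to_original_lumina2 rewrites diffusers-style tensor names (e.g. layers.0.attention.norm_q.weight) into the internal naming by swapping a matching source prefix for its target. The sketch below is illustrative only; replace_prefix is a hypothetical stand-in, not the repository's replace_with_prefix_map, and the exact matching rules in model.cpp may differ.

// Illustrative only: a simplified prefix rewrite, not the repository's
// replace_with_prefix_map. It rewrites a tensor name by replacing the first
// map key that the name starts with.
#include <iostream>
#include <string>
#include <unordered_map>

static std::string replace_prefix(std::string name,
                                  const std::unordered_map<std::string, std::string>& prefix_map) {
    for (const auto& [src, dst] : prefix_map) {
        if (name.rfind(src, 0) == 0) {  // name starts with src
            return dst + name.substr(src.size());
        }
    }
    return name;  // no prefix matched, keep the name as-is
}

int main() {
    std::unordered_map<std::string, std::string> prefix_map = {
        {"layers.0.attention.norm_q.", "layers.0.attention.q_norm."},
        {"layers.0.attention.to_out.0.", "layers.0.attention.out."},
    };
    // diffusers name -> original/internal name
    std::cout << replace_prefix("layers.0.attention.norm_q.weight", prefix_map) << "\n";
    // expected output: layers.0.attention.q_norm.weight
}

The to_q/to_k/to_v entries map to qkv.weight, qkv.weight.1, and qkv.weight.2, which suggests the three separate diffusers projections are gathered and fused into a single qkv tensor later in the loading path.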

View File

@@ -85,7 +85,15 @@ namespace ZImage {
            }
            hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) / multiple_of);
            blocks["w1"] = std::make_shared<Linear>(dim, hidden_dim, false);
            blocks["w2"] = std::make_shared<Linear>(hidden_dim, dim, false);
            bool force_prec_f32 = false;
            float scale = 1.f / 128.f;
#ifdef SD_USE_VULKAN
            force_prec_f32 = true;
#endif
            // The purpose of the scale here is to prevent NaN issues in certain situations,
            // for example when using CUDA with k-quant weights.
            blocks["w2"] = std::make_shared<Linear>(hidden_dim, dim, false, false, force_prec_f32, scale);
            blocks["w3"] = std::make_shared<Linear>(dim, hidden_dim, false);
        }
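
Context for the w2 change: with k-quant weights on CUDA, intermediate values of this projection can apparently exceed the fp16 range (about 65504), turning into inf and then NaN; the Vulkan branch instead sets force_prec_f32, presumably forcing higher precision for the same matmul. Scaling by 1/128 keeps the intermediate product in range while the true value is recovered afterwards. The toy program below only illustrates that numeric effect; it assumes the Linear scale parameter pre-scales the matmul and is compensated later, which is a simplification, not the actual Linear implementation.

// Toy illustration of the 1/128 trick (an assumption about how the scale
// argument is used; this is not stable-diffusion.cpp's Linear).
#include <cmath>
#include <iostream>

// crude stand-in for half precision: values above fp16's max (~65504) overflow to inf
static float fp16_overflow(float v) {
    const float FP16_MAX = 65504.0f;
    return (std::fabs(v) > FP16_MAX) ? std::copysign(INFINITY, v) : v;
}

int main() {
    float x = 400.0f;  // a large activation
    float w = 300.0f;  // a large (dequantized) weight
    float scale = 1.0f / 128.0f;

    float naive = fp16_overflow(x * w);                       // overflows to inf; later ops yield NaN
    float rescaled = fp16_overflow((x * scale) * w) / scale;  // stays finite, same mathematical value

    std::cout << "naive:    " << naive << "\n";     // inf
    std::cout << "rescaled: " << rescaled << "\n";  // 120000
}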