Mirror of https://github.com/leejet/stable-diffusion.cpp.git (synced 2025-12-13 05:48:56 +00:00)
Compare commits
3 commits: ac73c74a3f ... 1798ec02ba
| Author | SHA1 | Date |
|---|---|---|
|  | 1798ec02ba |  |
|  | 2fec01d2b3 |  |
|  | c736364a28 |  |
@@ -133,6 +133,8 @@ std::string convert_cond_stage_model_name(std::string name, std::string prefix)
     {"attn_q.", "self_attn.q_proj."},
     {"attn_k.", "self_attn.k_proj."},
     {"attn_v.", "self_attn.v_proj."},
+    {"attn_q_norm.", "self_attn.q_norm."},
+    {"attn_k_norm.", "self_attn.k_norm."},
     {"attn_output.", "self_attn.o_proj."},
     {"attn_norm.", "input_layernorm."},
     {"ffn_down.", "mlp.down_proj."},
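The two new entries extend the rewrite table that maps GGUF-style text-encoder tensor name fragments (attn_q_norm., attn_k_norm.) to their HuggingFace counterparts (self_attn.q_norm., self_attn.k_norm.). A minimal sketch of how such a pair table can be applied, assuming simple first-match substring replacement (the repo's actual helper may work differently):

#include <string>
#include <utility>
#include <vector>

// Hedged sketch, not the repo's exact helper: apply each {from, to} pair as a
// substring rewrite, so "...layers.0.attn_q_norm.weight" becomes
// "...layers.0.self_attn.q_norm.weight".
std::string apply_name_map(std::string name,
                           const std::vector<std::pair<std::string, std::string>>& pairs) {
    for (const auto& [from, to] : pairs) {
        size_t pos = name.find(from);
        if (pos != std::string::npos) {
            name.replace(pos, from.size(), to);
        }
    }
    return name;
}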
@@ -613,6 +615,44 @@ std::string convert_diffusers_dit_to_original_flux(std::string name) {
     return name;
 }
 
+std::string convert_diffusers_dit_to_original_lumina2(std::string name) {
+    int num_layers         = 30;
+    int num_refiner_layers = 2;
+    static std::unordered_map<std::string, std::string> z_image_name_map;
+
+    if (z_image_name_map.empty()) {
+        z_image_name_map["all_x_embedder.2-1."]  = "x_embedder.";
+        z_image_name_map["all_final_layer.2-1."] = "final_layer.";
+
+        // --- transformer blocks ---
+        auto add_attention_map = [&](const std::string& prefix, int num) {
+            for (int i = 0; i < num; ++i) {
+                std::string block_prefix = prefix + std::to_string(i) + ".";
+                std::string dst_prefix   = prefix + std::to_string(i) + ".";
+
+                z_image_name_map[block_prefix + "attention.norm_q."]   = dst_prefix + "attention.q_norm.";
+                z_image_name_map[block_prefix + "attention.norm_k."]   = dst_prefix + "attention.k_norm.";
+                z_image_name_map[block_prefix + "attention.to_out.0."] = dst_prefix + "attention.out.";
+
+                z_image_name_map[block_prefix + "attention.to_q.weight"] = dst_prefix + "attention.qkv.weight";
+                z_image_name_map[block_prefix + "attention.to_q.bias"]   = dst_prefix + "attention.qkv.bias";
+                z_image_name_map[block_prefix + "attention.to_k.weight"] = dst_prefix + "attention.qkv.weight.1";
+                z_image_name_map[block_prefix + "attention.to_k.bias"]   = dst_prefix + "attention.qkv.bias.1";
+                z_image_name_map[block_prefix + "attention.to_v.weight"] = dst_prefix + "attention.qkv.weight.2";
+                z_image_name_map[block_prefix + "attention.to_v.bias"]   = dst_prefix + "attention.qkv.bias.2";
+            }
+        };
+
+        add_attention_map("noise_refiner.", num_refiner_layers);
+        add_attention_map("context_refiner.", num_refiner_layers);
+        add_attention_map("layers.", num_layers);
+    }
+
+    replace_with_prefix_map(name, z_image_name_map);
+
+    return name;
+}
+
 std::string convert_diffusion_model_name(std::string name, std::string prefix, SDVersion version) {
     if (sd_version_is_sd1(version) || sd_version_is_sd2(version)) {
         name = convert_diffusers_unet_to_original_sd1(name);
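Worth noting: to_q, to_k and to_v all map to the same qkv base name, with .1/.2 suffixes on the k and v entries, which presumably lets the loader fuse the three separate diffusers projections into one qkv tensor (slice 0 implied for q). A hedged usage sketch, assuming replace_with_prefix_map rewrites the matching key prefix of the name:

#include <cassert>
#include <string>

// Hypothetical driver; convert_diffusers_dit_to_original_lumina2 is the
// function added in the diff above.
int main() {
    // Per-block norm rename: diffusers "norm_q" becomes original "q_norm".
    assert(convert_diffusers_dit_to_original_lumina2("layers.0.attention.norm_q.weight")
           == "layers.0.attention.q_norm.weight");
    // The k projection lands at slice 1 of the fused qkv weight.
    assert(convert_diffusers_dit_to_original_lumina2("layers.5.attention.to_k.weight")
           == "layers.5.attention.qkv.weight.1");
    return 0;
}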
@@ -622,6 +662,8 @@ std::string convert_diffusion_model_name(std::string name, std::string prefix, SDVersion version)
         name = convert_diffusers_dit_to_original_sd3(name);
     } else if (sd_version_is_flux(version) || sd_version_is_flux2(version)) {
         name = convert_diffusers_dit_to_original_flux(name);
+    } else if (sd_version_is_z_image(version)) {
+        name = convert_diffusers_dit_to_original_lumina2(name);
     }
     return name;
 }
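With the new branch, Z-Image checkpoints route through the Lumina2 converter like the other DiT families. A minimal call-site sketch; VERSION_Z_IMAGE here is a placeholder for whatever SDVersion value makes sd_version_is_z_image() return true in the repo:

// Hypothetical call site (enum name assumed, not taken from the diff):
std::string converted = convert_diffusion_model_name(
    "layers.12.attention.to_out.0.weight", "", VERSION_Z_IMAGE);
// -> "layers.12.attention.out.weight" via convert_diffusers_dit_to_original_lumina2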
z_image.hpp (10 changed lines)
@@ -85,7 +85,15 @@ namespace ZImage {
         }
         hidden_dim   = multiple_of * ((hidden_dim + multiple_of - 1) / multiple_of);
         blocks["w1"] = std::make_shared<Linear>(dim, hidden_dim, false);
-        blocks["w2"] = std::make_shared<Linear>(hidden_dim, dim, false);
+
+        bool force_prec_f32 = false;
+        float scale         = 1.f / 128.f;
+#ifdef SD_USE_VULKAN
+        force_prec_f32 = true;
+#endif
+        // The purpose of the scale here is to prevent NaN issues in certain situations.
+        // For example, when using CUDA but the weights are k-quants.
+        blocks["w2"] = std::make_shared<Linear>(hidden_dim, dim, false, false, force_prec_f32, 1.f / 128.f);
         blocks["w3"] = std::make_shared<Linear>(dim, hidden_dim, false);
     }
 
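The idea behind the scale factor: pre-scaling the w2 weights down keeps intermediate half-precision accumulations inside f16's representable range (max about 65504), and the scale is undone later so the result is mathematically unchanged, y = (x @ (W * s)) * (1/s) == x @ W. A toy sketch of the failure mode and the fix, with f16 overflow crudely simulated in f32 (assumed semantics; the repo's Linear applies the scale internally):

#include <cmath>
#include <cstdio>

// Crude stand-in for f16 rounding: magnitudes beyond f16 max overflow to infinity.
static float f16_overflow(float v) {
    return std::fabs(v) > 65504.0f ? std::copysign(INFINITY, v) : v;
}

int main() {
    const float s = 1.f / 128.f;
    float x = 8.0f, w = 30000.0f;  // product 240000 exceeds f16 max
    float naive  = f16_overflow(x * w);                     // "f16" accumulation: inf
    float scaled = f16_overflow(x * (w * s)) * (1.f / s);   // stays in range, then rescaled
    std::printf("naive: %f, scaled: %f\n", naive, scaled);  // inf vs 240000
    return 0;
}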