mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-09 15:56:39 +00:00
fix: resolve LLM norm tensor names by architecture (#1570)
This commit is contained in:
parent
55c2aed52c
commit
29ab511fc7
37
src/llm.hpp
37
src/llm.hpp
@ -948,15 +948,20 @@ namespace LLM {
|
||||
protected:
|
||||
LLMArch arch;
|
||||
int sliding_attention;
|
||||
bool has_post_attention_norm;
|
||||
bool has_post_ffw_norm;
|
||||
std::string post_attention_norm_name;
|
||||
std::string pre_ffw_norm_name;
|
||||
std::string post_ffw_norm_name;
|
||||
|
||||
public:
|
||||
TransformerBlock(const LLMParams& params, int layer_index)
|
||||
: arch(params.arch),
|
||||
sliding_attention(0),
|
||||
has_post_attention_norm(params.arch == LLMArch::GEMMA3_12B),
|
||||
has_post_ffw_norm(params.arch == LLMArch::GEMMA3_12B) {
|
||||
sliding_attention(0) {
|
||||
if (params.arch == LLMArch::GEMMA3_12B) {
|
||||
post_attention_norm_name = "post_attention_norm";
|
||||
post_ffw_norm_name = "post_ffw_norm";
|
||||
}
|
||||
pre_ffw_norm_name = params.arch == LLMArch::GPT_OSS_20B ? "post_attention_norm" : "post_attention_layernorm";
|
||||
|
||||
blocks["self_attn"] = std::make_shared<Attention>(params);
|
||||
if (params.arch == LLMArch::GPT_OSS_20B) {
|
||||
blocks["mlp"] = std::make_shared<GPTOSSMLP>(params);
|
||||
@ -967,12 +972,12 @@ namespace LLM {
|
||||
params.mlp_activation);
|
||||
}
|
||||
blocks["input_layernorm"] = std::make_shared<LLMRMSNorm>(params.hidden_size, params.rms_norm_eps, params.rms_norm_add);
|
||||
blocks["post_attention_layernorm"] = std::make_shared<LLMRMSNorm>(params.hidden_size, params.rms_norm_eps, params.rms_norm_add);
|
||||
if (has_post_attention_norm) {
|
||||
blocks["post_attention_norm"] = std::make_shared<LLMRMSNorm>(params.hidden_size, params.rms_norm_eps, params.rms_norm_add);
|
||||
blocks[pre_ffw_norm_name] = std::make_shared<LLMRMSNorm>(params.hidden_size, params.rms_norm_eps, params.rms_norm_add);
|
||||
if (!post_attention_norm_name.empty()) {
|
||||
blocks[post_attention_norm_name] = std::make_shared<LLMRMSNorm>(params.hidden_size, params.rms_norm_eps, params.rms_norm_add);
|
||||
}
|
||||
if (has_post_ffw_norm) {
|
||||
blocks["post_ffw_norm"] = std::make_shared<LLMRMSNorm>(params.hidden_size, params.rms_norm_eps, params.rms_norm_add);
|
||||
if (!post_ffw_norm_name.empty()) {
|
||||
blocks[post_ffw_norm_name] = std::make_shared<LLMRMSNorm>(params.hidden_size, params.rms_norm_eps, params.rms_norm_add);
|
||||
}
|
||||
if (!params.sliding_attention.empty()) {
|
||||
sliding_attention = params.sliding_attention[layer_index % params.sliding_attention.size()];
|
||||
@ -987,14 +992,14 @@ namespace LLM {
|
||||
// x: [N, n_token, hidden_size]
|
||||
auto self_attn = std::dynamic_pointer_cast<Attention>(blocks["self_attn"]);
|
||||
auto input_layernorm = std::dynamic_pointer_cast<LLMRMSNorm>(blocks["input_layernorm"]);
|
||||
auto post_attention_layernorm = std::dynamic_pointer_cast<LLMRMSNorm>(blocks["post_attention_layernorm"]);
|
||||
auto pre_ffw_norm = std::dynamic_pointer_cast<LLMRMSNorm>(blocks[pre_ffw_norm_name]);
|
||||
std::shared_ptr<LLMRMSNorm> post_attention_norm = nullptr;
|
||||
std::shared_ptr<LLMRMSNorm> post_ffw_norm = nullptr;
|
||||
if (has_post_attention_norm) {
|
||||
post_attention_norm = std::dynamic_pointer_cast<LLMRMSNorm>(blocks["post_attention_norm"]);
|
||||
if (!post_attention_norm_name.empty()) {
|
||||
post_attention_norm = std::dynamic_pointer_cast<LLMRMSNorm>(blocks[post_attention_norm_name]);
|
||||
}
|
||||
if (has_post_ffw_norm) {
|
||||
post_ffw_norm = std::dynamic_pointer_cast<LLMRMSNorm>(blocks["post_ffw_norm"]);
|
||||
if (!post_ffw_norm_name.empty()) {
|
||||
post_ffw_norm = std::dynamic_pointer_cast<LLMRMSNorm>(blocks[post_ffw_norm_name]);
|
||||
}
|
||||
ggml_tensor* block_attention_mask = attention_mask;
|
||||
int rope_index = 0;
|
||||
@ -1012,7 +1017,7 @@ namespace LLM {
|
||||
x = ggml_add_inplace(ctx->ggml_ctx, x, residual);
|
||||
|
||||
residual = x;
|
||||
x = post_attention_layernorm->forward(ctx, x);
|
||||
x = pre_ffw_norm->forward(ctx, x);
|
||||
if (arch == LLMArch::GPT_OSS_20B) {
|
||||
auto mlp = std::dynamic_pointer_cast<GPTOSSMLP>(blocks["mlp"]);
|
||||
x = mlp->forward(ctx, x);
|
||||
|
||||
@ -138,8 +138,7 @@ std::string convert_cond_stage_model_name(std::string name, std::string prefix)
|
||||
{"attn_k_norm.", "self_attn.k_norm."},
|
||||
{"attn_output.", "self_attn.o_proj."},
|
||||
{"attn_norm.", "input_layernorm."},
|
||||
{"attn_post_norm.", "post_attention_layernorm."},
|
||||
{"post_attention_norm.", "post_attention_layernorm."},
|
||||
{"attn_post_norm.", "post_attention_norm."},
|
||||
{"ffn_gate_inp.", "mlp.router."},
|
||||
{"ffn_gate_exps.", "mlp.experts.gate_proj."},
|
||||
{"ffn_up_exps.", "mlp.experts.up_proj."},
|
||||
@ -147,6 +146,7 @@ std::string convert_cond_stage_model_name(std::string name, std::string prefix)
|
||||
{"ffn_down.", "mlp.down_proj."},
|
||||
{"ffn_gate.", "mlp.gate_proj."},
|
||||
{"ffn_up.", "mlp.up_proj."},
|
||||
{"ffn_post_norm.", "post_ffw_norm."},
|
||||
{"ffn_norm.", "post_attention_layernorm."},
|
||||
{"output_norm.", "model.norm."},
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user