do not quantize embedding weight

leejet 2025-09-17 23:30:04 +08:00
parent 1e5f207006
commit 4e2a0d513d
2 changed files with 4 additions and 2 deletions

@@ -554,11 +554,11 @@ protected:
     enum ggml_type token_wtype = GGML_TYPE_F32;
     if (!force_clip_f32) {
         auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
-        if (tensor_type != tensor_types.end())
+        if (tensor_type != tensor_types.end() && tensor_type->second == GGML_TYPE_F16) {
             token_wtype = tensor_type->second;
+        }
     }
     enum ggml_type position_wtype = GGML_TYPE_F32;
     params["token_embedding.weight"] = ggml_new_tensor_2d(ctx, token_wtype, embed_dim, vocab_size);
     params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, position_wtype, embed_dim, num_positions);
 }
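
In effect, the token embedding's weight type is now taken from the checkpoint only when it is F16; any other stored type (e.g. a quantized one) falls back to the F32 default. Below is a minimal, self-contained sketch of that rule, assuming a stand-in WType enum and a hypothetical pick_token_wtype() helper in place of ggml's real types and the class's init_params():

#include <iostream>
#include <map>
#include <string>

enum class WType { F32, F16, Q8_0 };  // stand-in for ggml_type, not the real enum

// Hypothetical helper mirroring the selection logic in the hunk above.
WType pick_token_wtype(const std::map<std::string, WType>& tensor_types,
                       const std::string& prefix,
                       bool force_clip_f32) {
    WType token_wtype = WType::F32;
    if (!force_clip_f32) {
        auto it = tensor_types.find(prefix + "token_embedding.weight");
        // Honour the stored type only when it is F16; anything else
        // (e.g. a quantized type) keeps the F32 default.
        if (it != tensor_types.end() && it->second == WType::F16) {
            token_wtype = it->second;
        }
    }
    return token_wtype;
}

int main() {
    std::map<std::string, WType> types = {{"te.token_embedding.weight", WType::Q8_0}};
    std::cout << (pick_token_wtype(types, "te.", false) == WType::F32) << "\n";  // prints 1: quantized type ignored
    return 0;
}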

@@ -2408,6 +2408,8 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage
         // Pass, do not convert. For MMDiT
     } else if (contains(name, "time_embed.") || contains(name, "label_emb.")) {
         // Pass, do not convert. For Unet
+    } else if (contains(name, "embedding")) {
+        // Pass, do not convert embedding
     } else {
         return true;
     }
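
This is the loader-side counterpart: tensor_should_be_converted() now leaves any tensor whose name contains "embedding" in its original type. A self-contained sketch of the name filter, assuming a contains() substring helper and a simplified should_be_converted() wrapper (both hypothetical here); the tensor names in main() are only illustrative:

#include <iostream>
#include <string>

// Hypothetical stand-in for the repository's contains() helper.
static bool contains(const std::string& name, const std::string& substr) {
    return name.find(substr) != std::string::npos;
}

// Simplified name filter: embedding-like tensors are never converted.
static bool should_be_converted(const std::string& name) {
    if (contains(name, "time_embed.") || contains(name, "label_emb.")) {
        return false;  // UNet timestep/label embeddings: keep original type
    } else if (contains(name, "embedding")) {
        return false;  // token/position embeddings: keep original type (this commit)
    }
    return true;  // everything else may be converted to the requested type
}

int main() {
    std::cout << should_be_converted("text_model.embeddings.token_embedding.weight") << "\n";  // 0: skipped
    std::cout << should_be_converted("model.diffusion_model.input_blocks.1.1.proj_in.weight") << "\n";  // 1: convertible
    return 0;
}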