From 4e2a0d513dfab6c4eb2b6e85bcbc4972c5474335 Mon Sep 17 00:00:00 2001 From: leejet Date: Wed, 17 Sep 2025 23:30:04 +0800 Subject: [PATCH] do not quantize embedding weight --- clip.hpp | 4 ++-- model.cpp | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clip.hpp b/clip.hpp index bde8a78..11dd936 100644 --- a/clip.hpp +++ b/clip.hpp @@ -554,11 +554,11 @@ protected: enum ggml_type token_wtype = GGML_TYPE_F32; if (!force_clip_f32) { auto tensor_type = tensor_types.find(prefix + "token_embedding.weight"); - if (tensor_type != tensor_types.end()) + if (tensor_type != tensor_types.end() && tensor_type->second == GGML_TYPE_F16) { token_wtype = tensor_type->second; + } } enum ggml_type position_wtype = GGML_TYPE_F32; - params["token_embedding.weight"] = ggml_new_tensor_2d(ctx, token_wtype, embed_dim, vocab_size); params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, position_wtype, embed_dim, num_positions); } diff --git a/model.cpp b/model.cpp index 168b675..b40b6bc 100644 --- a/model.cpp +++ b/model.cpp @@ -2408,6 +2408,8 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage // Pass, do not convert. For MMDiT } else if (contains(name, "time_embed.") || contains(name, "label_emb.")) { // Pass, do not convert. For Unet + } else if (contains(name, "embedding")) { + // Pass, do not convert embedding } else { return true; }