From 4023083f70c6afaa4b6a1c1f4957722870d0c308 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Thu, 25 Sep 2025 00:16:21 +0800
Subject: [PATCH] allow more quant types for the CLIP token embedding weight

---
 clip.hpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/clip.hpp b/clip.hpp
index 11dd936..2a6b08c 100644
--- a/clip.hpp
+++ b/clip.hpp
@@ -553,12 +553,16 @@ protected:
     void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
         enum ggml_type token_wtype = GGML_TYPE_F32;
         if (!force_clip_f32) {
-            auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
-            if (tensor_type != tensor_types.end() && tensor_type->second == GGML_TYPE_F16) {
+            // Types the token embedding may keep as recorded in tensor_types; any other type falls back to F32.
+            // Function-local static const: the set is built once on first use instead of on every call.
+            static const std::set<ggml_type> allow_types = {GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};
+            // Look up the entry for this block's token embedding (key is prefix + tensor name); may be absent.
+            auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
+            if (tensor_type != tensor_types.end() && allow_types.count(tensor_type->second) > 0) {
                 token_wtype = tensor_type->second;
             }
         }
-        enum ggml_type position_wtype = GGML_TYPE_F32;
+        enum ggml_type position_wtype       = GGML_TYPE_F32;
         params["token_embedding.weight"]    = ggml_new_tensor_2d(ctx, token_wtype, embed_dim, vocab_size);
         params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, position_wtype, embed_dim, num_positions);
     }