From e91ce4f103ce7106503c3386a62288298e792362 Mon Sep 17 00:00:00 2001
From: leejet
Date: Thu, 22 Aug 2024 02:12:21 +0800
Subject: [PATCH] add k quants support

---
 examples/cli/main.cpp | 8 +++++++-
 ggml_extend.hpp       | 3 +++
 model.cpp             | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index bb773da..a132f26 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -327,8 +327,14 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                 params.wtype = SD_TYPE_Q5_1;
             } else if (type == "q8_0") {
                 params.wtype = SD_TYPE_Q8_0;
+            } else if (type == "q2_k") {
+                params.wtype = SD_TYPE_Q2_K;
+            } else if (type == "q3_k") {
+                params.wtype = SD_TYPE_Q3_K;
+            } else if (type == "q4_k") {
+                params.wtype = SD_TYPE_Q4_K;
             } else {
-                fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0]\n",
+                fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_k, q3_k, q4_k]\n",
                         type.c_str());
                 exit(1);
             }
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
index 3ad9906..dcef98a 100644
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -1183,6 +1183,9 @@ protected:
     bool bias;

     void init_params(struct ggml_context* ctx, ggml_type wtype) {
+        if (in_features % ggml_blck_size(wtype) != 0) {
+            wtype = GGML_TYPE_F32;
+        }
         params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features);
         if (bias) {
             params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, out_features);
diff --git a/model.cpp b/model.cpp
index c372b91..7502607 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1672,7 +1672,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
         ggml_type tensor_type = tensor_storage.type;
         if (type != GGML_TYPE_COUNT) {
-            if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+            if (ggml_is_quantized(type) && tensor_storage.ne[0] % ggml_blck_size(type) != 0) {
                 tensor_type = GGML_TYPE_F16;
             } else {
                 tensor_type = type;
             }