From e91ce4f103ce7106503c3386a62288298e792362 Mon Sep 17 00:00:00 2001
From: leejet
Date: Thu, 22 Aug 2024 02:12:21 +0800
Subject: [PATCH] add k quants support

---
 examples/cli/main.cpp | 8 +++++++-
 ggml_extend.hpp       | 3 +++
 model.cpp             | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index bb773da..a132f26 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -327,8 +327,14 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                 params.wtype = SD_TYPE_Q5_1;
             } else if (type == "q8_0") {
                 params.wtype = SD_TYPE_Q8_0;
+            } else if (type == "q2_k") {
+                params.wtype = SD_TYPE_Q2_K;
+            } else if (type == "q3_k") {
+                params.wtype = SD_TYPE_Q3_K;
+            } else if (type == "q4_k") {
+                params.wtype = SD_TYPE_Q4_K;
             } else {
-                fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0]\n",
+                fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_k, q3_k, q4_k]\n",
                         type.c_str());
                 exit(1);
             }
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
index 3ad9906..dcef98a 100644
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -1183,6 +1183,9 @@ protected:
     bool bias;

     void init_params(struct ggml_context* ctx, ggml_type wtype) {
+        if (in_features % ggml_blck_size(wtype) != 0) {
+            wtype = GGML_TYPE_F32;
+        }
         params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features);
         if (bias) {
             params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, out_features);
diff --git a/model.cpp b/model.cpp
index c372b91..7502607 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1672,7 +1672,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
         ggml_type tensor_type = tensor_storage.type;
         if (type != GGML_TYPE_COUNT) {
-            if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+            if (ggml_is_quantized(type) && tensor_storage.ne[0] % ggml_blck_size(type) != 0) {
                 tensor_type = GGML_TYPE_F16;
             } else {
                 tensor_type = type;
             }