add k quants support

commit e91ce4f103
parent 77ca8e3f48
Author: leejet
Date:   2024-08-22 02:12:21 +08:00
3 changed files with 11 additions and 2 deletions

@@ -327,8 +327,14 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                 params.wtype = SD_TYPE_Q5_1;
             } else if (type == "q8_0") {
                 params.wtype = SD_TYPE_Q8_0;
+            } else if (type == "q2_k") {
+                params.wtype = SD_TYPE_Q2_K;
+            } else if (type == "q3_k") {
+                params.wtype = SD_TYPE_Q3_K;
+            } else if (type == "q4_k") {
+                params.wtype = SD_TYPE_Q4_K;
             } else {
-                fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0]\n",
+                fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_k, q3_k, q4_k]\n",
                         type.c_str());
                 exit(1);
             }
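The new branches extend the existing if/else chain one type at a time. As a design note, the same dispatch could be written as a lookup table, which keeps the accepted-type list and the error message from drifting apart. A minimal sketch, assuming the remaining SD_TYPE_* values follow the same naming as those visible in the diff (the map itself is hypothetical, not part of this commit):

// Hypothetical table-driven equivalent of the if/else chain above.
#include <string>
#include <unordered_map>

static const std::unordered_map<std::string, sd_type_t> wtype_map = {
    {"f32", SD_TYPE_F32},   {"f16", SD_TYPE_F16},
    {"q4_0", SD_TYPE_Q4_0}, {"q4_1", SD_TYPE_Q4_1},
    {"q5_0", SD_TYPE_Q5_0}, {"q5_1", SD_TYPE_Q5_1},
    {"q8_0", SD_TYPE_Q8_0}, {"q2_k", SD_TYPE_Q2_K},
    {"q3_k", SD_TYPE_Q3_K}, {"q4_k", SD_TYPE_Q4_K},
};
// Lookup: auto it = wtype_map.find(type);
// it == wtype_map.end() -> print the error and exit(1), as above.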

@@ -1183,6 +1183,9 @@ protected:
     bool bias;
 
     void init_params(struct ggml_context* ctx, ggml_type wtype) {
+        if (in_features % ggml_blck_size(wtype) != 0) {
+            wtype = GGML_TYPE_F32;
+        }
         params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features);
         if (bias) {
             params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, out_features);

@@ -1672,7 +1672,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
         ggml_type tensor_type = tensor_storage.type;
         if (type != GGML_TYPE_COUNT) {
-            if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+            if (ggml_is_quantized(type) && tensor_storage.ne[0] % ggml_blck_size(type) != 0) {
                 tensor_type = GGML_TYPE_F16;
             } else {
                 tensor_type = type;
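The last hunk fixes the matching assumption in the GGUF export path: the old code hardcoded 32, the block size of the original quant formats, so a k-quant target type with its 256-value super-blocks would have passed the check on tensors it cannot actually represent. Querying ggml_blck_size(type) keeps the fall-back-to-f16 logic correct for every quantized type. A small standalone illustration, again assuming only public ggml API (not code from the commit):

#include <cstdio>
#include "ggml.h"

int main() {
    const int64_t ne0 = 320; // first dimension of some weight tensor
    const ggml_type types[] = {GGML_TYPE_Q8_0, GGML_TYPE_Q4_K};
    for (ggml_type t : types) {
        // ggml_blck_size() is 32 for q8_0 and 256 for q4_k, so the same
        // tensor is quantizable for one target type but not the other.
        printf("%s: block=%d, ne0 %% block == 0 -> %s\n",
               ggml_type_name(t), (int) ggml_blck_size(t),
               ne0 % ggml_blck_size(t) == 0 ? "quantize" : "fall back to f16");
    }
    return 0;
}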