add k quants support

commit e91ce4f103
parent 77ca8e3f48
Author: leejet
Date:   2024-08-22 02:12:21 +08:00
3 changed files with 11 additions and 2 deletions

@@ -327,8 +327,14 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                 params.wtype = SD_TYPE_Q5_1;
             } else if (type == "q8_0") {
                 params.wtype = SD_TYPE_Q8_0;
+            } else if (type == "q2_k") {
+                params.wtype = SD_TYPE_Q2_K;
+            } else if (type == "q3_k") {
+                params.wtype = SD_TYPE_Q3_K;
+            } else if (type == "q4_k") {
+                params.wtype = SD_TYPE_Q4_K;
             } else {
-                fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0]\n",
+                fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_k, q3_k, q4_k]\n",
                         type.c_str());
                 exit(1);
             }
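The new branches extend the existing if/else chain one type at a time. As a design note, the same dispatch could be written as a lookup table, which keeps the accepted-type list and the error message from drifting apart. A minimal sketch, assuming the remaining SD_TYPE_* values follow the same naming as those visible in the diff (the map itself is hypothetical, not part of this commit):

// Hypothetical table-driven equivalent of the if/else chain above.
#include <string>
#include <unordered_map>

static const std::unordered_map<std::string, sd_type_t> wtype_map = {
    {"f32", SD_TYPE_F32},   {"f16", SD_TYPE_F16},
    {"q4_0", SD_TYPE_Q4_0}, {"q4_1", SD_TYPE_Q4_1},
    {"q5_0", SD_TYPE_Q5_0}, {"q5_1", SD_TYPE_Q5_1},
    {"q8_0", SD_TYPE_Q8_0}, {"q2_k", SD_TYPE_Q2_K},
    {"q3_k", SD_TYPE_Q3_K}, {"q4_k", SD_TYPE_Q4_K},
};
// Lookup: auto it = wtype_map.find(type);
// it == wtype_map.end() -> print the error and exit(1), as above.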

@@ -1183,6 +1183,9 @@ protected:
     bool bias;
 
     void init_params(struct ggml_context* ctx, ggml_type wtype) {
+        if (in_features % ggml_blck_size(wtype) != 0) {
+            wtype = GGML_TYPE_F32;
+        }
         params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features);
         if (bias) {
             params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, out_features);

@@ -1672,7 +1672,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
         ggml_type tensor_type = tensor_storage.type;
         if (type != GGML_TYPE_COUNT) {
-            if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+            if (ggml_is_quantized(type) && tensor_storage.ne[0] % ggml_blck_size(type) != 0) {
                 tensor_type = GGML_TYPE_F16;
             } else {
                 tensor_type = type;
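The last hunk fixes the matching assumption in the GGUF export path: the old code hardcoded 32, the block size of the original quant formats, so a k-quant target type with its 256-value super-blocks would have passed the check on tensors it cannot actually represent. Querying ggml_blck_size(type) keeps the fall-back-to-f16 logic correct for every quantized type. A small standalone illustration, again assuming only public ggml API (not code from the commit):

#include <cstdio>
#include "ggml.h"

int main() {
    const int64_t ne0 = 320; // first dimension of some weight tensor
    const ggml_type types[] = {GGML_TYPE_Q8_0, GGML_TYPE_Q4_K};
    for (ggml_type t : types) {
        // ggml_blck_size() is 32 for q8_0 and 256 for q4_k, so the same
        // tensor is quantizable for one target type but not the other.
        printf("%s: block=%d, ne0 %% block == 0 -> %s\n",
               ggml_type_name(t), (int) ggml_blck_size(t),
               ne0 % ggml_blck_size(t) == 0 ? "quantize" : "fall back to f16");
    }
    return 0;
}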