mirror of https://github.com/leejet/stable-diffusion.cpp.git
add k quants support

commit e91ce4f103 (parent 77ca8e3f48)
@@ -327,8 +327,14 @@ void parse_args(int argc, const char** argv, SDParams& params) {
             params.wtype = SD_TYPE_Q5_1;
         } else if (type == "q8_0") {
             params.wtype = SD_TYPE_Q8_0;
+        } else if (type == "q2_k") {
+            params.wtype = SD_TYPE_Q2_K;
+        } else if (type == "q3_k") {
+            params.wtype = SD_TYPE_Q3_K;
+        } else if (type == "q4_k") {
+            params.wtype = SD_TYPE_Q4_K;
         } else {
-            fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0]\n",
+            fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_k, q3_k, q4_k]\n",
                     type.c_str());
             exit(1);
         }
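Each new quant type extends the if/else chain in parse_args. Below is a minimal sketch, not part of this commit, of a table-driven lookup that keeps the string-to-type mapping in one place; it assumes the sd_type_t enum and SD_TYPE_* constants declared in stable-diffusion.h, and parse_wtype is a hypothetical helper, not a repository function.

// Sketch only: table-driven alternative to the if/else chain above.
// Assumes sd_type_t and SD_TYPE_* come from stable-diffusion.h.
#include <string>
#include <unordered_map>

#include "stable-diffusion.h"

// parse_wtype is a hypothetical helper, not part of the repository.
static bool parse_wtype(const std::string& type, sd_type_t& wtype) {
    static const std::unordered_map<std::string, sd_type_t> table = {
        {"f32", SD_TYPE_F32},   {"f16", SD_TYPE_F16},
        {"q4_0", SD_TYPE_Q4_0}, {"q4_1", SD_TYPE_Q4_1},
        {"q5_0", SD_TYPE_Q5_0}, {"q5_1", SD_TYPE_Q5_1},
        {"q8_0", SD_TYPE_Q8_0}, {"q2_k", SD_TYPE_Q2_K},
        {"q3_k", SD_TYPE_Q3_K}, {"q4_k", SD_TYPE_Q4_K},
    };
    auto it = table.find(type);
    if (it == table.end()) {
        return false; // caller reports the "invalid weight format" error
    }
    wtype = it->second;
    return true;
}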
@@ -1183,6 +1183,9 @@ protected:
     bool bias;
 
     void init_params(struct ggml_context* ctx, ggml_type wtype) {
+        if (in_features % ggml_blck_size(wtype) != 0) {
+            wtype = GGML_TYPE_F32;
+        }
         params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features);
         if (bias) {
             params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, out_features);
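The new guard above falls back to f32 whenever the row width is not a multiple of the quantization block size: the q4_0 family packs 32 values per block, while the k-quants use 256-wide super-blocks. A standalone sketch of the check, assuming ggml.h is on the include path; pick_wtype is a hypothetical name:

// Standalone sketch of the block-size check; pick_wtype is hypothetical.
#include <cstdint>
#include <cstdio>

#include "ggml.h"

static enum ggml_type pick_wtype(enum ggml_type wtype, int64_t in_features) {
    if (in_features % ggml_blck_size(wtype) != 0) {
        return GGML_TYPE_F32; // row width is not a multiple of the block size
    }
    return wtype;
}

int main() {
    // 320 is divisible by 32 (q4_0 blocks) but not by 256 (q4_k super-blocks),
    // so a 320-wide layer keeps q4_0 yet falls back to f32 for q4_k.
    printf("q4_0 -> %d\n", pick_wtype(GGML_TYPE_Q4_0, 320));
    printf("q4_k -> %d\n", pick_wtype(GGML_TYPE_Q4_K, 320));
    return 0;
}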
@@ -1672,7 +1672,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
 
     ggml_type tensor_type = tensor_storage.type;
     if (type != GGML_TYPE_COUNT) {
-        if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+        if (ggml_is_quantized(type) && tensor_storage.ne[0] % ggml_blck_size(type) != 0) {
             tensor_type = GGML_TYPE_F16;
         } else {
             tensor_type = type;
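This one-line change is what makes k-quant conversion safe: the hard-coded 32 matched only the q4_0..q8_0 block size, whereas ggml_blck_size() reports the correct value per type (256 for q2_k/q3_k/q4_k), so rows that cannot be split into whole blocks are stored as f16 instead. A hedged sketch of the same selection logic, assuming ggml.h; effective_type is a hypothetical helper, not a repository function:

// Hypothetical helper mirroring the patched branch in save_to_gguf_file.
#include <cstdint>

#include "ggml.h"

static enum ggml_type effective_type(enum ggml_type stored,
                                     enum ggml_type requested, int64_t ne0) {
    if (requested == GGML_TYPE_COUNT) {
        return stored; // no target type requested; keep the tensor's own type
    }
    if (ggml_is_quantized(requested) && ne0 % ggml_blck_size(requested) != 0) {
        return GGML_TYPE_F16; // row can't be split into whole quant blocks
    }
    return requested;
}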