From bd1eaef93eed70ab227997c3771d9a814d27208c Mon Sep 17 00:00:00 2001
From: leejet
Date: Thu, 24 Jul 2025 00:59:38 +0800
Subject: [PATCH] fix: convert f64 to f32 and i64 to i32 when loading weights

---
 model.cpp | 54 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 model.h   |  8 ++++++++
 2 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/model.cpp b/model.cpp
index 2e40e00..9529cc5 100644
--- a/model.cpp
+++ b/model.cpp
@@ -815,6 +815,7 @@ void f8_e4m3_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
         dst[i] = f8_e4m3_to_f16(src[i]);
     }
 }
+
 void f8_e5m2_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
     // support inplace op
     for (int64_t i = n - 1; i >= 0; i--) {
@@ -822,6 +823,20 @@ void f8_e5m2_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
         dst[i] = f8_e5m2_to_f16(src[i]);
     }
 }
+void f64_to_f32_vec(double* src, float* dst, int64_t n) {
+    // support inplace op
+    for (int64_t i = 0; i < n; i++) {
+        dst[i] = (float)src[i];
+    }
+}
+
+void i64_to_i32_vec(int64_t* src, int32_t* dst, int64_t n) {
+    // support inplace op
+    for (int64_t i = 0; i < n; i++) {
+        dst[i] = (int32_t)src[i];
+    }
+}
+
 void convert_tensor(void* src,
                     ggml_type src_type,
                     void* dst,
@@ -1057,13 +1072,13 @@ ggml_type str_to_ggml_type(const std::string& dtype) {
     } else if (dtype == "F32") {
         ttype = GGML_TYPE_F32;
     } else if (dtype == "F64") {
-        ttype = GGML_TYPE_F64;
+        ttype = GGML_TYPE_F32;
     } else if (dtype == "F8_E4M3") {
         ttype = GGML_TYPE_F16;
     } else if (dtype == "F8_E5M2") {
         ttype = GGML_TYPE_F16;
     } else if (dtype == "I64") {
-        ttype = GGML_TYPE_I64;
+        ttype = GGML_TYPE_I32;
     }
     return ttype;
 }
@@ -1185,6 +1200,14 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
             tensor_storage.is_f8_e5m2 = true;
             // f8 -> f16
             GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2);
+        } else if (dtype == "F64") {
+            tensor_storage.is_f64 = true;
+            // f64 -> f32
+            GGML_ASSERT(tensor_storage.nbytes() * 2 == tensor_data_size);
+        } else if (dtype == "I64") {
+            tensor_storage.is_i64 = true;
+            // i64 -> i32
+            GGML_ASSERT(tensor_storage.nbytes() * 2 == tensor_data_size);
         } else {
             GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size);
         }
@@ -1945,7 +1968,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                 // for the CPU and Metal backend, we can copy directly into the tensor
                 if (tensor_storage.type == dst_tensor->type) {
                     GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes());
-                    read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
+                    if (tensor_storage.is_f64 || tensor_storage.is_i64) {
+                        read_buffer.resize(tensor_storage.nbytes_to_read());
+                        read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
+                    } else {
+                        read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
+                    }
 
                     if (tensor_storage.is_bf16) {
                         // inplace op
@@ -1956,9 +1984,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                     } else if (tensor_storage.is_f8_e5m2) {
                         // inplace op
                         f8_e5m2_to_f16_vec((uint8_t*)dst_tensor->data, (uint16_t*)dst_tensor->data, tensor_storage.nelements());
+                    } else if (tensor_storage.is_f64) {
+                        f64_to_f32_vec((double*)read_buffer.data(), (float*)dst_tensor->data, tensor_storage.nelements());
+                    } else if (tensor_storage.is_i64) {
+                        i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
                     }
                 } else {
-                    read_buffer.resize(tensor_storage.nbytes());
+                    read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
                     read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
                     if (tensor_storage.is_bf16) {
@@ -1970,13 +2002,19 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                     } else if (tensor_storage.is_f8_e5m2) {
                         // inplace op
                         f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
+                    } else if (tensor_storage.is_f64) {
+                        // inplace op
+                        f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
+                    } else if (tensor_storage.is_i64) {
+                        // inplace op
+                        i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
                     }
 
                     convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
                                    dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
                 }
             } else {
-                read_buffer.resize(tensor_storage.nbytes());
+                read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
                 read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
                 if (tensor_storage.is_bf16) {
@@ -1988,6 +2026,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                 } else if (tensor_storage.is_f8_e5m2) {
                     // inplace op
                     f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
+                } else if (tensor_storage.is_f64) {
+                    // inplace op
+                    f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
+                } else if (tensor_storage.is_i64) {
+                    // inplace op
+                    i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
                 }
 
                 if (tensor_storage.type == dst_tensor->type) {
diff --git a/model.h b/model.h
index a626603..ea71610 100644
--- a/model.h
+++ b/model.h
@@ -102,6 +102,8 @@ struct TensorStorage {
     bool is_bf16 = false;
     bool is_f8_e4m3 = false;
     bool is_f8_e5m2 = false;
+    bool is_f64 = false;
+    bool is_i64 = false;
 
     int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
     int n_dims = 0;
@@ -133,6 +135,8 @@ struct TensorStorage {
     int64_t nbytes_to_read() const {
         if (is_bf16 || is_f8_e4m3 || is_f8_e5m2) {
             return nbytes() / 2;
+        } else if (is_f64 || is_i64) {
+            return nbytes() * 2;
         } else {
             return nbytes();
         }
@@ -183,6 +187,10 @@ struct TensorStorage {
             type_name = "f8_e4m3";
         } else if (is_f8_e5m2) {
             type_name = "f8_e5m2";
+        } else if (is_f64) {
+            type_name = "f64";
+        } else if (is_i64) {
+            type_name = "i64";
         }
         ss << name << " | " << type_name << " | ";
         ss << n_dims << " [";
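
Review note (not part of the patch): unlike the widening f8 -> f16 helpers above, which iterate from n - 1 down to 0, the new narrowing helpers iterate forward, and that is exactly what makes their in-place use safe: destination element i occupies bytes 4*i..4*i+3, which never overtake the 8-byte source element starting at byte 8*i. A minimal standalone sketch of this technique follows; it assumes only the helper shape from this patch, and the buffer name and values are illustrative, not taken from model.cpp.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Same shape as the i64_to_i32_vec added in model.cpp: an in-place
    // narrowing copy. Forward iteration is safe because dst[i] begins at
    // byte offset 4*i, which is always <= the offset 8*i of src[i], so no
    // source element is overwritten before it has been read.
    static void i64_to_i32_vec(int64_t* src, int32_t* dst, int64_t n) {
        for (int64_t i = 0; i < n; i++) {
            dst[i] = (int32_t)src[i];
        }
    }

    int main() {
        // Illustrative stand-in for read_buffer: an I64 payload as read
        // from a safetensors file, twice the size of the converted I32
        // tensor (hence nbytes_to_read() == nbytes() * 2 when is_i64).
        std::vector<int64_t> read_buffer = {1, -2, 3, -4};
        i64_to_i32_vec(read_buffer.data(), (int32_t*)read_buffer.data(),
                       (int64_t)read_buffer.size());
        const int32_t* out = (const int32_t*)read_buffer.data();
        for (size_t i = 0; i < read_buffer.size(); i++) {
            printf("%d ", out[i]);  // prints: 1 -2 3 -4
        }
        printf("\n");
        return 0;
    }

The same argument covers f64_to_f32_vec. The widening f8 -> f16 helpers must walk backwards for the mirror-image reason: each 2-byte write lands at offset 2*i, ahead of the 1-byte source at offset i, so a forward pass would clobber sources not yet read.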