fix: avoid potential dangling pointer problem

sync: update ggml
fix: fix race condition causing inconsistent value for decoder_only (#609)
2025-12-13 05:48:56 +00:00 · 2025-03-01 16:58:26 +08:00 · 2025-03-01 12:09:55 +08:00 · 2025-03-01 11:49:06 +08:00 · 2025-03-01 11:48:04 +08:00 · 2025-03-01 11:45:39 +08:00
7 changed files with 84 additions and 51 deletions
--- a/clip.hpp
+++ b/clip.hpp
@@ -546,7 +546,7 @@ protected:
    int64_t num_positions;

    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        enum ggml_type token_wtype    = (tensor_types.find(prefix + "token_embedding.weight") != tensor_types.end()) ? tensor_types[prefix + "token_embedding.weight"] : GGML_TYPE_F32;
+        enum ggml_type token_wtype    = GGML_TYPE_F32;  //(tensor_types.find(prefix + "token_embedding.weight") != tensor_types.end()) ? tensor_types[prefix + "token_embedding.weight"] : GGML_TYPE_F32;
        enum ggml_type position_wtype = GGML_TYPE_F32;  //(tensor_types.find(prefix + "position_embedding.weight") != tensor_types.end()) ? tensor_types[prefix + "position_embedding.weight"] : GGML_TYPE_F32;

        params["token_embedding.weight"]    = ggml_new_tensor_2d(ctx, token_wtype, embed_dim, vocab_size);
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -931,12 +931,12 @@ int main(int argc, const char* argv[]) {
        }
    }

+    std::vector<uint8_t> default_mask_image_vec(params.width * params.height, 255);
    if (params.mask_path != "") {
        int c             = 0;
        mask_image_buffer = stbi_load(params.mask_path.c_str(), &params.width, &params.height, &c, 1);
    } else {
-        std::vector<uint8_t> arr(params.width * params.height, 255);
-        mask_image_buffer = arr.data();
+        mask_image_buffer = default_mask_image_vec.data();
    }
    sd_image_t mask_image = {(uint32_t)params.width,
                             (uint32_t)params.height,
--- a/ggml
+++ b/ggml
@@ -1 +1 @@
-Subproject commit 6fcbd60bc72ac3f7ad43f78c87e535f2e6206f58
+Subproject commit ff9052988b76e137bcf92bb335733933ca196ac0
--- a/model.cpp
+++ b/model.cpp
@@ -558,6 +558,26 @@ std::string convert_tensor_name(std::string name) {
    return new_name;
 }

+void add_preprocess_tensor_storage_types(std::map<std::string, enum ggml_type>& tensor_storages_types, std::string name, enum ggml_type type) {
+    std::string new_name = convert_tensor_name(name);
+
+    if (new_name.find("cond_stage_model") != std::string::npos && ends_with(new_name, "attn.in_proj_weight")) {
+        size_t prefix_size                                        = new_name.find("attn.in_proj_weight");
+        std::string prefix                                        = new_name.substr(0, prefix_size);
+        tensor_storages_types[prefix + "self_attn.q_proj.weight"] = type;
+        tensor_storages_types[prefix + "self_attn.k_proj.weight"] = type;
+        tensor_storages_types[prefix + "self_attn.v_proj.weight"] = type;
+    } else if (new_name.find("cond_stage_model") != std::string::npos && ends_with(new_name, "attn.in_proj_bias")) {
+        size_t prefix_size                                      = new_name.find("attn.in_proj_bias");
+        std::string prefix                                      = new_name.substr(0, prefix_size);
+        tensor_storages_types[prefix + "self_attn.q_proj.bias"] = type;
+        tensor_storages_types[prefix + "self_attn.k_proj.bias"] = type;
+        tensor_storages_types[prefix + "self_attn.v_proj.bias"] = type;
+    } else {
+        tensor_storages_types[new_name] = type;
+    }
+}
+
 void preprocess_tensor(TensorStorage tensor_storage,
                       std::vector<TensorStorage>& processed_tensor_storages) {
    std::vector<TensorStorage> result;
@@ -927,7 +947,7 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s
        GGML_ASSERT(ggml_nbytes(dummy) == tensor_storage.nbytes());

        tensor_storages.push_back(tensor_storage);
-        tensor_storages_types[tensor_storage.name] = tensor_storage.type;
+        add_preprocess_tensor_storage_types(tensor_storages_types, tensor_storage.name, tensor_storage.type);
    }

    gguf_free(ctx_gguf_);
@@ -1072,7 +1092,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
        }

        tensor_storages.push_back(tensor_storage);
-        tensor_storages_types[tensor_storage.name] = tensor_storage.type;
+        add_preprocess_tensor_storage_types(tensor_storages_types, tensor_storage.name, tensor_storage.type);

        // LOG_DEBUG("%s %s", tensor_storage.to_string().c_str(), dtype.c_str());
    }
@@ -1403,7 +1423,7 @@ bool ModelLoader::parse_data_pkl(uint8_t* buffer,
                        // printf(" ZIP got tensor %s \n ", reader.tensor_storage.name.c_str());
                        reader.tensor_storage.name = prefix + reader.tensor_storage.name;
                        tensor_storages.push_back(reader.tensor_storage);
-                        tensor_storages_types[reader.tensor_storage.name] = reader.tensor_storage.type;
+                        add_preprocess_tensor_storage_types(tensor_storages_types, reader.tensor_storage.name, reader.tensor_storage.type);

                        // LOG_DEBUG("%s", reader.tensor_storage.name.c_str());
                        // reset
@@ -1461,10 +1481,10 @@ SDVersion ModelLoader::get_sd_version() {
    TensorStorage token_embedding_weight, input_block_weight;
    bool input_block_checked = false;

-    bool has_multiple_encoders   = false;
-    bool is_unet = false;
+    bool has_multiple_encoders = false;
+    bool is_unet               = false;

-    bool is_xl = false;
+    bool is_xl   = false;
    bool is_flux = false;

 #define found_family (is_xl || is_flux)
@@ -1481,7 +1501,7 @@ SDVersion ModelLoader::get_sd_version() {
            }
            if (tensor_storage.name.find("model.diffusion_model.input_blocks.") != std::string::npos) {
                is_unet = true;
-                if(has_multiple_encoders){
+                if (has_multiple_encoders) {
                    is_xl = true;
                    if (input_block_checked) {
                        break;
@@ -1490,7 +1510,7 @@ SDVersion ModelLoader::get_sd_version() {
            }
            if (tensor_storage.name.find("conditioner.embedders.1") != std::string::npos || tensor_storage.name.find("cond_stage_model.1") != std::string::npos) {
                has_multiple_encoders = true;
-                if(is_unet){
+                if (is_unet) {
                    is_xl = true;
                    if (input_block_checked) {
                        break;
@@ -1635,11 +1655,20 @@ ggml_type ModelLoader::get_vae_wtype() {
 void ModelLoader::set_wtype_override(ggml_type wtype, std::string prefix) {
    for (auto& pair : tensor_storages_types) {
        if (prefix.size() < 1 || pair.first.substr(0, prefix.size()) == prefix) {
+            bool found = false;
            for (auto& tensor_storage : tensor_storages) {
-                if (tensor_storage.name == pair.first) {
-                    if (tensor_should_be_converted(tensor_storage, wtype)) {
-                        pair.second = wtype;
+                std::map<std::string, ggml_type> temp;
+                add_preprocess_tensor_storage_types(temp, tensor_storage.name, tensor_storage.type);
+                for (auto& preprocessed_name : temp) {
+                    if (preprocessed_name.first == pair.first) {
+                        if (tensor_should_be_converted(tensor_storage, wtype)) {
+                            pair.second = wtype;
+                        }
+                        found = true;
+                        break;
                    }
+                }
+                if (found) {
                    break;
                }
            }
--- a/model.h
+++ b/model.h
@@ -14,6 +14,7 @@
 #include "ggml.h"
 #include "json.hpp"
 #include "zip.h"
+#include "gguf.h"

 #define SD_MAX_DIMS 5

--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -61,43 +61,46 @@ enum schedule_t {

 // same as enum ggml_type
 enum sd_type_t {
-    SD_TYPE_F32  = 0,
-    SD_TYPE_F16  = 1,
-    SD_TYPE_Q4_0 = 2,
-    SD_TYPE_Q4_1 = 3,
+    SD_TYPE_F32     = 0,
+    SD_TYPE_F16     = 1,
+    SD_TYPE_Q4_0    = 2,
+    SD_TYPE_Q4_1    = 3,
    // SD_TYPE_Q4_2 = 4, support has been removed
    // SD_TYPE_Q4_3 = 5, support has been removed
-    SD_TYPE_Q5_0     = 6,
-    SD_TYPE_Q5_1     = 7,
-    SD_TYPE_Q8_0     = 8,
-    SD_TYPE_Q8_1     = 9,
-    SD_TYPE_Q2_K     = 10,
-    SD_TYPE_Q3_K     = 11,
-    SD_TYPE_Q4_K     = 12,
-    SD_TYPE_Q5_K     = 13,
-    SD_TYPE_Q6_K     = 14,
-    SD_TYPE_Q8_K     = 15,
-    SD_TYPE_IQ2_XXS  = 16,
-    SD_TYPE_IQ2_XS   = 17,
-    SD_TYPE_IQ3_XXS  = 18,
-    SD_TYPE_IQ1_S    = 19,
-    SD_TYPE_IQ4_NL   = 20,
-    SD_TYPE_IQ3_S    = 21,
-    SD_TYPE_IQ2_S    = 22,
-    SD_TYPE_IQ4_XS   = 23,
-    SD_TYPE_I8       = 24,
-    SD_TYPE_I16      = 25,
-    SD_TYPE_I32      = 26,
-    SD_TYPE_I64      = 27,
-    SD_TYPE_F64      = 28,
-    SD_TYPE_IQ1_M    = 29,
-    SD_TYPE_BF16     = 30,
-    SD_TYPE_Q4_0_4_4 = 31,
-    SD_TYPE_Q4_0_4_8 = 32,
-    SD_TYPE_Q4_0_8_8 = 33,
-    SD_TYPE_TQ1_0    = 34,
-    SD_TYPE_TQ2_0    = 35,
-    SD_TYPE_COUNT,
+    SD_TYPE_Q5_0    = 6,
+    SD_TYPE_Q5_1    = 7,
+    SD_TYPE_Q8_0    = 8,
+    SD_TYPE_Q8_1    = 9,
+    SD_TYPE_Q2_K    = 10,
+    SD_TYPE_Q3_K    = 11,
+    SD_TYPE_Q4_K    = 12,
+    SD_TYPE_Q5_K    = 13,
+    SD_TYPE_Q6_K    = 14,
+    SD_TYPE_Q8_K    = 15,
+    SD_TYPE_IQ2_XXS = 16,
+    SD_TYPE_IQ2_XS  = 17,
+    SD_TYPE_IQ3_XXS = 18,
+    SD_TYPE_IQ1_S   = 19,
+    SD_TYPE_IQ4_NL  = 20,
+    SD_TYPE_IQ3_S   = 21,
+    SD_TYPE_IQ2_S   = 22,
+    SD_TYPE_IQ4_XS  = 23,
+    SD_TYPE_I8      = 24,
+    SD_TYPE_I16     = 25,
+    SD_TYPE_I32     = 26,
+    SD_TYPE_I64     = 27,
+    SD_TYPE_F64     = 28,
+    SD_TYPE_IQ1_M   = 29,
+    SD_TYPE_BF16    = 30,
+    // SD_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
+    // SD_TYPE_Q4_0_4_8 = 32,
+    // SD_TYPE_Q4_0_8_8 = 33,
+    SD_TYPE_TQ1_0   = 34,
+    SD_TYPE_TQ2_0   = 35,
+    // SD_TYPE_IQ4_NL_4_4 = 36,
+    // SD_TYPE_IQ4_NL_4_8 = 37,
+    // SD_TYPE_IQ4_NL_8_8 = 38,
+    SD_TYPE_COUNT   = 39,
 };

 SD_API const char* sd_type_name(enum sd_type_t type);
--- a/tae.hpp
+++ b/tae.hpp
@@ -201,7 +201,7 @@ struct TinyAutoEncoder : public GGMLRunner {
                    bool decoder_only = true,
                    SDVersion version = VERSION_SD1)
        : decode_only(decoder_only),
-          taesd(decode_only, version),
+          taesd(decoder_only, version),
          GGMLRunner(backend) {
        taesd.init(params_ctx, tensor_types, prefix);
    }
Author	SHA1	Message	Date
leejet	30b3ac8e62	fix: avoid potential dangling pointer problem	2025-03-01 16:58:26 +08:00
leejet	195d170136	sync: update ggml	2025-03-01 12:09:55 +08:00
stduhpf	f50a7f66aa	fix: fix race condition causing inconsistent value for `decoder_only` (#609)	2025-03-01 11:49:06 +08:00
stduhpf	85e9a12988	fix: preprocess tensor names in tensor types map (#607) Thank you for your contribution	2025-03-01 11:48:04 +08:00
stduhpf	fbd42b6fc1	fix: fix embeddings with quantized models (#601)	2025-03-01 11:45:39 +08:00