fix: prevent crash on memory allocation error and exit gracefully (#1566)

2026-06-09 15:56:39 +00:00 · 2026-05-30 12:34:07 +02:00 · 2026-05-30 12:34:07 +02:00 · b4ba55d8d7
commit b4ba55d8d7
parent b54bd83a3f
18 changed files with 156 additions and 48 deletions
--- a/src/conditioner.hpp
+++ b/src/conditioner.hpp
@ -113,7 +113,7 @@ struct Conditioner {
 public:
    virtual SDCondition get_learned_condition(int n_threads,
                                              const ConditionerParams& conditioner_params) = 0;
-    virtual void alloc_params_buffer()                                                     = 0;
+    virtual bool alloc_params_buffer()                                                     = 0;
    virtual void free_params_buffer()                                                      = 0;
    virtual void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors)           = 0;
    virtual size_t get_params_buffer_size()                                                = 0;
@ -176,11 +176,16 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
        }
    }

-    void alloc_params_buffer() override {
-        text_model->alloc_params_buffer();
-        if (sd_version_is_sdxl(version)) {
-            text_model2->alloc_params_buffer();
+    bool alloc_params_buffer() override {  // returns false as soon as one text model fails to allocate
+        if (!text_model->alloc_params_buffer()) {
+            return false;
        }
+        if (sd_version_is_sdxl(version)) {  // text_model2 is only allocated for SDXL versions
+            if (!text_model2->alloc_params_buffer()) {
+                return false;  // text_model's buffer is not released here
+            }
+        }
+        return true;  // every required buffer was allocated
    }

    void free_params_buffer() override {
@ -781,16 +786,23 @@ struct SD3CLIPEmbedder : public Conditioner {
        }
    }

-    void alloc_params_buffer() override {
+    bool alloc_params_buffer() override {  // allocates each present encoder in turn; false on the first failure
        if (clip_l) {
-            clip_l->alloc_params_buffer();
+            if (!clip_l->alloc_params_buffer()) {
+                return false;
+            }
        }
        if (clip_g) {
-            clip_g->alloc_params_buffer();
+            if (!clip_g->alloc_params_buffer()) {
+                return false;  // buffers allocated by earlier encoders are not released here
+            }
        }
        if (t5) {
-            t5->alloc_params_buffer();
+            if (!t5->alloc_params_buffer()) {
+                return false;
+            }
        }
+        return true;  // also reached when all three encoders are null
    }

    void free_params_buffer() override {
@ -1145,15 +1157,21 @@ struct FluxCLIPEmbedder : public Conditioner {
        }
    }

-    void alloc_params_buffer() override {
+    bool alloc_params_buffer() override {  // allocates clip_l then t5 when present; false on the first failure
        if (clip_l) {
-            clip_l->alloc_params_buffer();
+            if (!clip_l->alloc_params_buffer()) {
+                return false;
+            }
        }
        if (t5) {
-            t5->alloc_params_buffer();
+            if (!t5->alloc_params_buffer()) {
+                return false;  // clip_l's buffer, if allocated above, is not released here
+            }
        }
+        return true;  // also reached when both encoders are null
    }

+
    void free_params_buffer() override {
        if (clip_l) {
            clip_l->free_params_buffer();
@ -1388,10 +1406,13 @@ struct T5CLIPEmbedder : public Conditioner {
        }
    }

-    void alloc_params_buffer() override {
+    bool alloc_params_buffer() override {  // false only when t5 is present and its allocation fails
        if (t5) {
-            t5->alloc_params_buffer();
+            if (!t5->alloc_params_buffer()) {
+                return false;
+            }
        }
+        return true;  // a null t5 counts as success
    }

    void free_params_buffer() override {
@ -1578,8 +1599,11 @@ struct AnimaConditioner : public Conditioner {
        llm->get_param_tensors(tensors, "text_encoders.llm");
    }

-    void alloc_params_buffer() override {
-        llm->alloc_params_buffer();
+    bool alloc_params_buffer() override {  // forwards the llm allocation result to the caller
+        if (!llm->alloc_params_buffer()) {
+            return false;
+        }
+        return true;
    }

    void free_params_buffer() override {
@ -1717,8 +1741,11 @@ struct LLMEmbedder : public Conditioner {
        llm->get_param_tensors(tensors, "text_encoders.llm");
    }

-    void alloc_params_buffer() override {
-        llm->alloc_params_buffer();
+    bool alloc_params_buffer() override {  // forwards the llm allocation result to the caller
+        if (!llm->alloc_params_buffer()) {
+            return false;
+        }
+        return true;
    }

    void free_params_buffer() override {
@ -2239,9 +2266,14 @@ struct LTXAVEmbedder : public Conditioner {
        projector->get_param_tensors(tensors, "text_embedding_projection");
    }

-    void alloc_params_buffer() override {
-        llm->alloc_params_buffer();
-        projector->alloc_params_buffer();
+    bool alloc_params_buffer() override {  // allocates llm, then projector; false on the first failure
+        if (!llm->alloc_params_buffer()) {
+            return false;
+        }
+        if (!projector->alloc_params_buffer()) {
+            return false;  // llm's buffer is not released here
+        }
+        return true;
    }

    void free_params_buffer() override {
--- a/src/control.hpp
+++ b/src/control.hpp
@ -457,7 +457,11 @@ struct ControlNet : public GGMLRunner {

    bool load_from_file(const std::string& file_path, int n_threads) {
        LOG_INFO("loading control net from '%s'", file_path.c_str());
-        alloc_params_buffer();
+        if (!alloc_params_buffer()) {
+            LOG_ERROR("control net model buffer allocation failed");
+            return false;
+        }
+
        std::map<std::string, ggml_tensor*> tensors;
        control_net.get_param_tensors(tensors);
        std::set<std::string> ignore_tensors;
--- a/src/esrgan.hpp
+++ b/src/esrgan.hpp
@ -270,7 +270,11 @@ struct ESRGAN : public GGMLRunner {
        rrdb_net = std::make_unique<RRDBNet>(detected_scale, detected_num_block, detected_num_in_ch, detected_num_out_ch, detected_num_feat, detected_num_grow_ch);
        rrdb_net->init(params_ctx, {}, "");

-        alloc_params_buffer();
+        if (!alloc_params_buffer()) {
+            LOG_ERROR("esrgan model buffer allocation failed");
+            return false;
+        }
+
        std::map<std::string, ggml_tensor*> esrgan_tensors;
        rrdb_net->get_param_tensors(esrgan_tensors);

--- a/src/flux.hpp
+++ b/src/flux.hpp
@ -1592,7 +1592,11 @@ namespace Flux {
                                                                            VERSION_FLUX2,
                                                                            false);

-            flux->alloc_params_buffer();
+            if (!flux->alloc_params_buffer()) {
+                LOG_ERROR("flux model allocation failed");
+                return;
+            }
+
            std::map<std::string, ggml_tensor*> tensors;
            flux->get_param_tensors(tensors, "model.diffusion_model");

--- a/src/hidream_o1.hpp
+++ b/src/hidream_o1.hpp
@ -492,8 +492,11 @@ namespace HiDreamO1 {
            vision_runner->get_param_tensors(tensors);
        }

-        void alloc_params_buffer() override {
-            vision_runner->alloc_params_buffer();
+        bool alloc_params_buffer() override {  // forwards the vision_runner allocation result to the caller
+            if (!vision_runner->alloc_params_buffer()) {
+                return false;
+            }
+            return true;
        }

        void free_params_buffer() override {
--- a/src/llm.hpp
+++ b/src/llm.hpp
@ -1769,8 +1769,11 @@ namespace LLM {
            model.get_param_tensors(tensors, prefix);
        }

-        void alloc_params_buffer() {
-            model.alloc_params_buffer();
+        bool alloc_params_buffer() {  // forwards the result of model.alloc_params_buffer() to the caller
+            if (!model.alloc_params_buffer()) {
+                return false;
+            }
+            return true;
        }

        std::tuple<std::vector<int>, std::vector<float>> tokenize(std::string text,
@ -2012,7 +2015,11 @@ namespace LLM {
                                                                             "text_encoders.llm",
                                                                             true);

-            llm->alloc_params_buffer();
+            if (!llm->alloc_params_buffer()) {
+                LOG_ERROR("llm model allocation failed");
+                return;
+            }
+
            std::map<std::string, ggml_tensor*> tensors;
            llm->get_param_tensors(tensors, "text_encoders.llm");

--- a/src/lora.hpp
+++ b/src/lora.hpp
@ -86,7 +86,11 @@ struct LoraModel : public GGMLRunner {
            lora_tensors[name] = real;
        }

-        alloc_params_buffer();
+        if (!alloc_params_buffer()) {
+            LOG_ERROR("lora model buffer allocation failed");
+            return false;
+        }
+

        dry_run = false;
        model_loader.load_tensors(on_new_tensor_cb, n_threads);
--- a/src/ltx_audio_vae.h
+++ b/src/ltx_audio_vae.h
@ -1068,7 +1068,11 @@ namespace LTXV {
                                                                     tensor_storage_map,
                                                                     prefix);

-            ltx_audio_vae->alloc_params_buffer();
+            if (!ltx_audio_vae->alloc_params_buffer()) {
+               LOG_ERROR("ltx audio vae buffer allocation failed");
+               return;
+            }
+
            std::map<std::string, ggml_tensor*> tensors;
            ltx_audio_vae->get_param_tensors(tensors, "");

--- a/src/ltx_vae.hpp
+++ b/src/ltx_vae.hpp
@ -1534,7 +1534,11 @@ struct LTXVideoVAE : public VAE {
                                                                         true,
                                                                         VERSION_LTXAV);

-        vae->alloc_params_buffer();
+        if (!vae->alloc_params_buffer()) {
+            LOG_ERROR("vae buffer allocation failed");
+            return;
+        }
+
        std::map<std::string, ggml_tensor*> tensors;
        vae->get_param_tensors(tensors, "first_stage_model");

--- a/src/ltxv.hpp
+++ b/src/ltxv.hpp
@ -2017,7 +2017,10 @@ namespace LTXV {
                                                                               tensor_storage_map,
                                                                               "model.diffusion_model");

-            ltxav->alloc_params_buffer();
+            if (!ltxav->alloc_params_buffer()) {
+                LOG_ERROR("ltxav buffer allocation failed");
+                return;
+            }
            std::map<std::string, ggml_tensor*> tensors;
            ltxav->get_param_tensors(tensors, "model.diffusion_model");

--- a/src/mmdit.hpp
+++ b/src/mmdit.hpp
@ -953,7 +953,11 @@ struct MMDiTRunner : public DiffusionModelRunner {
        {
            LOG_INFO("loading from '%s'", file_path.c_str());

-            mmdit->alloc_params_buffer();
+            if (!mmdit->alloc_params_buffer()) {
+                LOG_ERROR("mmdit embeds buffer allocation failed");
+                return;
+            }
+
            std::map<std::string, ggml_tensor*> tensors;
            mmdit->get_param_tensors(tensors, "model.diffusion_model");

--- a/src/model.cpp
+++ b/src/model.cpp
@ -1004,6 +1004,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
                        continue;
                    }

+                    if (dst_tensor->data == nullptr) {
+                        LOG_ERROR("process tensor data failed: '%s'", tensor_storage.name.c_str());
+                        failed = true;
+                        break;
+                    }
+
                    // skip mmapped tensors
                    if (dst_tensor->buffer != nullptr && dst_tensor->buffer == fdata.mmbuffer.get()) {
                        continue;
--- a/src/pmid.hpp
+++ b/src/pmid.hpp
@ -615,7 +615,10 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
        };

        model_loader->load_tensors(on_new_tensor_cb, n_threads);
-        alloc_params_buffer();
+        if (!alloc_params_buffer()) {
+            LOG_ERROR("PhotoMaker ID embeds buffer allocation failed");
+            return false;
+        }

        dry_run = false;
        model_loader->load_tensors(on_new_tensor_cb, n_threads);
--- a/src/qwen_image.hpp
+++ b/src/qwen_image.hpp
@ -705,7 +705,11 @@ namespace Qwen {
                                                                                            "model.diffusion_model",
                                                                                            VERSION_QWEN_IMAGE);

-            qwen_image->alloc_params_buffer();
+            if (!qwen_image->alloc_params_buffer()) {
+                LOG_ERROR("qwen_image buffer allocation failed");
+                return;
+            }
+
            std::map<std::string, ggml_tensor*> tensors;
            qwen_image->get_param_tensors(tensors, "model.diffusion_model");

--- a/src/stable-diffusion.cpp
+++ b/src/stable-diffusion.cpp
@ -984,14 +984,20 @@ public:
            ggml_free(ctx);
            return false;
        }
-        if (cond_stage_model) {
-            cond_stage_model->alloc_params_buffer();
+        if (cond_stage_model && !cond_stage_model->alloc_params_buffer()) {
+            LOG_ERROR("Conditioner model params buffer allocation failed");
+            ggml_free(ctx);
+            return false;
        }
-        if (diffusion_model) {
-            diffusion_model->alloc_params_buffer();
+        if (diffusion_model && !diffusion_model->alloc_params_buffer()) {
+            LOG_ERROR("Diffusion model params buffer allocation failed");
+            ggml_free(ctx);
+            return false;
        }
-        if (high_noise_diffusion_model) {
-            high_noise_diffusion_model->alloc_params_buffer();
+        if (high_noise_diffusion_model && !high_noise_diffusion_model->alloc_params_buffer()) {
+            LOG_ERROR("High noise diffusion model params buffer allocation failed");
+            ggml_free(ctx);
+            return false;
        }
        if (first_stage_model && !first_stage_model->alloc_params_buffer()) {
            LOG_ERROR("VAE params buffer allocation failed");
--- a/src/t5.hpp
+++ b/src/t5.hpp
@ -475,8 +475,11 @@ struct T5Embedder {
        model.get_param_tensors(tensors, prefix);
    }

-    void alloc_params_buffer() {
-        model.alloc_params_buffer();
+    bool alloc_params_buffer() {  // forwards the result of model.alloc_params_buffer() to the caller
+        if (!model.alloc_params_buffer()) {
+            return false;
+        }
+        return true;
    }

    std::tuple<std::vector<int>, std::vector<float>, std::vector<float>> tokenize(std::string text,
@ -578,7 +581,10 @@ struct T5Embedder {

        std::shared_ptr<T5Embedder> t5 = std::make_shared<T5Embedder>(backend, backend, tensor_storage_map, "", true);

-        t5->alloc_params_buffer();
+        if (!t5->alloc_params_buffer()) {
+            LOG_ERROR("t5 params buffer allocation failed");
+            return;
+        }
        std::map<std::string, ggml_tensor*> tensors;
        t5->get_param_tensors(tensors, "");

--- a/src/wan.hpp
+++ b/src/wan.hpp
@ -1334,7 +1334,10 @@ namespace WAN {
            {
                LOG_INFO("loading from '%s'", file_path.c_str());

-                vae->alloc_params_buffer();
+                if (!vae->alloc_params_buffer()) {
+                    LOG_ERROR("vae buffer allocation failed");
+                    return;
+                }
                std::map<std::string, ggml_tensor*> tensors;
                vae->get_param_tensors(tensors, "first_stage_model");

@ -2368,7 +2371,11 @@ namespace WAN {
                                                                         "model.diffusion_model",
                                                                         VERSION_WAN2_2_TI2V);

-            wan->alloc_params_buffer();
+            if (!wan->alloc_params_buffer()) {
+                LOG_ERROR("wan buffer allocation failed");
+                return;
+            }
+
            std::map<std::string, ggml_tensor*> tensors;
            wan->get_param_tensors(tensors, "model.diffusion_model");

--- a/src/z_image.hpp
+++ b/src/z_image.hpp
@ -639,7 +639,10 @@ namespace ZImage {
                                                                                   "model.diffusion_model",
                                                                                   VERSION_QWEN_IMAGE);

-            z_image->alloc_params_buffer();
+            if (!z_image->alloc_params_buffer()) {
+                LOG_ERROR("z_image buffer allocation failed");
+                return;
+            }
            std::map<std::string, ggml_tensor*> tensors;
            z_image->get_param_tensors(tensors, "model.diffusion_model");