From 567f9f14f0f61c4ca436f45bd86c47d260586448 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Thu, 18 Sep 2025 00:00:15 +0800
Subject: [PATCH 1/3] fix: avoid multithreading issues in the model loader

---
 model.cpp | 2 ++
 pmid.hpp  | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/model.cpp b/model.cpp
index 168b675..0585e98 100644
--- a/model.cpp
+++ b/model.cpp
@@ -2427,6 +2427,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
 
     auto tensor_type_rules = parse_tensor_type_rules(tensor_type_rules_str);
 
+    std::mutex tensor_mutex;
     auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
         const std::string& name = tensor_storage.name;
         ggml_type tensor_type   = tensor_storage.type;
@@ -2444,6 +2445,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
             tensor_type = dst_type;
         }
 
+        std::lock_guard<std::mutex> lock(tensor_mutex);
         ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
         if (tensor == NULL) {
             LOG_ERROR("ggml_new_tensor failed");
diff --git a/pmid.hpp b/pmid.hpp
index d7daa41..3bd59cd 100644
--- a/pmid.hpp
+++ b/pmid.hpp
@@ -599,7 +599,8 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
             return false;
         }
 
-        bool dry_run          = true;
+        bool dry_run = true;
+        std::mutex tensor_mutex;
         auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
             const std::string& name = tensor_storage.name;
 
@@ -608,6 +609,7 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
                 return true;
             }
             if (dry_run) {
+                std::lock_guard<std::mutex> lock(tensor_mutex);
                 struct ggml_tensor* real = ggml_new_tensor(params_ctx,
                                                            tensor_storage.type,
                                                            tensor_storage.n_dims,

From 171b2222a5491e7e82c0ef04bf370b2a78a3d44c Mon Sep 17 00:00:00 2001
From: Wagner Bruna <wbruna@users.noreply.github.com>
Date: Wed, 17 Sep 2025 13:11:38 -0300
Subject: [PATCH 2/3] fix: avoid segfault for pix2pix models without reference
 images (#766)

* fix: avoid segfault for pix2pix models with no reference images

* fix: default to empty reference on pix2pix models to avoid segfault

* use resize instead of reserve

* format code

---------

Co-authored-by: leejet <leejet714@gmail.com>
---
 stable-diffusion.cpp | 52 ++++++++++++++++++++++++++++++--------------
 1 file changed, 36 insertions(+), 16 deletions(-)

diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index c35268b..e4102e6 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -443,6 +443,10 @@ public:
             diffusion_model->alloc_params_buffer();
             diffusion_model->get_param_tensors(tensors);
 
+            if (sd_version_is_unet_edit(version)) {
+                vae_decode_only = false;
+            }
+
             if (high_noise_diffusion_model) {
                 high_noise_diffusion_model->alloc_params_buffer();
                 high_noise_diffusion_model->get_param_tensors(tensors);
@@ -748,15 +752,15 @@ public:
                 denoiser->scheduler->version = version;
                 break;
             case SGM_UNIFORM:
-                    LOG_INFO("Running with SGM Uniform schedule");
-                    denoiser->scheduler          = std::make_shared<SGMUniformSchedule>();
-                    denoiser->scheduler->version = version;
-                    break;
+                LOG_INFO("Running with SGM Uniform schedule");
+                denoiser->scheduler          = std::make_shared<SGMUniformSchedule>();
+                denoiser->scheduler->version = version;
+                break;
             case SIMPLE:
-                    LOG_INFO("Running with Simple schedule");
-                    denoiser->scheduler          = std::make_shared<SimpleSchedule>();
-                    denoiser->scheduler->version = version;
-                    break;
+                LOG_INFO("Running with Simple schedule");
+                denoiser->scheduler          = std::make_shared<SimpleSchedule>();
+                denoiser->scheduler->version = version;
+                break;
             case SMOOTHSTEP:
                 LOG_INFO("Running with SmoothStep scheduler");
                 denoiser->scheduler = std::make_shared<SmoothStepSchedule>();
@@ -1053,7 +1057,7 @@ public:
                         ggml_tensor* denoise_mask             = NULL,
                         ggml_tensor* vace_context             = NULL,
                         float vace_strength                   = 1.f) {
-         if (shifted_timestep > 0 && !sd_version_is_sdxl(version)) {
+        if (shifted_timestep > 0 && !sd_version_is_sdxl(version)) {
             LOG_WARN("timestep shifting is only supported for SDXL models!");
             shifted_timestep = 0;
         }
@@ -1127,7 +1131,7 @@ public:
             } else {
                 timesteps_vec.assign(1, t);
             }
-            
+
             timesteps_vec  = process_timesteps(timesteps_vec, init_latent, denoise_mask);
             auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec);
             std::vector<float> guidance_vec(1, guidance.distilled_guidance);
@@ -2387,19 +2391,35 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
         init_latent = generate_init_latent(sd_ctx, work_ctx, width, height);
     }
 
-    if (sd_img_gen_params->ref_images_count > 0) {
+    sd_guidance_params_t guidance = sd_img_gen_params->sample_params.guidance;
+    std::vector<sd_image_t*> ref_images;
+    for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) {
+        ref_images.push_back(&sd_img_gen_params->ref_images[i]);
+    }
+
+    std::vector<uint8_t> empty_image_data;
+    sd_image_t empty_image = {(uint32_t)width, (uint32_t)height, 3, nullptr};
+    if (ref_images.empty() && sd_version_is_unet_edit(sd_ctx->sd->version)) {
+        LOG_WARN("This model needs at least one reference image; using an empty reference");
+        empty_image_data.resize(width * height * 3);
+        ref_images.push_back(&empty_image);
+        empty_image.data = empty_image_data.data();
+        guidance.img_cfg = 0.f;
+    }
+
+    if (ref_images.size() > 0) {
         LOG_INFO("EDIT mode");
     }
 
     std::vector<ggml_tensor*> ref_latents;
-    for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) {
+    for (int i = 0; i < ref_images.size(); i++) {
         ggml_tensor* img = ggml_new_tensor_4d(work_ctx,
                                               GGML_TYPE_F32,
-                                              sd_img_gen_params->ref_images[i].width,
-                                              sd_img_gen_params->ref_images[i].height,
+                                              ref_images[i]->width,
+                                              ref_images[i]->height,
                                               3,
                                               1);
-        sd_image_to_tensor(sd_img_gen_params->ref_images[i], img);
+        sd_image_to_tensor(*ref_images[i], img);
 
         ggml_tensor* latent = NULL;
         if (sd_ctx->sd->use_tiny_autoencoder) {
@@ -2437,7 +2457,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
                                                         SAFE_STR(sd_img_gen_params->prompt),
                                                         SAFE_STR(sd_img_gen_params->negative_prompt),
                                                         sd_img_gen_params->clip_skip,
-                                                        sd_img_gen_params->sample_params.guidance,
+                                                        guidance,
                                                         sd_img_gen_params->sample_params.eta,
                                                         sd_img_gen_params->sample_params.shifted_timestep,
                                                         width,

From fd693ac6a2ab12cfe8726e85d11f6ec1f6ec70ef Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Thu, 18 Sep 2025 00:12:53 +0800
Subject: [PATCH 3/3] refactor: remove unused --normalize-input parameter
 (#835)

---
 README.md             | 1 -
 examples/cli/main.cpp | 5 -----
 stable-diffusion.cpp  | 5 -----
 stable-diffusion.h    | 1 -
 4 files changed, 12 deletions(-)

diff --git a/README.md b/README.md
index 41c7ba6..62b5979 100644
--- a/README.md
+++ b/README.md
@@ -384,7 +384,6 @@ arguments:
   --pm-id-images-dir [DIR]           path to PHOTOMAKER input id images dir
   --pm-id-embed-path [PATH]          path to PHOTOMAKER v2 id embed
   --pm-style-strength                strength for keeping PHOTOMAKER input identity (default: 20)
-  --normalize-input                  normalize PHOTOMAKER input id images
   -v, --verbose                      print extra info
 ```
 
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index 274a25a..02f4767 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -103,7 +103,6 @@ struct SDParams {
     bool verbose               = false;
     bool offload_params_to_cpu = false;
     bool control_net_cpu       = false;
-    bool normalize_input       = false;
     bool clip_on_cpu           = false;
     bool vae_on_cpu            = false;
     bool diffusion_flash_attn  = false;
@@ -156,7 +155,6 @@ void print_params(SDParams params) {
     printf("    pm_id_images_dir:                  %s\n", params.pm_id_images_dir.c_str());
     printf("    pm_id_embed_path:                  %s\n", params.pm_id_embed_path.c_str());
     printf("    pm_style_strength:                 %.2f\n", params.pm_style_strength);
-    printf("    normalize input image:             %s\n", params.normalize_input ? "true" : "false");
     printf("    output_path:                       %s\n", params.output_path.c_str());
     printf("    init_image_path:                   %s\n", params.init_image_path.c_str());
     printf("    end_image_path:                    %s\n", params.end_image_path.c_str());
@@ -306,7 +304,6 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --pm-id-images-dir [DIR]           path to PHOTOMAKER input id images dir\n");
     printf("  --pm-id-embed-path [PATH]          path to PHOTOMAKER v2 id embed\n");
     printf("  --pm-style-strength                strength for keeping PHOTOMAKER input identity (default: 20)\n");
-    printf("  --normalize-input                  normalize PHOTOMAKER input id images\n");
     printf("  -v, --verbose                      print extra info\n");
 }
 
@@ -552,7 +549,6 @@ void parse_args(int argc, const char** argv, SDParams& params) {
         {"", "--vae-tiling", "", true, &params.vae_tiling_params.enabled},
         {"", "--offload-to-cpu", "", true, &params.offload_params_to_cpu},
         {"", "--control-net-cpu", "", true, &params.control_net_cpu},
-        {"", "--normalize-input", "", true, &params.normalize_input},
         {"", "--clip-on-cpu", "", true, &params.clip_on_cpu},
         {"", "--vae-on-cpu", "", true, &params.vae_on_cpu},
         {"", "--diffusion-fa", "", true, &params.diffusion_flash_attn},
@@ -1379,7 +1375,6 @@ int main(int argc, const char* argv[]) {
             params.batch_count,
             control_image,
             params.control_strength,
-            params.normalize_input,
             {
                 pmid_images.data(),
                 (int)pmid_images.size(),
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index e4102e6..ff064bb 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -1794,7 +1794,6 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) {
     sd_img_gen_params->seed              = -1;
     sd_img_gen_params->batch_count       = 1;
     sd_img_gen_params->control_strength  = 0.9f;
-    sd_img_gen_params->normalize_input   = false;
     sd_img_gen_params->pm_params         = {nullptr, 0, nullptr, 20.f};
     sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
 }
@@ -1820,7 +1819,6 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
              "ref_images_count: %d\n"
              "increase_ref_index: %s\n"
              "control_strength: %.2f\n"
-             "normalize_input: %s\n"
              "photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
              "VAE tiling: %s\n",
              SAFE_STR(sd_img_gen_params->prompt),
@@ -1835,7 +1833,6 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
              sd_img_gen_params->ref_images_count,
              BOOL_STR(sd_img_gen_params->increase_ref_index),
              sd_img_gen_params->control_strength,
-             BOOL_STR(sd_img_gen_params->normalize_input),
              sd_img_gen_params->pm_params.style_strength,
              sd_img_gen_params->pm_params.id_images_count,
              SAFE_STR(sd_img_gen_params->pm_params.id_embed_path),
@@ -1919,7 +1916,6 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
                                     int batch_count,
                                     sd_image_t control_image,
                                     float control_strength,
-                                    bool normalize_input,
                                     sd_pm_params_t pm_params,
                                     std::vector<ggml_tensor*> ref_latents,
                                     bool increase_ref_index,
@@ -2468,7 +2464,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
                                                         sd_img_gen_params->batch_count,
                                                         sd_img_gen_params->control_image,
                                                         sd_img_gen_params->control_strength,
-                                                        sd_img_gen_params->normalize_input,
                                                         sd_img_gen_params->pm_params,
                                                         ref_latents,
                                                         sd_img_gen_params->increase_ref_index,
diff --git a/stable-diffusion.h b/stable-diffusion.h
index 80f1f6e..7efbce5 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -212,7 +212,6 @@ typedef struct {
     int batch_count;
     sd_image_t control_image;
     float control_strength;
-    bool normalize_input;
     sd_pm_params_t pm_params;
     sd_tiling_params_t vae_tiling_params;
 } sd_img_gen_params_t;