From 567f9f14f0f61c4ca436f45bd86c47d260586448 Mon Sep 17 00:00:00 2001 From: leejet Date: Thu, 18 Sep 2025 00:00:15 +0800 Subject: [PATCH 1/3] fix: avoid multithreading issues in the model loader --- model.cpp | 2 ++ pmid.hpp | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/model.cpp b/model.cpp index 168b675..0585e98 100644 --- a/model.cpp +++ b/model.cpp @@ -2427,6 +2427,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type auto tensor_type_rules = parse_tensor_type_rules(tensor_type_rules_str); + std::mutex tensor_mutex; auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool { const std::string& name = tensor_storage.name; ggml_type tensor_type = tensor_storage.type; @@ -2444,6 +2445,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type tensor_type = dst_type; } + std::lock_guard lock(tensor_mutex); ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne); if (tensor == NULL) { LOG_ERROR("ggml_new_tensor failed"); diff --git a/pmid.hpp b/pmid.hpp index d7daa41..3bd59cd 100644 --- a/pmid.hpp +++ b/pmid.hpp @@ -599,7 +599,8 @@ struct PhotoMakerIDEmbed : public GGMLRunner { return false; } - bool dry_run = true; + bool dry_run = true; + std::mutex tensor_mutex; auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool { const std::string& name = tensor_storage.name; @@ -608,6 +609,7 @@ struct PhotoMakerIDEmbed : public GGMLRunner { return true; } if (dry_run) { + std::lock_guard lock(tensor_mutex); struct ggml_tensor* real = ggml_new_tensor(params_ctx, tensor_storage.type, tensor_storage.n_dims, From 171b2222a5491e7e82c0ef04bf370b2a78a3d44c Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Wed, 17 Sep 2025 13:11:38 -0300 Subject: [PATCH 2/3] fix: avoid segfault for pix2pix models without reference images (#766) * fix: avoid segfault for pix2pix models with no reference images * fix: default to empty reference on pix2pix models to avoid segfault * use resize instead of reserve * format code --------- Co-authored-by: leejet --- stable-diffusion.cpp | 52 ++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index c35268b..e4102e6 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -443,6 +443,10 @@ public: diffusion_model->alloc_params_buffer(); diffusion_model->get_param_tensors(tensors); + if (sd_version_is_unet_edit(version)) { + vae_decode_only = false; + } + if (high_noise_diffusion_model) { high_noise_diffusion_model->alloc_params_buffer(); high_noise_diffusion_model->get_param_tensors(tensors); @@ -748,15 +752,15 @@ public: denoiser->scheduler->version = version; break; case SGM_UNIFORM: - LOG_INFO("Running with SGM Uniform schedule"); - denoiser->scheduler = std::make_shared(); - denoiser->scheduler->version = version; - break; + LOG_INFO("Running with SGM Uniform schedule"); + denoiser->scheduler = std::make_shared(); + denoiser->scheduler->version = version; + break; case SIMPLE: - LOG_INFO("Running with Simple schedule"); - denoiser->scheduler = std::make_shared(); - denoiser->scheduler->version = version; - break; + LOG_INFO("Running with Simple schedule"); + denoiser->scheduler = std::make_shared(); + denoiser->scheduler->version = version; + break; case SMOOTHSTEP: LOG_INFO("Running with SmoothStep scheduler"); denoiser->scheduler = std::make_shared(); @@ -1053,7 +1057,7 @@ public: ggml_tensor* denoise_mask = NULL, ggml_tensor* vace_context = NULL, float vace_strength = 1.f) { - if (shifted_timestep > 0 && !sd_version_is_sdxl(version)) { + if (shifted_timestep > 0 && !sd_version_is_sdxl(version)) { LOG_WARN("timestep shifting is only supported for SDXL models!"); shifted_timestep = 0; } @@ -1127,7 +1131,7 @@ public: } else { timesteps_vec.assign(1, t); } - + timesteps_vec = process_timesteps(timesteps_vec, init_latent, denoise_mask); auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec); std::vector guidance_vec(1, guidance.distilled_guidance); @@ -2387,19 +2391,35 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g init_latent = generate_init_latent(sd_ctx, work_ctx, width, height); } - if (sd_img_gen_params->ref_images_count > 0) { + sd_guidance_params_t guidance = sd_img_gen_params->sample_params.guidance; + std::vector ref_images; + for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) { + ref_images.push_back(&sd_img_gen_params->ref_images[i]); + } + + std::vector empty_image_data; + sd_image_t empty_image = {(uint32_t)width, (uint32_t)height, 3, nullptr}; + if (ref_images.empty() && sd_version_is_unet_edit(sd_ctx->sd->version)) { + LOG_WARN("This model needs at least one reference image; using an empty reference"); + empty_image_data.resize(width * height * 3); + ref_images.push_back(&empty_image); + empty_image.data = empty_image_data.data(); + guidance.img_cfg = 0.f; + } + + if (ref_images.size() > 0) { LOG_INFO("EDIT mode"); } std::vector ref_latents; - for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) { + for (int i = 0; i < ref_images.size(); i++) { ggml_tensor* img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, - sd_img_gen_params->ref_images[i].width, - sd_img_gen_params->ref_images[i].height, + ref_images[i]->width, + ref_images[i]->height, 3, 1); - sd_image_to_tensor(sd_img_gen_params->ref_images[i], img); + sd_image_to_tensor(*ref_images[i], img); ggml_tensor* latent = NULL; if (sd_ctx->sd->use_tiny_autoencoder) { @@ -2437,7 +2457,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g SAFE_STR(sd_img_gen_params->prompt), SAFE_STR(sd_img_gen_params->negative_prompt), sd_img_gen_params->clip_skip, - sd_img_gen_params->sample_params.guidance, + guidance, sd_img_gen_params->sample_params.eta, sd_img_gen_params->sample_params.shifted_timestep, width, From fd693ac6a2ab12cfe8726e85d11f6ec1f6ec70ef Mon Sep 17 00:00:00 2001 From: leejet Date: Thu, 18 Sep 2025 00:12:53 +0800 Subject: [PATCH 3/3] refactor: remove unused --normalize-input parameter (#835) --- README.md | 1 - examples/cli/main.cpp | 5 ----- stable-diffusion.cpp | 5 ----- stable-diffusion.h | 1 - 4 files changed, 12 deletions(-) diff --git a/README.md b/README.md index 41c7ba6..62b5979 100644 --- a/README.md +++ b/README.md @@ -384,7 +384,6 @@ arguments: --pm-id-images-dir [DIR] path to PHOTOMAKER input id images dir --pm-id-embed-path [PATH] path to PHOTOMAKER v2 id embed --pm-style-strength strength for keeping PHOTOMAKER input identity (default: 20) - --normalize-input normalize PHOTOMAKER input id images -v, --verbose print extra info ``` diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 274a25a..02f4767 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -103,7 +103,6 @@ struct SDParams { bool verbose = false; bool offload_params_to_cpu = false; bool control_net_cpu = false; - bool normalize_input = false; bool clip_on_cpu = false; bool vae_on_cpu = false; bool diffusion_flash_attn = false; @@ -156,7 +155,6 @@ void print_params(SDParams params) { printf(" pm_id_images_dir: %s\n", params.pm_id_images_dir.c_str()); printf(" pm_id_embed_path: %s\n", params.pm_id_embed_path.c_str()); printf(" pm_style_strength: %.2f\n", params.pm_style_strength); - printf(" normalize input image: %s\n", params.normalize_input ? "true" : "false"); printf(" output_path: %s\n", params.output_path.c_str()); printf(" init_image_path: %s\n", params.init_image_path.c_str()); printf(" end_image_path: %s\n", params.end_image_path.c_str()); @@ -306,7 +304,6 @@ void print_usage(int argc, const char* argv[]) { printf(" --pm-id-images-dir [DIR] path to PHOTOMAKER input id images dir\n"); printf(" --pm-id-embed-path [PATH] path to PHOTOMAKER v2 id embed\n"); printf(" --pm-style-strength strength for keeping PHOTOMAKER input identity (default: 20)\n"); - printf(" --normalize-input normalize PHOTOMAKER input id images\n"); printf(" -v, --verbose print extra info\n"); } @@ -552,7 +549,6 @@ void parse_args(int argc, const char** argv, SDParams& params) { {"", "--vae-tiling", "", true, ¶ms.vae_tiling_params.enabled}, {"", "--offload-to-cpu", "", true, ¶ms.offload_params_to_cpu}, {"", "--control-net-cpu", "", true, ¶ms.control_net_cpu}, - {"", "--normalize-input", "", true, ¶ms.normalize_input}, {"", "--clip-on-cpu", "", true, ¶ms.clip_on_cpu}, {"", "--vae-on-cpu", "", true, ¶ms.vae_on_cpu}, {"", "--diffusion-fa", "", true, ¶ms.diffusion_flash_attn}, @@ -1379,7 +1375,6 @@ int main(int argc, const char* argv[]) { params.batch_count, control_image, params.control_strength, - params.normalize_input, { pmid_images.data(), (int)pmid_images.size(), diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index e4102e6..ff064bb 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -1794,7 +1794,6 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) { sd_img_gen_params->seed = -1; sd_img_gen_params->batch_count = 1; sd_img_gen_params->control_strength = 0.9f; - sd_img_gen_params->normalize_input = false; sd_img_gen_params->pm_params = {nullptr, 0, nullptr, 20.f}; sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f}; } @@ -1820,7 +1819,6 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) { "ref_images_count: %d\n" "increase_ref_index: %s\n" "control_strength: %.2f\n" - "normalize_input: %s\n" "photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n" "VAE tiling: %s\n", SAFE_STR(sd_img_gen_params->prompt), @@ -1835,7 +1833,6 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) { sd_img_gen_params->ref_images_count, BOOL_STR(sd_img_gen_params->increase_ref_index), sd_img_gen_params->control_strength, - BOOL_STR(sd_img_gen_params->normalize_input), sd_img_gen_params->pm_params.style_strength, sd_img_gen_params->pm_params.id_images_count, SAFE_STR(sd_img_gen_params->pm_params.id_embed_path), @@ -1919,7 +1916,6 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, int batch_count, sd_image_t control_image, float control_strength, - bool normalize_input, sd_pm_params_t pm_params, std::vector ref_latents, bool increase_ref_index, @@ -2468,7 +2464,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g sd_img_gen_params->batch_count, sd_img_gen_params->control_image, sd_img_gen_params->control_strength, - sd_img_gen_params->normalize_input, sd_img_gen_params->pm_params, ref_latents, sd_img_gen_params->increase_ref_index, diff --git a/stable-diffusion.h b/stable-diffusion.h index 80f1f6e..7efbce5 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -212,7 +212,6 @@ typedef struct { int batch_count; sd_image_t control_image; float control_strength; - bool normalize_input; sd_pm_params_t pm_params; sd_tiling_params_t vae_tiling_params; } sd_img_gen_params_t;