From 15d0f82760e2d44d9bec904b277c4a7ad1f6b2ed Mon Sep 17 00:00:00 2001 From: leejet Date: Sat, 13 Dec 2025 14:27:47 +0800 Subject: [PATCH 1/5] feat(server): do not parse lora from client-side prompts (#1083) --- examples/common/common.hpp | 3 +++ examples/server/main.cpp | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/common/common.hpp b/examples/common/common.hpp index 558817e..0ab5c08 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -1349,6 +1349,9 @@ struct SDGenerationParams { } void extract_and_remove_lora(const std::string& lora_model_dir) { + if (lora_model_dir.empty()) { + return; + } static const std::regex re(R"(]+):([^>]+)>)"); static const std::vector valid_ext = {".pt", ".safetensors", ".gguf"}; std::smatch m; diff --git a/examples/server/main.cpp b/examples/server/main.cpp index 90cf484..f1ba0cd 100644 --- a/examples/server/main.cpp +++ b/examples/server/main.cpp @@ -425,7 +425,7 @@ int main(int argc, const char** argv) { return; } - if (!gen_params.process_and_check(IMG_GEN, ctx_params.lora_model_dir)) { + if (!gen_params.process_and_check(IMG_GEN, "")) { res.status = 400; res.set_content(R"({"error":"invalid params"})", "application/json"); return; @@ -605,7 +605,7 @@ int main(int argc, const char** argv) { return; } - if (!gen_params.process_and_check(IMG_GEN, ctx_params.lora_model_dir)) { + if (!gen_params.process_and_check(IMG_GEN, "")) { res.status = 400; res.set_content(R"({"error":"invalid params"})", "application/json"); return; From 8f05f5bc6ee9d6aba9d1ff2be7739a5a3cf1586d Mon Sep 17 00:00:00 2001 From: rmatif Date: Sat, 13 Dec 2025 09:20:02 +0100 Subject: [PATCH 2/5] feat: add support for custom scheduler (#694) --------- Co-authored-by: leejet --- examples/cli/README.md | 1 + examples/cli/main.cpp | 12 ++++++++-- examples/common/common.hpp | 46 ++++++++++++++++++++++++++++++++++++ examples/server/README.md | 1 + stable-diffusion.cpp | 48 ++++++++++++++++++++++++++++++++------ 
stable-diffusion.h | 2 ++ 6 files changed, 101 insertions(+), 9 deletions(-) diff --git a/examples/cli/README.md b/examples/cli/README.md index f6a4278..02650f7 100644 --- a/examples/cli/README.md +++ b/examples/cli/README.md @@ -121,6 +121,7 @@ Generation Options: ddim_trailing, tcd] default: euler for Flux/SD3/Wan, euler_a otherwise --scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, lcm], default: discrete + --sigmas custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0"). --skip-layers layers to skip for SLG steps (default: [7,8,9]) --high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9]) -r, --ref-image reference image for Flux Kontext models (can be used multiple times) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index eaa2591..417d211 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -258,7 +258,15 @@ std::string get_image_params(const SDCliParams& cli_params, const SDContextParam parameter_string += "Sampler RNG: " + std::string(sd_rng_type_name(ctx_params.sampler_rng_type)) + ", "; } parameter_string += "Sampler: " + std::string(sd_sample_method_name(gen_params.sample_params.sample_method)); - if (gen_params.sample_params.scheduler != SCHEDULER_COUNT) { + if (!gen_params.custom_sigmas.empty()) { + parameter_string += ", Custom Sigmas: ["; + for (size_t i = 0; i < gen_params.custom_sigmas.size(); ++i) { + std::ostringstream oss; + oss << std::fixed << std::setprecision(4) << gen_params.custom_sigmas[i]; + parameter_string += oss.str() + (i == gen_params.custom_sigmas.size() - 1 ? 
"" : ", "); + } + parameter_string += "]"; + } else if (gen_params.sample_params.scheduler != SCHEDULER_COUNT) { // Only show schedule if not using custom sigmas parameter_string += " " + std::string(sd_scheduler_name(gen_params.sample_params.scheduler)); } parameter_string += ", "; @@ -806,4 +814,4 @@ int main(int argc, const char* argv[]) { release_all_resources(); return 0; -} +} \ No newline at end of file diff --git a/examples/common/common.hpp b/examples/common/common.hpp index 0ab5c08..ccd01ce 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -883,6 +883,8 @@ struct SDGenerationParams { std::vector high_noise_skip_layers = {7, 8, 9}; sd_sample_params_t high_noise_sample_params; + std::vector custom_sigmas; + std::string easycache_option; sd_easycache_params_t easycache_params; @@ -1201,6 +1203,43 @@ struct SDGenerationParams { return 1; }; + auto on_sigmas_arg = [&](int argc, const char** argv, int index) { + if (++index >= argc) { + return -1; + } + std::string sigmas_str = argv[index]; + if (!sigmas_str.empty() && sigmas_str.front() == '[') { + sigmas_str.erase(0, 1); + } + if (!sigmas_str.empty() && sigmas_str.back() == ']') { + sigmas_str.pop_back(); + } + + std::stringstream ss(sigmas_str); + std::string item; + while (std::getline(ss, item, ',')) { + item.erase(0, item.find_first_not_of(" \t\n\r\f\v")); + item.erase(item.find_last_not_of(" \t\n\r\f\v") + 1); + if (!item.empty()) { + try { + custom_sigmas.push_back(std::stof(item)); + } catch (const std::invalid_argument& e) { + fprintf(stderr, "error: invalid float value '%s' in --sigmas\n", item.c_str()); + return -1; + } catch (const std::out_of_range& e) { + fprintf(stderr, "error: float value '%s' out of range in --sigmas\n", item.c_str()); + return -1; + } + } + } + + if (custom_sigmas.empty() && !sigmas_str.empty()) { + fprintf(stderr, "error: could not parse any sigma values from '%s'\n", argv[index]); + return -1; + } + return 1; + }; + auto on_ref_image_arg = [&](int 
argc, const char** argv, int index) { if (++index >= argc) { return -1; @@ -1260,6 +1299,10 @@ struct SDGenerationParams { "--scheduler", "denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, lcm], default: discrete", on_scheduler_arg}, + {"", + "--sigmas", + "custom sigma values for the sampler, comma-separated (e.g., \"14.61,7.8,3.5,0.0\").", + on_sigmas_arg}, {"", "--skip-layers", "layers to skip for SLG steps (default: [7,8,9])", @@ -1512,6 +1555,8 @@ struct SDGenerationParams { sample_params.guidance.slg.layers = skip_layers.data(); sample_params.guidance.slg.layer_count = skip_layers.size(); + sample_params.custom_sigmas = custom_sigmas.data(); + sample_params.custom_sigmas_count = static_cast(custom_sigmas.size()); high_noise_sample_params.guidance.slg.layers = high_noise_skip_layers.data(); high_noise_sample_params.guidance.slg.layer_count = high_noise_skip_layers.size(); @@ -1606,6 +1651,7 @@ struct SDGenerationParams { << " sample_params: " << sample_params_str << ",\n" << " high_noise_skip_layers: " << vec_to_string(high_noise_skip_layers) << ",\n" << " high_noise_sample_params: " << high_noise_sample_params_str << ",\n" + << " custom_sigmas: " << vec_to_string(custom_sigmas) << ",\n" << " easycache_option: \"" << easycache_option << "\",\n" << " easycache: " << (easycache_params.enabled ? "enabled" : "disabled") diff --git a/examples/server/README.md b/examples/server/README.md index 6393d84..43c5d5f 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -115,6 +115,7 @@ Default Generation Options: ddim_trailing, tcd] default: euler for Flux/SD3/Wan, euler_a otherwise --scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, lcm], default: discrete + --sigmas custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0"). 
--skip-layers layers to skip for SLG steps (default: [7,8,9]) --high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9]) -r, --ref-image reference image for Flux Kontext models (can be used multiple times) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 1ef8512..2cb5882 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2600,6 +2600,8 @@ void sd_sample_params_init(sd_sample_params_t* sample_params) { sample_params->scheduler = SCHEDULER_COUNT; sample_params->sample_method = SAMPLE_METHOD_COUNT; sample_params->sample_steps = 20; + sample_params->custom_sigmas = nullptr; + sample_params->custom_sigmas_count = 0; } char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) { @@ -3194,11 +3196,21 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g } LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]); - int sample_steps = sd_img_gen_params->sample_params.sample_steps; - std::vector sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps, - sd_ctx->sd->get_image_seq_len(height, width), - sd_img_gen_params->sample_params.scheduler, - sd_ctx->sd->version); + int sample_steps = sd_img_gen_params->sample_params.sample_steps; + std::vector sigmas; + if (sd_img_gen_params->sample_params.custom_sigmas_count > 0) { + sigmas = std::vector(sd_img_gen_params->sample_params.custom_sigmas, + sd_img_gen_params->sample_params.custom_sigmas + sd_img_gen_params->sample_params.custom_sigmas_count); + if (sample_steps != sigmas.size() - 1) { + sample_steps = static_cast(sigmas.size()) - 1; + LOG_WARN("sample_steps != custom_sigmas_count - 1, set sample_steps to %d", sample_steps); + } + } else { + sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps, + sd_ctx->sd->get_image_seq_len(height, width), + sd_img_gen_params->sample_params.scheduler, + sd_ctx->sd->version); + } ggml_tensor* init_latent = nullptr; ggml_tensor* concat_latent = nullptr; @@ -3461,7 +3473,29 @@ 
SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s if (high_noise_sample_steps > 0) { total_steps += high_noise_sample_steps; } - std::vector sigmas = sd_ctx->sd->denoiser->get_sigmas(total_steps, 0, sd_vid_gen_params->sample_params.scheduler, sd_ctx->sd->version); + + std::vector sigmas; + if (sd_vid_gen_params->sample_params.custom_sigmas_count > 0) { + sigmas = std::vector(sd_vid_gen_params->sample_params.custom_sigmas, + sd_vid_gen_params->sample_params.custom_sigmas + sd_vid_gen_params->sample_params.custom_sigmas_count); + if (total_steps != sigmas.size() - 1) { + total_steps = static_cast(sigmas.size()) - 1; + LOG_WARN("total_steps != custom_sigmas_count - 1, set total_steps to %d", total_steps); + if (sample_steps >= total_steps) { + sample_steps = total_steps; + LOG_WARN("total_steps != custom_sigmas_count - 1, set sample_steps to %d", sample_steps); + } + if (high_noise_sample_steps > 0) { + high_noise_sample_steps = total_steps - sample_steps; + LOG_WARN("total_steps != custom_sigmas_count - 1, set high_noise_sample_steps to %d", high_noise_sample_steps); + } + } + } else { + sigmas = sd_ctx->sd->denoiser->get_sigmas(total_steps, + 0, + sd_vid_gen_params->sample_params.scheduler, + sd_ctx->sd->version); + } if (high_noise_sample_steps < 0) { // timesteps ∝ sigmas for Flow models (like wan2.2 a14b) @@ -3841,4 +3875,4 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s LOG_INFO("generate_video completed in %.2fs", (t5 - t0) * 1.0f / 1000); return result_images; -} +} \ No newline at end of file diff --git a/stable-diffusion.h b/stable-diffusion.h index 2da70bd..e4abc8d 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -225,6 +225,8 @@ typedef struct { int sample_steps; float eta; int shifted_timestep; + float* custom_sigmas; + int custom_sigmas_count; } sd_sample_params_t; typedef struct { From d96b4152d692a2f28cfb1677e4939c1ca551a937 Mon Sep 17 00:00:00 2001 From: stduhpf Date: Sat, 
13 Dec 2025 18:22:41 +0100 Subject: [PATCH 3/5] perf: optimize ggml_ext_chunk (#1084) --- common.hpp | 4 +++- ggml_extend.hpp | 34 +++++++++++----------------------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/common.hpp b/common.hpp index 33d499f..74b218a 100644 --- a/common.hpp +++ b/common.hpp @@ -194,10 +194,12 @@ public: auto proj = std::dynamic_pointer_cast(blocks["proj"]); x = proj->forward(ctx, x); // [ne3, ne2, ne1, dim_out*2] - auto x_vec = ggml_ext_chunk(ctx->ggml_ctx, x, 2, 0); + auto x_vec = ggml_ext_chunk(ctx->ggml_ctx, x, 2, 0, false); x = x_vec[0]; // [ne3, ne2, ne1, dim_out] auto gate = x_vec[1]; // [ne3, ne2, ne1, dim_out] + gate = ggml_cont(ctx->ggml_ctx, gate); + gate = ggml_gelu_inplace(ctx->ggml_ctx, gate); x = ggml_mul(ctx->ggml_ctx, x, gate); // [ne3, ne2, ne1, dim_out] diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 07b9bfb..26dff49 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -732,34 +732,22 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_slice(struct ggml_context* ctx, __STATIC_INLINE__ std::vector ggml_ext_chunk(struct ggml_context* ctx, struct ggml_tensor* x, int num, - int64_t dim) { + int64_t dim, + bool cont = true) { GGML_ASSERT(dim >= 0 && dim < 4); GGML_ASSERT(x->ne[dim] % num == 0); - int perm[4] = {0, 1, 2, 3}; - for (int i = dim; i < 3; ++i) - perm[i] = perm[i + 1]; - perm[3] = dim; - - int inv_perm[4]; - for (int i = 0; i < 4; ++i) - inv_perm[perm[i]] = i; - - if (dim != 3) { - x = ggml_ext_torch_permute(ctx, x, perm[0], perm[1], perm[2], perm[3]); - x = ggml_cont(ctx, x); - } - std::vector chunks; - int64_t chunk_size = x->ne[3] / num; + int64_t chunk_size = x->ne[dim] / num; + int64_t stride = chunk_size * x->nb[dim]; + int64_t chunk_ne[4] = {x->ne[0], x->ne[1], x->ne[2], x->ne[3]}; + chunk_ne[dim] = chunk_size; for (int i = 0; i < num; i++) { auto chunk = ggml_view_4d( ctx, x, - x->ne[0], x->ne[1], x->ne[2], chunk_size, - x->nb[1], x->nb[2], x->nb[3], x->nb[3] * i * chunk_size); - - if 
(dim != 3) { - chunk = ggml_ext_torch_permute(ctx, chunk, inv_perm[0], inv_perm[1], inv_perm[2], inv_perm[3]); + chunk_ne[0], chunk_ne[1], chunk_ne[2], chunk_ne[3], + x->nb[1], x->nb[2], x->nb[3], stride * i); + if (cont) { chunk = ggml_cont(ctx, chunk); } chunks.push_back(chunk); @@ -772,7 +760,7 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_silu_act(ggml_context* ctx, ggml_tensor* // x: [ne3, ne2, ne1, ne0] // return: [ne3, ne2, ne1, ne0/2] - auto x_vec = ggml_ext_chunk(ctx, x, 2, 0); + auto x_vec = ggml_ext_chunk(ctx, x, 2, 0, false); ggml_tensor* gate; if (gate_first) { gate = x_vec[0]; @@ -781,7 +769,7 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_silu_act(ggml_context* ctx, ggml_tensor* x = x_vec[0]; gate = x_vec[1]; } - + gate = ggml_cont(ctx, gate); gate = ggml_silu_inplace(ctx, gate); x = ggml_mul(ctx, x, gate); // [ne3, ne2, ne1, ne0/2] From 614f8736df54bbf7a20ecb324a821d0e505c6503 Mon Sep 17 00:00:00 2001 From: "Kirill A. Korinsky" Date: Sat, 13 Dec 2025 18:23:34 +0100 Subject: [PATCH 4/5] sync: update ggml (#1082) --- ggml | 2 +- ggml_extend.hpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ggml b/ggml index 2d3876d..f5425c0 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit 2d3876d554551d35c06dccc5852be50d5fd2a275 +Subproject commit f5425c0ee5e582a7d64411f06139870bff3e52e0 diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 26dff49..28fd018 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -1270,6 +1270,9 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_attention_ext(struct ggml_context } if (mask_in != nullptr) { + // the need for padding got removed in ggml 4767bda + // ensure we can still use the old version for now +#ifdef GGML_KQ_MASK_PAD int mask_pad = 0; if (mask_in->ne[1] % GGML_KQ_MASK_PAD != 0) { mask_pad = GGML_PAD(L_q, GGML_KQ_MASK_PAD) - mask_in->ne[1]; @@ -1277,6 +1280,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_attention_ext(struct ggml_context if (mask_pad > 0) { mask_in = ggml_pad(ctx, mask_in, 
0, mask_pad, 0, 0); } +#endif mask_in = ggml_cast(ctx, mask_in, GGML_TYPE_F16); } From 43a70e819b9254dee0d017305d6992f6bb27f850 Mon Sep 17 00:00:00 2001 From: leejet Date: Sun, 14 Dec 2025 01:24:15 +0800 Subject: [PATCH 5/5] fix: add lora info to image metadata (#1086) --- examples/cli/main.cpp | 2 +- examples/common/common.hpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 417d211..22480d7 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -232,7 +232,7 @@ static std::string sd_basename(const std::string& path) { } std::string get_image_params(const SDCliParams& cli_params, const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed) { - std::string parameter_string = gen_params.prompt + "\n"; + std::string parameter_string = gen_params.prompt_with_lora + "\n"; if (gen_params.negative_prompt.size() != 0) { parameter_string += "Negative prompt: " + gen_params.negative_prompt + "\n"; } diff --git a/examples/common/common.hpp b/examples/common/common.hpp index ccd01ce..bf38379 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -863,6 +863,7 @@ static bool is_absolute_path(const std::string& p) { struct SDGenerationParams { std::string prompt; + std::string prompt_with_lora; // for metadata record only std::string negative_prompt; int clip_skip = -1; // <= 0 represents unspecified int width = 512; @@ -1476,6 +1477,7 @@ struct SDGenerationParams { } bool process_and_check(SDMode mode, const std::string& lora_model_dir) { + prompt_with_lora = prompt; if (width <= 0) { fprintf(stderr, "error: the width must be greater than 0\n"); return false;