From 9d683417cbb9263cd0efb10ea8d734b12cc8000d Mon Sep 17 00:00:00 2001 From: Daniele <57776841+daniandtheweb@users.noreply.github.com> Date: Thu, 14 May 2026 18:29:04 +0200 Subject: [PATCH] feat: add Euler CFG++ and Euler-A CFG++ samplers (#1354) --- examples/cli/README.md | 4 +- examples/common/common.cpp | 4 +- examples/server/README.md | 4 +- examples/server/routes_sdapi.cpp | 4 ++ include/stable-diffusion.h | 2 + src/denoiser.hpp | 104 +++++++++++++++++++++++-------- src/stable-diffusion.cpp | 38 ++++++++--- 7 files changed, 120 insertions(+), 40 deletions(-) diff --git a/examples/cli/README.md b/examples/cli/README.md index b32fe37f..5fbeec39 100644 --- a/examples/cli/README.md +++ b/examples/cli/README.md @@ -162,10 +162,10 @@ Generation Options: -s, --seed RNG seed (default: 42, use random seed for < 0) --sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, - er_sde] (default: euler for Flux/SD3/Wan, euler_a otherwise) + er_sde, euler_cfg_pp, euler_a_cfg_pp] (default: euler for Flux/SD3/Wan, euler_a otherwise) --high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, - res_2s, er_sde] default: euler for Flux/SD3/Wan, euler_a otherwise + res_2s, er_sde, euler_cfg_pp, euler_a_cfg_pp] default: euler for Flux/SD3/Wan, euler_a otherwise --scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default: discrete diff --git a/examples/common/common.cpp b/examples/common/common.cpp index d4c8a72b..8ca7a2dc 100644 --- a/examples/common/common.cpp +++ b/examples/common/common.cpp @@ -1244,12 +1244,12 @@ ArgOptions SDGenerationParams::get_options() { on_seed_arg}, {"", "--sampling-method", - "sampling method, one of [euler, euler_a, heun, dpm2, 
dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde] " + "sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde, euler_cfg_pp, euler_a_cfg_pp] " "(default: euler for Flux/SD3/Wan, euler_a otherwise)", on_sample_method_arg}, {"", "--high-noise-sampling-method", - "(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde]" + "(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde, euler_cfg_pp, euler_a_cfg_pp]" " default: euler for Flux/SD3/Wan, euler_a otherwise", on_high_noise_sample_method_arg}, {"", diff --git a/examples/server/README.md b/examples/server/README.md index 23b79c9d..ead185cf 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -264,10 +264,10 @@ Default Generation Options: -s, --seed RNG seed (default: 42, use random seed for < 0) --sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, - er_sde] (default: euler for Flux/SD3/Wan, euler_a otherwise) + er_sde, euler_cfg_pp, euler_a_cfg_pp] (default: euler for Flux/SD3/Wan, euler_a otherwise) --high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, - res_2s, er_sde] default: euler for Flux/SD3/Wan, euler_a otherwise + res_2s, er_sde, euler_cfg_pp, euler_a_cfg_pp] default: euler for Flux/SD3/Wan, euler_a otherwise --scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default: discrete 
diff --git a/examples/server/routes_sdapi.cpp b/examples/server/routes_sdapi.cpp index 1e01d292..1af8735c 100644 --- a/examples/server/routes_sdapi.cpp +++ b/examples/server/routes_sdapi.cpp @@ -67,6 +67,10 @@ static enum sample_method_t get_sdapi_sample_method(std::string name) { {"k_res_multistep", RES_MULTISTEP_SAMPLE_METHOD}, {"res 2s", RES_2S_SAMPLE_METHOD}, {"k_res_2s", RES_2S_SAMPLE_METHOD}, + {"euler_cfg_pp", EULER_CFG_PP_SAMPLE_METHOD}, + {"k_euler_cfg_pp", EULER_CFG_PP_SAMPLE_METHOD}, + {"euler_a_cfg_pp", EULER_A_CFG_PP_SAMPLE_METHOD}, + {"k_euler_a_cfg_pp", EULER_A_CFG_PP_SAMPLE_METHOD}, }; auto it = hardcoded.find(name); return it != hardcoded.end() ? it->second : SAMPLE_METHOD_COUNT; diff --git a/include/stable-diffusion.h b/include/stable-diffusion.h index c4c14949..7f87d669 100644 --- a/include/stable-diffusion.h +++ b/include/stable-diffusion.h @@ -51,6 +51,8 @@ enum sample_method_t { RES_MULTISTEP_SAMPLE_METHOD, RES_2S_SAMPLE_METHOD, ER_SDE_SAMPLE_METHOD, + EULER_CFG_PP_SAMPLE_METHOD, + EULER_A_CFG_PP_SAMPLE_METHOD, SAMPLE_METHOD_COUNT }; diff --git a/src/denoiser.hpp b/src/denoiser.hpp index 831da258..3e1de12d 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -752,7 +752,7 @@ struct Flux2FlowDenoiser : public FluxFlowDenoiser { } }; -typedef std::function(const sd::Tensor&, float, int)> denoise_cb_t; +typedef std::function(const sd::Tensor&, float, int, sd::Tensor*)> denoise_cb_t; static std::pair get_ancestral_step(float sigma_from, float sigma_to, @@ -828,7 +828,7 @@ static sd::Tensor sample_euler_ancestral(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { float sigma = sigmas[i]; - auto denoised_opt = model(x, sigma, i + 1); + auto denoised_opt = model(x, sigma, i + 1, nullptr); if (denoised_opt.empty()) { return {}; } @@ -851,7 +851,7 @@ static sd::Tensor sample_euler_flow(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { float sigma = sigmas[i]; 
- auto denoised_opt = model(x, sigma, i + 1); + auto denoised_opt = model(x, sigma, i + 1, nullptr); if (denoised_opt.empty()) { return {}; } @@ -873,7 +873,7 @@ static sd::Tensor sample_euler(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { float sigma = sigmas[i]; - auto denoised_opt = model(x, sigma, i + 1); + auto denoised_opt = model(x, sigma, i + 1, nullptr); if (denoised_opt.empty()) { return {}; } @@ -889,7 +889,7 @@ static sd::Tensor sample_heun(denoise_cb_t model, const std::vector& sigmas) { int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], -(i + 1)); + auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr); if (denoised_opt.empty()) { return {}; } @@ -900,7 +900,7 @@ static sd::Tensor sample_heun(denoise_cb_t model, x += d * dt; } else { sd::Tensor x2 = x + d * dt; - auto denoised2_opt = model(x2, sigmas[i + 1], i + 1); + auto denoised2_opt = model(x2, sigmas[i + 1], i + 1, nullptr); if (denoised2_opt.empty()) { return {}; } @@ -917,7 +917,7 @@ static sd::Tensor sample_dpm2(denoise_cb_t model, const std::vector& sigmas) { int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], -(i + 1)); + auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr); if (denoised_opt.empty()) { return {}; } @@ -930,7 +930,7 @@ static sd::Tensor sample_dpm2(denoise_cb_t model, float dt_1 = sigma_mid - sigmas[i]; float dt_2 = sigmas[i + 1] - sigmas[i]; sd::Tensor x2 = x + d * dt_1; - auto denoised2_opt = model(x2, sigma_mid, i + 1); + auto denoised2_opt = model(x2, sigma_mid, i + 1, nullptr); if (denoised2_opt.empty()) { return {}; } @@ -951,7 +951,7 @@ static sd::Tensor sample_dpmpp_2s_ancestral(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], -(i + 1)); + auto denoised_opt = model(x, sigmas[i], -(i 
+ 1), nullptr); if (denoised_opt.empty()) { return {}; } @@ -967,7 +967,7 @@ static sd::Tensor sample_dpmpp_2s_ancestral(denoise_cb_t model, float s = t + 0.5f * h; float sigma_s = sigma_fn(s); sd::Tensor x2 = (sigma_s / sigma_fn(t)) * x - (exp(-h * 0.5f) - 1) * denoised; - auto denoised2_opt = model(x2, sigma_s, i + 1); + auto denoised2_opt = model(x2, sigma_s, i + 1, nullptr); if (denoised2_opt.empty()) { return {}; } @@ -994,7 +994,7 @@ static sd::Tensor sample_dpmpp_2s_ancestral_flow(denoise_cb_t model, bool opt_first_step = (1.0 - sigma < 1e-6); - auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1)); + auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1), nullptr); if (denoised_opt.empty()) { return {}; } @@ -1023,8 +1023,8 @@ static sd::Tensor sample_dpmpp_2s_ancestral_flow(denoise_cb_t model, // so sigma_s = 1 = sigma, and sigma_s_i_ratio = sigma_s / sigma = 1 // u = (x*sigma_s_i_ratio)+(denoised*(1.0f-sigma_s_i_ratio)) // = (x*1)+(denoised*0) = x - // so D_i = model(u, sigma_s, i + 1) - // = model(x, sigma, i + 1) + // so D_i = model(u, sigma_s, i + 1, nullptr) + // = model(x, sigma, i + 1, nullptr) // = denoised D_i = denoised; @@ -1057,7 +1057,7 @@ static sd::Tensor sample_dpmpp_2s_ancestral_flow(denoise_cb_t model, float sigma_s_i_ratio = sigma_s / sigma; sd::Tensor u = (x * sigma_s_i_ratio) + (denoised * (1.0f - sigma_s_i_ratio)); - auto denoised2_opt = model(u, sigma_s, i + 1); + auto denoised2_opt = model(u, sigma_s, i + 1, nullptr); if (denoised2_opt.empty()) { return {}; } @@ -1084,7 +1084,7 @@ static sd::Tensor sample_dpmpp_2m(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], i + 1); + auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); if (denoised_opt.empty()) { return {}; } @@ -1116,7 +1116,7 @@ static sd::Tensor sample_dpmpp_2m_v2(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < 
steps; i++) { - auto denoised_opt = model(x, sigmas[i], i + 1); + auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); if (denoised_opt.empty()) { return {}; } @@ -1151,7 +1151,7 @@ static sd::Tensor sample_lcm(denoise_cb_t model, bool is_flow_denoiser) { int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], i + 1); + auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); if (denoised_opt.empty()) { return {}; } @@ -1177,7 +1177,7 @@ static sd::Tensor sample_ipndm(denoise_cb_t model, float sigma = sigmas[i]; float sigma_next = sigmas[i + 1]; - auto denoised_opt = model(x, sigma, i + 1); + auto denoised_opt = model(x, sigma, i + 1, nullptr); if (denoised_opt.empty()) { return {}; } @@ -1221,7 +1221,7 @@ static sd::Tensor sample_ipndm_v(denoise_cb_t model, float sigma = sigmas[i]; float t_next = sigmas[i + 1]; - auto denoised_opt = model(x, sigma, i + 1); + auto denoised_opt = model(x, sigma, i + 1, nullptr); if (denoised_opt.empty()) { return {}; } @@ -1283,7 +1283,7 @@ static sd::Tensor sample_res_multistep(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - auto denoised_opt = model(x, sigmas[i], i + 1); + auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); if (denoised_opt.empty()) { return {}; } @@ -1360,7 +1360,7 @@ static sd::Tensor sample_res_2s(denoise_cb_t model, float sigma_from = sigmas[i]; float sigma_to = sigmas[i + 1]; - auto denoised_opt = model(x, sigma_from, -(i + 1)); + auto denoised_opt = model(x, sigma_from, -(i + 1), nullptr); if (denoised_opt.empty()) { return {}; } @@ -1386,7 +1386,7 @@ static sd::Tensor sample_res_2s(denoise_cb_t model, sd::Tensor eps1 = denoised - x0; sd::Tensor x2 = x0 + eps1 * (h * a21); - auto denoised2_opt = model(x2, sigma_c2, i + 1); + auto denoised2_opt = model(x2, sigma_c2, i + 1, nullptr); if (denoised2_opt.empty()) { return {}; } @@ -1463,7 +1463,7 @@ static sd::Tensor 
sample_er_sde(denoise_cb_t model, int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - sd::Tensor denoised = model(x, sigmas[i], i + 1); + sd::Tensor denoised = model(x, sigmas[i], i + 1, nullptr); if (denoised.empty()) { return {}; } @@ -1549,7 +1549,7 @@ static sd::Tensor sample_ddim_trailing(denoise_cb_t model, float sigma = sigmas[i]; float sigma_to = sigmas[i + 1]; - auto model_output_opt = model(x, sigma, i + 1); + auto model_output_opt = model(x, sigma, i + 1, nullptr); if (model_output_opt.empty()) { return {}; } @@ -1621,7 +1621,7 @@ static sd::Tensor sample_tcd(denoise_cb_t model, int timestep_s = (int)floor((1 - eta) * prev_timestep); float sigma = sigmas[i]; - auto model_output_opt = model(x, sigma, i + 1); + auto model_output_opt = model(x, sigma, i + 1, nullptr); if (model_output_opt.empty()) { return {}; } @@ -1649,6 +1649,56 @@ static sd::Tensor sample_tcd(denoise_cb_t model, return x; } +static sd::Tensor sample_euler_cfg_pp(denoise_cb_t model, + sd::Tensor x, + const std::vector& sigmas) { + int steps = static_cast(sigmas.size()) - 1; + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + sd::Tensor uncond_denoised; + + auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised); + if (denoised_opt.empty() || uncond_denoised.empty()) { + return {}; + } + + sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor d = (x - uncond_denoised) / sigma; + + x = denoised + d * sigmas[i + 1]; + } + return x; +} + +static sd::Tensor sample_euler_ancestral_cfg_pp(denoise_cb_t model, + sd::Tensor x, + const std::vector& sigmas, + std::shared_ptr rng, + float eta) { + int steps = static_cast(sigmas.size()) - 1; + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + sd::Tensor uncond_denoised; + + auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised); + if (denoised_opt.empty() || uncond_denoised.empty()) { + return {}; + } + + sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor d = (x - 
uncond_denoised) / sigma; + + auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta); + + x = denoised + d * sigma_down; + + if (sigmas[i + 1] > 0) { + x += sd::Tensor::randn_like(x, rng) * sigma_up; + } + } + return x; +} + // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t static sd::Tensor sample_k_diffusion(sample_method_t method, denoise_cb_t model, @@ -1694,6 +1744,10 @@ static sd::Tensor sample_k_diffusion(sample_method_t method, return sample_ddim_trailing(model, std::move(x), sigmas, rng, eta); case TCD_SAMPLE_METHOD: return sample_tcd(model, std::move(x), sigmas, rng, eta); + case EULER_CFG_PP_SAMPLE_METHOD: + return sample_euler_cfg_pp(model, std::move(x), sigmas); + case EULER_A_CFG_PP_SAMPLE_METHOD: + return sample_euler_ancestral_cfg_pp(model, std::move(x), sigmas, rng, eta); default: return {}; } diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index fd439ff1..85300026 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -73,6 +73,8 @@ const char* sampling_methods_str[] = { "Res Multistep", "Res 2s", "ER-SDE", + "Euler CFG++", + "Euler A CFG++", }; /*================================================== Helper Functions ================================================*/ @@ -1600,6 +1602,15 @@ public: cache_params, denoiser.get(), sigmas); + + // Spectrum cache is not supported for CFG++ samplers + if (method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD) { + if (cache_runtime.spectrum_enabled) { + LOG_WARN("Spectrum cache requested but not supported for CFG++ samplers"); + cache_runtime.spectrum_enabled = false; + } + } + size_t steps = sigmas.size() - 1; bool has_skiplayer = slg_scale != 0.0f && !skip_layers.empty(); if (has_skiplayer && !sd_version_is_dit(version)) { @@ -1614,7 +1625,7 @@ public: sd::Tensor denoised = x_t; SamplePreviewContext preview = prepare_sample_preview_context(); - auto denoise = [&](const sd::Tensor& x, float sigma, 
int step) -> sd::Tensor { + auto denoise = [&](const sd::Tensor& x, float sigma, int step, sd::Tensor* out_uncond_denoised = nullptr) -> sd::Tensor { if (step == 1 || step == -1) { pretty_progress(0, (int)steps, 0); } @@ -1637,15 +1648,17 @@ public: } if (cache_runtime.spectrum_enabled && cache_runtime.spectrum.should_predict()) { - cache_runtime.spectrum.predict(&denoised); - if (!denoise_mask.empty()) { - denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask); + if (out_uncond_denoised == nullptr) { + cache_runtime.spectrum.predict(&denoised); + if (!denoise_mask.empty()) { + denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask); + } + if (sd_should_preview_denoised() && preview.callback != nullptr) { + preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false); + } + report_sample_progress(step, steps, t0); + return denoised; } - if (sd_should_preview_denoised() && preview.callback != nullptr) { - preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false); - } - report_sample_progress(step, steps, t0); - return denoised; } if (sd_should_preview_noisy() && preview.callback != nullptr) { @@ -1768,6 +1781,10 @@ public: latent_result += (cond_out - skip_cond_out) * slg_scale; } denoised = latent_result * c_out + x * c_skip; + if (out_uncond_denoised != nullptr) { + const sd::Tensor& base_uncond = !uncond_out.empty() ? 
uncond_out : cond_out; + *out_uncond_denoised = base_uncond * c_out + x * c_skip; + } if (cache_runtime.spectrum_enabled) { cache_runtime.spectrum.update(denoised); } @@ -1965,6 +1982,8 @@ const char* sample_method_to_str[] = { "res_multistep", "res_2s", "er_sde", + "euler_cfg_pp", + "euler_a_cfg_pp", }; const char* sd_sample_method_name(enum sample_method_t sample_method) { @@ -2527,6 +2546,7 @@ static float resolve_eta(sd_ctx_t* sd_ctx, case EULER_A_SAMPLE_METHOD: case DPMPP2S_A_SAMPLE_METHOD: case ER_SDE_SAMPLE_METHOD: + case EULER_A_CFG_PP_SAMPLE_METHOD: return 1.0f; default:; }