refactor: split guidance composition (#1506)

This commit is contained in:
leejet 2026-05-17 20:20:16 +08:00 committed by GitHub
parent e43b24cf48
commit 50134e51dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 284 additions and 119 deletions

View File

@ -4,11 +4,13 @@
#include <algorithm> #include <algorithm>
#include <cctype> #include <cctype>
#include <cmath> #include <cmath>
#include <functional>
#include <string> #include <string>
#include <utility> #include <utility>
#include "ggml_extend.hpp" #include "ggml_extend.hpp"
#include "gits_noise.inl" #include "gits_noise.inl"
#include "guidance.h"
#include "tensor.hpp" #include "tensor.hpp"
/*================================================= CompVisDenoiser ==================================================*/ /*================================================= CompVisDenoiser ==================================================*/
@ -894,7 +896,7 @@ struct Flux2FlowDenoiser : public FluxFlowDenoiser {
} }
}; };
typedef std::function<sd::Tensor<float>(const sd::Tensor<float>&, float, int, sd::Tensor<float>*)> denoise_cb_t; typedef std::function<sd::guidance::GuiderOutput(const sd::Tensor<float>&, float, int)> denoise_cb_t;
static std::pair<float, float> get_ancestral_step(float sigma_from, static std::pair<float, float> get_ancestral_step(float sigma_from,
float sigma_to, float sigma_to,
@ -972,11 +974,11 @@ static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
float sigma = sigmas[i]; float sigma = sigmas[i];
float sigma_to = sigmas[i + 1]; float sigma_to = sigmas[i + 1];
auto denoised_opt = model(x, sigma, i + 1, nullptr); auto denoised_opt = model(x, sigma, i + 1);
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
if (sigma_to == 0.f) { if (sigma_to == 0.f) {
x = denoised; x = denoised;
} else if (eta == 0.f) { } else if (eta == 0.f) {
@ -1003,11 +1005,11 @@ static sd::Tensor<float> sample_euler(denoise_cb_t model,
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
float sigma = sigmas[i]; float sigma = sigmas[i];
auto denoised_opt = model(x, sigma, i + 1, nullptr); auto denoised_opt = model(x, sigma, i + 1);
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
sd::Tensor<float> d = (x - denoised) / sigma; sd::Tensor<float> d = (x - denoised) / sigma;
x += d * (sigmas[i + 1] - sigma); x += d * (sigmas[i + 1] - sigma);
} }
@ -1019,22 +1021,22 @@ static sd::Tensor<float> sample_heun(denoise_cb_t model,
const std::vector<float>& sigmas) { const std::vector<float>& sigmas) {
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr); auto denoised_opt = model(x, sigmas[i], -(i + 1));
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
sd::Tensor<float> d = (x - denoised) / sigmas[i]; sd::Tensor<float> d = (x - denoised) / sigmas[i];
float dt = sigmas[i + 1] - sigmas[i]; float dt = sigmas[i + 1] - sigmas[i];
if (sigmas[i + 1] == 0) { if (sigmas[i + 1] == 0) {
x += d * dt; x += d * dt;
} else { } else {
sd::Tensor<float> x2 = x + d * dt; sd::Tensor<float> x2 = x + d * dt;
auto denoised2_opt = model(x2, sigmas[i + 1], i + 1, nullptr); auto denoised2_opt = model(x2, sigmas[i + 1], i + 1);
if (denoised2_opt.empty()) { if (denoised2_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised2 = std::move(denoised2_opt); sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
d = (d + (x2 - denoised2) / sigmas[i + 1]) / 2.0f; d = (d + (x2 - denoised2) / sigmas[i + 1]) / 2.0f;
x += d * dt; x += d * dt;
} }
@ -1047,11 +1049,11 @@ static sd::Tensor<float> sample_dpm2(denoise_cb_t model,
const std::vector<float>& sigmas) { const std::vector<float>& sigmas) {
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr); auto denoised_opt = model(x, sigmas[i], -(i + 1));
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
sd::Tensor<float> d = (x - denoised) / sigmas[i]; sd::Tensor<float> d = (x - denoised) / sigmas[i];
if (sigmas[i + 1] == 0) { if (sigmas[i + 1] == 0) {
x += d * (sigmas[i + 1] - sigmas[i]); x += d * (sigmas[i + 1] - sigmas[i]);
@ -1060,11 +1062,11 @@ static sd::Tensor<float> sample_dpm2(denoise_cb_t model,
float dt_1 = sigma_mid - sigmas[i]; float dt_1 = sigma_mid - sigmas[i];
float dt_2 = sigmas[i + 1] - sigmas[i]; float dt_2 = sigmas[i + 1] - sigmas[i];
sd::Tensor<float> x2 = x + d * dt_1; sd::Tensor<float> x2 = x + d * dt_1;
auto denoised2_opt = model(x2, sigma_mid, i + 1, nullptr); auto denoised2_opt = model(x2, sigma_mid, i + 1);
if (denoised2_opt.empty()) { if (denoised2_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised2 = std::move(denoised2_opt); sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
x += ((x2 - denoised2) / sigma_mid) * dt_2; x += ((x2 - denoised2) / sigma_mid) * dt_2;
} }
} }
@ -1081,11 +1083,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr); auto denoised_opt = model(x, sigmas[i], -(i + 1));
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta); auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);
if (sigma_down == 0) { if (sigma_down == 0) {
@ -1097,11 +1099,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
float s = t + 0.5f * h; float s = t + 0.5f * h;
float sigma_s = sigma_fn(s); float sigma_s = sigma_fn(s);
sd::Tensor<float> x2 = (sigma_s / sigma_fn(t)) * x - (exp(-h * 0.5f) - 1) * denoised; sd::Tensor<float> x2 = (sigma_s / sigma_fn(t)) * x - (exp(-h * 0.5f) - 1) * denoised;
auto denoised2_opt = model(x2, sigma_s, i + 1, nullptr); auto denoised2_opt = model(x2, sigma_s, i + 1);
if (denoised2_opt.empty()) { if (denoised2_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised2 = std::move(denoised2_opt); sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
x = (sigma_fn(t_next) / sigma_fn(t)) * x - (exp(-h) - 1) * denoised2; x = (sigma_fn(t_next) / sigma_fn(t)) * x - (exp(-h) - 1) * denoised2;
} }
@ -1124,11 +1126,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
bool opt_first_step = (1.0 - sigma < 1e-6); bool opt_first_step = (1.0 - sigma < 1e-6);
auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1), nullptr); auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1));
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
if (sigma_to == 0.0f) { if (sigma_to == 0.0f) {
// Euler method (final step, no noise) // Euler method (final step, no noise)
@ -1153,8 +1155,8 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
// so sigma_s = 1 = sigma, and sigma_s_i_ratio = sigma_s / sigma = 1 // so sigma_s = 1 = sigma, and sigma_s_i_ratio = sigma_s / sigma = 1
// u = (x*sigma_s_i_ratio)+(denoised*(1.0f-sigma_s_i_ratio)) // u = (x*sigma_s_i_ratio)+(denoised*(1.0f-sigma_s_i_ratio))
// = (x*1)+(denoised*0) = x // = (x*1)+(denoised*0) = x
// so D_i = model(u, sigma_s, i + 1, nullptr) // so D_i = model(u, sigma_s, i + 1)
// = model(x, sigma, i + 1, nullptr) // = model(x, sigma, i + 1)
// = denoised // = denoised
D_i = denoised; D_i = denoised;
@ -1187,11 +1189,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
float sigma_s_i_ratio = sigma_s / sigma; float sigma_s_i_ratio = sigma_s / sigma;
sd::Tensor<float> u = (x * sigma_s_i_ratio) + (denoised * (1.0f - sigma_s_i_ratio)); sd::Tensor<float> u = (x * sigma_s_i_ratio) + (denoised * (1.0f - sigma_s_i_ratio));
auto denoised2_opt = model(u, sigma_s, i + 1, nullptr); auto denoised2_opt = model(u, sigma_s, i + 1);
if (denoised2_opt.empty()) { if (denoised2_opt.pred.empty()) {
return {}; return {};
} }
D_i = std::move(denoised2_opt); D_i = std::move(denoised2_opt.pred);
} }
float sigma_down_i_ratio = sigma_down / sigma; float sigma_down_i_ratio = sigma_down / sigma;
@ -1214,11 +1216,11 @@ static sd::Tensor<float> sample_dpmpp_2m(denoise_cb_t model,
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); auto denoised_opt = model(x, sigmas[i], i + 1);
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
float t = t_fn(sigmas[i]); float t = t_fn(sigmas[i]);
float t_next = t_fn(sigmas[i + 1]); float t_next = t_fn(sigmas[i + 1]);
float h = t_next - t; float h = t_next - t;
@ -1246,11 +1248,11 @@ static sd::Tensor<float> sample_dpmpp_2m_v2(denoise_cb_t model,
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); auto denoised_opt = model(x, sigmas[i], i + 1);
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
float t = t_fn(sigmas[i]); float t = t_fn(sigmas[i]);
float t_next = t_fn(sigmas[i + 1]); float t_next = t_fn(sigmas[i + 1]);
float h = t_next - t; float h = t_next - t;
@ -1354,11 +1356,11 @@ static sd::Tensor<float> sample_lcm(denoise_cb_t model,
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); auto denoised_opt = model(x, sigmas[i], i + 1);
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
x = std::move(denoised_opt); x = std::move(denoised_opt.pred);
if (sigmas[i + 1] > 0) { if (sigmas[i + 1] > 0) {
if (is_flow_denoiser) { if (is_flow_denoiser) {
x *= (1 - sigmas[i + 1]); x *= (1 - sigmas[i + 1]);
@ -1400,11 +1402,11 @@ static sd::Tensor<float> sample_ipndm(denoise_cb_t model,
float sigma = sigmas[i]; float sigma = sigmas[i];
float sigma_next = sigmas[i + 1]; float sigma_next = sigmas[i + 1];
auto denoised_opt = model(x, sigma, i + 1, nullptr); auto denoised_opt = model(x, sigma, i + 1);
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
sd::Tensor<float> d_cur = (x - denoised) / sigma; sd::Tensor<float> d_cur = (x - denoised) / sigma;
int order = std::min(max_order, i + 1); int order = std::min(max_order, i + 1);
@ -1444,11 +1446,11 @@ static sd::Tensor<float> sample_ipndm_v(denoise_cb_t model,
float sigma = sigmas[i]; float sigma = sigmas[i];
float t_next = sigmas[i + 1]; float t_next = sigmas[i + 1];
auto denoised_opt = model(x, sigma, i + 1, nullptr); auto denoised_opt = model(x, sigma, i + 1);
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
sd::Tensor<float> d_cur = (x - denoised) / sigma; sd::Tensor<float> d_cur = (x - denoised) / sigma;
int order = std::min(max_order, i + 1); int order = std::min(max_order, i + 1);
@ -1506,11 +1508,11 @@ static sd::Tensor<float> sample_res_multistep(denoise_cb_t model,
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
auto denoised_opt = model(x, sigmas[i], i + 1, nullptr); auto denoised_opt = model(x, sigmas[i], i + 1);
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
float sigma_from = sigmas[i]; float sigma_from = sigmas[i];
float sigma_to = sigmas[i + 1]; float sigma_to = sigmas[i + 1];
@ -1583,11 +1585,11 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
float sigma_from = sigmas[i]; float sigma_from = sigmas[i];
float sigma_to = sigmas[i + 1]; float sigma_to = sigmas[i + 1];
auto denoised_opt = model(x, sigma_from, -(i + 1), nullptr); auto denoised_opt = model(x, sigma_from, -(i + 1));
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma_from, sigma_to, eta, is_flow_denoiser); auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma_from, sigma_to, eta, is_flow_denoiser);
@ -1609,11 +1611,11 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
sd::Tensor<float> eps1 = denoised - x0; sd::Tensor<float> eps1 = denoised - x0;
sd::Tensor<float> x2 = x0 + eps1 * (h * a21); sd::Tensor<float> x2 = x0 + eps1 * (h * a21);
auto denoised2_opt = model(x2, sigma_c2, i + 1, nullptr); auto denoised2_opt = model(x2, sigma_c2, i + 1);
if (denoised2_opt.empty()) { if (denoised2_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised2 = std::move(denoised2_opt); sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
sd::Tensor<float> eps2 = denoised2 - x0; sd::Tensor<float> eps2 = denoised2 - x0;
x = x0 + h * (b1 * eps1 + b2 * eps2); x = x0 + h * (b1 * eps1 + b2 * eps2);
} }
@ -1686,10 +1688,11 @@ static sd::Tensor<float> sample_er_sde(denoise_cb_t model,
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
sd::Tensor<float> denoised = model(x, sigmas[i], i + 1, nullptr); auto denoised_opt = model(x, sigmas[i], i + 1);
if (denoised.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
int stage_used = std::min(max_stage, i + 1); int stage_used = std::min(max_stage, i + 1);
@ -1804,11 +1807,11 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
int timestep_s = (int)floor((1 - eta) * prev_timestep); int timestep_s = (int)floor((1 - eta) * prev_timestep);
float sigma = sigmas[i]; float sigma = sigmas[i];
auto denoised_opt = model(x, sigma, i + 1, nullptr); auto denoised_opt = model(x, sigma, i + 1);
if (denoised_opt.empty()) { if (denoised_opt.pred.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
sd::Tensor<float> d = (x - denoised) / sigma; sd::Tensor<float> d = (x - denoised) / sigma;
float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f); float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
@ -1834,14 +1837,13 @@ static sd::Tensor<float> sample_euler_cfg_pp(denoise_cb_t model,
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
float sigma = sigmas[i]; float sigma = sigmas[i];
sd::Tensor<float> uncond_denoised; auto denoised_opt = model(x, sigma, i + 1);
if (denoised_opt.pred.empty() || denoised_opt.pred_uncond.empty()) {
auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised);
if (denoised_opt.empty() || uncond_denoised.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
sd::Tensor<float> uncond_denoised = std::move(denoised_opt.pred_uncond);
sd::Tensor<float> d = (x - uncond_denoised) / sigma; sd::Tensor<float> d = (x - uncond_denoised) / sigma;
x = denoised + d * sigmas[i + 1]; x = denoised + d * sigmas[i + 1];
@ -1857,14 +1859,13 @@ static sd::Tensor<float> sample_euler_ancestral_cfg_pp(denoise_cb_t model,
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
float sigma = sigmas[i]; float sigma = sigmas[i];
sd::Tensor<float> uncond_denoised; auto denoised_opt = model(x, sigma, i + 1);
if (denoised_opt.pred.empty() || denoised_opt.pred_uncond.empty()) {
auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised);
if (denoised_opt.empty() || uncond_denoised.empty()) {
return {}; return {};
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt.pred);
sd::Tensor<float> uncond_denoised = std::move(denoised_opt.pred_uncond);
sd::Tensor<float> d = (x - uncond_denoised) / sigma; sd::Tensor<float> d = (x - uncond_denoised) / sigma;
auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta); auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);

89
src/guidance.cpp Normal file
View File

@ -0,0 +1,89 @@
#include "guidance.h"
#include <utility>
namespace sd::guidance {
// Reports whether `tensor` points at a usable prediction: the pointer must be
// non-null and the tensor it refers to must not be empty.
static bool has_tensor(const sd::Tensor<float>* tensor) {
    if (tensor == nullptr) {
        return false;
    }
    return !tensor->empty();
}
ClassifierFreeGuidance::ClassifierFreeGuidance(float guidance_scale,
float image_guidance_scale)
: guidance_scale_(guidance_scale),
image_guidance_scale_(image_guidance_scale) {
}
// Combines the available model predictions into a single guided prediction.
//
// Parameters:
//   input     Non-owning pointers to the conditional, unconditional and
//             image-conditional predictions. Any pointer that is null or that
//             points at an empty tensor is treated as "not available".
//   previous  Ignored; this stage does not build on an earlier result.
//
// Returns a GuiderOutput in which only `pred` is populated:
//   cond + uncond + img : uncond + image_scale * (img - uncond) + scale * (cond - img)
//   cond + uncond       : uncond + scale * (cond - uncond)
//   cond + img          : img + scale * (cond - img)
//   cond only           : a copy of cond
// If the conditional prediction is not available the returned output is left
// default-constructed (empty `pred`).
GuiderOutput ClassifierFreeGuidance::forward(const GuidanceInput& input,
                                             GuiderOutput previous) const {
    (void)previous;

    GuiderOutput output;
    if (!has_tensor(input.pred_cond)) {
        return output;
    }

    const sd::Tensor<float>& pred_cond = *input.pred_cond;
    const bool has_uncond              = has_tensor(input.pred_uncond);
    const bool has_img_cond            = has_tensor(input.pred_img_cond);

    // Pick the branch before touching output.pred so the conditional tensor is
    // only copied in the pass-through case, instead of being copied up front
    // and then immediately overwritten by the guided result.
    if (has_uncond && has_img_cond) {
        const sd::Tensor<float>& pred_uncond   = *input.pred_uncond;
        const sd::Tensor<float>& pred_img_cond = *input.pred_img_cond;
        output.pred = pred_uncond +
                      image_guidance_scale_ * (pred_img_cond - pred_uncond) +
                      guidance_scale_ * (pred_cond - pred_img_cond);
    } else if (has_uncond) {
        const sd::Tensor<float>& pred_uncond = *input.pred_uncond;
        output.pred = pred_uncond + guidance_scale_ * (pred_cond - pred_uncond);
    } else if (has_img_cond) {
        const sd::Tensor<float>& pred_img_cond = *input.pred_img_cond;
        output.pred = pred_img_cond + guidance_scale_ * (pred_cond - pred_img_cond);
    } else {
        output.pred = pred_cond;
    }
    return output;
}
SkipLayerGuidance::SkipLayerGuidance(std::vector<int> layers,
float scale,
float start,
float stop)
: layers_(std::move(layers)),
scale_(scale),
start_(start),
stop_(stop) {
}
// Decides whether skip-layer guidance applies to the step described by `input`.
//
// Guidance is never active when the scale is zero, when no layers are
// configured, or when the schedule size is zero. Otherwise the start/stop
// fractions are multiplied by the schedule size and truncated to int, and the
// step must lie strictly between those two bounds (both ends exclusive).
bool SkipLayerGuidance::is_enabled_for_step(const GuidanceInput& input) const {
    const bool configured = scale_ != 0.0f && !layers_.empty() && input.schedule_size != 0;
    if (!configured) {
        return false;
    }

    const float schedule_len = static_cast<float>(input.schedule_size);
    const int window_begin   = static_cast<int>(start_ * schedule_len);
    const int window_end     = static_cast<int>(stop_ * schedule_len);

    return window_begin < input.step && input.step < window_end;
}
// Read-only access to the configured layer indices. The reference is to the
// member itself, so it is only valid while this object is alive.
const std::vector<int>& SkipLayerGuidance::layers() const { return layers_; }
// Applies the skip-layer correction on top of an already guided prediction.
//
// Parameters:
//   input   Step information, the conditional prediction and the
//           `predict_skip_layer` callback that produces the skip-layer result.
//   output  Result of the preceding guidance stage; taken by value and returned
//           after modification.
//
// Behaviour:
//   * If guidance is not enabled for this step, or no callback is set,
//     `output` is returned untouched.
//   * If `output.pred` is empty, the conditional prediction is unavailable, or
//     the callback yields an empty tensor, a default-constructed (empty)
//     GuiderOutput is returned.
//   * Otherwise the callback result is stored in `pred_skip_layer` and
//     (pred_cond - pred_skip_layer) * scale is added to `pred`.
GuiderOutput SkipLayerGuidance::forward(const GuidanceInput& input,
                                        GuiderOutput output) const {
    if (!is_enabled_for_step(input)) {
        return output;
    }
    if (!input.predict_skip_layer) {
        return output;
    }

    if (!has_tensor(input.pred_cond) || output.pred.empty()) {
        return {};
    }

    sd::Tensor<float> skip_pred = input.predict_skip_layer();
    if (skip_pred.empty()) {
        return {};
    }

    const sd::Tensor<float>& cond_pred = *input.pred_cond;
    output.pred += (cond_pred - skip_pred) * scale_;
    output.pred_skip_layer = std::move(skip_pred);
    return output;
}
} // namespace sd::guidance

70
src/guidance.h Normal file
View File

@ -0,0 +1,70 @@
#ifndef __SD_GUIDANCE_H__
#define __SD_GUIDANCE_H__
#include <cstddef>
#include <functional>
#include <vector>
#include "tensor.hpp"
namespace sd::guidance {
// Result produced by a guidance stage. A default-constructed value has every
// tensor empty; the guidance classes return such a value when they cannot
// produce a prediction.
struct GuiderOutput {
// Combined (guided) prediction.
sd::Tensor<float> pred;
// NOTE(review): not written by the guidance classes in this header's
// implementation; presumably reserved for the raw conditional prediction.
sd::Tensor<float> pred_cond;
// Unconditional prediction; read by the CFG++ samplers alongside `pred`.
sd::Tensor<float> pred_uncond;
// NOTE(review): not written by the guidance classes in this header's
// implementation; presumably reserved for the raw image-conditional prediction.
sd::Tensor<float> pred_img_cond;
// Skip-layer prediction stored by SkipLayerGuidance::forward when it applies.
sd::Tensor<float> pred_skip_layer;
};
// Per-step inputs handed to a guidance stage. The tensor pointers are
// non-owning; a null pointer (or a pointer to an empty tensor) means that
// prediction is not available.
struct GuidanceInput {
    // Step number supplied by the caller; compared against the skip-layer window.
    int step{0};
    // Size of the schedule the step belongs to; zero disables skip-layer guidance.
    size_t schedule_size{0};

    // Conditional prediction (required by both guidance stages).
    const sd::Tensor<float>* pred_cond{nullptr};
    // Unconditional prediction (optional).
    const sd::Tensor<float>* pred_uncond{nullptr};
    // Image-conditional prediction (optional).
    const sd::Tensor<float>* pred_img_cond{nullptr};

    // Callback that computes the skip-layer prediction on demand. When left
    // unset, SkipLayerGuidance::forward returns its input unchanged.
    std::function<sd::Tensor<float>()> predict_skip_layer;
};
// Abstract interface for one guidance stage. Stages can be chained by passing
// the output of one stage as `previous` to the next.
class BaseGuidance {
public:
// Virtual so that derived stages can be destroyed through a base pointer.
virtual ~BaseGuidance() = default;
// Produces this stage's result from the per-step `input` and the result of
// the preceding stage (`previous`, taken by value). Implementations return a
// GuiderOutput with an empty `pred` when they cannot produce a prediction.
virtual GuiderOutput forward(const GuidanceInput& input,
GuiderOutput previous) const = 0;
};
// Guidance stage that blends the conditional prediction with the optional
// unconditional and image-conditional predictions using two scale factors.
class ClassifierFreeGuidance : public BaseGuidance {
public:
    // guidance_scale weights the text-conditioned term, image_guidance_scale
    // the image-conditioned term.
    ClassifierFreeGuidance(float guidance_scale,
                           float image_guidance_scale);

    // Returns the blended prediction in `pred`; `previous` is ignored.
    GuiderOutput forward(const GuidanceInput& input,
                         GuiderOutput previous) const override;

private:
    float guidance_scale_{1.0f};
    float image_guidance_scale_{1.0f};
};
// Guidance stage that adds a scaled (conditional - skip-layer) correction to
// an existing prediction, but only for steps inside a configurable window.
class SkipLayerGuidance : public BaseGuidance {
public:
    // layers: layer indices exposed through layers(); scale: correction weight;
    // start/stop: window bounds as fractions of the schedule size.
    SkipLayerGuidance(std::vector<int> layers,
                      float scale,
                      float start,
                      float stop);

    // True when the step in `input` lies strictly inside the active window and
    // the stage is configured (non-zero scale, non-empty layer list).
    bool is_enabled_for_step(const GuidanceInput& input) const;

    // Configured layer indices.
    const std::vector<int>& layers() const;

    // Applies the correction to `previous` and returns the result.
    GuiderOutput forward(const GuidanceInput& input,
                         GuiderOutput previous) const override;

private:
    std::vector<int> layers_;
    float scale_{0.0f};
    float start_{0.0f};
    float stop_{1.0f};
};
} // namespace sd::guidance
#endif // __SD_GUIDANCE_H__

View File

@ -14,6 +14,7 @@
#include "denoiser.hpp" #include "denoiser.hpp"
#include "diffusion_model.hpp" #include "diffusion_model.hpp"
#include "esrgan.hpp" #include "esrgan.hpp"
#include "guidance.h"
#include "lora.hpp" #include "lora.hpp"
#include "ltx_audio_vae.h" #include "ltx_audio_vae.h"
#include "ltx_vae.hpp" #include "ltx_vae.hpp"
@ -1854,8 +1855,9 @@ public:
denoiser.get(), denoiser.get(),
sigmas); sigmas);
bool needs_uncond_denoised = method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD;
// Spectrum cache is not supported for CFG++ samplers // Spectrum cache is not supported for CFG++ samplers
if (method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD) { if (needs_uncond_denoised) {
if (cache_runtime.spectrum_enabled) { if (cache_runtime.spectrum_enabled) {
LOG_WARN("Spectrum cache requested but not supported for CFG++ samplers"); LOG_WARN("Spectrum cache requested but not supported for CFG++ samplers");
cache_runtime.spectrum_enabled = false; cache_runtime.spectrum_enabled = false;
@ -1868,6 +1870,11 @@ public:
has_skiplayer = false; has_skiplayer = false;
LOG_WARN("SLG is incompatible with this model type"); LOG_WARN("SLG is incompatible with this model type");
} }
sd::guidance::ClassifierFreeGuidance classifier_free_guidance(cfg_scale, img_cfg_scale);
sd::guidance::SkipLayerGuidance skip_layer_guidance(has_skiplayer ? skip_layers : std::vector<int>(),
has_skiplayer ? slg_scale : 0.0f,
guidance.slg.layer_start,
guidance.slg.layer_end);
if (version == VERSION_HIDREAM_O1 && !noise.empty()) { if (version == VERSION_HIDREAM_O1 && !noise.empty()) {
noise *= eta; noise *= eta;
@ -1880,7 +1887,7 @@ public:
sd::Tensor<float> denoised = x_t; sd::Tensor<float> denoised = x_t;
SamplePreviewContext preview = prepare_sample_preview_context(); SamplePreviewContext preview = prepare_sample_preview_context();
auto denoise = [&](const sd::Tensor<float>& x, float sigma, int step, sd::Tensor<float>* out_uncond_denoised = nullptr) -> sd::Tensor<float> { auto denoise = [&](const sd::Tensor<float>& x, float sigma, int step) -> sd::guidance::GuiderOutput {
if (step == 1 || step == -1) { if (step == 1 || step == -1) {
pretty_progress(0, (int)steps, 0); pretty_progress(0, (int)steps, 0);
} }
@ -1913,7 +1920,6 @@ public:
} }
if (cache_runtime.spectrum_enabled && cache_runtime.spectrum.should_predict()) { if (cache_runtime.spectrum_enabled && cache_runtime.spectrum.should_predict()) {
if (out_uncond_denoised == nullptr) {
cache_runtime.spectrum.predict(&denoised); cache_runtime.spectrum.predict(&denoised);
if (!denoise_mask.empty()) { if (!denoise_mask.empty()) {
denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask); denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask);
@ -1922,8 +1928,9 @@ public:
preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false); preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false);
} }
report_sample_progress(step, steps, t0); report_sample_progress(step, steps, t0);
return denoised; sd::guidance::GuiderOutput output;
} output.pred = denoised;
return output;
} }
if (sd_should_preview_noisy() && preview.callback != nullptr) { if (sd_should_preview_noisy() && preview.callback != nullptr) {
@ -1933,7 +1940,6 @@ public:
sd::Tensor<float> cond_out; sd::Tensor<float> cond_out;
sd::Tensor<float> uncond_out; sd::Tensor<float> uncond_out;
sd::Tensor<float> img_cond_out; sd::Tensor<float> img_cond_out;
sd::Tensor<float> skip_cond_out;
sd_sample::SampleStepCacheDispatcher step_cache(cache_runtime, step, sigma); sd_sample::SampleStepCacheDispatcher step_cache(cache_runtime, step, sigma);
std::vector<sd::Tensor<float>> controls; std::vector<sd::Tensor<float>> controls;
DiffusionParams diffusion_params; DiffusionParams diffusion_params;
@ -2023,42 +2029,40 @@ public:
return {}; return {};
} }
} }
bool is_skiplayer_step = has_skiplayer && sd::guidance::GuidanceInput guidance_input;
step > (int)(guidance.slg.layer_start * static_cast<int>(sigmas.size())) && guidance_input.step = step;
step < (int)(guidance.slg.layer_end * static_cast<int>(sigmas.size())); guidance_input.schedule_size = sigmas.size();
if (is_skiplayer_step) { guidance_input.pred_cond = &cond_out;
LOG_DEBUG("Skipping layers at step %d\n", step); guidance_input.pred_uncond = uncond_out.empty() ? nullptr : &uncond_out;
if (!step_cache.is_step_skipped()) { guidance_input.pred_img_cond = img_cond_out.empty() ? nullptr : &img_cond_out;
skip_cond_out = run_condition(cond,
cond.c_concat.empty() ? nullptr : &cond.c_concat, sd::guidance::GuiderOutput guided = classifier_free_guidance.forward(guidance_input, {});
&skip_layers); if (guided.pred.empty()) {
if (skip_cond_out.empty()) {
return {}; return {};
} }
if (skip_layer_guidance.is_enabled_for_step(guidance_input)) {
LOG_DEBUG("Skipping layers at step %d\n", step);
if (!step_cache.is_step_skipped()) {
guidance_input.predict_skip_layer = [&]() -> sd::Tensor<float> {
return run_condition(cond,
cond.c_concat.empty() ? nullptr : &cond.c_concat,
&skip_layer_guidance.layers());
};
} }
} }
GGML_ASSERT(!cond_out.empty()); guided = skip_layer_guidance.forward(guidance_input, std::move(guided));
sd::Tensor<float> latent_result = cond_out; if (guided.pred.empty()) {
if (!uncond_out.empty()) { return {};
if (!img_cond_out.empty()) {
latent_result = uncond_out +
img_cfg_scale * (img_cond_out - uncond_out) +
cfg_scale * (cond_out - img_cond_out);
} else {
latent_result = uncond_out + cfg_scale * (cond_out - uncond_out);
}
} else if (!img_cond_out.empty()) {
latent_result = img_cond_out + cfg_scale * (cond_out - img_cond_out);
} }
if (is_skiplayer_step && !skip_cond_out.empty()) { denoised = guided.pred * c_out + x * c_skip;
latent_result += (cond_out - skip_cond_out) * slg_scale; sd::guidance::GuiderOutput output;
} output.pred = denoised;
denoised = latent_result * c_out + x * c_skip; if (needs_uncond_denoised) {
if (out_uncond_denoised != nullptr) { const sd::Tensor<float>& base_uncond = !uncond_out.empty() ? uncond_out : cond_out;
sd::Tensor<float> base_uncond = !uncond_out.empty() ? uncond_out : cond_out; output.pred_uncond = base_uncond * c_out + x * c_skip;
*out_uncond_denoised = base_uncond * c_out + x * c_skip;
} }
if (cache_runtime.spectrum_enabled) { if (cache_runtime.spectrum_enabled) {
cache_runtime.spectrum.update(denoised); cache_runtime.spectrum.update(denoised);
@ -2070,7 +2074,8 @@ public:
preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false); preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false);
} }
report_sample_progress(step, steps, t0); report_sample_progress(step, steps, t0);
return denoised; output.pred = denoised;
return output;
}; };
auto x0_opt = sample_k_diffusion(method, denoise, x_t, sigmas, sampler_rng, eta, is_flow_denoiser, extra_sample_args); auto x0_opt = sample_k_diffusion(method, denoise, x_t, sigmas, sampler_rng, eta, is_flow_denoiser, extra_sample_args);