mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-09 15:56:39 +00:00
refactor: split guidance composition (#1506)
This commit is contained in:
parent
e43b24cf48
commit
50134e51dd
153
src/denoiser.hpp
153
src/denoiser.hpp
@ -4,11 +4,13 @@
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "ggml_extend.hpp"
|
||||
#include "gits_noise.inl"
|
||||
#include "guidance.h"
|
||||
#include "tensor.hpp"
|
||||
|
||||
/*================================================= CompVisDenoiser ==================================================*/
|
||||
@ -894,7 +896,7 @@ struct Flux2FlowDenoiser : public FluxFlowDenoiser {
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::function<sd::Tensor<float>(const sd::Tensor<float>&, float, int, sd::Tensor<float>*)> denoise_cb_t;
|
||||
typedef std::function<sd::guidance::GuiderOutput(const sd::Tensor<float>&, float, int)> denoise_cb_t;
|
||||
|
||||
static std::pair<float, float> get_ancestral_step(float sigma_from,
|
||||
float sigma_to,
|
||||
@ -972,11 +974,11 @@ static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
|
||||
for (int i = 0; i < steps; i++) {
|
||||
float sigma = sigmas[i];
|
||||
float sigma_to = sigmas[i + 1];
|
||||
auto denoised_opt = model(x, sigma, i + 1, nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigma, i + 1);
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
if (sigma_to == 0.f) {
|
||||
x = denoised;
|
||||
} else if (eta == 0.f) {
|
||||
@ -1003,11 +1005,11 @@ static sd::Tensor<float> sample_euler(denoise_cb_t model,
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
float sigma = sigmas[i];
|
||||
auto denoised_opt = model(x, sigma, i + 1, nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigma, i + 1);
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
sd::Tensor<float> d = (x - denoised) / sigma;
|
||||
x += d * (sigmas[i + 1] - sigma);
|
||||
}
|
||||
@ -1019,22 +1021,22 @@ static sd::Tensor<float> sample_heun(denoise_cb_t model,
|
||||
const std::vector<float>& sigmas) {
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigmas[i], -(i + 1));
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
sd::Tensor<float> d = (x - denoised) / sigmas[i];
|
||||
float dt = sigmas[i + 1] - sigmas[i];
|
||||
if (sigmas[i + 1] == 0) {
|
||||
x += d * dt;
|
||||
} else {
|
||||
sd::Tensor<float> x2 = x + d * dt;
|
||||
auto denoised2_opt = model(x2, sigmas[i + 1], i + 1, nullptr);
|
||||
if (denoised2_opt.empty()) {
|
||||
auto denoised2_opt = model(x2, sigmas[i + 1], i + 1);
|
||||
if (denoised2_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised2 = std::move(denoised2_opt);
|
||||
sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
|
||||
d = (d + (x2 - denoised2) / sigmas[i + 1]) / 2.0f;
|
||||
x += d * dt;
|
||||
}
|
||||
@ -1047,11 +1049,11 @@ static sd::Tensor<float> sample_dpm2(denoise_cb_t model,
|
||||
const std::vector<float>& sigmas) {
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigmas[i], -(i + 1));
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
sd::Tensor<float> d = (x - denoised) / sigmas[i];
|
||||
if (sigmas[i + 1] == 0) {
|
||||
x += d * (sigmas[i + 1] - sigmas[i]);
|
||||
@ -1060,11 +1062,11 @@ static sd::Tensor<float> sample_dpm2(denoise_cb_t model,
|
||||
float dt_1 = sigma_mid - sigmas[i];
|
||||
float dt_2 = sigmas[i + 1] - sigmas[i];
|
||||
sd::Tensor<float> x2 = x + d * dt_1;
|
||||
auto denoised2_opt = model(x2, sigma_mid, i + 1, nullptr);
|
||||
if (denoised2_opt.empty()) {
|
||||
auto denoised2_opt = model(x2, sigma_mid, i + 1);
|
||||
if (denoised2_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised2 = std::move(denoised2_opt);
|
||||
sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
|
||||
x += ((x2 - denoised2) / sigma_mid) * dt_2;
|
||||
}
|
||||
}
|
||||
@ -1081,11 +1083,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
|
||||
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigmas[i], -(i + 1));
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);
|
||||
|
||||
if (sigma_down == 0) {
|
||||
@ -1097,11 +1099,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
|
||||
float s = t + 0.5f * h;
|
||||
float sigma_s = sigma_fn(s);
|
||||
sd::Tensor<float> x2 = (sigma_s / sigma_fn(t)) * x - (exp(-h * 0.5f) - 1) * denoised;
|
||||
auto denoised2_opt = model(x2, sigma_s, i + 1, nullptr);
|
||||
if (denoised2_opt.empty()) {
|
||||
auto denoised2_opt = model(x2, sigma_s, i + 1);
|
||||
if (denoised2_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised2 = std::move(denoised2_opt);
|
||||
sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
|
||||
x = (sigma_fn(t_next) / sigma_fn(t)) * x - (exp(-h) - 1) * denoised2;
|
||||
}
|
||||
|
||||
@ -1124,11 +1126,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
|
||||
|
||||
bool opt_first_step = (1.0 - sigma < 1e-6);
|
||||
|
||||
auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1), nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1));
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
|
||||
if (sigma_to == 0.0f) {
|
||||
// Euler method (final step, no noise)
|
||||
@ -1153,8 +1155,8 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
|
||||
// so sigma_s = 1 = sigma, and sigma_s_i_ratio = sigma_s / sigma = 1
|
||||
// u = (x*sigma_s_i_ratio)+(denoised*(1.0f-sigma_s_i_ratio))
|
||||
// = (x*1)+(denoised*0) = x
|
||||
// so D_i = model(u, sigma_s, i + 1, nullptr)
|
||||
// = model(x, sigma, i + 1, nullptr)
|
||||
// so D_i = model(u, sigma_s, i + 1)
|
||||
// = model(x, sigma, i + 1)
|
||||
// = denoised
|
||||
D_i = denoised;
|
||||
|
||||
@ -1187,11 +1189,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
|
||||
float sigma_s_i_ratio = sigma_s / sigma;
|
||||
sd::Tensor<float> u = (x * sigma_s_i_ratio) + (denoised * (1.0f - sigma_s_i_ratio));
|
||||
|
||||
auto denoised2_opt = model(u, sigma_s, i + 1, nullptr);
|
||||
if (denoised2_opt.empty()) {
|
||||
auto denoised2_opt = model(u, sigma_s, i + 1);
|
||||
if (denoised2_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
D_i = std::move(denoised2_opt);
|
||||
D_i = std::move(denoised2_opt.pred);
|
||||
}
|
||||
|
||||
float sigma_down_i_ratio = sigma_down / sigma;
|
||||
@ -1214,11 +1216,11 @@ static sd::Tensor<float> sample_dpmpp_2m(denoise_cb_t model,
|
||||
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigmas[i], i + 1);
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
float t = t_fn(sigmas[i]);
|
||||
float t_next = t_fn(sigmas[i + 1]);
|
||||
float h = t_next - t;
|
||||
@ -1246,11 +1248,11 @@ static sd::Tensor<float> sample_dpmpp_2m_v2(denoise_cb_t model,
|
||||
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigmas[i], i + 1);
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
float t = t_fn(sigmas[i]);
|
||||
float t_next = t_fn(sigmas[i + 1]);
|
||||
float h = t_next - t;
|
||||
@ -1354,11 +1356,11 @@ static sd::Tensor<float> sample_lcm(denoise_cb_t model,
|
||||
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigmas[i], i + 1);
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
x = std::move(denoised_opt);
|
||||
x = std::move(denoised_opt.pred);
|
||||
if (sigmas[i + 1] > 0) {
|
||||
if (is_flow_denoiser) {
|
||||
x *= (1 - sigmas[i + 1]);
|
||||
@ -1400,11 +1402,11 @@ static sd::Tensor<float> sample_ipndm(denoise_cb_t model,
|
||||
float sigma = sigmas[i];
|
||||
float sigma_next = sigmas[i + 1];
|
||||
|
||||
auto denoised_opt = model(x, sigma, i + 1, nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigma, i + 1);
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
|
||||
sd::Tensor<float> d_cur = (x - denoised) / sigma;
|
||||
int order = std::min(max_order, i + 1);
|
||||
@ -1444,11 +1446,11 @@ static sd::Tensor<float> sample_ipndm_v(denoise_cb_t model,
|
||||
float sigma = sigmas[i];
|
||||
float t_next = sigmas[i + 1];
|
||||
|
||||
auto denoised_opt = model(x, sigma, i + 1, nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigma, i + 1);
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
|
||||
sd::Tensor<float> d_cur = (x - denoised) / sigma;
|
||||
int order = std::min(max_order, i + 1);
|
||||
@ -1506,11 +1508,11 @@ static sd::Tensor<float> sample_res_multistep(denoise_cb_t model,
|
||||
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigmas[i], i + 1);
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
|
||||
float sigma_from = sigmas[i];
|
||||
float sigma_to = sigmas[i + 1];
|
||||
@ -1583,11 +1585,11 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
|
||||
float sigma_from = sigmas[i];
|
||||
float sigma_to = sigmas[i + 1];
|
||||
|
||||
auto denoised_opt = model(x, sigma_from, -(i + 1), nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigma_from, -(i + 1));
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
|
||||
auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma_from, sigma_to, eta, is_flow_denoiser);
|
||||
|
||||
@ -1609,11 +1611,11 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
|
||||
sd::Tensor<float> eps1 = denoised - x0;
|
||||
sd::Tensor<float> x2 = x0 + eps1 * (h * a21);
|
||||
|
||||
auto denoised2_opt = model(x2, sigma_c2, i + 1, nullptr);
|
||||
if (denoised2_opt.empty()) {
|
||||
auto denoised2_opt = model(x2, sigma_c2, i + 1);
|
||||
if (denoised2_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised2 = std::move(denoised2_opt);
|
||||
sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
|
||||
sd::Tensor<float> eps2 = denoised2 - x0;
|
||||
x = x0 + h * (b1 * eps1 + b2 * eps2);
|
||||
}
|
||||
@ -1686,10 +1688,11 @@ static sd::Tensor<float> sample_er_sde(denoise_cb_t model,
|
||||
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
sd::Tensor<float> denoised = model(x, sigmas[i], i + 1, nullptr);
|
||||
if (denoised.empty()) {
|
||||
auto denoised_opt = model(x, sigmas[i], i + 1);
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
|
||||
int stage_used = std::min(max_stage, i + 1);
|
||||
|
||||
@ -1804,11 +1807,11 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
|
||||
int timestep_s = (int)floor((1 - eta) * prev_timestep);
|
||||
float sigma = sigmas[i];
|
||||
|
||||
auto denoised_opt = model(x, sigma, i + 1, nullptr);
|
||||
if (denoised_opt.empty()) {
|
||||
auto denoised_opt = model(x, sigma, i + 1);
|
||||
if (denoised_opt.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
sd::Tensor<float> d = (x - denoised) / sigma;
|
||||
|
||||
float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
|
||||
@ -1833,16 +1836,15 @@ static sd::Tensor<float> sample_euler_cfg_pp(denoise_cb_t model,
|
||||
const std::vector<float>& sigmas) {
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
float sigma = sigmas[i];
|
||||
sd::Tensor<float> uncond_denoised;
|
||||
|
||||
auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised);
|
||||
if (denoised_opt.empty() || uncond_denoised.empty()) {
|
||||
float sigma = sigmas[i];
|
||||
auto denoised_opt = model(x, sigma, i + 1);
|
||||
if (denoised_opt.pred.empty() || denoised_opt.pred_uncond.empty()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> d = (x - uncond_denoised) / sigma;
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
sd::Tensor<float> uncond_denoised = std::move(denoised_opt.pred_uncond);
|
||||
sd::Tensor<float> d = (x - uncond_denoised) / sigma;
|
||||
|
||||
x = denoised + d * sigmas[i + 1];
|
||||
}
|
||||
@ -1856,16 +1858,15 @@ static sd::Tensor<float> sample_euler_ancestral_cfg_pp(denoise_cb_t model,
|
||||
float eta) {
|
||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||
for (int i = 0; i < steps; i++) {
|
||||
float sigma = sigmas[i];
|
||||
sd::Tensor<float> uncond_denoised;
|
||||
|
||||
auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised);
|
||||
if (denoised_opt.empty() || uncond_denoised.empty()) {
|
||||
float sigma = sigmas[i];
|
||||
auto denoised_opt = model(x, sigma, i + 1);
|
||||
if (denoised_opt.pred.empty() || denoised_opt.pred_uncond.empty()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||
sd::Tensor<float> d = (x - uncond_denoised) / sigma;
|
||||
sd::Tensor<float> denoised = std::move(denoised_opt.pred);
|
||||
sd::Tensor<float> uncond_denoised = std::move(denoised_opt.pred_uncond);
|
||||
sd::Tensor<float> d = (x - uncond_denoised) / sigma;
|
||||
|
||||
auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);
|
||||
|
||||
|
||||
89
src/guidance.cpp
Normal file
89
src/guidance.cpp
Normal file
@ -0,0 +1,89 @@
|
||||
#include "guidance.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace sd::guidance {
|
||||
|
||||
// Returns true only when `tensor` points at a tensor that is not empty.
// A null pointer is treated the same as an empty tensor.
static bool has_tensor(const sd::Tensor<float>* tensor) {
    if (tensor == nullptr) {
        return false;
    }
    return !tensor->empty();
}
|
||||
|
||||
ClassifierFreeGuidance::ClassifierFreeGuidance(float guidance_scale,
|
||||
float image_guidance_scale)
|
||||
: guidance_scale_(guidance_scale),
|
||||
image_guidance_scale_(image_guidance_scale) {
|
||||
}
|
||||
|
||||
// Blends the predictions referenced by `input` into a single guided prediction.
//
// Which formula is used depends on which optional predictions are present:
//   uncond + img_cond : uncond + image_scale * (img_cond - uncond) + scale * (cond - img_cond)
//   uncond only       : uncond + scale * (cond - uncond)
//   img_cond only     : img_cond + scale * (cond - img_cond)
//   neither           : cond, unchanged
//
// Only `pred` of the returned GuiderOutput is filled in. If `input.pred_cond`
// is null or empty, a default-constructed (empty) GuiderOutput is returned.
// `previous` is accepted to satisfy the BaseGuidance interface and is not used.
GuiderOutput ClassifierFreeGuidance::forward(const GuidanceInput& input,
                                             GuiderOutput previous) const {
    (void)previous;

    GuiderOutput result;
    if (!has_tensor(input.pred_cond)) {
        return result;
    }

    const sd::Tensor<float>& cond = *input.pred_cond;
    const bool have_uncond        = has_tensor(input.pred_uncond);
    const bool have_img_cond      = has_tensor(input.pred_img_cond);

    if (have_uncond && have_img_cond) {
        const sd::Tensor<float>& uncond   = *input.pred_uncond;
        const sd::Tensor<float>& img_cond = *input.pred_img_cond;
        result.pred                       = uncond +
                      image_guidance_scale_ * (img_cond - uncond) +
                      guidance_scale_ * (cond - img_cond);
    } else if (have_uncond) {
        const sd::Tensor<float>& uncond = *input.pred_uncond;
        result.pred                     = uncond + guidance_scale_ * (cond - uncond);
    } else if (have_img_cond) {
        const sd::Tensor<float>& img_cond = *input.pred_img_cond;
        result.pred                       = img_cond + guidance_scale_ * (cond - img_cond);
    } else {
        // Nothing to guide against: pass the conditional prediction through.
        result.pred = cond;
    }

    return result;
}
|
||||
|
||||
SkipLayerGuidance::SkipLayerGuidance(std::vector<int> layers,
|
||||
float scale,
|
||||
float start,
|
||||
float stop)
|
||||
: layers_(std::move(layers)),
|
||||
scale_(scale),
|
||||
start_(start),
|
||||
stop_(stop) {
|
||||
}
|
||||
|
||||
bool SkipLayerGuidance::is_enabled_for_step(const GuidanceInput& input) const {
|
||||
if (scale_ == 0.0f || layers_.empty() || input.schedule_size == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int start_step = static_cast<int>(start_ * static_cast<float>(input.schedule_size));
|
||||
int stop_step = static_cast<int>(stop_ * static_cast<float>(input.schedule_size));
|
||||
return input.step > start_step && input.step < stop_step;
|
||||
}
|
||||
|
||||
const std::vector<int>& SkipLayerGuidance::layers() const {
|
||||
return layers_;
|
||||
}
|
||||
|
||||
// Applies skip-layer guidance on top of an already guided prediction.
//
// - If guidance is not enabled for this step, or the caller supplied no
//   `predict_skip_layer` callback, `output` is returned untouched.
// - If `output.pred` or `input.pred_cond` is missing, or the callback yields
//   an empty tensor, an empty GuiderOutput is returned.
// - Otherwise the callback result is stored in `pred_skip_layer` and
//   (pred_cond - pred_skip_layer) * scale_ is added to `output.pred`.
GuiderOutput SkipLayerGuidance::forward(const GuidanceInput& input,
                                        GuiderOutput output) const {
    // Same short-circuit order as the step check followed by the callback check.
    const bool active = is_enabled_for_step(input) && static_cast<bool>(input.predict_skip_layer);
    if (!active) {
        return output;
    }

    const bool inputs_ready = !output.pred.empty() && has_tensor(input.pred_cond);
    if (!inputs_ready) {
        return GuiderOutput();
    }

    // The callback is only invoked once every precondition has passed.
    output.pred_skip_layer = input.predict_skip_layer();
    if (output.pred_skip_layer.empty()) {
        return GuiderOutput();
    }

    const sd::Tensor<float>& cond = *input.pred_cond;
    output.pred += (cond - output.pred_skip_layer) * scale_;
    return output;
}
|
||||
|
||||
} // namespace sd::guidance
|
||||
70
src/guidance.h
Normal file
70
src/guidance.h
Normal file
@ -0,0 +1,70 @@
|
||||
#ifndef __SD_GUIDANCE_H__
|
||||
#define __SD_GUIDANCE_H__
|
||||
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include "tensor.hpp"
|
||||
|
||||
namespace sd::guidance {
|
||||
|
||||
struct GuiderOutput {
|
||||
sd::Tensor<float> pred;
|
||||
sd::Tensor<float> pred_cond;
|
||||
sd::Tensor<float> pred_uncond;
|
||||
sd::Tensor<float> pred_img_cond;
|
||||
sd::Tensor<float> pred_skip_layer;
|
||||
};
|
||||
|
||||
struct GuidanceInput {
|
||||
int step = 0;
|
||||
size_t schedule_size = 0;
|
||||
const sd::Tensor<float>* pred_cond = nullptr;
|
||||
const sd::Tensor<float>* pred_uncond = nullptr;
|
||||
const sd::Tensor<float>* pred_img_cond = nullptr;
|
||||
|
||||
std::function<sd::Tensor<float>()> predict_skip_layer;
|
||||
};
|
||||
|
||||
class BaseGuidance {
|
||||
public:
|
||||
virtual ~BaseGuidance() = default;
|
||||
virtual GuiderOutput forward(const GuidanceInput& input,
|
||||
GuiderOutput previous) const = 0;
|
||||
};
|
||||
|
||||
class ClassifierFreeGuidance : public BaseGuidance {
|
||||
float guidance_scale_ = 1.0f;
|
||||
float image_guidance_scale_ = 1.0f;
|
||||
|
||||
public:
|
||||
ClassifierFreeGuidance(float guidance_scale,
|
||||
float image_guidance_scale);
|
||||
|
||||
GuiderOutput forward(const GuidanceInput& input,
|
||||
GuiderOutput previous) const override;
|
||||
};
|
||||
|
||||
class SkipLayerGuidance : public BaseGuidance {
|
||||
std::vector<int> layers_;
|
||||
float scale_ = 0.0f;
|
||||
float start_ = 0.0f;
|
||||
float stop_ = 1.0f;
|
||||
|
||||
public:
|
||||
SkipLayerGuidance(std::vector<int> layers,
|
||||
float scale,
|
||||
float start,
|
||||
float stop);
|
||||
|
||||
bool is_enabled_for_step(const GuidanceInput& input) const;
|
||||
const std::vector<int>& layers() const;
|
||||
|
||||
GuiderOutput forward(const GuidanceInput& input,
|
||||
GuiderOutput previous) const override;
|
||||
};
|
||||
|
||||
} // namespace sd::guidance
|
||||
|
||||
#endif // __SD_GUIDANCE_H__
|
||||
@ -14,6 +14,7 @@
|
||||
#include "denoiser.hpp"
|
||||
#include "diffusion_model.hpp"
|
||||
#include "esrgan.hpp"
|
||||
#include "guidance.h"
|
||||
#include "lora.hpp"
|
||||
#include "ltx_audio_vae.h"
|
||||
#include "ltx_vae.hpp"
|
||||
@ -1854,8 +1855,9 @@ public:
|
||||
denoiser.get(),
|
||||
sigmas);
|
||||
|
||||
bool needs_uncond_denoised = method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD;
|
||||
// Spectrum cache is not supported for CFG++ samplers
|
||||
if (method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD) {
|
||||
if (needs_uncond_denoised) {
|
||||
if (cache_runtime.spectrum_enabled) {
|
||||
LOG_WARN("Spectrum cache requested but not supported for CFG++ samplers");
|
||||
cache_runtime.spectrum_enabled = false;
|
||||
@ -1868,6 +1870,11 @@ public:
|
||||
has_skiplayer = false;
|
||||
LOG_WARN("SLG is incompatible with this model type");
|
||||
}
|
||||
sd::guidance::ClassifierFreeGuidance classifier_free_guidance(cfg_scale, img_cfg_scale);
|
||||
sd::guidance::SkipLayerGuidance skip_layer_guidance(has_skiplayer ? skip_layers : std::vector<int>(),
|
||||
has_skiplayer ? slg_scale : 0.0f,
|
||||
guidance.slg.layer_start,
|
||||
guidance.slg.layer_end);
|
||||
|
||||
if (version == VERSION_HIDREAM_O1 && !noise.empty()) {
|
||||
noise *= eta;
|
||||
@ -1880,7 +1887,7 @@ public:
|
||||
sd::Tensor<float> denoised = x_t;
|
||||
SamplePreviewContext preview = prepare_sample_preview_context();
|
||||
|
||||
auto denoise = [&](const sd::Tensor<float>& x, float sigma, int step, sd::Tensor<float>* out_uncond_denoised = nullptr) -> sd::Tensor<float> {
|
||||
auto denoise = [&](const sd::Tensor<float>& x, float sigma, int step) -> sd::guidance::GuiderOutput {
|
||||
if (step == 1 || step == -1) {
|
||||
pretty_progress(0, (int)steps, 0);
|
||||
}
|
||||
@ -1913,17 +1920,17 @@ public:
|
||||
}
|
||||
|
||||
if (cache_runtime.spectrum_enabled && cache_runtime.spectrum.should_predict()) {
|
||||
if (out_uncond_denoised == nullptr) {
|
||||
cache_runtime.spectrum.predict(&denoised);
|
||||
if (!denoise_mask.empty()) {
|
||||
denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask);
|
||||
}
|
||||
if (sd_should_preview_denoised() && preview.callback != nullptr) {
|
||||
preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false);
|
||||
}
|
||||
report_sample_progress(step, steps, t0);
|
||||
return denoised;
|
||||
cache_runtime.spectrum.predict(&denoised);
|
||||
if (!denoise_mask.empty()) {
|
||||
denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask);
|
||||
}
|
||||
if (sd_should_preview_denoised() && preview.callback != nullptr) {
|
||||
preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false);
|
||||
}
|
||||
report_sample_progress(step, steps, t0);
|
||||
sd::guidance::GuiderOutput output;
|
||||
output.pred = denoised;
|
||||
return output;
|
||||
}
|
||||
|
||||
if (sd_should_preview_noisy() && preview.callback != nullptr) {
|
||||
@ -1933,7 +1940,6 @@ public:
|
||||
sd::Tensor<float> cond_out;
|
||||
sd::Tensor<float> uncond_out;
|
||||
sd::Tensor<float> img_cond_out;
|
||||
sd::Tensor<float> skip_cond_out;
|
||||
sd_sample::SampleStepCacheDispatcher step_cache(cache_runtime, step, sigma);
|
||||
std::vector<sd::Tensor<float>> controls;
|
||||
DiffusionParams diffusion_params;
|
||||
@ -2023,42 +2029,40 @@ public:
|
||||
return {};
|
||||
}
|
||||
}
|
||||
bool is_skiplayer_step = has_skiplayer &&
|
||||
step > (int)(guidance.slg.layer_start * static_cast<int>(sigmas.size())) &&
|
||||
step < (int)(guidance.slg.layer_end * static_cast<int>(sigmas.size()));
|
||||
if (is_skiplayer_step) {
|
||||
sd::guidance::GuidanceInput guidance_input;
|
||||
guidance_input.step = step;
|
||||
guidance_input.schedule_size = sigmas.size();
|
||||
guidance_input.pred_cond = &cond_out;
|
||||
guidance_input.pred_uncond = uncond_out.empty() ? nullptr : &uncond_out;
|
||||
guidance_input.pred_img_cond = img_cond_out.empty() ? nullptr : &img_cond_out;
|
||||
|
||||
sd::guidance::GuiderOutput guided = classifier_free_guidance.forward(guidance_input, {});
|
||||
if (guided.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
if (skip_layer_guidance.is_enabled_for_step(guidance_input)) {
|
||||
LOG_DEBUG("Skipping layers at step %d\n", step);
|
||||
if (!step_cache.is_step_skipped()) {
|
||||
skip_cond_out = run_condition(cond,
|
||||
cond.c_concat.empty() ? nullptr : &cond.c_concat,
|
||||
&skip_layers);
|
||||
if (skip_cond_out.empty()) {
|
||||
return {};
|
||||
}
|
||||
guidance_input.predict_skip_layer = [&]() -> sd::Tensor<float> {
|
||||
return run_condition(cond,
|
||||
cond.c_concat.empty() ? nullptr : &cond.c_concat,
|
||||
&skip_layer_guidance.layers());
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
GGML_ASSERT(!cond_out.empty());
|
||||
sd::Tensor<float> latent_result = cond_out;
|
||||
if (!uncond_out.empty()) {
|
||||
if (!img_cond_out.empty()) {
|
||||
latent_result = uncond_out +
|
||||
img_cfg_scale * (img_cond_out - uncond_out) +
|
||||
cfg_scale * (cond_out - img_cond_out);
|
||||
} else {
|
||||
latent_result = uncond_out + cfg_scale * (cond_out - uncond_out);
|
||||
}
|
||||
} else if (!img_cond_out.empty()) {
|
||||
latent_result = img_cond_out + cfg_scale * (cond_out - img_cond_out);
|
||||
guided = skip_layer_guidance.forward(guidance_input, std::move(guided));
|
||||
if (guided.pred.empty()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
if (is_skiplayer_step && !skip_cond_out.empty()) {
|
||||
latent_result += (cond_out - skip_cond_out) * slg_scale;
|
||||
}
|
||||
denoised = latent_result * c_out + x * c_skip;
|
||||
if (out_uncond_denoised != nullptr) {
|
||||
sd::Tensor<float> base_uncond = !uncond_out.empty() ? uncond_out : cond_out;
|
||||
*out_uncond_denoised = base_uncond * c_out + x * c_skip;
|
||||
denoised = guided.pred * c_out + x * c_skip;
|
||||
sd::guidance::GuiderOutput output;
|
||||
output.pred = denoised;
|
||||
if (needs_uncond_denoised) {
|
||||
const sd::Tensor<float>& base_uncond = !uncond_out.empty() ? uncond_out : cond_out;
|
||||
output.pred_uncond = base_uncond * c_out + x * c_skip;
|
||||
}
|
||||
if (cache_runtime.spectrum_enabled) {
|
||||
cache_runtime.spectrum.update(denoised);
|
||||
@ -2070,7 +2074,8 @@ public:
|
||||
preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false);
|
||||
}
|
||||
report_sample_progress(step, steps, t0);
|
||||
return denoised;
|
||||
output.pred = denoised;
|
||||
return output;
|
||||
};
|
||||
|
||||
auto x0_opt = sample_k_diffusion(method, denoise, x_t, sigmas, sampler_rng, eta, is_flow_denoiser, extra_sample_args);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user