refactor: split guidance composition (#1506)

2026-06-09 15:56:39 +00:00 · 2026-05-17 20:20:16 +08:00 · 2026-05-17 20:20:16 +08:00 · 50134e51dd
commit 50134e51dd
parent e43b24cf48
4 changed files with 284 additions and 119 deletions
--- a/src/denoiser.hpp
+++ b/src/denoiser.hpp
@ -4,11 +4,13 @@
 #include <algorithm>
 #include <cctype>
 #include <cmath>
 #include <functional>
 #include <string>
 #include <utility>
 #include "ggml_extend.hpp"
 #include "gits_noise.inl"
 #include "guidance.h"
 #include "tensor.hpp"
 /*================================================= CompVisDenoiser ==================================================*/
@ -894,7 +896,7 @@ struct Flux2FlowDenoiser : public FluxFlowDenoiser {
    }
 };
-typedef std::function<sd::Tensor<float>(const sd::Tensor<float>&, float, int, sd::Tensor<float>*)> denoise_cb_t;
+typedef std::function<sd::guidance::GuiderOutput(const sd::Tensor<float>&, float, int)> denoise_cb_t;
 static std::pair<float, float> get_ancestral_step(float sigma_from,
                                                  float sigma_to,
@ -972,11 +974,11 @@ static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
    for (int i = 0; i < steps; i++) {
        float sigma       = sigmas[i];
        float sigma_to    = sigmas[i + 1];
-        auto denoised_opt = model(x, sigma, i + 1, nullptr);
+        auto denoised_opt = model(x, sigma, i + 1);
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        if (sigma_to == 0.f) {
            x = denoised;
        } else if (eta == 0.f) {
@ -1003,11 +1005,11 @@ static sd::Tensor<float> sample_euler(denoise_cb_t model,
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
        float sigma       = sigmas[i];
-        auto denoised_opt = model(x, sigma, i + 1, nullptr);
+        auto denoised_opt = model(x, sigma, i + 1);
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        sd::Tensor<float> d        = (x - denoised) / sigma;
        x += d * (sigmas[i + 1] - sigma);
    }
@ -1019,22 +1021,22 @@ static sd::Tensor<float> sample_heun(denoise_cb_t model,
                                     const std::vector<float>& sigmas) {
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr);
+        auto denoised_opt = model(x, sigmas[i], -(i + 1));
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        sd::Tensor<float> d        = (x - denoised) / sigmas[i];
        float dt                   = sigmas[i + 1] - sigmas[i];
        if (sigmas[i + 1] == 0) {
            x += d * dt;
        } else {
            sd::Tensor<float> x2 = x + d * dt;
-            auto denoised2_opt   = model(x2, sigmas[i + 1], i + 1, nullptr);
+            auto denoised2_opt   = model(x2, sigmas[i + 1], i + 1);
-            if (denoised2_opt.empty()) {
+            if (denoised2_opt.pred.empty()) {
                return {};
            }
-            sd::Tensor<float> denoised2 = std::move(denoised2_opt);
+            sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
            d                           = (d + (x2 - denoised2) / sigmas[i + 1]) / 2.0f;
            x += d * dt;
        }
@ -1047,11 +1049,11 @@ static sd::Tensor<float> sample_dpm2(denoise_cb_t model,
                                     const std::vector<float>& sigmas) {
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr);
+        auto denoised_opt = model(x, sigmas[i], -(i + 1));
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        sd::Tensor<float> d        = (x - denoised) / sigmas[i];
        if (sigmas[i + 1] == 0) {
            x += d * (sigmas[i + 1] - sigmas[i]);
@ -1060,11 +1062,11 @@ static sd::Tensor<float> sample_dpm2(denoise_cb_t model,
            float dt_1           = sigma_mid - sigmas[i];
            float dt_2           = sigmas[i + 1] - sigmas[i];
            sd::Tensor<float> x2 = x + d * dt_1;
-            auto denoised2_opt   = model(x2, sigma_mid, i + 1, nullptr);
+            auto denoised2_opt   = model(x2, sigma_mid, i + 1);
-            if (denoised2_opt.empty()) {
+            if (denoised2_opt.pred.empty()) {
                return {};
            }
-            sd::Tensor<float> denoised2 = std::move(denoised2_opt);
+            sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
            x += ((x2 - denoised2) / sigma_mid) * dt_2;
        }
    }
@ -1081,11 +1083,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], -(i + 1), nullptr);
+        auto denoised_opt = model(x, sigmas[i], -(i + 1));
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised  = std::move(denoised_opt);
+        sd::Tensor<float> denoised  = std::move(denoised_opt.pred);
        auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);
        if (sigma_down == 0) {
@ -1097,11 +1099,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
            float s              = t + 0.5f * h;
            float sigma_s        = sigma_fn(s);
            sd::Tensor<float> x2 = (sigma_s / sigma_fn(t)) * x - (exp(-h * 0.5f) - 1) * denoised;
-            auto denoised2_opt   = model(x2, sigma_s, i + 1, nullptr);
+            auto denoised2_opt   = model(x2, sigma_s, i + 1);
-            if (denoised2_opt.empty()) {
+            if (denoised2_opt.pred.empty()) {
                return {};
            }
-            sd::Tensor<float> denoised2 = std::move(denoised2_opt);
+            sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
            x                           = (sigma_fn(t_next) / sigma_fn(t)) * x - (exp(-h) - 1) * denoised2;
        }
@ -1124,11 +1126,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
        bool opt_first_step = (1.0 - sigma < 1e-6);
-        auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1), nullptr);
+        auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1));
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        if (sigma_to == 0.0f) {
            // Euler method (final step, no noise)
@ -1153,8 +1155,8 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
                // so sigma_s = 1 = sigma, and sigma_s_i_ratio = sigma_s / sigma = 1
                // u = (x*sigma_s_i_ratio)+(denoised*(1.0f-sigma_s_i_ratio))
                //   = (x*1)+(denoised*0) = x
-                // so D_i = model(u, sigma_s, i + 1, nullptr)
+                // so D_i = model(u, sigma_s, i + 1)
-                //        = model(x, sigma,   i + 1, nullptr)
+                //        = model(x, sigma,   i + 1)
                //        = denoised
                D_i = denoised;
@ -1187,11 +1189,11 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
                float sigma_s_i_ratio = sigma_s / sigma;
                sd::Tensor<float> u   = (x * sigma_s_i_ratio) + (denoised * (1.0f - sigma_s_i_ratio));
-                auto denoised2_opt = model(u, sigma_s, i + 1, nullptr);
+                auto denoised2_opt = model(u, sigma_s, i + 1);
-                if (denoised2_opt.empty()) {
+                if (denoised2_opt.pred.empty()) {
                    return {};
                }
-                D_i = std::move(denoised2_opt);
+                D_i = std::move(denoised2_opt.pred);
            }
            float sigma_down_i_ratio = sigma_down / sigma;
@ -1214,11 +1216,11 @@ static sd::Tensor<float> sample_dpmpp_2m(denoise_cb_t model,
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
+        auto denoised_opt = model(x, sigmas[i], i + 1);
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        float t                    = t_fn(sigmas[i]);
        float t_next               = t_fn(sigmas[i + 1]);
        float h                    = t_next - t;
@ -1246,11 +1248,11 @@ static sd::Tensor<float> sample_dpmpp_2m_v2(denoise_cb_t model,
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
+        auto denoised_opt = model(x, sigmas[i], i + 1);
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        float t                    = t_fn(sigmas[i]);
        float t_next               = t_fn(sigmas[i + 1]);
        float h                    = t_next - t;
@ -1354,11 +1356,11 @@ static sd::Tensor<float> sample_lcm(denoise_cb_t model,
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
+        auto denoised_opt = model(x, sigmas[i], i + 1);
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        x = std::move(denoised_opt);
+        x = std::move(denoised_opt.pred);
        if (sigmas[i + 1] > 0) {
            if (is_flow_denoiser) {
                x *= (1 - sigmas[i + 1]);
@ -1400,11 +1402,11 @@ static sd::Tensor<float> sample_ipndm(denoise_cb_t model,
        float sigma      = sigmas[i];
        float sigma_next = sigmas[i + 1];
-        auto denoised_opt = model(x, sigma, i + 1, nullptr);
+        auto denoised_opt = model(x, sigma, i + 1);
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        sd::Tensor<float> d_cur = (x - denoised) / sigma;
        int order               = std::min(max_order, i + 1);
@ -1444,11 +1446,11 @@ static sd::Tensor<float> sample_ipndm_v(denoise_cb_t model,
        float sigma  = sigmas[i];
        float t_next = sigmas[i + 1];
-        auto denoised_opt = model(x, sigma, i + 1, nullptr);
+        auto denoised_opt = model(x, sigma, i + 1);
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        sd::Tensor<float> d_cur = (x - denoised) / sigma;
        int order               = std::min(max_order, i + 1);
@ -1506,11 +1508,11 @@ static sd::Tensor<float> sample_res_multistep(denoise_cb_t model,
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
-        auto denoised_opt = model(x, sigmas[i], i + 1, nullptr);
+        auto denoised_opt = model(x, sigmas[i], i + 1);
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        float sigma_from = sigmas[i];
        float sigma_to   = sigmas[i + 1];
@ -1583,11 +1585,11 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
        float sigma_from = sigmas[i];
        float sigma_to   = sigmas[i + 1];
-        auto denoised_opt = model(x, sigma_from, -(i + 1), nullptr);
+        auto denoised_opt = model(x, sigma_from, -(i + 1));
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma_from, sigma_to, eta, is_flow_denoiser);
@ -1609,11 +1611,11 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
            sd::Tensor<float> eps1 = denoised - x0;
            sd::Tensor<float> x2   = x0 + eps1 * (h * a21);
-            auto denoised2_opt = model(x2, sigma_c2, i + 1, nullptr);
+            auto denoised2_opt = model(x2, sigma_c2, i + 1);
-            if (denoised2_opt.empty()) {
+            if (denoised2_opt.pred.empty()) {
                return {};
            }
-            sd::Tensor<float> denoised2 = std::move(denoised2_opt);
+            sd::Tensor<float> denoised2 = std::move(denoised2_opt.pred);
            sd::Tensor<float> eps2      = denoised2 - x0;
            x                           = x0 + h * (b1 * eps1 + b2 * eps2);
        }
@ -1686,10 +1688,11 @@ static sd::Tensor<float> sample_er_sde(denoise_cb_t model,
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
-        sd::Tensor<float> denoised = model(x, sigmas[i], i + 1, nullptr);
+        auto denoised_opt = model(x, sigmas[i], i + 1);
-        if (denoised.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        int stage_used = std::min(max_stage, i + 1);
@ -1804,11 +1807,11 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
        int timestep_s    = (int)floor((1 - eta) * prev_timestep);
        float sigma       = sigmas[i];
-        auto denoised_opt = model(x, sigma, i + 1, nullptr);
+        auto denoised_opt = model(x, sigma, i + 1);
-        if (denoised_opt.empty()) {
+        if (denoised_opt.pred.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised = std::move(denoised_opt.pred);
        sd::Tensor<float> d        = (x - denoised) / sigma;
        float alpha_prod_t      = 1.0f / (sigma * sigma + 1.0f);
@ -1834,14 +1837,13 @@ static sd::Tensor<float> sample_euler_cfg_pp(denoise_cb_t model,
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
        float sigma       = sigmas[i];
-        sd::Tensor<float> uncond_denoised;
+        auto denoised_opt = model(x, sigma, i + 1);
-
+        if (denoised_opt.pred.empty() || denoised_opt.pred_uncond.empty()) {
        auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised);
        if (denoised_opt.empty() || uncond_denoised.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised        = std::move(denoised_opt.pred);
        sd::Tensor<float> uncond_denoised = std::move(denoised_opt.pred_uncond);
        sd::Tensor<float> d               = (x - uncond_denoised) / sigma;
        x = denoised + d * sigmas[i + 1];
@ -1857,14 +1859,13 @@ static sd::Tensor<float> sample_euler_ancestral_cfg_pp(denoise_cb_t model,
    int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
        float sigma       = sigmas[i];
-        sd::Tensor<float> uncond_denoised;
+        auto denoised_opt = model(x, sigma, i + 1);
-
+        if (denoised_opt.pred.empty() || denoised_opt.pred_uncond.empty()) {
        auto denoised_opt = model(x, sigma, i + 1, &uncond_denoised);
        if (denoised_opt.empty() || uncond_denoised.empty()) {
            return {};
        }
-        sd::Tensor<float> denoised = std::move(denoised_opt);
+        sd::Tensor<float> denoised        = std::move(denoised_opt.pred);
        sd::Tensor<float> uncond_denoised = std::move(denoised_opt.pred_uncond);
        sd::Tensor<float> d               = (x - uncond_denoised) / sigma;
        auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);
--- a/src/guidance.cpp
+++ b/src/guidance.cpp
@ -0,0 +1,89 @@
 #include "guidance.h"
 #include <utility>
 namespace sd::guidance {
    // True only when `tensor` is a non-null pointer to a non-empty tensor.
    static bool has_tensor(const sd::Tensor<float>* tensor) {
        if (tensor == nullptr) {
            return false;
        }
        return !tensor->empty();
    }
    ClassifierFreeGuidance::ClassifierFreeGuidance(float guidance_scale,
                                                   float image_guidance_scale)
        : guidance_scale_(guidance_scale),
          image_guidance_scale_(image_guidance_scale) {
    }
    // Blends the conditional prediction with whichever of the unconditional and
    // image-conditional predictions are present (non-null and non-empty):
    //
    //   uncond + img : uncond + image_guidance_scale * (img - uncond)
    //                         + guidance_scale * (cond - img)
    //   uncond only  : uncond + guidance_scale * (cond - uncond)
    //   img only     : img + guidance_scale * (cond - img)
    //   neither      : cond, unchanged
    //
    // Only `pred` of the returned GuiderOutput is populated. A default (empty)
    // GuiderOutput is returned when input.pred_cond is null or empty.
    // `previous` is accepted to satisfy the BaseGuidance interface and is ignored.
    GuiderOutput ClassifierFreeGuidance::forward(const GuidanceInput& input,
                                                 GuiderOutput previous) const {
        (void)previous;

        GuiderOutput output;
        if (!has_tensor(input.pred_cond)) {
            return output;
        }

        const sd::Tensor<float>& pred_cond = *input.pred_cond;
        const bool has_uncond              = has_tensor(input.pred_uncond);
        const bool has_img_cond            = has_tensor(input.pred_img_cond);

        // Each branch assigns output.pred exactly once, so pred_cond is copied
        // only when there is nothing to blend it with.
        if (has_uncond && has_img_cond) {
            const sd::Tensor<float>& pred_uncond   = *input.pred_uncond;
            const sd::Tensor<float>& pred_img_cond = *input.pred_img_cond;
            output.pred                            = pred_uncond +
                          image_guidance_scale_ * (pred_img_cond - pred_uncond) +
                          guidance_scale_ * (pred_cond - pred_img_cond);
        } else if (has_uncond) {
            const sd::Tensor<float>& pred_uncond = *input.pred_uncond;
            output.pred                          = pred_uncond + guidance_scale_ * (pred_cond - pred_uncond);
        } else if (has_img_cond) {
            const sd::Tensor<float>& pred_img_cond = *input.pred_img_cond;
            output.pred                            = pred_img_cond + guidance_scale_ * (pred_cond - pred_img_cond);
        } else {
            output.pred = pred_cond;
        }
        return output;
    }
    SkipLayerGuidance::SkipLayerGuidance(std::vector<int> layers,
                                         float scale,
                                         float start,
                                         float stop)
        : layers_(std::move(layers)),
          scale_(scale),
          start_(start),
          stop_(stop) {
    }
    // Reports whether skip-layer guidance applies to input.step.
    //
    // Guidance is off entirely when the scale is zero, no layers are configured,
    // or the schedule is empty. Otherwise the start/stop fractions are scaled by
    // the schedule size and truncated to int; the step must lie strictly between
    // the two resulting bounds.
    bool SkipLayerGuidance::is_enabled_for_step(const GuidanceInput& input) const {
        const bool configured = scale_ != 0.0f && !layers_.empty() && input.schedule_size != 0;
        if (!configured) {
            return false;
        }

        const float schedule_len = static_cast<float>(input.schedule_size);
        const int lower_bound    = static_cast<int>(start_ * schedule_len);
        const int upper_bound    = static_cast<int>(stop_ * schedule_len);

        // Both bounds are exclusive.
        return lower_bound < input.step && input.step < upper_bound;
    }
    const std::vector<int>& SkipLayerGuidance::layers() const {
        return layers_;
    }
    // Applies skip-layer guidance on top of an earlier guider's result.
    //
    // `previous` is returned untouched when guidance is not enabled for this step
    // or no predict_skip_layer callback was supplied. Otherwise the callback is
    // invoked, its result is stored in pred_skip_layer, and
    // (pred_cond - pred_skip_layer) * scale is added to pred.
    //
    // An empty GuiderOutput is returned when previous.pred is empty, when
    // input.pred_cond is missing, or when the callback yields an empty tensor.
    GuiderOutput SkipLayerGuidance::forward(const GuidanceInput& input,
                                            GuiderOutput previous) const {
        const bool active = is_enabled_for_step(input) && static_cast<bool>(input.predict_skip_layer);
        if (!active) {
            return previous;
        }

        const bool inputs_ready = !previous.pred.empty() && has_tensor(input.pred_cond);
        if (!inputs_ready) {
            return {};
        }

        previous.pred_skip_layer = input.predict_skip_layer();
        if (previous.pred_skip_layer.empty()) {
            return {};
        }

        const sd::Tensor<float>& cond = *input.pred_cond;
        previous.pred += (cond - previous.pred_skip_layer) * scale_;
        return previous;
    }
 }  // namespace sd::guidance
--- a/src/guidance.h
+++ b/src/guidance.h
@ -0,0 +1,70 @@
 #ifndef __SD_GUIDANCE_H__
 #define __SD_GUIDANCE_H__
 #include <cstddef>
 #include <functional>
 #include <vector>
 #include "tensor.hpp"
 namespace sd::guidance {
    // Bundle of predictions returned by guiders and by the sampler denoise
    // callback. Callers treat an empty `pred` as failure.
    struct GuiderOutput {
        // Final prediction after guidance has been applied.
        sd::Tensor<float> pred;
        // NOTE(review): not written by any code visible alongside this header — confirm intended use.
        sd::Tensor<float> pred_cond;
        // Unconditional prediction; the CFG++ samplers require it to be non-empty.
        sd::Tensor<float> pred_uncond;
        // NOTE(review): not written by any code visible alongside this header — confirm intended use.
        sd::Tensor<float> pred_img_cond;
        // Result of GuidanceInput::predict_skip_layer, stored by SkipLayerGuidance::forward.
        sd::Tensor<float> pred_skip_layer;
    };
    // Per-step inputs handed to a guider. The tensor pointers are non-owning;
    // a null pointer or an empty tensor both mean "not available".
    struct GuidanceInput {
        // Step value as received by the denoise callback.
        int step                               = 0;
        // Length of the sigma schedule; 0 disables skip-layer guidance.
        size_t schedule_size                   = 0;
        // Conditional prediction; required by both guiders.
        const sd::Tensor<float>* pred_cond     = nullptr;
        // Optional unconditional prediction.
        const sd::Tensor<float>* pred_uncond   = nullptr;
        // Optional image-conditional prediction.
        const sd::Tensor<float>* pred_img_cond = nullptr;
        // Produces the skip-layer prediction on demand; leaving it unset makes
        // SkipLayerGuidance::forward a pass-through.
        std::function<sd::Tensor<float>()> predict_skip_layer;
    };
    // Abstract interface for one guidance stage.
    class BaseGuidance {
    public:
        virtual ~BaseGuidance()                                   = default;
        // Produces this stage's output from `input` and the output of the stage
        // that ran before it (`previous`, taken by value).
        virtual GuiderOutput forward(const GuidanceInput& input,
                                     GuiderOutput previous) const = 0;
    };
    // Guidance stage that blends the conditional prediction with the
    // unconditional and/or image-conditional predictions when they are present.
    class ClassifierFreeGuidance : public BaseGuidance {
        // Weight applied to the conditional difference term.
        float guidance_scale_       = 1.0f;
        // Weight applied to (img_cond - uncond) when both predictions are present.
        float image_guidance_scale_ = 1.0f;
    public:
        ClassifierFreeGuidance(float guidance_scale,
                               float image_guidance_scale);
        // Ignores `previous`; returns an empty output when input.pred_cond is missing.
        GuiderOutput forward(const GuidanceInput& input,
                             GuiderOutput previous) const override;
    };
    // Guidance stage that adds a scaled (cond - skip_layer) correction to an
    // earlier stage's prediction, but only within a window of the schedule.
    class SkipLayerGuidance : public BaseGuidance {
        // Layer indices exposed through layers(); an empty list disables the stage.
        std::vector<int> layers_;
        // Multiplier for the correction term; 0 disables the stage.
        float scale_ = 0.0f;
        // Window bounds as fractions of GuidanceInput::schedule_size.
        float start_ = 0.0f;
        float stop_  = 1.0f;
    public:
        SkipLayerGuidance(std::vector<int> layers,
                          float scale,
                          float start,
                          float stop);
        // True when input.step lies strictly inside the configured window and the
        // stage is configured (non-zero scale, non-empty layers, non-zero schedule).
        bool is_enabled_for_step(const GuidanceInput& input) const;
        const std::vector<int>& layers() const;
        // Returns `previous` unchanged when the stage is inactive for this step or
        // no predict_skip_layer callback is set.
        GuiderOutput forward(const GuidanceInput& input,
                             GuiderOutput previous) const override;
    };
 }  // namespace sd::guidance
 #endif  // __SD_GUIDANCE_H__
--- a/src/stable-diffusion.cpp
+++ b/src/stable-diffusion.cpp
@ -14,6 +14,7 @@
 #include "denoiser.hpp"
 #include "diffusion_model.hpp"
 #include "esrgan.hpp"
 #include "guidance.h"
 #include "lora.hpp"
 #include "ltx_audio_vae.h"
 #include "ltx_vae.hpp"
@ -1854,8 +1855,9 @@ public:
                                                                                           denoiser.get(),
                                                                                           sigmas);
        bool needs_uncond_denoised = method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD;
        // Spectrum cache is not supported for CFG++ samplers
-        if (method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD) {
+        if (needs_uncond_denoised) {
            if (cache_runtime.spectrum_enabled) {
                LOG_WARN("Spectrum cache requested but not supported for CFG++ samplers");
                cache_runtime.spectrum_enabled = false;
@ -1868,6 +1870,11 @@ public:
            has_skiplayer = false;
            LOG_WARN("SLG is incompatible with this model type");
        }
        sd::guidance::ClassifierFreeGuidance classifier_free_guidance(cfg_scale, img_cfg_scale);
        sd::guidance::SkipLayerGuidance skip_layer_guidance(has_skiplayer ? skip_layers : std::vector<int>(),
                                                            has_skiplayer ? slg_scale : 0.0f,
                                                            guidance.slg.layer_start,
                                                            guidance.slg.layer_end);
        if (version == VERSION_HIDREAM_O1 && !noise.empty()) {
            noise *= eta;
@ -1880,7 +1887,7 @@ public:
        sd::Tensor<float> denoised   = x_t;
        SamplePreviewContext preview = prepare_sample_preview_context();
-        auto denoise = [&](const sd::Tensor<float>& x, float sigma, int step, sd::Tensor<float>* out_uncond_denoised = nullptr) -> sd::Tensor<float> {
+        auto denoise = [&](const sd::Tensor<float>& x, float sigma, int step) -> sd::guidance::GuiderOutput {
            if (step == 1 || step == -1) {
                pretty_progress(0, (int)steps, 0);
            }
@ -1913,7 +1920,6 @@ public:
            }
            if (cache_runtime.spectrum_enabled && cache_runtime.spectrum.should_predict()) {
                if (out_uncond_denoised == nullptr) {
                cache_runtime.spectrum.predict(&denoised);
                if (!denoise_mask.empty()) {
                    denoised = denoised * denoise_mask + init_latent * (1.0f - denoise_mask);
@ -1922,8 +1928,9 @@ public:
                    preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false);
                }
                report_sample_progress(step, steps, t0);
-                    return denoised;
+                sd::guidance::GuiderOutput output;
-                }
+                output.pred = denoised;
                return output;
            }
            if (sd_should_preview_noisy() && preview.callback != nullptr) {
@ -1933,7 +1940,6 @@ public:
            sd::Tensor<float> cond_out;
            sd::Tensor<float> uncond_out;
            sd::Tensor<float> img_cond_out;
            sd::Tensor<float> skip_cond_out;
            sd_sample::SampleStepCacheDispatcher step_cache(cache_runtime, step, sigma);
            std::vector<sd::Tensor<float>> controls;
            DiffusionParams diffusion_params;
@ -2023,42 +2029,40 @@ public:
                    return {};
                }
            }
-            bool is_skiplayer_step = has_skiplayer &&
+            sd::guidance::GuidanceInput guidance_input;
-                                     step > (int)(guidance.slg.layer_start * static_cast<int>(sigmas.size())) &&
+            guidance_input.step          = step;
-                                     step < (int)(guidance.slg.layer_end * static_cast<int>(sigmas.size()));
+            guidance_input.schedule_size = sigmas.size();
-            if (is_skiplayer_step) {
+            guidance_input.pred_cond     = &cond_out;
-                LOG_DEBUG("Skipping layers at step %d\n", step);
+            guidance_input.pred_uncond   = uncond_out.empty() ? nullptr : &uncond_out;
-                if (!step_cache.is_step_skipped()) {
+            guidance_input.pred_img_cond = img_cond_out.empty() ? nullptr : &img_cond_out;
-                    skip_cond_out = run_condition(cond,
+
-                                                  cond.c_concat.empty() ? nullptr : &cond.c_concat,
+            sd::guidance::GuiderOutput guided = classifier_free_guidance.forward(guidance_input, {});
-                                                  &skip_layers);
+            if (guided.pred.empty()) {
                    if (skip_cond_out.empty()) {
                return {};
            }
            if (skip_layer_guidance.is_enabled_for_step(guidance_input)) {
                LOG_DEBUG("Skipping layers at step %d\n", step);
                if (!step_cache.is_step_skipped()) {
                    guidance_input.predict_skip_layer = [&]() -> sd::Tensor<float> {
                        return run_condition(cond,
                                             cond.c_concat.empty() ? nullptr : &cond.c_concat,
                                             &skip_layer_guidance.layers());
                    };
                }
            }
-            GGML_ASSERT(!cond_out.empty());
+            guided = skip_layer_guidance.forward(guidance_input, std::move(guided));
-            sd::Tensor<float> latent_result = cond_out;
+            if (guided.pred.empty()) {
-            if (!uncond_out.empty()) {
+                return {};
                if (!img_cond_out.empty()) {
                    latent_result = uncond_out +
                                    img_cfg_scale * (img_cond_out - uncond_out) +
                                    cfg_scale * (cond_out - img_cond_out);
                } else {
                    latent_result = uncond_out + cfg_scale * (cond_out - uncond_out);
                }
            } else if (!img_cond_out.empty()) {
                latent_result = img_cond_out + cfg_scale * (cond_out - img_cond_out);
            }
-            if (is_skiplayer_step && !skip_cond_out.empty()) {
+            denoised = guided.pred * c_out + x * c_skip;
-                latent_result += (cond_out - skip_cond_out) * slg_scale;
+            sd::guidance::GuiderOutput output;
-            }
+            output.pred = denoised;
-            denoised = latent_result * c_out + x * c_skip;
+            if (needs_uncond_denoised) {
-            if (out_uncond_denoised != nullptr) {
+                const sd::Tensor<float>& base_uncond = !uncond_out.empty() ? uncond_out : cond_out;
-                sd::Tensor<float> base_uncond = !uncond_out.empty() ? uncond_out : cond_out;
+                output.pred_uncond                   = base_uncond * c_out + x * c_skip;
                *out_uncond_denoised          = base_uncond * c_out + x * c_skip;
            }
            if (cache_runtime.spectrum_enabled) {
                cache_runtime.spectrum.update(denoised);
@ -2070,7 +2074,8 @@ public:
                preview_image(step, denoised, version, preview.mode, preview.callback, preview.data, false);
            }
            report_sample_progress(step, steps, t0);
-            return denoised;
+            output.pred = denoised;
            return output;
        };
        auto x0_opt = sample_k_diffusion(method, denoise, x_t, sigmas, sampler_rng, eta, is_flow_denoiser, extra_sample_args);