mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-03-24 10:18:51 +00:00
feat: add spectrum caching method (#1322)
This commit is contained in:
parent
c8fb3d2458
commit
dea4980f4e
@ -11,6 +11,7 @@ Caching methods accelerate diffusion inference by reusing intermediate computati
|
|||||||
| `dbcache` | DiT models | Block-level L1 residual threshold |
|
| `dbcache` | DiT models | Block-level L1 residual threshold |
|
||||||
| `taylorseer` | DiT models | Taylor series approximation |
|
| `taylorseer` | DiT models | Taylor series approximation |
|
||||||
| `cache-dit` | DiT models | Combined DBCache + TaylorSeer |
|
| `cache-dit` | DiT models | Combined DBCache + TaylorSeer |
|
||||||
|
| `spectrum` | UNET models | Chebyshev + Taylor output forecasting |
|
||||||
|
|
||||||
### UCache (UNET Models)
|
### UCache (UNET Models)
|
||||||
|
|
||||||
@ -118,6 +119,28 @@ Mask values: `1` = compute, `0` = can cache.
|
|||||||
--scm-policy dynamic
|
--scm-policy dynamic
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Spectrum (UNET Models)
|
||||||
|
|
||||||
|
Spectrum uses Chebyshev polynomial fitting blended with Taylor extrapolation to predict denoised outputs, skipping entire UNet forward passes. Based on the paper [Spectrum: Adaptive Spectral Feature Forecasting for Efficient Diffusion Sampling](https://github.com/tingyu215/Spectrum).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sd-cli -m model.safetensors -p "a cat" --cache-mode spectrum
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Parameter | Description | Default |
|
||||||
|
|-----------|-------------|---------|
|
||||||
|
| `w` | Chebyshev vs Taylor blend weight (0=Taylor, 1=Chebyshev) | 0.40 |
|
||||||
|
| `m` | Chebyshev polynomial degree | 3 |
|
||||||
|
| `lam` | Ridge regression regularization | 1.0 |
|
||||||
|
| `window` | Initial window size (compute every N steps) | 2 |
|
||||||
|
| `flex` | Window growth per computed step after warmup | 0.50 |
|
||||||
|
| `warmup` | Steps to always compute before caching starts | 4 |
|
||||||
|
| `stop` | Stop caching at this fraction of total steps | 0.9 |
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
### Performance Tips
|
### Performance Tips
|
||||||
|
|
||||||
- Start with default thresholds and adjust based on output quality
|
- Start with default thresholds and adjust based on output quality
|
||||||
|
|||||||
@ -138,10 +138,12 @@ Generation Options:
|
|||||||
--skip-layers layers to skip for SLG steps (default: [7,8,9])
|
--skip-layers layers to skip for SLG steps (default: [7,8,9])
|
||||||
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
|
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
|
||||||
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
|
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
|
||||||
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level)
|
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level),
|
||||||
|
'spectrum' (UNET Chebyshev+Taylor forecasting)
|
||||||
--cache-option named cache params (key=value format, comma-separated). easycache/ucache:
|
--cache-option named cache params (key=value format, comma-separated). easycache/ucache:
|
||||||
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=. Examples:
|
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=;
|
||||||
"threshold=0.25" or "threshold=1.5,reset=0"
|
spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples:
|
||||||
|
"threshold=0.25" or "threshold=1.5,reset=0" or "w=0.4,window=2"
|
||||||
--cache-preset cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'
|
--cache-preset cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'
|
||||||
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
||||||
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
||||||
|
|||||||
@ -1422,8 +1422,8 @@ struct SDGenerationParams {
|
|||||||
}
|
}
|
||||||
cache_mode = argv_to_utf8(index, argv);
|
cache_mode = argv_to_utf8(index, argv);
|
||||||
if (cache_mode != "easycache" && cache_mode != "ucache" &&
|
if (cache_mode != "easycache" && cache_mode != "ucache" &&
|
||||||
cache_mode != "dbcache" && cache_mode != "taylorseer" && cache_mode != "cache-dit") {
|
cache_mode != "dbcache" && cache_mode != "taylorseer" && cache_mode != "cache-dit" && cache_mode != "spectrum") {
|
||||||
fprintf(stderr, "error: invalid cache mode '%s', must be 'easycache', 'ucache', 'dbcache', 'taylorseer', or 'cache-dit'\n", cache_mode.c_str());
|
fprintf(stderr, "error: invalid cache mode '%s', must be 'easycache', 'ucache', 'dbcache', 'taylorseer', 'cache-dit', or 'spectrum'\n", cache_mode.c_str());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
@ -1779,7 +1779,23 @@ struct SDGenerationParams {
|
|||||||
} else if (key == "Bn" || key == "bn") {
|
} else if (key == "Bn" || key == "bn") {
|
||||||
cache_params.Bn_compute_blocks = std::stoi(val);
|
cache_params.Bn_compute_blocks = std::stoi(val);
|
||||||
} else if (key == "warmup") {
|
} else if (key == "warmup") {
|
||||||
cache_params.max_warmup_steps = std::stoi(val);
|
if (cache_mode == "spectrum") {
|
||||||
|
cache_params.spectrum_warmup_steps = std::stoi(val);
|
||||||
|
} else {
|
||||||
|
cache_params.max_warmup_steps = std::stoi(val);
|
||||||
|
}
|
||||||
|
} else if (key == "w") {
|
||||||
|
cache_params.spectrum_w = std::stof(val);
|
||||||
|
} else if (key == "m") {
|
||||||
|
cache_params.spectrum_m = std::stoi(val);
|
||||||
|
} else if (key == "lam") {
|
||||||
|
cache_params.spectrum_lam = std::stof(val);
|
||||||
|
} else if (key == "window") {
|
||||||
|
cache_params.spectrum_window_size = std::stoi(val);
|
||||||
|
} else if (key == "flex") {
|
||||||
|
cache_params.spectrum_flex_window = std::stof(val);
|
||||||
|
} else if (key == "stop") {
|
||||||
|
cache_params.spectrum_stop_percent = std::stof(val);
|
||||||
} else {
|
} else {
|
||||||
LOG_ERROR("error: unknown cache parameter '%s'", key.c_str());
|
LOG_ERROR("error: unknown cache parameter '%s'", key.c_str());
|
||||||
return false;
|
return false;
|
||||||
@ -1827,6 +1843,15 @@ struct SDGenerationParams {
|
|||||||
cache_params.Bn_compute_blocks = 0;
|
cache_params.Bn_compute_blocks = 0;
|
||||||
cache_params.residual_diff_threshold = 0.08f;
|
cache_params.residual_diff_threshold = 0.08f;
|
||||||
cache_params.max_warmup_steps = 8;
|
cache_params.max_warmup_steps = 8;
|
||||||
|
} else if (cache_mode == "spectrum") {
|
||||||
|
cache_params.mode = SD_CACHE_SPECTRUM;
|
||||||
|
cache_params.spectrum_w = 0.40f;
|
||||||
|
cache_params.spectrum_m = 3;
|
||||||
|
cache_params.spectrum_lam = 1.0f;
|
||||||
|
cache_params.spectrum_window_size = 2;
|
||||||
|
cache_params.spectrum_flex_window = 0.50f;
|
||||||
|
cache_params.spectrum_warmup_steps = 4;
|
||||||
|
cache_params.spectrum_stop_percent = 0.9f;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cache_option.empty()) {
|
if (!cache_option.empty()) {
|
||||||
|
|||||||
@ -251,6 +251,7 @@ enum sd_cache_mode_t {
|
|||||||
SD_CACHE_DBCACHE,
|
SD_CACHE_DBCACHE,
|
||||||
SD_CACHE_TAYLORSEER,
|
SD_CACHE_TAYLORSEER,
|
||||||
SD_CACHE_CACHE_DIT,
|
SD_CACHE_CACHE_DIT,
|
||||||
|
SD_CACHE_SPECTRUM,
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -271,6 +272,13 @@ typedef struct {
|
|||||||
int taylorseer_skip_interval;
|
int taylorseer_skip_interval;
|
||||||
const char* scm_mask;
|
const char* scm_mask;
|
||||||
bool scm_policy_dynamic;
|
bool scm_policy_dynamic;
|
||||||
|
float spectrum_w;
|
||||||
|
int spectrum_m;
|
||||||
|
float spectrum_lam;
|
||||||
|
int spectrum_window_size;
|
||||||
|
float spectrum_flex_window;
|
||||||
|
int spectrum_warmup_steps;
|
||||||
|
float spectrum_stop_percent;
|
||||||
} sd_cache_params_t;
|
} sd_cache_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|||||||
195
src/spectrum.hpp
Normal file
195
src/spectrum.hpp
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
#ifndef __SPECTRUM_HPP__
|
||||||
|
#define __SPECTRUM_HPP__
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstring>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "ggml_extend.hpp"
|
||||||
|
|
||||||
|
// User-tunable settings of the Spectrum output-forecasting cache.
// The defaults mirror the values documented for `--cache-mode spectrum`.
struct SpectrumConfig {
    float w{0.40f};            // blend weight of the Chebyshev forecast; the Taylor forecast gets (1 - w)
    int m{3};                  // degree of the Chebyshev polynomial fit (m + 1 basis functions)
    float lam{1.0f};           // ridge term added to the diagonal of the normal equations
    int window_size{2};        // initial window size used to decide which steps are computed
    float flex_window{0.50f};  // amount the window grows after each computed step past warmup
    int warmup_steps{4};       // number of leading steps that are always computed
    float stop_percent{0.9f};  // fraction of the total steps after which forecasting is disabled
};
|
||||||
|
|
||||||
|
struct SpectrumState {
|
||||||
|
SpectrumConfig config;
|
||||||
|
int cnt = 0;
|
||||||
|
int num_cached = 0;
|
||||||
|
float curr_ws = 2.0f;
|
||||||
|
int K = 6;
|
||||||
|
int stop_step = 0;
|
||||||
|
int total_steps_skipped = 0;
|
||||||
|
|
||||||
|
std::vector<std::vector<float>> H_buf;
|
||||||
|
std::vector<float> T_buf;
|
||||||
|
|
||||||
|
void init(const SpectrumConfig& cfg, size_t total_steps) {
|
||||||
|
config = cfg;
|
||||||
|
cnt = 0;
|
||||||
|
num_cached = 0;
|
||||||
|
curr_ws = (float)cfg.window_size;
|
||||||
|
K = std::max(cfg.m + 1, 6);
|
||||||
|
stop_step = (int)(cfg.stop_percent * (float)total_steps);
|
||||||
|
total_steps_skipped = 0;
|
||||||
|
H_buf.clear();
|
||||||
|
T_buf.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
float taus(int step_cnt) const {
|
||||||
|
return (step_cnt / 50.0f) * 2.0f - 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool should_predict() {
|
||||||
|
if (cnt < config.warmup_steps)
|
||||||
|
return false;
|
||||||
|
if (stop_step > 0 && cnt >= stop_step)
|
||||||
|
return false;
|
||||||
|
if ((int)H_buf.size() < 2)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int ws = std::max(1, (int)std::floor(curr_ws));
|
||||||
|
return (num_cached + 1) % ws != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void update(const struct ggml_tensor* denoised) {
|
||||||
|
int64_t ne = ggml_nelements(denoised);
|
||||||
|
const float* data = (const float*)denoised->data;
|
||||||
|
|
||||||
|
H_buf.emplace_back(data, data + ne);
|
||||||
|
T_buf.push_back(taus(cnt));
|
||||||
|
|
||||||
|
while ((int)H_buf.size() > K) {
|
||||||
|
H_buf.erase(H_buf.begin());
|
||||||
|
T_buf.erase(T_buf.begin());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cnt >= config.warmup_steps)
|
||||||
|
curr_ws += config.flex_window;
|
||||||
|
|
||||||
|
num_cached = 0;
|
||||||
|
cnt++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void predict(struct ggml_tensor* denoised) {
|
||||||
|
int64_t F = (int64_t)H_buf[0].size();
|
||||||
|
int K_curr = (int)H_buf.size();
|
||||||
|
int M1 = config.m + 1;
|
||||||
|
float tau_at = taus(cnt);
|
||||||
|
|
||||||
|
// Design matrix X: K_curr x M1 (Chebyshev basis)
|
||||||
|
std::vector<float> X(K_curr * M1);
|
||||||
|
for (int i = 0; i < K_curr; i++) {
|
||||||
|
X[i * M1] = 1.0f;
|
||||||
|
if (M1 > 1)
|
||||||
|
X[i * M1 + 1] = T_buf[i];
|
||||||
|
for (int j = 2; j < M1; j++)
|
||||||
|
X[i * M1 + j] = 2.0f * T_buf[i] * X[i * M1 + j - 1] - X[i * M1 + j - 2];
|
||||||
|
}
|
||||||
|
|
||||||
|
// x_star: Chebyshev basis at current tau
|
||||||
|
std::vector<float> x_star(M1);
|
||||||
|
x_star[0] = 1.0f;
|
||||||
|
if (M1 > 1)
|
||||||
|
x_star[1] = tau_at;
|
||||||
|
for (int j = 2; j < M1; j++)
|
||||||
|
x_star[j] = 2.0f * tau_at * x_star[j - 1] - x_star[j - 2];
|
||||||
|
|
||||||
|
// XtX = X^T X + lambda I
|
||||||
|
std::vector<float> XtX(M1 * M1, 0.0f);
|
||||||
|
for (int i = 0; i < M1; i++) {
|
||||||
|
for (int j = 0; j < M1; j++) {
|
||||||
|
float sum = 0.0f;
|
||||||
|
for (int k = 0; k < K_curr; k++)
|
||||||
|
sum += X[k * M1 + i] * X[k * M1 + j];
|
||||||
|
XtX[i * M1 + j] = sum + (i == j ? config.lam : 0.0f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cholesky decomposition
|
||||||
|
std::vector<float> L(M1 * M1, 0.0f);
|
||||||
|
if (!cholesky_decompose(XtX.data(), L.data(), M1)) {
|
||||||
|
float trace = 0.0f;
|
||||||
|
for (int i = 0; i < M1; i++)
|
||||||
|
trace += XtX[i * M1 + i];
|
||||||
|
for (int i = 0; i < M1; i++)
|
||||||
|
XtX[i * M1 + i] += 1e-4f * trace / M1;
|
||||||
|
cholesky_decompose(XtX.data(), L.data(), M1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Solve XtX v = x_star
|
||||||
|
std::vector<float> v(M1);
|
||||||
|
cholesky_solve(L.data(), x_star.data(), v.data(), M1);
|
||||||
|
|
||||||
|
// Prediction weights per history entry
|
||||||
|
std::vector<float> weights(K_curr, 0.0f);
|
||||||
|
for (int k = 0; k < K_curr; k++)
|
||||||
|
for (int j = 0; j < M1; j++)
|
||||||
|
weights[k] += X[k * M1 + j] * v[j];
|
||||||
|
|
||||||
|
// Blend Chebyshev and Taylor predictions
|
||||||
|
float* out = (float*)denoised->data;
|
||||||
|
float w_cheb = config.w;
|
||||||
|
float w_taylor = 1.0f - w_cheb;
|
||||||
|
const float* h_last = H_buf.back().data();
|
||||||
|
const float* h_prev = H_buf[H_buf.size() - 2].data();
|
||||||
|
|
||||||
|
for (int64_t f = 0; f < F; f++) {
|
||||||
|
float pred_cheb = 0.0f;
|
||||||
|
for (int k = 0; k < K_curr; k++)
|
||||||
|
pred_cheb += weights[k] * H_buf[k][f];
|
||||||
|
|
||||||
|
float pred_taylor = h_last[f] + 0.5f * (h_last[f] - h_prev[f]);
|
||||||
|
|
||||||
|
out[f] = w_taylor * pred_taylor + w_cheb * pred_cheb;
|
||||||
|
}
|
||||||
|
|
||||||
|
num_cached++;
|
||||||
|
total_steps_skipped++;
|
||||||
|
cnt++;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static bool cholesky_decompose(const float* A, float* L, int n) {
|
||||||
|
std::memset(L, 0, n * n * sizeof(float));
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
for (int j = 0; j <= i; j++) {
|
||||||
|
float sum = 0.0f;
|
||||||
|
for (int k = 0; k < j; k++)
|
||||||
|
sum += L[i * n + k] * L[j * n + k];
|
||||||
|
if (i == j) {
|
||||||
|
float diag = A[i * n + i] - sum;
|
||||||
|
if (diag <= 0.0f)
|
||||||
|
return false;
|
||||||
|
L[i * n + j] = std::sqrt(diag);
|
||||||
|
} else {
|
||||||
|
L[i * n + j] = (A[i * n + j] - sum) / L[j * n + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void cholesky_solve(const float* L, const float* b, float* x, int n) {
|
||||||
|
std::vector<float> y(n);
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
float sum = 0.0f;
|
||||||
|
for (int j = 0; j < i; j++)
|
||||||
|
sum += L[i * n + j] * y[j];
|
||||||
|
y[i] = (b[i] - sum) / L[i * n + i];
|
||||||
|
}
|
||||||
|
for (int i = n - 1; i >= 0; i--) {
|
||||||
|
float sum = 0.0f;
|
||||||
|
for (int j = i + 1; j < n; j++)
|
||||||
|
sum += L[j * n + i] * x[j];
|
||||||
|
x[i] = (y[i] - sum) / L[i * n + i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // __SPECTRUM_HPP__
|
||||||
@ -16,6 +16,7 @@
|
|||||||
#include "esrgan.hpp"
|
#include "esrgan.hpp"
|
||||||
#include "lora.hpp"
|
#include "lora.hpp"
|
||||||
#include "pmid.hpp"
|
#include "pmid.hpp"
|
||||||
|
#include "spectrum.hpp"
|
||||||
#include "tae.hpp"
|
#include "tae.hpp"
|
||||||
#include "ucache.hpp"
|
#include "ucache.hpp"
|
||||||
#include "vae.hpp"
|
#include "vae.hpp"
|
||||||
@ -1687,9 +1688,11 @@ public:
|
|||||||
EasyCacheState easycache_state;
|
EasyCacheState easycache_state;
|
||||||
UCacheState ucache_state;
|
UCacheState ucache_state;
|
||||||
CacheDitConditionState cachedit_state;
|
CacheDitConditionState cachedit_state;
|
||||||
|
SpectrumState spectrum_state;
|
||||||
bool easycache_enabled = false;
|
bool easycache_enabled = false;
|
||||||
bool ucache_enabled = false;
|
bool ucache_enabled = false;
|
||||||
bool cachedit_enabled = false;
|
bool cachedit_enabled = false;
|
||||||
|
bool spectrum_enabled = false;
|
||||||
|
|
||||||
if (cache_params != nullptr && cache_params->mode != SD_CACHE_DISABLED) {
|
if (cache_params != nullptr && cache_params->mode != SD_CACHE_DISABLED) {
|
||||||
bool percent_valid = true;
|
bool percent_valid = true;
|
||||||
@ -1793,6 +1796,27 @@ public:
|
|||||||
LOG_WARN("CacheDIT requested but could not be initialized for this run");
|
LOG_WARN("CacheDIT requested but could not be initialized for this run");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if (cache_params->mode == SD_CACHE_SPECTRUM) {
|
||||||
|
bool spectrum_supported = sd_version_is_unet(version);
|
||||||
|
if (!spectrum_supported) {
|
||||||
|
LOG_WARN("Spectrum requested but not supported for this model type (only UNET models)");
|
||||||
|
} else {
|
||||||
|
SpectrumConfig spectrum_config;
|
||||||
|
spectrum_config.w = cache_params->spectrum_w;
|
||||||
|
spectrum_config.m = cache_params->spectrum_m;
|
||||||
|
spectrum_config.lam = cache_params->spectrum_lam;
|
||||||
|
spectrum_config.window_size = cache_params->spectrum_window_size;
|
||||||
|
spectrum_config.flex_window = cache_params->spectrum_flex_window;
|
||||||
|
spectrum_config.warmup_steps = cache_params->spectrum_warmup_steps;
|
||||||
|
spectrum_config.stop_percent = cache_params->spectrum_stop_percent;
|
||||||
|
size_t total_steps = sigmas.size() > 0 ? sigmas.size() - 1 : 0;
|
||||||
|
spectrum_state.init(spectrum_config, total_steps);
|
||||||
|
spectrum_enabled = true;
|
||||||
|
LOG_INFO("Spectrum enabled - w: %.2f, m: %d, lam: %.2f, window: %d, flex: %.2f, warmup: %d, stop: %.0f%%",
|
||||||
|
spectrum_config.w, spectrum_config.m, spectrum_config.lam,
|
||||||
|
spectrum_config.window_size, spectrum_config.flex_window,
|
||||||
|
spectrum_config.warmup_steps, spectrum_config.stop_percent * 100.0f);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2015,7 +2039,29 @@ public:
|
|||||||
timesteps_vec.assign(1, t);
|
timesteps_vec.assign(1, t);
|
||||||
}
|
}
|
||||||
|
|
||||||
timesteps_vec = process_timesteps(timesteps_vec, init_latent, denoise_mask);
|
timesteps_vec = process_timesteps(timesteps_vec, init_latent, denoise_mask);
|
||||||
|
|
||||||
|
if (spectrum_enabled && spectrum_state.should_predict()) {
|
||||||
|
spectrum_state.predict(denoised);
|
||||||
|
|
||||||
|
if (denoise_mask != nullptr) {
|
||||||
|
apply_mask(denoised, init_latent, denoise_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sd_preview_cb != nullptr && sd_should_preview_denoised()) {
|
||||||
|
if (step % sd_get_preview_interval() == 0) {
|
||||||
|
preview_image(work_ctx, step, denoised, version, sd_preview_mode, preview_tensor, sd_preview_cb, sd_preview_cb_data, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t t1 = ggml_time_us();
|
||||||
|
if (step > 0 || step == -(int)steps) {
|
||||||
|
int showstep = std::abs(step);
|
||||||
|
pretty_progress(showstep, (int)steps, (t1 - t0) / 1000000.f / showstep);
|
||||||
|
}
|
||||||
|
return denoised;
|
||||||
|
}
|
||||||
|
|
||||||
auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec);
|
auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec);
|
||||||
std::vector<float> guidance_vec(1, guidance.distilled_guidance);
|
std::vector<float> guidance_vec(1, guidance.distilled_guidance);
|
||||||
auto guidance_tensor = vector_to_ggml_tensor(work_ctx, guidance_vec);
|
auto guidance_tensor = vector_to_ggml_tensor(work_ctx, guidance_vec);
|
||||||
@ -2189,6 +2235,10 @@ public:
|
|||||||
vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip;
|
vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (spectrum_enabled) {
|
||||||
|
spectrum_state.update(denoised);
|
||||||
|
}
|
||||||
|
|
||||||
if (denoise_mask != nullptr) {
|
if (denoise_mask != nullptr) {
|
||||||
apply_mask(denoised, init_latent, denoise_mask);
|
apply_mask(denoised, init_latent, denoise_mask);
|
||||||
}
|
}
|
||||||
@ -2280,6 +2330,14 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (spectrum_enabled && spectrum_state.total_steps_skipped > 0) {
|
||||||
|
size_t total_steps = sigmas.size() > 0 ? sigmas.size() - 1 : 0;
|
||||||
|
double speedup = static_cast<double>(total_steps) /
|
||||||
|
static_cast<double>(total_steps - spectrum_state.total_steps_skipped);
|
||||||
|
LOG_INFO("Spectrum skipped %d/%zu steps (%.2fx estimated speedup)",
|
||||||
|
spectrum_state.total_steps_skipped, total_steps, speedup);
|
||||||
|
}
|
||||||
|
|
||||||
if (inverse_noise_scaling) {
|
if (inverse_noise_scaling) {
|
||||||
x = denoiser->inverse_noise_scaling(sigmas[sigmas.size() - 1], x);
|
x = denoiser->inverse_noise_scaling(sigmas[sigmas.size() - 1], x);
|
||||||
}
|
}
|
||||||
@ -2941,6 +2999,13 @@ void sd_cache_params_init(sd_cache_params_t* cache_params) {
|
|||||||
cache_params->taylorseer_skip_interval = 1;
|
cache_params->taylorseer_skip_interval = 1;
|
||||||
cache_params->scm_mask = nullptr;
|
cache_params->scm_mask = nullptr;
|
||||||
cache_params->scm_policy_dynamic = true;
|
cache_params->scm_policy_dynamic = true;
|
||||||
|
cache_params->spectrum_w = 0.40f;
|
||||||
|
cache_params->spectrum_m = 3;
|
||||||
|
cache_params->spectrum_lam = 1.0f;
|
||||||
|
cache_params->spectrum_window_size = 2;
|
||||||
|
cache_params->spectrum_flex_window = 0.50f;
|
||||||
|
cache_params->spectrum_warmup_steps = 4;
|
||||||
|
cache_params->spectrum_stop_percent = 0.9f;
|
||||||
}
|
}
|
||||||
|
|
||||||
void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
|
void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user