mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-03-24 02:08:51 +00:00
Merge branch 'master' into vae_refactor
This commit is contained in:
commit
608a4c9b37
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@ -162,7 +162,7 @@ jobs:
|
|||||||
|
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
variant: [musa, sycl, vulkan]
|
variant: [musa, sycl, vulkan, cuda]
|
||||||
|
|
||||||
env:
|
env:
|
||||||
REGISTRY: ghcr.io
|
REGISTRY: ghcr.io
|
||||||
|
|||||||
@ -36,7 +36,6 @@ option(SD_VULKAN "sd: vulkan backend" OFF)
|
|||||||
option(SD_OPENCL "sd: opencl backend" OFF)
|
option(SD_OPENCL "sd: opencl backend" OFF)
|
||||||
option(SD_SYCL "sd: sycl backend" OFF)
|
option(SD_SYCL "sd: sycl backend" OFF)
|
||||||
option(SD_MUSA "sd: musa backend" OFF)
|
option(SD_MUSA "sd: musa backend" OFF)
|
||||||
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
|
|
||||||
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
|
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
|
||||||
option(SD_BUILD_SHARED_GGML_LIB "sd: build ggml as a separate shared lib" OFF)
|
option(SD_BUILD_SHARED_GGML_LIB "sd: build ggml as a separate shared lib" OFF)
|
||||||
option(SD_USE_SYSTEM_GGML "sd: use system-installed GGML library" OFF)
|
option(SD_USE_SYSTEM_GGML "sd: use system-installed GGML library" OFF)
|
||||||
@ -70,18 +69,12 @@ if (SD_HIPBLAS)
|
|||||||
message("-- Use HIPBLAS as backend stable-diffusion")
|
message("-- Use HIPBLAS as backend stable-diffusion")
|
||||||
set(GGML_HIP ON)
|
set(GGML_HIP ON)
|
||||||
add_definitions(-DSD_USE_CUDA)
|
add_definitions(-DSD_USE_CUDA)
|
||||||
if(SD_FAST_SOFTMAX)
|
|
||||||
set(GGML_CUDA_FAST_SOFTMAX ON)
|
|
||||||
endif()
|
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if(SD_MUSA)
|
if(SD_MUSA)
|
||||||
message("-- Use MUSA as backend stable-diffusion")
|
message("-- Use MUSA as backend stable-diffusion")
|
||||||
set(GGML_MUSA ON)
|
set(GGML_MUSA ON)
|
||||||
add_definitions(-DSD_USE_CUDA)
|
add_definitions(-DSD_USE_CUDA)
|
||||||
if(SD_FAST_SOFTMAX)
|
|
||||||
set(GGML_CUDA_FAST_SOFTMAX ON)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(SD_LIB stable-diffusion)
|
set(SD_LIB stable-diffusion)
|
||||||
|
|||||||
25
Dockerfile.cuda
Normal file
25
Dockerfile.cuda
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
ARG CUDA_VERSION=12.6.3
|
||||||
|
ARG UBUNTU_VERSION=24.04
|
||||||
|
|
||||||
|
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu${UBUNTU_VERSION} AS build
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends build-essential git ccache cmake
|
||||||
|
|
||||||
|
WORKDIR /sd.cpp
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
ARG CUDACXX=/usr/local/cuda/bin/nvcc
|
||||||
|
RUN cmake . -B ./build -DSD_CUDA=ON
|
||||||
|
RUN cmake --build ./build --config Release --parallel
|
||||||
|
|
||||||
|
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-runtime-ubuntu${UBUNTU_VERSION} AS runtime
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install --yes --no-install-recommends libgomp1 && \
|
||||||
|
apt-get clean
|
||||||
|
|
||||||
|
COPY --from=build /sd.cpp/build/bin/sd-cli /sd-cli
|
||||||
|
COPY --from=build /sd.cpp/build/bin/sd-server /sd-server
|
||||||
|
|
||||||
|
ENTRYPOINT [ "/sd-cli" ]
|
||||||
@ -5,6 +5,7 @@
|
|||||||
- Download Anima
|
- Download Anima
|
||||||
- safetensors: https://huggingface.co/circlestone-labs/Anima/tree/main/split_files/diffusion_models
|
- safetensors: https://huggingface.co/circlestone-labs/Anima/tree/main/split_files/diffusion_models
|
||||||
- gguf: https://huggingface.co/Bedovyy/Anima-GGUF/tree/main
|
- gguf: https://huggingface.co/Bedovyy/Anima-GGUF/tree/main
|
||||||
|
- gguf Anima2: https://huggingface.co/JusteLeo/Anima2-GGUF/tree/main
|
||||||
- Download vae
|
- Download vae
|
||||||
- safetensors: https://huggingface.co/circlestone-labs/Anima/tree/main/split_files/vae
|
- safetensors: https://huggingface.co/circlestone-labs/Anima/tree/main/split_files/vae
|
||||||
- Download Qwen3-0.6B-Base
|
- Download Qwen3-0.6B-Base
|
||||||
|
|||||||
@ -80,7 +80,7 @@ Uses Taylor series approximation to predict block outputs:
|
|||||||
Combines DBCache and TaylorSeer:
|
Combines DBCache and TaylorSeer:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
--cache-mode cache-dit --cache-preset fast
|
--cache-mode cache-dit
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
@ -92,14 +92,6 @@ Combines DBCache and TaylorSeer:
|
|||||||
| `threshold` | L1 residual difference threshold | 0.08 |
|
| `threshold` | L1 residual difference threshold | 0.08 |
|
||||||
| `warmup` | Steps before caching starts | 8 |
|
| `warmup` | Steps before caching starts | 8 |
|
||||||
|
|
||||||
#### Presets
|
|
||||||
|
|
||||||
Available presets: `slow`, `medium`, `fast`, `ultra` (or `s`, `m`, `f`, `u`).
|
|
||||||
|
|
||||||
```bash
|
|
||||||
--cache-mode cache-dit --cache-preset fast
|
|
||||||
```
|
|
||||||
|
|
||||||
#### SCM Options
|
#### SCM Options
|
||||||
|
|
||||||
Steps Computation Mask controls which steps can be cached:
|
Steps Computation Mask controls which steps can be cached:
|
||||||
|
|||||||
@ -139,12 +139,11 @@ Generation Options:
|
|||||||
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
|
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
|
||||||
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
|
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
|
||||||
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level),
|
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level),
|
||||||
'spectrum' (UNET Chebyshev+Taylor forecasting)
|
'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)
|
||||||
--cache-option named cache params (key=value format, comma-separated). easycache/ucache:
|
--cache-option named cache params (key=value format, comma-separated). easycache/ucache:
|
||||||
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=;
|
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=;
|
||||||
spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples:
|
spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples:
|
||||||
"threshold=0.25" or "threshold=1.5,reset=0" or "w=0.4,window=2"
|
"threshold=0.25" or "threshold=1.5,reset=0" or "w=0.4,window=2"
|
||||||
--cache-preset cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'
|
|
||||||
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
||||||
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
||||||
```
|
```
|
||||||
|
|||||||
@ -1047,7 +1047,6 @@ struct SDGenerationParams {
|
|||||||
|
|
||||||
std::string cache_mode;
|
std::string cache_mode;
|
||||||
std::string cache_option;
|
std::string cache_option;
|
||||||
std::string cache_preset;
|
|
||||||
std::string scm_mask;
|
std::string scm_mask;
|
||||||
bool scm_policy_dynamic = true;
|
bool scm_policy_dynamic = true;
|
||||||
sd_cache_params_t cache_params{};
|
sd_cache_params_t cache_params{};
|
||||||
@ -1461,21 +1460,6 @@ struct SDGenerationParams {
|
|||||||
return 1;
|
return 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto on_cache_preset_arg = [&](int argc, const char** argv, int index) {
|
|
||||||
if (++index >= argc) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
cache_preset = argv_to_utf8(index, argv);
|
|
||||||
if (cache_preset != "slow" && cache_preset != "s" && cache_preset != "S" &&
|
|
||||||
cache_preset != "medium" && cache_preset != "m" && cache_preset != "M" &&
|
|
||||||
cache_preset != "fast" && cache_preset != "f" && cache_preset != "F" &&
|
|
||||||
cache_preset != "ultra" && cache_preset != "u" && cache_preset != "U") {
|
|
||||||
fprintf(stderr, "error: invalid cache preset '%s', must be 'slow'/'s', 'medium'/'m', 'fast'/'f', or 'ultra'/'u'\n", cache_preset.c_str());
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
};
|
|
||||||
|
|
||||||
options.manual_options = {
|
options.manual_options = {
|
||||||
{"-s",
|
{"-s",
|
||||||
"--seed",
|
"--seed",
|
||||||
@ -1513,16 +1497,12 @@ struct SDGenerationParams {
|
|||||||
on_ref_image_arg},
|
on_ref_image_arg},
|
||||||
{"",
|
{"",
|
||||||
"--cache-mode",
|
"--cache-mode",
|
||||||
"caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level)",
|
"caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), 'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)",
|
||||||
on_cache_mode_arg},
|
on_cache_mode_arg},
|
||||||
{"",
|
{"",
|
||||||
"--cache-option",
|
"--cache-option",
|
||||||
"named cache params (key=value format, comma-separated). easycache/ucache: threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=. Examples: \"threshold=0.25\" or \"threshold=1.5,reset=0\"",
|
"named cache params (key=value format, comma-separated). easycache/ucache: threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=; spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples: \"threshold=0.25\" or \"threshold=1.5,reset=0\"",
|
||||||
on_cache_option_arg},
|
on_cache_option_arg},
|
||||||
{"",
|
|
||||||
"--cache-preset",
|
|
||||||
"cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'",
|
|
||||||
on_cache_preset_arg},
|
|
||||||
{"",
|
{"",
|
||||||
"--scm-mask",
|
"--scm-mask",
|
||||||
"SCM steps mask for cache-dit: comma-separated 0/1 (e.g., \"1,1,1,0,0,1,0,0,1,0\") - 1=compute, 0=can cache",
|
"SCM steps mask for cache-dit: comma-separated 0/1 (e.g., \"1,1,1,0,0,1,0,0,1,0\") - 1=compute, 0=can cache",
|
||||||
@ -1575,7 +1555,6 @@ struct SDGenerationParams {
|
|||||||
load_if_exists("negative_prompt", negative_prompt);
|
load_if_exists("negative_prompt", negative_prompt);
|
||||||
load_if_exists("cache_mode", cache_mode);
|
load_if_exists("cache_mode", cache_mode);
|
||||||
load_if_exists("cache_option", cache_option);
|
load_if_exists("cache_option", cache_option);
|
||||||
load_if_exists("cache_preset", cache_preset);
|
|
||||||
load_if_exists("scm_mask", scm_mask);
|
load_if_exists("scm_mask", scm_mask);
|
||||||
|
|
||||||
load_if_exists("clip_skip", clip_skip);
|
load_if_exists("clip_skip", clip_skip);
|
||||||
@ -1811,47 +1790,16 @@ struct SDGenerationParams {
|
|||||||
if (!cache_mode.empty()) {
|
if (!cache_mode.empty()) {
|
||||||
if (cache_mode == "easycache") {
|
if (cache_mode == "easycache") {
|
||||||
cache_params.mode = SD_CACHE_EASYCACHE;
|
cache_params.mode = SD_CACHE_EASYCACHE;
|
||||||
cache_params.reuse_threshold = 0.2f;
|
|
||||||
cache_params.start_percent = 0.15f;
|
|
||||||
cache_params.end_percent = 0.95f;
|
|
||||||
cache_params.error_decay_rate = 1.0f;
|
|
||||||
cache_params.use_relative_threshold = true;
|
|
||||||
cache_params.reset_error_on_compute = true;
|
|
||||||
} else if (cache_mode == "ucache") {
|
} else if (cache_mode == "ucache") {
|
||||||
cache_params.mode = SD_CACHE_UCACHE;
|
cache_params.mode = SD_CACHE_UCACHE;
|
||||||
cache_params.reuse_threshold = 1.0f;
|
|
||||||
cache_params.start_percent = 0.15f;
|
|
||||||
cache_params.end_percent = 0.95f;
|
|
||||||
cache_params.error_decay_rate = 1.0f;
|
|
||||||
cache_params.use_relative_threshold = true;
|
|
||||||
cache_params.reset_error_on_compute = true;
|
|
||||||
} else if (cache_mode == "dbcache") {
|
} else if (cache_mode == "dbcache") {
|
||||||
cache_params.mode = SD_CACHE_DBCACHE;
|
cache_params.mode = SD_CACHE_DBCACHE;
|
||||||
cache_params.Fn_compute_blocks = 8;
|
|
||||||
cache_params.Bn_compute_blocks = 0;
|
|
||||||
cache_params.residual_diff_threshold = 0.08f;
|
|
||||||
cache_params.max_warmup_steps = 8;
|
|
||||||
} else if (cache_mode == "taylorseer") {
|
} else if (cache_mode == "taylorseer") {
|
||||||
cache_params.mode = SD_CACHE_TAYLORSEER;
|
cache_params.mode = SD_CACHE_TAYLORSEER;
|
||||||
cache_params.Fn_compute_blocks = 8;
|
|
||||||
cache_params.Bn_compute_blocks = 0;
|
|
||||||
cache_params.residual_diff_threshold = 0.08f;
|
|
||||||
cache_params.max_warmup_steps = 8;
|
|
||||||
} else if (cache_mode == "cache-dit") {
|
} else if (cache_mode == "cache-dit") {
|
||||||
cache_params.mode = SD_CACHE_CACHE_DIT;
|
cache_params.mode = SD_CACHE_CACHE_DIT;
|
||||||
cache_params.Fn_compute_blocks = 8;
|
|
||||||
cache_params.Bn_compute_blocks = 0;
|
|
||||||
cache_params.residual_diff_threshold = 0.08f;
|
|
||||||
cache_params.max_warmup_steps = 8;
|
|
||||||
} else if (cache_mode == "spectrum") {
|
} else if (cache_mode == "spectrum") {
|
||||||
cache_params.mode = SD_CACHE_SPECTRUM;
|
cache_params.mode = SD_CACHE_SPECTRUM;
|
||||||
cache_params.spectrum_w = 0.40f;
|
|
||||||
cache_params.spectrum_m = 3;
|
|
||||||
cache_params.spectrum_lam = 1.0f;
|
|
||||||
cache_params.spectrum_window_size = 2;
|
|
||||||
cache_params.spectrum_flex_window = 0.50f;
|
|
||||||
cache_params.spectrum_warmup_steps = 4;
|
|
||||||
cache_params.spectrum_stop_percent = 0.9f;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cache_option.empty()) {
|
if (!cache_option.empty()) {
|
||||||
|
|||||||
@ -129,11 +129,10 @@ Default Generation Options:
|
|||||||
--skip-layers layers to skip for SLG steps (default: [7,8,9])
|
--skip-layers layers to skip for SLG steps (default: [7,8,9])
|
||||||
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
|
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
|
||||||
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
|
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
|
||||||
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level)
|
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), 'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)
|
||||||
--cache-option named cache params (key=value format, comma-separated). easycache/ucache:
|
--cache-option named cache params (key=value format, comma-separated). easycache/ucache:
|
||||||
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=. Examples:
|
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=. Examples:
|
||||||
"threshold=0.25" or "threshold=1.5,reset=0"
|
"threshold=0.25" or "threshold=1.5,reset=0"
|
||||||
--cache-preset cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'
|
|
||||||
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
||||||
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
||||||
```
|
```
|
||||||
|
|||||||
@ -603,87 +603,6 @@ inline std::vector<int> generate_scm_mask(
|
|||||||
return mask;
|
return mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::vector<int> get_scm_preset(const std::string& preset, int total_steps) {
|
|
||||||
struct Preset {
|
|
||||||
std::vector<int> compute_bins;
|
|
||||||
std::vector<int> cache_bins;
|
|
||||||
};
|
|
||||||
|
|
||||||
Preset slow = {{8, 3, 3, 2, 1, 1}, {1, 2, 2, 2, 3}};
|
|
||||||
Preset medium = {{6, 2, 2, 2, 2, 1}, {1, 3, 3, 3, 3}};
|
|
||||||
Preset fast = {{6, 1, 1, 1, 1, 1}, {1, 3, 4, 5, 4}};
|
|
||||||
Preset ultra = {{4, 1, 1, 1, 1}, {2, 5, 6, 7}};
|
|
||||||
|
|
||||||
Preset* p = nullptr;
|
|
||||||
if (preset == "slow" || preset == "s" || preset == "S")
|
|
||||||
p = &slow;
|
|
||||||
else if (preset == "medium" || preset == "m" || preset == "M")
|
|
||||||
p = &medium;
|
|
||||||
else if (preset == "fast" || preset == "f" || preset == "F")
|
|
||||||
p = &fast;
|
|
||||||
else if (preset == "ultra" || preset == "u" || preset == "U")
|
|
||||||
p = &ultra;
|
|
||||||
else
|
|
||||||
return {};
|
|
||||||
|
|
||||||
if (total_steps != 28 && total_steps > 0) {
|
|
||||||
float scale = static_cast<float>(total_steps) / 28.0f;
|
|
||||||
std::vector<int> scaled_compute, scaled_cache;
|
|
||||||
|
|
||||||
for (int v : p->compute_bins) {
|
|
||||||
scaled_compute.push_back(std::max(1, static_cast<int>(v * scale + 0.5f)));
|
|
||||||
}
|
|
||||||
for (int v : p->cache_bins) {
|
|
||||||
scaled_cache.push_back(std::max(1, static_cast<int>(v * scale + 0.5f)));
|
|
||||||
}
|
|
||||||
|
|
||||||
return generate_scm_mask(scaled_compute, scaled_cache, total_steps);
|
|
||||||
}
|
|
||||||
|
|
||||||
return generate_scm_mask(p->compute_bins, p->cache_bins, total_steps);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline float get_preset_threshold(const std::string& preset) {
|
|
||||||
if (preset == "slow" || preset == "s" || preset == "S")
|
|
||||||
return 0.20f;
|
|
||||||
if (preset == "medium" || preset == "m" || preset == "M")
|
|
||||||
return 0.25f;
|
|
||||||
if (preset == "fast" || preset == "f" || preset == "F")
|
|
||||||
return 0.30f;
|
|
||||||
if (preset == "ultra" || preset == "u" || preset == "U")
|
|
||||||
return 0.34f;
|
|
||||||
return 0.08f;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int get_preset_warmup(const std::string& preset) {
|
|
||||||
if (preset == "slow" || preset == "s" || preset == "S")
|
|
||||||
return 8;
|
|
||||||
if (preset == "medium" || preset == "m" || preset == "M")
|
|
||||||
return 6;
|
|
||||||
if (preset == "fast" || preset == "f" || preset == "F")
|
|
||||||
return 6;
|
|
||||||
if (preset == "ultra" || preset == "u" || preset == "U")
|
|
||||||
return 4;
|
|
||||||
return 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int get_preset_Fn(const std::string& preset) {
|
|
||||||
if (preset == "slow" || preset == "s" || preset == "S")
|
|
||||||
return 8;
|
|
||||||
if (preset == "medium" || preset == "m" || preset == "M")
|
|
||||||
return 8;
|
|
||||||
if (preset == "fast" || preset == "f" || preset == "F")
|
|
||||||
return 6;
|
|
||||||
if (preset == "ultra" || preset == "u" || preset == "U")
|
|
||||||
return 4;
|
|
||||||
return 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int get_preset_Bn(const std::string& preset) {
|
|
||||||
(void)preset;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void parse_dbcache_options(const std::string& opts, DBCacheConfig& cfg) {
|
inline void parse_dbcache_options(const std::string& opts, DBCacheConfig& cfg) {
|
||||||
if (opts.empty())
|
if (opts.empty())
|
||||||
return;
|
return;
|
||||||
|
|||||||
@ -91,6 +91,19 @@ void calculate_alphas_cumprod(float* alphas_cumprod,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static float get_cache_reuse_threshold(const sd_cache_params_t& params) {
|
||||||
|
float reuse_threshold = params.reuse_threshold;
|
||||||
|
if (reuse_threshold == INFINITY) {
|
||||||
|
if (params.mode == SD_CACHE_EASYCACHE) {
|
||||||
|
reuse_threshold = 0.2;
|
||||||
|
}
|
||||||
|
else if (params.mode == SD_CACHE_UCACHE) {
|
||||||
|
reuse_threshold = 1.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return std::max(0.0f, reuse_threshold);
|
||||||
|
}
|
||||||
|
|
||||||
/*=============================================== StableDiffusionGGML ================================================*/
|
/*=============================================== StableDiffusionGGML ================================================*/
|
||||||
|
|
||||||
class StableDiffusionGGML {
|
class StableDiffusionGGML {
|
||||||
@ -1680,7 +1693,7 @@ public:
|
|||||||
} else {
|
} else {
|
||||||
EasyCacheConfig easycache_config;
|
EasyCacheConfig easycache_config;
|
||||||
easycache_config.enabled = true;
|
easycache_config.enabled = true;
|
||||||
easycache_config.reuse_threshold = std::max(0.0f, cache_params->reuse_threshold);
|
easycache_config.reuse_threshold = get_cache_reuse_threshold(*cache_params);
|
||||||
easycache_config.start_percent = cache_params->start_percent;
|
easycache_config.start_percent = cache_params->start_percent;
|
||||||
easycache_config.end_percent = cache_params->end_percent;
|
easycache_config.end_percent = cache_params->end_percent;
|
||||||
easycache_state.init(easycache_config, denoiser.get());
|
easycache_state.init(easycache_config, denoiser.get());
|
||||||
@ -1701,7 +1714,7 @@ public:
|
|||||||
} else {
|
} else {
|
||||||
UCacheConfig ucache_config;
|
UCacheConfig ucache_config;
|
||||||
ucache_config.enabled = true;
|
ucache_config.enabled = true;
|
||||||
ucache_config.reuse_threshold = std::max(0.0f, cache_params->reuse_threshold);
|
ucache_config.reuse_threshold = get_cache_reuse_threshold(*cache_params);
|
||||||
ucache_config.start_percent = cache_params->start_percent;
|
ucache_config.start_percent = cache_params->start_percent;
|
||||||
ucache_config.end_percent = cache_params->end_percent;
|
ucache_config.end_percent = cache_params->end_percent;
|
||||||
ucache_config.error_decay_rate = std::max(0.0f, std::min(1.0f, cache_params->error_decay_rate));
|
ucache_config.error_decay_rate = std::max(0.0f, std::min(1.0f, cache_params->error_decay_rate));
|
||||||
@ -1762,9 +1775,9 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (cache_params->mode == SD_CACHE_SPECTRUM) {
|
} else if (cache_params->mode == SD_CACHE_SPECTRUM) {
|
||||||
bool spectrum_supported = sd_version_is_unet(version);
|
bool spectrum_supported = sd_version_is_unet(version) || sd_version_is_dit(version);
|
||||||
if (!spectrum_supported) {
|
if (!spectrum_supported) {
|
||||||
LOG_WARN("Spectrum requested but not supported for this model type (only UNET models)");
|
LOG_WARN("Spectrum requested but not supported for this model type (only UNET and DiT models)");
|
||||||
} else {
|
} else {
|
||||||
SpectrumConfig spectrum_config;
|
SpectrumConfig spectrum_config;
|
||||||
spectrum_config.w = cache_params->spectrum_w;
|
spectrum_config.w = cache_params->spectrum_w;
|
||||||
@ -2584,7 +2597,7 @@ enum lora_apply_mode_t str_to_lora_apply_mode(const char* str) {
|
|||||||
void sd_cache_params_init(sd_cache_params_t* cache_params) {
|
void sd_cache_params_init(sd_cache_params_t* cache_params) {
|
||||||
*cache_params = {};
|
*cache_params = {};
|
||||||
cache_params->mode = SD_CACHE_DISABLED;
|
cache_params->mode = SD_CACHE_DISABLED;
|
||||||
cache_params->reuse_threshold = 1.0f;
|
cache_params->reuse_threshold = INFINITY;
|
||||||
cache_params->start_percent = 0.15f;
|
cache_params->start_percent = 0.15f;
|
||||||
cache_params->end_percent = 0.95f;
|
cache_params->end_percent = 0.95f;
|
||||||
cache_params->error_decay_rate = 1.0f;
|
cache_params->error_decay_rate = 1.0f;
|
||||||
@ -2830,7 +2843,7 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
|||||||
snprintf(buf + strlen(buf), 4096 - strlen(buf),
|
snprintf(buf + strlen(buf), 4096 - strlen(buf),
|
||||||
"cache: %s (threshold=%.3f, start=%.2f, end=%.2f)\n",
|
"cache: %s (threshold=%.3f, start=%.2f, end=%.2f)\n",
|
||||||
cache_mode_str,
|
cache_mode_str,
|
||||||
sd_img_gen_params->cache.reuse_threshold,
|
get_cache_reuse_threshold(sd_img_gen_params->cache),
|
||||||
sd_img_gen_params->cache.start_percent,
|
sd_img_gen_params->cache.start_percent,
|
||||||
sd_img_gen_params->cache.end_percent);
|
sd_img_gen_params->cache.end_percent);
|
||||||
free(sample_params_str);
|
free(sample_params_str);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user