diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b2da00d..9816e42 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -162,7 +162,7 @@ jobs:
 
     strategy:
       matrix:
-        variant: [musa, sycl, vulkan]
+        variant: [musa, sycl, vulkan, cuda]
 
     env:
       REGISTRY: ghcr.io
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b90086e..bad1ba4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,7 +36,6 @@ option(SD_VULKAN                     "sd: vulkan backend" OFF)
 option(SD_OPENCL                     "sd: opencl backend" OFF)
 option(SD_SYCL                       "sd: sycl backend" OFF)
 option(SD_MUSA                       "sd: musa backend" OFF)
-option(SD_FAST_SOFTMAX               "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
 option(SD_BUILD_SHARED_LIBS          "sd: build shared libs" OFF)
 option(SD_BUILD_SHARED_GGML_LIB      "sd: build ggml as a separate shared lib" OFF)
 option(SD_USE_SYSTEM_GGML            "sd: use system-installed GGML library" OFF)
@@ -70,18 +69,12 @@ if (SD_HIPBLAS)
     message("-- Use HIPBLAS as backend stable-diffusion")
     set(GGML_HIP ON)
     add_definitions(-DSD_USE_CUDA)
-    if(SD_FAST_SOFTMAX)
-        set(GGML_CUDA_FAST_SOFTMAX ON)
-    endif()
 endif ()
 
 if(SD_MUSA)
     message("-- Use MUSA as backend stable-diffusion")
     set(GGML_MUSA ON)
     add_definitions(-DSD_USE_CUDA)
-    if(SD_FAST_SOFTMAX)
-        set(GGML_CUDA_FAST_SOFTMAX ON)
-    endif()
 endif()
 
 set(SD_LIB stable-diffusion)
diff --git a/Dockerfile.cuda b/Dockerfile.cuda
new file mode 100644
index 0000000..13fef89
--- /dev/null
+++ b/Dockerfile.cuda
@@ -0,0 +1,25 @@
+ARG CUDA_VERSION=12.6.3
+ARG UBUNTU_VERSION=24.04
+
+# Build stage: compile sd-cli and sd-server with the CUDA backend enabled.
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu${UBUNTU_VERSION} AS build
+RUN apt-get update && apt-get install -y --no-install-recommends build-essential git ccache cmake
+
+WORKDIR /sd.cpp
+
+COPY . .
+
+ARG CUDACXX=/usr/local/cuda/bin/nvcc
+# Single-config generators ignore `--config`, so the build type is set at configure time.
+RUN cmake . -B ./build -DSD_CUDA=ON -DCMAKE_BUILD_TYPE=Release
+RUN cmake --build ./build --config Release --parallel
+
+# Runtime stage: CUDA runtime image plus libgomp1 (GNU OpenMP runtime); only the binaries are copied in.
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-runtime-ubuntu${UBUNTU_VERSION} AS runtime
+RUN apt-get update && \
+    apt-get install --yes --no-install-recommends libgomp1 && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY --from=build /sd.cpp/build/bin/sd-cli /sd-cli
+COPY --from=build /sd.cpp/build/bin/sd-server /sd-server
+ENTRYPOINT [ "/sd-cli" ]
diff --git a/docs/anima.md b/docs/anima.md
index 9c94178..debc370 100644
--- a/docs/anima.md
+++ b/docs/anima.md
@@ -5,6 +5,7 @@
 - Download Anima
     - safetensors: https://huggingface.co/circlestone-labs/Anima/tree/main/split_files/diffusion_models
     - gguf: https://huggingface.co/Bedovyy/Anima-GGUF/tree/main
+    - gguf Anima2: https://huggingface.co/JusteLeo/Anima2-GGUF/tree/main
 - Download vae
     - safetensors: https://huggingface.co/circlestone-labs/Anima/tree/main/split_files/vae
 - Download Qwen3-0.6B-Base
@@ -17,4 +18,4 @@
 .\bin\Release\sd-cli.exe --diffusion-model  ..\..\ComfyUI\models\diffusion_models\anima-preview.safetensors --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors  --llm ..\..\ComfyUI\models\text_encoders\qwen_3_06b_base.safetensors  -p "a lovely cat holding a sign says 'anima.cpp'" --cfg-scale 6.0 --sampling-method euler -v --offload-to-cpu --diffusion-fa
 ```
 
-<img alt="anima image example" src="../assets/anima/example.png" />
\ No newline at end of file
+<img alt="anima image example" src="../assets/anima/example.png" />
diff --git a/docs/caching.md b/docs/caching.md
index 559b26a..cb103ae 100644
--- a/docs/caching.md
+++ b/docs/caching.md
@@ -80,7 +80,7 @@ Uses Taylor series approximation to predict block outputs:
 Combines DBCache and TaylorSeer:
 
 ```bash
---cache-mode cache-dit --cache-preset fast
+--cache-mode cache-dit
 ```
 
 #### Parameters
@@ -92,14 +92,6 @@ Combines DBCache and TaylorSeer:
 | `threshold` | L1 residual difference threshold | 0.08 |
 | `warmup` | Steps before caching starts | 8 |
 
-#### Presets
-
-Available presets: `slow`, `medium`, `fast`, `ultra` (or `s`, `m`, `f`, `u`).
-
-```bash
---cache-mode cache-dit --cache-preset fast
-```
-
 #### SCM Options
 
 Steps Computation Mask controls which steps can be cached:
diff --git a/examples/cli/README.md b/examples/cli/README.md
index 6c2ef1e..904f3c4 100644
--- a/examples/cli/README.md
+++ b/examples/cli/README.md
@@ -139,12 +139,11 @@ Generation Options:
   --high-noise-skip-layers                 (high noise) layers to skip for SLG steps (default: [7,8,9])
   -r, --ref-image                          reference image for Flux Kontext models (can be used multiple times)
   --cache-mode                             caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level),
-                                           'spectrum' (UNET Chebyshev+Taylor forecasting)
+                                           'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)
   --cache-option                           named cache params (key=value format, comma-separated). easycache/ucache:
                                            threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=;
                                            spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples:
                                            "threshold=0.25" or "threshold=1.5,reset=0" or "w=0.4,window=2"
-  --cache-preset                           cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'
   --scm-mask                               SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
   --scm-policy                             SCM policy: 'dynamic' (default) or 'static'
 ```
diff --git a/examples/common/common.hpp b/examples/common/common.hpp
index 9c50c15..9389b03 100644
--- a/examples/common/common.hpp
+++ b/examples/common/common.hpp
@@ -1047,7 +1047,6 @@ struct SDGenerationParams {
 
     std::string cache_mode;
     std::string cache_option;
-    std::string cache_preset;
     std::string scm_mask;
     bool scm_policy_dynamic = true;
     sd_cache_params_t cache_params{};
@@ -1461,21 +1460,6 @@ struct SDGenerationParams {
             return 1;
         };
 
-        auto on_cache_preset_arg = [&](int argc, const char** argv, int index) {
-            if (++index >= argc) {
-                return -1;
-            }
-            cache_preset = argv_to_utf8(index, argv);
-            if (cache_preset != "slow" && cache_preset != "s" && cache_preset != "S" &&
-                cache_preset != "medium" && cache_preset != "m" && cache_preset != "M" &&
-                cache_preset != "fast" && cache_preset != "f" && cache_preset != "F" &&
-                cache_preset != "ultra" && cache_preset != "u" && cache_preset != "U") {
-                fprintf(stderr, "error: invalid cache preset '%s', must be 'slow'/'s', 'medium'/'m', 'fast'/'f', or 'ultra'/'u'\n", cache_preset.c_str());
-                return -1;
-            }
-            return 1;
-        };
-
         options.manual_options = {
             {"-s",
              "--seed",
@@ -1513,16 +1497,12 @@ struct SDGenerationParams {
              on_ref_image_arg},
             {"",
              "--cache-mode",
-             "caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level)",
+             "caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), 'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)",
              on_cache_mode_arg},
             {"",
              "--cache-option",
-             "named cache params (key=value format, comma-separated). easycache/ucache: threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=. Examples: \"threshold=0.25\" or \"threshold=1.5,reset=0\"",
+             "named cache params (key=value format, comma-separated). easycache/ucache: threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=; spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples: \"threshold=0.25\" or \"threshold=1.5,reset=0\"",
              on_cache_option_arg},
-            {"",
-             "--cache-preset",
-             "cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'",
-             on_cache_preset_arg},
             {"",
              "--scm-mask",
              "SCM steps mask for cache-dit: comma-separated 0/1 (e.g., \"1,1,1,0,0,1,0,0,1,0\") - 1=compute, 0=can cache",
@@ -1575,7 +1555,6 @@ struct SDGenerationParams {
         load_if_exists("negative_prompt", negative_prompt);
         load_if_exists("cache_mode", cache_mode);
         load_if_exists("cache_option", cache_option);
-        load_if_exists("cache_preset", cache_preset);
         load_if_exists("scm_mask", scm_mask);
 
         load_if_exists("clip_skip", clip_skip);
@@ -1810,48 +1789,17 @@ struct SDGenerationParams {
 
         if (!cache_mode.empty()) {
             if (cache_mode == "easycache") {
-                cache_params.mode                   = SD_CACHE_EASYCACHE;
-                cache_params.reuse_threshold        = 0.2f;
-                cache_params.start_percent          = 0.15f;
-                cache_params.end_percent            = 0.95f;
-                cache_params.error_decay_rate       = 1.0f;
-                cache_params.use_relative_threshold = true;
-                cache_params.reset_error_on_compute = true;
+                cache_params.mode = SD_CACHE_EASYCACHE;
             } else if (cache_mode == "ucache") {
-                cache_params.mode                   = SD_CACHE_UCACHE;
-                cache_params.reuse_threshold        = 1.0f;
-                cache_params.start_percent          = 0.15f;
-                cache_params.end_percent            = 0.95f;
-                cache_params.error_decay_rate       = 1.0f;
-                cache_params.use_relative_threshold = true;
-                cache_params.reset_error_on_compute = true;
+                cache_params.mode = SD_CACHE_UCACHE;
             } else if (cache_mode == "dbcache") {
-                cache_params.mode                    = SD_CACHE_DBCACHE;
-                cache_params.Fn_compute_blocks       = 8;
-                cache_params.Bn_compute_blocks       = 0;
-                cache_params.residual_diff_threshold = 0.08f;
-                cache_params.max_warmup_steps        = 8;
+                cache_params.mode = SD_CACHE_DBCACHE;
             } else if (cache_mode == "taylorseer") {
-                cache_params.mode                    = SD_CACHE_TAYLORSEER;
-                cache_params.Fn_compute_blocks       = 8;
-                cache_params.Bn_compute_blocks       = 0;
-                cache_params.residual_diff_threshold = 0.08f;
-                cache_params.max_warmup_steps        = 8;
+                cache_params.mode = SD_CACHE_TAYLORSEER;
             } else if (cache_mode == "cache-dit") {
-                cache_params.mode                    = SD_CACHE_CACHE_DIT;
-                cache_params.Fn_compute_blocks       = 8;
-                cache_params.Bn_compute_blocks       = 0;
-                cache_params.residual_diff_threshold = 0.08f;
-                cache_params.max_warmup_steps        = 8;
+                cache_params.mode = SD_CACHE_CACHE_DIT;
             } else if (cache_mode == "spectrum") {
-                cache_params.mode                  = SD_CACHE_SPECTRUM;
-                cache_params.spectrum_w            = 0.40f;
-                cache_params.spectrum_m            = 3;
-                cache_params.spectrum_lam          = 1.0f;
-                cache_params.spectrum_window_size  = 2;
-                cache_params.spectrum_flex_window  = 0.50f;
-                cache_params.spectrum_warmup_steps = 4;
-                cache_params.spectrum_stop_percent = 0.9f;
+                cache_params.mode = SD_CACHE_SPECTRUM;
             }
 
             if (!cache_option.empty()) {
diff --git a/examples/server/README.md b/examples/server/README.md
index 7554436..38deff6 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -129,11 +129,10 @@ Default Generation Options:
   --skip-layers                            layers to skip for SLG steps (default: [7,8,9])
   --high-noise-skip-layers                 (high noise) layers to skip for SLG steps (default: [7,8,9])
   -r, --ref-image                          reference image for Flux Kontext models (can be used multiple times)
-  --cache-mode                             caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level)
+  --cache-mode                             caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), 'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)
   --cache-option                           named cache params (key=value format, comma-separated). easycache/ucache:
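-                                           threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=. Examples:
-                                           "threshold=0.25" or "threshold=1.5,reset=0"
+                                           threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=;
+                                           spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples: "threshold=0.25" or "threshold=1.5,reset=0"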
-  --cache-preset                           cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'
   --scm-mask                               SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
   --scm-policy                             SCM policy: 'dynamic' (default) or 'static'
 ```
diff --git a/src/cache_dit.hpp b/src/cache_dit.hpp
index 6fe104d..4e3cf69 100644
--- a/src/cache_dit.hpp
+++ b/src/cache_dit.hpp
@@ -603,87 +603,6 @@ inline std::vector<int> generate_scm_mask(
     return mask;
 }
 
-inline std::vector<int> get_scm_preset(const std::string& preset, int total_steps) {
-    struct Preset {
-        std::vector<int> compute_bins;
-        std::vector<int> cache_bins;
-    };
-
-    Preset slow   = {{8, 3, 3, 2, 1, 1}, {1, 2, 2, 2, 3}};
-    Preset medium = {{6, 2, 2, 2, 2, 1}, {1, 3, 3, 3, 3}};
-    Preset fast   = {{6, 1, 1, 1, 1, 1}, {1, 3, 4, 5, 4}};
-    Preset ultra  = {{4, 1, 1, 1, 1}, {2, 5, 6, 7}};
-
-    Preset* p = nullptr;
-    if (preset == "slow" || preset == "s" || preset == "S")
-        p = &slow;
-    else if (preset == "medium" || preset == "m" || preset == "M")
-        p = &medium;
-    else if (preset == "fast" || preset == "f" || preset == "F")
-        p = &fast;
-    else if (preset == "ultra" || preset == "u" || preset == "U")
-        p = &ultra;
-    else
-        return {};
-
-    if (total_steps != 28 && total_steps > 0) {
-        float scale = static_cast<float>(total_steps) / 28.0f;
-        std::vector<int> scaled_compute, scaled_cache;
-
-        for (int v : p->compute_bins) {
-            scaled_compute.push_back(std::max(1, static_cast<int>(v * scale + 0.5f)));
-        }
-        for (int v : p->cache_bins) {
-            scaled_cache.push_back(std::max(1, static_cast<int>(v * scale + 0.5f)));
-        }
-
-        return generate_scm_mask(scaled_compute, scaled_cache, total_steps);
-    }
-
-    return generate_scm_mask(p->compute_bins, p->cache_bins, total_steps);
-}
-
-inline float get_preset_threshold(const std::string& preset) {
-    if (preset == "slow" || preset == "s" || preset == "S")
-        return 0.20f;
-    if (preset == "medium" || preset == "m" || preset == "M")
-        return 0.25f;
-    if (preset == "fast" || preset == "f" || preset == "F")
-        return 0.30f;
-    if (preset == "ultra" || preset == "u" || preset == "U")
-        return 0.34f;
-    return 0.08f;
-}
-
-inline int get_preset_warmup(const std::string& preset) {
-    if (preset == "slow" || preset == "s" || preset == "S")
-        return 8;
-    if (preset == "medium" || preset == "m" || preset == "M")
-        return 6;
-    if (preset == "fast" || preset == "f" || preset == "F")
-        return 6;
-    if (preset == "ultra" || preset == "u" || preset == "U")
-        return 4;
-    return 8;
-}
-
-inline int get_preset_Fn(const std::string& preset) {
-    if (preset == "slow" || preset == "s" || preset == "S")
-        return 8;
-    if (preset == "medium" || preset == "m" || preset == "M")
-        return 8;
-    if (preset == "fast" || preset == "f" || preset == "F")
-        return 6;
-    if (preset == "ultra" || preset == "u" || preset == "U")
-        return 4;
-    return 8;
-}
-
-inline int get_preset_Bn(const std::string& preset) {
-    (void)preset;
-    return 0;
-}
-
 inline void parse_dbcache_options(const std::string& opts, DBCacheConfig& cfg) {
     if (opts.empty())
         return;
diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
index 25bce01..b1243d6 100644
--- a/src/stable-diffusion.cpp
+++ b/src/stable-diffusion.cpp
@@ -91,6 +91,19 @@ void calculate_alphas_cumprod(float* alphas_cumprod,
     }
 }
 
+// Effective reuse threshold: INFINITY (the sd_cache_params_init default) becomes 0.2 for EasyCache / 1.0 for UCache.
+static float get_cache_reuse_threshold(const sd_cache_params_t& params) {
+    float reuse_threshold = params.reuse_threshold;
+    if (reuse_threshold == INFINITY) {
+        if (params.mode == SD_CACHE_EASYCACHE) {
+            reuse_threshold = 0.2f;
+        } else if (params.mode == SD_CACHE_UCACHE) {
+            reuse_threshold = 1.0f;
+        }
+    }
+    return std::max(0.0f, reuse_threshold);  // negative values clamp to 0; other modes keep the raw value
+}
+
 /*=============================================== StableDiffusionGGML ================================================*/
 
 class StableDiffusionGGML {
@@ -1680,7 +1693,7 @@ public:
                 } else {
                     EasyCacheConfig easycache_config;
                     easycache_config.enabled         = true;
-                    easycache_config.reuse_threshold = std::max(0.0f, cache_params->reuse_threshold);
+                    easycache_config.reuse_threshold = get_cache_reuse_threshold(*cache_params);
                     easycache_config.start_percent   = cache_params->start_percent;
                     easycache_config.end_percent     = cache_params->end_percent;
                     easycache_state.init(easycache_config, denoiser.get());
@@ -1701,7 +1714,7 @@ public:
                 } else {
                     UCacheConfig ucache_config;
                     ucache_config.enabled                = true;
-                    ucache_config.reuse_threshold        = std::max(0.0f, cache_params->reuse_threshold);
+                    ucache_config.reuse_threshold        = get_cache_reuse_threshold(*cache_params);
                     ucache_config.start_percent          = cache_params->start_percent;
                     ucache_config.end_percent            = cache_params->end_percent;
                     ucache_config.error_decay_rate       = std::max(0.0f, std::min(1.0f, cache_params->error_decay_rate));
@@ -1762,9 +1775,9 @@ public:
                     }
                 }
             } else if (cache_params->mode == SD_CACHE_SPECTRUM) {
-                bool spectrum_supported = sd_version_is_unet(version);
+                bool spectrum_supported = sd_version_is_unet(version) || sd_version_is_dit(version);
                 if (!spectrum_supported) {
-                    LOG_WARN("Spectrum requested but not supported for this model type (only UNET models)");
+                    LOG_WARN("Spectrum requested but not supported for this model type (only UNET and DiT models)");
                 } else {
                     SpectrumConfig spectrum_config;
                     spectrum_config.w            = cache_params->spectrum_w;
@@ -2584,7 +2597,7 @@ enum lora_apply_mode_t str_to_lora_apply_mode(const char* str) {
 void sd_cache_params_init(sd_cache_params_t* cache_params) {
     *cache_params                             = {};
     cache_params->mode                        = SD_CACHE_DISABLED;
-    cache_params->reuse_threshold             = 1.0f;
+    cache_params->reuse_threshold             = INFINITY;
     cache_params->start_percent               = 0.15f;
     cache_params->end_percent                 = 0.95f;
     cache_params->error_decay_rate            = 1.0f;
@@ -2830,7 +2843,7 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
     snprintf(buf + strlen(buf), 4096 - strlen(buf),
              "cache: %s (threshold=%.3f, start=%.2f, end=%.2f)\n",
              cache_mode_str,
-             sd_img_gen_params->cache.reuse_threshold,
+             get_cache_reuse_threshold(sd_img_gen_params->cache),
              sd_img_gen_params->cache.start_percent,
              sd_img_gen_params->cache.end_percent);
     free(sample_params_str);