From 98916e825686f8952126b2c54e1c4216eb418dc5 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Mon, 22 Dec 2025 23:58:28 +0800
Subject: [PATCH] docs: update README.md

---
 README.md                 |  1 +
 examples/cli/README.md    | 19 +++++++++++++------
 examples/server/README.md | 19 ++++++++++++++++---
 3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/README.md b/README.md
index aa29f84..3c66be9 100644
--- a/README.md
+++ b/README.md
@@ -143,6 +143,7 @@ If you want to improve performance or reduce VRAM/RAM usage, please refer to [pe
 - [Using TAESD to faster decoding](./docs/taesd.md)
 - [Docker](./docs/docker.md)
 - [Quantization and GGUF](./docs/quantization_and_gguf.md)
+- [Inference acceleration via caching](./docs/caching.md)
 
 ## Bindings
 
diff --git a/examples/cli/README.md b/examples/cli/README.md
index 0617a46..568f29d 100644
--- a/examples/cli/README.md
+++ b/examples/cli/README.md
@@ -53,6 +53,9 @@ Context Options:
   --diffusion-fa                           use flash attention in the diffusion model
   --diffusion-conv-direct                  use ggml_conv2d_direct in the diffusion model
   --vae-conv-direct                        use ggml_conv2d_direct in the vae model
+  --circular                               enable circular padding for convolutions
+  --circularx                              enable circular RoPE wrapping on x-axis (width) only
+  --circulary                              enable circular RoPE wrapping on y-axis (height) only
   --chroma-disable-dit-mask                disable dit mask for chroma
   --chroma-enable-t5-mask                  enable t5 mask for chroma
   --type                                   weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
@@ -94,6 +97,7 @@ Generation Options:
   --timestep-shift <int>                   shift timestep for NitroFusion models (default: 0). recommended N for NitroSD-Realism around 250 and 500 for
                                            NitroSD-Vibrant
   --upscale-repeats <int>                  Run the ESRGAN upscaler this many times (default: 1)
+  --upscale-tile-size <int>                tile size for ESRGAN upscaling (default: 128)
   --cfg-scale <float>                      unconditional guidance scale: (default: 7.0)
   --img-cfg-scale <float>                  image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
   --guidance <float>                       distilled guidance scale for models with guidance input (default: 3.5)
@@ -121,18 +125,21 @@ Generation Options:
                                            tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise)
   --high-noise-sampling-method             (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
                                            ddim_trailing, tcd] default: euler for Flux/SD3/Wan, euler_a otherwise
-  --scheduler                              denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm],
-                                           default: discrete
+  --scheduler                              denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
+                                           kl_optimal, lcm], default: discrete
   --sigmas                                 custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0").
   --skip-layers                            layers to skip for SLG steps (default: [7,8,9])
   --high-noise-skip-layers                 (high noise) layers to skip for SLG steps (default: [7,8,9])
   -r, --ref-image                          reference image for Flux Kontext models (can be used multiple times)
   --cache-mode                             caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level)
   --cache-option                           named cache params (key=value format, comma-separated):
-                                           - easycache/ucache: threshold=,start=,end=,decay=,relative=,reset=
-                                           - dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=
-                                           Examples: "threshold=0.25" or "threshold=1.5,reset=0"
+                                           - easycache/ucache:
+                                             threshold=,start=,end=,decay=,relative=,reset=
+                                           - dbcache/taylorseer/cache-dit:
+                                             Fn=,Bn=,threshold=,warmup=
+                                           Examples: "threshold=0.25" or
+                                           "threshold=1.5,reset=0"
   --cache-preset                           cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'
-  --scm-mask                               SCM steps mask: comma-separated 0/1 (1=compute, 0=can cache)
+  --scm-mask                               SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
   --scm-policy                             SCM policy: 'dynamic' (default) or 'static'
 ```
diff --git a/examples/server/README.md b/examples/server/README.md
index ae10496..89d8560 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -45,6 +45,9 @@ Context Options:
   --diffusion-fa                           use flash attention in the diffusion model
   --diffusion-conv-direct                  use ggml_conv2d_direct in the diffusion model
   --vae-conv-direct                        use ggml_conv2d_direct in the vae model
+  --circular                               enable circular padding for convolutions
+  --circularx                              enable circular RoPE wrapping on x-axis (width) only
+  --circulary                              enable circular RoPE wrapping on y-axis (height) only
   --chroma-disable-dit-mask                disable dit mask for chroma
   --chroma-enable-t5-mask                  enable t5 mask for chroma
   --type                                   weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
@@ -114,11 +117,21 @@ Default Generation Options:
                                            tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise)
   --high-noise-sampling-method             (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
                                            ddim_trailing, tcd] default: euler for Flux/SD3/Wan, euler_a otherwise
-  --scheduler                              denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm],
-                                           default: discrete
+  --scheduler                              denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
+                                           kl_optimal, lcm], default: discrete
   --sigmas                                 custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0").
   --skip-layers                            layers to skip for SLG steps (default: [7,8,9])
   --high-noise-skip-layers                 (high noise) layers to skip for SLG steps (default: [7,8,9])
   -r, --ref-image                          reference image for Flux Kontext models (can be used multiple times)
-  --easycache                              enable EasyCache for DiT models with optional "threshold,start_percent,end_percent" (default: 0.2,0.15,0.95)
+  --cache-mode                             caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level)
+  --cache-option                           named cache params (key=value format, comma-separated):
+                                           - easycache/ucache:
+                                             threshold=,start=,end=,decay=,relative=,reset=
+                                           - dbcache/taylorseer/cache-dit:
+                                             Fn=,Bn=,threshold=,warmup=
+                                           Examples: "threshold=0.25" or
+                                           "threshold=1.5,reset=0"
+  --cache-preset                           cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'
+  --scm-mask                               SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
+  --scm-policy                             SCM policy: 'dynamic' (default) or 'static'
 ```