From 0c443ca5f042b8cfd89f40ee2f714a43d9b96f3c Mon Sep 17 00:00:00 2001 From: leejet Date: Wed, 3 Dec 2025 22:30:45 +0800 Subject: [PATCH] update docs --- examples/cli/README.md | 110 +++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 53 deletions(-) diff --git a/examples/cli/README.md b/examples/cli/README.md index f6490ea..add5e3e 100644 --- a/examples/cli/README.md +++ b/examples/cli/README.md @@ -3,7 +3,21 @@ ``` usage: ./bin/sd [options] -Options: +CLI Options: + -o, --output path to write result image to (default: ./output.png) + --preview-path path to write preview image to (default: ./preview.png) + --preview-interval interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at + every step) + --canny apply canny preprocessor (edge detection) + -v, --verbose print extra info + --color colors the logging tags according to level + --taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae) + --preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs + -M, --mode run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen + --preview preview method. 
must be one of the following [none, proj, tae, vae] (default is none) + -h, --help show this help message and exit + +Context Options: -m, --model path to full model --clip_l path to the clip-l text encoder --clip_g path to the clip-g text encoder @@ -20,25 +34,52 @@ Options: --control-net path to control net model --embd-dir embeddings directory --lora-model-dir lora model directory - -i, --init-img path to the init image - --end-img path to the end image, required by flf2v --tensor-type-rules weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0") --photo-maker path to PHOTOMAKER model - --pm-id-images-dir path to PHOTOMAKER input id images dir - --pm-id-embed-path path to PHOTOMAKER v2 id embed + --upscale-model path to esrgan model. + -t, --threads number of threads to use during computation (default: -1). If threads <= 0, then threads will be set to the number of + CPU physical cores + --chroma-t5-mask-pad t5 mask pad size of chroma + --vae-tile-overlap tile overlap for vae tiling, in fraction of tile size (default: 0.5) + --flow-shift shift value for Flow models like SD3.x or WAN (default: auto) + --vae-tiling process vae in tiles to reduce memory usage + --force-sdxl-vae-conv-scale force use of conv scale on sdxl vae + --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed + --control-net-cpu keep controlnet in cpu (for low vram) + --clip-on-cpu keep clip in cpu (for low vram) + --vae-on-cpu keep vae in cpu (for low vram) + --diffusion-fa use flash attention in the diffusion model + --diffusion-conv-direct use ggml_conv2d_direct in the diffusion model + --vae-conv-direct use ggml_conv2d_direct in the vae model + --chroma-disable-dit-mask disable dit mask for chroma + --chroma-enable-t5-mask enable t5 mask for chroma + --type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). 
If not specified, the default is the + type of the weight file + --rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui) + --sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng + --prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow] + --lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights + contain any quantized parameters, the at_runtime mode will be used; otherwise, + immediately will be used. The immediately mode may have precision and + compatibility issues with quantized parameters, but it usually offers faster inference + speed and, in some cases, lower memory usage. The at_runtime mode, on the + other hand, is exactly the opposite. + --vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32) + --vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1 + (overrides --vae-tile-size) + +Generation Options: + -p, --prompt the prompt to render + -n, --negative-prompt the negative prompt (default: "") + -i, --init-img path to the init image + --end-img path to the end image, required by flf2v --mask path to the mask image --control-image path to control image, control net --control-video path to control video frames, It must be a directory path. The video frames inside should be stored as images in lexicographical (character) order. For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, ... etc. - -o, --output path to write result image to (default: ./output.png) - -p, --prompt the prompt to render - -n, --negative-prompt the negative prompt (default: "") - --preview-path path to write preview image to (default: ./preview.png) - --upscale-model path to esrgan model. - -t, --threads number of threads to use during computation (default: -1). 
If threads <= 0, then threads will be set to the number of - CPU physical cores - --upscale-repeats Run the ESRGAN upscaler this many times (default: 1) + --pm-id-images-dir path to PHOTOMAKER input id images dir + --pm-id-embed-path path to PHOTOMAKER v2 id embed -H, --height image height, in pixel space (default: 512) -W, --width image width, in pixel space (default: 512) --steps number of sample steps (default: 20) @@ -46,13 +87,11 @@ Options: --clip-skip ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x -b, --batch-count batch count - --chroma-t5-mask-pad t5 mask pad size of chroma --video-frames video frames (default: 1) --fps fps (default: 24) --timestep-shift shift timestep for NitroFusion models (default: 0). recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant - --preview-interval interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at - every step) + --upscale-repeats Run the ESRGAN upscaler this many times (default: 1) --cfg-scale unconditional guidance scale: (default: 7.0) --img-cfg-scale image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale) --guidance distilled guidance scale for models with guidance input (default: 3.5) @@ -72,53 +111,18 @@ Options: --pm-style-strength --control-strength strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image --moe-boundary timestep boundary for Wan2.2 MoE model. (default: 0.875). 
Only enabled if `--high-noise-steps` is set to -1 - --flow-shift shift value for Flow models like SD3.x or WAN (default: auto) --vace-strength wan vace strength - --vae-tile-overlap tile overlap for vae tiling, in fraction of tile size (default: 0.5) - --vae-tiling process vae in tiles to reduce memory usage - --force-sdxl-vae-conv-scale force use of conv scale on sdxl vae - --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed - --control-net-cpu keep controlnet in cpu (for low vram) - --clip-on-cpu keep clip in cpu (for low vram) - --vae-on-cpu keep vae in cpu (for low vram) - --diffusion-fa use flash attention in the diffusion model - --diffusion-conv-direct use ggml_conv2d_direct in the diffusion model - --vae-conv-direct use ggml_conv2d_direct in the vae model - --canny apply canny preprocessor (edge detection) - -v, --verbose print extra info - --color colors the logging tags according to level - --chroma-disable-dit-mask disable dit mask for chroma - --chroma-enable-t5-mask enable t5 mask for chroma --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1). --disable-auto-resize-ref-image disable auto resize of ref images - --taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae) - --preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs - -M, --mode run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen - --type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the - type of the weight file - --rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui) - --sampler-rng sampler RNG, one of [std_default, cuda, cpu]. 
If not specified, use --rng -s, --seed RNG seed (default: 42, use random seed for < 0) --sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise) - --prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow] - --lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights - contain any quantized parameters, the at_runtime mode will be used; otherwise, - immediately will be used.The immediately mode may have precision and - compatibility issues with quantized parameters, but it usually offers faster inference - speed and, in some cases, lower memory usage. The at_runtime mode, on the - other hand, is exactly the opposite. + --high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, + ddim_trailing, tcd] default: euler for Flux/SD3/Wan, euler_a otherwise --scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, lcm], default: discrete --skip-layers layers to skip for SLG steps (default: [7,8,9]) - --high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, - ddim_trailing, tcd] default: euler for Flux/SD3/Wan, euler_a otherwise --high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9]) -r, --ref-image reference image for Flux Kontext models (can be used multiple times) - -h, --help show this help message and exit - --vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32) - --vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1 - (overrides --vae-tile-size) - 
--preview preview method. must be one of the following [none, proj, tae, vae] (default is none) --easycache enable EasyCache for DiT models with optional "threshold,start_percent,end_percent" (default: 0.2,0.15,0.95) ```