mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-12 21:38:58 +00:00
refactor: optimize option printing (#900)
This commit is contained in:
parent
90ef5f8246
commit
0723ee51c9
@ -1,114 +1,110 @@
|
||||
# Run
|
||||
|
||||
```
|
||||
usage: ./bin/sd [arguments]
|
||||
usage: ./bin/sd [options]
|
||||
|
||||
arguments:
|
||||
-h, --help show this help message and exit
|
||||
-M, --mode [MODE] run mode, one of: [img_gen, vid_gen, upscale, convert], default: img_gen
|
||||
-t, --threads N number of threads to use during computation (default: -1)
|
||||
If threads <= 0, then threads will be set to the number of CPU physical cores
|
||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
|
||||
-m, --model [MODEL] path to full model
|
||||
--diffusion-model path to the standalone diffusion model
|
||||
--high-noise-diffusion-model path to the standalone high noise diffusion model
|
||||
--clip_l path to the clip-l text encoder
|
||||
--clip_g path to the clip-g text encoder
|
||||
--clip_vision path to the clip-vision encoder
|
||||
--t5xxl path to the t5xxl text encoder
|
||||
--qwen2vl path to the qwen2vl text encoder
|
||||
--qwen2vl_vision path to the qwen2vl vit
|
||||
--vae [VAE] path to vae
|
||||
--taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
|
||||
--control-net [CONTROL_PATH] path to control net model
|
||||
--embd-dir [EMBEDDING_PATH] path to embeddings
|
||||
--upscale-model [ESRGAN_PATH] path to esrgan model. For img_gen mode, upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now
|
||||
--upscale-repeats Run the ESRGAN upscaler this many times (default 1)
|
||||
--type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)
|
||||
If not specified, the default is the type of the weight file
|
||||
--tensor-type-rules [EXPRESSION] weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0")
|
||||
--lora-model-dir [DIR] lora model directory
|
||||
-i, --init-img [IMAGE] path to the init image, required by img2img
|
||||
--mask [MASK] path to the mask image, required by img2img with mask
|
||||
--end-img [IMAGE] path to the end image, required by flf2v
|
||||
--control-image [IMAGE] path to image condition, control net
|
||||
-r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times)
|
||||
--control-video [PATH] path to control video frames, It must be a directory path.
|
||||
The video frames inside should be stored as images in lexicographical (character) order
|
||||
For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, ... etc.
|
||||
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
|
||||
--disable-auto-resize-ref-image disable auto resize of ref images
|
||||
-o, --output OUTPUT path to write result image to (default: ./output.png)
|
||||
-p, --prompt [PROMPT] the prompt to render
|
||||
-n, --negative-prompt PROMPT the negative prompt (default: "")
|
||||
--cfg-scale SCALE unconditional guidance scale: (default: 7.0)
|
||||
--img-cfg-scale SCALE image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
|
||||
--guidance SCALE distilled guidance scale for models with guidance input (default: 3.5)
|
||||
--slg-scale SCALE skip layer guidance (SLG) scale, only for DiT models: (default: 0)
|
||||
0 means disabled, a value of 2.5 is nice for sd3.5 medium
|
||||
--eta SCALE eta in DDIM, only for DDIM and TCD: (default: 0)
|
||||
--skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])
|
||||
--skip-layer-start START SLG enabling point: (default: 0.01)
|
||||
--skip-layer-end END SLG disabling point: (default: 0.2)
|
||||
--scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)
|
||||
--sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
|
||||
sampling method (default: "euler" for Flux/SD3/Wan, "euler_a" otherwise)
|
||||
--timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
|
||||
--steps STEPS number of sample steps (default: 20)
|
||||
--high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)
|
||||
--high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
|
||||
--high-noise-guidance SCALE (high noise) distilled guidance scale for models with guidance input (default: 3.5)
|
||||
--high-noise-slg-scale SCALE (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
|
||||
0 means disabled, a value of 2.5 is nice for sd3.5 medium
|
||||
--high-noise-eta SCALE (high noise) eta in DDIM, only for DDIM and TCD: (default: 0)
|
||||
--high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])
|
||||
--high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)
|
||||
--high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)
|
||||
--high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)
|
||||
--high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
|
||||
(high noise) sampling method (default: "euler_a")
|
||||
--high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)
|
||||
SLG will be enabled at step int([STEPS]*[START]) and disabled at int([STEPS]*[END])
|
||||
--strength STRENGTH strength for noising/unnoising (default: 0.75)
|
||||
--control-strength STRENGTH strength to apply Control Net (default: 0.9)
|
||||
1.0 corresponds to full destruction of information in init image
|
||||
-H, --height H image height, in pixel space (default: 512)
|
||||
-W, --width W image width, in pixel space (default: 512)
|
||||
--rng {std_default, cuda} RNG (default: cuda)
|
||||
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
|
||||
-b, --batch-count COUNT number of images to generate
|
||||
--prediction {eps, v, edm_v, sd3_flow, flux_flow} Prediction type override
|
||||
--clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
|
||||
<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
|
||||
--vae-tiling process vae in tiles to reduce memory usage
|
||||
--vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)
|
||||
--vae-relative-tile-size [X]x[Y] relative tile size for vae tiling, in fraction of image size if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)
|
||||
--vae-tile-overlap OVERLAP tile overlap for vae tiling, in fraction of tile size (default: 0.5)
|
||||
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||
--vae-on-cpu keep vae in cpu (for low vram)
|
||||
--clip-on-cpu keep clip in cpu (for low vram)
|
||||
--diffusion-fa use flash attention in the diffusion model (for low vram)
|
||||
Might lower quality, since it implies converting k and v to f16.
|
||||
This might crash if it is not supported by the backend.
|
||||
--diffusion-conv-direct use Conv2d direct in the diffusion model
|
||||
This might crash if it is not supported by the backend.
|
||||
--vae-conv-direct use Conv2d direct in the vae model (should improve the performance)
|
||||
This might crash if it is not supported by the backend.
|
||||
--control-net-cpu keep controlnet in cpu (for low vram)
|
||||
--canny apply canny preprocessor (edge detection)
|
||||
--color colors the logging tags according to level
|
||||
--chroma-disable-dit-mask disable dit mask for chroma
|
||||
--chroma-enable-t5-mask enable t5 mask for chroma
|
||||
--chroma-t5-mask-pad PAD_SIZE t5 mask pad size of chroma
|
||||
--video-frames video frames (default: 1)
|
||||
--fps fps (default: 24)
|
||||
--moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. (default: 0.875)
|
||||
only enabled if `--high-noise-steps` is set to -1
|
||||
--flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)
|
||||
--vace-strength wan vace strength
|
||||
--photo-maker path to PHOTOMAKER model
|
||||
--pm-id-images-dir [DIR] path to PHOTOMAKER input id images dir
|
||||
--pm-id-embed-path [PATH] path to PHOTOMAKER v2 id embed
|
||||
--pm-style-strength strength for keeping PHOTOMAKER input identity (default: 20)
|
||||
-v, --verbose print extra info
|
||||
Options:
|
||||
-m, --model <string> path to full model
|
||||
--clip_l <string> path to the clip-l text encoder
|
||||
--clip_g <string> path to the clip-g text encoder
|
||||
--clip_vision <string> path to the clip-vision encoder
|
||||
--t5xxl <string> path to the t5xxl text encoder
|
||||
--qwen2vl <string> path to the qwen2vl text encoder
|
||||
--qwen2vl_vision <string> path to the qwen2vl vit
|
||||
--diffusion-model <string> path to the standalone diffusion model
|
||||
--high-noise-diffusion-model <string> path to the standalone high noise diffusion model
|
||||
--vae <string> path to standalone vae model
|
||||
--taesd <string> path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
|
||||
--control-net <string> path to control net model
|
||||
--embd-dir <string> embeddings directory
|
||||
--lora-model-dir <string> lora model directory
|
||||
-i, --init-img <string> path to the init image
|
||||
--end-img <string> path to the end image, required by flf2v
|
||||
--tensor-type-rules <string> weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0")
|
||||
--photo-maker <string> path to PHOTOMAKER model
|
||||
--pm-id-images-dir <string> path to PHOTOMAKER input id images dir
|
||||
--pm-id-embed-path <string> path to PHOTOMAKER v2 id embed
|
||||
--mask <string> path to the mask image
|
||||
--control-image <string> path to control image, control net
|
||||
--control-video <string> path to control video frames, It must be a directory path. The video frames inside should be stored as images in
|
||||
lexicographical (character) order. For example, if the control video path is
|
||||
`frames`, the directory contain images such as 00.png, 01.png, ... etc.
|
||||
-o, --output <string> path to write result image to (default: ./output.png)
|
||||
-p, --prompt <string> the prompt to render
|
||||
-n, --negative-prompt <string> the negative prompt (default: "")
|
||||
--upscale-model <string> path to esrgan model.
|
||||
-t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0, then threads will be set to the number of
|
||||
CPU physical cores
|
||||
--upscale-repeats <int> Run the ESRGAN upscaler this many times (default: 1)
|
||||
-H, --height <int> image height, in pixel space (default: 512)
|
||||
-W, --width <int> image width, in pixel space (default: 512)
|
||||
--steps <int> number of sample steps (default: 20)
|
||||
--high-noise-steps <int> (high noise) number of sample steps (default: -1 = auto)
|
||||
--clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). <= 0 represents unspecified,
|
||||
will be 1 for SD1.x, 2 for SD2.x
|
||||
-b, --batch-count <int> batch count
|
||||
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
||||
--video-frames <int> video frames (default: 1)
|
||||
--fps <int> fps (default: 24)
|
||||
--timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for NitroSD-Realism around 250 and 500 for
|
||||
NitroSD-Vibrant
|
||||
--cfg-scale <float> unconditional guidance scale: (default: 7.0)
|
||||
--img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
|
||||
--guidance <float> distilled guidance scale for models with guidance input (default: 3.5)
|
||||
--slg-scale <float> skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means disabled, a value of 2.5 is nice for sd3.5
|
||||
medium
|
||||
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
||||
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
||||
--eta <float> eta in DDIM, only for DDIM and TCD (default: 0)
|
||||
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
|
||||
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
||||
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
|
||||
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
|
||||
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
|
||||
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
|
||||
--high-noise-eta <float> (high noise) eta in DDIM, only for DDIM and TCD (default: 0)
|
||||
--strength <float> strength for noising/unnoising (default: 0.75)
|
||||
--pm-style-strength <float> strength for keeping PHOTOMAKER input identity (default: 20)
|
||||
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
|
||||
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if `--high-noise-steps` is set to -1
|
||||
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
||||
--vace-strength <float> wan vace strength
|
||||
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
|
||||
--vae-tiling process vae in tiles to reduce memory usage
|
||||
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
|
||||
--control-net-cpu keep controlnet in cpu (for low vram)
|
||||
--clip-on-cpu keep clip in cpu (for low vram)
|
||||
--vae-on-cpu keep vae in cpu (for low vram)
|
||||
--diffusion-fa use flash attention in the diffusion model
|
||||
--diffusion-conv-direct use ggml_conv2d_direct in the diffusion model
|
||||
--vae-conv-direct use ggml_conv2d_direct in the vae model
|
||||
--canny apply canny preprocessor (edge detection)
|
||||
-v, --verbose print extra info
|
||||
--color colors the logging tags according to level
|
||||
--chroma-disable-dit-mask disable dit mask for chroma
|
||||
--chroma-enable-t5-mask enable t5 mask for chroma
|
||||
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
|
||||
--disable-auto-resize-ref-image disable auto resize of ref images
|
||||
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen
|
||||
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
|
||||
type of the weight file
|
||||
--rng RNG, one of [std_default, cuda], default: cuda
|
||||
-s, --seed RNG seed (default: 42, use random seed for < 0)
|
||||
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
||||
tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise)
|
||||
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow]
|
||||
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple], default:
|
||||
discrete
|
||||
--skip-layers layers to skip for SLG steps (default: [7,8,9])
|
||||
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
|
||||
ddim_trailing, tcd] default: euler for Flux/SD3/Wan, euler_a otherwise
|
||||
--high-noise-scheduler (high noise) denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform,
|
||||
simple], default: discrete
|
||||
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
|
||||
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
|
||||
-h, --help show this help message and exit
|
||||
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32)
|
||||
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1
|
||||
(overrides --vae-tile-size)
|
||||
```
|
||||
@ -7,6 +7,7 @@
|
||||
#include <map>
|
||||
#include <random>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -213,119 +214,6 @@ void print_params(SDParams params) {
|
||||
free(high_noise_sample_params_str);
|
||||
}
|
||||
|
||||
// Print the fixed command-line help text to stdout.
//
// argc  unused; kept so the signature stays the same for callers.
// argv  argv[0] is printed as the program name in the usage line.
//
// Each option is one printf line; lines without an option name are
// continuation text for the option printed just above them.
void print_usage(int argc, const char* argv[]) {
    (void)argc;  // only argv[0] is needed

    printf("usage: %s [arguments]\n", argv[0]);
    printf("\n");
    printf("arguments:\n");
    printf(" -h, --help show this help message and exit\n");
    printf(" -M, --mode [MODE] run mode, one of: [img_gen, vid_gen, upscale, convert], default: img_gen\n");
    printf(" -t, --threads N number of threads to use during computation (default: -1)\n");
    printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
    printf(" --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed\n");
    printf(" -m, --model [MODEL] path to full model\n");
    printf(" --diffusion-model path to the standalone diffusion model\n");
    printf(" --high-noise-diffusion-model path to the standalone high noise diffusion model\n");
    printf(" --clip_l path to the clip-l text encoder\n");
    printf(" --clip_g path to the clip-g text encoder\n");
    printf(" --clip_vision path to the clip-vision encoder\n");
    printf(" --t5xxl path to the t5xxl text encoder\n");
    printf(" --qwen2vl path to the qwen2vl text encoder\n");
    printf(" --qwen2vl_vision path to the qwen2vl vit\n");
    printf(" --vae [VAE] path to vae\n");
    printf(" --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n");
    printf(" --control-net [CONTROL_PATH] path to control net model\n");
    printf(" --embd-dir [EMBEDDING_PATH] path to embeddings\n");
    printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. For img_gen mode, upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now\n");
    printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n");
    printf(" --type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)\n");
    printf(" If not specified, the default is the type of the weight file\n");
    printf(" --tensor-type-rules [EXPRESSION] weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")\n");
    printf(" --lora-model-dir [DIR] lora model directory\n");
    printf(" -i, --init-img [IMAGE] path to the init image, required by img2img\n");
    printf(" --mask [MASK] path to the mask image, required by img2img with mask\n");
    // --end-img has no short form; "-i" belongs to --init-img above.
    printf(" --end-img [IMAGE] path to the end image, required by flf2v\n");
    printf(" --control-image [IMAGE] path to image condition, control net\n");
    printf(" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n");
    printf(" --disable-auto-resize-ref-image disable auto resize of ref images\n");
    printf(" --control-video [PATH] path to control video frames, It must be a directory path.\n");
    printf(" The video frames inside should be stored as images in lexicographical (character) order\n");
    printf(" For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, ... etc.\n");
    printf(" --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).\n");
    printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
    printf(" -p, --prompt [PROMPT] the prompt to render\n");
    printf(" -n, --negative-prompt PROMPT the negative prompt (default: \"\")\n");
    printf(" --cfg-scale SCALE unconditional guidance scale: (default: 7.0)\n");
    printf(" --img-cfg-scale SCALE image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)\n");
    printf(" --guidance SCALE distilled guidance scale for models with guidance input (default: 3.5)\n");
    printf(" --slg-scale SCALE skip layer guidance (SLG) scale, only for DiT models: (default: 0)\n");
    printf(" 0 means disabled, a value of 2.5 is nice for sd3.5 medium\n");
    printf(" --eta SCALE eta in DDIM, only for DDIM and TCD: (default: 0)\n");
    printf(" --skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])\n");
    printf(" --skip-layer-start START SLG enabling point: (default: 0.01)\n");
    printf(" --skip-layer-end END SLG disabling point: (default: 0.2)\n");
    printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
    printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
    printf(" sampling method (default: \"euler\" for Flux/SD3/Wan, \"euler_a\" otherwise)\n");
    printf(" --timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant\n");
    printf(" --steps STEPS number of sample steps (default: 20)\n");
    printf(" --high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)\n");
    printf(" --high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)\n");
    printf(" --high-noise-guidance SCALE (high noise) distilled guidance scale for models with guidance input (default: 3.5)\n");
    printf(" --high-noise-slg-scale SCALE (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)\n");
    printf(" 0 means disabled, a value of 2.5 is nice for sd3.5 medium\n");
    printf(" --high-noise-eta SCALE (high noise) eta in DDIM, only for DDIM and TCD: (default: 0)\n");
    printf(" --high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])\n");
    // Carries the same START placeholder as --skip-layer-start: the option takes a value.
    printf(" --high-noise-skip-layer-start START (high noise) SLG enabling point: (default: 0.01)\n");
    printf(" --high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)\n");
    printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
    printf(" --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
    printf(" (high noise) sampling method (default: \"euler_a\")\n");
    printf(" --high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)\n");
    printf(" SLG will be enabled at step int([STEPS]*[START]) and disabled at int([STEPS]*[END])\n");
    printf(" --strength STRENGTH strength for noising/unnoising (default: 0.75)\n");
    printf(" --control-strength STRENGTH strength to apply Control Net (default: 0.9)\n");
    printf(" 1.0 corresponds to full destruction of information in init image\n");
    printf(" -H, --height H image height, in pixel space (default: 512)\n");
    printf(" -W, --width W image width, in pixel space (default: 512)\n");
    printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
    printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
    printf(" -b, --batch-count COUNT number of images to generate\n");
    printf(" --prediction {eps, v, edm_v, sd3_flow, flux_flow} Prediction type override.\n");
    printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
    printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
    printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
    printf(" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n");
    printf(" --vae-relative-tile-size [X]x[Y] relative tile size for vae tiling, in fraction of image size if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)\n");
    printf(" --vae-tile-overlap OVERLAP tile overlap for vae tiling, in fraction of tile size (default: 0.5)\n");
    printf(" --force-sdxl-vae-conv-scale force use of conv scale on sdxl vae\n");
    printf(" --vae-on-cpu keep vae in cpu (for low vram)\n");
    printf(" --clip-on-cpu keep clip in cpu (for low vram)\n");
    printf(" --diffusion-fa use flash attention in the diffusion model (for low vram)\n");
    printf(" Might lower quality, since it implies converting k and v to f16.\n");
    printf(" This might crash if it is not supported by the backend.\n");
    printf(" --diffusion-conv-direct use Conv2d direct in the diffusion model\n");
    printf(" This might crash if it is not supported by the backend.\n");
    printf(" --vae-conv-direct use Conv2d direct in the vae model (should improve the performance)\n");
    printf(" This might crash if it is not supported by the backend.\n");
    printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
    printf(" --canny apply canny preprocessor (edge detection)\n");
    printf(" --color colors the logging tags according to level\n");
    printf(" --chroma-disable-dit-mask disable dit mask for chroma\n");
    printf(" --chroma-enable-t5-mask enable t5 mask for chroma\n");
    printf(" --chroma-t5-mask-pad PAD_SIZE t5 mask pad size of chroma\n");
    printf(" --video-frames video frames (default: 1)\n");
    printf(" --fps fps (default: 24)\n");
    printf(" --moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. (default: 0.875)\n");
    printf(" only enabled if `--high-noise-steps` is set to -1\n");
    printf(" --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)\n");
    printf(" --vace-strength wan vace strength\n");
    printf(" --photo-maker path to PHOTOMAKER model\n");
    printf(" --pm-id-images-dir [DIR] path to PHOTOMAKER input id images dir\n");
    printf(" --pm-id-embed-path [PATH] path to PHOTOMAKER v2 id embed\n");
    printf(" --pm-style-strength strength for keeping PHOTOMAKER input identity (default: 20)\n");
    printf(" -v, --verbose print extra info\n");
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
static std::string utf16_to_utf8(const std::wstring& wstr) {
|
||||
if (wstr.empty())
|
||||
@ -495,94 +383,424 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Word-wrap `text` for a description column that starts at column `indent`.
//
// text    description to wrap; '\n' characters in it are kept as forced line
//         breaks.
// width   total line width in columns, indentation included. A line is broken
//         as soon as it reaches this many columns.
// indent  number of spaces written in front of every continuation line. The
//         first line is NOT indented here: the caller is expected to have
//         already printed `indent` columns in front of it, so it is counted
//         as starting at column `indent`.
//
// Returns the wrapped text. A break replaces the last space of the current
// line with a newline plus indentation; when the line has no space, the break
// is inserted right after the character that reached `width`.
static std::string wrap_text(const std::string& text, size_t width, size_t indent) {
    // No room left for text: fall back to a 40-column description area.
    if (width <= indent)
        width = indent + 40;

    const std::string pad(indent, ' ');
    const std::string line_break = "\n" + pad;

    std::string out;
    out.reserve(text.size() + indent);

    size_t line_len   = indent;             // columns used on the current line
    size_t last_space = std::string::npos;  // index in `out` of the last breakable space on this line

    for (char c : text) {
        // Preserve manual newlines
        if (c == '\n') {
            out += line_break;
            line_len   = indent;
            last_space = std::string::npos;
            continue;
        }

        if (c == ' ')
            last_space = out.size();
        out += c;
        ++line_len;

        if (line_len < width)
            continue;

        if (last_space != std::string::npos) {
            // Soft break: the word after the space moves to the next line, so
            // its length must count towards the new line.
            const size_t carried = out.size() - last_space - 1;
            out.replace(last_space, 1, line_break);
            line_len = indent + carried;
        } else {
            // No space on this line: hard break at the current position.
            out += line_break;
            line_len = indent;
        }
        last_space = std::string::npos;
    }

    return out;
}
|
||||
|
||||
void print_usage(int argc, const char* argv[], const ArgOptions& options) {
|
||||
constexpr size_t max_line_width = 120;
|
||||
|
||||
std::cout << "Usage: " << argv[0] << " [options]\n\n";
|
||||
std::cout << "Options:\n";
|
||||
|
||||
struct Entry {
|
||||
std::string names;
|
||||
std::string desc;
|
||||
};
|
||||
std::vector<Entry> entries;
|
||||
|
||||
auto add_entry = [&](const std::string& s, const std::string& l,
|
||||
const std::string& desc, const std::string& hint = "") {
|
||||
std::ostringstream ss;
|
||||
if (!s.empty())
|
||||
ss << s;
|
||||
if (!s.empty() && !l.empty())
|
||||
ss << ", ";
|
||||
if (!l.empty())
|
||||
ss << l;
|
||||
if (!hint.empty())
|
||||
ss << " " << hint;
|
||||
entries.push_back({ss.str(), desc});
|
||||
};
|
||||
|
||||
for (auto& o : options.string_options)
|
||||
add_entry(o.short_name, o.long_name, o.desc, "<string>");
|
||||
for (auto& o : options.int_options)
|
||||
add_entry(o.short_name, o.long_name, o.desc, "<int>");
|
||||
for (auto& o : options.float_options)
|
||||
add_entry(o.short_name, o.long_name, o.desc, "<float>");
|
||||
for (auto& o : options.bool_options)
|
||||
add_entry(o.short_name, o.long_name, o.desc, "");
|
||||
for (auto& o : options.manual_options)
|
||||
add_entry(o.short_name, o.long_name, o.desc);
|
||||
|
||||
size_t max_name_width = 0;
|
||||
for (auto& e : entries)
|
||||
max_name_width = std::max(max_name_width, e.names.size());
|
||||
|
||||
for (auto& e : entries) {
|
||||
size_t indent = 2 + max_name_width + 4;
|
||||
size_t desc_width = (max_line_width > indent ? max_line_width - indent : 40);
|
||||
std::string wrapped_desc = wrap_text(e.desc, max_line_width, indent);
|
||||
std::cout << " " << std::left << std::setw(static_cast<int>(max_name_width) + 4)
|
||||
<< e.names << wrapped_desc << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
void parse_args(int argc, const char** argv, SDParams& params) {
|
||||
ArgOptions options;
|
||||
options.string_options = {
|
||||
{"-m", "--model", "", ¶ms.model_path},
|
||||
{"", "--clip_l", "", ¶ms.clip_l_path},
|
||||
{"", "--clip_g", "", ¶ms.clip_g_path},
|
||||
{"", "--clip_vision", "", ¶ms.clip_vision_path},
|
||||
{"", "--t5xxl", "", ¶ms.t5xxl_path},
|
||||
{"", "--qwen2vl", "", ¶ms.qwen2vl_path},
|
||||
{"", "--qwen2vl_vision", "", ¶ms.qwen2vl_vision_path},
|
||||
{"", "--diffusion-model", "", ¶ms.diffusion_model_path},
|
||||
{"", "--high-noise-diffusion-model", "", ¶ms.high_noise_diffusion_model_path},
|
||||
{"", "--vae", "", ¶ms.vae_path},
|
||||
{"", "--taesd", "", ¶ms.taesd_path},
|
||||
{"", "--control-net", "", ¶ms.control_net_path},
|
||||
{"", "--embd-dir", "", ¶ms.embedding_dir},
|
||||
{"", "--lora-model-dir", "", ¶ms.lora_model_dir},
|
||||
{"-i", "--init-img", "", ¶ms.init_image_path},
|
||||
{"", "--end-img", "", ¶ms.end_image_path},
|
||||
{"", "--tensor-type-rules", "", ¶ms.tensor_type_rules},
|
||||
{"", "--photo-maker", "", ¶ms.photo_maker_path},
|
||||
{"", "--pm-id-images-dir", "", ¶ms.pm_id_images_dir},
|
||||
{"", "--pm-id-embed-path", "", ¶ms.pm_id_embed_path},
|
||||
{"", "--mask", "", ¶ms.mask_image_path},
|
||||
{"", "--control-image", "", ¶ms.control_image_path},
|
||||
{"", "--control-video", "", ¶ms.control_video_path},
|
||||
{"-o", "--output", "", ¶ms.output_path},
|
||||
{"-p", "--prompt", "", ¶ms.prompt},
|
||||
{"-n", "--negative-prompt", "", ¶ms.negative_prompt},
|
||||
{"", "--upscale-model", "", ¶ms.esrgan_path},
|
||||
{"-m",
|
||||
"--model",
|
||||
"path to full model",
|
||||
¶ms.model_path},
|
||||
{"",
|
||||
"--clip_l",
|
||||
"path to the clip-l text encoder", ¶ms.clip_l_path},
|
||||
{"", "--clip_g",
|
||||
"path to the clip-g text encoder",
|
||||
¶ms.clip_g_path},
|
||||
{"",
|
||||
"--clip_vision",
|
||||
"path to the clip-vision encoder",
|
||||
¶ms.clip_vision_path},
|
||||
{"",
|
||||
"--t5xxl",
|
||||
"path to the t5xxl text encoder",
|
||||
¶ms.t5xxl_path},
|
||||
{"",
|
||||
"--qwen2vl",
|
||||
"path to the qwen2vl text encoder",
|
||||
¶ms.qwen2vl_path},
|
||||
{"",
|
||||
"--qwen2vl_vision",
|
||||
"path to the qwen2vl vit",
|
||||
¶ms.qwen2vl_vision_path},
|
||||
{"",
|
||||
"--diffusion-model",
|
||||
"path to the standalone diffusion model",
|
||||
¶ms.diffusion_model_path},
|
||||
{"",
|
||||
"--high-noise-diffusion-model",
|
||||
"path to the standalone high noise diffusion model",
|
||||
¶ms.high_noise_diffusion_model_path},
|
||||
{"",
|
||||
"--vae",
|
||||
"path to standalone vae model",
|
||||
¶ms.vae_path},
|
||||
{"",
|
||||
"--taesd",
|
||||
"path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)",
|
||||
¶ms.taesd_path},
|
||||
{"",
|
||||
"--control-net",
|
||||
"path to control net model",
|
||||
¶ms.control_net_path},
|
||||
{"",
|
||||
"--embd-dir",
|
||||
"embeddings directory",
|
||||
¶ms.embedding_dir},
|
||||
{"",
|
||||
"--lora-model-dir",
|
||||
"lora model directory",
|
||||
¶ms.lora_model_dir},
|
||||
{"-i",
|
||||
"--init-img",
|
||||
"path to the init image",
|
||||
¶ms.init_image_path},
|
||||
{"",
|
||||
"--end-img",
|
||||
"path to the end image, required by flf2v",
|
||||
¶ms.end_image_path},
|
||||
{"",
|
||||
"--tensor-type-rules",
|
||||
"weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")",
|
||||
¶ms.tensor_type_rules},
|
||||
{"",
|
||||
"--photo-maker",
|
||||
"path to PHOTOMAKER model",
|
||||
¶ms.photo_maker_path},
|
||||
{"",
|
||||
"--pm-id-images-dir",
|
||||
"path to PHOTOMAKER input id images dir",
|
||||
¶ms.pm_id_images_dir},
|
||||
{"",
|
||||
"--pm-id-embed-path",
|
||||
"path to PHOTOMAKER v2 id embed",
|
||||
¶ms.pm_id_embed_path},
|
||||
{"",
|
||||
"--mask",
|
||||
"path to the mask image",
|
||||
¶ms.mask_image_path},
|
||||
{"",
|
||||
"--control-image",
|
||||
"path to control image, control net",
|
||||
¶ms.control_image_path},
|
||||
{"",
|
||||
"--control-video",
|
||||
"path to control video frames, It must be a directory path. The video frames inside should be stored as images in "
|
||||
"lexicographical (character) order. For example, if the control video path is `frames`, the directory contain images "
|
||||
"such as 00.png, 01.png, ... etc.",
|
||||
¶ms.control_video_path},
|
||||
{"-o",
|
||||
"--output",
|
||||
"path to write result image to (default: ./output.png)",
|
||||
¶ms.output_path},
|
||||
{"-p",
|
||||
"--prompt",
|
||||
"the prompt to render",
|
||||
¶ms.prompt},
|
||||
{"-n",
|
||||
"--negative-prompt",
|
||||
"the negative prompt (default: \"\")",
|
||||
¶ms.negative_prompt},
|
||||
{"",
|
||||
"--upscale-model",
|
||||
"path to esrgan model.",
|
||||
¶ms.esrgan_path},
|
||||
};
|
||||
|
||||
options.int_options = {
|
||||
{"-t", "--threads", "", ¶ms.n_threads},
|
||||
{"", "--upscale-repeats", "", ¶ms.upscale_repeats},
|
||||
{"-H", "--height", "", ¶ms.height},
|
||||
{"-W", "--width", "", ¶ms.width},
|
||||
{"", "--steps", "", ¶ms.sample_params.sample_steps},
|
||||
{"", "--high-noise-steps", "", ¶ms.high_noise_sample_params.sample_steps},
|
||||
{"", "--clip-skip", "", ¶ms.clip_skip},
|
||||
{"-b", "--batch-count", "", ¶ms.batch_count},
|
||||
{"", "--chroma-t5-mask-pad", "", ¶ms.chroma_t5_mask_pad},
|
||||
{"", "--video-frames", "", ¶ms.video_frames},
|
||||
{"", "--fps", "", ¶ms.fps},
|
||||
{"", "--timestep-shift", "", ¶ms.sample_params.shifted_timestep},
|
||||
{"-t",
|
||||
"--threads",
|
||||
"number of threads to use during computation (default: -1). "
|
||||
"If threads <= 0, then threads will be set to the number of CPU physical cores",
|
||||
¶ms.n_threads},
|
||||
{"",
|
||||
"--upscale-repeats",
|
||||
"Run the ESRGAN upscaler this many times (default: 1)",
|
||||
¶ms.upscale_repeats},
|
||||
{"-H",
|
||||
"--height",
|
||||
"image height, in pixel space (default: 512)",
|
||||
¶ms.height},
|
||||
{"-W",
|
||||
"--width",
|
||||
"image width, in pixel space (default: 512)",
|
||||
¶ms.width},
|
||||
{"",
|
||||
"--steps",
|
||||
"number of sample steps (default: 20)",
|
||||
¶ms.sample_params.sample_steps},
|
||||
{"",
|
||||
"--high-noise-steps",
|
||||
"(high noise) number of sample steps (default: -1 = auto)",
|
||||
¶ms.high_noise_sample_params.sample_steps},
|
||||
{"",
|
||||
"--clip-skip",
|
||||
"ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). "
|
||||
"<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x",
|
||||
¶ms.clip_skip},
|
||||
{"-b",
|
||||
"--batch-count",
|
||||
"batch count",
|
||||
¶ms.batch_count},
|
||||
{"",
|
||||
"--chroma-t5-mask-pad",
|
||||
"t5 mask pad size of chroma",
|
||||
¶ms.chroma_t5_mask_pad},
|
||||
{"",
|
||||
"--video-frames",
|
||||
"video frames (default: 1)",
|
||||
¶ms.video_frames},
|
||||
{"",
|
||||
"--fps",
|
||||
"fps (default: 24)",
|
||||
¶ms.fps},
|
||||
{"",
|
||||
"--timestep-shift",
|
||||
"shift timestep for NitroFusion models (default: 0). "
|
||||
"recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant",
|
||||
¶ms.sample_params.shifted_timestep},
|
||||
};
|
||||
|
||||
options.float_options = {
|
||||
{"", "--cfg-scale", "", ¶ms.sample_params.guidance.txt_cfg},
|
||||
{"", "--img-cfg-scale", "", ¶ms.sample_params.guidance.img_cfg},
|
||||
{"", "--guidance", "", ¶ms.sample_params.guidance.distilled_guidance},
|
||||
{"", "--slg-scale", "", ¶ms.sample_params.guidance.slg.scale},
|
||||
{"", "--skip-layer-start", "", ¶ms.sample_params.guidance.slg.layer_start},
|
||||
{"", "--skip-layer-end", "", ¶ms.sample_params.guidance.slg.layer_end},
|
||||
{"", "--eta", "", ¶ms.sample_params.eta},
|
||||
{"", "--high-noise-cfg-scale", "", ¶ms.high_noise_sample_params.guidance.txt_cfg},
|
||||
{"", "--high-noise-img-cfg-scale", "", ¶ms.high_noise_sample_params.guidance.img_cfg},
|
||||
{"", "--high-noise-guidance", "", ¶ms.high_noise_sample_params.guidance.distilled_guidance},
|
||||
{"", "--high-noise-slg-scale", "", ¶ms.high_noise_sample_params.guidance.slg.scale},
|
||||
{"", "--high-noise-skip-layer-start", "", ¶ms.high_noise_sample_params.guidance.slg.layer_start},
|
||||
{"", "--high-noise-skip-layer-end", "", ¶ms.high_noise_sample_params.guidance.slg.layer_end},
|
||||
{"", "--high-noise-eta", "", ¶ms.high_noise_sample_params.eta},
|
||||
{"", "--strength", "", ¶ms.strength},
|
||||
{"", "--pm-style-strength", "", ¶ms.pm_style_strength},
|
||||
{"", "--control-strength", "", ¶ms.control_strength},
|
||||
{"", "--moe-boundary", "", ¶ms.moe_boundary},
|
||||
{"", "--flow-shift", "", ¶ms.flow_shift},
|
||||
{"", "--vace-strength", "", ¶ms.vace_strength},
|
||||
{"", "--vae-tile-overlap", "", ¶ms.vae_tiling_params.target_overlap},
|
||||
{"",
|
||||
"--cfg-scale",
|
||||
"unconditional guidance scale: (default: 7.0)",
|
||||
¶ms.sample_params.guidance.txt_cfg},
|
||||
{"",
|
||||
"--img-cfg-scale",
|
||||
"image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)",
|
||||
¶ms.sample_params.guidance.img_cfg},
|
||||
{"",
|
||||
"--guidance",
|
||||
"distilled guidance scale for models with guidance input (default: 3.5)",
|
||||
¶ms.sample_params.guidance.distilled_guidance},
|
||||
{"",
|
||||
"--slg-scale",
|
||||
"skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means disabled, a value of 2.5 is nice for sd3.5 medium",
|
||||
¶ms.sample_params.guidance.slg.scale},
|
||||
{"",
|
||||
"--skip-layer-start",
|
||||
"SLG enabling point (default: 0.01)",
|
||||
¶ms.sample_params.guidance.slg.layer_start},
|
||||
{"",
|
||||
"--skip-layer-end",
|
||||
"SLG disabling point (default: 0.2)",
|
||||
¶ms.sample_params.guidance.slg.layer_end},
|
||||
{"",
|
||||
"--eta",
|
||||
"eta in DDIM, only for DDIM and TCD (default: 0)",
|
||||
¶ms.sample_params.eta},
|
||||
{"",
|
||||
"--high-noise-cfg-scale",
|
||||
"(high noise) unconditional guidance scale: (default: 7.0)",
|
||||
¶ms.high_noise_sample_params.guidance.txt_cfg},
|
||||
{"",
|
||||
"--high-noise-img-cfg-scale",
|
||||
"(high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)",
|
||||
¶ms.high_noise_sample_params.guidance.img_cfg},
|
||||
{"",
|
||||
"--high-noise-guidance",
|
||||
"(high noise) distilled guidance scale for models with guidance input (default: 3.5)",
|
||||
¶ms.high_noise_sample_params.guidance.distilled_guidance},
|
||||
{"",
|
||||
"--high-noise-slg-scale",
|
||||
"(high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)",
|
||||
¶ms.high_noise_sample_params.guidance.slg.scale},
|
||||
{"",
|
||||
"--high-noise-skip-layer-start",
|
||||
"(high noise) SLG enabling point (default: 0.01)",
|
||||
¶ms.high_noise_sample_params.guidance.slg.layer_start},
|
||||
{"",
|
||||
"--high-noise-skip-layer-end",
|
||||
"(high noise) SLG disabling point (default: 0.2)",
|
||||
¶ms.high_noise_sample_params.guidance.slg.layer_end},
|
||||
{"",
|
||||
"--high-noise-eta",
|
||||
"(high noise) eta in DDIM, only for DDIM and TCD (default: 0)",
|
||||
¶ms.high_noise_sample_params.eta},
|
||||
{"",
|
||||
"--strength",
|
||||
"strength for noising/unnoising (default: 0.75)",
|
||||
¶ms.strength},
|
||||
{"",
|
||||
"--pm-style-strength",
|
||||
"",
|
||||
¶ms.pm_style_strength},
|
||||
{"",
|
||||
"--control-strength",
|
||||
"strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image",
|
||||
¶ms.control_strength},
|
||||
{"",
|
||||
"--moe-boundary",
|
||||
"timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if `--high-noise-steps` is set to -1",
|
||||
¶ms.moe_boundary},
|
||||
{"",
|
||||
"--flow-shift",
|
||||
"shift value for Flow models like SD3.x or WAN (default: auto)",
|
||||
¶ms.flow_shift},
|
||||
{"",
|
||||
"--vace-strength",
|
||||
"wan vace strength",
|
||||
¶ms.vace_strength},
|
||||
{"",
|
||||
"--vae-tile-overlap",
|
||||
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
|
||||
¶ms.vae_tiling_params.target_overlap},
|
||||
};
|
||||
|
||||
options.bool_options = {
|
||||
{"", "--vae-tiling", "", true, ¶ms.vae_tiling_params.enabled},
|
||||
{"", "--force-sdxl-vae-conv-scale", "", true, ¶ms.force_sdxl_vae_conv_scale},
|
||||
{"", "--offload-to-cpu", "", true, ¶ms.offload_params_to_cpu},
|
||||
{"", "--control-net-cpu", "", true, ¶ms.control_net_cpu},
|
||||
{"", "--clip-on-cpu", "", true, ¶ms.clip_on_cpu},
|
||||
{"", "--vae-on-cpu", "", true, ¶ms.vae_on_cpu},
|
||||
{"", "--diffusion-fa", "", true, ¶ms.diffusion_flash_attn},
|
||||
{"", "--diffusion-conv-direct", "", true, ¶ms.diffusion_conv_direct},
|
||||
{"", "--vae-conv-direct", "", true, ¶ms.vae_conv_direct},
|
||||
{"", "--canny", "", true, ¶ms.canny_preprocess},
|
||||
{"-v", "--verbose", "", true, ¶ms.verbose},
|
||||
{"", "--color", "", true, ¶ms.color},
|
||||
{"", "--chroma-disable-dit-mask", "", false, ¶ms.chroma_use_dit_mask},
|
||||
{"", "--chroma-enable-t5-mask", "", true, ¶ms.chroma_use_t5_mask},
|
||||
{"", "--increase-ref-index", "", true, ¶ms.increase_ref_index},
|
||||
{"", "--disable-auto-resize-ref-image", "", false, ¶ms.auto_resize_ref_image},
|
||||
{"",
|
||||
"--vae-tiling",
|
||||
"process vae in tiles to reduce memory usage",
|
||||
true, ¶ms.vae_tiling_params.enabled},
|
||||
{"",
|
||||
"--force-sdxl-vae-conv-scale",
|
||||
"force use of conv scale on sdxl vae",
|
||||
true, ¶ms.force_sdxl_vae_conv_scale},
|
||||
{"",
|
||||
"--offload-to-cpu",
|
||||
"place the weights in RAM to save VRAM, and automatically load them into VRAM when needed",
|
||||
true, ¶ms.offload_params_to_cpu},
|
||||
{"",
|
||||
"--control-net-cpu",
|
||||
"keep controlnet in cpu (for low vram)",
|
||||
true, ¶ms.control_net_cpu},
|
||||
{"",
|
||||
"--clip-on-cpu",
|
||||
"keep clip in cpu (for low vram)",
|
||||
true, ¶ms.clip_on_cpu},
|
||||
{"",
|
||||
"--vae-on-cpu",
|
||||
"keep vae in cpu (for low vram)",
|
||||
true, ¶ms.vae_on_cpu},
|
||||
{"",
|
||||
"--diffusion-fa",
|
||||
"use flash attention in the diffusion model",
|
||||
true, ¶ms.diffusion_flash_attn},
|
||||
{"",
|
||||
"--diffusion-conv-direct",
|
||||
"use ggml_conv2d_direct in the diffusion model",
|
||||
true, ¶ms.diffusion_conv_direct},
|
||||
{"",
|
||||
"--vae-conv-direct",
|
||||
"use ggml_conv2d_direct in the vae model",
|
||||
true, ¶ms.vae_conv_direct},
|
||||
{"",
|
||||
"--canny",
|
||||
"apply canny preprocessor (edge detection)",
|
||||
true, ¶ms.canny_preprocess},
|
||||
{"-v",
|
||||
"--verbose",
|
||||
"print extra info",
|
||||
true, ¶ms.verbose},
|
||||
{"",
|
||||
"--color",
|
||||
"colors the logging tags according to level",
|
||||
true, ¶ms.color},
|
||||
{"",
|
||||
"--chroma-disable-dit-mask",
|
||||
"disable dit mask for chroma",
|
||||
false, ¶ms.chroma_use_dit_mask},
|
||||
{"",
|
||||
"--chroma-enable-t5-mask",
|
||||
"enable t5 mask for chroma",
|
||||
true, ¶ms.chroma_use_t5_mask},
|
||||
{"",
|
||||
"--increase-ref-index",
|
||||
"automatically increase the indices of references images based on the order they are listed (starting with 1).",
|
||||
true, ¶ms.increase_ref_index},
|
||||
{"",
|
||||
"--disable-auto-resize-ref-image",
|
||||
"disable auto resize of ref images",
|
||||
false, ¶ms.auto_resize_ref_image},
|
||||
};
|
||||
|
||||
auto on_mode_arg = [&](int argc, const char** argv, int index) {
|
||||
@ -715,7 +933,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
||||
};
|
||||
|
||||
auto on_help_arg = [&](int argc, const char** argv, int index) {
|
||||
print_usage(argc, argv);
|
||||
print_usage(argc, argv, options);
|
||||
exit(0);
|
||||
return 0;
|
||||
};
|
||||
@ -829,25 +1047,73 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
||||
};
|
||||
|
||||
options.manual_options = {
|
||||
{"-M", "--mode", "", on_mode_arg},
|
||||
{"", "--type", "", on_type_arg},
|
||||
{"", "--rng", "", on_rng_arg},
|
||||
{"-s", "--seed", "", on_seed_arg},
|
||||
{"", "--sampling-method", "", on_sample_method_arg},
|
||||
{"", "--prediction", "", on_prediction_arg},
|
||||
{"", "--scheduler", "", on_schedule_arg},
|
||||
{"", "--skip-layers", "", on_skip_layers_arg},
|
||||
{"", "--high-noise-sampling-method", "", on_high_noise_sample_method_arg},
|
||||
{"", "--high-noise-scheduler", "", on_high_noise_schedule_arg},
|
||||
{"", "--high-noise-skip-layers", "", on_high_noise_skip_layers_arg},
|
||||
{"-r", "--ref-image", "", on_ref_image_arg},
|
||||
{"-h", "--help", "", on_help_arg},
|
||||
{"", "--vae-tile-size", "", on_tile_size_arg},
|
||||
{"", "--vae-relative-tile-size", "", on_relative_tile_size_arg},
|
||||
{"-M",
|
||||
"--mode",
|
||||
"run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen",
|
||||
on_mode_arg},
|
||||
{"",
|
||||
"--type",
|
||||
"weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). "
|
||||
"If not specified, the default is the type of the weight file",
|
||||
on_type_arg},
|
||||
{"",
|
||||
"--rng",
|
||||
"RNG, one of [std_default, cuda], default: cuda",
|
||||
on_rng_arg},
|
||||
{"-s",
|
||||
"--seed",
|
||||
"RNG seed (default: 42, use random seed for < 0)",
|
||||
on_seed_arg},
|
||||
{"",
|
||||
"--sampling-method",
|
||||
"sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd] "
|
||||
"(default: euler for Flux/SD3/Wan, euler_a otherwise)",
|
||||
on_sample_method_arg},
|
||||
{"",
|
||||
"--prediction",
|
||||
"prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow]",
|
||||
on_prediction_arg},
|
||||
{"",
|
||||
"--scheduler",
|
||||
"denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple], default: discrete",
|
||||
on_schedule_arg},
|
||||
{"",
|
||||
"--skip-layers",
|
||||
"layers to skip for SLG steps (default: [7,8,9])",
|
||||
on_skip_layers_arg},
|
||||
{"",
|
||||
"--high-noise-sampling-method",
|
||||
"(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd]"
|
||||
" default: euler for Flux/SD3/Wan, euler_a otherwise",
|
||||
on_high_noise_sample_method_arg},
|
||||
{"",
|
||||
"--high-noise-scheduler",
|
||||
"(high noise) denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple], default: discrete",
|
||||
on_high_noise_schedule_arg},
|
||||
{"",
|
||||
"--high-noise-skip-layers",
|
||||
"(high noise) layers to skip for SLG steps (default: [7,8,9])",
|
||||
on_high_noise_skip_layers_arg},
|
||||
{"-r",
|
||||
"--ref-image",
|
||||
"reference image for Flux Kontext models (can be used multiple times)",
|
||||
on_ref_image_arg},
|
||||
{"-h",
|
||||
"--help",
|
||||
"show this help message and exit",
|
||||
on_help_arg},
|
||||
{"",
|
||||
"--vae-tile-size",
|
||||
"tile size for vae tiling, format [X]x[Y] (default: 32x32)",
|
||||
on_tile_size_arg},
|
||||
{"",
|
||||
"--vae-relative-tile-size",
|
||||
"relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)",
|
||||
on_relative_tile_size_arg},
|
||||
};
|
||||
|
||||
if (!parse_options(argc, argv, options)) {
|
||||
print_usage(argc, argv);
|
||||
print_usage(argc, argv, options);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -857,19 +1123,19 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
||||
|
||||
if ((params.mode == IMG_GEN || params.mode == VID_GEN) && params.prompt.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: prompt\n");
|
||||
print_usage(argc, argv);
|
||||
print_usage(argc, argv, options);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.mode != UPSCALE && params.model_path.length() == 0 && params.diffusion_model_path.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: model_path/diffusion_model\n");
|
||||
print_usage(argc, argv);
|
||||
print_usage(argc, argv, options);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (params.output_path.length() == 0) {
|
||||
fprintf(stderr, "error: the following arguments are required: output_path\n");
|
||||
print_usage(argc, argv);
|
||||
print_usage(argc, argv, options);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user