Compare commits

..

20 Commits

Author SHA1 Message Date
leejet
90e87bc846
feat: add max-vram based segmented param offload (#1476) 2026-05-06 21:56:02 +08:00
Wagner Bruna
586b6f1481
feat: adapt res samplers for flow models for eta > 0 (#1436) 2026-05-06 21:49:06 +08:00
fszontagh
9097ce5211
fix: skip empty MultiLoraAdapter when no LoRAs target a model (#1469) 2026-05-06 21:45:47 +08:00
leejet
3d6064b37e
perf: speed up tensor_to_sd_image conversion (#1466) 2026-04-30 01:13:56 +08:00
Wagner Bruna
b8079e253d
feat: transition from compile-time to runtime backend discovery (#1448)
Co-authored-by: Stéphane du Hamel <stephduh@live.fr>
Co-authored-by: Cyberhan123 <255542417@qq.com>
Co-authored-by: leejet <leejet714@gmail.com>
2026-04-29 23:26:57 +08:00
Wagner Bruna
331cfa5387
fix: release VAE compute buffer after tiled encoding (#1465) 2026-04-29 22:25:30 +08:00
Douglas Griffith
a81677f59c
docs: performance tips markup (#1460) 2026-04-27 22:55:30 +08:00
leejet
f40a707d0f
feat: add sdcpp-specific generation metadata to image outputs (#1462) 2026-04-27 22:43:13 +08:00
akleine
970c4a3312
chore: replace some NULL with nullptr + use "%zu" for printing some size_t data (#1457) 2026-04-27 22:42:57 +08:00
leejet
b8bdffc199
feat: add more built-in highres upscalers (#1456) 2026-04-23 22:17:58 +08:00
leejet
c97702e105
feat: add sd-webui style Hires. fix support (#1451) 2026-04-22 23:51:09 +08:00
leejet
44cca3d626
feat: support safetensors export in convert mode (#1444) 2026-04-20 00:22:11 +08:00
leejet
0a7ae07f94
feat: add restricted torch legacy checkpoint loading (#1443) 2026-04-19 23:09:43 +08:00
leejet
66143340b6
refactor: move model file IO into dedicated module (#1442) 2026-04-19 17:52:56 +08:00
Wagner Bruna
7023fc4cfb
fix: correct image to image DDIM and TCD (#1410) 2026-04-19 17:51:28 +08:00
Wagner Bruna
e77e4c46bf
feat: adapt LCM for flow models (#1413) 2026-04-19 17:49:46 +08:00
leejet
7d33d4b2dd
chore: enable MSVC parallel compilation with /MP (#1438) 2026-04-18 15:44:43 +08:00
leejet
3c99f700de
ci: skip docker image build job on pull requests (#1439) 2026-04-18 15:25:04 +08:00
leejet
4d626d24b2
feat(server): implement vid_gen async API and mode-aware capabilities (#1437) 2026-04-18 15:06:36 +08:00
Wagner Bruna
f3f69e2fbe
feat: add DPM++ (2S) Ancestral implementation for flow models (#1428) 2026-04-18 15:05:09 +08:00
72 changed files with 7846 additions and 1969 deletions

View File

@ -176,6 +176,7 @@ jobs:
build-and-push-docker-images: build-and-push-docker-images:
name: Build and push container images name: Build and push container images
if: ${{ github.event_name != 'pull_request' }}
runs-on: ubuntu-latest runs-on: ubuntu-latest
permissions: permissions:

View File

@ -11,6 +11,10 @@ endif()
if (MSVC) if (MSVC)
add_compile_definitions(_CRT_SECURE_NO_WARNINGS) add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
add_compile_definitions(_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING) add_compile_definitions(_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING)
add_compile_options(
$<$<COMPILE_LANGUAGE:C>:/MP>
$<$<COMPILE_LANGUAGE:CXX>:/MP>
)
endif() endif()
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
@ -68,37 +72,31 @@ option(SD_USE_SYSTEM_GGML "sd: use system-installed GGML library" OFF
if(SD_CUDA) if(SD_CUDA)
message("-- Use CUDA as backend stable-diffusion") message("-- Use CUDA as backend stable-diffusion")
set(GGML_CUDA ON) set(GGML_CUDA ON)
add_definitions(-DSD_USE_CUDA)
endif() endif()
if(SD_METAL) if(SD_METAL)
message("-- Use Metal as backend stable-diffusion") message("-- Use Metal as backend stable-diffusion")
set(GGML_METAL ON) set(GGML_METAL ON)
add_definitions(-DSD_USE_METAL)
endif() endif()
if (SD_VULKAN) if (SD_VULKAN)
message("-- Use Vulkan as backend stable-diffusion") message("-- Use Vulkan as backend stable-diffusion")
set(GGML_VULKAN ON) set(GGML_VULKAN ON)
add_definitions(-DSD_USE_VULKAN)
endif () endif ()
if (SD_OPENCL) if (SD_OPENCL)
message("-- Use OpenCL as backend stable-diffusion") message("-- Use OpenCL as backend stable-diffusion")
set(GGML_OPENCL ON) set(GGML_OPENCL ON)
add_definitions(-DSD_USE_OPENCL)
endif () endif ()
if (SD_HIPBLAS) if (SD_HIPBLAS)
message("-- Use HIPBLAS as backend stable-diffusion") message("-- Use HIPBLAS as backend stable-diffusion")
set(GGML_HIP ON) set(GGML_HIP ON)
add_definitions(-DSD_USE_CUDA)
endif () endif ()
if(SD_MUSA) if(SD_MUSA)
message("-- Use MUSA as backend stable-diffusion") message("-- Use MUSA as backend stable-diffusion")
set(GGML_MUSA ON) set(GGML_MUSA ON)
add_definitions(-DSD_USE_CUDA)
endif() endif()
if(SD_WEBP) if(SD_WEBP)
@ -152,10 +150,12 @@ endif()
set(SD_LIB stable-diffusion) set(SD_LIB stable-diffusion)
file(GLOB SD_LIB_SOURCES file(GLOB SD_LIB_SOURCES CONFIGURE_DEPENDS
"src/*.h" "src/*.h"
"src/*.cpp" "src/*.cpp"
"src/*.hpp" "src/*.hpp"
"src/model_io/*.h"
"src/model_io/*.cpp"
"src/tokenizers/*.h" "src/tokenizers/*.h"
"src/tokenizers/*.cpp" "src/tokenizers/*.cpp"
"src/tokenizers/vocab/*.h" "src/tokenizers/vocab/*.h"
@ -216,7 +216,6 @@ if(SD_SYCL)
message("-- Use SYCL as backend stable-diffusion") message("-- Use SYCL as backend stable-diffusion")
set(GGML_SYCL ON) set(GGML_SYCL ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl")
add_definitions(-DSD_USE_SYCL)
# disable fast-math on host, see: # disable fast-math on host, see:
# https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/fp-model-fp.html # https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/fp-model-fp.html
if (WIN32) if (WIN32)

View File

@ -77,9 +77,10 @@ API and command-line option may change frequently.***
- OpenCL - OpenCL
- SYCL - SYCL
- Supported weight formats - Supported weight formats
- Pytorch checkpoint (`.ckpt` or `.pth`) - Pytorch checkpoint (`.ckpt` or `.pth` or `.pt`)
- Safetensors (`.safetensors`) - Safetensors (`.safetensors`)
- GGUF (`.gguf`) - GGUF (`.gguf`)
- Convert mode supports converting model weights to `.gguf` or `.safetensors`
- Supported platforms - Supported platforms
- Linux - Linux
- Mac OS - Mac OS

View File

@ -131,8 +131,6 @@ sd-cli -m model.safetensors -p "a cat" --cache-mode spectrum
| `warmup` | Steps to always compute before caching starts | 4 | | `warmup` | Steps to always compute before caching starts | 4 |
| `stop` | Stop caching at this fraction of total steps | 0.9 | | `stop` | Stop caching at this fraction of total steps | 0.9 |
```
### Performance Tips ### Performance Tips
- Start with default thresholds and adjust based on output quality - Start with default thresholds and adjust based on output quality

View File

@ -4,14 +4,17 @@
usage: ./bin/sd-cli [options] usage: ./bin/sd-cli [options]
CLI Options: CLI Options:
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default: -o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi, .webm, and animated .webp sequences (default: ./output.png) (eg. output_%03d.png). Single-file video outputs
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp support .avi, .webm, and animated .webp
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
every step)
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
--image <string> path to the image to inspect (for metadata mode) --image <string> path to the image to inspect (for metadata mode)
--metadata-format <string> metadata output format, one of [text, json] (default: text) --metadata-format <string> metadata output format, one of [text, json] (default: text)
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support
.avi, .webm, and animated .webp
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file
(default is 1, meaning updating at every step)
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified
%d in output path, 1 otherwise)
--canny apply canny preprocessor (edge detection) --canny apply canny preprocessor (edge detection)
--convert-name convert tensor name (for convert mode) --convert-name convert tensor name (for convert mode)
-v, --verbose print extra info -v, --verbose print extra info
@ -31,7 +34,8 @@ Context Options:
--clip_g <string> path to the clip-g text encoder --clip_g <string> path to the clip-g text encoder
--clip_vision <string> path to the clip-vision encoder --clip_vision <string> path to the clip-vision encoder
--t5xxl <string> path to the t5xxl text encoder --t5xxl <string> path to the t5xxl text encoder
--llm <string> path to the llm text encoder. For example: (qwenvl2.5 for qwen-image, mistral-small3.2 for flux2, ...) --llm <string> path to the llm text encoder. For example: (qwenvl2.5 for qwen-image,
mistral-small3.2 for flux2, ...)
--llm_vision <string> path to the llm vit --llm_vision <string> path to the llm vit
--qwen2vl <string> alias of --llm. Deprecated. --qwen2vl <string> alias of --llm. Deprecated.
--qwen2vl_vision <string> alias of --llm_vision. Deprecated. --qwen2vl_vision <string> alias of --llm_vision. Deprecated.
@ -43,16 +47,18 @@ Context Options:
--control-net <string> path to control net model --control-net <string> path to control net model
--embd-dir <string> embeddings directory --embd-dir <string> embeddings directory
--lora-model-dir <string> lora model directory --lora-model-dir <string> lora model directory
--hires-upscalers-dir <string> highres fix upscaler model directory
--tensor-type-rules <string> weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0") --tensor-type-rules <string> weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0")
--photo-maker <string> path to PHOTOMAKER model --photo-maker <string> path to PHOTOMAKER model
--upscale-model <string> path to esrgan model. --upscale-model <string> path to esrgan model.
-t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0, then threads will be set to the number of -t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0,
CPU physical cores then threads will be set to the number of CPU physical cores
--chroma-t5-mask-pad <int> t5 mask pad size of chroma --chroma-t5-mask-pad <int> t5 mask pad size of chroma
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5) --max-vram <float> maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables
--vae-tiling process vae in tiles to reduce memory usage graph splitting
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae --force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
when needed
--mmap whether to memory-map model --mmap whether to memory-map model
--control-net-cpu keep controlnet in cpu (for low vram) --control-net-cpu keep controlnet in cpu (for low vram)
--clip-on-cpu keep clip in cpu (for low vram) --clip-on-cpu keep clip in cpu (for low vram)
@ -67,20 +73,19 @@ Context Options:
--chroma-disable-dit-mask disable dit mask for chroma --chroma-disable-dit-mask disable dit mask for chroma
--qwen-image-zero-cond-t enable zero_cond_t for qwen image --qwen-image-zero-cond-t enable zero_cond_t for qwen image
--chroma-enable-t5-mask enable t5 mask for chroma --chroma-enable-t5-mask enable t5 mask for chroma
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the --type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K,
type of the weight file q4_K). If not specified, the default is the type of the weight file
--rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui) --rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)
--sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng --sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow] --prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow,
--lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights flux2_flow]
contain any quantized parameters, the at_runtime mode will be used; otherwise, --lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is
immediately will be used.The immediately mode may have precision and auto. In auto mode, if the model weights contain any quantized parameters,
compatibility issues with quantized parameters, but it usually offers faster inference the at_runtime mode will be used; otherwise, immediately will be used.The
speed and, in some cases, lower memory usage. The at_runtime mode, on the immediately mode may have precision and compatibility issues with quantized
other hand, is exactly the opposite. parameters, but it usually offers faster inference speed and, in some cases,
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32) lower memory usage. The at_runtime mode, on the other hand, is exactly the
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1 opposite.
(overrides --vae-tile-size)
Generation Options: Generation Options:
-p, --prompt <string> the prompt to render -p, --prompt <string> the prompt to render
@ -89,69 +94,99 @@ Generation Options:
--end-img <string> path to the end image, required by flf2v --end-img <string> path to the end image, required by flf2v
--mask <string> path to the mask image --mask <string> path to the mask image
--control-image <string> path to control image, control net --control-image <string> path to control image, control net
--control-video <string> path to control video frames, It must be a directory path. The video frames inside should be stored as images in --control-video <string> path to control video frames, It must be a directory path. The video frames
lexicographical (character) order. For example, if the control video path is inside should be stored as images in lexicographical (character) order. For
`frames`, the directory contains images such as 00.png, 01.png, ... etc. example, if the control video path is `frames`, the directory contains images
such as 00.png, 01.png, ... etc.
--pm-id-images-dir <string> path to PHOTOMAKER input id images dir --pm-id-images-dir <string> path to PHOTOMAKER input id images dir
--pm-id-embed-path <string> path to PHOTOMAKER v2 id embed --pm-id-embed-path <string> path to PHOTOMAKER v2 id embed
--hires-upscaler <string> highres fix upscaler, Lanczos, Nearest, Latent, Latent (nearest), Latent
(nearest-exact), Latent (antialiased), Latent (bicubic), Latent (bicubic
antialiased), or a model name under --hires-upscalers-dir (default: Latent)
-H, --height <int> image height, in pixel space (default: 512) -H, --height <int> image height, in pixel space (default: 512)
-W, --width <int> image width, in pixel space (default: 512) -W, --width <int> image width, in pixel space (default: 512)
--steps <int> number of sample steps (default: 20) --steps <int> number of sample steps (default: 20)
--high-noise-steps <int> (high noise) number of sample steps (default: -1 = auto) --high-noise-steps <int> (high noise) number of sample steps (default: -1 = auto)
--clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). <= 0 represents unspecified, --clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer
will be 1 for SD1.x, 2 for SD2.x (default: -1). <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
-b, --batch-count <int> batch count -b, --batch-count <int> batch count
--video-frames <int> video frames (default: 1) --video-frames <int> video frames (default: 1)
--fps <int> fps (default: 24) --fps <int> fps (default: 24)
--timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for NitroSD-Realism around 250 and 500 for --timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for
NitroSD-Vibrant NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
--upscale-repeats <int> Run the ESRGAN upscaler this many times (default: 1) --upscale-repeats <int> Run the ESRGAN upscaler this many times (default: 1)
--upscale-tile-size <int> tile size for ESRGAN upscaling (default: 128) --upscale-tile-size <int> tile size for ESRGAN upscaling (default: 128)
--hires-width <int> highres fix target width, 0 to use --hires-scale (default: 0)
--hires-height <int> highres fix target height, 0 to use --hires-scale (default: 0)
--hires-steps <int> highres fix second pass sample steps, 0 to reuse --steps (default: 0)
--hires-upscale-tile-size <int> highres fix upscaler tile size, reserved for model-backed upscalers (default:
128)
--cfg-scale <float> unconditional guidance scale: (default: 7.0) --cfg-scale <float> unconditional guidance scale: (default: 7.0)
--img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale) --img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same
as --cfg-scale)
--guidance <float> distilled guidance scale for models with guidance input (default: 3.5) --guidance <float> distilled guidance scale for models with guidance input (default: 3.5)
--slg-scale <float> skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means disabled, a value of 2.5 is nice for sd3.5 --slg-scale <float> skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means
medium disabled, a value of 2.5 is nice for sd3.5 medium
--skip-layer-start <float> SLG enabling point (default: 0.01) --skip-layer-start <float> SLG enabling point (default: 0.01)
--skip-layer-end <float> SLG disabling point (default: 0.2) --skip-layer-end <float> SLG disabling point (default: 0.2)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a) --eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and
res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto) --flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0) --high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale) --high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5) (default: same as --cfg-scale)
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0) --high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input
(default: 3.5)
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default:
0)
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01) --high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2) --high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a) --high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd,
res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--strength <float> strength for noising/unnoising (default: 0.75) --strength <float> strength for noising/unnoising (default: 0.75)
--pm-style-strength <float> --pm-style-strength <float>
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image --control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if `--high-noise-steps` is set to -1 destruction of information in init image
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if
`--high-noise-steps` is set to -1
--vace-strength <float> wan vace strength --vace-strength <float> wan vace strength
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1). --vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
--hires-scale <float> highres fix scale when target size is not set (default: 2.0)
--hires-denoising-strength <float> highres fix second pass denoising strength (default: 0.7)
--increase-ref-index automatically increase the indices of reference images based on the order
they are listed (starting with 1).
--disable-auto-resize-ref-image disable auto resize of ref images --disable-auto-resize-ref-image disable auto resize of ref images
--disable-image-metadata do not embed generation metadata on image files --disable-image-metadata do not embed generation metadata on image files
--vae-tiling process vae in tiles to reduce memory usage
--hires enable highres fix
-s, --seed RNG seed (default: 42, use random seed for < 0) -s, --seed RNG seed (default: 42, use random seed for < 0)
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, --sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m,
tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s,
otherwise) er_sde] (default: euler for Flux/SD3/Wan, euler_a otherwise)
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, --high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a,
ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep,
euler_a otherwise res_2s, er_sde] default: euler for Flux/SD3/Wan, euler_a otherwise
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, --scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits,
kl_optimal, lcm, bong_tangent], default: discrete smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default:
--sigmas custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0"). discrete
--sigmas custom sigma values for the sampler, comma-separated (e.g.,
"14.61,7.8,3.5,0.0").
--skip-layers layers to skip for SLG steps (default: [7,8,9]) --skip-layers layers to skip for SLG steps (default: [7,8,9])
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9]) --high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
-r, --ref-image reference image for Flux Kontext models (can be used multiple times) -r, --ref-image reference image for Flux Kontext models (can be used multiple times)
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), --cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET),
'spectrum' (UNET/DiT Chebyshev+Taylor forecasting) 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), 'spectrum' (UNET/DiT
Chebyshev+Taylor forecasting)
--cache-option named cache params (key=value format, comma-separated). easycache/ucache: --cache-option named cache params (key=value format, comma-separated). easycache/ucache:
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=; threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit:
spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples: Fn=,Bn=,threshold=,warmup=; spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=.
"threshold=0.25" or "threshold=1.5,reset=0" or "w=0.4,window=2" Examples: "threshold=0.25" or "threshold=1.5,reset=0"
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache --scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g.,
"1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
--scm-policy SCM policy: 'dynamic' (default) or 'static' --scm-policy SCM policy: 'dynamic' (default) or 'static'
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32)
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size
if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)
``` ```
Metadata mode inspects PNG/JPEG container metadata without loading any model: Metadata mode inspects PNG/JPEG container metadata without loading any model:

View File

@ -278,7 +278,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
bool valid = cli_params.resolve_and_validate(); bool valid = cli_params.resolve_and_validate();
if (valid && cli_params.mode != METADATA) { if (valid && cli_params.mode != METADATA) {
valid = ctx_params.resolve_and_validate(cli_params.mode) && valid = ctx_params.resolve_and_validate(cli_params.mode) &&
gen_params.resolve_and_validate(cli_params.mode, ctx_params.lora_model_dir); gen_params.resolve_and_validate(cli_params.mode,
ctx_params.lora_model_dir,
ctx_params.hires_upscalers_dir);
} }
if (!valid) { if (!valid) {
@ -431,8 +433,9 @@ bool save_results(const SDCliParams& cli_params,
if (!img.data) if (!img.data)
return false; return false;
const int64_t metadata_seed = cli_params.mode == VID_GEN ? gen_params.seed : gen_params.seed + idx;
std::string params = gen_params.embed_image_metadata std::string params = gen_params.embed_image_metadata
? get_image_params(ctx_params, gen_params, gen_params.seed + idx) ? get_image_params(ctx_params, gen_params, metadata_seed, cli_params.mode)
: ""; : "";
const bool ok = write_image_to_file(path.string(), img.data, img.width, img.height, img.channel, params, 90); const bool ok = write_image_to_file(path.string(), img.data, img.width, img.height, img.channel, params, 90);
LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure"); LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure");
@ -688,6 +691,13 @@ int main(int argc, const char* argv[]) {
vae_decode_only = false; vae_decode_only = false;
} }
if (gen_params.hires_enabled &&
(gen_params.resolved_hires_upscaler == SD_HIRES_UPSCALER_MODEL ||
gen_params.resolved_hires_upscaler == SD_HIRES_UPSCALER_LANCZOS ||
gen_params.resolved_hires_upscaler == SD_HIRES_UPSCALER_NEAREST)) {
vae_decode_only = false;
}
sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview); sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview);
SDImageVec results; SDImageVec results;

View File

@ -107,47 +107,60 @@ static bool is_absolute_path(const std::string& p) {
std::string ArgOptions::wrap_text(const std::string& text, size_t width, size_t indent) { std::string ArgOptions::wrap_text(const std::string& text, size_t width, size_t indent) {
std::ostringstream oss; std::ostringstream oss;
size_t line_len = 0;
size_t pos = 0; size_t pos = 0;
size_t line_len = 0;
while (pos < text.size()) { while (pos < text.size()) {
// Preserve manual newlines
if (text[pos] == '\n') { if (text[pos] == '\n') {
oss << '\n' oss << '\n'
<< std::string(indent, ' '); << std::string(indent, ' ');
line_len = indent; line_len = 0;
++pos; ++pos;
continue; continue;
} }
// Add the character if (std::isspace(static_cast<unsigned char>(text[pos]))) {
oss << text[pos];
++line_len;
++pos; ++pos;
continue;
// If the current line exceeds width, try to break at the last space }
if (line_len >= width) {
std::string current = oss.str(); size_t word_start = pos;
size_t back = current.size(); while (pos < text.size() &&
text[pos] != '\n' &&
// Find the last space (for a clean break) !std::isspace(static_cast<unsigned char>(text[pos]))) {
while (back > 0 && current[back - 1] != ' ' && current[back - 1] != '\n') ++pos;
--back; }
// If found a space to break on std::string word = text.substr(word_start, pos - word_start);
if (back > 0 && current[back - 1] != '\n') { while (!word.empty()) {
std::string before = current.substr(0, back - 1); size_t separator_len = line_len == 0 ? 0 : 1;
std::string after = current.substr(back); if (line_len + separator_len + word.size() <= width) {
oss.str(""); if (separator_len > 0) {
oss.clear(); oss << ' ';
oss << before << "\n" ++line_len;
<< std::string(indent, ' ') << after; }
} else { oss << word;
// If no space found, just break at width line_len += word.size();
oss << "\n" word.clear();
<< std::string(indent, ' '); continue;
}
if (line_len > 0) {
oss << '\n'
<< std::string(indent, ' ');
line_len = 0;
continue;
}
size_t chunk_len = std::min(width, word.size());
oss << word.substr(0, chunk_len);
line_len = chunk_len;
word.erase(0, chunk_len);
if (!word.empty()) {
oss << '\n'
<< std::string(indent, ' ');
line_len = 0;
} }
line_len = indent;
} }
} }
@ -351,7 +364,10 @@ ArgOptions SDContextParams::get_options() {
"--lora-model-dir", "--lora-model-dir",
"lora model directory", "lora model directory",
&lora_model_dir}, &lora_model_dir},
{"",
"--hires-upscalers-dir",
"highres fix upscaler model directory",
&hires_upscalers_dir},
{"", {"",
"--tensor-type-rules", "--tensor-type-rules",
"weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")", "weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")",
@ -378,7 +394,12 @@ ArgOptions SDContextParams::get_options() {
&chroma_t5_mask_pad}, &chroma_t5_mask_pad},
}; };
options.float_options = {}; options.float_options = {
{"",
"--max-vram",
"maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables graph splitting",
&max_vram},
};
options.bool_options = { options.bool_options = {
{"", {"",
@ -649,10 +670,12 @@ std::string SDContextParams::to_string() const {
<< " wtype: " << sd_type_name(wtype) << ",\n" << " wtype: " << sd_type_name(wtype) << ",\n"
<< " tensor_type_rules: \"" << tensor_type_rules << "\",\n" << " tensor_type_rules: \"" << tensor_type_rules << "\",\n"
<< " lora_model_dir: \"" << lora_model_dir << "\",\n" << " lora_model_dir: \"" << lora_model_dir << "\",\n"
<< " hires_upscalers_dir: \"" << hires_upscalers_dir << "\",\n"
<< " photo_maker_path: \"" << photo_maker_path << "\",\n" << " photo_maker_path: \"" << photo_maker_path << "\",\n"
<< " rng_type: " << sd_rng_type_name(rng_type) << ",\n" << " rng_type: " << sd_rng_type_name(rng_type) << ",\n"
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n" << " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
<< " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n" << " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n"
<< " max_vram: " << max_vram << ",\n"
<< " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n" << " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n"
<< " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n" << " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
<< " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n" << " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n"
@ -727,6 +750,7 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool vae_decode_only, bool f
chroma_use_t5_mask, chroma_use_t5_mask,
chroma_t5_mask_pad, chroma_t5_mask_pad,
qwen_image_zero_cond_t, qwen_image_zero_cond_t,
max_vram,
}; };
return sd_ctx_params; return sd_ctx_params;
} }
@ -777,6 +801,12 @@ ArgOptions SDGenerationParams::get_options() {
"--pm-id-embed-path", "--pm-id-embed-path",
"path to PHOTOMAKER v2 id embed", "path to PHOTOMAKER v2 id embed",
&pm_id_embed_path}, &pm_id_embed_path},
{"",
"--hires-upscaler",
"highres fix upscaler, Lanczos, Nearest, Latent, Latent (nearest), Latent (nearest-exact), "
"Latent (antialiased), Latent (bicubic), Latent (bicubic antialiased), or a model name "
"under --hires-upscalers-dir (default: Latent)",
&hires_upscaler},
}; };
options.int_options = { options.int_options = {
@ -826,6 +856,22 @@ ArgOptions SDGenerationParams::get_options() {
"--upscale-tile-size", "--upscale-tile-size",
"tile size for ESRGAN upscaling (default: 128)", "tile size for ESRGAN upscaling (default: 128)",
&upscale_tile_size}, &upscale_tile_size},
{"",
"--hires-width",
"highres fix target width, 0 to use --hires-scale (default: 0)",
&hires_width},
{"",
"--hires-height",
"highres fix target height, 0 to use --hires-scale (default: 0)",
&hires_height},
{"",
"--hires-steps",
"highres fix second pass sample steps, 0 to reuse --steps (default: 0)",
&hires_steps},
{"",
"--hires-upscale-tile-size",
"highres fix upscaler tile size, reserved for model-backed upscalers (default: 128)",
&hires_upscale_tile_size},
}; };
options.float_options = { options.float_options = {
@ -913,6 +959,14 @@ ArgOptions SDGenerationParams::get_options() {
"--vae-tile-overlap", "--vae-tile-overlap",
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)", "tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
&vae_tiling_params.target_overlap}, &vae_tiling_params.target_overlap},
{"",
"--hires-scale",
"highres fix scale when target size is not set (default: 2.0)",
&hires_scale},
{"",
"--hires-denoising-strength",
"highres fix second pass denoising strength (default: 0.7)",
&hires_denoising_strength},
}; };
options.bool_options = { options.bool_options = {
@ -936,6 +990,11 @@ ArgOptions SDGenerationParams::get_options() {
"process vae in tiles to reduce memory usage", "process vae in tiles to reduce memory usage",
true, true,
&vae_tiling_params.enabled}, &vae_tiling_params.enabled},
{"",
"--hires",
"enable highres fix",
true,
&hires_enabled},
}; };
auto on_seed_arg = [&](int argc, const char** argv, int index) { auto on_seed_arg = [&](int argc, const char** argv, int index) {
@ -1424,6 +1483,37 @@ static bool parse_lora_json_field(const json& parent,
return true; return true;
} }
static bool resolve_model_file_from_dir(const std::string& model_name,
const std::string& model_dir,
const std::vector<std::string>& valid_ext,
const char* label,
std::string& resolved_path) {
if (model_dir.empty()) {
LOG_ERROR("%s directory is empty", label);
return false;
}
if (model_name.empty() ||
model_name.find('/') != std::string::npos ||
model_name.find('\\') != std::string::npos ||
fs::path(model_name).has_root_path() ||
fs::path(model_name).has_extension()) {
LOG_ERROR("%s must be a model name without path or extension: %s", label, model_name.c_str());
return false;
}
fs::path model_dir_path = model_dir;
for (const auto& ext : valid_ext) {
fs::path try_path = model_dir_path / (model_name + ext);
if (fs::exists(try_path) && fs::is_regular_file(try_path)) {
resolved_path = try_path.lexically_normal().string();
return true;
}
}
LOG_ERROR("can not find %s %s in %s", label, model_name.c_str(), model_dir_path.lexically_normal().string().c_str());
return false;
}
bool SDGenerationParams::from_json_str( bool SDGenerationParams::from_json_str(
const std::string& json_str, const std::string& json_str,
const std::function<std::string(const std::string&)>& lora_path_resolver) { const std::function<std::string(const std::string&)>& lora_path_resolver) {
@ -1487,6 +1577,34 @@ bool SDGenerationParams::from_json_str(
load_if_exists("increase_ref_index", increase_ref_index); load_if_exists("increase_ref_index", increase_ref_index);
load_if_exists("embed_image_metadata", embed_image_metadata); load_if_exists("embed_image_metadata", embed_image_metadata);
if (j.contains("hires") && j["hires"].is_object()) {
const json& hires_json = j["hires"];
if (hires_json.contains("enabled") && hires_json["enabled"].is_boolean()) {
hires_enabled = hires_json["enabled"];
}
if (hires_json.contains("upscaler") && hires_json["upscaler"].is_string()) {
hires_upscaler = hires_json["upscaler"];
}
if (hires_json.contains("scale") && hires_json["scale"].is_number()) {
hires_scale = hires_json["scale"];
}
if (hires_json.contains("target_width") && hires_json["target_width"].is_number_integer()) {
hires_width = hires_json["target_width"];
}
if (hires_json.contains("target_height") && hires_json["target_height"].is_number_integer()) {
hires_height = hires_json["target_height"];
}
if (hires_json.contains("steps") && hires_json["steps"].is_number_integer()) {
hires_steps = hires_json["steps"];
}
if (hires_json.contains("denoising_strength") && hires_json["denoising_strength"].is_number()) {
hires_denoising_strength = hires_json["denoising_strength"];
}
if (hires_json.contains("upscale_tile_size") && hires_json["upscale_tile_size"].is_number_integer()) {
hires_upscale_tile_size = hires_json["upscale_tile_size"];
}
}
auto parse_sample_params_json = [&](const json& sample_json, auto parse_sample_params_json = [&](const json& sample_json,
sd_sample_params_t& target_params, sd_sample_params_t& target_params,
std::vector<int>& target_skip_layers, std::vector<int>& target_skip_layers,
@ -1589,10 +1707,18 @@ bool SDGenerationParams::from_json_str(
LOG_ERROR("invalid init_image"); LOG_ERROR("invalid init_image");
return false; return false;
} }
if (!parse_image_json_field(j, "end_image", 3, width, height, end_image)) {
LOG_ERROR("invalid end_image");
return false;
}
if (!parse_image_array_json_field(j, "ref_images", 3, width, height, ref_images)) { if (!parse_image_array_json_field(j, "ref_images", 3, width, height, ref_images)) {
LOG_ERROR("invalid ref_images"); LOG_ERROR("invalid ref_images");
return false; return false;
} }
if (!parse_image_array_json_field(j, "control_frames", 3, width, height, control_frames)) {
LOG_ERROR("invalid control_frames");
return false;
}
if (!parse_image_json_field(j, "mask_image", 1, width, height, mask_image)) { if (!parse_image_json_field(j, "mask_image", 1, width, height, mask_image)) {
LOG_ERROR("invalid mask_image"); LOG_ERROR("invalid mask_image");
return false; return false;
@ -1792,7 +1918,7 @@ bool SDGenerationParams::initialize_cache_params() {
return true; return true;
} }
bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict) { bool SDGenerationParams::resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict) {
if (high_noise_sample_params.sample_steps <= 0) { if (high_noise_sample_params.sample_steps <= 0) {
high_noise_sample_params.sample_steps = -1; high_noise_sample_params.sample_steps = -1;
} }
@ -1811,6 +1937,27 @@ bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict)
sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100); sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100);
} }
hires_upscaler_model_path.clear();
if (hires_enabled) {
if (hires_upscaler.empty()) {
hires_upscaler = "Latent";
}
resolved_hires_upscaler = str_to_sd_hires_upscaler(hires_upscaler.c_str());
if (resolved_hires_upscaler == SD_HIRES_UPSCALER_NONE) {
hires_enabled = false;
} else if (resolved_hires_upscaler == SD_HIRES_UPSCALER_COUNT) {
static const std::vector<std::string> valid_ext = {".gguf", ".safetensors", ".pt", ".pth"};
if (!resolve_model_file_from_dir(hires_upscaler,
hires_upscalers_dir,
valid_ext,
"hires upscaler",
hires_upscaler_model_path)) {
return false;
}
resolved_hires_upscaler = SD_HIRES_UPSCALER_MODEL;
}
}
prompt_with_lora = prompt; prompt_with_lora = prompt;
if (!lora_model_dir.empty()) { if (!lora_model_dir.empty()) {
extract_and_remove_lora(lora_model_dir); extract_and_remove_lora(lora_model_dir);
@ -1875,6 +2022,29 @@ bool SDGenerationParams::validate(SDMode mode) {
return false; return false;
} }
if (hires_enabled) {
if (hires_width < 0 || hires_height < 0) {
LOG_ERROR("error: hires target width and height must be >= 0");
return false;
}
if (hires_scale <= 0.f && hires_width <= 0 && hires_height <= 0) {
LOG_ERROR("error: hires scale must be positive when target size is not set");
return false;
}
if (hires_steps < 0) {
LOG_ERROR("error: hires steps must be >= 0");
return false;
}
if (hires_denoising_strength <= 0.f || hires_denoising_strength > 1.f) {
LOG_ERROR("error: hires denoising strength must be in (0.0, 1.0]");
return false;
}
if (hires_upscale_tile_size < 1) {
LOG_ERROR("error: hires upscale tile size must be positive");
return false;
}
}
if (mode == UPSCALE) { if (mode == UPSCALE) {
if (init_image_path.length() == 0) { if (init_image_path.length() == 0) {
LOG_ERROR("error: upscale mode needs an init image (--init-img)\n"); LOG_ERROR("error: upscale mode needs an init image (--init-img)\n");
@ -1885,8 +2055,11 @@ bool SDGenerationParams::validate(SDMode mode) {
return true; return true;
} }
bool SDGenerationParams::resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict) { bool SDGenerationParams::resolve_and_validate(SDMode mode,
if (!resolve(lora_model_dir, strict)) { const std::string& lora_model_dir,
const std::string& hires_upscalers_dir,
bool strict) {
if (!resolve(lora_model_dir, hires_upscalers_dir, strict)) {
return false; return false;
} }
if (!validate(mode)) { if (!validate(mode)) {
@ -1957,6 +2130,16 @@ sd_img_gen_params_t SDGenerationParams::to_sd_img_gen_params_t() {
params.pm_params = pm_params; params.pm_params = pm_params;
params.vae_tiling_params = vae_tiling_params; params.vae_tiling_params = vae_tiling_params;
params.cache = cache_params; params.cache = cache_params;
params.hires.enabled = hires_enabled;
params.hires.upscaler = resolved_hires_upscaler;
params.hires.model_path = hires_upscaler_model_path.empty() ? nullptr : hires_upscaler_model_path.c_str();
params.hires.scale = hires_scale;
params.hires.target_width = hires_width;
params.hires.target_height = hires_height;
params.hires.steps = hires_steps;
params.hires.denoising_strength = hires_denoising_strength;
params.hires.upscale_tile_size = hires_upscale_tile_size;
return params; return params;
} }
@ -2081,6 +2264,15 @@ std::string SDGenerationParams::to_string() const {
<< " seed: " << seed << ",\n" << " seed: " << seed << ",\n"
<< " upscale_repeats: " << upscale_repeats << ",\n" << " upscale_repeats: " << upscale_repeats << ",\n"
<< " upscale_tile_size: " << upscale_tile_size << ",\n" << " upscale_tile_size: " << upscale_tile_size << ",\n"
<< " hires: { enabled: " << (hires_enabled ? "true" : "false")
<< ", upscaler: \"" << hires_upscaler << "\""
<< ", model_path: \"" << hires_upscaler_model_path << "\""
<< ", scale: " << hires_scale
<< ", target_width: " << hires_width
<< ", target_height: " << hires_height
<< ", steps: " << hires_steps
<< ", denoising_strength: " << hires_denoising_strength
<< ", upscale_tile_size: " << hires_upscale_tile_size << " },\n"
<< " vae_tiling_params: { " << " vae_tiling_params: { "
<< vae_tiling_params.enabled << ", " << vae_tiling_params.enabled << ", "
<< vae_tiling_params.tile_size_x << ", " << vae_tiling_params.tile_size_x << ", "
@ -2096,7 +2288,192 @@ std::string version_string() {
return std::string("stable-diffusion.cpp version ") + sd_version() + ", commit " + sd_commit(); return std::string("stable-diffusion.cpp version ") + sd_version() + ", commit " + sd_commit();
} }
// Convert a possibly-null C string into a std::string ("" for nullptr), so
// metadata fields never dereference a null pointer.
static std::string safe_json_string(const char* value) {
    if (value == nullptr) {
        return std::string();
    }
    return std::string(value);
}
// Store sd_basename(path) under `key` in `target`; empty paths are skipped so
// unset model files do not appear as empty entries in the metadata.
static void set_json_basename_if_not_empty(json& target, const char* key, const std::string& path) {
    if (path.empty()) {
        return;
    }
    target[key] = sd_basename(path);
}
// Build the JSON fragment describing one sampling configuration (steps, eta,
// guidance incl. SLG, optional method/scheduler names and custom sigmas).
// `method`/`scheduler` are emitted only when they hold a real enum value
// (i.e. not the *_COUNT sentinel); custom_sigmas is included only when the
// caller passes a non-null pointer.
static json build_sampling_metadata_json(const sd_sample_params_t& sample_params,
                                         const std::vector<int>& skip_layers,
                                         const std::vector<float>* custom_sigmas = nullptr) {
    json slg;
    slg["scale"] = sample_params.guidance.slg.scale;
    slg["layers"] = skip_layers;
    slg["start"] = sample_params.guidance.slg.layer_start;
    slg["end"] = sample_params.guidance.slg.layer_end;

    json guidance;
    guidance["txt_cfg"] = sample_params.guidance.txt_cfg;
    guidance["img_cfg"] = sample_params.guidance.img_cfg;
    guidance["distilled_guidance"] = sample_params.guidance.distilled_guidance;
    guidance["slg"] = std::move(slg);

    json sampling;
    sampling["steps"] = sample_params.sample_steps;
    sampling["eta"] = sample_params.eta;
    sampling["shifted_timestep"] = sample_params.shifted_timestep;
    sampling["flow_shift"] = sample_params.flow_shift;
    sampling["guidance"] = std::move(guidance);
    if (sample_params.sample_method != SAMPLE_METHOD_COUNT) {
        sampling["method"] = safe_json_string(sd_sample_method_name(sample_params.sample_method));
    }
    if (sample_params.scheduler != SCHEDULER_COUNT) {
        sampling["scheduler"] = safe_json_string(sd_scheduler_name(sample_params.scheduler));
    }
    if (custom_sigmas != nullptr) {
        sampling["custom_sigmas"] = *custom_sigmas;
    }
    return sampling;
}
// Serialize the full generation configuration into a compact
// "sdcpp.image.params/v1" JSON document; get_image_params appends it to the
// image parameter string so outputs carry machine-readable generation
// metadata. Optional sections (models, video, loras, hires, cache,
// vae_tiling) are emitted only when they carry information.
std::string build_sdcpp_image_metadata_json(const SDContextParams& ctx_params,
                                            const SDGenerationParams& gen_params,
                                            int64_t seed,
                                            SDMode mode) {
    json root;
    // Schema tag so readers can detect/version this format.
    root["schema"] = "sdcpp.image.params/v1";
    root["mode"] = mode == VID_GEN ? "vid_gen" : "img_gen";
    root["generator"] = {
        {"name", "stable-diffusion.cpp"},
        {"version", safe_json_string(sd_version())},
        {"commit", safe_json_string(sd_commit())},
    };
    // The resolved (actual) output size, not the raw request.
    root["seed"] = seed;
    root["width"] = gen_params.get_resolved_width();
    root["height"] = gen_params.get_resolved_height();
    root["prompt"] = {
        {"positive", gen_params.prompt},
        {"negative", gen_params.negative_prompt},
    };
    root["sampling"] = build_sampling_metadata_json(gen_params.sample_params,
                                                    gen_params.skip_layers,
                                                    &gen_params.custom_sigmas);
    // Record only the file names (basenames) of the model files that were
    // actually provided; unset paths are omitted entirely.
    json models;
    set_json_basename_if_not_empty(models, "model", ctx_params.model_path);
    set_json_basename_if_not_empty(models, "clip_l", ctx_params.clip_l_path);
    set_json_basename_if_not_empty(models, "clip_g", ctx_params.clip_g_path);
    set_json_basename_if_not_empty(models, "clip_vision", ctx_params.clip_vision_path);
    set_json_basename_if_not_empty(models, "t5xxl", ctx_params.t5xxl_path);
    set_json_basename_if_not_empty(models, "llm", ctx_params.llm_path);
    set_json_basename_if_not_empty(models, "llm_vision", ctx_params.llm_vision_path);
    set_json_basename_if_not_empty(models, "diffusion_model", ctx_params.diffusion_model_path);
    set_json_basename_if_not_empty(models, "high_noise_diffusion_model", ctx_params.high_noise_diffusion_model_path);
    set_json_basename_if_not_empty(models, "vae", ctx_params.vae_path);
    set_json_basename_if_not_empty(models, "taesd", ctx_params.taesd_path);
    set_json_basename_if_not_empty(models, "control_net", ctx_params.control_net_path);
    root["models"] = std::move(models);
    root["clip_skip"] = gen_params.clip_skip;
    root["strength"] = gen_params.strength;
    root["control_strength"] = gen_params.control_strength;
    root["auto_resize_ref_image"] = gen_params.auto_resize_ref_image;
    root["increase_ref_index"] = gen_params.increase_ref_index;
    // Video-only extras, including the high-noise expert's sampling setup
    // (no custom sigmas pointer is passed for the high-noise pass).
    if (mode == VID_GEN) {
        root["video"] = {
            {"frame_count", gen_params.video_frames},
            {"fps", gen_params.fps},
        };
        root["moe_boundary"] = gen_params.moe_boundary;
        root["vace_strength"] = gen_params.vace_strength;
        root["high_noise_sampling"] = build_sampling_metadata_json(gen_params.high_noise_sample_params,
                                                                   gen_params.high_noise_skip_layers);
    }
    root["rng"] = safe_json_string(sd_rng_type_name(ctx_params.rng_type));
    // sampler_rng is recorded only when explicitly set (RNG_TYPE_COUNT is the
    // "unset" sentinel).
    if (ctx_params.sampler_rng_type != RNG_TYPE_COUNT) {
        root["sampler_rng"] = safe_json_string(sd_rng_type_name(ctx_params.sampler_rng_type));
    }
    // Flatten both LoRA maps into one array, flagging high-noise entries.
    json loras = json::array();
    for (const auto& entry : gen_params.lora_map) {
        loras.push_back({
            {"name", sd_basename(entry.first)},
            {"multiplier", entry.second},
            {"is_high_noise", false},
        });
    }
    for (const auto& entry : gen_params.high_noise_lora_map) {
        loras.push_back({
            {"name", sd_basename(entry.first)},
            {"multiplier", entry.second},
            {"is_high_noise", true},
        });
    }
    if (!loras.empty()) {
        root["loras"] = std::move(loras);
    }
    // Highres-fix settings, only when the feature is enabled.
    if (gen_params.hires_enabled) {
        root["hires"] = {
            {"enabled", gen_params.hires_enabled},
            {"upscaler", gen_params.hires_upscaler},
            {"model", gen_params.hires_upscaler_model_path.empty() ? "" : sd_basename(gen_params.hires_upscaler_model_path)},
            {"scale", gen_params.hires_scale},
            {"target_width", gen_params.hires_width},
            {"target_height", gen_params.hires_height},
            {"steps", gen_params.hires_steps},
            {"denoising_strength", gen_params.hires_denoising_strength},
            {"upscale_tile_size", gen_params.hires_upscale_tile_size},
        };
    }
    // Full cache configuration dump (both the user's request and the
    // resolved parameters), only when caching is active.
    if (gen_params.cache_params.mode != SD_CACHE_DISABLED) {
        root["cache"] = {
            {"requested_mode", gen_params.cache_mode},
            {"requested_option", gen_params.cache_option},
            {"mode", gen_params.cache_params.mode},
            {"scm_mask", gen_params.scm_mask},
            {"scm_policy_dynamic", gen_params.scm_policy_dynamic},
            {"reuse_threshold", gen_params.cache_params.reuse_threshold},
            {"start_percent", gen_params.cache_params.start_percent},
            {"end_percent", gen_params.cache_params.end_percent},
            {"error_decay_rate", gen_params.cache_params.error_decay_rate},
            {"use_relative_threshold", gen_params.cache_params.use_relative_threshold},
            {"reset_error_on_compute", gen_params.cache_params.reset_error_on_compute},
            {"Fn_compute_blocks", gen_params.cache_params.Fn_compute_blocks},
            {"Bn_compute_blocks", gen_params.cache_params.Bn_compute_blocks},
            {"residual_diff_threshold", gen_params.cache_params.residual_diff_threshold},
            {"max_warmup_steps", gen_params.cache_params.max_warmup_steps},
            {"max_cached_steps", gen_params.cache_params.max_cached_steps},
            {"max_continuous_cached_steps", gen_params.cache_params.max_continuous_cached_steps},
            {"taylorseer_n_derivatives", gen_params.cache_params.taylorseer_n_derivatives},
            {"taylorseer_skip_interval", gen_params.cache_params.taylorseer_skip_interval},
            {"spectrum_w", gen_params.cache_params.spectrum_w},
            {"spectrum_m", gen_params.cache_params.spectrum_m},
            {"spectrum_lam", gen_params.cache_params.spectrum_lam},
            {"spectrum_window_size", gen_params.cache_params.spectrum_window_size},
            {"spectrum_flex_window", gen_params.cache_params.spectrum_flex_window},
            {"spectrum_warmup_steps", gen_params.cache_params.spectrum_warmup_steps},
            {"spectrum_stop_percent", gen_params.cache_params.spectrum_stop_percent},
        };
    }
    // VAE tiling layout, only when tiling is enabled.
    if (gen_params.vae_tiling_params.enabled) {
        root["vae_tiling"] = {
            {"enabled", gen_params.vae_tiling_params.enabled},
            {"tile_size_x", gen_params.vae_tiling_params.tile_size_x},
            {"tile_size_y", gen_params.vae_tiling_params.tile_size_y},
            {"target_overlap", gen_params.vae_tiling_params.target_overlap},
            {"rel_size_x", gen_params.vae_tiling_params.rel_size_x},
            {"rel_size_y", gen_params.vae_tiling_params.rel_size_y},
        };
    }
    return root.dump();
}
std::string get_image_params(const SDContextParams& ctx_params,
const SDGenerationParams& gen_params,
int64_t seed,
SDMode mode) {
std::string parameter_string; std::string parameter_string;
if (gen_params.prompt_with_lora.size() != 0) { if (gen_params.prompt_with_lora.size() != 0) {
parameter_string += gen_params.prompt_with_lora + "\n"; parameter_string += gen_params.prompt_with_lora + "\n";
@ -2109,7 +2486,7 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
parameter_string += "Steps: " + std::to_string(gen_params.sample_params.sample_steps) + ", "; parameter_string += "Steps: " + std::to_string(gen_params.sample_params.sample_steps) + ", ";
parameter_string += "CFG scale: " + std::to_string(gen_params.sample_params.guidance.txt_cfg) + ", "; parameter_string += "CFG scale: " + std::to_string(gen_params.sample_params.guidance.txt_cfg) + ", ";
if (gen_params.sample_params.guidance.slg.scale != 0 && gen_params.skip_layers.size() != 0) { if (gen_params.sample_params.guidance.slg.scale != 0 && gen_params.skip_layers.size() != 0) {
parameter_string += "SLG scale: " + std::to_string(gen_params.sample_params.guidance.txt_cfg) + ", "; parameter_string += "SLG scale: " + std::to_string(gen_params.sample_params.guidance.slg.scale) + ", ";
parameter_string += "Skip layers: ["; parameter_string += "Skip layers: [";
for (const auto& layer : gen_params.skip_layers) { for (const auto& layer : gen_params.skip_layers) {
parameter_string += std::to_string(layer) + ", "; parameter_string += std::to_string(layer) + ", ";
@ -2154,6 +2531,14 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
if (gen_params.clip_skip != -1) { if (gen_params.clip_skip != -1) {
parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", "; parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", ";
} }
if (gen_params.hires_enabled) {
parameter_string += "Hires upscale: " + gen_params.hires_upscaler + ", ";
parameter_string += "Hires scale: " + std::to_string(gen_params.hires_scale) + ", ";
parameter_string += "Hires resize: " + std::to_string(gen_params.hires_width) + "x" + std::to_string(gen_params.hires_height) + ", ";
parameter_string += "Hires steps: " + std::to_string(gen_params.hires_steps) + ", ";
parameter_string += "Denoising strength: " + std::to_string(gen_params.hires_denoising_strength) + ", ";
}
parameter_string += "Version: stable-diffusion.cpp"; parameter_string += "Version: stable-diffusion.cpp";
parameter_string += ", SDCPP: " + build_sdcpp_image_metadata_json(ctx_params, gen_params, seed, mode);
return parameter_string; return parameter_string;
} }

View File

@ -101,6 +101,7 @@ struct SDContextParams {
sd_type_t wtype = SD_TYPE_COUNT; sd_type_t wtype = SD_TYPE_COUNT;
std::string tensor_type_rules; std::string tensor_type_rules;
std::string lora_model_dir = "."; std::string lora_model_dir = ".";
std::string hires_upscalers_dir;
std::map<std::string, std::string> embedding_map; std::map<std::string, std::string> embedding_map;
std::vector<sd_embedding_t> embedding_vec; std::vector<sd_embedding_t> embedding_vec;
@ -108,6 +109,7 @@ struct SDContextParams {
rng_type_t rng_type = CUDA_RNG; rng_type_t rng_type = CUDA_RNG;
rng_type_t sampler_rng_type = RNG_TYPE_COUNT; rng_type_t sampler_rng_type = RNG_TYPE_COUNT;
bool offload_params_to_cpu = false; bool offload_params_to_cpu = false;
float max_vram = 0.f;
bool enable_mmap = false; bool enable_mmap = false;
bool control_net_cpu = false; bool control_net_cpu = false;
bool clip_on_cpu = false; bool clip_on_cpu = false;
@ -190,12 +192,23 @@ struct SDGenerationParams {
int upscale_repeats = 1; int upscale_repeats = 1;
int upscale_tile_size = 128; int upscale_tile_size = 128;
bool hires_enabled = false;
std::string hires_upscaler = "Latent";
std::string hires_upscaler_model_path;
float hires_scale = 2.f;
int hires_width = 0;
int hires_height = 0;
int hires_steps = 0;
float hires_denoising_strength = 0.7f;
int hires_upscale_tile_size = 128;
std::map<std::string, float> lora_map; std::map<std::string, float> lora_map;
std::map<std::string, float> high_noise_lora_map; std::map<std::string, float> high_noise_lora_map;
// Derived and normalized fields. // Derived and normalized fields.
std::string prompt_with_lora; // for metadata record only std::string prompt_with_lora; // for metadata record only
std::vector<sd_lora_t> lora_vec; std::vector<sd_lora_t> lora_vec;
sd_hires_upscaler_t resolved_hires_upscaler;
// Owned execution payload. // Owned execution payload.
SDImageOwner init_image; SDImageOwner init_image;
@ -225,15 +238,25 @@ struct SDGenerationParams {
void set_width_and_height_if_unset(int w, int h); void set_width_and_height_if_unset(int w, int h);
int get_resolved_width() const; int get_resolved_width() const;
int get_resolved_height() const; int get_resolved_height() const;
bool resolve(const std::string& lora_model_dir, bool strict = false); bool resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict = false);
bool validate(SDMode mode); bool validate(SDMode mode);
bool resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict = false); bool resolve_and_validate(SDMode mode,
const std::string& lora_model_dir,
const std::string& hires_upscalers_dir,
bool strict = false);
sd_img_gen_params_t to_sd_img_gen_params_t(); sd_img_gen_params_t to_sd_img_gen_params_t();
sd_vid_gen_params_t to_sd_vid_gen_params_t(); sd_vid_gen_params_t to_sd_vid_gen_params_t();
std::string to_string() const; std::string to_string() const;
}; };
std::string version_string(); std::string version_string();
std::string get_image_params(const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed); std::string build_sdcpp_image_metadata_json(const SDContextParams& ctx_params,
const SDGenerationParams& gen_params,
int64_t seed,
SDMode mode = IMG_GEN);
std::string get_image_params(const SDContextParams& ctx_params,
const SDGenerationParams& gen_params,
int64_t seed,
SDMode mode = IMG_GEN);
#endif // __EXAMPLES_COMMON_COMMON_H__ #endif // __EXAMPLES_COMMON_COMMON_H__

View File

@ -95,6 +95,57 @@ using WebPMuxPtr = std::unique_ptr<WebPMux, WebPMuxDeleter>;
using WebPAnimEncoderPtr = std::unique_ptr<WebPAnimEncoder, WebPAnimEncoderDeleter>; using WebPAnimEncoderPtr = std::unique_ptr<WebPAnimEncoder, WebPAnimEncoderDeleter>;
#endif #endif
#ifdef SD_USE_WEBM
// In-memory implementation of mkvmuxer::IMkvWriter: collects the muxer
// output into a growable byte buffer instead of a file, so the finished
// stream can be inspected or written out elsewhere.
class MemoryMkvWriter : public mkvmuxer::IMkvWriter {
public:
    // Write `len` bytes at the current position, growing the buffer as
    // needed. Returns 0 on success, -1 when asked to copy from a null buffer.
    mkvmuxer::int32 Write(const void* buf, mkvmuxer::uint32 len) override {
        if (len > 0 && buf == nullptr) {
            return -1;
        }
        const size_t write_end = position_ + static_cast<size_t>(len);
        if (data_.size() < write_end) {
            data_.resize(write_end);
        }
        if (len > 0) {
            memcpy(data_.data() + position_, buf, len);
        }
        position_ = write_end;
        return 0;
    }

    // Current write offset.
    mkvmuxer::int64 Position() const override {
        return static_cast<mkvmuxer::int64>(position_);
    }

    // Absolute seek; grows the buffer when seeking past the end so the muxer
    // can later patch headers it wrote earlier. Negative positions fail.
    mkvmuxer::int32 Position(mkvmuxer::int64 position) override {
        if (position < 0) {
            return -1;
        }
        const size_t target_pos = static_cast<size_t>(position);
        if (data_.size() < target_pos) {
            data_.resize(target_pos);
        }
        position_ = target_pos;
        return 0;
    }

    bool Seekable() const override {
        return true;
    }

    void ElementStartNotify(mkvmuxer::uint64, mkvmuxer::int64) override {
    }

    // Read-only access to the accumulated stream bytes.
    const std::vector<uint8_t>& data() const {
        return data_;
    }

private:
    std::vector<uint8_t> data_;  // muxer output accumulated so far
    size_t position_ = 0;        // current write offset into data_
};
#endif
bool read_binary_file_bytes(const char* path, std::vector<uint8_t>& data) { bool read_binary_file_bytes(const char* path, std::vector<uint8_t>& data) {
std::ifstream fin(fs::path(path), std::ios::binary); std::ifstream fin(fs::path(path), std::ios::binary);
if (!fin) { if (!fin) {
@ -570,6 +621,32 @@ void write_u16_le(FILE* f, uint16_t val) {
fwrite(&val, 2, 1, f); fwrite(&val, 2, 1, f);
} }
// Append a 32-bit value to `data` in little-endian byte order.
void write_u32_le(std::vector<uint8_t>& data, uint32_t val) {
    for (int shift = 0; shift < 32; shift += 8) {
        data.push_back(static_cast<uint8_t>((val >> shift) & 0xFF));
    }
}
// Append a 16-bit value to `data` in little-endian byte order.
void write_u16_le(std::vector<uint8_t>& data, uint16_t val) {
    const uint8_t lo = static_cast<uint8_t>(val & 0xFF);
    const uint8_t hi = static_cast<uint8_t>((val >> 8) & 0xFF);
    data.push_back(lo);
    data.push_back(hi);
}
// Overwrite 4 bytes at `offset` in `data` with `val` in little-endian order.
// Out-of-range offsets are ignored (best-effort, matching the writer helpers
// above). The bounds check is phrased to avoid unsigned wrap-around of
// `offset + 4`: with the naive `offset + 4 > data.size()` test, an offset
// near SIZE_MAX wraps to a small value, passes the check, and writes out of
// bounds.
void patch_u32_le(std::vector<uint8_t>& data, size_t offset, uint32_t val) {
    if (data.size() < 4 || offset > data.size() - 4) {
        return;
    }
    data[offset + 0] = static_cast<uint8_t>(val & 0xFF);
    data[offset + 1] = static_cast<uint8_t>((val >> 8) & 0xFF);
    data[offset + 2] = static_cast<uint8_t>((val >> 16) & 0xFF);
    data[offset + 3] = static_cast<uint8_t>((val >> 24) & 0xFF);
}
// Append a 4-character RIFF chunk identifier (e.g. "RIFF", "avih") to `data`.
void write_fourcc(std::vector<uint8_t>& data, const char* fourcc) {
    for (int i = 0; i < 4; ++i) {
        data.push_back(static_cast<uint8_t>(fourcc[i]));
    }
}
EncodedImageFormat encoded_image_format_from_path(const std::string& path) { EncodedImageFormat encoded_image_format_from_path(const std::string& path) {
std::string ext = fs::path(path).extension().string(); std::string ext = fs::path(path).extension().string();
std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
@ -699,95 +776,96 @@ uint8_t* load_image_from_memory(const char* image_bytes,
return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel); return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel);
} }
int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { std::vector<uint8_t> create_mjpg_avi_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) {
if (num_images == 0) { if (num_images == 0) {
fprintf(stderr, "Error: Image array is empty.\n"); fprintf(stderr, "Error: Image array is empty.\n");
return -1; return {};
} }
FilePtr file(fopen(filename, "wb"));
if (!file) {
perror("Error opening file for writing");
return -1;
}
FILE* f = file.get();
uint32_t width = images[0].width; uint32_t width = images[0].width;
uint32_t height = images[0].height; uint32_t height = images[0].height;
uint32_t channels = images[0].channel; uint32_t channels = images[0].channel;
if (channels != 3 && channels != 4) { if (channels != 3 && channels != 4) {
fprintf(stderr, "Error: Unsupported channel count: %u\n", channels); fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
return -1; return {};
} }
fwrite("RIFF", 4, 1, f); // stb_image_write changes JPEG sampling behavior above quality 90.
long riff_size_pos = ftell(f); // MJPG AVI playback is more compatible when we keep the encoder on the
write_u32_le(f, 0); // <= 90 path.
fwrite("AVI ", 4, 1, f); const int mjpg_quality = std::clamp(quality, 1, 90);
fwrite("LIST", 4, 1, f); std::vector<uint8_t> avi_data;
write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); avi_data.reserve(static_cast<size_t>(num_images) * 1024);
fwrite("hdrl", 4, 1, f);
fwrite("avih", 4, 1, f); write_fourcc(avi_data, "RIFF");
write_u32_le(f, 56); const size_t riff_size_pos = avi_data.size();
write_u32_le(f, 1000000 / fps); write_u32_le(avi_data, 0);
write_u32_le(f, 0); write_fourcc(avi_data, "AVI ");
write_u32_le(f, 0);
write_u32_le(f, 0x110);
write_u32_le(f, num_images);
write_u32_le(f, 0);
write_u32_le(f, 1);
write_u32_le(f, width * height * 3);
write_u32_le(f, width);
write_u32_le(f, height);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_u32_le(f, 0);
fwrite("LIST", 4, 1, f); write_fourcc(avi_data, "LIST");
write_u32_le(f, 4 + 8 + 56 + 8 + 40); write_u32_le(avi_data, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
fwrite("strl", 4, 1, f); write_fourcc(avi_data, "hdrl");
fwrite("strh", 4, 1, f); write_fourcc(avi_data, "avih");
write_u32_le(f, 56); write_u32_le(avi_data, 56);
fwrite("vids", 4, 1, f); write_u32_le(avi_data, 1000000 / fps);
fwrite("MJPG", 4, 1, f); write_u32_le(avi_data, 0);
write_u32_le(f, 0); write_u32_le(avi_data, 0);
write_u16_le(f, 0); write_u32_le(avi_data, 0x110);
write_u16_le(f, 0); write_u32_le(avi_data, num_images);
write_u32_le(f, 0); write_u32_le(avi_data, 0);
write_u32_le(f, 1); write_u32_le(avi_data, 1);
write_u32_le(f, fps); write_u32_le(avi_data, width * height * 3);
write_u32_le(f, 0); write_u32_le(avi_data, width);
write_u32_le(f, num_images); write_u32_le(avi_data, height);
write_u32_le(f, width * height * 3); write_u32_le(avi_data, 0);
write_u32_le(f, (uint32_t)-1); write_u32_le(avi_data, 0);
write_u32_le(f, 0); write_u32_le(avi_data, 0);
write_u16_le(f, 0); write_u32_le(avi_data, 0);
write_u16_le(f, 0);
write_u16_le(f, 0);
write_u16_le(f, 0);
fwrite("strf", 4, 1, f); write_fourcc(avi_data, "LIST");
write_u32_le(f, 40); write_u32_le(avi_data, 4 + 8 + 56 + 8 + 40);
write_u32_le(f, 40); write_fourcc(avi_data, "strl");
write_u32_le(f, width);
write_u32_le(f, height);
write_u16_le(f, 1);
write_u16_le(f, 24);
fwrite("MJPG", 4, 1, f);
write_u32_le(f, width * height * 3);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_u32_le(f, 0);
fwrite("LIST", 4, 1, f); write_fourcc(avi_data, "strh");
long movi_size_pos = ftell(f); write_u32_le(avi_data, 56);
write_u32_le(f, 0); write_fourcc(avi_data, "vids");
fwrite("movi", 4, 1, f); write_fourcc(avi_data, "MJPG");
write_u32_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 1);
write_u32_le(avi_data, fps);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, num_images);
write_u32_le(avi_data, width * height * 3);
write_u32_le(avi_data, static_cast<uint32_t>(-1));
write_u32_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_fourcc(avi_data, "strf");
write_u32_le(avi_data, 40);
write_u32_le(avi_data, 40);
write_u32_le(avi_data, width);
write_u32_le(avi_data, height);
write_u16_le(avi_data, 1);
write_u16_le(avi_data, 24);
write_fourcc(avi_data, "MJPG");
write_u32_le(avi_data, width * height * 3);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_fourcc(avi_data, "LIST");
const size_t movi_size_pos = avi_data.size();
write_u32_le(avi_data, 0);
write_fourcc(avi_data, "movi");
std::vector<avi_index_entry> index(static_cast<size_t>(num_images)); std::vector<avi_index_entry> index(static_cast<size_t>(num_images));
std::vector<uint8_t> jpeg_data; std::vector<uint8_t> jpeg_data;
@ -801,55 +879,61 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int
buffer->insert(buffer->end(), src, src + size); buffer->insert(buffer->end(), src, src + size);
}; };
if (!stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, quality)) { if (!stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, mjpg_quality)) {
fprintf(stderr, "Error: Failed to encode JPEG frame.\n"); fprintf(stderr, "Error: Failed to encode JPEG frame.\n");
return -1; return {};
} }
fwrite("00dc", 4, 1, f); index[i].offset = static_cast<uint32_t>(avi_data.size());
write_u32_le(f, (uint32_t)jpeg_data.size()); write_fourcc(avi_data, "00dc");
index[i].offset = ftell(f) - 8; write_u32_le(avi_data, static_cast<uint32_t>(jpeg_data.size()));
index[i].size = (uint32_t)jpeg_data.size(); index[i].size = (uint32_t)jpeg_data.size();
fwrite(jpeg_data.data(), 1, jpeg_data.size(), f); avi_data.insert(avi_data.end(), jpeg_data.begin(), jpeg_data.end());
if (jpeg_data.size() % 2) { if (jpeg_data.size() % 2) {
fputc(0, f); avi_data.push_back(0);
} }
} }
long cur_pos = ftell(f); const size_t movi_size = avi_data.size() - movi_size_pos - 4;
long movi_size = cur_pos - movi_size_pos - 4; patch_u32_le(avi_data, movi_size_pos, static_cast<uint32_t>(movi_size));
fseek(f, movi_size_pos, SEEK_SET);
write_u32_le(f, movi_size);
fseek(f, cur_pos, SEEK_SET);
fwrite("idx1", 4, 1, f); write_fourcc(avi_data, "idx1");
write_u32_le(f, num_images * 16); write_u32_le(avi_data, num_images * 16);
for (int i = 0; i < num_images; i++) { for (int i = 0; i < num_images; i++) {
fwrite("00dc", 4, 1, f); write_fourcc(avi_data, "00dc");
write_u32_le(f, 0x10); write_u32_le(avi_data, 0x10);
write_u32_le(f, index[i].offset); write_u32_le(avi_data, index[i].offset);
write_u32_le(f, index[i].size); write_u32_le(avi_data, index[i].size);
} }
cur_pos = ftell(f); const size_t file_size = avi_data.size() - riff_size_pos - 4;
long file_size = cur_pos - riff_size_pos - 4; patch_u32_le(avi_data, riff_size_pos, static_cast<uint32_t>(file_size));
fseek(f, riff_size_pos, SEEK_SET);
write_u32_le(f, file_size);
fseek(f, cur_pos, SEEK_SET);
return avi_data;
}
// Encodes `images` as an MJPG AVI and writes the result to `filename`.
// Returns 0 on success, -1 on encode failure or file-write failure.
int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
    const std::vector<uint8_t> avi_bytes = create_mjpg_avi_from_sd_images_to_vector(images, num_images, fps, quality);
    if (avi_bytes.empty()) {
        return -1;
    }
    if (write_binary_file_bytes(filename, avi_bytes)) {
        return 0;
    }
    perror("Error opening file for writing");
    return -1;
}
#ifdef SD_USE_WEBP #ifdef SD_USE_WEBP
int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { std::vector<uint8_t> create_animated_webp_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) {
if (num_images == 0) { if (num_images == 0) {
fprintf(stderr, "Error: Image array is empty.\n"); fprintf(stderr, "Error: Image array is empty.\n");
return -1; return {};
} }
if (fps <= 0) { if (fps <= 0) {
fprintf(stderr, "Error: FPS must be positive.\n"); fprintf(stderr, "Error: FPS must be positive.\n");
return -1; return {};
} }
const int width = static_cast<int>(images[0].width); const int width = static_cast<int>(images[0].width);
@ -857,14 +941,14 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
const int channels = static_cast<int>(images[0].channel); const int channels = static_cast<int>(images[0].channel);
if (channels != 1 && channels != 3 && channels != 4) { if (channels != 1 && channels != 3 && channels != 4) {
fprintf(stderr, "Error: Unsupported channel count: %d\n", channels); fprintf(stderr, "Error: Unsupported channel count: %d\n", channels);
return -1; return {};
} }
WebPAnimEncoderOptions anim_options; WebPAnimEncoderOptions anim_options;
WebPConfig config; WebPConfig config;
if (!WebPAnimEncoderOptionsInit(&anim_options) || !WebPConfigInit(&config)) { if (!WebPAnimEncoderOptionsInit(&anim_options) || !WebPConfigInit(&config)) {
fprintf(stderr, "Error: Failed to initialize WebP animation encoder.\n"); fprintf(stderr, "Error: Failed to initialize WebP animation encoder.\n");
return -1; return {};
} }
config.quality = static_cast<float>(quality); config.quality = static_cast<float>(quality);
@ -875,13 +959,13 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
} }
if (!WebPValidateConfig(&config)) { if (!WebPValidateConfig(&config)) {
fprintf(stderr, "Error: Invalid WebP encoder configuration.\n"); fprintf(stderr, "Error: Invalid WebP encoder configuration.\n");
return -1; return {};
} }
WebPAnimEncoderPtr enc(WebPAnimEncoderNew(width, height, &anim_options)); WebPAnimEncoderPtr enc(WebPAnimEncoderNew(width, height, &anim_options));
if (enc == nullptr) { if (enc == nullptr) {
fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n"); fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n");
return -1; return {};
} }
const int frame_duration_ms = std::max(1, static_cast<int>(std::lround(1000.0 / static_cast<double>(fps)))); const int frame_duration_ms = std::max(1, static_cast<int>(std::lround(1000.0 / static_cast<double>(fps))));
@ -891,13 +975,13 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
const sd_image_t& image = images[i]; const sd_image_t& image = images[i];
if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height) { if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height) {
fprintf(stderr, "Error: Frame dimensions do not match.\n"); fprintf(stderr, "Error: Frame dimensions do not match.\n");
return -1; return {};
} }
WebPPictureGuard picture; WebPPictureGuard picture;
if (!picture.initialized) { if (!picture.initialized) {
fprintf(stderr, "Error: Failed to initialize WebPPicture.\n"); fprintf(stderr, "Error: Failed to initialize WebPPicture.\n");
return -1; return {};
} }
picture.picture.use_argb = 1; picture.picture.use_argb = 1;
picture.picture.width = width; picture.picture.width = width;
@ -921,12 +1005,12 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
if (!picture_ok) { if (!picture_ok) {
fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n"); fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n");
return -1; return {};
} }
if (!WebPAnimEncoderAdd(enc.get(), &picture.picture, timestamp_ms, &config)) { if (!WebPAnimEncoderAdd(enc.get(), &picture.picture, timestamp_ms, &config)) {
fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc.get())); fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc.get()));
return -1; return {};
} }
timestamp_ms += frame_duration_ms; timestamp_ms += frame_duration_ms;
@ -934,52 +1018,50 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
if (!WebPAnimEncoderAdd(enc.get(), nullptr, timestamp_ms, nullptr)) { if (!WebPAnimEncoderAdd(enc.get(), nullptr, timestamp_ms, nullptr)) {
fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc.get())); fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc.get()));
return -1; return {};
} }
WebPDataGuard webp_data; WebPDataGuard webp_data;
if (!WebPAnimEncoderAssemble(enc.get(), &webp_data.data)) { if (!WebPAnimEncoderAssemble(enc.get(), &webp_data.data)) {
fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc.get())); fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc.get()));
return -1; return {};
} }
FilePtr f(fopen(filename, "wb")); return std::vector<uint8_t>(webp_data.data.bytes, webp_data.data.bytes + webp_data.data.size);
if (!f) { }
// Encodes `images` as an animated WebP and writes the result to
// `filename`. Returns 0 on success, -1 on encode or file-write failure.
int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
    const std::vector<uint8_t> webp_bytes = create_animated_webp_from_sd_images_to_vector(images, num_images, fps, quality);
    if (webp_bytes.empty()) {
        return -1;
    }
    if (write_binary_file_bytes(filename, webp_bytes)) {
        return 0;
    }
    perror("Error opening file for writing");
    return -1;
}
#endif #endif
#ifdef SD_USE_WEBM #ifdef SD_USE_WEBM
int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { std::vector<uint8_t> create_webm_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) {
if (num_images == 0) { if (num_images == 0) {
fprintf(stderr, "Error: Image array is empty.\n"); fprintf(stderr, "Error: Image array is empty.\n");
return -1; return {};
} }
if (fps <= 0) { if (fps <= 0) {
fprintf(stderr, "Error: FPS must be positive.\n"); fprintf(stderr, "Error: FPS must be positive.\n");
return -1; return {};
} }
const int width = static_cast<int>(images[0].width); const int width = static_cast<int>(images[0].width);
const int height = static_cast<int>(images[0].height); const int height = static_cast<int>(images[0].height);
if (width <= 0 || height <= 0) { if (width <= 0 || height <= 0) {
fprintf(stderr, "Error: Invalid frame dimensions.\n"); fprintf(stderr, "Error: Invalid frame dimensions.\n");
return -1; return {};
} }
mkvmuxer::MkvWriter writer; MemoryMkvWriter writer;
if (!writer.Open(filename)) {
fprintf(stderr, "Error: Could not open WebM file for writing.\n");
return -1;
}
const int ret = [&]() -> int { const int ret = [&]() -> int {
mkvmuxer::Segment segment; mkvmuxer::Segment segment;
@ -1045,30 +1127,63 @@ int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num
} }
return 0; return 0;
}(); }();
writer.Close(); if (ret != 0) {
return ret; return {};
}
return writer.data();
}
// Encodes `images` as a WebM video and writes the result to `filename`.
// Returns 0 on success, -1 on encode or file-write failure.
int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
    const std::vector<uint8_t> webm_bytes = create_webm_from_sd_images_to_vector(images, num_images, fps, quality);
    if (webm_bytes.empty()) {
        return -1;
    }
    if (write_binary_file_bytes(filename, webm_bytes)) {
        return 0;
    }
    perror("Error opening file for writing");
    return -1;
}
#endif #endif
// Dispatches to the encoder matching `output_format` and returns the
// encoded container bytes ("webm" and "webp" when compiled in; anything
// else falls through to MJPG AVI). Letter case and a single leading '.'
// in the format string are ignored.
std::vector<uint8_t> create_video_from_sd_images_to_vector(const std::string& output_format,
                                                           sd_image_t* images,
                                                           int num_images,
                                                           int fps,
                                                           int quality) {
    std::string fmt;
    fmt.reserve(output_format.size());
    for (unsigned char c : output_format) {
        fmt.push_back(static_cast<char>(tolower(c)));
    }
    if (!fmt.empty() && fmt.front() == '.') {
        fmt.erase(0, 1);
    }
#ifdef SD_USE_WEBM
    if (fmt == "webm") {
        return create_webm_from_sd_images_to_vector(images, num_images, fps, quality);
    }
#endif
#ifdef SD_USE_WEBP
    if (fmt == "webp") {
        return create_animated_webp_from_sd_images_to_vector(images, num_images, fps, quality);
    }
#endif
    return create_mjpg_avi_from_sd_images_to_vector(images, num_images, fps, quality);
}
int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::string path = filename ? filename : ""; std::string path = filename ? filename : "";
auto pos = path.find_last_of('.'); auto pos = path.find_last_of('.');
std::string ext = pos == std::string::npos ? "" : path.substr(pos); std::string ext = pos == std::string::npos ? "" : path.substr(pos);
for (char& ch : ext) { std::vector<uint8_t> video_data = create_video_from_sd_images_to_vector(ext, images, num_images, fps, quality);
ch = static_cast<char>(tolower(static_cast<unsigned char>(ch))); if (video_data.empty()) {
return -1;
} }
if (!write_binary_file_bytes(filename, video_data)) {
#ifdef SD_USE_WEBM perror("Error opening file for writing");
if (ext == ".webm") { return -1;
return create_webm_from_sd_images(filename, images, num_images, fps, quality);
} }
#endif return 0;
#ifdef SD_USE_WEBP
if (ext == ".webp") {
return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality);
}
#endif
return create_mjpg_avi_from_sd_images(filename, images, num_images, fps, quality);
} }

View File

@ -58,6 +58,10 @@ int create_mjpg_avi_from_sd_images(const char* filename,
int num_images, int num_images,
int fps, int fps,
int quality = 90); int quality = 90);
std::vector<uint8_t> create_mjpg_avi_from_sd_images_to_vector(sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#ifdef SD_USE_WEBP #ifdef SD_USE_WEBP
int create_animated_webp_from_sd_images(const char* filename, int create_animated_webp_from_sd_images(const char* filename,
@ -65,6 +69,10 @@ int create_animated_webp_from_sd_images(const char* filename,
int num_images, int num_images,
int fps, int fps,
int quality = 90); int quality = 90);
std::vector<uint8_t> create_animated_webp_from_sd_images_to_vector(sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#endif #endif
#ifdef SD_USE_WEBM #ifdef SD_USE_WEBM
@ -73,6 +81,10 @@ int create_webm_from_sd_images(const char* filename,
int num_images, int num_images,
int fps, int fps,
int quality = 90); int quality = 90);
std::vector<uint8_t> create_webm_from_sd_images_to_vector(sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#endif #endif
int create_video_from_sd_images(const char* filename, int create_video_from_sd_images(const char* filename,
@ -80,5 +92,10 @@ int create_video_from_sd_images(const char* filename,
int num_images, int num_images,
int fps, int fps,
int quality = 90); int quality = 90);
std::vector<uint8_t> create_video_from_sd_images_to_vector(const std::string& output_format,
sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#endif // __MEDIA_IO_H__ #endif // __MEDIA_IO_H__

View File

@ -136,7 +136,8 @@ Context Options:
--clip_g <string> path to the clip-g text encoder --clip_g <string> path to the clip-g text encoder
--clip_vision <string> path to the clip-vision encoder --clip_vision <string> path to the clip-vision encoder
--t5xxl <string> path to the t5xxl text encoder --t5xxl <string> path to the t5xxl text encoder
--llm <string> path to the llm text encoder. For example: (qwenvl2.5 for qwen-image, mistral-small3.2 for flux2, ...) --llm <string> path to the llm text encoder. For example: (qwenvl2.5 for qwen-image,
mistral-small3.2 for flux2, ...)
--llm_vision <string> path to the llm vit --llm_vision <string> path to the llm vit
--qwen2vl <string> alias of --llm. Deprecated. --qwen2vl <string> alias of --llm. Deprecated.
--qwen2vl_vision <string> alias of --llm_vision. Deprecated. --qwen2vl_vision <string> alias of --llm_vision. Deprecated.
@ -148,16 +149,18 @@ Context Options:
--control-net <string> path to control net model --control-net <string> path to control net model
--embd-dir <string> embeddings directory --embd-dir <string> embeddings directory
--lora-model-dir <string> lora model directory --lora-model-dir <string> lora model directory
--hires-upscalers-dir <string> highres fix upscaler model directory
--tensor-type-rules <string> weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0") --tensor-type-rules <string> weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0")
--photo-maker <string> path to PHOTOMAKER model --photo-maker <string> path to PHOTOMAKER model
--upscale-model <string> path to esrgan model. --upscale-model <string> path to esrgan model.
-t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0, then threads will be set to the number of -t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0,
CPU physical cores then threads will be set to the number of CPU physical cores
--chroma-t5-mask-pad <int> t5 mask pad size of chroma --chroma-t5-mask-pad <int> t5 mask pad size of chroma
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5) --max-vram <float> maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables
--vae-tiling process vae in tiles to reduce memory usage graph splitting
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae --force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
when needed
--mmap whether to memory-map model --mmap whether to memory-map model
--control-net-cpu keep controlnet in cpu (for low vram) --control-net-cpu keep controlnet in cpu (for low vram)
--clip-on-cpu keep clip in cpu (for low vram) --clip-on-cpu keep clip in cpu (for low vram)
@ -172,20 +175,19 @@ Context Options:
--chroma-disable-dit-mask disable dit mask for chroma --chroma-disable-dit-mask disable dit mask for chroma
--qwen-image-zero-cond-t enable zero_cond_t for qwen image --qwen-image-zero-cond-t enable zero_cond_t for qwen image
--chroma-enable-t5-mask enable t5 mask for chroma --chroma-enable-t5-mask enable t5 mask for chroma
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the --type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K,
type of the weight file q4_K). If not specified, the default is the type of the weight file
--rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui) --rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)
--sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng --sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow] --prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow,
--lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights flux2_flow]
contain any quantized parameters, the at_runtime mode will be used; otherwise, --lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is
immediately will be used.The immediately mode may have precision and auto. In auto mode, if the model weights contain any quantized parameters,
compatibility issues with quantized parameters, but it usually offers faster inference the at_runtime mode will be used; otherwise, immediately will be used.The
speed and, in some cases, lower memory usage. The at_runtime mode, on the immediately mode may have precision and compatibility issues with quantized
other hand, is exactly the opposite. parameters, but it usually offers faster inference speed and, in some cases,
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32) lower memory usage. The at_runtime mode, on the other hand, is exactly the
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1 opposite.
(overrides --vae-tile-size)
Default Generation Options: Default Generation Options:
-p, --prompt <string> the prompt to render -p, --prompt <string> the prompt to render
@ -194,65 +196,97 @@ Default Generation Options:
--end-img <string> path to the end image, required by flf2v --end-img <string> path to the end image, required by flf2v
--mask <string> path to the mask image --mask <string> path to the mask image
--control-image <string> path to control image, control net --control-image <string> path to control image, control net
--control-video <string> path to control video frames, It must be a directory path. The video frames inside should be stored as images in --control-video <string> path to control video frames, It must be a directory path. The video frames
lexicographical (character) order. For example, if the control video path is inside should be stored as images in lexicographical (character) order. For
`frames`, the directory contain images such as 00.png, 01.png, ... etc. example, if the control video path is `frames`, the directory contain images
such as 00.png, 01.png, ... etc.
--pm-id-images-dir <string> path to PHOTOMAKER input id images dir --pm-id-images-dir <string> path to PHOTOMAKER input id images dir
--pm-id-embed-path <string> path to PHOTOMAKER v2 id embed --pm-id-embed-path <string> path to PHOTOMAKER v2 id embed
--hires-upscaler <string> highres fix upscaler, Lanczos, Nearest, Latent, Latent (nearest), Latent
(nearest-exact), Latent (antialiased), Latent (bicubic), Latent (bicubic
antialiased), or a model name under --hires-upscalers-dir (default: Latent)
-H, --height <int> image height, in pixel space (default: 512) -H, --height <int> image height, in pixel space (default: 512)
-W, --width <int> image width, in pixel space (default: 512) -W, --width <int> image width, in pixel space (default: 512)
--steps <int> number of sample steps (default: 20) --steps <int> number of sample steps (default: 20)
--high-noise-steps <int> (high noise) number of sample steps (default: -1 = auto) --high-noise-steps <int> (high noise) number of sample steps (default: -1 = auto)
--clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). <= 0 represents unspecified, --clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer
will be 1 for SD1.x, 2 for SD2.x (default: -1). <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
-b, --batch-count <int> batch count -b, --batch-count <int> batch count
--video-frames <int> video frames (default: 1) --video-frames <int> video frames (default: 1)
--fps <int> fps (default: 24) --fps <int> fps (default: 24)
--timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for NitroSD-Realism around 250 and 500 for --timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for
NitroSD-Vibrant NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
--upscale-repeats <int> Run the ESRGAN upscaler this many times (default: 1) --upscale-repeats <int> Run the ESRGAN upscaler this many times (default: 1)
--upscale-tile-size <int> tile size for ESRGAN upscaling (default: 128) --upscale-tile-size <int> tile size for ESRGAN upscaling (default: 128)
--hires-width <int> highres fix target width, 0 to use --hires-scale (default: 0)
--hires-height <int> highres fix target height, 0 to use --hires-scale (default: 0)
--hires-steps <int> highres fix second pass sample steps, 0 to reuse --steps (default: 0)
--hires-upscale-tile-size <int> highres fix upscaler tile size, reserved for model-backed upscalers (default:
128)
--cfg-scale <float> unconditional guidance scale: (default: 7.0) --cfg-scale <float> unconditional guidance scale: (default: 7.0)
--img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale) --img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same
as --cfg-scale)
--guidance <float> distilled guidance scale for models with guidance input (default: 3.5) --guidance <float> distilled guidance scale for models with guidance input (default: 3.5)
--slg-scale <float> skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means disabled, a value of 2.5 is nice for sd3.5 --slg-scale <float> skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means
medium disabled, a value of 2.5 is nice for sd3.5 medium
--skip-layer-start <float> SLG enabling point (default: 0.01) --skip-layer-start <float> SLG enabling point (default: 0.01)
--skip-layer-end <float> SLG disabling point (default: 0.2) --skip-layer-end <float> SLG disabling point (default: 0.2)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a) --eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and
res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto) --flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0) --high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale) --high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5) (default: same as --cfg-scale)
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0) --high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input
(default: 3.5)
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default:
0)
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01) --high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2) --high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a) --high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd,
res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--strength <float> strength for noising/unnoising (default: 0.75) --strength <float> strength for noising/unnoising (default: 0.75)
--pm-style-strength <float> --pm-style-strength <float>
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image --control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if `--high-noise-steps` is set to -1 destruction of information in init image
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if
`--high-noise-steps` is set to -1
--vace-strength <float>                wan vace strength
--vae-tile-overlap <float>             tile overlap for vae tiling, in fraction of tile size (default: 0.5)
--hires-scale <float>                  highres fix scale when target size is not set (default: 2.0)
--hires-denoising-strength <float>     highres fix second pass denoising strength (default: 0.7)
--increase-ref-index                   automatically increase the indices of references images based on the order they are listed (starting with 1).
--disable-auto-resize-ref-image        disable auto resize of ref images
--disable-image-metadata               do not embed generation metadata on image files
--vae-tiling process vae in tiles to reduce memory usage
--hires enable highres fix
-s, --seed                             RNG seed (default: 42, use random seed for < 0)
--sampling-method                      sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a otherwise)
--high-noise-sampling-method           (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan, euler_a otherwise
--scheduler                            denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default: discrete
--sigmas                               custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0").
--skip-layers                          layers to skip for SLG steps (default: [7,8,9])
--high-noise-skip-layers               (high noise) layers to skip for SLG steps (default: [7,8,9])
-r, --ref-image                        reference image for Flux Kontext models (can be used multiple times)
--cache-mode                           caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), 'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)
--cache-option                         named cache params (key=value format, comma-separated). easycache/ucache: threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=; spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples: "threshold=0.25" or "threshold=1.5,reset=0"
--scm-mask                             SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
--scm-policy                           SCM policy: 'dynamic' (default) or 'static'
--vae-tile-size                        tile size for vae tiling, format [X]x[Y] (default: 32x32)
--vae-relative-tile-size               relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)
```

View File

@ -9,7 +9,7 @@ The server currently exposes three API families:
- `sdcpp API` under `/sdcpp/v1/...` - `sdcpp API` under `/sdcpp/v1/...`
The `sdcpp API` is the native API surface. The `sdcpp API` is the native API surface.
Its request schema is the same schema used by `sd_cpp_extra_args`.
Global LoRA rule: Global LoRA rule:
@ -38,6 +38,8 @@ Current generation-related endpoints include:
- `POST /sdapi/v1/txt2img` - `POST /sdapi/v1/txt2img`
- `POST /sdapi/v1/img2img` - `POST /sdapi/v1/img2img`
- `GET /sdapi/v1/loras` - `GET /sdapi/v1/loras`
- `GET /sdapi/v1/upscalers`
- `GET /sdapi/v1/latent-upscale-modes`
- `GET /sdapi/v1/samplers` - `GET /sdapi/v1/samplers`
- `GET /sdapi/v1/schedulers` - `GET /sdapi/v1/schedulers`
- `GET /sdapi/v1/sd-models` - `GET /sdapi/v1/sd-models`
@ -55,8 +57,6 @@ Current endpoints include:
- `POST /sdcpp/v1/jobs/{id}/cancel` - `POST /sdcpp/v1/jobs/{id}/cancel`
- `POST /sdcpp/v1/vid_gen` - `POST /sdcpp/v1/vid_gen`
`POST /sdcpp/v1/vid_gen` is currently exposed but returns `501 Not Implemented`.
## `sd_cpp_extra_args` ## `sd_cpp_extra_args`
`sd_cpp_extra_args` is an extension mechanism for the compatibility APIs. `sd_cpp_extra_args` is an extension mechanism for the compatibility APIs.
@ -79,12 +79,12 @@ Behavior:
- The JSON block is parsed using the same field rules as the `sdcpp API`. - The JSON block is parsed using the same field rules as the `sdcpp API`.
- The block is removed from the final prompt before generation. - The block is removed from the final prompt before generation.
Supported use:
- extend `OpenAI API` requests with native `stable-diffusion.cpp` controls - extend `OpenAI API` requests with native `stable-diffusion.cpp` controls
- extend `sdapi` requests with native `stable-diffusion.cpp` controls - extend `sdapi` requests with native `stable-diffusion.cpp` controls
Unsupported use:
- do not use `sd_cpp_extra_args` with `/sdcpp/v1/*` - do not use `sd_cpp_extra_args` with `/sdcpp/v1/*`
@ -218,6 +218,13 @@ Currently supported request fields:
| `scheduler` | `string` | Scheduler name | | `scheduler` | `string` | Scheduler name |
| `lora` | `array<object>` | Structured LoRA list | | `lora` | `array<object>` | Structured LoRA list |
| `extra_images` | `array<string>` | Base64 or data URL images | | `extra_images` | `array<string>` | Base64 or data URL images |
| `enable_hr` | `boolean` | Enable highres fix for `txt2img` |
| `hr_upscaler` | `string` | `Lanczos`, `Nearest`, a latent mode such as `Latent (nearest-exact)`, or an upscaler model name from `/sdapi/v1/upscalers` |
| `hr_scale` | `number` | Highres scale when resize target is not set |
| `hr_resize_x` | `integer` | Highres target width, `0` to use scale |
| `hr_resize_y` | `integer` | Highres target height, `0` to use scale |
| `hr_steps` | `integer` | Highres second-pass sample steps, `0` to reuse `steps` |
| `denoising_strength` | `number` | Highres denoising strength for `txt2img` |
Native extension fields: Native extension fields:
@ -243,6 +250,8 @@ Currently supported request fields:
| `inpainting_mask_invert` | `integer` or `boolean` | Treated as invert flag | | `inpainting_mask_invert` | `integer` or `boolean` | Treated as invert flag |
| `denoising_strength` | `number` | Clamped to `0.0..1.0` | | `denoising_strength` | `number` | Clamped to `0.0..1.0` |
Highres fix fields are currently handled for `txt2img`; `img2img` uses `denoising_strength` as image-to-image strength.
Native extension fields: Native extension fields:
- any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt` - any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
@ -260,6 +269,8 @@ Response fields:
Currently exposed: Currently exposed:
- `GET /sdapi/v1/loras` - `GET /sdapi/v1/loras`
- `GET /sdapi/v1/upscalers`
- `GET /sdapi/v1/latent-upscale-modes`
- `GET /sdapi/v1/samplers` - `GET /sdapi/v1/samplers`
- `GET /sdapi/v1/schedulers` - `GET /sdapi/v1/schedulers`
- `GET /sdapi/v1/sd-models` - `GET /sdapi/v1/sd-models`
@ -274,6 +285,26 @@ Response fields:
| `[].name` | `string` | Display name derived from file stem | | `[].name` | `string` | Display name derived from file stem |
| `[].path` | `string` | Relative path under the configured LoRA directory | | `[].path` | `string` | Relative path under the configured LoRA directory |
`GET /sdapi/v1/upscalers`
| Field | Type | Notes |
| --- | --- | --- |
| `[].name` | `string` | Built-in name or model stem |
| `[].model_name` | `string \| null` | Model family label for model-backed upscalers |
| `[].model_path` | `string \| null` | Absolute model path for model-backed upscalers |
| `[].model_url` | `string \| null` | Currently always null |
| `[].scale` | `integer` | Currently `4` |
Built-in entries include `None`, `Lanczos`, and `Nearest`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
`GET /sdapi/v1/latent-upscale-modes`
| Field | Type | Notes |
| --- | --- | --- |
| `[].name` | `string` | WebUI-compatible latent upscale mode name |
Built-in latent modes include `Latent`, `Latent (nearest)`, `Latent (nearest-exact)`, `Latent (antialiased)`, `Latent (bicubic)`, and `Latent (bicubic antialiased)`.
`GET /sdapi/v1/samplers` `GET /sdapi/v1/samplers`
| Field | Type | Notes | | Field | Type | Notes |
@ -372,20 +403,26 @@ Field types:
Returns frontend-friendly capability metadata. Returns frontend-friendly capability metadata.
The mode-aware fields are the primary interface. The top-level compatibility fields are deprecated mirrors kept for older clients.

Top-level fields:

| Field | Type | Notes |
| --- | --- | --- |
| `model` | `object` | Loaded model metadata |
| `current_mode` | `string` | The native generation mode mirrored by top-level compatibility fields |
| `supported_modes` | `array<string>` | Supported native modes such as `img_gen` or `vid_gen` |
| `defaults` | `object` | Deprecated compatibility mirror of `defaults_by_mode[current_mode]` |
| `output_formats` | `array<string>` | Deprecated compatibility mirror of `output_formats_by_mode[current_mode]` |
| `features` | `object` | Deprecated compatibility mirror of `features_by_mode[current_mode]` |
| `defaults_by_mode` | `object` | Explicit defaults for each supported mode |
| `output_formats_by_mode` | `object` | Explicit output formats for each supported mode |
| `features_by_mode` | `object` | Explicit feature flags for each supported mode |
| `samplers` | `array<string>` | Available sampling methods |
| `schedulers` | `array<string>` | Available schedulers |
| `loras` | `array<object>` | Available LoRA entries |
| `upscalers` | `array<object>` | Available model-backed highres upscalers |
| `limits` | `object` | Shared queue and size limits |
`model`
@ -395,50 +432,24 @@ Nested fields currently returned:
| `model.stem` | `string` | | `model.stem` | `string` |
| `model.path` | `string` | | `model.path` | `string` |
Compatibility rules:

- `defaults`, `output_formats`, and `features` are deprecated compatibility mirrors
- those three top-level fields always mirror `current_mode`
- `supported_modes`, `defaults_by_mode`, `output_formats_by_mode`, and `features_by_mode` are the mode-aware fields

Mode-aware objects:

| Field | Type |
| --- | --- |
| `defaults_by_mode.img_gen` | `object` |
| `defaults_by_mode.vid_gen` | `object` |
| `output_formats_by_mode.img_gen` | `array<string>` |
| `output_formats_by_mode.vid_gen` | `array<string>` |
| `features_by_mode.img_gen` | `object` |
| `features_by_mode.vid_gen` | `object` |

Shared nested fields:
`loras`
@ -447,6 +458,14 @@ Nested fields currently returned:
| `loras[].name` | `string` | | `loras[].name` | `string` |
| `loras[].path` | `string` | | `loras[].path` | `string` |
`upscalers`
| Field | Type | Notes |
| --- | --- | --- |
| `upscalers[].name` | `string` | Built-in name or model stem; use this value in `hires.upscaler` |
Built-in entries include `None`, `Lanczos`, `Nearest`, `Latent`, `Latent (nearest)`, `Latent (nearest-exact)`, `Latent (antialiased)`, `Latent (bicubic)`, and `Latent (bicubic antialiased)`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
`limits`
| Field | Type | | Field | Type |
@ -458,19 +477,110 @@ Nested fields currently returned:
| `limits.max_batch_count` | `integer` | | `limits.max_batch_count` | `integer` |
| `limits.max_queue_size` | `integer` | | `limits.max_queue_size` | `integer` |
Shared default fields used by both `img_gen` and `vid_gen`:

| Field | Type |
| --- | --- |
| `prompt` | `string` |
| `negative_prompt` | `string` |
| `clip_skip` | `integer` |
| `width` | `integer` |
| `height` | `integer` |
| `strength` | `number` |
| `seed` | `integer` |
| `sample_params` | `object` |
| `sample_params.scheduler` | `string` |
| `sample_params.sample_method` | `string` |
| `sample_params.sample_steps` | `integer` |
| `sample_params.eta` | `number \| null` |
| `sample_params.shifted_timestep` | `integer` |
| `sample_params.flow_shift` | `number \| null` |
| `sample_params.guidance.txt_cfg` | `number` |
| `sample_params.guidance.img_cfg` | `number \| null` |
| `sample_params.guidance.distilled_guidance` | `number` |
| `sample_params.guidance.slg.layers` | `array<integer>` |
| `sample_params.guidance.slg.layer_start` | `number` |
| `sample_params.guidance.slg.layer_end` | `number` |
| `sample_params.guidance.slg.scale` | `number` |
| `vae_tiling_params` | `object` |
| `vae_tiling_params.enabled` | `boolean` |
| `vae_tiling_params.tile_size_x` | `integer` |
| `vae_tiling_params.tile_size_y` | `integer` |
| `vae_tiling_params.target_overlap` | `number` |
| `vae_tiling_params.rel_size_x` | `number` |
| `vae_tiling_params.rel_size_y` | `number` |
| `cache_mode` | `string` |
| `cache_option` | `string` |
| `scm_mask` | `string` |
| `scm_policy_dynamic` | `boolean` |
| `output_format` | `string` |
| `output_compression` | `integer` |
`img_gen`-specific default fields:
| Field | Type |
| --- | --- |
| `batch_count` | `integer` |
| `auto_resize_ref_image` | `boolean` |
| `increase_ref_index` | `boolean` |
| `control_strength` | `number` |
| `hires` | `object` |
| `hires.enabled` | `boolean` |
| `hires.upscaler` | `string` |
| `hires.scale` | `number` |
| `hires.target_width` | `integer` |
| `hires.target_height` | `integer` |
| `hires.steps` | `integer` |
| `hires.denoising_strength` | `number` |
| `hires.upscale_tile_size` | `integer` |
`vid_gen`-specific default fields:
| Field | Type |
| --- | --- |
| `video_frames` | `integer` |
| `fps` | `integer` |
| `moe_boundary` | `number` |
| `vace_strength` | `number` |
| `high_noise_sample_params` | `object` |
| `high_noise_sample_params.scheduler` | `string` |
| `high_noise_sample_params.sample_method` | `string` |
| `high_noise_sample_params.sample_steps` | `integer` |
| `high_noise_sample_params.eta` | `number \| null` |
| `high_noise_sample_params.shifted_timestep` | `integer` |
| `high_noise_sample_params.flow_shift` | `number \| null` |
| `high_noise_sample_params.guidance.txt_cfg` | `number` |
| `high_noise_sample_params.guidance.img_cfg` | `number \| null` |
| `high_noise_sample_params.guidance.distilled_guidance` | `number` |
| `high_noise_sample_params.guidance.slg.layers` | `array<integer>` |
| `high_noise_sample_params.guidance.slg.layer_start` | `number` |
| `high_noise_sample_params.guidance.slg.layer_end` | `number` |
| `high_noise_sample_params.guidance.slg.scale` | `number` |
Fields returned in `features_by_mode.img_gen`:
- `init_image`
- `mask_image`
- `control_image`
- `ref_images`
- `lora`
- `vae_tiling`
- `hires`
- `cache`
- `cancel_queued`
- `cancel_generating`
Fields returned in `features_by_mode.vid_gen`:
- `init_image`
- `end_image`
- `control_frames`
- `high_noise_sample_params`
- `lora`
- `vae_tiling`
- `cache`
- `cancel_queued`
- `cancel_generating`
#### `POST /sdcpp/v1/img_gen` #### `POST /sdcpp/v1/img_gen`
@ -521,9 +631,7 @@ Typical status codes:
- `409 Conflict` - `409 Conflict`
- `410 Gone` - `410 Gone`
### Request Body
Example: Example:
@ -569,6 +677,16 @@ Example:
}, },
"lora": [], "lora": [],
"hires": {
"enabled": false,
"upscaler": "Latent",
"scale": 2.0,
"target_width": 0,
"target_height": 0,
"steps": 0,
"denoising_strength": 0.7,
"upscale_tile_size": 128
},
"vae_tiling_params": { "vae_tiling_params": {
"enabled": false, "enabled": false,
@ -612,7 +730,7 @@ Channel expectations:
If omitted or null: If omitted or null:
- single-image fields map to an empty `sd_image_t` - single-image fields map to an empty `sd_image_t`
- array fields map to an empty C-style array, represented as `pointer = nullptr` and `count = 0`
### Field Mapping Summary ### Field Mapping Summary
@ -673,12 +791,23 @@ Other native fields:
| Field | Type | | Field | Type |
| --- | --- | | --- | --- |
| `hires` | `object` |
| `hires.enabled` | `boolean` |
| `hires.upscaler` | `string` |
| `hires.scale` | `number` |
| `hires.target_width` | `integer` |
| `hires.target_height` | `integer` |
| `hires.steps` | `integer` |
| `hires.denoising_strength` | `number` |
| `hires.upscale_tile_size` | `integer` |
| `vae_tiling_params` | `object` | | `vae_tiling_params` | `object` |
| `cache_mode` | `string` | | `cache_mode` | `string` |
| `cache_option` | `string` | | `cache_option` | `string` |
| `scm_mask` | `string` | | `scm_mask` | `string` |
| `scm_policy_dynamic` | `boolean` | | `scm_policy_dynamic` | `boolean` |
For `hires.upscaler`, use `Lanczos`, `Nearest`, `Latent`, `Latent (nearest)`, `Latent (nearest-exact)`, `Latent (antialiased)`, `Latent (bicubic)`, `Latent (bicubic antialiased)`, or an `upscalers[].name` value from `GET /sdcpp/v1/capabilities`. Model-backed upscalers are resolved as `--hires-upscalers-dir / (name + ext)` and must live directly in that directory.
HTTP-only output fields: HTTP-only output fields:
| Field | Type | | Field | Type |
@ -686,11 +815,11 @@ HTTP-only output fields:
| `output_format` | `string` | | `output_format` | `string` |
| `output_compression` | `integer` | | `output_compression` | `integer` |
### Optional Field Handling

Optional sampling fields may be omitted.

When omitted, backend defaults apply to these fields:
- `sample_params.scheduler` - `sample_params.scheduler`
- `sample_params.sample_method` - `sample_params.sample_method`
@ -766,29 +895,394 @@ Example cancelled job:
} }
``` ```
### Submission Errors

`POST /sdcpp/v1/img_gen` may return:

- `202 Accepted` when the job is created
- `400 Bad Request` for an empty body, unsupported model mode, invalid JSON, or invalid generation parameters
- `429 Too Many Requests` when the job queue is full
- `500 Internal Server Error` for unexpected server exceptions during submission

### `vid_gen`

The following section documents the native async contract for video generation.

#### `POST /sdcpp/v1/vid_gen`

Submits an async video generation job.

Successful submission returns `202 Accepted`.

Example response:
```json
{
"id": "job_01HTXYZVID",
"kind": "vid_gen",
"status": "queued",
"created": 1775401200,
"poll_url": "/sdcpp/v1/jobs/job_01HTXYZVID"
}
```
Response fields:
| Field | Type |
| --- | --- |
| `id` | `string` |
| `kind` | `string` |
| `status` | `string` |
| `created` | `integer` |
| `poll_url` | `string` |
### Request Body
Compared with `img_gen`, the `vid_gen` request body:
- `vid_gen` is a single video sequence job, so `batch_count` is not part of the request schema
- `ref_images`, `mask_image`, `control_image`, `control_strength`, and `embed_image_metadata` are not part of the request schema
- `vid_gen` adds `end_image`, `control_frames`, `high_noise_sample_params`, `video_frames`, `fps`, `moe_boundary`, and `vace_strength`
Example:
```json
{
"prompt": "a cat walking through a rainy alley",
"negative_prompt": "",
"clip_skip": -1,
"width": 832,
"height": 480,
"strength": 0.75,
"seed": -1,
"video_frames": 33,
"fps": 16,
"moe_boundary": 0.875,
"vace_strength": 1.0,
"init_image": null,
"end_image": null,
"control_frames": [],
"sample_params": {
"scheduler": "discrete",
"sample_method": "euler",
"sample_steps": 28,
"eta": 1.0,
"shifted_timestep": 0,
"custom_sigmas": [],
"flow_shift": 0.0,
"guidance": {
"txt_cfg": 7.0,
"img_cfg": 7.0,
"distilled_guidance": 3.5,
"slg": {
"layers": [7, 8, 9],
"layer_start": 0.01,
"layer_end": 0.2,
"scale": 0.0
}
}
},
"high_noise_sample_params": {
"scheduler": "discrete",
"sample_method": "euler",
"sample_steps": -1,
"eta": 1.0,
"shifted_timestep": 0,
"flow_shift": 0.0,
"guidance": {
"txt_cfg": 7.0,
"img_cfg": 7.0,
"distilled_guidance": 3.5,
"slg": {
"layers": [7, 8, 9],
"layer_start": 0.01,
"layer_end": 0.2,
"scale": 0.0
}
}
},
"lora": [],
"vae_tiling_params": {
"enabled": false,
"tile_size_x": 0,
"tile_size_y": 0,
"target_overlap": 0.5,
"rel_size_x": 0.0,
"rel_size_y": 0.0
},
"cache_mode": "disabled",
"cache_option": "",
"scm_mask": "",
"scm_policy_dynamic": true,
"output_format": "webm",
"output_compression": 100
}
```
### LoRA Rules
- The server only accepts explicit LoRA entries from the `lora` field.
- Prompt-embedded `<lora:...>` tags are intentionally unsupported.
- `lora[].is_high_noise` controls whether a LoRA applies only to the high-noise stage.
### Image and Frame Encoding Rules
Any image field accepts:
- a raw base64 string, or
- a data URL such as `data:image/png;base64,...`
Channel expectations:
- `init_image`: 3 channels
- `end_image`: 3 channels
- `control_frames[]`: 3 channels
Frame ordering rules:
- `control_frames[]` order is the conditioning frame order
- `control_frames[]` is preserved in request order
If omitted or null:
- single-image fields map to an empty `sd_image_t`
- array fields map to an empty C-style array, represented as `pointer = nullptr` and `count = 0`
### Field Mapping Summary
Top-level scalar fields:
| Field | Type |
| --- | --- |
| `prompt` | `string` |
| `negative_prompt` | `string` |
| `clip_skip` | `integer` |
| `width` | `integer` |
| `height` | `integer` |
| `strength` | `number` |
| `seed` | `integer` |
| `video_frames` | `integer` |
| `fps` | `integer` |
| `moe_boundary` | `number` |
| `vace_strength` | `number` |
Image and frame fields:
| Field | Type |
| --- | --- |
| `init_image` | `string \| null` |
| `end_image` | `string \| null` |
| `control_frames` | `array<string>` |
LoRA fields:
| Field | Type |
| --- | --- |
| `lora[].path` | `string` |
| `lora[].multiplier` | `number` |
| `lora[].is_high_noise` | `boolean` |
Sampling fields:
| Field | Type |
| --- | --- |
| `sample_params.scheduler` | `string` |
| `sample_params.sample_method` | `string` |
| `sample_params.sample_steps` | `integer` |
| `sample_params.eta` | `number` |
| `sample_params.shifted_timestep` | `integer` |
| `sample_params.custom_sigmas` | `array<number>` |
| `sample_params.flow_shift` | `number` |
| `sample_params.guidance.txt_cfg` | `number` |
| `sample_params.guidance.img_cfg` | `number` |
| `sample_params.guidance.distilled_guidance` | `number` |
| `sample_params.guidance.slg.layers` | `array<integer>` |
| `sample_params.guidance.slg.layer_start` | `number` |
| `sample_params.guidance.slg.layer_end` | `number` |
| `sample_params.guidance.slg.scale` | `number` |
High-noise sampling fields:
| Field | Type |
| --- | --- |
| `high_noise_sample_params.scheduler` | `string` |
| `high_noise_sample_params.sample_method` | `string` |
| `high_noise_sample_params.sample_steps` | `integer` |
| `high_noise_sample_params.eta` | `number` |
| `high_noise_sample_params.shifted_timestep` | `integer` |
| `high_noise_sample_params.flow_shift` | `number` |
| `high_noise_sample_params.guidance.txt_cfg` | `number` |
| `high_noise_sample_params.guidance.img_cfg` | `number` |
| `high_noise_sample_params.guidance.distilled_guidance` | `number` |
| `high_noise_sample_params.guidance.slg.layers` | `array<integer>` |
| `high_noise_sample_params.guidance.slg.layer_start` | `number` |
| `high_noise_sample_params.guidance.slg.layer_end` | `number` |
| `high_noise_sample_params.guidance.slg.scale` | `number` |
Other native fields:
| Field | Type |
| --- | --- |
| `vae_tiling_params` | `object` |
| `cache_mode` | `string` |
| `cache_option` | `string` |
| `scm_mask` | `string` |
| `scm_policy_dynamic` | `boolean` |
HTTP-only output fields:
| Field | Type |
| --- | --- |
| `output_format` | `string` |
| `output_compression` | `integer` |
For `vid_gen`, `output_format` and `output_compression` control container encoding.
`fps` is request metadata for the generated sequence and is echoed in the completed job result.
Allowed `output_format` values:
- `webm`
- `webp`
- `avi`
Output format behavior:
- `output_format` defaults to `webm`
- `webp` means animated WebP
- `avi` means MJPG AVI
- `webm` requires the server to be built with WebM support; otherwise the request returns `400`
### Result Payload
Completed jobs return one encoded container payload, not a list of per-frame images.
Result fields:
- `result.b64_json` contains the whole encoded container file as base64
- `result.mime_type` identifies the media type
- `result.output_format` echoes the selected container format
- `result.fps` echoes the effective playback FPS
- `result.frame_count` reports the actual decoded frame count used to build the container
Expected MIME types:
| `output_format` | `mime_type` |
| --- | --- |
| `webm` | `video/webm` |
| `webp` | `image/webp` |
| `avi` | `video/x-msvideo` |
### Optional Field Handling
Optional sampling fields may be omitted.
When omitted, backend defaults apply to these fields:
- `sample_params.scheduler`
- `sample_params.sample_method`
- `sample_params.eta`
- `sample_params.flow_shift`
- `sample_params.guidance.img_cfg`
- `high_noise_sample_params.scheduler`
- `high_noise_sample_params.sample_method`
- `high_noise_sample_params.eta`
- `high_noise_sample_params.flow_shift`
- `high_noise_sample_params.guidance.img_cfg`
`high_noise_sample_params` may also be omitted entirely.
### Frame Count Semantics
`video_frames` is the requested target length, but the current core video path internally normalizes the effective frame count to the largest `4n + 1` value that does not exceed the requested count.
Examples:
- `video_frames = 33` stays `33`
- `video_frames = 34` becomes `33`
- `video_frames = 32` becomes `29`
The completed job payload includes the actual decoded `frame_count`.
### Completion Result
Example completed job:
```json
{
"id": "job_01HTXYZVID",
"kind": "vid_gen",
"status": "completed",
"created": 1775401200,
"started": 1775401203,
"completed": 1775401215,
"queue_position": 0,
"result": {
"output_format": "webm",
"mime_type": "video/webm",
"fps": 16,
"frame_count": 33,
"b64_json": "GkXfo59ChoEBQveBAULygQRC84EIQo..."
},
"error": null
}
```
The response returns the encoded `.webm`, animated `.webp`, or `.avi` container payload directly.
### Failure Result
Example failed job:
```json
{
"id": "job_01HTXYZVID",
"kind": "vid_gen",
"status": "failed",
"created": 1775401200,
"started": 1775401203,
"completed": 1775401204,
"queue_position": 0,
"result": null,
"error": {
"code": "generation_failed",
"message": "generate_video returned no results"
}
}
```
### Cancelled Result
Example cancelled job:
```json
{
"id": "job_01HTXYZVID",
"kind": "vid_gen",
"status": "cancelled",
"created": 1775401200,
"started": null,
"completed": 1775401202,
"queue_position": 0,
"result": null,
"error": {
"code": "cancelled",
"message": "job cancelled by client"
}
}
```
### Submission Errors
`POST /sdcpp/v1/vid_gen` may return:
- `202 Accepted` when the job is created
- `400 Bad Request` for an empty body, unsupported model mode, invalid JSON, invalid generation parameters, or an unsupported output format
- `429 Too Many Requests` when the job queue is full
- `500 Internal Server Error` for unexpected server exceptions during submission

View File

@ -95,6 +95,10 @@ bool cancel_queued_job(AsyncJobManager& manager, AsyncGenerationJob& job) {
job.status = AsyncJobStatus::Cancelled; job.status = AsyncJobStatus::Cancelled;
job.completed_at = unix_timestamp_now(); job.completed_at = unix_timestamp_now();
job.result_images_b64.clear(); job.result_images_b64.clear();
job.result_media_b64.clear();
job.result_media_mime_type.clear();
job.result_frame_count = 0;
job.result_fps = 0;
job.error_code = "cancelled"; job.error_code = "cancelled";
job.error_message = "job cancelled by client"; job.error_message = "job cancelled by client";
return true; return true;
@ -122,6 +126,15 @@ json make_async_job_json(const AsyncJobManager& manager, const AsyncGenerationJo
} }
if (job.status == AsyncJobStatus::Completed) { if (job.status == AsyncJobStatus::Completed) {
if (job.kind == AsyncJobKind::VidGen) {
result["result"] = {
{"output_format", job.vid_gen.output_format},
{"mime_type", job.result_media_mime_type},
{"fps", job.result_fps},
{"frame_count", job.result_frame_count},
{"b64_json", job.result_media_b64},
};
} else {
json images = json::array(); json images = json::array();
for (size_t i = 0; i < job.result_images_b64.size(); ++i) { for (size_t i = 0; i < job.result_images_b64.size(); ++i) {
images.push_back({{"index", i}, {"b64_json", job.result_images_b64[i]}}); images.push_back({{"index", i}, {"b64_json", job.result_images_b64[i]}});
@ -130,6 +143,7 @@ json make_async_job_json(const AsyncJobManager& manager, const AsyncGenerationJo
{"output_format", job.img_gen.output_format}, {"output_format", job.img_gen.output_format},
{"images", images}, {"images", images},
}; };
}
result["error"] = nullptr; result["error"] = nullptr;
} else if (job.status == AsyncJobStatus::Failed || } else if (job.status == AsyncJobStatus::Failed ||
job.status == AsyncJobStatus::Cancelled) { job.status == AsyncJobStatus::Cancelled) {
@ -156,16 +170,15 @@ bool execute_img_gen_job(ServerRuntime& runtime,
sd_img_gen_params_t params = job.img_gen.to_sd_img_gen_params_t(); sd_img_gen_params_t params = job.img_gen.to_sd_img_gen_params_t();
SDImageVec results; SDImageVec results;
int num_results = 0;
{ {
std::lock_guard<std::mutex> lock(*runtime.sd_ctx_mutex); std::lock_guard<std::mutex> lock(*runtime.sd_ctx_mutex);
sd_image_t* raw_results = generate_image(runtime.sd_ctx, &params); sd_image_t* raw_results = generate_image(runtime.sd_ctx, &params);
num_results = params.batch_count; results.adopt(raw_results, params.batch_count);
results.adopt(raw_results, num_results);
} }
if (results.empty() || num_results <= 0) { const int num_results = results.count();
if (num_results <= 0) {
error_message = "generate_image returned no results"; error_message = "generate_image returned no results";
return false; return false;
} }
@ -208,6 +221,47 @@ bool execute_img_gen_job(ServerRuntime& runtime,
return true; return true;
} }
// Runs a queued video-generation job: generates the frames under the shared
// context lock, encodes them into the requested container format, and fills
// the output parameters with the base64 payload and its metadata.
// Returns false (with error_message set) when generation or encoding fails.
bool execute_vid_gen_job(ServerRuntime& runtime,
                         AsyncGenerationJob& job,
                         std::string& output_media_b64,
                         std::string& output_media_mime_type,
                         int& output_frame_count,
                         int& output_fps,
                         std::string& error_message) {
    sd_vid_gen_params_t gen_params = job.vid_gen.to_sd_vid_gen_params_t();
    SDImageVec frames;
    {
        // The sd context is not re-entrant; serialize access with image jobs.
        std::lock_guard<std::mutex> ctx_lock(*runtime.sd_ctx_mutex);
        int raw_count = 0;
        sd_image_t* raw_frames = generate_video(runtime.sd_ctx, &gen_params, &raw_count);
        frames.adopt(raw_frames, raw_count);
    }
    const int frame_count = frames.count();
    if (frame_count <= 0) {
        error_message = "generate_video returned no results";
        return false;
    }
    const int fps = job.vid_gen.gen_params.fps;
    std::vector<uint8_t> encoded = create_video_from_sd_images_to_vector(job.vid_gen.output_format,
                                                                         frames.data(),
                                                                         frame_count,
                                                                         fps,
                                                                         job.vid_gen.output_compression);
    if (encoded.empty()) {
        error_message = "failed to encode generated video container";
        return false;
    }
    output_media_b64       = base64_encode(encoded);
    output_media_mime_type = video_mime_type(job.vid_gen.output_format);
    output_frame_count     = frame_count;
    output_fps             = fps;
    return true;
}
void async_job_worker(ServerRuntime& runtime) { void async_job_worker(ServerRuntime& runtime) {
AsyncJobManager& manager = *runtime.async_job_manager; AsyncJobManager& manager = *runtime.async_job_manager;
@ -240,11 +294,23 @@ void async_job_worker(ServerRuntime& runtime) {
} }
std::vector<std::string> output_images; std::vector<std::string> output_images;
std::string output_media_b64;
std::string output_media_mime_type;
int output_frame_count = 0;
int output_fps = 0;
std::string error_message; std::string error_message;
bool ok = false; bool ok = false;
if (job->kind == AsyncJobKind::ImgGen) { if (job->kind == AsyncJobKind::ImgGen) {
ok = execute_img_gen_job(runtime, *job, output_images, error_message); ok = execute_img_gen_job(runtime, *job, output_images, error_message);
} else if (job->kind == AsyncJobKind::VidGen) {
ok = execute_vid_gen_job(runtime,
*job,
output_media_b64,
output_media_mime_type,
output_frame_count,
output_fps,
error_message);
} else { } else {
error_message = "unsupported job kind"; error_message = "unsupported job kind";
} }
@ -260,6 +326,10 @@ void async_job_worker(ServerRuntime& runtime) {
if (ok) { if (ok) {
job->status = AsyncJobStatus::Completed; job->status = AsyncJobStatus::Completed;
job->result_images_b64 = std::move(output_images); job->result_images_b64 = std::move(output_images);
job->result_media_b64 = std::move(output_media_b64);
job->result_media_mime_type = std::move(output_media_mime_type);
job->result_frame_count = output_frame_count;
job->result_fps = output_fps;
job->error_code.clear(); job->error_code.clear();
job->error_message.clear(); job->error_message.clear();
} else { } else {
@ -267,6 +337,10 @@ void async_job_worker(ServerRuntime& runtime) {
job->error_code = "generation_failed"; job->error_code = "generation_failed";
job->error_message = error_message.empty() ? "unknown generation error" : error_message; job->error_message = error_message.empty() ? "unknown generation error" : error_message;
job->result_images_b64.clear(); job->result_images_b64.clear();
job->result_media_b64.clear();
job->result_media_mime_type.clear();
job->result_frame_count = 0;
job->result_fps = 0;
} }
purge_expired_jobs(manager); purge_expired_jobs(manager);

View File

@ -36,7 +36,12 @@ struct AsyncGenerationJob {
int64_t started_at = 0; int64_t started_at = 0;
int64_t completed_at = 0; int64_t completed_at = 0;
ImgGenJobRequest img_gen; ImgGenJobRequest img_gen;
VidGenJobRequest vid_gen;
std::vector<std::string> result_images_b64; std::vector<std::string> result_images_b64;
std::string result_media_b64;
std::string result_media_mime_type;
int result_frame_count = 0;
int result_fps = 0;
std::string error_code; std::string error_code;
std::string error_message; std::string error_message;
}; };
@ -63,4 +68,11 @@ bool execute_img_gen_job(ServerRuntime& runtime,
AsyncGenerationJob& job, AsyncGenerationJob& job,
std::vector<std::string>& output_images, std::vector<std::string>& output_images,
std::string& error_message); std::string& error_message);
bool execute_vid_gen_job(ServerRuntime& runtime,
AsyncGenerationJob& job,
std::string& output_media_b64,
std::string& output_media_mime_type,
int& output_frame_count,
int& output_fps,
std::string& error_message);
void async_job_worker(ServerRuntime& runtime); void async_job_worker(ServerRuntime& runtime);

@ -1 +1 @@
Subproject commit 740475a7a6794dc07fb23e8ec5dc56e7e80aa8c1 Subproject commit 797ccf80825cc035508ba9b599b2a21953e7f835

View File

@ -48,7 +48,9 @@ static void parse_args(int argc,
if (!svr_params.resolve_and_validate() || if (!svr_params.resolve_and_validate() ||
!ctx_params.resolve_and_validate(IMG_GEN) || !ctx_params.resolve_and_validate(IMG_GEN) ||
!default_gen_params.resolve_and_validate(IMG_GEN, ctx_params.lora_model_dir)) { !default_gen_params.resolve_and_validate(IMG_GEN,
ctx_params.lora_model_dir,
ctx_params.hires_upscalers_dir)) {
print_usage(argv[0], options_vec); print_usage(argv[0], options_vec);
exit(1); exit(1);
} }
@ -95,6 +97,8 @@ int main(int argc, const char** argv) {
std::vector<LoraEntry> lora_cache; std::vector<LoraEntry> lora_cache;
std::mutex lora_mutex; std::mutex lora_mutex;
std::vector<UpscalerEntry> upscaler_cache;
std::mutex upscaler_mutex;
AsyncJobManager async_job_manager; AsyncJobManager async_job_manager;
ServerRuntime runtime = { ServerRuntime runtime = {
sd_ctx.get(), sd_ctx.get(),
@ -104,6 +108,8 @@ int main(int argc, const char** argv) {
&default_gen_params, &default_gen_params,
&lora_cache, &lora_cache,
&lora_mutex, &lora_mutex,
&upscaler_cache,
&upscaler_mutex,
&async_job_manager, &async_job_manager,
}; };

View File

@ -70,7 +70,7 @@ static bool build_openai_generation_request(const httplib::Request& req,
} }
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs. // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) { if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid params"; error_message = "invalid params";
return false; return false;
} }
@ -212,7 +212,7 @@ static bool build_openai_edit_request(const httplib::Request& req,
} }
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs. // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) { if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid params"; error_message = "invalid params";
return false; return false;
} }
@ -253,6 +253,12 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
svr.Post("/v1/images/generations", [runtime](const httplib::Request& req, httplib::Response& res) { svr.Post("/v1/images/generations", [runtime](const httplib::Request& req, httplib::Response& res) {
try { try {
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
res.status = 400;
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
return;
}
ImgGenJobRequest request; ImgGenJobRequest request;
std::string error_message; std::string error_message;
if (!build_openai_generation_request(req, *runtime, request, error_message)) { if (!build_openai_generation_request(req, *runtime, request, error_message)) {
@ -319,6 +325,12 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
svr.Post("/v1/images/edits", [runtime](const httplib::Request& req, httplib::Response& res) { svr.Post("/v1/images/edits", [runtime](const httplib::Request& req, httplib::Response& res) {
try { try {
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
res.status = 400;
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
return;
}
ImgGenJobRequest request; ImgGenJobRequest request;
std::string error_message; std::string error_message;
if (!build_openai_edit_request(req, *runtime, request, error_message)) { if (!build_openai_edit_request(req, *runtime, request, error_message)) {

View File

@ -1,6 +1,7 @@
#include "routes.h" #include "routes.h"
#include <algorithm> #include <algorithm>
#include <cctype>
#include <cstring> #include <cstring>
#include <regex> #include <regex>
#include <string_view> #include <string_view>
@ -35,14 +36,20 @@ static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
return {}; return {};
} }
// Returns a copy of `value` with ASCII letters lowered in place.
// The unsigned char cast avoids UB in std::tolower for negative chars.
static std::string lower_ascii(std::string value) {
    for (char& ch : value) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return value;
}
static enum sample_method_t get_sdapi_sample_method(std::string name) { static enum sample_method_t get_sdapi_sample_method(std::string name) {
enum sample_method_t result = str_to_sample_method(name.c_str()); enum sample_method_t result = str_to_sample_method(name.c_str());
if (result != SAMPLE_METHOD_COUNT) { if (result != SAMPLE_METHOD_COUNT) {
return result; return result;
} }
std::transform(name.begin(), name.end(), name.begin(), name = lower_ascii(name);
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
static const std::unordered_map<std::string_view, sample_method_t> hardcoded{ static const std::unordered_map<std::string_view, sample_method_t> hardcoded{
{"euler a", EULER_A_SAMPLE_METHOD}, {"euler a", EULER_A_SAMPLE_METHOD},
{"k_euler_a", EULER_A_SAMPLE_METHOD}, {"k_euler_a", EULER_A_SAMPLE_METHOD},
@ -114,6 +121,18 @@ static bool build_sdapi_img_gen_request(const json& j,
request.gen_params.width = j.value("width", -1); request.gen_params.width = j.value("width", -1);
request.gen_params.height = j.value("height", -1); request.gen_params.height = j.value("height", -1);
if (!img2img && j.value("enable_hr", false)) {
request.gen_params.hires_enabled = true;
request.gen_params.hires_scale = j.value("hr_scale", request.gen_params.hires_scale);
request.gen_params.hires_width = j.value("hr_resize_x", request.gen_params.hires_width);
request.gen_params.hires_height = j.value("hr_resize_y", request.gen_params.hires_height);
request.gen_params.hires_steps = j.value("hr_steps", request.gen_params.hires_steps);
request.gen_params.hires_denoising_strength =
j.value("denoising_strength", request.gen_params.hires_denoising_strength);
request.gen_params.hires_upscaler = j.value("hr_upscaler", request.gen_params.hires_upscaler);
}
std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt); std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt);
if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) { if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) {
error_message = "invalid sd_cpp_extra_args"; error_message = "invalid sd_cpp_extra_args";
@ -228,7 +247,7 @@ static bool build_sdapi_img_gen_request(const json& j,
} }
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs. // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) { if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid params"; error_message = "invalid params";
return false; return false;
} }
@ -246,6 +265,11 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
res.set_content(R"({"error":"empty body"})", "application/json"); res.set_content(R"({"error":"empty body"})", "application/json");
return; return;
} }
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
res.status = 400;
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
return;
}
json j = json::parse(req.body); json j = json::parse(req.body);
ImgGenJobRequest request; ImgGenJobRequest request;
@ -342,6 +366,52 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
res.set_content(result.dump(), "application/json"); res.set_content(result.dump(), "application/json");
}); });
svr.Get("/sdapi/v1/upscalers", [runtime](const httplib::Request&, httplib::Response& res) {
refresh_upscaler_cache(*runtime);
auto make_builtin = [](const char* name) {
json item;
item["name"] = name;
item["model_name"] = nullptr;
item["model_path"] = nullptr;
item["model_url"] = nullptr;
item["scale"] = 4;
return item;
};
json result = json::array();
result.push_back(make_builtin("None"));
result.push_back(make_builtin("Lanczos"));
result.push_back(make_builtin("Nearest"));
{
std::lock_guard<std::mutex> lock(*runtime->upscaler_mutex);
for (const auto& e : *runtime->upscaler_cache) {
json item;
item["name"] = e.name;
item["model_name"] = e.model_name;
item["model_path"] = e.fullpath;
item["model_url"] = nullptr;
item["scale"] = e.scale;
result.push_back(item);
}
}
res.set_content(result.dump(), "application/json");
});
svr.Get("/sdapi/v1/latent-upscale-modes", [](const httplib::Request&, httplib::Response& res) {
json result = json::array({
{{"name", "Latent"}},
{{"name", "Latent (nearest)"}},
{{"name", "Latent (nearest-exact)"}},
{{"name", "Latent (antialiased)"}},
{{"name", "Latent (bicubic)"}},
{{"name", "Latent (bicubic antialiased)"}},
});
res.set_content(result.dump(), "application/json");
});
svr.Get("/sdapi/v1/samplers", [runtime](const httplib::Request&, httplib::Response& res) { svr.Get("/sdapi/v1/samplers", [runtime](const httplib::Request&, httplib::Response& res) {
std::vector<std::string> sampler_names; std::vector<std::string> sampler_names;
sampler_names.push_back("default"); sampler_names.push_back("default");

View File

@ -75,61 +75,9 @@ static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
return {}; return {};
} }
static json make_capabilities_json(ServerRuntime& runtime) { static json make_sample_params_json(const sd_sample_params_t& sample_params, const std::vector<int>& skip_layers) {
refresh_lora_cache(runtime);
AsyncJobManager& manager = *runtime.async_job_manager;
const auto& defaults = *runtime.default_gen_params;
const auto& sample_params = defaults.sample_params;
const auto& guidance = sample_params.guidance; const auto& guidance = sample_params.guidance;
const fs::path model_path = resolve_display_model_path(runtime); return {
json samplers = json::array();
json schedulers = json::array();
json output_formats = json::array({"png", "jpeg"});
json available_loras = json::array();
for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
samplers.push_back(sd_sample_method_name((sample_method_t)i));
}
for (int i = 0; i < SCHEDULER_COUNT; ++i) {
schedulers.push_back(sd_scheduler_name((scheduler_t)i));
}
#ifdef SD_USE_WEBP
output_formats.push_back("webp");
#endif
{
std::lock_guard<std::mutex> lock(*runtime.lora_mutex);
for (const auto& entry : *runtime.lora_cache) {
available_loras.push_back({
{"name", entry.name},
{"path", entry.path},
});
}
}
json result;
result["model"] = {
{"name", model_path.filename().u8string()},
{"stem", model_path.stem().u8string()},
{"path", model_path.u8string()},
};
result["defaults"] = {
{"prompt", defaults.prompt},
{"negative_prompt", defaults.negative_prompt},
{"clip_skip", defaults.clip_skip},
{"width", defaults.width > 0 ? defaults.width : 512},
{"height", defaults.height > 0 ? defaults.height : 512},
{"strength", defaults.strength},
{"seed", defaults.seed},
{"batch_count", defaults.batch_count},
{"auto_resize_ref_image", defaults.auto_resize_ref_image},
{"increase_ref_index", defaults.increase_ref_index},
{"control_strength", defaults.control_strength},
{"sample_params",
{
{"scheduler", capability_scheduler_name(sample_params.scheduler)}, {"scheduler", capability_scheduler_name(sample_params.scheduler)},
{"sample_method", capability_sample_method_name(sample_params.sample_method)}, {"sample_method", capability_sample_method_name(sample_params.sample_method)},
{"sample_steps", sample_params.sample_steps}, {"sample_steps", sample_params.sample_steps},
@ -143,21 +91,234 @@ static json make_capabilities_json(ServerRuntime& runtime) {
{"distilled_guidance", guidance.distilled_guidance}, {"distilled_guidance", guidance.distilled_guidance},
{"slg", {"slg",
{ {
{"layers", defaults.skip_layers}, {"layers", skip_layers},
{"layer_start", guidance.slg.layer_start}, {"layer_start", guidance.slg.layer_start},
{"layer_end", guidance.slg.layer_end}, {"layer_end", guidance.slg.layer_end},
{"scale", guidance.slg.scale}, {"scale", guidance.slg.scale},
}}, }},
}}, }},
};
}
static json make_img_gen_defaults_json(const SDGenerationParams& defaults, const std::string& output_format) {
return {
{"prompt", defaults.prompt},
{"negative_prompt", defaults.negative_prompt},
{"clip_skip", defaults.clip_skip},
{"width", defaults.width > 0 ? defaults.width : 512},
{"height", defaults.height > 0 ? defaults.height : 512},
{"strength", defaults.strength},
{"seed", defaults.seed},
{"batch_count", defaults.batch_count},
{"auto_resize_ref_image", defaults.auto_resize_ref_image},
{"increase_ref_index", defaults.increase_ref_index},
{"control_strength", defaults.control_strength},
{"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)},
{"hires",
{
{"enabled", defaults.hires_enabled},
{"upscaler", defaults.hires_upscaler},
{"scale", defaults.hires_scale},
{"target_width", defaults.hires_width},
{"target_height", defaults.hires_height},
{"steps", defaults.hires_steps},
{"denoising_strength", defaults.hires_denoising_strength},
{"upscale_tile_size", defaults.hires_upscale_tile_size},
}}, }},
{"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)}, {"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
{"cache_mode", defaults.cache_mode}, {"cache_mode", defaults.cache_mode},
{"cache_option", defaults.cache_option}, {"cache_option", defaults.cache_option},
{"scm_mask", defaults.scm_mask}, {"scm_mask", defaults.scm_mask},
{"scm_policy_dynamic", defaults.scm_policy_dynamic}, {"scm_policy_dynamic", defaults.scm_policy_dynamic},
{"output_format", "png"}, {"output_format", output_format},
{"output_compression", 100}, {"output_compression", 100},
}; };
}
// Builds the "defaults" JSON object advertised for the vid_gen mode.
// Width/height fall back to 512 when the server has no explicit default.
static json make_vid_gen_defaults_json(const SDGenerationParams& defaults, const std::string& output_format) {
    json d;
    d["prompt"]             = defaults.prompt;
    d["negative_prompt"]    = defaults.negative_prompt;
    d["clip_skip"]          = defaults.clip_skip;
    d["width"]              = defaults.width > 0 ? defaults.width : 512;
    d["height"]             = defaults.height > 0 ? defaults.height : 512;
    d["strength"]           = defaults.strength;
    d["seed"]               = defaults.seed;
    d["video_frames"]       = defaults.video_frames;
    d["fps"]                = defaults.fps;
    d["moe_boundary"]       = defaults.moe_boundary;
    d["vace_strength"]      = defaults.vace_strength;
    d["sample_params"]      = make_sample_params_json(defaults.sample_params, defaults.skip_layers);
    d["high_noise_sample_params"] =
        make_sample_params_json(defaults.high_noise_sample_params, defaults.high_noise_skip_layers);
    d["vae_tiling_params"]  = make_vae_tiling_json(defaults.vae_tiling_params);
    d["cache_mode"]         = defaults.cache_mode;
    d["cache_option"]       = defaults.cache_option;
    d["scm_mask"]           = defaults.scm_mask;
    d["scm_policy_dynamic"] = defaults.scm_policy_dynamic;
    d["output_format"]      = output_format;
    d["output_compression"] = 100;
    return d;
}
// Advertises which optional capabilities the img_gen mode supports.
// Note: in-flight generation cannot be cancelled, only queued jobs.
static json make_img_gen_features_json() {
    json features;
    features["init_image"]        = true;
    features["mask_image"]        = true;
    features["control_image"]     = true;
    features["ref_images"]        = true;
    features["lora"]              = true;
    features["vae_tiling"]        = true;
    features["hires"]             = true;
    features["cache"]             = true;
    features["cancel_queued"]     = true;
    features["cancel_generating"] = false;
    return features;
}
// Advertises which optional capabilities the vid_gen mode supports.
// Note: in-flight generation cannot be cancelled, only queued jobs.
static json make_vid_gen_features_json() {
    json features;
    features["init_image"]               = true;
    features["end_image"]                = true;
    features["control_frames"]           = true;
    features["high_noise_sample_params"] = true;
    features["lora"]                     = true;
    features["vae_tiling"]               = true;
    features["cache"]                    = true;
    features["cancel_queued"]            = true;
    features["cancel_generating"]        = false;
    return features;
}
static json make_capabilities_json(ServerRuntime& runtime) {
refresh_lora_cache(runtime);
refresh_upscaler_cache(runtime);
AsyncJobManager& manager = *runtime.async_job_manager;
const auto& defaults = *runtime.default_gen_params;
const fs::path model_path = resolve_display_model_path(runtime);
const bool supports_img = runtime_supports_generation_mode(runtime, IMG_GEN);
const bool supports_vid = runtime_supports_generation_mode(runtime, VID_GEN);
json samplers = json::array();
json schedulers = json::array();
json image_output_formats = supported_img_output_formats();
json video_output_formats = supported_vid_output_formats();
json available_loras = json::array();
json available_upscalers = json::array();
json supported_modes = json::array();
for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
samplers.push_back(sd_sample_method_name((sample_method_t)i));
}
for (int i = 0; i < SCHEDULER_COUNT; ++i) {
schedulers.push_back(sd_scheduler_name((scheduler_t)i));
}
{
std::lock_guard<std::mutex> lock(*runtime.lora_mutex);
for (const auto& entry : *runtime.lora_cache) {
available_loras.push_back({
{"name", entry.name},
{"path", entry.path},
});
}
}
available_upscalers.push_back({
{"name", "None"},
});
available_upscalers.push_back({
{"name", "Lanczos"},
});
available_upscalers.push_back({
{"name", "Nearest"},
});
available_upscalers.push_back({
{"name", "Latent"},
});
available_upscalers.push_back({
{"name", "Latent (nearest)"},
});
available_upscalers.push_back({
{"name", "Latent (nearest-exact)"},
});
available_upscalers.push_back({
{"name", "Latent (antialiased)"},
});
available_upscalers.push_back({
{"name", "Latent (bicubic)"},
});
available_upscalers.push_back({
{"name", "Latent (bicubic antialiased)"},
});
{
std::lock_guard<std::mutex> lock(*runtime.upscaler_mutex);
for (const auto& entry : *runtime.upscaler_cache) {
available_upscalers.push_back({
{"name", entry.name},
});
}
}
if (supports_img) {
supported_modes.push_back("img_gen");
}
if (supports_vid) {
supported_modes.push_back("vid_gen");
}
std::string default_img_output_format = "png";
std::string default_vid_output_format = "avi";
if (!image_output_formats.empty()) {
default_img_output_format = image_output_formats[0].get<std::string>();
}
if (!video_output_formats.empty()) {
default_vid_output_format = video_output_formats[0].get<std::string>();
}
json defaults_by_mode = json::object();
json output_formats_by_mode = json::object();
json features_by_mode = json::object();
if (supports_img) {
defaults_by_mode["img_gen"] = make_img_gen_defaults_json(defaults, default_img_output_format);
output_formats_by_mode["img_gen"] = image_output_formats;
features_by_mode["img_gen"] = make_img_gen_features_json();
}
if (supports_vid) {
defaults_by_mode["vid_gen"] = make_vid_gen_defaults_json(defaults, default_vid_output_format);
output_formats_by_mode["vid_gen"] = video_output_formats;
features_by_mode["vid_gen"] = make_vid_gen_features_json();
}
json top_level_defaults = json::object();
json top_level_output_formats = json::array();
json top_level_features = {
{"cancel_queued", true},
{"cancel_generating", false},
};
std::string current_mode = "";
if (supports_img) {
current_mode = "img_gen";
top_level_defaults = defaults_by_mode["img_gen"];
top_level_output_formats = output_formats_by_mode["img_gen"];
top_level_features = features_by_mode["img_gen"];
} else if (supports_vid) {
current_mode = "vid_gen";
top_level_defaults = defaults_by_mode["vid_gen"];
top_level_output_formats = output_formats_by_mode["vid_gen"];
top_level_features = features_by_mode["vid_gen"];
}
json result;
result["model"] = {
{"name", model_path.filename().u8string()},
{"stem", model_path.stem().u8string()},
{"path", model_path.u8string()},
};
result["current_mode"] = current_mode;
result["supported_modes"] = supported_modes;
result["defaults"] = top_level_defaults;
result["defaults_by_mode"] = defaults_by_mode;
result["limits"] = { result["limits"] = {
{"min_width", 64}, {"min_width", 64},
{"max_width", 4096}, {"max_width", 4096},
@ -168,19 +329,12 @@ static json make_capabilities_json(ServerRuntime& runtime) {
}; };
result["samplers"] = samplers; result["samplers"] = samplers;
result["schedulers"] = schedulers; result["schedulers"] = schedulers;
result["output_formats"] = output_formats; result["output_formats"] = top_level_output_formats;
result["features"] = { result["output_formats_by_mode"] = output_formats_by_mode;
{"init_image", true}, result["features"] = top_level_features;
{"mask_image", true}, result["features_by_mode"] = features_by_mode;
{"control_image", true},
{"ref_images", true},
{"lora", true},
{"vae_tiling", true},
{"cache", true},
{"cancel_queued", true},
{"cancel_generating", false},
};
result["loras"] = available_loras; result["loras"] = available_loras;
result["upscalers"] = available_upscalers;
return result; return result;
} }
@ -204,7 +358,34 @@ static bool parse_img_gen_request(const json& body,
return false; return false;
} }
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs. // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) { if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid generation parameters";
return false;
}
return true;
}
static bool parse_vid_gen_request(const json& body,
ServerRuntime& runtime,
VidGenJobRequest& request,
std::string& error_message) {
request.gen_params = *runtime.default_gen_params;
refresh_lora_cache(runtime);
if (!request.gen_params.from_json_str(body.dump(), [&](const std::string& path) {
return get_lora_full_path(runtime, path);
})) {
error_message = "invalid generation parameters";
return false;
}
std::string output_format = body.value("output_format", "webm");
int output_compression = body.value("output_compression", 100);
if (!assign_output_options(request, output_format, output_compression, error_message)) {
return false;
}
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(VID_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid generation parameters"; error_message = "invalid generation parameters";
return false; return false;
} }
@ -226,6 +407,11 @@ void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
res.set_content(R"({"error":"empty body"})", "application/json"); res.set_content(R"({"error":"empty body"})", "application/json");
return; return;
} }
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
res.status = 400;
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
return;
}
json body = json::parse(req.body); json body = json::parse(req.body);
ImgGenJobRequest request; ImgGenJobRequest request;
@ -276,9 +462,66 @@ void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
} }
}); });
svr.Post("/sdcpp/v1/vid_gen", [](const httplib::Request&, httplib::Response& res) { svr.Post("/sdcpp/v1/vid_gen", [runtime](const httplib::Request& req, httplib::Response& res) {
res.status = 501; try {
res.set_content(R"({"error":"vid_gen is reserved and not implemented yet"})", "application/json"); if (req.body.empty()) {
res.status = 400;
res.set_content(R"({"error":"empty body"})", "application/json");
return;
}
if (!runtime_supports_generation_mode(*runtime, VID_GEN)) {
res.status = 400;
res.set_content(json({{"error", unsupported_generation_mode_error(VID_GEN)}}).dump(), "application/json");
return;
}
json body = json::parse(req.body);
VidGenJobRequest request;
std::string error_message;
if (!parse_vid_gen_request(body, *runtime, request, error_message)) {
res.status = 400;
res.set_content(json({{"error", error_message}}).dump(), "application/json");
return;
}
AsyncJobManager& manager = *runtime->async_job_manager;
std::shared_ptr<AsyncGenerationJob> job = std::make_shared<AsyncGenerationJob>();
job->kind = AsyncJobKind::VidGen;
job->status = AsyncJobStatus::Queued;
job->created_at = unix_timestamp_now();
job->vid_gen = std::move(request);
{
std::lock_guard<std::mutex> lock(manager.mutex);
purge_expired_jobs(manager);
if (count_pending_jobs(manager) >= manager.max_pending_jobs) {
res.status = 429;
res.set_content(R"({"error":"job queue is full"})", "application/json");
return;
}
job->id = make_async_job_id(manager);
manager.jobs[job->id] = job;
manager.queue.push_back(job->id);
}
manager.cv.notify_one();
json out;
out["id"] = job->id;
out["kind"] = async_job_kind_name(job->kind);
out["status"] = async_job_status_name(job->status);
out["created"] = job->created_at;
out["poll_url"] = "/sdcpp/v1/jobs/" + job->id;
res.status = 202;
res.set_content(out.dump(), "application/json");
} catch (const json::parse_error& e) {
res.status = 400;
res.set_content(json({{"error", "invalid json"}, {"message", e.what()}}).dump(), "application/json");
} catch (const std::exception& e) {
res.status = 500;
res.set_content(json({{"error", "server_error"}, {"message", e.what()}}).dump(), "application/json");
}
}); });
svr.Get(R"(/sdcpp/v1/jobs/([A-Za-z0-9_\-]+))", [runtime](const httplib::Request& req, httplib::Response& res) { svr.Get(R"(/sdcpp/v1/jobs/([A-Za-z0-9_\-]+))", [runtime](const httplib::Request& req, httplib::Response& res) {

View File

@ -1,6 +1,7 @@
#include "runtime.h" #include "runtime.h"
#include <algorithm> #include <algorithm>
#include <cctype>
#include <chrono> #include <chrono>
#include <cstdlib> #include <cstdlib>
#include <filesystem> #include <filesystem>
@ -13,6 +14,18 @@
namespace fs = std::filesystem; namespace fs = std::filesystem;
static std::string lower_ascii(std::string value) {
std::transform(value.begin(), value.end(), value.begin(), [](unsigned char c) {
return static_cast<char>(std::tolower(c));
});
return value;
}
static bool is_supported_model_ext(const fs::path& p) {
auto ext = lower_ascii(p.extension().string());
return ext == ".gguf" || ext == ".pt" || ext == ".pth" || ext == ".safetensors";
}
static const std::string k_base64_chars = static const std::string k_base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz"
@ -45,6 +58,44 @@ std::string normalize_output_format(std::string output_format) {
return output_format; return output_format;
} }
std::vector<std::string> supported_img_output_formats(bool allow_webp) {
std::vector<std::string> formats = {"png", "jpeg"};
#ifdef SD_USE_WEBP
if (allow_webp) {
formats.push_back("webp");
}
#else
(void)allow_webp;
#endif
return formats;
}
std::vector<std::string> supported_vid_output_formats() {
std::vector<std::string> formats;
#ifdef SD_USE_WEBM
formats.push_back("webm");
#endif
#ifdef SD_USE_WEBP
formats.push_back("webp");
#endif
formats.push_back("avi");
return formats;
}
static std::string valid_vid_output_formats_message() {
const std::vector<std::string> formats = supported_vid_output_formats();
std::string message = "invalid output_format, must be one of [";
for (size_t i = 0; i < formats.size(); ++i) {
if (i > 0) {
message += ", ";
}
message += formats[i];
}
message += "]";
return message;
}
bool assign_output_options(ImgGenJobRequest& request, bool assign_output_options(ImgGenJobRequest& request,
std::string output_format, std::string output_format,
int output_compression, int output_compression,
@ -53,19 +104,88 @@ bool assign_output_options(ImgGenJobRequest& request,
request.output_format = normalize_output_format(std::move(output_format)); request.output_format = normalize_output_format(std::move(output_format));
request.output_compression = std::clamp(output_compression, 0, 100); request.output_compression = std::clamp(output_compression, 0, 100);
const bool valid_format = request.output_format == "png" || const std::vector<std::string> valid_formats = supported_img_output_formats(allow_webp);
request.output_format == "jpeg" || const bool valid_format = std::find(valid_formats.begin(),
(allow_webp && request.output_format == "webp"); valid_formats.end(),
request.output_format) != valid_formats.end();
if (!valid_format) { if (!valid_format) {
error_message = allow_webp error_message = "invalid output_format, must be one of [";
? "invalid output_format, must be one of [png, jpeg, webp]" for (size_t i = 0; i < valid_formats.size(); ++i) {
: "invalid output_format, must be one of [png, jpeg]"; if (i > 0) {
error_message += ", ";
}
error_message += valid_formats[i];
}
error_message += "]";
return false; return false;
} }
return true; return true;
} }
bool assign_output_options(VidGenJobRequest& request,
std::string output_format,
int output_compression,
std::string& error_message) {
request.output_format = normalize_output_format(std::move(output_format));
request.output_compression = std::clamp(output_compression, 0, 100);
if (request.output_format == "avi") {
return true;
}
if (request.output_format == "webm") {
#ifdef SD_USE_WEBM
return true;
#else
error_message = valid_vid_output_formats_message();
return false;
#endif
}
if (request.output_format == "webp") {
#ifdef SD_USE_WEBP
return true;
#else
error_message = valid_vid_output_formats_message();
return false;
#endif
}
error_message = valid_vid_output_formats_message();
return false;
}
std::string video_mime_type(const std::string& output_format) {
if (output_format == "webm") {
return "video/webm";
}
if (output_format == "webp") {
return "image/webp";
}
return "video/x-msvideo";
}
bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode) {
if (mode == VID_GEN) {
return sd_ctx_supports_video_generation(runtime.sd_ctx);
}
if (mode == IMG_GEN) {
return sd_ctx_supports_image_generation(runtime.sd_ctx);
}
return true;
}
std::string unsupported_generation_mode_error(SDMode mode) {
if (mode == VID_GEN) {
return "loaded model does not support vid_gen";
}
if (mode == IMG_GEN) {
return "loaded model does not support img_gen";
}
return "loaded model does not support requested mode";
}
ArgOptions SDSvrParams::get_options() { ArgOptions SDSvrParams::get_options() {
ArgOptions options; ArgOptions options;
@ -134,20 +254,12 @@ void refresh_lora_cache(ServerRuntime& rt) {
fs::path lora_dir = rt.ctx_params->lora_model_dir; fs::path lora_dir = rt.ctx_params->lora_model_dir;
if (fs::exists(lora_dir) && fs::is_directory(lora_dir)) { if (fs::exists(lora_dir) && fs::is_directory(lora_dir)) {
auto is_lora_ext = [](const fs::path& p) {
auto ext = p.extension().string();
std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) {
return static_cast<char>(std::tolower(c));
});
return ext == ".gguf" || ext == ".pt" || ext == ".pth" || ext == ".safetensors";
};
for (auto& entry : fs::recursive_directory_iterator(lora_dir)) { for (auto& entry : fs::recursive_directory_iterator(lora_dir)) {
if (!entry.is_regular_file()) { if (!entry.is_regular_file()) {
continue; continue;
} }
const fs::path& p = entry.path(); const fs::path& p = entry.path();
if (!is_lora_ext(p)) { if (!is_supported_model_ext(p)) {
continue; continue;
} }
@ -179,6 +291,40 @@ std::string get_lora_full_path(ServerRuntime& rt, const std::string& path) {
return it != rt.lora_cache->end() ? it->fullpath : ""; return it != rt.lora_cache->end() ? it->fullpath : "";
} }
void refresh_upscaler_cache(ServerRuntime& rt) {
std::vector<UpscalerEntry> new_cache;
fs::path upscaler_dir = rt.ctx_params->hires_upscalers_dir;
if (fs::exists(upscaler_dir) && fs::is_directory(upscaler_dir)) {
for (auto& entry : fs::directory_iterator(upscaler_dir)) {
if (!entry.is_regular_file()) {
continue;
}
const fs::path& p = entry.path();
if (!is_supported_model_ext(p)) {
continue;
}
UpscalerEntry upscaler_entry;
upscaler_entry.name = p.stem().u8string();
upscaler_entry.fullpath = fs::absolute(p).lexically_normal().u8string();
upscaler_entry.model_name = "ESRGAN_4x";
upscaler_entry.path = p.filename().u8string();
new_cache.push_back(std::move(upscaler_entry));
}
}
std::sort(new_cache.begin(), new_cache.end(), [](const UpscalerEntry& a, const UpscalerEntry& b) {
return a.name < b.name;
});
{
std::lock_guard<std::mutex> lock(*rt.upscaler_mutex);
*rt.upscaler_cache = std::move(new_cache);
}
}
int64_t unix_timestamp_now() { int64_t unix_timestamp_now() {
return std::chrono::duration_cast<std::chrono::seconds>( return std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch()) std::chrono::system_clock::now().time_since_epoch())

View File

@ -37,6 +37,14 @@ struct LoraEntry {
std::string fullpath; std::string fullpath;
}; };
struct UpscalerEntry {
std::string name;
std::string path;
std::string fullpath;
std::string model_name;
int scale = 4;
};
struct ServerRuntime { struct ServerRuntime {
sd_ctx_t* sd_ctx; sd_ctx_t* sd_ctx;
std::mutex* sd_ctx_mutex; std::mutex* sd_ctx_mutex;
@ -45,6 +53,8 @@ struct ServerRuntime {
const SDGenerationParams* default_gen_params; const SDGenerationParams* default_gen_params;
std::vector<LoraEntry>* lora_cache; std::vector<LoraEntry>* lora_cache;
std::mutex* lora_mutex; std::mutex* lora_mutex;
std::vector<UpscalerEntry>* upscaler_cache;
std::mutex* upscaler_mutex;
AsyncJobManager* async_job_manager; AsyncJobManager* async_job_manager;
}; };
@ -58,13 +68,33 @@ struct ImgGenJobRequest {
} }
}; };
struct VidGenJobRequest {
SDGenerationParams gen_params;
std::string output_format = "webm";
int output_compression = 100;
sd_vid_gen_params_t to_sd_vid_gen_params_t() {
return gen_params.to_sd_vid_gen_params_t();
}
};
std::string base64_encode(const std::vector<uint8_t>& bytes); std::string base64_encode(const std::vector<uint8_t>& bytes);
std::string normalize_output_format(std::string output_format); std::string normalize_output_format(std::string output_format);
std::vector<std::string> supported_img_output_formats(bool allow_webp = true);
std::vector<std::string> supported_vid_output_formats();
bool assign_output_options(ImgGenJobRequest& request, bool assign_output_options(ImgGenJobRequest& request,
std::string output_format, std::string output_format,
int output_compression, int output_compression,
bool allow_webp, bool allow_webp,
std::string& error_message); std::string& error_message);
bool assign_output_options(VidGenJobRequest& request,
std::string output_format,
int output_compression,
std::string& error_message);
std::string video_mime_type(const std::string& output_format);
bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode);
std::string unsupported_generation_mode_error(SDMode mode);
void refresh_lora_cache(ServerRuntime& rt); void refresh_lora_cache(ServerRuntime& rt);
std::string get_lora_full_path(ServerRuntime& rt, const std::string& path); std::string get_lora_full_path(ServerRuntime& rt, const std::string& path);
void refresh_upscaler_cache(ServerRuntime& rt);
int64_t unix_timestamp_now(); int64_t unix_timestamp_now();

View File

@ -1,5 +1,5 @@
for f in src/*.cpp src/*.h src/*.hpp src/tokenizers/*.h src/tokenizers/*.cpp src/tokenizers/vocab/*.h src/tokenizers/vocab/*.cpp \ for f in src/*.cpp src/*.h src/*.hpp src/tokenizers/*.h src/tokenizers/*.cpp src/tokenizers/vocab/*.h src/tokenizers/vocab/*.cpp \
examples/cli/*.cpp examples/cli/*.h examples/server/*.cpp \ src/model_io/*.h src/model_io/*.cpp examples/cli/*.cpp examples/cli/*.h examples/server/*.cpp \
examples/common/*.hpp examples/common/*.h examples/common/*.cpp; do examples/common/*.hpp examples/common/*.h examples/common/*.cpp; do
[[ "$f" == vocab* ]] && continue [[ "$f" == vocab* ]] && continue
echo "formatting '$f'" echo "formatting '$f'"

View File

@ -203,6 +203,7 @@ typedef struct {
bool chroma_use_t5_mask; bool chroma_use_t5_mask;
int chroma_t5_mask_pad; int chroma_t5_mask_pad;
bool qwen_image_zero_cond_t; bool qwen_image_zero_cond_t;
float max_vram;
} sd_ctx_params_t; } sd_ctx_params_t;
typedef struct { typedef struct {
@ -289,6 +290,32 @@ typedef struct {
const char* path; const char* path;
} sd_lora_t; } sd_lora_t;
enum sd_hires_upscaler_t {
SD_HIRES_UPSCALER_NONE,
SD_HIRES_UPSCALER_LATENT,
SD_HIRES_UPSCALER_LATENT_NEAREST,
SD_HIRES_UPSCALER_LATENT_NEAREST_EXACT,
SD_HIRES_UPSCALER_LATENT_ANTIALIASED,
SD_HIRES_UPSCALER_LATENT_BICUBIC,
SD_HIRES_UPSCALER_LATENT_BICUBIC_ANTIALIASED,
SD_HIRES_UPSCALER_LANCZOS,
SD_HIRES_UPSCALER_NEAREST,
SD_HIRES_UPSCALER_MODEL,
SD_HIRES_UPSCALER_COUNT,
};
typedef struct {
bool enabled;
enum sd_hires_upscaler_t upscaler;
const char* model_path;
float scale;
int target_width;
int target_height;
int steps;
float denoising_strength;
int upscale_tile_size;
} sd_hires_params_t;
typedef struct { typedef struct {
const sd_lora_t* loras; const sd_lora_t* loras;
uint32_t lora_count; uint32_t lora_count;
@ -312,6 +339,7 @@ typedef struct {
sd_pm_params_t pm_params; sd_pm_params_t pm_params;
sd_tiling_params_t vae_tiling_params; sd_tiling_params_t vae_tiling_params;
sd_cache_params_t cache; sd_cache_params_t cache;
sd_hires_params_t hires;
} sd_img_gen_params_t; } sd_img_gen_params_t;
typedef struct { typedef struct {
@ -348,6 +376,8 @@ SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy, void* data); SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy, void* data);
SD_API int32_t sd_get_num_physical_cores(); SD_API int32_t sd_get_num_physical_cores();
SD_API const char* sd_get_system_info(); SD_API const char* sd_get_system_info();
SD_API bool sd_ctx_supports_image_generation(const sd_ctx_t* sd_ctx);
SD_API bool sd_ctx_supports_video_generation(const sd_ctx_t* sd_ctx);
SD_API const char* sd_type_name(enum sd_type_t type); SD_API const char* sd_type_name(enum sd_type_t type);
SD_API enum sd_type_t str_to_sd_type(const char* str); SD_API enum sd_type_t str_to_sd_type(const char* str);
@ -363,8 +393,11 @@ SD_API const char* sd_preview_name(enum preview_t preview);
SD_API enum preview_t str_to_preview(const char* str); SD_API enum preview_t str_to_preview(const char* str);
SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode); SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode);
SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str); SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str);
SD_API const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler);
SD_API enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str);
SD_API void sd_cache_params_init(sd_cache_params_t* cache_params); SD_API void sd_cache_params_init(sd_cache_params_t* cache_params);
SD_API void sd_hires_params_init(sd_hires_params_t* hires_params);
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params); SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params); SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);

View File

@ -499,9 +499,15 @@ namespace Anima {
encoder_hidden_states = adapted_context; encoder_hidden_states = adapted_context;
} }
sd::ggml_graph_cut::mark_graph_cut(x, "anima.prelude", "x");
sd::ggml_graph_cut::mark_graph_cut(embedded_timestep, "anima.prelude", "embedded_timestep");
sd::ggml_graph_cut::mark_graph_cut(temb, "anima.prelude", "temb");
sd::ggml_graph_cut::mark_graph_cut(encoder_hidden_states, "anima.prelude", "context");
for (int i = 0; i < num_layers; i++) { for (int i = 0; i < num_layers; i++) {
auto block = std::dynamic_pointer_cast<TransformerBlock>(blocks["blocks." + std::to_string(i)]); auto block = std::dynamic_pointer_cast<TransformerBlock>(blocks["blocks." + std::to_string(i)]);
x = block->forward(ctx, x, encoder_hidden_states, embedded_timestep, temb, image_pe); x = block->forward(ctx, x, encoder_hidden_states, embedded_timestep, temb, image_pe);
sd::ggml_graph_cut::mark_graph_cut(x, "anima.blocks." + std::to_string(i), "x");
} }
x = final_layer->forward(ctx, x, embedded_timestep, temb); // [N, h*w, ph*pw*C] x = final_layer->forward(ctx, x, embedded_timestep, temb); // [N, h*w, ph*pw*C]

View File

@ -328,6 +328,7 @@ public:
auto conv_out = std::dynamic_pointer_cast<Conv2d>(blocks["conv_out"]); auto conv_out = std::dynamic_pointer_cast<Conv2d>(blocks["conv_out"]);
auto h = conv_in->forward(ctx, x); // [N, ch, h, w] auto h = conv_in->forward(ctx, x); // [N, ch, h, w]
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.encoder.prelude", "h");
// downsampling // downsampling
size_t num_resolutions = ch_mult.size(); size_t num_resolutions = ch_mult.size();
@ -337,12 +338,14 @@ public:
auto down_block = std::dynamic_pointer_cast<ResnetBlock>(blocks[name]); auto down_block = std::dynamic_pointer_cast<ResnetBlock>(blocks[name]);
h = down_block->forward(ctx, h); h = down_block->forward(ctx, h);
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.encoder.down." + std::to_string(i) + ".block." + std::to_string(j), "h");
} }
if (i != num_resolutions - 1) { if (i != num_resolutions - 1) {
std::string name = "down." + std::to_string(i) + ".downsample"; std::string name = "down." + std::to_string(i) + ".downsample";
auto down_sample = std::dynamic_pointer_cast<DownSampleBlock>(blocks[name]); auto down_sample = std::dynamic_pointer_cast<DownSampleBlock>(blocks[name]);
h = down_sample->forward(ctx, h); h = down_sample->forward(ctx, h);
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.encoder.down." + std::to_string(i) + ".downsample", "h");
} }
} }
@ -350,6 +353,7 @@ public:
h = mid_block_1->forward(ctx, h); h = mid_block_1->forward(ctx, h);
h = mid_attn_1->forward(ctx, h); h = mid_attn_1->forward(ctx, h);
h = mid_block_2->forward(ctx, h); // [N, block_in, h, w] h = mid_block_2->forward(ctx, h); // [N, block_in, h, w]
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.encoder.mid", "h");
// end // end
h = norm_out->forward(ctx, h); h = norm_out->forward(ctx, h);
@ -450,6 +454,7 @@ public:
// conv_in // conv_in
auto h = conv_in->forward(ctx, z); // [N, block_in, h, w] auto h = conv_in->forward(ctx, z); // [N, block_in, h, w]
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.decoder.prelude", "h");
// middle // middle
h = mid_block_1->forward(ctx, h); h = mid_block_1->forward(ctx, h);
@ -457,6 +462,7 @@ public:
h = mid_attn_1->forward(ctx, h); h = mid_attn_1->forward(ctx, h);
h = mid_block_2->forward(ctx, h); // [N, block_in, h, w] h = mid_block_2->forward(ctx, h); // [N, block_in, h, w]
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.decoder.mid", "h");
// upsampling // upsampling
int num_resolutions = static_cast<int>(ch_mult.size()); int num_resolutions = static_cast<int>(ch_mult.size());
@ -466,12 +472,14 @@ public:
auto up_block = std::dynamic_pointer_cast<ResnetBlock>(blocks[name]); auto up_block = std::dynamic_pointer_cast<ResnetBlock>(blocks[name]);
h = up_block->forward(ctx, h); h = up_block->forward(ctx, h);
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.decoder.up." + std::to_string(i) + ".block." + std::to_string(j), "h");
} }
if (i != 0) { if (i != 0) {
std::string name = "up." + std::to_string(i) + ".upsample"; std::string name = "up." + std::to_string(i) + ".upsample";
auto up_sample = std::dynamic_pointer_cast<UpSampleBlock>(blocks[name]); auto up_sample = std::dynamic_pointer_cast<UpSampleBlock>(blocks[name]);
h = up_sample->forward(ctx, h); h = up_sample->forward(ctx, h);
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.decoder.up." + std::to_string(i) + ".upsample", "h");
} }
} }
@ -599,6 +607,7 @@ public:
if (use_quant) { if (use_quant) {
auto post_quant_conv = std::dynamic_pointer_cast<Conv2d>(blocks["post_quant_conv"]); auto post_quant_conv = std::dynamic_pointer_cast<Conv2d>(blocks["post_quant_conv"]);
z = post_quant_conv->forward(ctx, z); // [N, z_channels, h, w] z = post_quant_conv->forward(ctx, z); // [N, z_channels, h, w]
// sd::ggml_graph_cut::mark_graph_cut(z, "vae.decode.prelude", "z");
} }
auto decoder = std::dynamic_pointer_cast<Decoder>(blocks["decoder"]); auto decoder = std::dynamic_pointer_cast<Decoder>(blocks["decoder"]);
@ -616,6 +625,7 @@ public:
if (use_quant) { if (use_quant) {
auto quant_conv = std::dynamic_pointer_cast<Conv2d>(blocks["quant_conv"]); auto quant_conv = std::dynamic_pointer_cast<Conv2d>(blocks["quant_conv"]);
z = quant_conv->forward(ctx, z); // [N, 2*embed_dim, h/8, w/8] z = quant_conv->forward(ctx, z); // [N, 2*embed_dim, h/8, w/8]
// sd::ggml_graph_cut::mark_graph_cut(z, "vae.encode.final", "z");
} }
if (sd_version_uses_flux2_vae(version)) { if (sd_version_uses_flux2_vae(version)) {
z = ggml_ext_chunk(ctx->ggml_ctx, z, 2, 2)[0]; z = ggml_ext_chunk(ctx->ggml_ctx, z, 2, 2)[0];

View File

@ -96,7 +96,8 @@ public:
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* forward(GGMLRunnerContext* ctx,
ggml_tensor* x, ggml_tensor* x,
ggml_tensor* mask = nullptr, ggml_tensor* mask = nullptr,
int clip_skip = -1) { int clip_skip = -1,
const std::string& graph_cut_prefix = "") {
// x: [N, n_token, d_model] // x: [N, n_token, d_model]
int layer_idx = n_layer - 1; int layer_idx = n_layer - 1;
// LOG_DEBUG("clip_skip %d", clip_skip); // LOG_DEBUG("clip_skip %d", clip_skip);
@ -112,6 +113,9 @@ public:
std::string name = "layers." + std::to_string(i); std::string name = "layers." + std::to_string(i);
auto layer = std::dynamic_pointer_cast<CLIPLayer>(blocks[name]); auto layer = std::dynamic_pointer_cast<CLIPLayer>(blocks[name]);
x = layer->forward(ctx, x, mask); // [N, n_token, d_model] x = layer->forward(ctx, x, mask); // [N, n_token, d_model]
if (!graph_cut_prefix.empty()) {
sd::ggml_graph_cut::mark_graph_cut(x, graph_cut_prefix + ".layers." + std::to_string(i), "x");
}
// LOG_DEBUG("layer %d", i); // LOG_DEBUG("layer %d", i);
} }
return x; return x;
@ -304,7 +308,8 @@ public:
auto final_layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["final_layer_norm"]); auto final_layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["final_layer_norm"]);
auto x = embeddings->forward(ctx, input_ids, tkn_embeddings); // [N, n_token, hidden_size] auto x = embeddings->forward(ctx, input_ids, tkn_embeddings); // [N, n_token, hidden_size]
x = encoder->forward(ctx, x, mask, return_pooled ? -1 : clip_skip); sd::ggml_graph_cut::mark_graph_cut(x, "clip_text.prelude", "x");
x = encoder->forward(ctx, x, mask, return_pooled ? -1 : clip_skip, "clip_text");
if (return_pooled || with_final_ln) { if (return_pooled || with_final_ln) {
x = final_layer_norm->forward(ctx, x); x = final_layer_norm->forward(ctx, x);
} }
@ -368,7 +373,8 @@ public:
auto x = embeddings->forward(ctx, pixel_values); // [N, num_positions, embed_dim] auto x = embeddings->forward(ctx, pixel_values); // [N, num_positions, embed_dim]
x = pre_layernorm->forward(ctx, x); x = pre_layernorm->forward(ctx, x);
x = encoder->forward(ctx, x, nullptr, clip_skip); sd::ggml_graph_cut::mark_graph_cut(x, "clip_vision.prelude", "x");
x = encoder->forward(ctx, x, nullptr, clip_skip, "clip_vision");
auto last_hidden_state = x; auto last_hidden_state = x;

View File

@ -1,7 +1,9 @@
#ifndef __COMMON_BLOCK_HPP__ #ifndef __COMMON_BLOCK_HPP__
#define __COMMON_BLOCK_HPP__ #define __COMMON_BLOCK_HPP__
#include "ggml-backend.h"
#include "ggml_extend.hpp" #include "ggml_extend.hpp"
#include "util.h"
class DownSampleBlock : public GGMLBlock { class DownSampleBlock : public GGMLBlock {
protected: protected:
@ -248,9 +250,6 @@ public:
float scale = 1.f; float scale = 1.f;
if (precision_fix) { if (precision_fix) {
scale = 1.f / 128.f; scale = 1.f / 128.f;
#ifdef SD_USE_VULKAN
force_prec_f32 = true;
#endif
} }
// The purpose of the scale here is to prevent NaN issues in certain situations. // The purpose of the scale here is to prevent NaN issues in certain situations.
// For example, when using Vulkan without enabling force_prec_f32, // For example, when using Vulkan without enabling force_prec_f32,
@ -264,6 +263,9 @@ public:
auto net_0 = std::dynamic_pointer_cast<UnaryBlock>(blocks["net.0"]); auto net_0 = std::dynamic_pointer_cast<UnaryBlock>(blocks["net.0"]);
auto net_2 = std::dynamic_pointer_cast<Linear>(blocks["net.2"]); auto net_2 = std::dynamic_pointer_cast<Linear>(blocks["net.2"]);
if (sd_backend_is(ctx->backend, "Vulkan")) {
net_2->set_force_prec_f32(true);
}
x = net_0->forward(ctx, x); // [ne3, ne2, ne1, inner_dim] x = net_0->forward(ctx, x); // [ne3, ne2, ne1, inner_dim]
x = net_2->forward(ctx, x); // [ne3, ne2, ne1, dim_out] x = net_2->forward(ctx, x); // [ne3, ne2, ne1, dim_out]

View File

@ -85,6 +85,7 @@ public:
virtual void free_params_buffer() = 0; virtual void free_params_buffer() = 0;
virtual void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) = 0; virtual void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) = 0;
virtual size_t get_params_buffer_size() = 0; virtual size_t get_params_buffer_size() = 0;
virtual void set_max_graph_vram_bytes(size_t max_vram_bytes) {}
virtual void set_flash_attention_enabled(bool enabled) = 0; virtual void set_flash_attention_enabled(bool enabled) = 0;
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) {} virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) {}
virtual std::tuple<SDCondition, std::vector<bool>> get_learned_condition_with_trigger(int n_threads, virtual std::tuple<SDCondition, std::vector<bool>> get_learned_condition_with_trigger(int n_threads,
@ -165,6 +166,13 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
return buffer_size; return buffer_size;
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
text_model->set_max_graph_vram_bytes(max_vram_bytes);
if (sd_version_is_sdxl(version)) {
text_model2->set_max_graph_vram_bytes(max_vram_bytes);
}
}
void set_flash_attention_enabled(bool enabled) override { void set_flash_attention_enabled(bool enabled) override {
text_model->set_flash_attention_enabled(enabled); text_model->set_flash_attention_enabled(enabled);
if (sd_version_is_sdxl(version)) { if (sd_version_is_sdxl(version)) {
@ -781,6 +789,18 @@ struct SD3CLIPEmbedder : public Conditioner {
return buffer_size; return buffer_size;
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
if (clip_l) {
clip_l->set_max_graph_vram_bytes(max_vram_bytes);
}
if (clip_g) {
clip_g->set_max_graph_vram_bytes(max_vram_bytes);
}
if (t5) {
t5->set_max_graph_vram_bytes(max_vram_bytes);
}
}
void set_flash_attention_enabled(bool enabled) override { void set_flash_attention_enabled(bool enabled) override {
if (clip_l) { if (clip_l) {
clip_l->set_flash_attention_enabled(enabled); clip_l->set_flash_attention_enabled(enabled);
@ -1124,6 +1144,15 @@ struct FluxCLIPEmbedder : public Conditioner {
return buffer_size; return buffer_size;
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
if (clip_l) {
clip_l->set_max_graph_vram_bytes(max_vram_bytes);
}
if (t5) {
t5->set_max_graph_vram_bytes(max_vram_bytes);
}
}
void set_flash_attention_enabled(bool enabled) override { void set_flash_attention_enabled(bool enabled) override {
if (clip_l) { if (clip_l) {
clip_l->set_flash_attention_enabled(enabled); clip_l->set_flash_attention_enabled(enabled);
@ -1349,6 +1378,12 @@ struct T5CLIPEmbedder : public Conditioner {
return buffer_size; return buffer_size;
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
if (t5) {
t5->set_max_graph_vram_bytes(max_vram_bytes);
}
}
void set_flash_attention_enabled(bool enabled) override { void set_flash_attention_enabled(bool enabled) override {
if (t5) { if (t5) {
t5->set_flash_attention_enabled(enabled); t5->set_flash_attention_enabled(enabled);
@ -1525,6 +1560,10 @@ struct AnimaConditioner : public Conditioner {
return llm->get_params_buffer_size(); return llm->get_params_buffer_size();
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
llm->set_max_graph_vram_bytes(max_vram_bytes);
}
void set_flash_attention_enabled(bool enabled) override { void set_flash_attention_enabled(bool enabled) override {
llm->set_flash_attention_enabled(enabled); llm->set_flash_attention_enabled(enabled);
} }
@ -1657,6 +1696,10 @@ struct LLMEmbedder : public Conditioner {
return buffer_size; return buffer_size;
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
llm->set_max_graph_vram_bytes(max_vram_bytes);
}
void set_flash_attention_enabled(bool enabled) override { void set_flash_attention_enabled(bool enabled) override {
llm->set_flash_attention_enabled(enabled); llm->set_flash_attention_enabled(enabled);
} }

138
src/convert.cpp Normal file
View File

@ -0,0 +1,138 @@
#include <cstring>
#include <mutex>
#include <regex>
#include <vector>
#include "model.h"
#include "model_io/gguf_io.h"
#include "model_io/safetensors_io.h"
#include "util.h"
#include "ggml-cpu.h"
static ggml_type get_export_tensor_type(ModelLoader& model_loader,
const TensorStorage& tensor_storage,
ggml_type type,
const TensorTypeRules& tensor_type_rules) {
const std::string& name = tensor_storage.name;
ggml_type tensor_type = tensor_storage.type;
ggml_type dst_type = type;
for (const auto& tensor_type_rule : tensor_type_rules) {
std::regex pattern(tensor_type_rule.first);
if (std::regex_search(name, pattern)) {
dst_type = tensor_type_rule.second;
break;
}
}
if (model_loader.tensor_should_be_converted(tensor_storage, dst_type)) {
tensor_type = dst_type;
}
return tensor_type;
}
static bool load_tensors_for_export(ModelLoader& model_loader,
ggml_context* ggml_ctx,
ggml_type type,
const TensorTypeRules& tensor_type_rules,
std::vector<TensorWriteInfo>& tensors) {
std::mutex tensor_mutex;
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
const std::string& name = tensor_storage.name;
ggml_type tensor_type = get_export_tensor_type(model_loader, tensor_storage, type, tensor_type_rules);
std::lock_guard<std::mutex> lock(tensor_mutex);
ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
if (tensor == nullptr) {
LOG_ERROR("ggml_new_tensor failed");
return false;
}
ggml_set_name(tensor, name.c_str());
if (!tensor->data) {
GGML_ASSERT(ggml_nelements(tensor) == 0);
// Avoid crashing writers by setting a dummy pointer for zero-sized tensors.
LOG_DEBUG("setting dummy pointer for zero-sized tensor %s", name.c_str());
tensor->data = ggml_get_mem_buffer(ggml_ctx);
}
TensorWriteInfo write_info;
write_info.tensor = tensor;
write_info.n_dims = tensor_storage.n_dims;
for (int i = 0; i < tensor_storage.n_dims; ++i) {
write_info.ne[i] = tensor_storage.ne[i];
}
*dst_tensor = tensor;
tensors.push_back(std::move(write_info));
return true;
};
bool success = model_loader.load_tensors(on_new_tensor_cb);
LOG_INFO("load tensors done");
return success;
}
bool convert(const char* input_path,
const char* vae_path,
const char* output_path,
sd_type_t output_type,
const char* tensor_type_rules,
bool convert_name) {
ModelLoader model_loader;
if (!model_loader.init_from_file(input_path)) {
LOG_ERROR("init model loader from file failed: '%s'", input_path);
return false;
}
if (vae_path != nullptr && strlen(vae_path) > 0) {
if (!model_loader.init_from_file(vae_path, "vae.")) {
LOG_ERROR("init model loader from file failed: '%s'", vae_path);
return false;
}
}
if (convert_name) {
model_loader.convert_tensors_name();
}
ggml_type type = (ggml_type)output_type;
bool output_is_safetensors = ends_with(output_path, ".safetensors");
TensorTypeRules type_rules = parse_tensor_type_rules(tensor_type_rules);
auto backend = ggml_backend_cpu_init();
size_t mem_size = 1 * 1024 * 1024; // for padding
mem_size += model_loader.get_tensor_storage_map().size() * ggml_tensor_overhead();
mem_size += model_loader.get_params_mem_size(backend, type);
LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
ggml_context* ggml_ctx = ggml_init({mem_size, nullptr, false});
if (ggml_ctx == nullptr) {
LOG_ERROR("ggml_init failed for converter");
ggml_backend_free(backend);
return false;
}
std::vector<TensorWriteInfo> tensors;
bool success = load_tensors_for_export(model_loader, ggml_ctx, type, type_rules, tensors);
ggml_backend_free(backend);
std::string error;
if (success) {
if (output_is_safetensors) {
success = write_safetensors_file(output_path, tensors, &error);
} else {
success = write_gguf_file(output_path, tensors, &error);
}
}
if (!success && !error.empty()) {
LOG_ERROR("%s", error.c_str());
}
ggml_free(ggml_ctx);
return success;
}

View File

@ -808,6 +808,18 @@ static std::tuple<float, float, float> get_ancestral_step_flow(float sigma_from,
return {sigma_down, sigma_up, alpha_scale}; return {sigma_down, sigma_up, alpha_scale};
} }
static std::tuple<float, float, float> get_ancestral_step(float sigma_from,
float sigma_to,
float eta,
bool is_flow_denoiser) {
if (is_flow_denoiser) {
return get_ancestral_step_flow(sigma_from, sigma_to, eta);
} else {
auto [sigma_down, sigma_up] = get_ancestral_step(sigma_from, sigma_to, eta);
return {sigma_down, sigma_up, 1.0f};
}
}
static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model, static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
sd::Tensor<float> x, sd::Tensor<float> x,
const std::vector<float>& sigmas, const std::vector<float>& sigmas,
@ -970,6 +982,100 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
return x; return x;
} }
static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
                                                        sd::Tensor<float> x,
                                                        const std::vector<float>& sigmas,
                                                        std::shared_ptr<RNG> rng,
                                                        float eta = 1.0f) {
    // DPM++(2S) ancestral sampler reformulated for flow-matching models.
    const int n_steps = static_cast<int>(sigmas.size()) - 1;
    for (int step = 0; step < n_steps; step++) {
        const float sigma_cur  = sigmas[step];
        const float sigma_next = sigmas[step + 1];
        // At sigma == 1 the midpoint collapses onto the current sample, so the
        // second model evaluation can be skipped (see derivation below).
        const bool skip_midpoint = (1.0 - sigma_cur < 1e-6);
        auto pred_opt = model(x, sigma_cur, (skip_midpoint ? 1 : -1) * (step + 1));
        if (pred_opt.empty()) {
            return {};
        }
        sd::Tensor<float> pred = std::move(pred_opt);
        if (sigma_next == 0.0f) {
            // Final step degenerates to plain Euler with a zero target sigma:
            //   x + ((x - pred) / sigma) * (0 - sigma)
            // = x - x + pred
            // = pred
            x = pred;
            continue;
        }
        auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step_flow(sigma_cur, sigma_next, eta);
        sd::Tensor<float> midpoint_pred;
        if (skip_midpoint) {
            // With sigma == 1:
            //   exp_s = sqrt((1-1)/1 * (1-sigma_down)/sigma_down) = 0
            // so sigma_mid == sigma and the midpoint input equals x; the
            // midpoint prediction is exactly `pred` — no extra model call.
            midpoint_pred = pred;
        } else {
            // Midpoint sigma from a half step (r = 0.5) in lambda space:
            //   lambda(sigma) = log((1-sigma)/sigma)
            //   s      = 0.5 * (lambda(sigma) + lambda(sigma_down))
            //   exp(s) = sqrt((1-sigma)/sigma * (1-sigma_down)/sigma_down)
            //   sigma_mid = 1 / (exp(s) + 1)
            // This product form sidesteps the singularity of lambda() at
            // sigma -> 1.
            const float lambda_exp = std::sqrt(((1 - sigma_cur) / sigma_cur) * ((1 - sigma_down) / sigma_down));
            const float sigma_mid  = 1.0f / (lambda_exp + 1.0f);
            const float mid_ratio  = sigma_mid / sigma_cur;
            sd::Tensor<float> x_mid = (x * mid_ratio) + (pred * (1.0f - mid_ratio));
            auto mid_opt = model(x_mid, sigma_mid, step + 1);
            if (mid_opt.empty()) {
                return {};
            }
            midpoint_pred = std::move(mid_opt);
        }
        const float down_ratio = sigma_down / sigma_cur;
        x = (x * down_ratio) + (midpoint_pred * (1.0f - down_ratio));
        if (sigma_next > 0.0f && eta > 0.0f) {
            // Ancestral noise injection; alpha_scale keeps the flow-model
            // variance consistent after adding sigma_up noise.
            x = alpha_scale * x + sd::Tensor<float>::randn_like(x, rng) * sigma_up;
        }
    }
    return x;
}
static sd::Tensor<float> sample_dpmpp_2m(denoise_cb_t model, static sd::Tensor<float> sample_dpmpp_2m(denoise_cb_t model,
sd::Tensor<float> x, sd::Tensor<float> x,
const std::vector<float>& sigmas) { const std::vector<float>& sigmas) {
@ -1041,7 +1147,8 @@ static sd::Tensor<float> sample_dpmpp_2m_v2(denoise_cb_t model,
static sd::Tensor<float> sample_lcm(denoise_cb_t model, static sd::Tensor<float> sample_lcm(denoise_cb_t model,
sd::Tensor<float> x, sd::Tensor<float> x,
const std::vector<float>& sigmas, const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng) { std::shared_ptr<RNG> rng,
bool is_flow_denoiser) {
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
auto denoised_opt = model(x, sigmas[i], i + 1); auto denoised_opt = model(x, sigmas[i], i + 1);
@ -1050,6 +1157,9 @@ static sd::Tensor<float> sample_lcm(denoise_cb_t model,
} }
x = std::move(denoised_opt); x = std::move(denoised_opt);
if (sigmas[i + 1] > 0) { if (sigmas[i + 1] > 0) {
if (is_flow_denoiser) {
x *= (1 - sigmas[i + 1]);
}
x += sd::Tensor<float>::randn_like(x, rng) * sigmas[i + 1]; x += sd::Tensor<float>::randn_like(x, rng) * sigmas[i + 1];
} }
} }
@ -1149,6 +1259,7 @@ static sd::Tensor<float> sample_res_multistep(denoise_cb_t model,
sd::Tensor<float> x, sd::Tensor<float> x,
const std::vector<float>& sigmas, const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng, std::shared_ptr<RNG> rng,
bool is_flow_denoiser,
float eta) { float eta) {
sd::Tensor<float> old_denoised = x; sd::Tensor<float> old_denoised = x;
bool have_old_sigma = false; bool have_old_sigma = false;
@ -1180,7 +1291,8 @@ static sd::Tensor<float> sample_res_multistep(denoise_cb_t model,
float sigma_from = sigmas[i]; float sigma_from = sigmas[i];
float sigma_to = sigmas[i + 1]; float sigma_to = sigmas[i + 1];
auto [sigma_down, sigma_up] = get_ancestral_step(sigma_from, sigma_to, eta);
auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma_from, sigma_to, eta, is_flow_denoiser);
if (sigma_down == 0.0f || !have_old_sigma) { if (sigma_down == 0.0f || !have_old_sigma) {
x += ((x - denoised) / sigma_from) * (sigma_down - sigma_from); x += ((x - denoised) / sigma_from) * (sigma_down - sigma_from);
@ -1207,7 +1319,10 @@ static sd::Tensor<float> sample_res_multistep(denoise_cb_t model,
x = sigma_fn(h) * x + h * (b1 * denoised + b2 * old_denoised); x = sigma_fn(h) * x + h * (b1 * denoised + b2 * old_denoised);
} }
if (sigmas[i + 1] > 0 && sigma_up > 0.0f) { if (sigma_to > 0.0f && sigma_up > 0.0f) {
if (is_flow_denoiser) {
x *= alpha_scale;
}
x += sd::Tensor<float>::randn_like(x, rng) * sigma_up; x += sd::Tensor<float>::randn_like(x, rng) * sigma_up;
} }
@ -1222,6 +1337,7 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
sd::Tensor<float> x, sd::Tensor<float> x,
const std::vector<float>& sigmas, const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng, std::shared_ptr<RNG> rng,
bool is_flow_denoiser,
float eta) { float eta) {
const float c2 = 0.5f; const float c2 = 0.5f;
auto t_fn = [](float sigma) -> float { return -logf(sigma); }; auto t_fn = [](float sigma) -> float { return -logf(sigma); };
@ -1250,7 +1366,7 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
} }
sd::Tensor<float> denoised = std::move(denoised_opt); sd::Tensor<float> denoised = std::move(denoised_opt);
auto [sigma_down, sigma_up] = get_ancestral_step(sigma_from, sigma_to, eta); auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma_from, sigma_to, eta, is_flow_denoiser);
sd::Tensor<float> x0 = x; sd::Tensor<float> x0 = x;
if (sigma_down == 0.0f || sigma_from == 0.0f) { if (sigma_down == 0.0f || sigma_from == 0.0f) {
@ -1279,7 +1395,10 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
x = x0 + h * (b1 * eps1 + b2 * eps2); x = x0 + h * (b1 * eps1 + b2 * eps2);
} }
if (sigmas[i + 1] > 0 && sigma_up > 0.0f) { if (sigma_to > 0.0f && sigma_up > 0.0f) {
if (is_flow_denoiser) {
x *= alpha_scale;
}
x += sd::Tensor<float>::randn_like(x, rng) * sigma_up; x += sd::Tensor<float>::randn_like(x, rng) * sigma_up;
} }
} }
@ -1425,32 +1544,10 @@ static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
const std::vector<float>& sigmas, const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng, std::shared_ptr<RNG> rng,
float eta) { float eta) {
float beta_start = 0.00085f;
float beta_end = 0.0120f;
std::vector<double> alphas_cumprod(TIMESTEPS);
std::vector<double> compvis_sigmas(TIMESTEPS);
for (int i = 0; i < TIMESTEPS; i++) {
alphas_cumprod[i] =
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
(1.0f -
std::pow(sqrtf(beta_start) +
(sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)),
2));
compvis_sigmas[i] =
std::sqrt((1 - alphas_cumprod[i]) / alphas_cumprod[i]);
}
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
int timestep = static_cast<int>(roundf(TIMESTEPS - i * ((float)TIMESTEPS / steps))) - 1; float sigma = sigmas[i];
int prev_timestep = timestep - TIMESTEPS / steps; float sigma_to = sigmas[i + 1];
float sigma = static_cast<float>(compvis_sigmas[timestep]);
if (i == 0) {
x *= std::sqrt(sigma * sigma + 1) / sigma;
} else {
x *= std::sqrt(sigma * sigma + 1);
}
auto model_output_opt = model(x, sigma, i + 1); auto model_output_opt = model(x, sigma, i + 1);
if (model_output_opt.empty()) { if (model_output_opt.empty()) {
@ -1459,8 +1556,8 @@ static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
sd::Tensor<float> model_output = std::move(model_output_opt); sd::Tensor<float> model_output = std::move(model_output_opt);
model_output = (x - model_output) * (1.0f / sigma); model_output = (x - model_output) * (1.0f / sigma);
float alpha_prod_t = static_cast<float>(alphas_cumprod[timestep]); float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
float alpha_prod_t_prev = static_cast<float>(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]); float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f);
float beta_prod_t = 1.0f - alpha_prod_t; float beta_prod_t = 1.0f - alpha_prod_t;
sd::Tensor<float> pred_original_sample = ((x / std::sqrt(sigma * sigma + 1)) - sd::Tensor<float> pred_original_sample = ((x / std::sqrt(sigma * sigma + 1)) -
@ -1472,11 +1569,11 @@ static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
(1.0f - alpha_prod_t / alpha_prod_t_prev); (1.0f - alpha_prod_t / alpha_prod_t_prev);
float std_dev_t = eta * std::sqrt(variance); float std_dev_t = eta * std::sqrt(variance);
x = std::sqrt(alpha_prod_t_prev) * pred_original_sample + x = pred_original_sample +
std::sqrt(1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) * model_output; std::sqrt((1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) / alpha_prod_t_prev) * model_output;
if (eta > 0) { if (eta > 0) {
x += std_dev_t * sd::Tensor<float>::randn_like(x, rng); x += std_dev_t / std::sqrt(alpha_prod_t_prev) * sd::Tensor<float>::randn_like(x, rng);
} }
} }
return x; return x;
@ -1503,19 +1600,26 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
std::sqrt((1 - alphas_cumprod[i]) / alphas_cumprod[i]); std::sqrt((1 - alphas_cumprod[i]) / alphas_cumprod[i]);
} }
int original_steps = 50; auto get_timestep_from_sigma = [&](float s) -> int {
auto it = std::lower_bound(compvis_sigmas.begin(), compvis_sigmas.end(), s);
if (it == compvis_sigmas.begin())
return 0;
if (it == compvis_sigmas.end())
return TIMESTEPS - 1;
int idx_high = static_cast<int>(std::distance(compvis_sigmas.begin(), it));
int idx_low = idx_high - 1;
if (std::abs(compvis_sigmas[idx_high] - s) < std::abs(compvis_sigmas[idx_low] - s)) {
return idx_high;
}
return idx_low;
};
int steps = static_cast<int>(sigmas.size()) - 1; int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
int timestep = TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor(i * ((float)original_steps / steps)); float sigma_to = sigmas[i + 1];
int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps)); int prev_timestep = get_timestep_from_sigma(sigma_to);
int timestep_s = (int)floor((1 - eta) * prev_timestep); int timestep_s = (int)floor((1 - eta) * prev_timestep);
float sigma = static_cast<float>(compvis_sigmas[timestep]); float sigma = sigmas[i];
if (i == 0) {
x *= std::sqrt(sigma * sigma + 1) / sigma;
} else {
x *= std::sqrt(sigma * sigma + 1);
}
auto model_output_opt = model(x, sigma, i + 1); auto model_output_opt = model(x, sigma, i + 1);
if (model_output_opt.empty()) { if (model_output_opt.empty()) {
@ -1524,9 +1628,9 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
sd::Tensor<float> model_output = std::move(model_output_opt); sd::Tensor<float> model_output = std::move(model_output_opt);
model_output = (x - model_output) * (1.0f / sigma); model_output = (x - model_output) * (1.0f / sigma);
float alpha_prod_t = static_cast<float>(alphas_cumprod[timestep]); float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
float beta_prod_t = 1.0f - alpha_prod_t; float beta_prod_t = 1.0f - alpha_prod_t;
float alpha_prod_t_prev = static_cast<float>(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]); float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f);
float alpha_prod_s = static_cast<float>(alphas_cumprod[timestep_s]); float alpha_prod_s = static_cast<float>(alphas_cumprod[timestep_s]);
float beta_prod_s = 1.0f - alpha_prod_s; float beta_prod_s = 1.0f - alpha_prod_s;
@ -1534,12 +1638,12 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
std::sqrt(beta_prod_t) * model_output) * std::sqrt(beta_prod_t) * model_output) *
(1.0f / std::sqrt(alpha_prod_t)); (1.0f / std::sqrt(alpha_prod_t));
x = std::sqrt(alpha_prod_s) * pred_original_sample + x = std::sqrt(alpha_prod_s / alpha_prod_t_prev) * pred_original_sample +
std::sqrt(beta_prod_s) * model_output; std::sqrt(beta_prod_s / alpha_prod_t_prev) * model_output;
if (eta > 0 && i != steps - 1) { if (eta > 0 && sigma_to > 0.0f) {
x = std::sqrt(alpha_prod_t_prev / alpha_prod_s) * x + x = std::sqrt(alpha_prod_t_prev / alpha_prod_s) * x +
std::sqrt(1.0f - alpha_prod_t_prev / alpha_prod_s) * sd::Tensor<float>::randn_like(x, rng); std::sqrt(1.0f / alpha_prod_t_prev - 1.0f / alpha_prod_s) * sd::Tensor<float>::randn_like(x, rng);
} }
} }
return x; return x;
@ -1566,21 +1670,24 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
case DPM2_SAMPLE_METHOD: case DPM2_SAMPLE_METHOD:
return sample_dpm2(model, std::move(x), sigmas); return sample_dpm2(model, std::move(x), sigmas);
case DPMPP2S_A_SAMPLE_METHOD: case DPMPP2S_A_SAMPLE_METHOD:
if (is_flow_denoiser)
return sample_dpmpp_2s_ancestral_flow(model, std::move(x), sigmas, rng, eta);
else
return sample_dpmpp_2s_ancestral(model, std::move(x), sigmas, rng, eta); return sample_dpmpp_2s_ancestral(model, std::move(x), sigmas, rng, eta);
case DPMPP2M_SAMPLE_METHOD: case DPMPP2M_SAMPLE_METHOD:
return sample_dpmpp_2m(model, std::move(x), sigmas); return sample_dpmpp_2m(model, std::move(x), sigmas);
case DPMPP2Mv2_SAMPLE_METHOD: case DPMPP2Mv2_SAMPLE_METHOD:
return sample_dpmpp_2m_v2(model, std::move(x), sigmas); return sample_dpmpp_2m_v2(model, std::move(x), sigmas);
case LCM_SAMPLE_METHOD: case LCM_SAMPLE_METHOD:
return sample_lcm(model, std::move(x), sigmas, rng); return sample_lcm(model, std::move(x), sigmas, rng, is_flow_denoiser);
case IPNDM_SAMPLE_METHOD: case IPNDM_SAMPLE_METHOD:
return sample_ipndm(model, std::move(x), sigmas); return sample_ipndm(model, std::move(x), sigmas);
case IPNDM_V_SAMPLE_METHOD: case IPNDM_V_SAMPLE_METHOD:
return sample_ipndm_v(model, std::move(x), sigmas); return sample_ipndm_v(model, std::move(x), sigmas);
case RES_MULTISTEP_SAMPLE_METHOD: case RES_MULTISTEP_SAMPLE_METHOD:
return sample_res_multistep(model, std::move(x), sigmas, rng, eta); return sample_res_multistep(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
case RES_2S_SAMPLE_METHOD: case RES_2S_SAMPLE_METHOD:
return sample_res_2s(model, std::move(x), sigmas, rng, eta); return sample_res_2s(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
case ER_SDE_SAMPLE_METHOD: case ER_SDE_SAMPLE_METHOD:
return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser, eta); return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
case DDIM_TRAILING_SAMPLE_METHOD: case DDIM_TRAILING_SAMPLE_METHOD:

View File

@ -49,6 +49,7 @@ struct DiffusionModel {
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter){}; virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter){};
virtual int64_t get_adm_in_channels() = 0; virtual int64_t get_adm_in_channels() = 0;
virtual void set_flash_attention_enabled(bool enabled) = 0; virtual void set_flash_attention_enabled(bool enabled) = 0;
virtual void set_max_graph_vram_bytes(size_t max_vram_bytes) = 0;
virtual void set_circular_axes(bool circular_x, bool circular_y) = 0; virtual void set_circular_axes(bool circular_x, bool circular_y) = 0;
}; };
@ -98,6 +99,10 @@ struct UNetModel : public DiffusionModel {
unet.set_flash_attention_enabled(enabled); unet.set_flash_attention_enabled(enabled);
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
unet.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override { void set_circular_axes(bool circular_x, bool circular_y) override {
unet.set_circular_axes(circular_x, circular_y); unet.set_circular_axes(circular_x, circular_y);
} }
@ -164,6 +169,10 @@ struct MMDiTModel : public DiffusionModel {
mmdit.set_flash_attention_enabled(enabled); mmdit.set_flash_attention_enabled(enabled);
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
mmdit.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override { void set_circular_axes(bool circular_x, bool circular_y) override {
mmdit.set_circular_axes(circular_x, circular_y); mmdit.set_circular_axes(circular_x, circular_y);
} }
@ -229,6 +238,10 @@ struct FluxModel : public DiffusionModel {
flux.set_flash_attention_enabled(enabled); flux.set_flash_attention_enabled(enabled);
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
flux.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override { void set_circular_axes(bool circular_x, bool circular_y) override {
flux.set_circular_axes(circular_x, circular_y); flux.set_circular_axes(circular_x, circular_y);
} }
@ -299,6 +312,10 @@ struct AnimaModel : public DiffusionModel {
anima.set_flash_attention_enabled(enabled); anima.set_flash_attention_enabled(enabled);
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
anima.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override { void set_circular_axes(bool circular_x, bool circular_y) override {
anima.set_circular_axes(circular_x, circular_y); anima.set_circular_axes(circular_x, circular_y);
} }
@ -364,6 +381,10 @@ struct WanModel : public DiffusionModel {
wan.set_flash_attention_enabled(enabled); wan.set_flash_attention_enabled(enabled);
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
wan.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override { void set_circular_axes(bool circular_x, bool circular_y) override {
wan.set_circular_axes(circular_x, circular_y); wan.set_circular_axes(circular_x, circular_y);
} }
@ -433,6 +454,10 @@ struct QwenImageModel : public DiffusionModel {
qwen_image.set_flash_attention_enabled(enabled); qwen_image.set_flash_attention_enabled(enabled);
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
qwen_image.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override { void set_circular_axes(bool circular_x, bool circular_y) override {
qwen_image.set_circular_axes(circular_x, circular_y); qwen_image.set_circular_axes(circular_x, circular_y);
} }
@ -499,6 +524,10 @@ struct ZImageModel : public DiffusionModel {
z_image.set_flash_attention_enabled(enabled); z_image.set_flash_attention_enabled(enabled);
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
z_image.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override { void set_circular_axes(bool circular_x, bool circular_y) override {
z_image.set_circular_axes(circular_x, circular_y); z_image.set_circular_axes(circular_x, circular_y);
} }
@ -564,6 +593,10 @@ struct ErnieImageModel : public DiffusionModel {
ernie_image.set_flash_attention_enabled(enabled); ernie_image.set_flash_attention_enabled(enabled);
} }
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
ernie_image.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override { void set_circular_axes(bool circular_x, bool circular_y) override {
ernie_image.set_circular_axes(circular_x, circular_y); ernie_image.set_circular_axes(circular_x, circular_y);
} }

View File

@ -295,6 +295,8 @@ namespace ErnieImage {
auto c = time_embedding->forward(ctx, sample); // [N, hidden_size] auto c = time_embedding->forward(ctx, sample); // [N, hidden_size]
auto mod_params = adaLN_mod->forward(ctx, ggml_silu(ctx->ggml_ctx, c)); // [N, 6 * hidden_size] auto mod_params = adaLN_mod->forward(ctx, ggml_silu(ctx->ggml_ctx, c)); // [N, 6 * hidden_size]
sd::ggml_graph_cut::mark_graph_cut(hidden_states, "ernie_image.prelude", "hidden_states");
// sd::ggml_graph_cut::mark_graph_cut(mod_params, "ernie_image.prelude", "mod_params");
auto chunks = ggml_ext_chunk(ctx->ggml_ctx, mod_params, 6, 0); auto chunks = ggml_ext_chunk(ctx->ggml_ctx, mod_params, 6, 0);
std::vector<ggml_tensor*> temb; std::vector<ggml_tensor*> temb;
temb.reserve(6); temb.reserve(6);
@ -305,6 +307,7 @@ namespace ErnieImage {
for (int i = 0; i < params.num_layers; i++) { for (int i = 0; i < params.num_layers; i++) {
auto layer = std::dynamic_pointer_cast<ErnieImageSharedAdaLNBlock>(blocks["layers." + std::to_string(i)]); auto layer = std::dynamic_pointer_cast<ErnieImageSharedAdaLNBlock>(blocks["layers." + std::to_string(i)]);
hidden_states = layer->forward(ctx, hidden_states, pe, temb); hidden_states = layer->forward(ctx, hidden_states, pe, temb);
sd::ggml_graph_cut::mark_graph_cut(hidden_states, "ernie_image.layers." + std::to_string(i), "hidden_states");
} }
hidden_states = final_norm->forward(ctx, hidden_states, c); hidden_states = final_norm->forward(ctx, hidden_states, c);

View File

@ -125,26 +125,32 @@ public:
auto conv_last = std::dynamic_pointer_cast<Conv2d>(blocks["conv_last"]); auto conv_last = std::dynamic_pointer_cast<Conv2d>(blocks["conv_last"]);
auto feat = conv_first->forward(ctx, x); auto feat = conv_first->forward(ctx, x);
sd::ggml_graph_cut::mark_graph_cut(feat, "esrgan.prelude", "feat");
auto body_feat = feat; auto body_feat = feat;
for (int i = 0; i < num_block; i++) { for (int i = 0; i < num_block; i++) {
std::string name = "body." + std::to_string(i); std::string name = "body." + std::to_string(i);
auto block = std::dynamic_pointer_cast<RRDB>(blocks[name]); auto block = std::dynamic_pointer_cast<RRDB>(blocks[name]);
body_feat = block->forward(ctx, body_feat); body_feat = block->forward(ctx, body_feat);
sd::ggml_graph_cut::mark_graph_cut(body_feat, "esrgan.body." + std::to_string(i), "feat");
} }
body_feat = conv_body->forward(ctx, body_feat); body_feat = conv_body->forward(ctx, body_feat);
feat = ggml_add(ctx->ggml_ctx, feat, body_feat); feat = ggml_add(ctx->ggml_ctx, feat, body_feat);
sd::ggml_graph_cut::mark_graph_cut(feat, "esrgan.body.out", "feat");
// upsample // upsample
if (scale >= 2) { if (scale >= 2) {
auto conv_up1 = std::dynamic_pointer_cast<Conv2d>(blocks["conv_up1"]); auto conv_up1 = std::dynamic_pointer_cast<Conv2d>(blocks["conv_up1"]);
feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx->ggml_ctx, feat, 2, GGML_SCALE_MODE_NEAREST))); feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx->ggml_ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
sd::ggml_graph_cut::mark_graph_cut(feat, "esrgan.up1", "feat");
if (scale == 4) { if (scale == 4) {
auto conv_up2 = std::dynamic_pointer_cast<Conv2d>(blocks["conv_up2"]); auto conv_up2 = std::dynamic_pointer_cast<Conv2d>(blocks["conv_up2"]);
feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx->ggml_ctx, feat, 2, GGML_SCALE_MODE_NEAREST))); feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx->ggml_ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
sd::ggml_graph_cut::mark_graph_cut(feat, "esrgan.up2", "feat");
} }
} }
// for all scales // for all scales
auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat))); auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat)));
sd::ggml_graph_cut::mark_graph_cut(out, "esrgan.final", "out");
return out; return out;
} }
}; };

View File

@ -928,6 +928,9 @@ namespace Flux {
} }
txt = txt_in->forward(ctx, txt); txt = txt_in->forward(ctx, txt);
sd::ggml_graph_cut::mark_graph_cut(img, "flux.prelude", "img");
sd::ggml_graph_cut::mark_graph_cut(txt, "flux.prelude", "txt");
sd::ggml_graph_cut::mark_graph_cut(vec, "flux.prelude", "vec");
for (int i = 0; i < params.depth; i++) { for (int i = 0; i < params.depth; i++) {
if (skip_layers.size() > 0 && std::find(skip_layers.begin(), skip_layers.end(), i) != skip_layers.end()) { if (skip_layers.size() > 0 && std::find(skip_layers.begin(), skip_layers.end(), i) != skip_layers.end()) {
@ -939,6 +942,8 @@ namespace Flux {
auto img_txt = block->forward(ctx, img, txt, vec, pe, txt_img_mask, ds_img_mods, ds_txt_mods); auto img_txt = block->forward(ctx, img, txt, vec, pe, txt_img_mask, ds_img_mods, ds_txt_mods);
img = img_txt.first; // [N, n_img_token, hidden_size] img = img_txt.first; // [N, n_img_token, hidden_size]
txt = img_txt.second; // [N, n_txt_token, hidden_size] txt = img_txt.second; // [N, n_txt_token, hidden_size]
sd::ggml_graph_cut::mark_graph_cut(img, "flux.double_blocks." + std::to_string(i), "img");
sd::ggml_graph_cut::mark_graph_cut(txt, "flux.double_blocks." + std::to_string(i), "txt");
} }
auto txt_img = ggml_concat(ctx->ggml_ctx, txt, img, 1); // [N, n_txt_token + n_img_token, hidden_size] auto txt_img = ggml_concat(ctx->ggml_ctx, txt, img, 1); // [N, n_txt_token + n_img_token, hidden_size]
@ -949,6 +954,7 @@ namespace Flux {
auto block = std::dynamic_pointer_cast<SingleStreamBlock>(blocks["single_blocks." + std::to_string(i)]); auto block = std::dynamic_pointer_cast<SingleStreamBlock>(blocks["single_blocks." + std::to_string(i)]);
txt_img = block->forward(ctx, txt_img, vec, pe, txt_img_mask, ss_mods); txt_img = block->forward(ctx, txt_img, vec, pe, txt_img_mask, ss_mods);
sd::ggml_graph_cut::mark_graph_cut(txt_img, "flux.single_blocks." + std::to_string(i), "txt_img");
} }
img = ggml_view_3d(ctx->ggml_ctx, img = ggml_view_3d(ctx->ggml_ctx,

File diff suppressed because it is too large Load Diff

298
src/ggml_extend_backend.hpp Normal file
View File

@ -0,0 +1,298 @@
#ifndef __GGML_EXTEND_BACKEND_HPP__
#define __GGML_EXTEND_BACKEND_HPP__
#include <cstring>
#include <mutex>
#include "ggml-backend.h"
#include "ggml.h"
#ifndef __STATIC_INLINE__
#define __STATIC_INLINE__ static inline
#endif
inline void ggml_backend_load_all_once() {
    // A registry that already has devices and exposes the CPU backend means
    // static registration (or explicit host-side preloading) has completed;
    // skip rescanning the default paths in that case.
    auto registry_populated = []() -> bool {
        return ggml_backend_dev_count() > 0 && ggml_backend_reg_by_name("CPU") != nullptr;
    };
    if (registry_populated()) {
        return;
    }
    // In dynamic-backend mode the modules are discovered at runtime, so they
    // must be loaded before the CPU backend or its proc table can be queried.
    // If the host preloaded only a subset of backends, permit exactly one
    // default-path scan so the missing modules can still be found.
    static std::once_flag scan_once;
    std::call_once(scan_once, [&]() {
        if (!registry_populated()) {
            ggml_backend_load_all();
        }
    });
}
// Do not gate this branch on GGML_CPU or GGML_CPU_ALL_VARIANTS:
// those are CMake options used to configure ggml itself, but they are not
// exported as PUBLIC compile definitions to stable-diffusion in backend-DL mode.
// In practice, this target can reliably see GGML_BACKEND_DL, but not whether
// the CPU backend was compiled as a loadable module. We therefore use runtime
// backend discovery instead of compile-time assumptions.
__STATIC_INLINE__ ggml_backend_reg_t ggml_backend_cpu_reg() {
    // Look up the CPU backend registration; if it is not registered yet
    // (dynamic-loading builds), trigger a one-time backend scan and retry.
    ggml_backend_reg_t cpu_reg = ggml_backend_reg_by_name("CPU");
    if (cpu_reg == nullptr) {
        ggml_backend_load_all_once();
        cpu_reg = ggml_backend_reg_by_name("CPU");
    }
    return cpu_reg;
}
__STATIC_INLINE__ ggml_backend_reg_t ggml_backend_reg_from_backend(ggml_backend_t backend) {
    // Resolve the registration that owns `backend`; fall back to the CPU
    // registration when the backend or its device is unavailable.
    if (backend == nullptr) {
        return ggml_backend_cpu_reg();
    }
    ggml_backend_dev_t dev = ggml_backend_get_device(backend);
    if (dev == nullptr) {
        return ggml_backend_cpu_reg();
    }
    return ggml_backend_dev_backend_reg(dev);
}
__STATIC_INLINE__ ggml_backend_t ggml_backend_cpu_init() {
    // The first attempt can fail before dynamic backend modules are loaded;
    // scan once and retry in that case.
    ggml_backend_t cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
    if (cpu == nullptr) {
        ggml_backend_load_all_once();
        cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
    }
    return cpu;
}
__STATIC_INLINE__ bool ggml_backend_is_cpu(ggml_backend_t backend) {
    if (backend == nullptr) {
        return false;
    }
    // Prefer the attached device's type; fall back to comparing the backend
    // name when no device is available.
    ggml_backend_dev_t dev = ggml_backend_get_device(backend);
    if (dev != nullptr) {
        return ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU;
    }
    const char* name = ggml_backend_name(backend);
    return name != nullptr && std::strcmp(name, "CPU") == 0;
}
__STATIC_INLINE__ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
    // Thread-count control is exposed through the registration's proc table;
    // silently no-op when the backend does not provide it.
    ggml_backend_reg_t reg = ggml_backend_reg_from_backend(backend_cpu);
    if (reg != nullptr) {
        auto set_n_threads = reinterpret_cast<ggml_backend_set_n_threads_t>(
            ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads"));
        if (set_n_threads != nullptr) {
            set_n_threads(backend_cpu, n_threads);
        }
    }
}
using __ggml_backend_cpu_set_threadpool_t = void (*)(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
__STATIC_INLINE__ void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) {
    // The threadpool setter has no public typedef, so look it up by name in
    // the proc table and cast; silently no-op when unavailable.
    ggml_backend_reg_t reg = ggml_backend_reg_from_backend(backend_cpu);
    if (reg != nullptr) {
        auto set_threadpool = reinterpret_cast<__ggml_backend_cpu_set_threadpool_t>(
            ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_set_threadpool"));
        if (set_threadpool != nullptr) {
            set_threadpool(backend_cpu, threadpool);
        }
    }
}
__STATIC_INLINE__ void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void* abort_callback_data) {
    // Install an abort callback via the registration's proc table; silently
    // no-op when the backend does not expose one.
    ggml_backend_reg_t reg = ggml_backend_reg_from_backend(backend_cpu);
    if (reg != nullptr) {
        auto set_abort = reinterpret_cast<ggml_backend_set_abort_callback_t>(
            ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_abort_callback"));
        if (set_abort != nullptr) {
            set_abort(backend_cpu, abort_callback, abort_callback_data);
        }
    }
}
__STATIC_INLINE__ ggml_backend_buffer_t ggml_backend_tensor_buffer(const struct ggml_tensor* tensor) {
    // Views carry their storage on view_src; plain tensors on themselves.
    if (tensor == nullptr) {
        return nullptr;
    }
    return (tensor->view_src != nullptr) ? tensor->view_src->buffer : tensor->buffer;
}
__STATIC_INLINE__ bool ggml_backend_tensor_is_host_accessible(const struct ggml_tensor* tensor) {
    // A tensor can be dereferenced directly only when it has data and that
    // data resides in host memory (no backing buffer counts as host memory).
    if (tensor == nullptr || tensor->data == nullptr) {
        return false;
    }
    ggml_backend_buffer_t buf = ggml_backend_tensor_buffer(tensor);
    if (buf == nullptr) {
        return true;
    }
    return ggml_backend_buffer_is_host(buf);
}
// Byte offset of element (i0, i1, i2, i3) within `tensor`, computed from the
// per-dimension byte strides nb[0..3]. Indices are assumed in range; no bounds
// checking is performed.
__STATIC_INLINE__ size_t ggml_backend_tensor_offset(const struct ggml_tensor* tensor, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
    // static_cast instead of a C-style cast; the stride arithmetic itself is
    // unchanged.
    return static_cast<size_t>(i0 * tensor->nb[0] + i1 * tensor->nb[1] + i2 * tensor->nb[2] + i3 * tensor->nb[3]);
}
template <typename T>
__STATIC_INLINE__ void ggml_backend_tensor_write_scalar(const struct ggml_tensor* tensor, int64_t i0, int64_t i1, int64_t i2, int64_t i3, T value) {
    // Write a single element of type T, routing through the backend copy API
    // when the tensor's data is not directly addressable from the host.
    const size_t byte_offset = ggml_backend_tensor_offset(tensor, i0, i1, i2, i3);
    if (!ggml_backend_tensor_is_host_accessible(tensor)) {
        ggml_backend_tensor_set(const_cast<struct ggml_tensor*>(tensor), &value, byte_offset, sizeof(T));
        return;
    }
    *reinterpret_cast<T*>(reinterpret_cast<char*>(tensor->data) + byte_offset) = value;
}
// Store a float value at (i0, i1, i2, i3), converting to the tensor's element
// type first (integer types truncate toward zero via static_cast; F16/BF16 go
// through the ggml conversion helpers). Aborts on quantized/unsupported types.
__STATIC_INLINE__ void ggml_set_f32_nd(const struct ggml_tensor* tensor, int64_t i0, int64_t i1, int64_t i2, int64_t i3, float value) {
    switch (tensor->type) {
        case GGML_TYPE_I8:
            ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, static_cast<int8_t>(value));
            break;
        case GGML_TYPE_I16:
            ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, static_cast<int16_t>(value));
            break;
        case GGML_TYPE_I32:
            ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, static_cast<int32_t>(value));
            break;
        case GGML_TYPE_F16:
            ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, ggml_fp32_to_fp16(value));
            break;
        case GGML_TYPE_BF16:
            ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, ggml_fp32_to_bf16(value));
            break;
        case GGML_TYPE_F32:
            ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, value);
            break;
        default:
            GGML_ABORT("fatal error");
    }
}
// Store a float at flat index i. For non-contiguous tensors the flat index is
// unraveled to 4D coordinates and routed through ggml_set_f32_nd; for
// contiguous tensors the flat index is used directly as i0 (valid because
// contiguous layout means offset == i * nb[0]).
__STATIC_INLINE__ void ggml_set_f32_1d(const struct ggml_tensor* tensor, int i, float value) {
    if (!ggml_is_contiguous(tensor)) {
        int64_t id[4] = {0, 0, 0, 0};
        ggml_unravel_index(tensor, i, &id[0], &id[1], &id[2], &id[3]);
        ggml_set_f32_nd(tensor, id[0], id[1], id[2], id[3], value);
        return;
    }
    switch (tensor->type) {
        case GGML_TYPE_I8:
            ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, static_cast<int8_t>(value));
            break;
        case GGML_TYPE_I16:
            ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, static_cast<int16_t>(value));
            break;
        case GGML_TYPE_I32:
            ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, static_cast<int32_t>(value));
            break;
        case GGML_TYPE_F16:
            ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, ggml_fp32_to_fp16(value));
            break;
        case GGML_TYPE_BF16:
            ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, ggml_fp32_to_bf16(value));
            break;
        case GGML_TYPE_F32:
            ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, value);
            break;
        default:
            GGML_ABORT("fatal error");
    }
}
// Compatibility shim for the legacy ggml_graph_compute_with_ctx() API.
//
// @param ctx       unused; kept only for signature compatibility
// @param cgraph    graph to execute on the CPU backend
// @param n_threads number of CPU threads to use
// @return status from ggml_backend_graph_compute, or GGML_STATUS_ALLOC_FAILED
//         if the CPU backend could not be initialized
__STATIC_INLINE__ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context* ctx, struct ggml_cgraph* cgraph, int n_threads) {
    (void)ctx;
    // The legacy ggml_graph_compute_with_ctx() symbol lives in ggml-cpu, but
    // the backend proc table does not expose it in GGML_BACKEND_DL mode.
    // Recreate the old behavior by initializing the CPU backend explicitly and
    // executing the graph through the generic backend API.
    ggml_backend_t backend = ggml_backend_cpu_init();
    if (backend == nullptr) {
        return GGML_STATUS_ALLOC_FAILED;
    }
    ggml_backend_cpu_set_n_threads(backend, n_threads);
    // A fresh backend per call is intentional: it keeps the shim stateless at
    // the cost of re-initializing the CPU backend each time.
    const enum ggml_status status = ggml_backend_graph_compute(backend, cgraph);
    ggml_backend_free(backend);
    return status;
}
// Fill every element of `tensor` with `value` (converted to the element type).
// Fast path: host-accessible contiguous tensors are filled with a direct loop
// over tensor->data, converting the value once up front. Everything else falls
// back to per-element ggml_set_f32_1d, which handles non-contiguous layouts
// and device buffers. Returns the tensor for chaining.
//
// NOTE(review): the fallback casts the element index to int, so tensors with
// more than INT_MAX elements on that path would truncate — bounded by
// ggml_set_f32_1d's int parameter; confirm callers stay below that.
__STATIC_INLINE__ ggml_tensor* ggml_set_f32(struct ggml_tensor* tensor, float value) {
    GGML_ASSERT(tensor != nullptr);
    if (ggml_backend_tensor_is_host_accessible(tensor) && ggml_is_contiguous(tensor)) {
        const int64_t nelements = ggml_nelements(tensor);
        switch (tensor->type) {
            case GGML_TYPE_I8: {
                auto* data     = reinterpret_cast<int8_t*>(tensor->data);
                const int8_t v = static_cast<int8_t>(value);
                for (int64_t i = 0; i < nelements; ++i) {
                    data[i] = v;
                }
            } break;
            case GGML_TYPE_I16: {
                auto* data      = reinterpret_cast<int16_t*>(tensor->data);
                const int16_t v = static_cast<int16_t>(value);
                for (int64_t i = 0; i < nelements; ++i) {
                    data[i] = v;
                }
            } break;
            case GGML_TYPE_I32: {
                auto* data      = reinterpret_cast<int32_t*>(tensor->data);
                const int32_t v = static_cast<int32_t>(value);
                for (int64_t i = 0; i < nelements; ++i) {
                    data[i] = v;
                }
            } break;
            case GGML_TYPE_F16: {
                auto* data          = reinterpret_cast<ggml_fp16_t*>(tensor->data);
                const ggml_fp16_t v = ggml_fp32_to_fp16(value);
                for (int64_t i = 0; i < nelements; ++i) {
                    data[i] = v;
                }
            } break;
            case GGML_TYPE_BF16: {
                auto* data          = reinterpret_cast<ggml_bf16_t*>(tensor->data);
                const ggml_bf16_t v = ggml_fp32_to_bf16(value);
                for (int64_t i = 0; i < nelements; ++i) {
                    data[i] = v;
                }
            } break;
            case GGML_TYPE_F32: {
                auto* data = reinterpret_cast<float*>(tensor->data);
                for (int64_t i = 0; i < nelements; ++i) {
                    data[i] = value;
                }
            } break;
            default:
                GGML_ABORT("fatal error");
        }
        return tensor;
    }
    // Slow path: per-element writes through the backend-aware setter.
    const int64_t nelements = ggml_nelements(tensor);
    for (int64_t i = 0; i < nelements; ++i) {
        ggml_set_f32_1d(tensor, static_cast<int>(i), value);
    }
    return tensor;
}
#endif

676
src/ggml_graph_cut.cpp Normal file
View File

@ -0,0 +1,676 @@
#include "ggml_graph_cut.h"
#include <algorithm>
#include <cstring>
#include <map>
#include <set>
#include <sstream>
#include <stack>
#include <unordered_map>
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "util.h"
#include "../ggml/src/ggml-impl.h"
namespace sd::ggml_graph_cut {
// Human-readable identifier for log/sort purposes: the tensor's name when it
// has one, otherwise a placeholder built from its address.
static std::string graph_cut_tensor_display_name(const ggml_tensor* tensor) {
    if (tensor == nullptr) {
        return "<null>";
    }
    const bool has_name = tensor->name[0] != '\0';
    return has_name ? std::string(tensor->name) : sd_format("<tensor@%p>", (const void*)tensor);
}
// Linear scan for the position of `tensor` in the graph's leaf list;
// returns -1 when the tensor is not a leaf of `gf`.
static int graph_leaf_index(ggml_cgraph* gf, const ggml_tensor* tensor) {
    GGML_ASSERT(gf != nullptr);
    GGML_ASSERT(tensor != nullptr);
    int found = -1;
    for (int i = 0; found < 0 && i < gf->n_leafs; ++i) {
        if (gf->leafs[i] == tensor) {
            found = i;
        }
    }
    return found;
}
// True when `tensor` is a model weight, i.e. a member of the caller-provided
// parameter set. Null tensors are never parameters.
static bool is_params_tensor(const std::unordered_set<const ggml_tensor*>& params_tensor_set,
                             const ggml_tensor* tensor) {
    return tensor != nullptr && params_tensor_set.count(tensor) != 0;
}
// Capture the type and dimensions of a leaf tensor as a shape fingerprint.
// A null tensor yields the default (empty) shape.
static Plan::InputShape input_shape(const ggml_tensor* tensor) {
    Plan::InputShape shape;
    if (tensor != nullptr) {
        shape.type = tensor->type;
        for (size_t d = 0; d < GGML_MAX_DIMS; ++d) {
            shape.ne[d] = tensor->ne[d];
        }
    }
    return shape;
}
// Projected VRAM footprint of running one segment: compute scratch plus the
// weights, cached previous-cut inputs, and cut outputs that must be resident.
static size_t graph_cut_segment_vram_bytes(const Segment& segment) {
    size_t total = segment.compute_buffer_size;
    total += segment.input_param_bytes;
    total += segment.input_previous_cut_bytes;
    total += segment.output_bytes;
    return total;
}
// Build a seed Segment covering plan segments [start_segment_index,
// end_segment_index]: the seed carries the union of their output node indices
// (deduplicated, original order preserved) and a combined group name. The
// seed's internals/inputs/sizes are left empty for build_segment() to fill.
static Segment make_segment_seed(const Plan& plan,
                                 size_t start_segment_index,
                                 size_t end_segment_index) {
    GGML_ASSERT(start_segment_index < plan.segments.size());
    GGML_ASSERT(end_segment_index < plan.segments.size());
    GGML_ASSERT(start_segment_index <= end_segment_index);
    Segment seed;
    const auto& start_segment  = plan.segments[start_segment_index];
    const auto& target_segment = plan.segments[end_segment_index];
    // Union of outputs across the merged range; a set guards against the same
    // node appearing as an output of multiple segments.
    std::unordered_set<int> seen_output_node_indices;
    for (size_t seg_idx = start_segment_index; seg_idx <= end_segment_index; ++seg_idx) {
        for (int output_node_index : plan.segments[seg_idx].output_node_indices) {
            if (seen_output_node_indices.insert(output_node_index).second) {
                seed.output_node_indices.push_back(output_node_index);
            }
        }
    }
    // "first..last" naming for merged ranges; plain name for a single segment.
    if (start_segment_index == end_segment_index) {
        seed.group_name = target_segment.group_name;
    } else {
        seed.group_name = sd_format("%s..%s",
                                    start_segment.group_name.c_str(),
                                    target_segment.group_name.c_str());
    }
    return seed;
}
// Complete a seeded segment and append it to `plan`.
//
// Walks the graph depth-first backwards from the segment's output nodes until
// it hits an input boundary: a graph leaf (classified as INPUT_PARAM or
// INPUT_EXTERNAL) or a node already published by an earlier segment
// (INPUT_PREVIOUS_CUT). Everything in between becomes an internal node. Also
// accumulates per-category input/output byte totals, measures the segment's
// compute buffer, and publishes its outputs for later segments to consume.
//
// `available_cut_output_node_indices` is both read (to stop traversal at
// earlier cuts) and extended (with this segment's outputs) — callers probing
// candidate merges must pass a copy.
static void build_segment(ggml_cgraph* gf,
                          Plan& plan,
                          Segment& segment,
                          const std::unordered_map<const ggml_tensor*, int>& producer_index,
                          std::unordered_set<int>& available_cut_output_node_indices,
                          ggml_backend_t backend,
                          const std::unordered_set<const ggml_tensor*>& params_tensor_set,
                          const char* log_desc) {
    std::set<int> internal_nodes;  // ordered set keeps graph execution order
    std::unordered_set<const ggml_tensor*> input_seen;
    std::vector<Segment::InputRef> input_refs;
    // DFS work list seeded with the segment's output tensors.
    std::stack<ggml_tensor*> work_stack;
    for (int output_node_index : segment.output_node_indices) {
        ggml_tensor* output = ggml_graph_node(gf, output_node_index);
        if (output != nullptr) {
            work_stack.push(output);
        }
    }
    while (!work_stack.empty()) {
        ggml_tensor* tensor = work_stack.top();
        work_stack.pop();
        if (tensor == nullptr) {
            continue;
        }
        auto producer_it = producer_index.find(tensor);
        if (producer_it == producer_index.end()) {
            // Not produced by any node: a graph leaf (weight or external input).
            if (input_seen.insert(tensor).second) {
                Segment::InputRef input_ref;
                input_ref.type = is_params_tensor(params_tensor_set, tensor) ? Segment::INPUT_PARAM : Segment::INPUT_EXTERNAL;
                input_ref.display_name = graph_cut_tensor_display_name(tensor);
                input_ref.leaf_index = graph_leaf_index(gf, tensor);
                input_refs.push_back(std::move(input_ref));
            }
            continue;
        }
        int node_idx = producer_it->second;
        if (available_cut_output_node_indices.find(node_idx) != available_cut_output_node_indices.end()) {
            // Produced by an earlier segment: consume its cached cut output
            // instead of recomputing it here.
            if (input_seen.insert(tensor).second) {
                Segment::InputRef input_ref;
                input_ref.type = Segment::INPUT_PREVIOUS_CUT;
                input_ref.display_name = graph_cut_tensor_display_name(tensor);
                input_ref.node_index = node_idx;
                input_refs.push_back(std::move(input_ref));
            }
            continue;
        }
        if (!internal_nodes.insert(node_idx).second) {
            continue;  // already visited via another path
        }
        ggml_tensor* node = ggml_graph_node(gf, node_idx);
        for (int src_idx = 0; src_idx < GGML_MAX_SRC; ++src_idx) {
            if (node->src[src_idx] != nullptr) {
                work_stack.push(node->src[src_idx]);
            }
        }
    }
    if (!internal_nodes.empty()) {
        segment.internal_node_indices.assign(internal_nodes.begin(), internal_nodes.end());
    }
    // Deterministic input ordering: by type first, then by display name.
    std::sort(input_refs.begin(),
              input_refs.end(),
              [](const Segment::InputRef& a, const Segment::InputRef& b) {
                  if (a.type != b.type) {
                      return a.type < b.type;
                  }
                  return a.display_name < b.display_name;
              });
    segment.input_refs = input_refs;
    // Accumulate per-category byte totals used by the VRAM budget logic.
    // Previous-cut inputs are measured via their cache source (view_src).
    for (const auto& input : input_refs) {
        ggml_tensor* current_input = input_tensor(gf, input);
        size_t tensor_bytes = current_input == nullptr
                                  ? 0
                                  : (input.type == Segment::INPUT_PREVIOUS_CUT
                                         ? cache_tensor_bytes(current_input)
                                         : ggml_nbytes(current_input));
        switch (input.type) {
            case Segment::INPUT_PREVIOUS_CUT:
                segment.input_previous_cut_bytes += tensor_bytes;
                break;
            case Segment::INPUT_PARAM:
                segment.input_param_bytes += tensor_bytes;
                break;
            case Segment::INPUT_EXTERNAL:
            default:
                segment.input_external_bytes += tensor_bytes;
                break;
        }
    }
    for (int output_node_index : segment.output_node_indices) {
        ggml_tensor* output = ggml_graph_node(gf, output_node_index);
        segment.output_bytes += cache_tensor_bytes(output);
    }
    segment.compute_buffer_size = measure_segment_compute_buffer(backend, gf, segment, log_desc);
    // Publish this segment's outputs so later segments stop traversal here.
    for (int output_node_index : segment.output_node_indices) {
        available_cut_output_node_indices.insert(output_node_index);
    }
    plan.segments.push_back(std::move(segment));
}
// A tensor is a cut marker when its (non-empty) name starts with the
// GGML_RUNNER_CUT_PREFIX sentinel written by mark_graph_cut().
bool is_graph_cut_tensor(const ggml_tensor* tensor) {
    if (tensor == nullptr) {
        return false;
    }
    const char* name = tensor->name;
    if (name[0] == '\0') {
        return false;
    }
    return std::strncmp(name, GGML_RUNNER_CUT_PREFIX, std::strlen(GGML_RUNNER_CUT_PREFIX)) == 0;
}
// Encode a cut marker name as "<prefix><group>|<output>"; parsed back by
// build_plan() to group cut tensors into segments.
std::string make_graph_cut_name(const std::string& group, const std::string& output) {
    std::string name(GGML_RUNNER_CUT_PREFIX);
    name += group;
    name += "|";
    name += output;
    return name;
}
// Tag a tensor as a graph-cut boundary by renaming it with the cut sentinel.
// Null tensors are ignored.
void mark_graph_cut(ggml_tensor* tensor, const std::string& group, const std::string& output) {
    if (tensor != nullptr) {
        const std::string cut_name = make_graph_cut_name(group, output);
        ggml_set_name(tensor, cut_name.c_str());
    }
}
// Number of leaf (non-computed) tensors in the graph.
int leaf_count(ggml_cgraph* gf) {
    GGML_ASSERT(gf != nullptr);
    return gf->n_leafs;
}
// Bounds-checked access to the graph's leaf array; nullptr when out of range.
ggml_tensor* leaf_tensor(ggml_cgraph* gf, int leaf_index) {
    GGML_ASSERT(gf != nullptr);
    const bool in_range = leaf_index >= 0 && leaf_index < gf->n_leafs;
    return in_range ? gf->leafs[leaf_index] : nullptr;
}
// Buffer that actually backs a tensor; views resolve to their source's buffer.
ggml_backend_buffer_t tensor_buffer(const ggml_tensor* tensor) {
    if (tensor == nullptr) {
        return nullptr;
    }
    if (tensor->view_src != nullptr) {
        return tensor->view_src->buffer;
    }
    return tensor->buffer;
}
// Tensor whose storage should be cached for a cut output: the view source for
// views, the tensor itself otherwise.
ggml_tensor* cache_source_tensor(ggml_tensor* tensor) {
    if (tensor == nullptr) {
        return nullptr;
    }
    if (tensor->view_src != nullptr) {
        return tensor->view_src;
    }
    return tensor;
}
// Bytes needed to cache a cut tensor: size of the underlying storage (the
// view source for views), not the view itself. Zero for null tensors.
size_t cache_tensor_bytes(const ggml_tensor* tensor) {
    if (tensor == nullptr) {
        return 0;
    }
    const ggml_tensor* storage = tensor;
    if (tensor->view_src != nullptr) {
        storage = tensor->view_src;
    }
    return ggml_nbytes(storage);
}
// Check whether a cached plan is still valid for `gf`: node and leaf counts
// must match, and every recorded non-param input leaf must still exist at the
// same index with the same type and dimensions. Used to decide whether a
// cached plan can be reused or must be rebuilt.
bool plan_matches_graph(ggml_cgraph* gf, const Plan& plan) {
    GGML_ASSERT(gf != nullptr);
    if (ggml_graph_n_nodes(gf) != plan.n_nodes || gf->n_leafs != plan.n_leafs) {
        return false;
    }
    for (const auto& input_shape_ref : plan.input_shapes) {
        if (input_shape_ref.leaf_index < 0 || input_shape_ref.leaf_index >= gf->n_leafs) {
            return false;
        }
        ggml_tensor* leaf = gf->leafs[input_shape_ref.leaf_index];
        if (leaf == nullptr || input_shape_ref.type != leaf->type) {
            return false;
        }
        for (int d = 0; d < GGML_MAX_DIMS; ++d) {
            if (input_shape_ref.ne[static_cast<size_t>(d)] != leaf->ne[d]) {
                return false;
            }
        }
    }
    return true;
}
// Resolve the output_index-th output of a segment to its graph node tensor.
// Returns nullptr for an out-of-range output index or a stale node index.
ggml_tensor* output_tensor(ggml_cgraph* gf, const Segment& segment, size_t output_index) {
    GGML_ASSERT(gf != nullptr);
    if (output_index >= segment.output_node_indices.size()) {
        return nullptr;
    }
    const int node_index = segment.output_node_indices[output_index];
    const bool node_ok   = node_index >= 0 && node_index < ggml_graph_n_nodes(gf);
    return node_ok ? ggml_graph_node(gf, node_index) : nullptr;
}
// Resolve a segment input reference to its tensor: previous-cut inputs map to
// a graph node, everything else maps to a graph leaf. Returns nullptr for
// indices that no longer fit the graph.
ggml_tensor* input_tensor(ggml_cgraph* gf, const Segment::InputRef& input_ref) {
    GGML_ASSERT(gf != nullptr);
    if (input_ref.type == Segment::INPUT_PREVIOUS_CUT) {
        const bool node_ok = input_ref.node_index >= 0 && input_ref.node_index < ggml_graph_n_nodes(gf);
        return node_ok ? ggml_graph_node(gf, input_ref.node_index) : nullptr;
    }
    const bool leaf_ok = input_ref.leaf_index >= 0 && input_ref.leaf_index < gf->n_leafs;
    return leaf_ok ? leaf_tensor(gf, input_ref.leaf_index) : nullptr;
}
// Collect the distinct weight tensors a segment reads (INPUT_PARAM refs),
// resolved against the graph; unresolvable refs are skipped.
std::vector<ggml_tensor*> param_tensors(ggml_cgraph* gf, const Segment& segment) {
    GGML_ASSERT(gf != nullptr);
    std::vector<ggml_tensor*> result;
    std::unordered_set<ggml_tensor*> dedup;
    result.reserve(segment.input_refs.size());
    dedup.reserve(segment.input_refs.size());
    for (const auto& ref : segment.input_refs) {
        if (ref.type != Segment::INPUT_PARAM) {
            continue;
        }
        ggml_tensor* tensor = input_tensor(gf, ref);
        if (tensor != nullptr && dedup.insert(tensor).second) {
            result.push_back(tensor);
        }
    }
    return result;
}
// Like param_tensors(), but drops (and warns about) weights that have no
// backing buffer and so cannot be offloaded/uploaded at runtime.
std::vector<ggml_tensor*> runtime_param_tensors(ggml_cgraph* gf, const Segment& segment, const char* log_desc) {
    std::vector<ggml_tensor*> candidates = param_tensors(gf, segment);
    std::vector<ggml_tensor*> with_buffer;
    with_buffer.reserve(candidates.size());
    for (ggml_tensor* candidate : candidates) {
        if (tensor_buffer(candidate) != nullptr) {
            with_buffer.push_back(candidate);
            continue;
        }
        LOG_WARN("%s graph cut skipping param input without buffer: segment=%s tensor=%s",
                 log_desc == nullptr ? "unknown" : log_desc,
                 segment.group_name.c_str(),
                 candidate->name);
    }
    return with_buffer;
}
// Names of previous-cut outputs that segments AFTER current_segment_index
// still need as inputs — i.e. cached tensors that must not be evicted yet.
// Unnamed or unresolvable tensors are skipped.
std::unordered_set<std::string> collect_future_input_names(ggml_cgraph* gf,
                                                           const Plan& plan,
                                                           size_t current_segment_index) {
    GGML_ASSERT(gf != nullptr);
    std::unordered_set<std::string> names;
    const size_t n_segments = plan.segments.size();
    for (size_t seg_idx = current_segment_index + 1; seg_idx < n_segments; ++seg_idx) {
        for (const auto& input_ref : plan.segments[seg_idx].input_refs) {
            if (input_ref.type != Segment::INPUT_PREVIOUS_CUT) {
                continue;
            }
            ggml_tensor* tensor = input_tensor(gf, input_ref);
            if (tensor != nullptr && tensor->name[0] != '\0') {
                names.insert(tensor->name);
            }
        }
    }
    return names;
}
// Materialize a standalone cgraph containing just one segment: its inputs as
// leaves and its internal nodes in original order. The graph shares the
// parent graph's tensors (no copies). The caller owns the returned graph's
// context via *graph_ctx_out and must ggml_free() it.
ggml_cgraph* build_segment_graph(ggml_cgraph* gf,
                                 const Segment& segment,
                                 ggml_context** graph_ctx_out) {
    GGML_ASSERT(gf != nullptr);
    GGML_ASSERT(graph_ctx_out != nullptr);
    // Capacity: all internal nodes + all input leaves, with a small margin.
    const size_t graph_size = segment.internal_node_indices.size() + segment.input_refs.size() + 8;
    ggml_init_params params = {
        /*.mem_size   =*/ggml_graph_overhead_custom(graph_size, false) + 1024,
        /*.mem_buffer =*/nullptr,
        /*.no_alloc   =*/true,  // tensors live in the parent graph's buffers
    };
    ggml_context* graph_ctx = ggml_init(params);
    GGML_ASSERT(graph_ctx != nullptr);
    ggml_cgraph* segment_graph = ggml_new_graph_custom(graph_ctx, graph_size, false);
    GGML_ASSERT(segment_graph != nullptr);
    // Register the segment's inputs as leaves of the sub-graph.
    for (const auto& input : segment.input_refs) {
        ggml_tensor* current_input = input_tensor(gf, input);
        if (current_input == nullptr) {
            continue;
        }
        GGML_ASSERT(segment_graph->n_leafs < segment_graph->size);
        segment_graph->leafs[segment_graph->n_leafs++] = current_input;
    }
    // Flag outputs so the allocator keeps their storage past graph execution.
    for (int output_node_index : segment.output_node_indices) {
        ggml_tensor* output = ggml_graph_node(gf, output_node_index);
        if (output == nullptr) {
            continue;
        }
        ggml_set_output(output);
    }
    // internal_node_indices is sorted, preserving original execution order.
    for (int node_idx : segment.internal_node_indices) {
        ggml_graph_add_node(segment_graph, ggml_graph_node(gf, node_idx));
    }
    *graph_ctx_out = graph_ctx;
    return segment_graph;
}
// Dry-run the allocator over a segment's sub-graph to find out how large its
// compute (scratch) buffer would be on `backend`, without allocating device
// memory for the actual run. Returns 0 for segments with no internal nodes.
// log_desc is currently unused here but kept for signature symmetry.
size_t measure_segment_compute_buffer(ggml_backend_t backend,
                                      ggml_cgraph* gf,
                                      const Segment& segment,
                                      const char* log_desc) {
    GGML_ASSERT(backend != nullptr);
    GGML_ASSERT(gf != nullptr);
    if (segment.internal_node_indices.empty()) {
        return 0;
    }
    ggml_context* graph_ctx    = nullptr;
    ggml_cgraph* segment_graph = build_segment_graph(gf, segment, &graph_ctx);
    ggml_gallocr_t allocr      = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend));
    size_t sizes[1]            = {0};
    // Size-only reservation: fills sizes[] without creating real buffers.
    ggml_gallocr_reserve_n_size(
        allocr,
        segment_graph,
        nullptr,
        nullptr,
        sizes);
    size_t buffer_size = sizes[0];
    ggml_gallocr_free(allocr);
    ggml_free(graph_ctx);  // also releases segment_graph
    return buffer_size;
}
// Build the finest-granularity segmentation plan for `gf`.
//
// Records the graph's node/leaf counts and non-param input shapes (for later
// plan_matches_graph checks), then scans the nodes for cut markers (names
// written by mark_graph_cut, "<prefix><group>|<output>"), groups them by
// group name, and builds one segment per group via build_segment(). A final
// unnamed segment is appended when the last graph node is not already covered
// by a cut. Returns an empty-but-available plan for graphs with no nodes or
// no cut markers.
Plan build_plan(ggml_backend_t backend,
                ggml_cgraph* gf,
                const std::unordered_set<const ggml_tensor*>& params_tensor_set,
                const char* log_desc) {
    GGML_ASSERT(backend != nullptr);
    GGML_ASSERT(gf != nullptr);
    Plan plan;
    plan.available = true;
    const int n_nodes = ggml_graph_n_nodes(gf);
    if (n_nodes <= 0) {
        return plan;
    }
    plan.n_nodes = n_nodes;
    plan.n_leafs = gf->n_leafs;
    // Fingerprint the non-weight leaves so cached plans can be revalidated.
    for (int i = 0; i < gf->n_leafs; ++i) {
        ggml_tensor* leaf = gf->leafs[i];
        if (is_params_tensor(params_tensor_set, leaf)) {
            continue;
        }
        auto shape       = input_shape(leaf);
        shape.leaf_index = i;
        plan.input_shapes.push_back(shape);
    }
    // Map each produced tensor back to its node index for traversal.
    std::unordered_map<const ggml_tensor*, int> producer_index;
    producer_index.reserve(static_cast<size_t>(n_nodes));
    for (int i = 0; i < n_nodes; ++i) {
        producer_index[ggml_graph_node(gf, i)] = i;
    }
    // Group cut-marked nodes by their group name, preserving first-seen order.
    std::vector<Segment> grouped_segments;
    std::unordered_map<std::string, size_t> group_to_segment;
    for (int i = 0; i < n_nodes; ++i) {
        ggml_tensor* node = ggml_graph_node(gf, i);
        if (!is_graph_cut_tensor(node)) {
            continue;
        }
        plan.has_cuts = true;
        // Parse "<prefix><group>|<output>"; a missing '|' means the whole
        // payload is the group name.
        std::string full_name(node->name);
        std::string payload = full_name.substr(std::strlen(GGML_RUNNER_CUT_PREFIX));
        size_t sep          = payload.find('|');
        std::string group   = sep == std::string::npos ? payload : payload.substr(0, sep);
        auto it = group_to_segment.find(group);
        if (it == group_to_segment.end()) {
            Segment segment;
            segment.group_name = group;
            segment.output_node_indices.push_back(i);
            group_to_segment[group] = grouped_segments.size();
            grouped_segments.push_back(std::move(segment));
        } else {
            auto& segment = grouped_segments[it->second];
            segment.output_node_indices.push_back(i);
        }
    }
    if (!plan.has_cuts) {
        return plan;
    }
    std::unordered_set<int> available_cut_output_node_indices;
    available_cut_output_node_indices.reserve(static_cast<size_t>(n_nodes));
    for (auto& segment : grouped_segments) {
        build_segment(gf,
                      plan,
                      segment,
                      producer_index,
                      available_cut_output_node_indices,
                      backend,
                      params_tensor_set,
                      log_desc);
    }
    // Cover the tail of the graph (negative index -1 = last node) when it is
    // not already one of the cut outputs.
    ggml_tensor* final_output = ggml_graph_node(gf, -1);
    if (final_output != nullptr && available_cut_output_node_indices.find(n_nodes - 1) == available_cut_output_node_indices.end()) {
        Segment final_segment;
        final_segment.group_name = "ggml_runner.final";
        final_segment.output_node_indices.push_back(n_nodes - 1);
        build_segment(gf,
                      plan,
                      final_segment,
                      producer_index,
                      available_cut_output_node_indices,
                      backend,
                      params_tensor_set,
                      log_desc);
    }
    return plan;
}
// Greedily merge consecutive base-plan segments while the merged segment's
// projected VRAM footprint (compute buffer + weights + cached inputs +
// outputs) stays within max_graph_vram_bytes. Fewer, larger segments mean
// fewer weight-offload round trips at the same peak memory.
//
// @param gf                   graph the base plan was built against
// @param base_plan            finest-granularity plan from build_plan()
// @param max_graph_vram_bytes per-segment budget; 0 disables merging
// @param backend              backend used to measure candidate buffers
// @param params_tensor_set    tensors classified as weights (INPUT_PARAM)
// @param log_desc             optional tag for log output (may be nullptr)
// @return merged plan; base_plan is returned unchanged when merging is moot
Plan apply_max_vram_budget(ggml_cgraph* gf,
                           const Plan& base_plan,
                           size_t max_graph_vram_bytes,
                           ggml_backend_t backend,
                           const std::unordered_set<const ggml_tensor*>& params_tensor_set,
                           const char* log_desc) {
    GGML_ASSERT(backend != nullptr);
    GGML_ASSERT(gf != nullptr);
    int64_t t_budget_begin = ggml_time_ms();
    // Nothing to merge without a budget, cuts, or at least two segments.
    if (max_graph_vram_bytes == 0 || !base_plan.has_cuts || base_plan.segments.size() <= 1) {
        return base_plan;
    }
    const int n_nodes = ggml_graph_n_nodes(gf);
    std::unordered_map<const ggml_tensor*, int> producer_index;
    producer_index.reserve(static_cast<size_t>(n_nodes));
    for (int i = 0; i < n_nodes; ++i) {
        producer_index[ggml_graph_node(gf, i)] = i;
    }
    Plan merged_plan;
    merged_plan.available = true;
    merged_plan.has_cuts  = base_plan.has_cuts;
    merged_plan.valid     = base_plan.valid;
    merged_plan.n_nodes   = base_plan.n_nodes;
    merged_plan.n_leafs   = base_plan.n_leafs;
    std::unordered_set<int> available_cut_output_node_indices;
    available_cut_output_node_indices.reserve(static_cast<size_t>(n_nodes));
    size_t start_segment_index = 0;
    while (start_segment_index < base_plan.segments.size()) {
        // Measure the single-segment baseline first. Probing always works on
        // copies of the cut-availability set so failed candidates do not
        // pollute the real traversal state.
        Plan single_plan;
        auto single_available_cut_output_node_indices = available_cut_output_node_indices;
        auto single_seed = make_segment_seed(base_plan,
                                             start_segment_index,
                                             start_segment_index);
        build_segment(gf,
                      single_plan,
                      single_seed,
                      producer_index,
                      single_available_cut_output_node_indices,
                      backend,
                      params_tensor_set,
                      log_desc);
        GGML_ASSERT(!single_plan.segments.empty());
        // Extend the merge window one base segment at a time until the
        // candidate would exceed the budget (or the plan runs out).
        size_t best_end_segment_index = start_segment_index;
        bool can_merge_next_segment   = graph_cut_segment_vram_bytes(single_plan.segments.back()) <= max_graph_vram_bytes;
        while (can_merge_next_segment && best_end_segment_index + 1 < base_plan.segments.size()) {
            const size_t next_end_segment_index = best_end_segment_index + 1;
            Plan candidate_plan;
            auto candidate_available_cut_output_node_indices = available_cut_output_node_indices;
            auto candidate_seed = make_segment_seed(base_plan,
                                                    start_segment_index,
                                                    next_end_segment_index);
            build_segment(gf,
                          candidate_plan,
                          candidate_seed,
                          producer_index,
                          candidate_available_cut_output_node_indices,
                          backend,
                          params_tensor_set,
                          log_desc);
            GGML_ASSERT(!candidate_plan.segments.empty());
            const auto& candidate_segment = candidate_plan.segments.back();
            if (graph_cut_segment_vram_bytes(candidate_segment) > max_graph_vram_bytes) {
                break;
            }
            best_end_segment_index = next_end_segment_index;
        }
        // Rebuild the winning merge for real, this time mutating the shared
        // cut-availability set so following segments see its outputs.
        auto best_seed = make_segment_seed(base_plan,
                                           start_segment_index,
                                           best_end_segment_index);
        build_segment(gf,
                      merged_plan,
                      best_seed,
                      producer_index,
                      available_cut_output_node_indices,
                      backend,
                      params_tensor_set,
                      log_desc);
        start_segment_index = best_end_segment_index + 1;
    }
    if (log_desc != nullptr && merged_plan.segments.size() != base_plan.segments.size()) {
        LOG_INFO("%s graph cut max_vram=%.2f MB merged %zu segments -> %zu segments",
                 log_desc,
                 max_graph_vram_bytes / 1024.0 / 1024.0,
                 base_plan.segments.size(),
                 merged_plan.segments.size());
    }
    if (log_desc != nullptr) {
        // Cast explicitly: int64_t is `long` on LP64 targets, which does not
        // formally match the %lld conversion specifier.
        LOG_INFO("%s graph cut max_vram budget merge took %lld ms",
                 log_desc,
                 (long long)(ggml_time_ms() - t_budget_begin));
    }
    return merged_plan;
}
// Return the plan to execute for `gf`, rebuilding and re-caching as needed.
//
// The base (finest) plan is reused from `cache` when it still matches the
// graph; otherwise it is rebuilt and the budgeted cache entry is invalidated.
// With a nonzero VRAM budget, the budget-merged plan is likewise reused only
// when it was built for the same budget and still matches the graph.
//
// @param backend              backend used for compute-buffer measurements
// @param gf                   graph to plan for
// @param cache                plan cache, updated in place (never nullptr)
// @param max_graph_vram_bytes per-segment budget; 0 means no merging
// @param params_tensor_set    tensors classified as weights
// @param log_desc             optional tag for log output (may be nullptr)
Plan resolve_plan(ggml_backend_t backend,
                  ggml_cgraph* gf,
                  PlanCache* cache,
                  size_t max_graph_vram_bytes,
                  const std::unordered_set<const ggml_tensor*>& params_tensor_set,
                  const char* log_desc) {
    GGML_ASSERT(backend != nullptr);
    GGML_ASSERT(gf != nullptr);
    GGML_ASSERT(cache != nullptr);
    Plan base_plan;
    int64_t t_plan_begin = ggml_time_ms();
    if (cache->graph_cut_plan.available && plan_matches_graph(gf, cache->graph_cut_plan)) {
        base_plan = cache->graph_cut_plan;
    } else {
        base_plan                  = build_plan(backend, gf, params_tensor_set, log_desc);
        cache->graph_cut_plan      = base_plan;
        cache->graph_cut_plan.available = true;
        // The budgeted plan was derived from the old base plan; drop it.
        cache->budgeted_graph_cut_plan.available = false;
        if (log_desc != nullptr) {
            // Cast explicitly: int64_t is `long` on LP64 targets, which does
            // not formally match the %lld conversion specifier.
            LOG_INFO("%s build cached graph cut plan done (taking %lld ms)", log_desc, (long long)(ggml_time_ms() - t_plan_begin));
        }
    }
    Plan resolved_plan = base_plan;
    if (max_graph_vram_bytes > 0 && base_plan.has_cuts) {
        if (cache->budgeted_graph_cut_plan.available &&
            cache->budgeted_graph_cut_plan_max_vram_bytes == max_graph_vram_bytes &&
            plan_matches_graph(gf, cache->budgeted_graph_cut_plan)) {
            resolved_plan = cache->budgeted_graph_cut_plan;
        } else {
            resolved_plan = apply_max_vram_budget(gf,
                                                  base_plan,
                                                  max_graph_vram_bytes,
                                                  backend,
                                                  params_tensor_set,
                                                  log_desc);
            cache->budgeted_graph_cut_plan                = resolved_plan;
            cache->budgeted_graph_cut_plan.available      = true;
            cache->budgeted_graph_cut_plan_max_vram_bytes = max_graph_vram_bytes;
        }
    }
    return resolved_plan;
}
} // namespace sd::ggml_graph_cut

104
src/ggml_graph_cut.h Normal file
View File

@ -0,0 +1,104 @@
#ifndef __SD_GGML_GRAPH_CUT_H__
#define __SD_GGML_GRAPH_CUT_H__
#include <array>
#include <string>
#include <unordered_set>
#include <vector>
#include "ggml-backend.h"
#include "ggml.h"
namespace sd::ggml_graph_cut {
// One executable slice of a compute graph, delimited by graph-cut markers.
struct Segment {
    // Where a segment input comes from.
    enum InputType {
        INPUT_EXTERNAL = 0,  // graph leaf supplied by the caller (not a weight)
        INPUT_PREVIOUS_CUT,  // output of an earlier segment, cached between segments
        INPUT_PARAM,         // model weight tensor
    };
    // Reference to a single input tensor of the segment.
    struct InputRef {
        InputType type = INPUT_EXTERNAL;
        std::string display_name;  // tensor name (or placeholder) used for logs/sorting
        int leaf_index = -1;       // index into the graph's leaf list for leaf-backed inputs
        int node_index = -1;       // producing node index, set for INPUT_PREVIOUS_CUT only
    };
    size_t compute_buffer_size = 0;       // scratch bytes needed to run this segment
    size_t output_bytes = 0;              // bytes of the segment's cut outputs
    size_t input_external_bytes = 0;      // bytes of external leaf inputs
    size_t input_previous_cut_bytes = 0;  // bytes of cached previous-cut inputs
    size_t input_param_bytes = 0;         // bytes of weight inputs
    std::string group_name;               // cut group label parsed from the marker name
    std::vector<int> internal_node_indices;  // graph nodes executed by this segment (sorted)
    std::vector<int> output_node_indices;    // graph nodes whose results are cut outputs
    std::vector<InputRef> input_refs;        // all inputs, sorted by type then display name
};
// A full segmentation of a graph plus the metadata needed to revalidate it
// against a rebuilt graph (see plan_matches_graph).
struct Plan {
    // Type/shape fingerprint of one non-param input leaf.
    struct InputShape {
        int leaf_index = -1;  // position in the graph's leaf list
        ggml_type type = GGML_TYPE_COUNT;
        std::array<int64_t, GGML_MAX_DIMS> ne = {0, 0, 0, 0};
    };
    bool available = false;  // plan has been built (possibly with no segments)
    bool has_cuts = false;   // the graph contained at least one cut marker
    bool valid = true;
    int n_nodes = 0;  // node count of the graph the plan was built from
    int n_leafs = 0;  // leaf count of the graph the plan was built from
    std::vector<InputShape> input_shapes;  // non-param leaves, for revalidation
    std::vector<Segment> segments;         // segments in execution order
};
// Caches both the raw plan and its budget-merged variant across graph
// rebuilds; resolve_plan() reuses entries that still match the graph.
struct PlanCache {
    Plan graph_cut_plan;           // finest-granularity plan from build_plan()
    Plan budgeted_graph_cut_plan;  // plan after apply_max_vram_budget()
    size_t budgeted_graph_cut_plan_max_vram_bytes = 0;  // budget the above was built for
};
static constexpr const char* GGML_RUNNER_CUT_PREFIX = "ggml_runner_cut:";
bool is_graph_cut_tensor(const ggml_tensor* tensor);
std::string make_graph_cut_name(const std::string& group, const std::string& output);
void mark_graph_cut(ggml_tensor* tensor, const std::string& group, const std::string& output);
int leaf_count(ggml_cgraph* gf);
ggml_tensor* leaf_tensor(ggml_cgraph* gf, int leaf_index);
ggml_backend_buffer_t tensor_buffer(const ggml_tensor* tensor);
ggml_tensor* cache_source_tensor(ggml_tensor* tensor);
size_t cache_tensor_bytes(const ggml_tensor* tensor);
bool plan_matches_graph(ggml_cgraph* gf, const Plan& plan);
ggml_tensor* output_tensor(ggml_cgraph* gf, const Segment& segment, size_t output_index);
ggml_tensor* input_tensor(ggml_cgraph* gf, const Segment::InputRef& input_ref);
std::vector<ggml_tensor*> param_tensors(ggml_cgraph* gf, const Segment& segment);
std::vector<ggml_tensor*> runtime_param_tensors(ggml_cgraph* gf, const Segment& segment, const char* log_desc);
std::unordered_set<std::string> collect_future_input_names(ggml_cgraph* gf,
const Plan& plan,
size_t current_segment_index);
ggml_cgraph* build_segment_graph(ggml_cgraph* gf,
const Segment& segment,
ggml_context** graph_ctx_out);
size_t measure_segment_compute_buffer(ggml_backend_t backend,
ggml_cgraph* gf,
const Segment& segment,
const char* log_desc);
Plan build_plan(ggml_backend_t backend,
ggml_cgraph* gf,
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const char* log_desc);
Plan apply_max_vram_budget(ggml_cgraph* gf,
const Plan& base_plan,
size_t max_graph_vram_bytes,
ggml_backend_t backend,
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const char* log_desc);
Plan resolve_plan(ggml_backend_t backend,
ggml_cgraph* gf,
PlanCache* cache,
size_t max_graph_vram_bytes,
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const char* log_desc);
} // namespace sd::ggml_graph_cut
#endif

View File

@ -346,6 +346,7 @@ namespace LLM {
auto merger = std::dynamic_pointer_cast<PatchMerger>(blocks["merger"]); auto merger = std::dynamic_pointer_cast<PatchMerger>(blocks["merger"]);
auto x = patch_embed->forward(ctx, pixel_values); auto x = patch_embed->forward(ctx, pixel_values);
sd::ggml_graph_cut::mark_graph_cut(x, "llm.vision.prelude", "x");
x = ggml_reshape_4d(ctx->ggml_ctx, x, x->ne[0] * spatial_merge_size * spatial_merge_size, x->ne[1] / spatial_merge_size / spatial_merge_size, x->ne[2], x->ne[3]); x = ggml_reshape_4d(ctx->ggml_ctx, x, x->ne[0] * spatial_merge_size * spatial_merge_size, x->ne[1] / spatial_merge_size / spatial_merge_size, x->ne[2], x->ne[3]);
x = ggml_get_rows(ctx->ggml_ctx, x, window_index); x = ggml_get_rows(ctx->ggml_ctx, x, window_index);
@ -359,9 +360,11 @@ namespace LLM {
mask = nullptr; mask = nullptr;
} }
x = block->forward(ctx, x, pe, mask); x = block->forward(ctx, x, pe, mask);
sd::ggml_graph_cut::mark_graph_cut(x, "llm.vision.blocks." + std::to_string(i), "x");
} }
x = merger->forward(ctx, x); x = merger->forward(ctx, x);
sd::ggml_graph_cut::mark_graph_cut(x, "llm.vision.final", "x");
x = ggml_get_rows(ctx->ggml_ctx, x, window_inverse_index); x = ggml_get_rows(ctx->ggml_ctx, x, window_inverse_index);
@ -506,6 +509,7 @@ namespace LLM {
auto norm = std::dynamic_pointer_cast<RMSNorm>(blocks["norm"]); auto norm = std::dynamic_pointer_cast<RMSNorm>(blocks["norm"]);
auto x = embed_tokens->forward(ctx, input_ids); auto x = embed_tokens->forward(ctx, input_ids);
sd::ggml_graph_cut::mark_graph_cut(x, "llm.text.prelude", "x");
std::vector<ggml_tensor*> intermediate_outputs; std::vector<ggml_tensor*> intermediate_outputs;
@ -552,6 +556,10 @@ namespace LLM {
auto block = std::dynamic_pointer_cast<TransformerBlock>(blocks["layers." + std::to_string(i)]); auto block = std::dynamic_pointer_cast<TransformerBlock>(blocks["layers." + std::to_string(i)]);
x = block->forward(ctx, x, input_pos, attention_mask); x = block->forward(ctx, x, input_pos, attention_mask);
if (out_layers.size() > 1) {
x = ggml_cont(ctx->ggml_ctx, x);
}
sd::ggml_graph_cut::mark_graph_cut(x, "llm.text.layers." + std::to_string(i), "x");
if (out_layers.find(i + 1) != out_layers.end()) { if (out_layers.find(i + 1) != out_layers.end()) {
intermediate_outputs.push_back(x); intermediate_outputs.push_back(x);
} }

View File

@ -129,7 +129,7 @@ struct LoraModel : public GGMLRunner {
} }
} }
ggml_tensor* get_lora_weight_diff(const std::string& model_tensor_name, ggml_context* ctx) { ggml_tensor* get_lora_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_backend_t backend) {
ggml_tensor* updown = nullptr; ggml_tensor* updown = nullptr;
int index = 0; int index = 0;
while (true) { while (true) {
@ -152,17 +152,17 @@ struct LoraModel : public GGMLRunner {
auto iter = lora_tensors.find(lora_up_name); auto iter = lora_tensors.find(lora_up_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
lora_up = ggml_ext_cast_f32(ctx, iter->second); lora_up = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
iter = lora_tensors.find(lora_mid_name); iter = lora_tensors.find(lora_mid_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
lora_mid = ggml_ext_cast_f32(ctx, iter->second); lora_mid = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
iter = lora_tensors.find(lora_down_name); iter = lora_tensors.find(lora_down_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
lora_down = ggml_ext_cast_f32(ctx, iter->second); lora_down = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
if (lora_up == nullptr || lora_down == nullptr) { if (lora_up == nullptr || lora_down == nullptr) {
@ -208,7 +208,7 @@ struct LoraModel : public GGMLRunner {
return updown; return updown;
} }
ggml_tensor* get_raw_weight_diff(const std::string& model_tensor_name, ggml_context* ctx) { ggml_tensor* get_raw_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_backend_t backend) {
ggml_tensor* updown = nullptr; ggml_tensor* updown = nullptr;
int index = 0; int index = 0;
while (true) { while (true) {
@ -225,7 +225,7 @@ struct LoraModel : public GGMLRunner {
auto iter = lora_tensors.find(diff_name); auto iter = lora_tensors.find(diff_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
curr_updown = ggml_ext_cast_f32(ctx, iter->second); curr_updown = ggml_ext_cast_f32(ctx, backend, iter->second);
} else { } else {
break; break;
} }
@ -248,7 +248,7 @@ struct LoraModel : public GGMLRunner {
return updown; return updown;
} }
ggml_tensor* get_loha_weight_diff(const std::string& model_tensor_name, ggml_context* ctx) { ggml_tensor* get_loha_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_backend_t backend) {
ggml_tensor* updown = nullptr; ggml_tensor* updown = nullptr;
int index = 0; int index = 0;
while (true) { while (true) {
@ -276,33 +276,33 @@ struct LoraModel : public GGMLRunner {
auto iter = lora_tensors.find(hada_1_down_name); auto iter = lora_tensors.find(hada_1_down_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
hada_1_down = ggml_ext_cast_f32(ctx, iter->second); hada_1_down = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
iter = lora_tensors.find(hada_1_up_name); iter = lora_tensors.find(hada_1_up_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
hada_1_up = ggml_ext_cast_f32(ctx, iter->second); hada_1_up = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
iter = lora_tensors.find(hada_1_mid_name); iter = lora_tensors.find(hada_1_mid_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
hada_1_mid = ggml_ext_cast_f32(ctx, iter->second); hada_1_mid = ggml_ext_cast_f32(ctx, backend, iter->second);
hada_1_up = ggml_cont(ctx, ggml_transpose(ctx, hada_1_up)); hada_1_up = ggml_cont(ctx, ggml_transpose(ctx, hada_1_up));
} }
iter = lora_tensors.find(hada_2_down_name); iter = lora_tensors.find(hada_2_down_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
hada_2_down = ggml_ext_cast_f32(ctx, iter->second); hada_2_down = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
iter = lora_tensors.find(hada_2_up_name); iter = lora_tensors.find(hada_2_up_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
hada_2_up = ggml_ext_cast_f32(ctx, iter->second); hada_2_up = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
iter = lora_tensors.find(hada_2_mid_name); iter = lora_tensors.find(hada_2_mid_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
hada_2_mid = ggml_ext_cast_f32(ctx, iter->second); hada_2_mid = ggml_ext_cast_f32(ctx, backend, iter->second);
hada_2_up = ggml_cont(ctx, ggml_transpose(ctx, hada_2_up)); hada_2_up = ggml_cont(ctx, ggml_transpose(ctx, hada_2_up));
} }
@ -351,7 +351,7 @@ struct LoraModel : public GGMLRunner {
return updown; return updown;
} }
ggml_tensor* get_lokr_weight_diff(const std::string& model_tensor_name, ggml_context* ctx) { ggml_tensor* get_lokr_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_backend_t backend) {
ggml_tensor* updown = nullptr; ggml_tensor* updown = nullptr;
int index = 0; int index = 0;
while (true) { while (true) {
@ -378,24 +378,24 @@ struct LoraModel : public GGMLRunner {
auto iter = lora_tensors.find(lokr_w1_name); auto iter = lora_tensors.find(lokr_w1_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
lokr_w1 = ggml_ext_cast_f32(ctx, iter->second); lokr_w1 = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
iter = lora_tensors.find(lokr_w2_name); iter = lora_tensors.find(lokr_w2_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
lokr_w2 = ggml_ext_cast_f32(ctx, iter->second); lokr_w2 = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
int64_t rank = 1; int64_t rank = 1;
if (lokr_w1 == nullptr) { if (lokr_w1 == nullptr) {
iter = lora_tensors.find(lokr_w1_a_name); iter = lora_tensors.find(lokr_w1_a_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
lokr_w1_a = ggml_ext_cast_f32(ctx, iter->second); lokr_w1_a = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
iter = lora_tensors.find(lokr_w1_b_name); iter = lora_tensors.find(lokr_w1_b_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
lokr_w1_b = ggml_ext_cast_f32(ctx, iter->second); lokr_w1_b = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
if (lokr_w1_a == nullptr || lokr_w1_b == nullptr) { if (lokr_w1_a == nullptr || lokr_w1_b == nullptr) {
@ -410,12 +410,12 @@ struct LoraModel : public GGMLRunner {
if (lokr_w2 == nullptr) { if (lokr_w2 == nullptr) {
iter = lora_tensors.find(lokr_w2_a_name); iter = lora_tensors.find(lokr_w2_a_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
lokr_w2_a = ggml_ext_cast_f32(ctx, iter->second); lokr_w2_a = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
iter = lora_tensors.find(lokr_w2_b_name); iter = lora_tensors.find(lokr_w2_b_name);
if (iter != lora_tensors.end()) { if (iter != lora_tensors.end()) {
lokr_w2_b = ggml_ext_cast_f32(ctx, iter->second); lokr_w2_b = ggml_ext_cast_f32(ctx, backend, iter->second);
} }
if (lokr_w2_a == nullptr || lokr_w2_b == nullptr) { if (lokr_w2_a == nullptr || lokr_w2_b == nullptr) {
@ -468,23 +468,23 @@ struct LoraModel : public GGMLRunner {
return updown; return updown;
} }
ggml_tensor* get_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_tensor* model_tensor, bool with_lora_and_lokr = true) { ggml_tensor* get_weight_diff(const std::string& model_tensor_name, ggml_backend_t backend, ggml_context* ctx, ggml_tensor* model_tensor, bool with_lora_and_lokr = true) {
// lora // lora
ggml_tensor* diff = nullptr; ggml_tensor* diff = nullptr;
if (with_lora_and_lokr) { if (with_lora_and_lokr) {
diff = get_lora_weight_diff(model_tensor_name, ctx); diff = get_lora_weight_diff(model_tensor_name, ctx, backend);
} }
// diff // diff
if (diff == nullptr) { if (diff == nullptr) {
diff = get_raw_weight_diff(model_tensor_name, ctx); diff = get_raw_weight_diff(model_tensor_name, ctx, backend);
} }
// loha // loha
if (diff == nullptr) { if (diff == nullptr) {
diff = get_loha_weight_diff(model_tensor_name, ctx); diff = get_loha_weight_diff(model_tensor_name, ctx, backend);
} }
// lokr // lokr
if (diff == nullptr && with_lora_and_lokr) { if (diff == nullptr && with_lora_and_lokr) {
diff = get_lokr_weight_diff(model_tensor_name, ctx); diff = get_lokr_weight_diff(model_tensor_name, ctx, backend);
} }
if (diff != nullptr) { if (diff != nullptr) {
if (ggml_nelements(diff) < ggml_nelements(model_tensor)) { if (ggml_nelements(diff) < ggml_nelements(model_tensor)) {
@ -502,6 +502,7 @@ struct LoraModel : public GGMLRunner {
} }
ggml_tensor* get_out_diff(ggml_context* ctx, ggml_tensor* get_out_diff(ggml_context* ctx,
ggml_backend_t backend,
ggml_tensor* x, ggml_tensor* x,
WeightAdapter::ForwardParams forward_params, WeightAdapter::ForwardParams forward_params,
const std::string& model_tensor_name) { const std::string& model_tensor_name) {
@ -590,7 +591,7 @@ struct LoraModel : public GGMLRunner {
} }
scale_value *= multiplier; scale_value *= multiplier;
auto curr_out_diff = ggml_ext_lokr_forward(ctx, x, lokr_w1, lokr_w1_a, lokr_w1_b, lokr_w2, lokr_w2_a, lokr_w2_b, is_conv2d, forward_params.conv2d, scale_value); auto curr_out_diff = ggml_ext_lokr_forward(ctx, backend, x, lokr_w1, lokr_w1_a, lokr_w1_b, lokr_w2, lokr_w2_a, lokr_w2_b, is_conv2d, forward_params.conv2d, scale_value);
if (out_diff == nullptr) { if (out_diff == nullptr) {
out_diff = curr_out_diff; out_diff = curr_out_diff;
} else { } else {
@ -761,7 +762,7 @@ struct LoraModel : public GGMLRunner {
ggml_tensor* model_tensor = it.second; ggml_tensor* model_tensor = it.second;
// lora // lora
ggml_tensor* diff = get_weight_diff(model_tensor_name, compute_ctx, model_tensor); ggml_tensor* diff = get_weight_diff(model_tensor_name, runtime_backend, compute_ctx, model_tensor);
if (diff == nullptr) { if (diff == nullptr) {
continue; continue;
} }
@ -774,7 +775,7 @@ struct LoraModel : public GGMLRunner {
ggml_tensor* final_tensor; ggml_tensor* final_tensor;
if (model_tensor->type != GGML_TYPE_F32 && model_tensor->type != GGML_TYPE_F16) { if (model_tensor->type != GGML_TYPE_F32 && model_tensor->type != GGML_TYPE_F16) {
final_tensor = ggml_ext_cast_f32(compute_ctx, model_tensor); final_tensor = ggml_ext_cast_f32(compute_ctx, runtime_backend, model_tensor);
final_tensor = ggml_add_inplace(compute_ctx, final_tensor, diff); final_tensor = ggml_add_inplace(compute_ctx, final_tensor, diff);
final_tensor = ggml_cpy(compute_ctx, final_tensor, model_tensor); final_tensor = ggml_cpy(compute_ctx, final_tensor, model_tensor);
} else { } else {
@ -841,34 +842,35 @@ public:
: lora_models(lora_models) { : lora_models(lora_models) {
} }
ggml_tensor* patch_weight(ggml_context* ctx, ggml_tensor* weight, const std::string& weight_name, bool with_lora_and_lokr) { ggml_tensor* patch_weight(ggml_context* ctx, ggml_backend_t backend, ggml_tensor* weight, const std::string& weight_name, bool with_lora_and_lokr) {
for (auto& lora_model : lora_models) { for (auto& lora_model : lora_models) {
ggml_tensor* diff = lora_model->get_weight_diff(weight_name, ctx, weight, with_lora_and_lokr); ggml_tensor* diff = lora_model->get_weight_diff(weight_name, backend, ctx, weight, with_lora_and_lokr);
if (diff == nullptr) { if (diff == nullptr) {
continue; continue;
} }
if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) { if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) {
weight = ggml_ext_cast_f32(ctx, weight); weight = ggml_ext_cast_f32(ctx, backend, weight);
} }
weight = ggml_add(ctx, weight, diff); weight = ggml_add(ctx, weight, diff);
} }
return weight; return weight;
} }
ggml_tensor* patch_weight(ggml_context* ctx, ggml_tensor* weight, const std::string& weight_name) override { ggml_tensor* patch_weight(ggml_context* ctx, ggml_backend_t backend, ggml_tensor* weight, const std::string& weight_name) override {
return patch_weight(ctx, weight, weight_name, true); return patch_weight(ctx, backend, weight, weight_name, true);
} }
ggml_tensor* forward_with_lora(ggml_context* ctx, ggml_tensor* forward_with_lora(ggml_context* ctx,
ggml_backend_t backend,
ggml_tensor* x, ggml_tensor* x,
ggml_tensor* w, ggml_tensor* w,
ggml_tensor* b, ggml_tensor* b,
const std::string& prefix, const std::string& prefix,
WeightAdapter::ForwardParams forward_params) override { WeightAdapter::ForwardParams forward_params) override {
w = patch_weight(ctx, w, prefix + "weight", false); w = patch_weight(ctx, backend, w, prefix + "weight", false);
if (b) { if (b) {
b = patch_weight(ctx, b, prefix + "bias", false); b = patch_weight(ctx, backend, b, prefix + "bias", false);
} }
ggml_tensor* out; ggml_tensor* out;
if (forward_params.op_type == ForwardParams::op_type_t::OP_LINEAR) { if (forward_params.op_type == ForwardParams::op_type_t::OP_LINEAR) {
@ -890,7 +892,7 @@ public:
forward_params.conv2d.scale); forward_params.conv2d.scale);
} }
for (auto& lora_model : lora_models) { for (auto& lora_model : lora_models) {
ggml_tensor* out_diff = lora_model->get_out_diff(ctx, x, forward_params, prefix + "weight"); ggml_tensor* out_diff = lora_model->get_out_diff(ctx, backend, x, forward_params, prefix + "weight");
if (out_diff == nullptr) { if (out_diff == nullptr) {
continue; continue;
} }

View File

@ -767,6 +767,8 @@ public:
auto context_x = block->forward(ctx, context, x, c_mod); auto context_x = block->forward(ctx, context, x, c_mod);
context = context_x.first; context = context_x.first;
x = context_x.second; x = context_x.second;
sd::ggml_graph_cut::mark_graph_cut(context, "mmdit.joint_blocks." + std::to_string(i), "context");
sd::ggml_graph_cut::mark_graph_cut(x, "mmdit.joint_blocks." + std::to_string(i), "x");
} }
x = final_layer->forward(ctx, x, c_mod); // (N, T, patch_size ** 2 * out_channels) x = final_layer->forward(ctx, x, c_mod); // (N, T, patch_size ** 2 * out_channels)
@ -809,6 +811,11 @@ public:
context = context_embedder->forward(ctx, context); // [N, L, D] aka [N, L, 1536] context = context_embedder->forward(ctx, context); // [N, L, D] aka [N, L, 1536]
} }
sd::ggml_graph_cut::mark_graph_cut(x, "mmdit.prelude", "x");
sd::ggml_graph_cut::mark_graph_cut(c, "mmdit.prelude", "c");
if (context != nullptr) {
sd::ggml_graph_cut::mark_graph_cut(context, "mmdit.prelude", "context");
}
x = forward_core_with_concat(ctx, x, c, context, skip_layers); // (N, H*W, patch_size ** 2 * out_channels) x = forward_core_with_concat(ctx, x, c, context, skip_layers); // (N, H*W, patch_size ** 2 * out_channels)

View File

@ -2,6 +2,7 @@
#include <atomic> #include <atomic>
#include <chrono> #include <chrono>
#include <cstdarg> #include <cstdarg>
#include <cstdlib>
#include <fstream> #include <fstream>
#include <functional> #include <functional>
#include <mutex> #include <mutex>
@ -12,64 +13,21 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "gguf_reader.hpp"
#include "model.h" #include "model.h"
#include "model_io/gguf_io.h"
#include "model_io/safetensors_io.h"
#include "model_io/torch_legacy_io.h"
#include "model_io/torch_zip_io.h"
#include "stable-diffusion.h" #include "stable-diffusion.h"
#include "util.h" #include "util.h"
#include "ggml-alloc.h" #include "ggml-alloc.h"
#include "ggml-backend.h" #include "ggml-backend.h"
#include "ggml-cpu.h"
#include "ggml.h" #include "ggml.h"
#include "ggml_extend_backend.hpp"
#include "zip.h"
#include "name_conversion.h" #include "name_conversion.h"
#include "stable-diffusion.h"
#ifdef SD_USE_METAL
#include "ggml-metal.h"
#endif
#ifdef SD_USE_VULKAN
#include "ggml-vulkan.h"
#endif
#ifdef SD_USE_OPENCL
#include "ggml-opencl.h"
#endif
#define ST_HEADER_SIZE_LEN 8
uint64_t read_u64(uint8_t* buffer) {
// little endian
uint64_t value = 0;
value |= static_cast<int64_t>(buffer[7]) << 56;
value |= static_cast<int64_t>(buffer[6]) << 48;
value |= static_cast<int64_t>(buffer[5]) << 40;
value |= static_cast<int64_t>(buffer[4]) << 32;
value |= static_cast<int64_t>(buffer[3]) << 24;
value |= static_cast<int64_t>(buffer[2]) << 16;
value |= static_cast<int64_t>(buffer[1]) << 8;
value |= static_cast<int64_t>(buffer[0]);
return value;
}
int32_t read_int(uint8_t* buffer) {
// little endian
int value = 0;
value |= buffer[3] << 24;
value |= buffer[2] << 16;
value |= buffer[1] << 8;
value |= buffer[0];
return value;
}
uint16_t read_short(uint8_t* buffer) {
// little endian
uint16_t value = 0;
value |= buffer[1] << 8;
value |= buffer[0];
return value;
}
/*================================================= Preprocess ==================================================*/ /*================================================= Preprocess ==================================================*/
@ -110,7 +68,7 @@ const char* unused_tensors[] = {
"first_stage_model.bn.", "first_stage_model.bn.",
}; };
bool is_unused_tensor(std::string name) { bool is_unused_tensor(const std::string& name) {
for (size_t i = 0; i < sizeof(unused_tensors) / sizeof(const char*); i++) { for (size_t i = 0; i < sizeof(unused_tensors) / sizeof(const char*); i++) {
if (starts_with(name, unused_tensors[i])) { if (starts_with(name, unused_tensors[i])) {
return true; return true;
@ -250,79 +208,6 @@ void ModelLoader::add_tensor_storage(const TensorStorage& tensor_storage) {
tensor_storage_map[tensor_storage.name] = tensor_storage; tensor_storage_map[tensor_storage.name] = tensor_storage;
} }
bool is_zip_file(const std::string& file_path) {
zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
if (zip == nullptr) {
return false;
}
zip_close(zip);
return true;
}
bool is_gguf_file(const std::string& file_path) {
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open()) {
return false;
}
char magic[4];
file.read(magic, sizeof(magic));
if (!file) {
return false;
}
for (uint32_t i = 0; i < sizeof(magic); i++) {
if (magic[i] != GGUF_MAGIC[i]) {
return false;
}
}
return true;
}
bool is_safetensors_file(const std::string& file_path) {
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open()) {
return false;
}
// get file size
file.seekg(0, file.end);
size_t file_size_ = file.tellg();
file.seekg(0, file.beg);
// read header size
if (file_size_ <= ST_HEADER_SIZE_LEN) {
return false;
}
uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
if (!file) {
return false;
}
size_t header_size_ = read_u64(header_size_buf);
if (header_size_ >= file_size_ || header_size_ <= 2) {
return false;
}
// read header
std::vector<char> header_buf;
header_buf.resize(header_size_ + 1);
header_buf[header_size_] = '\0';
file.read(header_buf.data(), header_size_);
if (!file) {
return false;
}
try {
nlohmann::json header_ = nlohmann::json::parse(header_buf.data());
} catch (const std::exception&) {
return false;
}
return true;
}
bool ModelLoader::init_from_file(const std::string& file_path, const std::string& prefix) { bool ModelLoader::init_from_file(const std::string& file_path, const std::string& prefix) {
if (is_directory(file_path)) { if (is_directory(file_path)) {
LOG_INFO("load %s using diffusers format", file_path.c_str()); LOG_INFO("load %s using diffusers format", file_path.c_str());
@ -333,9 +218,12 @@ bool ModelLoader::init_from_file(const std::string& file_path, const std::string
} else if (is_safetensors_file(file_path)) { } else if (is_safetensors_file(file_path)) {
LOG_INFO("load %s using safetensors format", file_path.c_str()); LOG_INFO("load %s using safetensors format", file_path.c_str());
return init_from_safetensors_file(file_path, prefix); return init_from_safetensors_file(file_path, prefix);
} else if (is_zip_file(file_path)) { } else if (is_torch_zip_file(file_path)) {
LOG_INFO("load %s using checkpoint format", file_path.c_str()); LOG_INFO("load %s using torch zip format", file_path.c_str());
return init_from_ckpt_file(file_path, prefix); return init_from_torch_zip_file(file_path, prefix);
} else if (init_from_torch_legacy_file(file_path, prefix)) {
LOG_INFO("load %s using torch legacy format", file_path.c_str());
return true;
} else { } else {
if (file_exists(file_path)) { if (file_exists(file_path)) {
LOG_WARN("unknown format %s", file_path.c_str()); LOG_WARN("unknown format %s", file_path.c_str());
@ -375,37 +263,24 @@ bool ModelLoader::init_from_file_and_convert_name(const std::string& file_path,
bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::string& prefix) { bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::string& prefix) {
LOG_DEBUG("init from '%s'", file_path.c_str()); LOG_DEBUG("init from '%s'", file_path.c_str());
file_paths_.push_back(file_path);
size_t file_index = file_paths_.size() - 1;
gguf_context* ctx_gguf_ = nullptr; std::vector<TensorStorage> tensor_storages;
ggml_context* ctx_meta_ = nullptr; std::string error;
if (!read_gguf_file(file_path, tensor_storages, &error)) {
ctx_gguf_ = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta_}); LOG_ERROR("%s", error.c_str());
if (!ctx_gguf_) {
LOG_ERROR("failed to open '%s' with gguf_init_from_file. Try to open it with GGUFReader.", file_path.c_str());
GGUFReader gguf_reader;
if (!gguf_reader.load(file_path)) {
LOG_ERROR("failed to open '%s' with GGUFReader.", file_path.c_str());
return false; return false;
} }
size_t data_offset = gguf_reader.data_offset(); file_paths_.push_back(file_path);
for (const auto& gguf_tensor_info : gguf_reader.tensors()) { size_t file_index = file_paths_.size() - 1;
std::string name = gguf_tensor_info.name;
if (!starts_with(name, prefix)) { for (auto& tensor_storage : tensor_storages) {
name = prefix + name; // LOG_DEBUG("%s", tensor_storage.name.c_str());
if (!starts_with(tensor_storage.name, prefix)) {
tensor_storage.name = prefix + tensor_storage.name;
} }
tensor_storage.file_index = file_index;
TensorStorage tensor_storage(
name,
gguf_tensor_info.type,
gguf_tensor_info.shape.data(),
static_cast<int>(gguf_tensor_info.shape.size()),
file_index,
data_offset + gguf_tensor_info.offset);
// LOG_DEBUG("%s %s", name.c_str(), tensor_storage.to_string().c_str());
add_tensor_storage(tensor_storage); add_tensor_storage(tensor_storage);
} }
@ -413,204 +288,96 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s
return true; return true;
} }
int n_tensors = static_cast<int>(gguf_get_n_tensors(ctx_gguf_));
size_t total_size = 0;
size_t data_offset = gguf_get_data_offset(ctx_gguf_);
for (int i = 0; i < n_tensors; i++) {
std::string name = gguf_get_tensor_name(ctx_gguf_, i);
ggml_tensor* dummy = ggml_get_tensor(ctx_meta_, name.c_str());
size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf_, i);
// LOG_DEBUG("%s", name.c_str());
if (!starts_with(name, prefix)) {
name = prefix + name;
}
TensorStorage tensor_storage(name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset);
GGML_ASSERT(ggml_nbytes(dummy) == tensor_storage.nbytes());
add_tensor_storage(tensor_storage);
}
gguf_free(ctx_gguf_);
ggml_free(ctx_meta_);
return true;
}
/*================================================= SafeTensorsModelLoader ==================================================*/ /*================================================= SafeTensorsModelLoader ==================================================*/
ggml_type str_to_ggml_type(const std::string& dtype) {
ggml_type ttype = GGML_TYPE_COUNT;
if (dtype == "F16") {
ttype = GGML_TYPE_F16;
} else if (dtype == "BF16") {
ttype = GGML_TYPE_BF16;
} else if (dtype == "F32") {
ttype = GGML_TYPE_F32;
} else if (dtype == "F64") {
ttype = GGML_TYPE_F32;
} else if (dtype == "F8_E4M3") {
ttype = GGML_TYPE_F16;
} else if (dtype == "F8_E5M2") {
ttype = GGML_TYPE_F16;
} else if (dtype == "I64") {
ttype = GGML_TYPE_I32;
}
return ttype;
}
// https://huggingface.co/docs/safetensors/index
bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const std::string& prefix) { bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const std::string& prefix) {
LOG_DEBUG("init from '%s', prefix = '%s'", file_path.c_str(), prefix.c_str()); LOG_DEBUG("init from '%s', prefix = '%s'", file_path.c_str(), prefix.c_str());
std::vector<TensorStorage> tensor_storages;
std::string error;
if (!read_safetensors_file(file_path, tensor_storages, &error)) {
LOG_ERROR("%s", error.c_str());
return false;
}
file_paths_.push_back(file_path); file_paths_.push_back(file_path);
size_t file_index = file_paths_.size() - 1; size_t file_index = file_paths_.size() - 1;
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open()) {
LOG_ERROR("failed to open '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
// get file size for (auto& tensor_storage : tensor_storages) {
file.seekg(0, file.end); if (is_unused_tensor(tensor_storage.name)) {
size_t file_size_ = file.tellg();
file.seekg(0, file.beg);
// read header size
if (file_size_ <= ST_HEADER_SIZE_LEN) {
LOG_ERROR("invalid safetensor file '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
if (!file) {
LOG_ERROR("read safetensors header size failed: '%s'", file_path.c_str());
return false;
}
size_t header_size_ = read_u64(header_size_buf);
if (header_size_ >= file_size_) {
LOG_ERROR("invalid safetensor file '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
// read header
std::vector<char> header_buf;
header_buf.resize(header_size_ + 1);
header_buf[header_size_] = '\0';
file.read(header_buf.data(), header_size_);
if (!file) {
LOG_ERROR("read safetensors header failed: '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
nlohmann::json header_;
try {
header_ = nlohmann::json::parse(header_buf.data());
} catch (const std::exception&) {
LOG_ERROR("parsing safetensors header failed", file_path.c_str());
file_paths_.pop_back();
return false;
}
for (auto& item : header_.items()) {
std::string name = item.key();
nlohmann::json tensor_info = item.value();
// LOG_DEBUG("%s %s\n", name.c_str(), tensor_info.dump().c_str());
if (name == "__metadata__") {
continue; continue;
} }
if (is_unused_tensor(name)) { if (!starts_with(tensor_storage.name, prefix)) {
continue; tensor_storage.name = prefix + tensor_storage.name;
}
std::string dtype = tensor_info["dtype"];
nlohmann::json shape = tensor_info["shape"];
if (dtype == "U8") {
continue;
}
size_t begin = tensor_info["data_offsets"][0].get<size_t>();
size_t end = tensor_info["data_offsets"][1].get<size_t>();
ggml_type type = str_to_ggml_type(dtype);
if (type == GGML_TYPE_COUNT) {
LOG_ERROR("unsupported dtype '%s' (tensor '%s')", dtype.c_str(), name.c_str());
return false;
}
if (shape.size() > SD_MAX_DIMS) {
LOG_ERROR("invalid tensor '%s'", name.c_str());
return false;
}
int n_dims = (int)shape.size();
int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
for (int i = 0; i < n_dims; i++) {
ne[i] = shape[i].get<int64_t>();
}
if (n_dims == 5) {
n_dims = 4;
ne[0] = ne[0] * ne[1];
ne[1] = ne[2];
ne[2] = ne[3];
ne[3] = ne[4];
}
// ggml_n_dims returns 1 for scalars
if (n_dims == 0) {
n_dims = 1;
}
if (!starts_with(name, prefix)) {
name = prefix + name;
}
TensorStorage tensor_storage(name, type, ne, n_dims, file_index, ST_HEADER_SIZE_LEN + header_size_ + begin);
tensor_storage.reverse_ne();
size_t tensor_data_size = end - begin;
bool tensor_size_ok;
if (dtype == "F8_E4M3") {
tensor_storage.is_f8_e4m3 = true;
// f8 -> f16
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
} else if (dtype == "F8_E5M2") {
tensor_storage.is_f8_e5m2 = true;
// f8 -> f16
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
} else if (dtype == "F64") {
tensor_storage.is_f64 = true;
// f64 -> f32
tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
} else if (dtype == "I64") {
tensor_storage.is_i64 = true;
// i64 -> i32
tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
} else {
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size);
}
if (!tensor_size_ok) {
LOG_ERROR("size mismatch for tensor '%s' (%s)\n", name.c_str(), dtype.c_str());
return false;
} }
tensor_storage.file_index = file_index;
add_tensor_storage(tensor_storage); add_tensor_storage(tensor_storage);
// LOG_DEBUG("%s %s", tensor_storage.to_string().c_str(), dtype.c_str()); // LOG_DEBUG("%s", tensor_storage.to_string().c_str());
}
return true;
}
/*================================================= TorchLegacyModelLoader ==================================================*/
bool ModelLoader::init_from_torch_legacy_file(const std::string& file_path, const std::string& prefix) {
LOG_DEBUG("init from torch legacy '%s'", file_path.c_str());
std::vector<TensorStorage> tensor_storages;
std::string error;
if (!read_torch_legacy_file(file_path, tensor_storages, &error)) {
if ((!error.empty()) && (ends_with(file_path, ".pt") || ends_with(file_path, ".pth"))) {
LOG_WARN("%s", error.c_str());
}
return false;
}
file_paths_.push_back(file_path);
size_t file_index = file_paths_.size() - 1;
for (auto& tensor_storage : tensor_storages) {
if (is_unused_tensor(tensor_storage.name)) {
continue;
}
if (!starts_with(tensor_storage.name, prefix)) {
tensor_storage.name = prefix + tensor_storage.name;
}
tensor_storage.file_index = file_index;
add_tensor_storage(tensor_storage);
}
return true;
}
/*================================================= TorchZipModelLoader ==================================================*/
bool ModelLoader::init_from_torch_zip_file(const std::string& file_path, const std::string& prefix) {
LOG_DEBUG("init from '%s'", file_path.c_str());
std::vector<TensorStorage> tensor_storages;
std::string error;
if (!read_torch_zip_file(file_path, tensor_storages, &error)) {
LOG_ERROR("%s", error.c_str());
return false;
}
file_paths_.push_back(file_path);
size_t file_index = file_paths_.size() - 1;
for (auto& tensor_storage : tensor_storages) {
if (!starts_with(tensor_storage.name, prefix)) {
tensor_storage.name = prefix + tensor_storage.name;
}
tensor_storage.file_index = file_index;
add_tensor_storage(tensor_storage);
// LOG_DEBUG("%s", tensor_storage.to_string().c_str());
} }
return true; return true;
@ -642,367 +409,6 @@ bool ModelLoader::init_from_diffusers_file(const std::string& file_path, const s
return true; return true;
} }
/*================================================= CkptModelLoader ==================================================*/
// $ python -m pickletools sd-v1-4/archive/data.pkl | head -n 100
// 0: \x80 PROTO 2
// 2: } EMPTY_DICT
// 3: q BINPUT 0
// 5: ( MARK
// 6: X BINUNICODE 'epoch'
// 16: q BINPUT 1
// 18: K BININT1 6
// 20: X BINUNICODE 'global_step'
// 36: q BINPUT 2
// 38: J BININT 470000
// 43: X BINUNICODE 'pytorch-lightning_version'
// 73: q BINPUT 3
// 75: X BINUNICODE '1.4.2'
// 85: q BINPUT 4
// 87: X BINUNICODE 'state_dict'
// 102: q BINPUT 5
// 104: } EMPTY_DICT
// 105: q BINPUT 6
// 107: ( MARK
// 108: X BINUNICODE 'betas'
// 118: q BINPUT 7
// 120: c GLOBAL 'torch._utils _rebuild_tensor_v2'
// 153: q BINPUT 8
// 155: ( MARK
// 156: ( MARK
// 157: X BINUNICODE 'storage'
// 169: q BINPUT 9
// 171: c GLOBAL 'torch FloatStorage'
// 191: q BINPUT 10
// 193: X BINUNICODE '0'
// 199: q BINPUT 11
// 201: X BINUNICODE 'cpu'
// 209: q BINPUT 12
// 211: M BININT2 1000
// 214: t TUPLE (MARK at 156)
// 215: q BINPUT 13
// 217: Q BINPERSID
// 218: K BININT1 0
// 220: M BININT2 1000
// ...............................
// 3201: q BINPUT 250
// 3203: R REDUCE
// 3204: q BINPUT 251
// 3206: X BINUNICODE 'model.diffusion_model.input_blocks.1.1.proj_in.weight'
// 3264: q BINPUT 252
// 3266: h BINGET 8
// 3268: ( MARK
// 3269: ( MARK
// 3270: h BINGET 9
// 3272: h BINGET 10
// 3274: X BINUNICODE '30'
// 3281: q BINPUT 253
// 3283: h BINGET 12
// 3285: J BININT 102400
// 3290: t TUPLE (MARK at 3269)
// 3291: q BINPUT 254
// 3293: Q BINPERSID
// 3294: K BININT1 0
// 3296: ( MARK
// 3297: M BININT2 320
// 3300: M BININT2 320
// 3303: K BININT1 1
// 3305: K BININT1 1
// 3307: t TUPLE (MARK at 3296)
// 3308: q BINPUT 255
// 3310: ( MARK
// 3311: M BININT2 320
// 3314: K BININT1 1
// 3316: K BININT1 1
// 3318: K BININT1 1
// 3320: t TUPLE (MARK at 3310)
// 3321: r LONG_BINPUT 256
// 3326: \x89 NEWFALSE
// 3327: h BINGET 16
// 3329: ) EMPTY_TUPLE
// 3330: R REDUCE
// 3331: r LONG_BINPUT 257
// 3336: t TUPLE (MARK at 3268)
// 3337: r LONG_BINPUT 258
// 3342: R REDUCE
// 3343: r LONG_BINPUT 259
// 3348: X BINUNICODE 'model.diffusion_model.input_blocks.1.1.proj_in.bias'
// 3404: r LONG_BINPUT 260
// 3409: h BINGET 8
// 3411: ( MARK
// 3412: ( MARK
// 3413: h BINGET 9
// 3415: h BINGET 10
// 3417: X BINUNICODE '31'
struct PickleTensorReader {
enum ReadPhase {
READ_NAME,
READ_DATA,
CHECK_SIZE,
READ_DIMENS
};
ReadPhase phase = READ_NAME;
size_t entry_size = 0;
int32_t nelements = 0;
TensorStorage tensor_storage;
static ggml_type global_type; // all pickle_tensors data type
static bool read_global_type;
bool read_int_value(uint32_t value) {
if (phase == CHECK_SIZE) {
if (entry_size == value * ggml_type_size(tensor_storage.type)) {
nelements = value;
phase = READ_DIMENS;
return true;
} else {
phase = READ_NAME;
}
} else if (phase == READ_DIMENS) {
if (tensor_storage.n_dims + 1 > SD_MAX_DIMS) { // too many dimens
phase = READ_NAME;
tensor_storage.n_dims = 0;
}
if (nelements % value == 0) {
tensor_storage.ne[tensor_storage.n_dims] = value;
tensor_storage.n_dims++;
}
}
return false;
}
void read_global(const std::string& str) {
if (str == "FloatStorage") {
if (read_global_type) {
global_type = GGML_TYPE_F32;
read_global_type = false;
}
tensor_storage.type = GGML_TYPE_F32;
} else if (str == "HalfStorage") {
if (read_global_type) {
global_type = GGML_TYPE_F16;
read_global_type = false;
}
tensor_storage.type = GGML_TYPE_F16;
}
}
void read_string(const std::string& str, zip_t* zip, std::string dir) {
if (str == "storage") {
read_global_type = true;
} else if (str != "state_dict") {
if (phase == READ_DATA) {
std::string entry_name = dir + "data/" + std::string(str);
size_t i, n = zip_entries_total(zip);
for (i = 0; i < n; ++i) {
zip_entry_openbyindex(zip, i);
{
std::string name = zip_entry_name(zip);
if (name == entry_name) {
tensor_storage.index_in_zip = (int)i;
entry_size = zip_entry_size(zip);
zip_entry_close(zip);
break;
}
}
zip_entry_close(zip);
}
phase = entry_size > 0 ? CHECK_SIZE : READ_NAME;
}
if (!read_global_type && phase == READ_NAME) {
tensor_storage.name = str;
phase = READ_DATA;
tensor_storage.type = global_type;
}
}
}
};
ggml_type PickleTensorReader::global_type = GGML_TYPE_F32; // all pickle_tensors data type
bool PickleTensorReader::read_global_type = false;
int find_char(uint8_t* buffer, int len, char c) {
for (int pos = 0; pos < len; pos++) {
if (buffer[pos] == c) {
return pos;
}
}
return -1;
}
#define MAX_STRING_BUFFER 512
// Parses the data.pkl payload of a PyTorch zip checkpoint (pickle protocol 2)
// and registers a TensorStorage entry for every tensor it finds.
//
// buffer/buffer_size: the raw pickle bytes; zip/dir: the enclosing archive
// and the directory prefix of its data entries; file_index: index of the
// checkpoint in file_paths_; prefix: prepended to tensor names that lack it.
// Returns false on malformed input; true otherwise (including the benign
// case of a buffer that does not start with the pickle PROTO opcode).
bool ModelLoader::parse_data_pkl(uint8_t* buffer,
                                 size_t buffer_size,
                                 zip_t* zip,
                                 std::string dir,
                                 size_t file_index,
                                 const std::string prefix) {
    uint8_t* buffer_end = buffer + buffer_size;
    if (buffer_size < 2) {  // need at least PROTO opcode + version byte
        LOG_ERROR("pickle buffer too small\n");
        return false;
    }
    if (buffer[0] == 0x80) {  // proto
        if (buffer[1] != 2) {
            LOG_ERROR("Unsupported protocol\n");
            return false;
        }
        buffer += 2;  // 0x80 and version

        char string_buffer[MAX_STRING_BUFFER];
        bool finish = false;
        PickleTensorReader reader;
        // read pickle binary file
        // https://github.com/python/cpython/blob/3.7/Lib/pickletools.py#L1048
        // https://github.com/python/cpython/blob/main/Lib/pickle.py#L105
        while (!finish && buffer < buffer_end) {
            uint8_t opcode = *buffer;
            buffer++;
            switch (opcode) {
                case '}':  // EMPTY_DICT   = b'}'  # push empty dict
                    break;
                case ']':  // EMPTY_LIST   = b']'  # push empty list
                    break;
                // skip unused sections
                case 'h':  // BINGET       = b'h'  # 1-byte arg
                case 'q':  // BINPUT       = b'q'  # 1-byte arg
                case 'Q':  // BINPERSID    = b'Q'  # from stack
                    buffer++;
                    break;
                case 'r':  // LONG_BINPUT  = b'r'  # 4-byte arg
                    buffer += 4;
                    break;
                case 0x95:  // FRAME       = b'\x95' # beginning of a new frame
                    buffer += 8;
                    break;
                case 0x94:  // MEMOIZE     = b'\x94' # store top of stack in memo
                    break;
                case '(':  // MARK         = b'('  # push markobject
                    break;
                case 'K':  // BININT1      = b'K'  # push 1-byte unsigned int
                {
                    uint8_t value = *buffer;
                    if (reader.read_int_value(value)) {
                        buffer++;  // skip tuple opcode after reading num_elements
                    }
                    buffer++;
                } break;
                case 'M':  // BININT2      = b'M'  # push 2-byte unsigned int
                {
                    uint16_t value = read_short(buffer);
                    if (reader.read_int_value(value)) {
                        buffer++;  // skip tuple opcode after reading num_elements
                    }
                    buffer += 2;
                } break;
                case 'J':  // BININT       = b'J'  # push four-byte signed int
                {
                    const int32_t value = read_int(buffer);
                    if (reader.read_int_value(value)) {
                        buffer++;  // skip tuple opcode after reading num_elements
                    }
                    buffer += 4;
                } break;
                case 'X':  // BINUNICODE   = b'X'  # counted UTF-8 string argument
                {
                    const int32_t len = read_int(buffer);
                    buffer += 4;
                    // reject negative lengths and strings that overrun the buffer
                    if (len < 0 || buffer + len > buffer_end) {
                        LOG_ERROR("invalid string length in pickle data\n");
                        return false;
                    }
                    memset(string_buffer, 0, MAX_STRING_BUFFER);
                    // len == MAX_STRING_BUFFER also truncates (copy caps at 511)
                    if (len >= MAX_STRING_BUFFER) {
                        LOG_WARN("tensor name very large");
                    }
                    memcpy(string_buffer, buffer, len < MAX_STRING_BUFFER ? len : (MAX_STRING_BUFFER - 1));
                    buffer += len;
                    reader.read_string(string_buffer, zip, dir);
                } break;
                case 0x8C:  // SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes
                {
                    // length is an unsigned byte; int8_t would go negative
                    // for lengths >= 128 and corrupt the memcpy below
                    const uint8_t len = *buffer;
                    buffer++;
                    if (buffer + len > buffer_end) {
                        LOG_ERROR("invalid string length in pickle data\n");
                        return false;
                    }
                    memset(string_buffer, 0, MAX_STRING_BUFFER);
                    memcpy(string_buffer, buffer, len);
                    buffer += len;
                } break;
                case 'c':  // GLOBAL       = b'c'  # push self.find_class(modname, name); 2 string args
                {
                    // scan for the two newline-terminated strings, never
                    // reading past the end of the buffer
                    size_t remaining = (size_t)(buffer_end - buffer);
                    int limit        = remaining < MAX_STRING_BUFFER ? (int)remaining : MAX_STRING_BUFFER;
                    int len          = find_char(buffer, limit, '\n');
                    if (len < 0) {
                        LOG_ERROR("unterminated GLOBAL opcode in pickle data\n");
                        return false;
                    }
                    buffer += len + 1;
                    remaining = (size_t)(buffer_end - buffer);
                    limit     = remaining < MAX_STRING_BUFFER ? (int)remaining : MAX_STRING_BUFFER;
                    len       = find_char(buffer, limit, '\n');
                    if (len < 0) {
                        LOG_ERROR("unterminated GLOBAL opcode in pickle data\n");
                        return false;
                    }
                    memset(string_buffer, 0, MAX_STRING_BUFFER);
                    memcpy(string_buffer, buffer, len);
                    buffer += len + 1;
                    reader.read_global(string_buffer);
                } break;
                case 0x86:  // TUPLE2 = b'\x86' # build 2-tuple from two topmost stack items
                case 0x85:  // TUPLE1 = b'\x85' # build 1-tuple from stack top
                case 't':   // TUPLE  = b't'    # build tuple from topmost stack items
                    if (reader.phase == PickleTensorReader::READ_DIMENS) {
                        // tensor fully described: finalize and register it
                        reader.tensor_storage.reverse_ne();
                        reader.tensor_storage.file_index = file_index;
                        std::string name = reader.tensor_storage.name;
                        if (!starts_with(name, prefix)) {
                            name = prefix + name;
                        }
                        reader.tensor_storage.name = name;
                        add_tensor_storage(reader.tensor_storage);
                        // reset for the next tensor
                        reader = PickleTensorReader();
                    }
                    break;
                case '.':  // STOP = b'.' # every pickle ends with STOP
                    finish = true;
                    break;
                default:
                    break;
            }
        }
    }
    return true;
}
// Scans a PyTorch zip checkpoint for "*data.pkl" entries and parses each one
// to collect tensor metadata. Returns false only if the archive cannot be
// opened; individual entry failures are logged and skipped.
bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::string& prefix) {
    LOG_DEBUG("init from '%s'", file_path.c_str());
    file_paths_.push_back(file_path);
    size_t file_index = file_paths_.size() - 1;

    zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
    if (zip == nullptr) {
        LOG_ERROR("failed to open '%s'", file_path.c_str());
        return false;
    }
    int n = (int)zip_entries_total(zip);
    for (int i = 0; i < n; ++i) {
        // skip entries that cannot be opened instead of reading stale state
        if (zip_entry_openbyindex(zip, i) < 0) {
            continue;
        }
        {
            std::string name = zip_entry_name(zip);
            size_t pos       = name.find("data.pkl");
            if (pos != std::string::npos) {
                std::string dir = name.substr(0, pos);
                // use the project logger, not raw printf
                LOG_DEBUG("ZIP %d, name = %s, dir = %s", i, name.c_str(), dir.c_str());
                void* pkl_data  = nullptr;
                size_t pkl_size = 0;
                if (zip_entry_read(zip, &pkl_data, &pkl_size) < 0 || pkl_data == nullptr) {
                    LOG_ERROR("failed to read '%s' from '%s'", name.c_str(), file_path.c_str());
                } else {
                    parse_data_pkl((uint8_t*)pkl_data, pkl_size, zip, dir, file_index, prefix);
                }
                free(pkl_data);  // free(nullptr) is a no-op
            }
        }
        zip_entry_close(zip);
    }
    zip_close(zip);
    return true;
}
SDVersion ModelLoader::get_sd_version() { SDVersion ModelLoader::get_sd_version() {
TensorStorage token_embedding_weight, input_block_weight; TensorStorage token_embedding_weight, input_block_weight;
@ -1268,8 +674,8 @@ std::map<ggml_type, uint32_t> ModelLoader::get_vae_wtype_stat() {
return wtype_stat; return wtype_stat;
} }
static std::vector<std::pair<std::string, ggml_type>> parse_tensor_type_rules(const std::string& tensor_type_rules) { TensorTypeRules parse_tensor_type_rules(const std::string& tensor_type_rules) {
std::vector<std::pair<std::string, ggml_type>> result; TensorTypeRules result;
for (const auto& item : split_string(tensor_type_rules, ',')) { for (const auto& item : split_string(tensor_type_rules, ',')) {
if (item.size() == 0) if (item.size() == 0)
continue; continue;
@ -1702,76 +1108,6 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage
return false; return false;
} }
bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type, const std::string& tensor_type_rules_str) {
auto backend = ggml_backend_cpu_init();
size_t mem_size = 1 * 1024 * 1024; // for padding
mem_size += tensor_storage_map.size() * ggml_tensor_overhead();
mem_size += get_params_mem_size(backend, type);
LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
ggml_context* ggml_ctx = ggml_init({mem_size, nullptr, false});
gguf_context* gguf_ctx = gguf_init_empty();
auto tensor_type_rules = parse_tensor_type_rules(tensor_type_rules_str);
std::mutex tensor_mutex;
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
const std::string& name = tensor_storage.name;
ggml_type tensor_type = tensor_storage.type;
ggml_type dst_type = type;
for (const auto& tensor_type_rule : tensor_type_rules) {
std::regex pattern(tensor_type_rule.first);
if (std::regex_search(name, pattern)) {
dst_type = tensor_type_rule.second;
break;
}
}
if (tensor_should_be_converted(tensor_storage, dst_type)) {
tensor_type = dst_type;
}
std::lock_guard<std::mutex> lock(tensor_mutex);
ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
if (tensor == nullptr) {
LOG_ERROR("ggml_new_tensor failed");
return false;
}
ggml_set_name(tensor, name.c_str());
// LOG_DEBUG("%s %d %s %d[%d %d %d %d] %d[%d %d %d %d]", name.c_str(),
// ggml_nbytes(tensor), ggml_type_name(tensor_type),
// tensor_storage.n_dims,
// tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
// tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
if (!tensor->data) {
GGML_ASSERT(ggml_nelements(tensor) == 0);
// avoid crashing the gguf writer by setting a dummy pointer for zero-sized tensors
LOG_DEBUG("setting dummy pointer for zero-sized tensor %s", name.c_str());
tensor->data = ggml_get_mem_buffer(ggml_ctx);
}
*dst_tensor = tensor;
gguf_add_tensor(gguf_ctx, tensor);
return true;
};
bool success = load_tensors(on_new_tensor_cb);
ggml_backend_free(backend);
LOG_INFO("load tensors done");
LOG_INFO("trying to save tensors to %s", file_path.c_str());
if (success) {
gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
}
ggml_free(ggml_ctx);
gguf_free(gguf_ctx);
return success;
}
int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type) { int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type) {
size_t alignment = 128; size_t alignment = 128;
if (backend != nullptr) { if (backend != nullptr) {
@ -1791,29 +1127,3 @@ int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type)
return mem_size; return mem_size;
} }
// Loads a model file (and, when given, an external VAE mapped under the
// "vae." prefix), optionally renames its tensors, and writes everything to
// output_path as GGUF with the requested output type and per-tensor rules.
bool convert(const char* input_path,
             const char* vae_path,
             const char* output_path,
             sd_type_t output_type,
             const char* tensor_type_rules,
             bool convert_name) {
    ModelLoader model_loader;

    if (!model_loader.init_from_file(input_path)) {
        LOG_ERROR("init model loader from file failed: '%s'", input_path);
        return false;
    }

    const bool has_vae = (vae_path != nullptr && strlen(vae_path) > 0);
    if (has_vae && !model_loader.init_from_file(vae_path, "vae.")) {
        LOG_ERROR("init model loader from file failed: '%s'", vae_path);
        return false;
    }

    if (convert_name) {
        model_loader.convert_tensors_name();
    }

    return model_loader.save_to_gguf_file(output_path, (ggml_type)output_type, tensor_type_rules);
}

View File

@ -5,20 +5,13 @@
#include <map> #include <map>
#include <memory> #include <memory>
#include <set> #include <set>
#include <sstream>
#include <string> #include <string>
#include <tuple>
#include <utility>
#include <vector> #include <vector>
#include "ggml-backend.h" #include "ggml-backend.h"
#include "ggml.h" #include "ggml.h"
#include "gguf.h" #include "model_io/tensor_storage.h"
#include "json.hpp"
#include "ordered_map.hpp" #include "ordered_map.hpp"
#include "zip.h"
#define SD_MAX_DIMS 5
enum SDVersion { enum SDVersion {
VERSION_SD1, VERSION_SD1,
@ -195,116 +188,10 @@ enum PMVersion {
PM_VERSION_2, PM_VERSION_2,
}; };
// Metadata record for one tensor in a model file: name, dtype, shape, and
// where its raw bytes live (file index + offset, or an entry inside a zip
// archive). The tensor data itself is read lazily elsewhere.
struct TensorStorage {
    std::string name;
    // dtype of the data once materialized in ggml
    ggml_type type = GGML_TYPE_F32;
    // dtype the consumer expects; GGML_TYPE_COUNT = unconstrained
    ggml_type expected_type = GGML_TYPE_COUNT;
    // on-disk dtypes ggml has no native type for; converted while reading
    // (f8 -> f16, f64 -> f32, i64 -> i32)
    bool is_f8_e4m3 = false;
    bool is_f8_e5m2 = false;
    bool is_f64 = false;
    bool is_i64 = false;
    int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};  // shape; unused dims stay 1
    int n_dims = 0;
    size_t file_index = 0;   // index into the loader's file list
    int index_in_zip = -1;   // >= 0 means stored in a zip file
    uint64_t offset = 0;     // offset in file

    TensorStorage() = default;

    TensorStorage(std::string name, ggml_type type, const int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
        : name(std::move(name)), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
        for (int i = 0; i < n_dims; i++) {
            this->ne[i] = ne[i];
        }
    }

    // Total element count: product over all SD_MAX_DIMS entries (unused
    // dims are 1, so they do not change the product).
    int64_t nelements() const {
        int64_t n = 1;
        for (int i = 0; i < SD_MAX_DIMS; i++) {
            n *= ne[i];
        }
        return n;
    }

    // Size in bytes once materialized as `type`.
    int64_t nbytes() const {
        return nelements() * ggml_type_size(type) / ggml_blck_size(type);
    }

    // Size in bytes of the data as stored on disk; differs from nbytes()
    // for the converted dtypes (f8 is half of f16; f64/i64 are double
    // f32/i32).
    int64_t nbytes_to_read() const {
        if (is_f8_e4m3 || is_f8_e5m2) {
            return nbytes() / 2;
        } else if (is_f64 || is_i64) {
            return nbytes() * 2;
        } else {
            return nbytes();
        }
    }

    // Reshapes a 2D tensor [a, b] into the 4D shape [1, 1, a, b]; no-op
    // for any other rank.
    void unsqueeze() {
        if (n_dims == 2) {
            n_dims = 4;
            ne[3] = ne[1];
            ne[2] = ne[0];
            ne[1] = 1;
            ne[0] = 1;
        }
    }

    // Splits the tensor into n chunks along its outermost dimension.
    // Temporarily reverses ne so the split axis sits at ne[0], then restores
    // the original order. Assumes the byte size and that dimension divide
    // evenly by n.
    std::vector<TensorStorage> chunk(size_t n) {
        std::vector<TensorStorage> chunks;
        uint64_t chunk_size = nbytes_to_read() / n;
        reverse_ne();
        for (size_t i = 0; i < n; i++) {
            TensorStorage chunk_i = *this;
            chunk_i.ne[0] = ne[0] / n;
            chunk_i.offset = offset + i * chunk_size;
            chunk_i.reverse_ne();
            chunks.push_back(chunk_i);
        }
        reverse_ne();
        return chunks;
    }

    // Reverses the order of the used dimensions in place (translating
    // between on-disk and ggml dimension order).
    void reverse_ne() {
        int64_t new_ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
        for (int i = 0; i < n_dims; i++) {
            new_ne[i] = ne[n_dims - 1 - i];
        }
        for (int i = 0; i < n_dims; i++) {
            ne[i] = new_ne[i];
        }
    }

    // Human-readable "name | type | n_dims [ne0, ..., ne4]" summary, using
    // the original on-disk dtype name for converted types.
    std::string to_string() const {
        std::stringstream ss;
        const char* type_name = ggml_type_name(type);
        if (is_f8_e4m3) {
            type_name = "f8_e4m3";
        } else if (is_f8_e5m2) {
            type_name = "f8_e5m2";
        } else if (is_f64) {
            type_name = "f64";
        } else if (is_i64) {
            type_name = "i64";
        }
        ss << name << " | " << type_name << " | ";
        ss << n_dims << " [";
        for (int i = 0; i < SD_MAX_DIMS; i++) {
            ss << ne[i];
            if (i != SD_MAX_DIMS - 1) {
                ss << ", ";
            }
        }
        ss << "]";
        return ss.str();
    }
};
typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_cb_t;
typedef OrderedMap<std::string, TensorStorage> String2TensorStorage; typedef OrderedMap<std::string, TensorStorage> String2TensorStorage;
using TensorTypeRules = std::vector<std::pair<std::string, ggml_type>>;
TensorTypeRules parse_tensor_type_rules(const std::string& tensor_type_rules);
class ModelLoader { class ModelLoader {
protected: protected:
@ -314,16 +201,10 @@ protected:
void add_tensor_storage(const TensorStorage& tensor_storage); void add_tensor_storage(const TensorStorage& tensor_storage);
bool parse_data_pkl(uint8_t* buffer,
size_t buffer_size,
zip_t* zip,
std::string dir,
size_t file_index,
const std::string prefix);
bool init_from_gguf_file(const std::string& file_path, const std::string& prefix = ""); bool init_from_gguf_file(const std::string& file_path, const std::string& prefix = "");
bool init_from_safetensors_file(const std::string& file_path, const std::string& prefix = ""); bool init_from_safetensors_file(const std::string& file_path, const std::string& prefix = "");
bool init_from_ckpt_file(const std::string& file_path, const std::string& prefix = ""); bool init_from_torch_zip_file(const std::string& file_path, const std::string& prefix = "");
bool init_from_torch_legacy_file(const std::string& file_path, const std::string& prefix = "");
bool init_from_diffusers_file(const std::string& file_path, const std::string& prefix = ""); bool init_from_diffusers_file(const std::string& file_path, const std::string& prefix = "");
public: public:
@ -353,7 +234,6 @@ public:
return names; return names;
} }
bool save_to_gguf_file(const std::string& file_path, ggml_type type, const std::string& tensor_type_rules);
bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type); bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type);
int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT); int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
~ModelLoader() = default; ~ModelLoader() = default;

57
src/model_io/binary_io.h Normal file
View File

@ -0,0 +1,57 @@
#ifndef __SD_MODEL_IO_BINARY_IO_H__
#define __SD_MODEL_IO_BINARY_IO_H__
#include <cstdint>
#include <ostream>
namespace model_io {

// Decodes a 4-byte little-endian signed integer.
inline int32_t read_int(const uint8_t* buffer) {
    uint32_t value = 0;
    for (int i = 3; i >= 0; --i) {
        value = (value << 8) | static_cast<uint32_t>(buffer[i]);
    }
    return static_cast<int32_t>(value);
}

// Decodes a 2-byte little-endian unsigned integer.
inline uint16_t read_short(const uint8_t* buffer) {
    return static_cast<uint16_t>((static_cast<uint16_t>(buffer[1]) << 8) | buffer[0]);
}

// Decodes an 8-byte little-endian unsigned integer.
inline uint64_t read_u64(const uint8_t* buffer) {
    uint64_t value = 0;
    for (int i = 7; i >= 0; --i) {
        value = (value << 8) | static_cast<uint64_t>(buffer[i]);
    }
    return value;
}

// Writes value to the stream as 8 little-endian bytes.
inline void write_u64(std::ostream& stream, uint64_t value) {
    uint8_t buffer[8];
    for (int i = 0; i < 8; ++i) {
        buffer[i] = static_cast<uint8_t>(value >> (8 * i));
    }
    stream.write((const char*)buffer, sizeof(buffer));
}

// Returns the index of the first occurrence of c in buffer[0..len), or -1
// when it is absent.
inline int find_char(const uint8_t* buffer, int len, char c) {
    const uint8_t target = (uint8_t)c;
    for (int pos = 0; pos < len; pos++) {
        if (buffer[pos] == target) {
            return pos;
        }
    }
    return -1;
}

}  // namespace model_io
#endif // __SD_MODEL_IO_BINARY_IO_H__

123
src/model_io/gguf_io.cpp Normal file
View File

@ -0,0 +1,123 @@
#include "gguf_io.h"
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>
#include "gguf.h"
#include "gguf_reader_ext.h"
#include "util.h"
// Writes message into *error when the caller supplied an output slot;
// silently discards it otherwise.
static void set_error(std::string* error, const std::string& message) {
    if (error == nullptr) {
        return;
    }
    *error = message;
}
// Cheap format sniff: true when the file exists and starts with the four
// GGUF magic bytes.
bool is_gguf_file(const std::string& file_path) {
    std::ifstream file(file_path, std::ios::binary);
    if (!file.is_open()) {
        return false;
    }
    char magic[4];
    file.read(magic, sizeof(magic));
    if (!file) {
        return false;
    }
    bool matches = true;
    for (uint32_t i = 0; matches && i < sizeof(magic); i++) {
        matches = (magic[i] == GGUF_MAGIC[i]);
    }
    return matches;
}
// Reads tensor metadata (not data) from a GGUF file into tensor_storages.
// Tries ggml's gguf loader first; if it rejects the file, falls back to the
// project's own GGUFReader. Returns false and fills *error (when non-null)
// on failure.
bool read_gguf_file(const std::string& file_path,
                    std::vector<TensorStorage>& tensor_storages,
                    std::string* error) {
    tensor_storages.clear();

    gguf_context* ctx_gguf_ = nullptr;
    ggml_context* ctx_meta_ = nullptr;
    // params are {no_alloc, ctx}: only tensor metadata is materialized
    ctx_gguf_ = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta_});
    if (!ctx_gguf_) {
        // fallback path for files ggml's loader cannot open
        GGUFReader gguf_reader;
        if (!gguf_reader.load(file_path)) {
            set_error(error, "failed to open '" + file_path + "' with GGUFReader");
            return false;
        }
        size_t data_offset = gguf_reader.data_offset();
        for (const auto& gguf_tensor_info : gguf_reader.tensors()) {
            TensorStorage tensor_storage(
                gguf_tensor_info.name,
                gguf_tensor_info.type,
                gguf_tensor_info.shape.data(),
                static_cast<int>(gguf_tensor_info.shape.size()),
                0,  // file_index: single-file read
                data_offset + gguf_tensor_info.offset);  // absolute file offset
            tensor_storages.push_back(tensor_storage);
        }
        return true;
    }

    int n_tensors = static_cast<int>(gguf_get_n_tensors(ctx_gguf_));
    size_t data_offset = gguf_get_data_offset(ctx_gguf_);
    for (int i = 0; i < n_tensors; i++) {
        std::string name = gguf_get_tensor_name(ctx_gguf_, i);
        ggml_tensor* dummy = ggml_get_tensor(ctx_meta_, name.c_str());
        size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf_, i);
        TensorStorage tensor_storage(name, dummy->type, dummy->ne, ggml_n_dims(dummy), 0, offset);
        // sanity check: metadata-derived size must match ggml's own size
        if (ggml_nbytes(dummy) != tensor_storage.nbytes()) {
            gguf_free(ctx_gguf_);
            ggml_free(ctx_meta_);
            set_error(error, "size mismatch for tensor '" + name + "'");
            return false;
        }
        tensor_storages.push_back(tensor_storage);
    }
    gguf_free(ctx_gguf_);
    ggml_free(ctx_meta_);
    return true;
}
bool write_gguf_file(const std::string& file_path,
const std::vector<TensorWriteInfo>& tensors,
std::string* error) {
gguf_context* gguf_ctx = gguf_init_empty();
if (gguf_ctx == nullptr) {
set_error(error, "gguf_init_empty failed");
return false;
}
for (const TensorWriteInfo& write_tensor : tensors) {
ggml_tensor* tensor = write_tensor.tensor;
if (tensor == nullptr) {
set_error(error, "null tensor cannot be written to GGUF");
gguf_free(gguf_ctx);
return false;
}
gguf_add_tensor(gguf_ctx, tensor);
}
LOG_INFO("trying to save tensors to %s", file_path.c_str());
bool success = gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
if (!success) {
set_error(error, "failed to write GGUF file '" + file_path + "'");
}
gguf_free(gguf_ctx);
return success;
}

17
src/model_io/gguf_io.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef __SD_MODEL_IO_GGUF_IO_H__
#define __SD_MODEL_IO_GGUF_IO_H__
#include <string>
#include <vector>
#include "tensor_storage.h"
bool is_gguf_file(const std::string& file_path);
bool read_gguf_file(const std::string& file_path,
std::vector<TensorStorage>& tensor_storages,
std::string* error = nullptr);
bool write_gguf_file(const std::string& file_path,
const std::vector<TensorWriteInfo>& tensors,
std::string* error = nullptr);
#endif // __SD_MODEL_IO_GGUF_IO_H__

View File

@ -1,5 +1,5 @@
#ifndef __GGUF_READER_HPP__ #ifndef __SD_MODEL_IO_GGUF_READER_EXT_H__
#define __GGUF_READER_HPP__ #define __SD_MODEL_IO_GGUF_READER_EXT_H__
#include <cstdint> #include <cstdint>
#include <fstream> #include <fstream>
@ -231,4 +231,4 @@ public:
size_t data_offset() const { return data_offset_; } size_t data_offset() const { return data_offset_; }
}; };
#endif // __GGUF_READER_HPP__ #endif // __SD_MODEL_IO_GGUF_READER_EXT_H__

1064
src/model_io/pickle_io.cpp Normal file

File diff suppressed because it is too large Load Diff

21
src/model_io/pickle_io.h Normal file
View File

@ -0,0 +1,21 @@
#ifndef __SD_MODEL_IO_PICKLE_IO_H__
#define __SD_MODEL_IO_PICKLE_IO_H__
#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>
#include "tensor_storage.h"
bool skip_pickle_object(const uint8_t* buffer, size_t buffer_size, size_t* object_size);
bool pickle_object_is_torch_magic_number(const uint8_t* buffer, size_t buffer_size);
bool parse_pickle_uint32_object(const uint8_t* buffer, size_t buffer_size, uint32_t* value);
bool parse_torch_state_dict_pickle(const uint8_t* buffer,
size_t buffer_size,
std::vector<TensorStorage>& tensor_storages,
std::unordered_map<std::string, uint64_t>& storage_nbytes,
std::string* error = nullptr);
#endif // __SD_MODEL_IO_PICKLE_IO_H__

View File

@ -0,0 +1,316 @@
#include "safetensors_io.h"
#include <cstdint>
#include <exception>
#include <fstream>
#include <string>
#include <vector>
#include "binary_io.h"
#include "json.hpp"
#include "util.h"
static constexpr size_t ST_HEADER_SIZE_LEN = 8;
// Copies message into *error if the caller asked for error details.
static void set_error(std::string* error, const std::string& message) {
    if (error) {
        error->assign(message);
    }
}
// Cheap format sniff for safetensors: an 8-byte little-endian header length
// followed by a JSON header that must fit inside the file and parse.
bool is_safetensors_file(const std::string& file_path) {
    std::ifstream file(file_path, std::ios::binary);
    if (!file.is_open()) {
        return false;
    }

    // get file size
    file.seekg(0, file.end);
    size_t file_size_ = file.tellg();
    file.seekg(0, file.beg);

    // read header size
    if (file_size_ <= ST_HEADER_SIZE_LEN) {
        return false;
    }

    uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
    file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
    if (!file) {
        return false;
    }

    size_t header_size_ = model_io::read_u64(header_size_buf);
    // header must be non-trivial JSON ("{}" is 2 bytes) and leave room for data
    if (header_size_ >= file_size_ || header_size_ <= 2) {
        return false;
    }

    // read header (null-terminated for string parsing)
    std::vector<char> header_buf;
    header_buf.resize(header_size_ + 1);
    header_buf[header_size_] = '\0';
    file.read(header_buf.data(), header_size_);
    if (!file) {
        return false;
    }

    // validate the JSON without building (and then discarding) a DOM
    return nlohmann::json::accept(header_buf.data());
}
// Maps a safetensors dtype string to the ggml type used to hold it in
// memory; some dtypes are converted on load, hence the narrower/wider
// in-memory types. Returns GGML_TYPE_COUNT for unsupported dtypes.
static ggml_type safetensors_dtype_to_ggml_type(const std::string& dtype) {
    if (dtype == "F16") {
        return GGML_TYPE_F16;
    }
    if (dtype == "BF16") {
        return GGML_TYPE_BF16;
    }
    if (dtype == "F32" || dtype == "F64") {
        return GGML_TYPE_F32;  // f64 is narrowed to f32 on load
    }
    if (dtype == "F8_E4M3" || dtype == "F8_E5M2") {
        return GGML_TYPE_F16;  // f8 is widened to f16 on load
    }
    if (dtype == "I32" || dtype == "I64") {
        return GGML_TYPE_I32;  // i64 is narrowed to i32 on load
    }
    return GGML_TYPE_COUNT;
}
// https://huggingface.co/docs/safetensors/index
// Reads tensor metadata (not data) from a safetensors file into
// tensor_storages. File layout: 8-byte little-endian header size, JSON
// header, then the raw data section. Returns false (with *error set when
// provided) on any validation failure.
bool read_safetensors_file(const std::string& file_path,
                           std::vector<TensorStorage>& tensor_storages,
                           std::string* error) {
    std::ifstream file(file_path, std::ios::binary);
    if (!file.is_open()) {
        set_error(error, "failed to open '" + file_path + "'");
        return false;
    }

    // get file size
    file.seekg(0, file.end);
    size_t file_size_ = file.tellg();
    file.seekg(0, file.beg);

    // read header size
    if (file_size_ <= ST_HEADER_SIZE_LEN) {
        set_error(error, "invalid safetensor file '" + file_path + "'");
        return false;
    }

    uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
    file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
    if (!file) {
        set_error(error, "read safetensors header size failed: '" + file_path + "'");
        return false;
    }

    size_t header_size_ = model_io::read_u64(header_size_buf);
    if (header_size_ >= file_size_) {
        set_error(error, "invalid safetensor file '" + file_path + "'");
        return false;
    }

    // read header (null-terminated so it parses as a C string)
    std::vector<char> header_buf;
    header_buf.resize(header_size_ + 1);
    header_buf[header_size_] = '\0';
    file.read(header_buf.data(), header_size_);
    if (!file) {
        set_error(error, "read safetensors header failed: '" + file_path + "'");
        return false;
    }

    nlohmann::json header_;
    try {
        header_ = nlohmann::json::parse(header_buf.data());
    } catch (const std::exception&) {
        set_error(error, "parsing safetensors header failed: '" + file_path + "'");
        return false;
    }

    tensor_storages.clear();
    for (auto& item : header_.items()) {
        std::string name = item.key();
        nlohmann::json tensor_info = item.value();
        // LOG_DEBUG("%s %s\n", name.c_str(), tensor_info.dump().c_str());

        // "__metadata__" is the spec's reserved free-form metadata key
        if (name == "__metadata__") {
            continue;
        }

        std::string dtype = tensor_info["dtype"];
        nlohmann::json shape = tensor_info["shape"];

        // U8 tensors are skipped entirely
        if (dtype == "U8") {
            continue;
        }

        // byte range of this tensor within the data section
        size_t begin = tensor_info["data_offsets"][0].get<size_t>();
        size_t end = tensor_info["data_offsets"][1].get<size_t>();

        ggml_type type = safetensors_dtype_to_ggml_type(dtype);
        if (type == GGML_TYPE_COUNT) {
            set_error(error, "unsupported dtype '" + dtype + "' (tensor '" + name + "')");
            return false;
        }

        if (shape.size() > SD_MAX_DIMS) {
            set_error(error, "invalid tensor '" + name + "'");
            return false;
        }

        int n_dims = (int)shape.size();
        int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
        for (int i = 0; i < n_dims; i++) {
            ne[i] = shape[i].get<int64_t>();
        }

        // fold the two outermost dims together for 5D tensors
        if (n_dims == 5) {
            n_dims = 4;
            ne[0] = ne[0] * ne[1];
            ne[1] = ne[2];
            ne[2] = ne[3];
            ne[3] = ne[4];
        }

        // ggml_n_dims returns 1 for scalars
        if (n_dims == 0) {
            n_dims = 1;
        }

        // offset is absolute in the file: size prefix + header + data offset
        TensorStorage tensor_storage(name, type, ne, n_dims, 0, ST_HEADER_SIZE_LEN + header_size_ + begin);
        tensor_storage.reverse_ne();

        size_t tensor_data_size = end - begin;

        // converted dtypes occupy a different number of bytes on disk than
        // in memory; validate the on-disk size accordingly
        bool tensor_size_ok;
        if (dtype == "F8_E4M3") {
            tensor_storage.is_f8_e4m3 = true;
            // f8 -> f16
            tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
        } else if (dtype == "F8_E5M2") {
            tensor_storage.is_f8_e5m2 = true;
            // f8 -> f16
            tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
        } else if (dtype == "F64") {
            tensor_storage.is_f64 = true;
            // f64 -> f32
            tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
        } else if (dtype == "I64") {
            tensor_storage.is_i64 = true;
            // i64 -> i32
            tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
        } else {
            tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size);
        }
        if (!tensor_size_ok) {
            set_error(error, "size mismatch for tensor '" + name + "' (" + dtype + ")");
            return false;
        }

        tensor_storages.push_back(tensor_storage);

        // LOG_DEBUG("%s %s", tensor_storage.to_string().c_str(), dtype.c_str());
    }
    return true;
}
// Maps a ggml type to its safetensors dtype string. Returns false (leaving
// *dtype untouched) for types with no safetensors equivalent.
static bool ggml_type_to_safetensors_dtype(ggml_type type, std::string* dtype) {
    const char* name = nullptr;
    switch (type) {
        case GGML_TYPE_F16:
            name = "F16";
            break;
        case GGML_TYPE_BF16:
            name = "BF16";
            break;
        case GGML_TYPE_F32:
            name = "F32";
            break;
        case GGML_TYPE_I32:
            name = "I32";
            break;
        default:
            break;
    }
    if (name == nullptr) {
        return false;
    }
    *dtype = name;
    return true;
}
bool write_safetensors_file(const std::string& file_path,
const std::vector<TensorWriteInfo>& tensors,
std::string* error) {
nlohmann::ordered_json header = nlohmann::ordered_json::object();
uint64_t data_offset = 0;
for (const TensorWriteInfo& write_tensor : tensors) {
ggml_tensor* tensor = write_tensor.tensor;
if (tensor == nullptr) {
set_error(error, "null tensor cannot be written to safetensors");
return false;
}
const std::string name = ggml_get_name(tensor);
std::string dtype;
if (!ggml_type_to_safetensors_dtype(tensor->type, &dtype)) {
set_error(error,
"unsupported safetensors dtype '" + std::string(ggml_type_name(tensor->type)) +
"' for tensor '" + name + "'");
return false;
}
const uint64_t tensor_nbytes = ggml_nbytes(tensor);
nlohmann::ordered_json json_tensor_info = nlohmann::ordered_json::object();
json_tensor_info["dtype"] = dtype;
nlohmann::ordered_json shape = nlohmann::ordered_json::array();
for (int i = 0; i < write_tensor.n_dims; ++i) {
shape.push_back(write_tensor.ne[write_tensor.n_dims - 1 - i]);
}
json_tensor_info["shape"] = shape;
nlohmann::ordered_json data_offsets = nlohmann::ordered_json::array();
data_offsets.push_back(data_offset);
data_offsets.push_back(data_offset + tensor_nbytes);
json_tensor_info["data_offsets"] = data_offsets;
header[name] = json_tensor_info;
data_offset += tensor_nbytes;
}
const std::string header_str = header.dump();
std::ofstream file(file_path, std::ios::binary);
if (!file.is_open()) {
set_error(error, "failed to open '" + file_path + "' for writing");
return false;
}
LOG_INFO("trying to save tensors to %s", file_path.c_str());
model_io::write_u64(file, header_str.size());
file.write(header_str.data(), header_str.size());
if (!file) {
set_error(error, "failed to write safetensors header to '" + file_path + "'");
return false;
}
for (const TensorWriteInfo& write_tensor : tensors) {
ggml_tensor* tensor = write_tensor.tensor;
const std::string name = ggml_get_name(tensor);
const size_t tensor_nbytes = ggml_nbytes(tensor);
file.write((const char*)tensor->data, tensor_nbytes);
if (!file) {
set_error(error,
"failed to write tensor '" + name + "' to '" + file_path + "'");
return false;
}
}
return true;
}

View File

@ -0,0 +1,17 @@
#ifndef __SD_MODEL_IO_SAFETENSORS_IO_H__
#define __SD_MODEL_IO_SAFETENSORS_IO_H__
#include <string>
#include <vector>
#include "tensor_storage.h"
bool is_safetensors_file(const std::string& file_path);
bool read_safetensors_file(const std::string& file_path,
std::vector<TensorStorage>& tensor_storages,
std::string* error = nullptr);
bool write_safetensors_file(const std::string& file_path,
const std::vector<TensorWriteInfo>& tensors,
std::string* error = nullptr);
#endif // __SD_MODEL_IO_SAFETENSORS_IO_H__

View File

@ -0,0 +1,132 @@
#ifndef __SD_TENSOR_STORAGE_H__
#define __SD_TENSOR_STORAGE_H__
#include <cstddef>
#include <cstdint>
#include <functional>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "ggml.h"
#define SD_MAX_DIMS 5
// Metadata record for one tensor in a model file: name, dtype, shape, and
// where its raw bytes live (file index + offset, zip entry index, or a
// pickle storage key). The tensor data itself is read lazily elsewhere.
struct TensorStorage {
    std::string name;
    // dtype of the data once materialized in ggml
    ggml_type type = GGML_TYPE_F32;
    // dtype the consumer expects; GGML_TYPE_COUNT = unconstrained
    ggml_type expected_type = GGML_TYPE_COUNT;
    // on-disk dtypes ggml has no native type for; converted while reading
    // (f8 -> f16, f64 -> f32, i64 -> i32)
    bool is_f8_e4m3 = false;
    bool is_f8_e5m2 = false;
    bool is_f64 = false;
    bool is_i64 = false;
    int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};  // shape; unused dims stay 1
    int n_dims = 0;
    std::string storage_key;  // pickle storage key (torch checkpoints)
    size_t file_index = 0;    // index into the loader's file list
    int index_in_zip = -1;    // >= 0 means stored in a zip file
    uint64_t offset = 0;      // offset in file

    TensorStorage() = default;

    TensorStorage(std::string name, ggml_type type, const int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
        : name(std::move(name)), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
        for (int i = 0; i < n_dims; i++) {
            this->ne[i] = ne[i];
        }
    }

    // Total element count: product over all SD_MAX_DIMS entries (unused
    // dims are 1, so they do not change the product).
    int64_t nelements() const {
        int64_t n = 1;
        for (int i = 0; i < SD_MAX_DIMS; i++) {
            n *= ne[i];
        }
        return n;
    }

    // Size in bytes once materialized as `type`.
    int64_t nbytes() const {
        return nelements() * ggml_type_size(type) / ggml_blck_size(type);
    }

    // Size in bytes of the data as stored on disk; differs from nbytes()
    // for the converted dtypes (f8 is half of f16; f64/i64 are double
    // f32/i32).
    int64_t nbytes_to_read() const {
        if (is_f8_e4m3 || is_f8_e5m2) {
            return nbytes() / 2;
        } else if (is_f64 || is_i64) {
            return nbytes() * 2;
        } else {
            return nbytes();
        }
    }

    // Reshapes a 2D tensor [a, b] into the 4D shape [1, 1, a, b]; no-op
    // for any other rank.
    void unsqueeze() {
        if (n_dims == 2) {
            n_dims = 4;
            ne[3] = ne[1];
            ne[2] = ne[0];
            ne[1] = 1;
            ne[0] = 1;
        }
    }

    // Splits the tensor into n chunks along its outermost dimension.
    // Temporarily reverses ne so the split axis sits at ne[0], then restores
    // the original order. Assumes the byte size and that dimension divide
    // evenly by n.
    std::vector<TensorStorage> chunk(size_t n) {
        std::vector<TensorStorage> chunks;
        uint64_t chunk_size = nbytes_to_read() / n;
        reverse_ne();
        for (size_t i = 0; i < n; i++) {
            TensorStorage chunk_i = *this;
            chunk_i.ne[0] = ne[0] / n;
            chunk_i.offset = offset + i * chunk_size;
            chunk_i.reverse_ne();
            chunks.push_back(chunk_i);
        }
        reverse_ne();
        return chunks;
    }

    // Reverses the order of the used dimensions in place (translating
    // between on-disk and ggml dimension order).
    void reverse_ne() {
        int64_t new_ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
        for (int i = 0; i < n_dims; i++) {
            new_ne[i] = ne[n_dims - 1 - i];
        }
        for (int i = 0; i < n_dims; i++) {
            ne[i] = new_ne[i];
        }
    }

    // Human-readable "name | type | n_dims [ne0, ..., ne4]" summary, using
    // the original on-disk dtype name for converted types.
    std::string to_string() const {
        std::stringstream ss;
        const char* type_name = ggml_type_name(type);
        if (is_f8_e4m3) {
            type_name = "f8_e4m3";
        } else if (is_f8_e5m2) {
            type_name = "f8_e5m2";
        } else if (is_f64) {
            type_name = "f64";
        } else if (is_i64) {
            type_name = "i64";
        }
        ss << name << " | " << type_name << " | ";
        ss << n_dims << " [";
        for (int i = 0; i < SD_MAX_DIMS; i++) {
            ss << ne[i];
            if (i != SD_MAX_DIMS - 1) {
                ss << ", ";
            }
        }
        ss << "]";
        return ss.str();
    }
};
// One tensor queued for serialization: the logical shape to record in the
// output header plus the ggml tensor holding the data. The pointer is
// borrowed — the write_* functions do not free it.
struct TensorWriteInfo {
    int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};  // logical shape; unused dims stay 1
    int n_dims = 0;
    ggml_tensor* tensor = nullptr;  // non-owning
};
typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_cb_t;
#endif // __SD_TENSOR_STORAGE_H__

View File

@ -0,0 +1,252 @@
#include "torch_legacy_io.h"
#include <algorithm>
#include <cstdint>
#include <fstream>
#include <string>
#include <unordered_map>
#include <vector>
#include "pickle_io.h"
#include "util.h"
// torch.save format background:
//
// - Before PyTorch 1.6.0, torch.save used this legacy non-zip format by
// default.
// - Since PyTorch 1.6.0, torch.save defaults to an uncompressed ZIP64 archive
// containing data.pkl, data/, version, and, since PyTorch 2.1.0, byteorder.
// - The old format can still be produced explicitly with:
// torch.save(obj, path, _use_new_zipfile_serialization=False)
//
// Whether obj is a state_dict or a whole nn.Module does not change the outer
// container format selected by torch.save. It changes the pickled object inside:
//
// - state_dict: usually an OrderedDict[str, Tensor]. pickle_io.cpp supports a
// restricted subset of this layout because tensor metadata and raw storages
// can be recovered without executing pickle callables.
// - whole module/checkpoint object: arbitrary Python object graph. This may
// require importing user classes and executing pickle GLOBAL/REDUCE rebuild
// logic, so it is intentionally not supported here.
//
// Legacy non-zip PyTorch files are not a single pickle object:
//
// 1. pickle object: PyTorch legacy magic number
// 2. pickle object: legacy protocol version, expected to be 1001
// 3. pickle object: sys_info metadata, ignored by this reader
// 4. pickle object: state_dict metadata, parsed by pickle_io.cpp
// 5. pickle object: serialized storage key list, skipped here
// 6. raw storage data payloads
// - PyTorch writes storages after the pickles, ordered by storage key
// - each storage has an 8-byte legacy storage header followed by raw bytes
// Size of the 8-byte header PyTorch writes in front of each legacy storage payload.
static constexpr size_t LEGACY_STORAGE_HEADER_SIZE = 8;
// Store message into *error when the caller supplied an error slot; no-op otherwise.
static void set_error(std::string* error, const std::string& message) {
    if (error == nullptr) {
        return;
    }
    *error = message;
}
// Format a byte sequence as dash-separated uppercase hex pairs,
// e.g. {0x12, 0xAB} -> "12-AB". Returns "" for an empty input.
static std::string bytes_to_hex(const std::vector<uint8_t>& bytes) {
    static const char* hex = "0123456789ABCDEF";
    std::string result;
    // 2 hex digits per byte plus a separator; 3n slightly over-reserves.
    result.reserve(bytes.size() * 3);
    bool first = true;
    for (uint8_t byte : bytes) {
        if (!first) {
            result.push_back('-');
        }
        first = false;
        result.push_back(hex[(byte >> 4) & 0x0F]);
        result.push_back(hex[byte & 0x0F]);
    }
    return result;
}
static bool is_probably_tar_file(const std::vector<uint8_t>& header) {
return header.size() >= 262 &&
header[257] == 'u' &&
header[258] == 's' &&
header[259] == 't' &&
header[260] == 'a' &&
header[261] == 'r';
}
// Build an "unsupported PyTorch file" diagnostic for .pt/.pth paths,
// summarizing the first bytes and whether the file looks like a raw pickle
// or a legacy tar checkpoint. Returns "" for any other file extension.
static std::string torch_legacy_diagnostics(const std::string& file_path, const std::vector<uint8_t>& buffer) {
    const bool pt_like = ends_with(file_path, ".pt") || ends_with(file_path, ".pth");
    if (!pt_like) {
        return "";
    }
    if (buffer.empty()) {
        return "unsupported PyTorch file '" + file_path + "': empty file";
    }
    const size_t preview_len = std::min<size_t>(buffer.size(), 32);
    const std::vector<uint8_t> preview(buffer.begin(), buffer.begin() + preview_len);
    const bool raw_pickle = buffer[0] == 0x80;  // pickle PROTO opcode
    const bool tar_file = is_probably_tar_file(buffer);
    std::string message = "unsupported PyTorch file '" + file_path + "': first bytes ";
    message += bytes_to_hex(preview);
    message += ", raw_pickle=";
    message += raw_pickle ? "true" : "false";
    message += ", tar=";
    message += tar_file ? "true" : "false";
    if (raw_pickle) {
        message += "; raw pickle did not match the restricted state_dict layouts currently supported";
    } else if (tar_file) {
        message += "; legacy tar PyTorch checkpoints are not supported yet";
    }
    return message;
}
// Read a legacy (pre-PyTorch-1.6.0, non-zip) torch.save checkpoint and fill
// tensor_storages with tensor metadata whose offsets have been rebased to
// absolute file positions inside the raw storage payload area.
// Returns false on any parse failure or size inconsistency, setting *error
// when error != nullptr.
bool read_torch_legacy_file(const std::string& file_path,
                            std::vector<TensorStorage>& tensor_storages,
                            std::string* error) {
    std::ifstream file(file_path, std::ios::binary);
    if (!file.is_open()) {
        set_error(error, "failed to open '" + file_path + "'");
        return false;
    }
    // Slurp the whole file: all parsing below works on one contiguous buffer.
    file.seekg(0, file.end);
    size_t file_size = (size_t)file.tellg();
    file.seekg(0, file.beg);
    if (file_size == 0) {
        set_error(error, "empty file '" + file_path + "'");
        return false;
    }
    std::vector<uint8_t> buffer(file_size);
    file.read((char*)buffer.data(), file_size);
    if (!file) {
        set_error(error, "failed to read '" + file_path + "'");
        return false;
    }
    // Convert each tensor's (storage_key, intra-storage offset) into an
    // absolute file offset. PyTorch writes storages after the pickles in
    // ascending storage-key order, each prefixed by an 8-byte legacy header
    // (see the format notes at the top of this file).
    auto finalize_tensor_offsets = [&](size_t storage_data_offset,
                                       const std::unordered_map<std::string, uint64_t>& legacy_storage_map) -> bool {
        if (storage_data_offset > file_size) {
            return false;
        }
        // Sort keys to reproduce PyTorch's storage write order.
        std::vector<std::string> storage_keys;
        storage_keys.reserve(legacy_storage_map.size());
        for (const auto& [storage_key, _] : legacy_storage_map) {
            storage_keys.push_back(storage_key);
        }
        std::sort(storage_keys.begin(), storage_keys.end());
        // Walk the payload area, recording where each storage's data begins.
        std::unordered_map<std::string, uint64_t> storage_offsets;
        uint64_t current_offset = storage_data_offset;
        for (const auto& storage_key : storage_keys) {
            auto it = legacy_storage_map.find(storage_key);
            if (it == legacy_storage_map.end()) {
                return false;
            }
            // Reject a storage that would run past the end of the file.
            if (current_offset + LEGACY_STORAGE_HEADER_SIZE + it->second > file_size) {
                return false;
            }
            storage_offsets[storage_key] = current_offset + LEGACY_STORAGE_HEADER_SIZE;
            current_offset += LEGACY_STORAGE_HEADER_SIZE + it->second;
        }
        for (auto& tensor_storage : tensor_storages) {
            if (tensor_storage.storage_key.empty()) {
                continue;
            }
            auto it_offset = storage_offsets.find(tensor_storage.storage_key);
            auto it_size = legacy_storage_map.find(tensor_storage.storage_key);
            if (it_offset == storage_offsets.end() || it_size == legacy_storage_map.end()) {
                return false;
            }
            uint64_t base_offset = it_offset->second;
            uint64_t storage_nbytes = it_size->second;
            uint64_t tensor_nbytes = tensor_storage.nbytes_to_read();
            // A tensor view must fit entirely inside its backing storage.
            if (tensor_storage.offset + tensor_nbytes > storage_nbytes) {
                return false;
            }
            // Rebase from storage-relative to file-absolute, and clear the
            // key to mark the tensor as resolved.
            tensor_storage.offset = base_offset + tensor_storage.offset;
            tensor_storage.storage_key.clear();
        }
        return true;
    };
    // Parse the state_dict pickle at the given offset, skip the storage key
    // list pickle that follows it, then resolve tensor offsets against the
    // raw storage data that comes after.
    auto parse_state_dict_at = [&](size_t state_dict_offset, size_t state_dict_size, size_t* storage_data_offset) -> bool {
        tensor_storages.clear();
        std::unordered_map<std::string, uint64_t> legacy_storage_map;
        if (!parse_torch_state_dict_pickle(buffer.data() + state_dict_offset,
                                           state_dict_size,
                                           tensor_storages,
                                           legacy_storage_map,
                                           error)) {
            return false;
        }
        size_t offset_after_state_dict = state_dict_offset + state_dict_size;
        size_t storage_keys_size = 0;
        // The serialized storage key list is skipped, not interpreted.
        if (!skip_pickle_object(buffer.data() + offset_after_state_dict,
                                buffer.size() - offset_after_state_dict,
                                &storage_keys_size)) {
            return false;
        }
        *storage_data_offset = offset_after_state_dict + storage_keys_size;
        return finalize_tensor_offsets(*storage_data_offset, legacy_storage_map);
    };
    // Layout 1: full legacy preamble — magic number, protocol version,
    // sys_info, then the state_dict pickle.
    size_t object_size_1 = 0;
    size_t offset = 0;
    if (skip_pickle_object(buffer.data(), buffer.size(), &object_size_1) &&
        pickle_object_is_torch_magic_number(buffer.data(), object_size_1)) {
        offset += object_size_1;
        size_t object_size_2 = 0;
        if (!skip_pickle_object(buffer.data() + offset, buffer.size() - offset, &object_size_2)) {
            set_error(error, torch_legacy_diagnostics(file_path, buffer));
            return false;
        }
        // Only legacy protocol version 1001 is supported.
        uint32_t protocol_version = 0;
        if (!parse_pickle_uint32_object(buffer.data() + offset, object_size_2, &protocol_version) || protocol_version != 1001) {
            set_error(error, torch_legacy_diagnostics(file_path, buffer));
            return false;
        }
        offset += object_size_2;
        // sys_info metadata: skipped, not interpreted.
        size_t object_size_3 = 0;
        if (!skip_pickle_object(buffer.data() + offset, buffer.size() - offset, &object_size_3)) {
            set_error(error, torch_legacy_diagnostics(file_path, buffer));
            return false;
        }
        offset += object_size_3;
        size_t state_dict_size = 0;
        if (!skip_pickle_object(buffer.data() + offset, buffer.size() - offset, &state_dict_size)) {
            set_error(error, torch_legacy_diagnostics(file_path, buffer));
            return false;
        }
        size_t storage_data_offset = 0;
        if (parse_state_dict_at(offset, state_dict_size, &storage_data_offset)) {
            return true;
        }
        // Keep any more specific error already set by the parser; otherwise
        // emit the generic diagnostics.
        if (error != nullptr && error->empty()) {
            set_error(error, torch_legacy_diagnostics(file_path, buffer));
        }
        return false;
    }
    // Layout 2: a bare pickle at the start of the file (no magic/version
    // preamble), tried directly as a state_dict.
    size_t state_dict_size = 0;
    if (skip_pickle_object(buffer.data(), buffer.size(), &state_dict_size)) {
        size_t storage_data_offset = 0;
        if (parse_state_dict_at(0, state_dict_size, &storage_data_offset)) {
            return true;
        }
    }
    if (error != nullptr && error->empty()) {
        set_error(error, torch_legacy_diagnostics(file_path, buffer));
    }
    return false;
}

View File

@ -0,0 +1,13 @@
#ifndef __SD_MODEL_IO_TORCH_LEGACY_IO_H__
#define __SD_MODEL_IO_TORCH_LEGACY_IO_H__
#include <string>
#include <vector>
#include "tensor_storage.h"
// Read a legacy (pre-PyTorch-1.6.0, non-zip) torch.save checkpoint and fill
// tensor_storages with tensor metadata whose offsets point into the file's
// raw storage payloads. Returns false on failure; *error is populated when
// error != nullptr.
bool read_torch_legacy_file(const std::string& file_path,
                            std::vector<TensorStorage>& tensor_storages,
                            std::string* error = nullptr);
#endif  // __SD_MODEL_IO_TORCH_LEGACY_IO_H__

View File

@ -0,0 +1,140 @@
#include "torch_zip_io.h"
#include <cstdint>
#include <cstdlib>
#include <string>
#include <unordered_map>
#include <vector>
#include "pickle_io.h"
#include "zip.h"
// Copy message into *error if the caller asked for error reporting.
static void set_error(std::string* error, const std::string& message) {
    if (error != nullptr) {
        error->assign(message);
    }
}
bool is_torch_zip_file(const std::string& file_path) {
zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
if (zip == nullptr) {
return false;
}
zip_close(zip);
return true;
}
// Linear search for an archive entry by exact name.
// On success stores the entry index and uncompressed size and returns true.
//
// Fixes vs. the original: the zip_entry_openbyindex() return code was
// ignored and zip_entry_name() can return NULL after a failed open —
// constructing std::string from NULL is undefined behavior.
static bool find_zip_entry(zip_t* zip, const std::string& entry_name, int* index, uint64_t* size) {
    size_t n = zip_entries_total(zip);
    for (size_t i = 0; i < n; ++i) {
        if (zip_entry_openbyindex(zip, i) != 0) {
            // Skip entries the library cannot open instead of reading a
            // stale/NULL name.
            continue;
        }
        const char* name = zip_entry_name(zip);
        if (name != nullptr && entry_name == name) {
            *index = (int)i;
            *size = zip_entry_size(zip);
            zip_entry_close(zip);
            return true;
        }
        zip_entry_close(zip);
    }
    return false;
}
// Parse a data.pkl buffer from a torch zip checkpoint and append the tensors
// it describes to tensor_storages. `dir` is the archive prefix in front of
// "data.pkl" (e.g. "archive/"); each tensor's storage lives in the entry
// "<dir>data/<storage_key>". Every tensor is validated against its zip entry
// size, tagged with the entry index, and its storage key cleared to mark it
// resolved. Returns false (with *error set) on the first inconsistency.
static bool parse_zip_data_pkl(const uint8_t* buffer,
                               size_t buffer_size,
                               zip_t* zip,
                               const std::string& dir,
                               std::vector<TensorStorage>& tensor_storages,
                               std::string* error) {
    std::vector<TensorStorage> parsed_tensors;
    std::unordered_map<std::string, uint64_t> storage_nbytes;
    if (!parse_torch_state_dict_pickle(buffer, buffer_size, parsed_tensors, storage_nbytes, error)) {
        // The pickle parser may leave *error empty; provide a fallback.
        if (error != nullptr && error->empty()) {
            *error = "failed to parse torch zip pickle metadata";
        }
        return false;
    }
    for (auto& tensor_storage : parsed_tensors) {
        if (tensor_storage.storage_key.empty()) {
            set_error(error, "tensor '" + tensor_storage.name + "' has no storage key");
            return false;
        }
        const std::string entry_name = dir + "data/" + tensor_storage.storage_key;
        int zip_index = -1;
        uint64_t entry_size = 0;
        if (!find_zip_entry(zip, entry_name, &zip_index, &entry_size)) {
            set_error(error, "storage entry '" + entry_name + "' was not found");
            return false;
        }
        // Cross-check the pickle's claimed storage size against the actual
        // zip entry size (only when the pickle recorded one).
        auto it_storage_size = storage_nbytes.find(tensor_storage.storage_key);
        if (it_storage_size != storage_nbytes.end() && entry_size < it_storage_size->second) {
            set_error(error, "storage entry '" + entry_name + "' is smaller than pickle metadata");
            return false;
        }
        // The tensor view (offset + byte count) must fit inside the entry.
        uint64_t tensor_nbytes = tensor_storage.nbytes_to_read();
        if (tensor_storage.offset + tensor_nbytes > entry_size) {
            set_error(error, "tensor '" + tensor_storage.name + "' exceeds storage entry '" + entry_name + "'");
            return false;
        }
        tensor_storage.index_in_zip = zip_index;
        tensor_storage.storage_key.clear();  // resolved: data comes from the zip entry
        tensor_storages.push_back(tensor_storage);
    }
    return true;
}
// Read a torch zip checkpoint (torch.save >= 1.6.0): locate the data.pkl
// entry, parse it, and fill tensor_storages with tensors referencing their
// storage entries by zip index. Returns false with *error set on failure.
//
// Fixes vs. the original: the zip_entry_openbyindex() return code was
// ignored and zip_entry_name() can return NULL, which was passed straight
// into a std::string constructor — undefined behavior.
bool read_torch_zip_file(const std::string& file_path,
                         std::vector<TensorStorage>& tensor_storages,
                         std::string* error) {
    zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
    if (zip == nullptr) {
        set_error(error, "failed to open '" + file_path + "'");
        return false;
    }
    tensor_storages.clear();
    bool success = true;
    bool found_data_pkl = false;
    int n = (int)zip_entries_total(zip);
    for (int i = 0; i < n; ++i) {
        if (zip_entry_openbyindex(zip, i) != 0) {
            // Unopenable entry: skip rather than read an invalid name.
            continue;
        }
        const char* raw_name = zip_entry_name(zip);
        std::string name = raw_name != nullptr ? raw_name : "";
        size_t pos = name.find("data.pkl");
        if (pos != std::string::npos) {
            found_data_pkl = true;
            std::string dir = name.substr(0, pos);  // archive prefix, e.g. "archive/"
            void* pkl_data = nullptr;
            size_t pkl_size = 0;
            zip_entry_read(zip, &pkl_data, &pkl_size);
            if (pkl_data == nullptr || pkl_size == 0) {
                set_error(error, "failed to read '" + name + "' from '" + file_path + "'");
                success = false;
            } else if (!parse_zip_data_pkl((const uint8_t*)pkl_data, pkl_size, zip, dir, tensor_storages, error)) {
                success = false;
            }
            free(pkl_data);  // zip_entry_read allocates; free(nullptr) is a no-op
        }
        zip_entry_close(zip);
        if (!success) {
            break;
        }
    }
    if (success && !found_data_pkl) {
        set_error(error, "data.pkl was not found in '" + file_path + "'");
        success = false;
    }
    zip_close(zip);
    return success;
}

View File

@ -0,0 +1,14 @@
#ifndef __SD_MODEL_IO_TORCH_ZIP_IO_H__
#define __SD_MODEL_IO_TORCH_ZIP_IO_H__
#include <string>
#include <vector>
#include "tensor_storage.h"
// Returns true if the zip library can open file_path (torch zip candidate).
bool is_torch_zip_file(const std::string& file_path);
// Read a torch zip checkpoint (torch.save >= 1.6.0) and fill tensor_storages
// with tensors that reference their storage entries by zip index.
// Returns false on failure; *error is populated when error != nullptr.
bool read_torch_zip_file(const std::string& file_path,
                         std::vector<TensorStorage>& tensor_storages,
                         std::string* error = nullptr);
#endif  // __SD_MODEL_IO_TORCH_ZIP_IO_H__

View File

@ -24,6 +24,75 @@ static inline void preprocessing_set_4d(sd::Tensor<float>& tensor, float value,
tensor.values()[static_cast<size_t>(preprocessing_offset_4d(tensor, i0, i1, i2, i3))] = value; tensor.values()[static_cast<size_t>(preprocessing_offset_4d(tensor, i0, i1, i2, i3))] = value;
} }
// Quantize a float to an 8-bit channel value: clamp to [0, 1], then scale to
// [0, 255] with round-to-nearest.
static inline uint8_t preprocessing_float_to_u8(float value) {
    const float clamped = value <= 0.0f ? 0.0f : (value >= 1.0f ? 1.0f : value);
    return static_cast<uint8_t>(clamped * 255.0f + 0.5f);
}
// Convert one frame of a float tensor into packed, channel-interleaved 8-bit
// pixel data in image_data (values clamped to [0,1] before quantization).
// Two layouts are handled, distinguished by rank:
// - 4D: shape [W, H, C, frames]; each frame is C contiguous planes of W*H.
// - 5D: shape [W, H, frames, C, ...]; channels are strided by W*H*frames.
// NOTE(review): W/H axis meanings inferred from the companion
// sd_image_to_preprocessing_tensor ({width, height, channel, 1}); confirm
// the 5D layout against its callers.
static inline void preprocessing_tensor_frame_to_sd_image(const sd::Tensor<float>& tensor, int frame_index, uint8_t* image_data) {
    const auto& shape = tensor.shape();
    GGML_ASSERT(shape.size() == 4 || shape.size() == 5);
    GGML_ASSERT(image_data != nullptr);
    const int width = static_cast<int>(shape[0]);
    const int height = static_cast<int>(shape[1]);
    // The channel axis differs between the 4D and 5D layouts.
    const int channel = static_cast<int>(shape[shape.size() == 5 ? 3 : 2]);
    const size_t pixels = static_cast<size_t>(width) * static_cast<size_t>(height);
    const float* src = tensor.data();
    if (shape.size() == 4) {
        GGML_ASSERT(frame_index >= 0 && frame_index < shape[3]);
        // One frame = `channel` contiguous planes of `pixels` floats.
        const size_t frame_stride = pixels * static_cast<size_t>(channel);
        const float* frame_ptr = src + static_cast<size_t>(frame_index) * frame_stride;
        if (channel == 3) {
            // Fast path for RGB: interleave the three planes directly.
            const float* c0 = frame_ptr;
            const float* c1 = frame_ptr + pixels;
            const float* c2 = frame_ptr + pixels * 2;
            for (size_t i = 0; i < pixels; ++i) {
                image_data[i * 3 + 0] = preprocessing_float_to_u8(c0[i]);
                image_data[i * 3 + 1] = preprocessing_float_to_u8(c1[i]);
                image_data[i * 3 + 2] = preprocessing_float_to_u8(c2[i]);
            }
            return;
        }
        // Generic channel count: gather one value from each plane per pixel.
        for (size_t i = 0; i < pixels; ++i) {
            for (int c = 0; c < channel; ++c) {
                image_data[i * static_cast<size_t>(channel) + static_cast<size_t>(c)] =
                    preprocessing_float_to_u8(frame_ptr[i + pixels * static_cast<size_t>(c)]);
            }
        }
        return;
    }
    // 5D layout: frames are contiguous within each channel, so a channel is
    // `pixels * frame_count` floats away and the frame offset is `pixels`.
    GGML_ASSERT(frame_index >= 0 && frame_index < shape[2]);
    const size_t channel_stride = pixels * static_cast<size_t>(shape[2]);
    const float* frame_ptr = src + static_cast<size_t>(frame_index) * pixels;
    if (channel == 3) {
        // RGB fast path, mirroring the 4D branch but with the 5D stride.
        const float* c0 = frame_ptr;
        const float* c1 = frame_ptr + channel_stride;
        const float* c2 = frame_ptr + channel_stride * 2;
        for (size_t i = 0; i < pixels; ++i) {
            image_data[i * 3 + 0] = preprocessing_float_to_u8(c0[i]);
            image_data[i * 3 + 1] = preprocessing_float_to_u8(c1[i]);
            image_data[i * 3 + 2] = preprocessing_float_to_u8(c2[i]);
        }
        return;
    }
    for (size_t i = 0; i < pixels; ++i) {
        for (int c = 0; c < channel; ++c) {
            image_data[i * static_cast<size_t>(channel) + static_cast<size_t>(c)] =
                preprocessing_float_to_u8(frame_ptr[i + channel_stride * static_cast<size_t>(c)]);
        }
    }
}
static inline sd::Tensor<float> sd_image_to_preprocessing_tensor(sd_image_t image) { static inline sd::Tensor<float> sd_image_to_preprocessing_tensor(sd_image_t image) {
sd::Tensor<float> tensor({static_cast<int64_t>(image.width), static_cast<int64_t>(image.height), static_cast<int64_t>(image.channel), 1}); sd::Tensor<float> tensor({static_cast<int64_t>(image.width), static_cast<int64_t>(image.height), static_cast<int64_t>(image.channel), 1});
for (uint32_t y = 0; y < image.height; ++y) { for (uint32_t y = 0; y < image.height; ++y) {
@ -39,20 +108,7 @@ static inline sd::Tensor<float> sd_image_to_preprocessing_tensor(sd_image_t imag
static inline void preprocessing_tensor_to_sd_image(const sd::Tensor<float>& tensor, uint8_t* image_data) { static inline void preprocessing_tensor_to_sd_image(const sd::Tensor<float>& tensor, uint8_t* image_data) {
GGML_ASSERT(tensor.dim() == 4); GGML_ASSERT(tensor.dim() == 4);
GGML_ASSERT(tensor.shape()[3] == 1); GGML_ASSERT(tensor.shape()[3] == 1);
GGML_ASSERT(image_data != nullptr); preprocessing_tensor_frame_to_sd_image(tensor, 0, image_data);
int width = static_cast<int>(tensor.shape()[0]);
int height = static_cast<int>(tensor.shape()[1]);
int channel = static_cast<int>(tensor.shape()[2]);
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
for (int c = 0; c < channel; ++c) {
float value = preprocessing_get_4d(tensor, x, y, c, 0);
value = std::min(1.0f, std::max(0.0f, value));
image_data[(y * width + x) * channel + c] = static_cast<uint8_t>(std::round(value * 255.0f));
}
}
}
} }
static inline sd::Tensor<float> gaussian_kernel_tensor(int kernel_size) { static inline sd::Tensor<float> gaussian_kernel_tensor(int kernel_size) {

View File

@ -95,9 +95,7 @@ namespace Qwen {
float scale = 1.f / 32.f; float scale = 1.f / 32.f;
bool force_prec_f32 = false; bool force_prec_f32 = false;
#ifdef SD_USE_VULKAN
force_prec_f32 = true;
#endif
// The purpose of the scale here is to prevent NaN issues in certain situations. // The purpose of the scale here is to prevent NaN issues in certain situations.
// For example when using CUDA but the weights are k-quants (not all prompts). // For example when using CUDA but the weights are k-quants (not all prompts).
blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, force_prec_f32, scale)); blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, force_prec_f32, scale));
@ -124,6 +122,10 @@ namespace Qwen {
auto to_v = std::dynamic_pointer_cast<Linear>(blocks["to_v"]); auto to_v = std::dynamic_pointer_cast<Linear>(blocks["to_v"]);
auto to_out_0 = std::dynamic_pointer_cast<Linear>(blocks["to_out.0"]); auto to_out_0 = std::dynamic_pointer_cast<Linear>(blocks["to_out.0"]);
if (sd_backend_is(ctx->backend, "Vulkan")) {
to_out_0->set_force_prec_f32(true);
}
auto norm_added_q = std::dynamic_pointer_cast<UnaryBlock>(blocks["norm_added_q"]); auto norm_added_q = std::dynamic_pointer_cast<UnaryBlock>(blocks["norm_added_q"]);
auto norm_added_k = std::dynamic_pointer_cast<UnaryBlock>(blocks["norm_added_k"]); auto norm_added_k = std::dynamic_pointer_cast<UnaryBlock>(blocks["norm_added_k"]);
@ -410,6 +412,9 @@ namespace Qwen {
auto img = img_in->forward(ctx, x); auto img = img_in->forward(ctx, x);
auto txt = txt_norm->forward(ctx, context); auto txt = txt_norm->forward(ctx, context);
txt = txt_in->forward(ctx, txt); txt = txt_in->forward(ctx, txt);
sd::ggml_graph_cut::mark_graph_cut(img, "qwen_image.prelude", "img");
sd::ggml_graph_cut::mark_graph_cut(txt, "qwen_image.prelude", "txt");
// sd::ggml_graph_cut::mark_graph_cut(t_emb, "qwen_image.prelude", "t_emb");
for (int i = 0; i < params.num_layers; i++) { for (int i = 0; i < params.num_layers; i++) {
auto block = std::dynamic_pointer_cast<QwenImageTransformerBlock>(blocks["transformer_blocks." + std::to_string(i)]); auto block = std::dynamic_pointer_cast<QwenImageTransformerBlock>(blocks["transformer_blocks." + std::to_string(i)]);
@ -417,6 +422,8 @@ namespace Qwen {
auto result = block->forward(ctx, img, txt, t_emb, pe, modulate_index); auto result = block->forward(ctx, img, txt, t_emb, pe, modulate_index);
img = result.first; img = result.first;
txt = result.second; txt = result.second;
sd::ggml_graph_cut::mark_graph_cut(img, "qwen_image.transformer_blocks." + std::to_string(i), "img");
sd::ggml_graph_cut::mark_graph_cut(txt, "qwen_image.transformer_blocks." + std::to_string(i), "txt");
} }
if (params.zero_cond_t) { if (params.zero_cond_t) {

View File

@ -17,6 +17,7 @@
#include "pmid.hpp" #include "pmid.hpp"
#include "sample-cache.h" #include "sample-cache.h"
#include "tae.hpp" #include "tae.hpp"
#include "upscaler.h"
#include "vae.hpp" #include "vae.hpp"
#include "latent-preview.h" #include "latent-preview.h"
@ -143,6 +144,7 @@ public:
std::string taesd_path; std::string taesd_path;
sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0, 0}; sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0, 0};
bool offload_params_to_cpu = false; bool offload_params_to_cpu = false;
float max_vram = 0.f;
bool use_pmid = false; bool use_pmid = false;
bool is_using_v_parameterization = false; bool is_using_v_parameterization = false;
@ -171,60 +173,7 @@ public:
} }
void init_backend() { void init_backend() {
#ifdef SD_USE_CUDA backend = sd_get_default_backend();
LOG_DEBUG("Using CUDA backend");
backend = ggml_backend_cuda_init(0);
#endif
#ifdef SD_USE_METAL
LOG_DEBUG("Using Metal backend");
backend = ggml_backend_metal_init();
#endif
#ifdef SD_USE_VULKAN
LOG_DEBUG("Using Vulkan backend");
size_t device = 0;
const int device_count = ggml_backend_vk_get_device_count();
if (device_count) {
const char* SD_VK_DEVICE = getenv("SD_VK_DEVICE");
if (SD_VK_DEVICE != nullptr) {
std::string sd_vk_device_str = SD_VK_DEVICE;
try {
device = std::stoull(sd_vk_device_str);
} catch (const std::invalid_argument&) {
LOG_WARN("SD_VK_DEVICE environment variable is not a valid integer (%s). Falling back to device 0.", SD_VK_DEVICE);
device = 0;
} catch (const std::out_of_range&) {
LOG_WARN("SD_VK_DEVICE environment variable value is out of range for `unsigned long long` type (%s). Falling back to device 0.", SD_VK_DEVICE);
device = 0;
}
if (device >= device_count) {
LOG_WARN("Cannot find targeted vulkan device (%llu). Falling back to device 0.", device);
device = 0;
}
}
LOG_INFO("Vulkan: Using device %llu", device);
backend = ggml_backend_vk_init(device);
}
if (!backend) {
LOG_WARN("Failed to initialize Vulkan backend");
}
#endif
#ifdef SD_USE_OPENCL
LOG_DEBUG("Using OpenCL backend");
// ggml_log_set(ggml_log_callback_default, nullptr); // Optional ggml logs
backend = ggml_backend_opencl_init();
if (!backend) {
LOG_WARN("Failed to initialize OpenCL backend");
}
#endif
#ifdef SD_USE_SYCL
LOG_DEBUG("Using SYCL backend");
backend = ggml_backend_sycl_init(0);
#endif
if (!backend) {
LOG_DEBUG("Using CPU backend");
backend = ggml_backend_cpu_init();
}
} }
std::shared_ptr<RNG> get_rng(rng_type_t rng_type) { std::shared_ptr<RNG> get_rng(rng_type_t rng_type) {
@ -242,6 +191,7 @@ public:
vae_decode_only = sd_ctx_params->vae_decode_only; vae_decode_only = sd_ctx_params->vae_decode_only;
free_params_immediately = sd_ctx_params->free_params_immediately; free_params_immediately = sd_ctx_params->free_params_immediately;
offload_params_to_cpu = sd_ctx_params->offload_params_to_cpu; offload_params_to_cpu = sd_ctx_params->offload_params_to_cpu;
max_vram = sd_ctx_params->max_vram;
bool use_tae = false; bool use_tae = false;
@ -427,6 +377,10 @@ public:
bool clip_on_cpu = sd_ctx_params->keep_clip_on_cpu; bool clip_on_cpu = sd_ctx_params->keep_clip_on_cpu;
const size_t max_graph_vram_bytes = max_vram <= 0.f
? 0
: static_cast<size_t>(static_cast<double>(max_vram) * 1024.0 * 1024.0 * 1024.0);
{ {
clip_backend = backend; clip_backend = backend;
if (clip_on_cpu && !ggml_backend_is_cpu(backend)) { if (clip_on_cpu && !ggml_backend_is_cpu(backend)) {
@ -516,6 +470,7 @@ public:
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend, clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend,
offload_params_to_cpu, offload_params_to_cpu,
tensor_storage_map); tensor_storage_map);
clip_vision->set_max_graph_vram_bytes(max_graph_vram_bytes);
clip_vision->alloc_params_buffer(); clip_vision->alloc_params_buffer();
clip_vision->get_param_tensors(tensors); clip_vision->get_param_tensors(tensors);
} }
@ -592,9 +547,11 @@ public:
} }
} }
cond_stage_model->set_max_graph_vram_bytes(max_graph_vram_bytes);
cond_stage_model->alloc_params_buffer(); cond_stage_model->alloc_params_buffer();
cond_stage_model->get_param_tensors(tensors); cond_stage_model->get_param_tensors(tensors);
diffusion_model->set_max_graph_vram_bytes(max_graph_vram_bytes);
diffusion_model->alloc_params_buffer(); diffusion_model->alloc_params_buffer();
diffusion_model->get_param_tensors(tensors); diffusion_model->get_param_tensors(tensors);
@ -603,6 +560,7 @@ public:
} }
if (high_noise_diffusion_model) { if (high_noise_diffusion_model) {
high_noise_diffusion_model->set_max_graph_vram_bytes(max_graph_vram_bytes);
high_noise_diffusion_model->alloc_params_buffer(); high_noise_diffusion_model->alloc_params_buffer();
high_noise_diffusion_model->get_param_tensors(tensors); high_noise_diffusion_model->get_param_tensors(tensors);
} }
@ -675,16 +633,19 @@ public:
} else if (use_tae && !tae_preview_only) { } else if (use_tae && !tae_preview_only) {
LOG_INFO("using TAE for encoding / decoding"); LOG_INFO("using TAE for encoding / decoding");
first_stage_model = create_tae(); first_stage_model = create_tae();
first_stage_model->set_max_graph_vram_bytes(max_graph_vram_bytes);
first_stage_model->alloc_params_buffer(); first_stage_model->alloc_params_buffer();
first_stage_model->get_param_tensors(tensors, "tae"); first_stage_model->get_param_tensors(tensors, "tae");
} else { } else {
LOG_INFO("using VAE for encoding / decoding"); LOG_INFO("using VAE for encoding / decoding");
first_stage_model = create_vae(); first_stage_model = create_vae();
first_stage_model->set_max_graph_vram_bytes(max_graph_vram_bytes);
first_stage_model->alloc_params_buffer(); first_stage_model->alloc_params_buffer();
first_stage_model->get_param_tensors(tensors, "first_stage_model"); first_stage_model->get_param_tensors(tensors, "first_stage_model");
if (use_tae && tae_preview_only) { if (use_tae && tae_preview_only) {
LOG_INFO("using TAE for preview"); LOG_INFO("using TAE for preview");
preview_vae = create_tae(); preview_vae = create_tae();
preview_vae->set_max_graph_vram_bytes(max_graph_vram_bytes);
preview_vae->alloc_params_buffer(); preview_vae->alloc_params_buffer();
preview_vae->get_param_tensors(tensors, "tae"); preview_vae->get_param_tensors(tensors, "tae");
} }
@ -1156,9 +1117,14 @@ public:
cond_stage_lora_models.push_back(lora); cond_stage_lora_models.push_back(lora);
} }
} }
// Only attach the adapter when there are LoRAs targeting the cond_stage model.
// An empty MultiLoraAdapter still routes every linear/conv through
// forward_with_lora() instead of the direct kernel path — slower for no benefit.
if (!cond_stage_lora_models.empty()) {
auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(cond_stage_lora_models); auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(cond_stage_lora_models);
cond_stage_model->set_weight_adapter(multi_lora_adapter); cond_stage_model->set_weight_adapter(multi_lora_adapter);
} }
}
if (diffusion_model) { if (diffusion_model) {
std::vector<std::shared_ptr<LoraModel>> lora_models; std::vector<std::shared_ptr<LoraModel>> lora_models;
auto lora_state_diff = lora_state; auto lora_state_diff = lora_state;
@ -1188,12 +1154,14 @@ public:
diffusion_lora_models.push_back(lora); diffusion_lora_models.push_back(lora);
} }
} }
if (!diffusion_lora_models.empty()) {
auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(diffusion_lora_models); auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(diffusion_lora_models);
diffusion_model->set_weight_adapter(multi_lora_adapter); diffusion_model->set_weight_adapter(multi_lora_adapter);
if (high_noise_diffusion_model) { if (high_noise_diffusion_model) {
high_noise_diffusion_model->set_weight_adapter(multi_lora_adapter); high_noise_diffusion_model->set_weight_adapter(multi_lora_adapter);
} }
} }
}
if (first_stage_model) { if (first_stage_model) {
std::vector<std::shared_ptr<LoraModel>> lora_models; std::vector<std::shared_ptr<LoraModel>> lora_models;
@ -1224,10 +1192,12 @@ public:
first_stage_lora_models.push_back(lora); first_stage_lora_models.push_back(lora);
} }
} }
if (!first_stage_lora_models.empty()) {
auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(first_stage_lora_models); auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(first_stage_lora_models);
first_stage_model->set_weight_adapter(multi_lora_adapter); first_stage_model->set_weight_adapter(multi_lora_adapter);
} }
} }
}
void lora_stat() { void lora_stat() {
if (!cond_stage_lora_models.empty()) { if (!cond_stage_lora_models.empty()) {
@ -2113,6 +2083,35 @@ enum lora_apply_mode_t str_to_lora_apply_mode(const char* str) {
return LORA_APPLY_MODE_COUNT; return LORA_APPLY_MODE_COUNT;
} }
// Display names for sd_hires_upscaler_t. The order must match the enum
// values exactly: sd_hires_upscaler_name() indexes this array directly and
// str_to_sd_hires_upscaler() returns the matching position as the enum.
const char* hires_upscaler_to_str[] = {
    "None",
    "Latent",
    "Latent (nearest)",
    "Latent (nearest-exact)",
    "Latent (antialiased)",
    "Latent (bicubic)",
    "Latent (bicubic antialiased)",
    "Lanczos",
    "Nearest",
    "Model",
};
// Map an upscaler enum value to its display name; out-of-range values
// (including SD_HIRES_UPSCALER_COUNT) yield NONE_STR.
const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler) {
    const bool in_range = upscaler >= SD_HIRES_UPSCALER_NONE && upscaler < SD_HIRES_UPSCALER_COUNT;
    return in_range ? hires_upscaler_to_str[upscaler] : NONE_STR;
}
// Map a display name back to its upscaler enum value. Returns
// SD_HIRES_UPSCALER_COUNT when no name matches or str is null.
//
// Fix vs. the original: a nullptr str was passed straight to strcmp(),
// which is undefined behavior; guard it explicitly.
enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str) {
    if (str == nullptr) {
        return SD_HIRES_UPSCALER_COUNT;
    }
    for (int i = 0; i < SD_HIRES_UPSCALER_COUNT; i++) {
        if (strcmp(str, hires_upscaler_to_str[i]) == 0) {
            return (enum sd_hires_upscaler_t)i;
        }
    }
    return SD_HIRES_UPSCALER_COUNT;
}
void sd_cache_params_init(sd_cache_params_t* cache_params) { void sd_cache_params_init(sd_cache_params_t* cache_params) {
*cache_params = {}; *cache_params = {};
cache_params->mode = SD_CACHE_DISABLED; cache_params->mode = SD_CACHE_DISABLED;
@ -2141,6 +2140,19 @@ void sd_cache_params_init(sd_cache_params_t* cache_params) {
cache_params->spectrum_stop_percent = 0.9f; cache_params->spectrum_stop_percent = 0.9f;
} }
// Reset hires-fix parameters to their defaults: disabled, latent upscaler,
// 2x scale, denoising strength 0.7, 128px upscale tiles.
void sd_hires_params_init(sd_hires_params_t* hires_params) {
    *hires_params = {};  // zero everything first, then set explicit defaults
    hires_params->enabled = false;
    hires_params->upscaler = SD_HIRES_UPSCALER_LATENT;
    hires_params->model_path = nullptr;
    hires_params->scale = 2.0f;
    hires_params->target_width = 0;   // NOTE(review): 0 presumably means "derive from scale" — confirm at use site
    hires_params->target_height = 0;
    hires_params->steps = 0;          // NOTE(review): 0 presumably means "reuse base step count" — confirm at use site
    hires_params->denoising_strength = 0.7f;
    hires_params->upscale_tile_size = 128;
}
void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) { void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
*sd_ctx_params = {}; *sd_ctx_params = {};
sd_ctx_params->vae_decode_only = true; sd_ctx_params->vae_decode_only = true;
@ -2152,6 +2164,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
sd_ctx_params->prediction = PREDICTION_COUNT; sd_ctx_params->prediction = PREDICTION_COUNT;
sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO; sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO;
sd_ctx_params->offload_params_to_cpu = false; sd_ctx_params->offload_params_to_cpu = false;
sd_ctx_params->max_vram = 0.f;
sd_ctx_params->enable_mmap = false; sd_ctx_params->enable_mmap = false;
sd_ctx_params->keep_clip_on_cpu = false; sd_ctx_params->keep_clip_on_cpu = false;
sd_ctx_params->keep_control_net_on_cpu = false; sd_ctx_params->keep_control_net_on_cpu = false;
@ -2193,6 +2206,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
"sampler_rng_type: %s\n" "sampler_rng_type: %s\n"
"prediction: %s\n" "prediction: %s\n"
"offload_params_to_cpu: %s\n" "offload_params_to_cpu: %s\n"
"max_vram: %.3f\n"
"keep_clip_on_cpu: %s\n" "keep_clip_on_cpu: %s\n"
"keep_control_net_on_cpu: %s\n" "keep_control_net_on_cpu: %s\n"
"keep_vae_on_cpu: %s\n" "keep_vae_on_cpu: %s\n"
@ -2225,6 +2239,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
sd_rng_type_name(sd_ctx_params->sampler_rng_type), sd_rng_type_name(sd_ctx_params->sampler_rng_type),
sd_prediction_name(sd_ctx_params->prediction), sd_prediction_name(sd_ctx_params->prediction),
BOOL_STR(sd_ctx_params->offload_params_to_cpu), BOOL_STR(sd_ctx_params->offload_params_to_cpu),
sd_ctx_params->max_vram,
BOOL_STR(sd_ctx_params->keep_clip_on_cpu), BOOL_STR(sd_ctx_params->keep_clip_on_cpu),
BOOL_STR(sd_ctx_params->keep_control_net_on_cpu), BOOL_STR(sd_ctx_params->keep_control_net_on_cpu),
BOOL_STR(sd_ctx_params->keep_vae_on_cpu), BOOL_STR(sd_ctx_params->keep_vae_on_cpu),
@ -2310,6 +2325,7 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) {
sd_img_gen_params->pm_params = {nullptr, 0, nullptr, 20.f}; sd_img_gen_params->pm_params = {nullptr, 0, nullptr, 20.f};
sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f}; sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
sd_cache_params_init(&sd_img_gen_params->cache); sd_cache_params_init(&sd_img_gen_params->cache);
sd_hires_params_init(&sd_img_gen_params->hires);
} }
char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) { char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
@ -2336,7 +2352,8 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
"increase_ref_index: %s\n" "increase_ref_index: %s\n"
"control_strength: %.2f\n" "control_strength: %.2f\n"
"photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n" "photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
"VAE tiling: %s\n", "VAE tiling: %s\n"
"hires: {enabled=%s, upscaler=%s, model_path=%s, scale=%.2f, target=%dx%d, steps=%d, denoising_strength=%.2f}\n",
SAFE_STR(sd_img_gen_params->prompt), SAFE_STR(sd_img_gen_params->prompt),
SAFE_STR(sd_img_gen_params->negative_prompt), SAFE_STR(sd_img_gen_params->negative_prompt),
sd_img_gen_params->clip_skip, sd_img_gen_params->clip_skip,
@ -2353,7 +2370,15 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
sd_img_gen_params->pm_params.style_strength, sd_img_gen_params->pm_params.style_strength,
sd_img_gen_params->pm_params.id_images_count, sd_img_gen_params->pm_params.id_images_count,
SAFE_STR(sd_img_gen_params->pm_params.id_embed_path), SAFE_STR(sd_img_gen_params->pm_params.id_embed_path),
BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled)); BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled),
BOOL_STR(sd_img_gen_params->hires.enabled),
sd_hires_upscaler_name(sd_img_gen_params->hires.upscaler),
SAFE_STR(sd_img_gen_params->hires.model_path),
sd_img_gen_params->hires.scale,
sd_img_gen_params->hires.target_width,
sd_img_gen_params->hires.target_height,
sd_img_gen_params->hires.steps,
sd_img_gen_params->hires.denoising_strength);
const char* cache_mode_str = "disabled"; const char* cache_mode_str = "disabled";
if (sd_img_gen_params->cache.mode == SD_CACHE_EASYCACHE) { if (sd_img_gen_params->cache.mode == SD_CACHE_EASYCACHE) {
cache_mode_str = "easycache"; cache_mode_str = "easycache";
@ -2390,6 +2415,14 @@ struct sd_ctx_t {
StableDiffusionGGML* sd = nullptr; StableDiffusionGGML* sd = nullptr;
}; };
// True when the loaded architecture generates video frames (SVD, or any Wan variant).
static bool sd_version_supports_video_generation(SDVersion version) {
    if (version == VERSION_SVD) {
        return true;
    }
    return sd_version_is_wan(version);
}
// Image generation is supported by every architecture that is not video-only.
static bool sd_version_supports_image_generation(SDVersion version) {
    const bool video_only = sd_version_supports_video_generation(version);
    return !video_only;
}
sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params) { sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params) {
sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t)); sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t));
if (sd_ctx == nullptr) { if (sd_ctx == nullptr) {
@ -2419,6 +2452,20 @@ void free_sd_ctx(sd_ctx_t* sd_ctx) {
free(sd_ctx); free(sd_ctx);
} }
// Public API: report whether this context's loaded model can generate images.
// A null or uninitialized context reports false rather than crashing.
SD_API bool sd_ctx_supports_image_generation(const sd_ctx_t* sd_ctx) {
    const bool has_model = (sd_ctx != nullptr) && (sd_ctx->sd != nullptr);
    if (!has_model) {
        return false;
    }
    return sd_version_supports_image_generation(sd_ctx->sd->version);
}
// Public API: report whether this context's loaded model can generate video.
// A null or uninitialized context reports false rather than crashing.
SD_API bool sd_ctx_supports_video_generation(const sd_ctx_t* sd_ctx) {
    const bool has_model = (sd_ctx != nullptr) && (sd_ctx->sd != nullptr);
    if (!has_model) {
        return false;
    }
    return sd_version_supports_video_generation(sd_ctx->sd->version);
}
enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx) { enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx) {
if (sd_ctx != nullptr && sd_ctx->sd != nullptr) { if (sd_ctx != nullptr && sd_ctx->sd != nullptr) {
if (sd_version_is_dit(sd_ctx->sd->version)) { if (sd_version_is_dit(sd_ctx->sd->version)) {
@ -2435,8 +2482,10 @@ enum scheduler_t sd_get_default_scheduler(const sd_ctx_t* sd_ctx, enum sample_me
return EXPONENTIAL_SCHEDULER; return EXPONENTIAL_SCHEDULER;
} }
} }
if (sample_method == LCM_SAMPLE_METHOD) { if (sample_method == LCM_SAMPLE_METHOD || sample_method == TCD_SAMPLE_METHOD) {
return LCM_SCHEDULER; return LCM_SCHEDULER;
} else if (sample_method == DDIM_TRAILING_SAMPLE_METHOD) {
return SIMPLE_SCHEDULER;
} }
return DISCRETE_SCHEDULER; return DISCRETE_SCHEDULER;
} }
@ -2510,6 +2559,7 @@ struct GenerationRequest {
sd_guidance_params_t guidance = {}; sd_guidance_params_t guidance = {};
sd_guidance_params_t high_noise_guidance = {}; sd_guidance_params_t high_noise_guidance = {};
sd_pm_params_t pm_params = {}; sd_pm_params_t pm_params = {};
sd_hires_params_t hires = {};
int frames = -1; int frames = -1;
float vace_strength = 1.f; float vace_strength = 1.f;
@ -2531,6 +2581,7 @@ struct GenerationRequest {
auto_resize_ref_image = sd_img_gen_params->auto_resize_ref_image; auto_resize_ref_image = sd_img_gen_params->auto_resize_ref_image;
guidance = sd_img_gen_params->sample_params.guidance; guidance = sd_img_gen_params->sample_params.guidance;
pm_params = sd_img_gen_params->pm_params; pm_params = sd_img_gen_params->pm_params;
hires = sd_img_gen_params->hires;
cache_params = &sd_img_gen_params->cache; cache_params = &sd_img_gen_params->cache;
resolve(sd_ctx); resolve(sd_ctx);
} }
@ -2553,26 +2604,76 @@ struct GenerationRequest {
} }
void align_generation_request_size() { void align_generation_request_size() {
align_image_size(&width, &height, "generation request");
}
void align_image_size(int* target_width, int* target_height, const char* label) {
int spatial_multiple = vae_scale_factor * diffusion_model_down_factor; int spatial_multiple = vae_scale_factor * diffusion_model_down_factor;
int width_offset = align_up_offset(width, spatial_multiple); int width_offset = align_up_offset(*target_width, spatial_multiple);
int height_offset = align_up_offset(height, spatial_multiple); int height_offset = align_up_offset(*target_height, spatial_multiple);
if (width_offset <= 0 && height_offset <= 0) { if (width_offset <= 0 && height_offset <= 0) {
return; return;
} }
int original_width = width; int original_width = *target_width;
int original_height = height; int original_height = *target_height;
width += width_offset; *target_width += width_offset;
height += height_offset; *target_height += height_offset;
LOG_WARN("align up %dx%d to %dx%d (multiple=%d)", LOG_WARN("align %s up %dx%d to %dx%d (multiple=%d)",
label,
original_width, original_width,
original_height, original_height,
width, *target_width,
height, *target_height,
spatial_multiple); spatial_multiple);
} }
// Validate and normalize the hires-fix request in place.
// Any invalid combination disables hires (hires.enabled = false) with a warning
// instead of failing the whole generation. On success, target_width/target_height
// hold positive dimensions aligned to the latent spatial multiple.
void resolve_hires() {
    if (!hires.enabled) {
        return;
    }
    if (hires.upscaler == SD_HIRES_UPSCALER_NONE) {
        // "none" upscaler means hires is effectively off.
        hires.enabled = false;
        return;
    }
    if (hires.upscaler < SD_HIRES_UPSCALER_NONE || hires.upscaler >= SD_HIRES_UPSCALER_COUNT) {
        // Out-of-range enum value, e.g. a raw int from an API caller.
        LOG_WARN("hires upscaler '%d' is invalid, disabling hires", hires.upscaler);
        hires.enabled = false;
        return;
    }
    if (hires.upscaler == SD_HIRES_UPSCALER_MODEL && strlen(SAFE_STR(hires.model_path)) == 0) {
        LOG_WARN("hires model upscaler requires a model path, disabling hires");
        hires.enabled = false;
        return;
    }
    if (hires.scale <= 0.f && hires.target_width <= 0 && hires.target_height <= 0) {
        // Need at least one way to derive the target size: a scale or an explicit dimension.
        LOG_WARN("hires scale must be positive when no target size is set, disabling hires");
        hires.enabled = false;
        return;
    }
    // Keep denoising strength strictly positive (it is later used as a divisor for step count).
    hires.denoising_strength = std::clamp(hires.denoising_strength, 0.0001f, 1.f);
    hires.steps = std::max(0, hires.steps);  // 0 means "reuse the base sample step count" downstream
    if (hires.target_width > 0 && hires.target_height > 0) {
        // pass: explicit target size wins over scale
    } else if (hires.target_width > 0) {
        // Only one dimension given: mirror it, producing a square target.
        hires.target_height = hires.target_width;
    } else if (hires.target_height > 0) {
        hires.target_width = hires.target_height;
    } else {
        // Derive the target size from the base resolution and the scale factor.
        hires.target_width  = static_cast<int>(std::round(width * hires.scale));
        hires.target_height = static_cast<int>(std::round(height * hires.scale));
    }
    if (hires.target_width <= 0 || hires.target_height <= 0) {
        LOG_WARN("hires target size is not positive, disabling hires");
        hires.enabled = false;
        return;
    }
    // Round the target up to the VAE x diffusion spatial multiple.
    align_image_size(&hires.target_width, &hires.target_height, "hires target");
}
static void resolve_guidance(sd_ctx_t* sd_ctx, static void resolve_guidance(sd_ctx_t* sd_ctx,
sd_guidance_params_t* guidance, sd_guidance_params_t* guidance,
bool* use_uncond, bool* use_uncond,
@ -2613,6 +2714,7 @@ struct GenerationRequest {
void resolve(sd_ctx_t* sd_ctx) { void resolve(sd_ctx_t* sd_ctx) {
align_generation_request_size(); align_generation_request_size();
resolve_hires();
seed = resolve_seed(seed); seed = resolve_seed(seed);
resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_cond); resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_cond);
@ -3103,7 +3205,7 @@ static sd_image_t* decode_image_outputs(sd_ctx_t* sd_ctx,
} }
decoded_images.push_back(std::move(image)); decoded_images.push_back(std::move(image));
int64_t t2 = ggml_time_ms(); int64_t t2 = ggml_time_ms();
LOG_INFO("latent %" PRId64 " decoded, taking %.2fs", i + 1, (t2 - t1) * 1.0f / 1000); LOG_INFO("latent %zu decoded, taking %.2fs", i + 1, (t2 - t1) * 1.0f / 1000);
} }
int64_t t4 = ggml_time_ms(); int64_t t4 = ggml_time_ms();
@ -3125,6 +3227,135 @@ static sd_image_t* decode_image_outputs(sd_ctx_t* sd_ctx,
return result_images; return result_images;
} }
// Upscale one denoised latent to the hires target resolution.
//
// Two families of upscalers are handled:
//   * latent-space modes (SD_HIRES_UPSCALER_LATENT*): interpolate the latent
//     directly to target_size / vae_scale_factor, no VAE round-trip;
//   * pixel-space modes (MODEL / LANCZOS / NEAREST): decode the latent with the
//     VAE, upscale the image (ESRGAN-style model or a 2D filter), then re-encode
//     it — this requires the VAE encoder, i.e. vae_decode_only must be false.
//
// @param sd_ctx   generation context (provides VAE encode/decode)
// @param latent   denoised latent; axis 0 is width, axis 1 is height
// @param request  resolved request; hires.target_* are already validated/aligned
// @param upscaler model upscaler, required (non-null) only for SD_HIRES_UPSCALER_MODEL
// @return the upscaled latent, or an empty tensor on failure
static sd::Tensor<float> upscale_hires_latent(sd_ctx_t* sd_ctx,
                                              const sd::Tensor<float>& latent,
                                              const GenerationRequest& request,
                                              UpscalerGGML* upscaler) {
    // Latent target shape: same rank/channels as the input latent, spatial dims
    // scaled down from the pixel target by the VAE factor. Empty on invalid rank.
    auto get_hires_latent_target_shape = [&]() {
        std::vector<int64_t> target_shape = latent.shape();
        if (target_shape.size() < 2) {
            target_shape.clear();
            return target_shape;
        }
        target_shape[0] = request.hires.target_width / request.vae_scale_factor;
        target_shape[1] = request.hires.target_height / request.vae_scale_factor;
        return target_shape;
    };
    if (request.hires.upscaler == SD_HIRES_UPSCALER_LATENT ||
        request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_NEAREST ||
        request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_NEAREST_EXACT ||
        request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_ANTIALIASED ||
        request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_BICUBIC ||
        request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_BICUBIC_ANTIALIASED) {
        std::vector<int64_t> target_shape = get_hires_latent_target_shape();
        if (target_shape.empty()) {
            LOG_ERROR("latent has invalid shape for hires upscale");
            return {};
        }
        // Map each latent upscaler variant to an interpolation mode (+ antialias flag).
        sd::ops::InterpolateMode mode = sd::ops::InterpolateMode::Nearest;
        bool antialias                = false;
        switch (request.hires.upscaler) {
            case SD_HIRES_UPSCALER_LATENT:
                // Plain "latent" defaults to bilinear, matching common webui behavior.
                mode = sd::ops::InterpolateMode::Bilinear;
                break;
            case SD_HIRES_UPSCALER_LATENT_NEAREST:
                mode = sd::ops::InterpolateMode::Nearest;
                break;
            case SD_HIRES_UPSCALER_LATENT_NEAREST_EXACT:
                mode = sd::ops::InterpolateMode::NearestExact;
                break;
            case SD_HIRES_UPSCALER_LATENT_ANTIALIASED:
                mode      = sd::ops::InterpolateMode::Bilinear;
                antialias = true;
                break;
            case SD_HIRES_UPSCALER_LATENT_BICUBIC:
                mode = sd::ops::InterpolateMode::Bicubic;
                break;
            case SD_HIRES_UPSCALER_LATENT_BICUBIC_ANTIALIASED:
                mode      = sd::ops::InterpolateMode::Bicubic;
                antialias = true;
                break;
            default:
                break;  // unreachable: all latent variants are enumerated above
        }
        LOG_INFO("hires %s upscale %" PRId64 "x%" PRId64 " -> %" PRId64 "x%" PRId64,
                 sd_hires_upscaler_name(request.hires.upscaler),
                 latent.shape()[0],
                 latent.shape()[1],
                 target_shape[0],
                 target_shape[1]);
        return sd::ops::interpolate(latent, target_shape, mode, false, antialias);
    } else if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL ||
               request.hires.upscaler == SD_HIRES_UPSCALER_LANCZOS ||
               request.hires.upscaler == SD_HIRES_UPSCALER_NEAREST) {
        // Pixel-space path: needs the VAE encoder to go back to latent space afterwards.
        if (sd_ctx->sd->vae_decode_only) {
            LOG_ERROR("hires %s upscaler requires VAE encoder weights; create the context with vae_decode_only=false",
                      sd_hires_upscaler_name(request.hires.upscaler));
            return {};
        }
        if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL && upscaler == nullptr) {
            LOG_ERROR("hires model upscaler context is null");
            return {};
        }
        sd::Tensor<float> decoded = sd_ctx->sd->decode_first_stage(latent);
        if (decoded.empty()) {
            LOG_ERROR("decode_first_stage failed before hires %s upscale",
                      sd_hires_upscaler_name(request.hires.upscaler));
            return {};
        }
        sd::Tensor<float> upscaled_tensor;
        if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL) {
            upscaled_tensor = upscaler->upscale_tensor(decoded);
            if (upscaled_tensor.empty()) {
                LOG_ERROR("hires model upscale failed");
                return {};
            }
            // Model upscalers have a fixed scale factor; resample to the exact
            // requested size if it does not match.
            if (upscaled_tensor.shape()[0] != request.hires.target_width ||
                upscaled_tensor.shape()[1] != request.hires.target_height) {
                upscaled_tensor = sd::ops::interpolate(upscaled_tensor,
                                                       {request.hires.target_width,
                                                        request.hires.target_height,
                                                        upscaled_tensor.shape()[2],
                                                        upscaled_tensor.shape()[3]});
            }
        } else {
            sd::ops::InterpolateMode mode = request.hires.upscaler == SD_HIRES_UPSCALER_LANCZOS
                                                ? sd::ops::InterpolateMode::Lanczos
                                                : sd::ops::InterpolateMode::Nearest;
            LOG_INFO("hires %s image upscale %" PRId64 "x%" PRId64 " -> %dx%d",
                     sd_hires_upscaler_name(request.hires.upscaler),
                     decoded.shape()[0],
                     decoded.shape()[1],
                     request.hires.target_width,
                     request.hires.target_height);
            upscaled_tensor = sd::ops::interpolate(decoded,
                                                   {request.hires.target_width,
                                                    request.hires.target_height,
                                                    decoded.shape()[2],
                                                    decoded.shape()[3]},
                                                   mode);
            // Lanczos can overshoot outside the valid pixel range; clamp back to [0, 1].
            upscaled_tensor = sd::ops::clamp(upscaled_tensor, 0.0f, 1.0f);
        }
        sd::Tensor<float> upscaled_latent = sd_ctx->sd->encode_first_stage(upscaled_tensor);
        if (upscaled_latent.empty()) {
            LOG_ERROR("encode_first_stage failed after hires %s upscale",
                      sd_hires_upscaler_name(request.hires.upscaler));
        }
        return upscaled_latent;
    }
    LOG_ERROR("unsupported hires upscaler '%s'", sd_hires_upscaler_name(request.hires.upscaler));
    return {};
}
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params) { SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params) {
if (sd_ctx == nullptr || sd_img_gen_params == nullptr) { if (sd_ctx == nullptr || sd_img_gen_params == nullptr) {
return nullptr; return nullptr;
@ -3212,14 +3443,143 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
} }
return nullptr; return nullptr;
} }
if (sd_ctx->sd->free_params_immediately) { if (sd_ctx->sd->free_params_immediately && !request.hires.enabled) {
sd_ctx->sd->diffusion_model->free_params_buffer(); sd_ctx->sd->diffusion_model->free_params_buffer();
} }
int64_t denoise_end = ggml_time_ms(); int64_t denoise_end = ggml_time_ms();
LOG_INFO("generating %" PRId64 " latent images completed, taking %.2fs", LOG_INFO("generating %zu latent images completed, taking %.2fs",
final_latents.size(), final_latents.size(),
(denoise_end - denoise_start) * 1.0f / 1000); (denoise_end - denoise_start) * 1.0f / 1000);
if (request.hires.enabled && request.hires.target_width > 0) {
LOG_INFO("hires fix: upscaling to %dx%d", request.hires.target_width, request.hires.target_height);
std::unique_ptr<UpscalerGGML> hires_upscaler;
if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL) {
LOG_INFO("hires fix: loading model upscaler from '%s'", request.hires.model_path);
hires_upscaler = std::make_unique<UpscalerGGML>(sd_ctx->sd->n_threads,
false,
request.hires.upscale_tile_size);
const size_t max_graph_vram_bytes = sd_ctx->sd->max_vram <= 0.f
? 0
: static_cast<size_t>(static_cast<double>(sd_ctx->sd->max_vram) * 1024.0 * 1024.0 * 1024.0);
hires_upscaler->set_max_graph_vram_bytes(max_graph_vram_bytes);
if (!hires_upscaler->load_from_file(request.hires.model_path,
sd_ctx->sd->offload_params_to_cpu,
sd_ctx->sd->n_threads)) {
LOG_ERROR("load hires model upscaler failed");
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
return nullptr;
}
}
int hires_steps = request.hires.steps > 0 ? request.hires.steps : plan.sample_steps;
// sd-webui behavior: scale up total steps so trimming by denoising_strength yields exactly hires_steps effective steps,
// unlike img2img which trims from a fixed step count
hires_steps = static_cast<int>(hires_steps / request.hires.denoising_strength);
std::vector<float> hires_sigmas = sd_ctx->sd->denoiser->get_sigmas(
hires_steps,
sd_ctx->sd->get_image_seq_len(request.hires.target_height, request.hires.target_width),
sd_img_gen_params->sample_params.scheduler,
sd_ctx->sd->version);
size_t t_enc = static_cast<size_t>(hires_steps * request.hires.denoising_strength);
if (t_enc >= static_cast<size_t>(hires_steps)) {
t_enc = static_cast<size_t>(hires_steps) - 1;
}
std::vector<float> hires_sigma_sched(hires_sigmas.begin() + hires_steps - static_cast<int>(t_enc) - 1,
hires_sigmas.end());
LOG_INFO("hires fix: %d steps, denoising_strength=%.2f, sigma_sched_size=%zu",
hires_steps,
request.hires.denoising_strength,
hires_sigma_sched.size());
std::vector<sd::Tensor<float>> hires_final_latents;
int64_t hires_denoise_start = ggml_time_ms();
for (int b = 0; b < (int)final_latents.size(); b++) {
int64_t cur_seed = request.seed + b;
sd_ctx->sd->rng->manual_seed(cur_seed);
sd_ctx->sd->sampler_rng->manual_seed(cur_seed);
sd::Tensor<float> upscaled = upscale_hires_latent(sd_ctx,
final_latents[b],
request,
hires_upscaler.get());
if (upscaled.empty()) {
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
return nullptr;
}
sd::Tensor<float> noise = sd::randn_like<float>(upscaled, sd_ctx->sd->rng);
sd::Tensor<float> hires_denoise_mask;
if (!latents.denoise_mask.empty()) {
std::vector<int64_t> mask_shape = latents.denoise_mask.shape();
mask_shape[0] = upscaled.shape()[0];
mask_shape[1] = upscaled.shape()[1];
hires_denoise_mask = sd::ops::interpolate(latents.denoise_mask,
mask_shape,
sd::ops::InterpolateMode::NearestMax);
}
int64_t hires_sample_start = ggml_time_ms();
sd::Tensor<float> x_0 = sd_ctx->sd->sample(sd_ctx->sd->diffusion_model,
true,
upscaled,
std::move(noise),
embeds.cond,
embeds.uncond,
embeds.img_cond,
embeds.id_cond,
latents.control_image,
request.control_strength,
request.guidance,
plan.eta,
request.shifted_timestep,
plan.sample_method,
sd_ctx->sd->is_flow_denoiser(),
hires_sigma_sched,
plan.start_merge_step,
latents.ref_latents,
request.increase_ref_index,
hires_denoise_mask,
sd::Tensor<float>(),
1.f,
request.cache_params);
int64_t hires_sample_end = ggml_time_ms();
if (!x_0.empty()) {
LOG_INFO("hires sampling %d/%d completed, taking %.2fs",
b + 1,
(int)final_latents.size(),
(hires_sample_end - hires_sample_start) * 1.0f / 1000);
hires_final_latents.push_back(std::move(x_0));
continue;
}
LOG_ERROR("hires sampling for image %d/%d failed after %.2fs",
b + 1,
(int)final_latents.size(),
(hires_sample_end - hires_sample_start) * 1.0f / 1000);
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
return nullptr;
}
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
int64_t hires_denoise_end = ggml_time_ms();
LOG_INFO("hires fix completed, taking %.2fs", (hires_denoise_end - hires_denoise_start) * 1.0f / 1000);
final_latents = std::move(hires_final_latents);
}
auto result = decode_image_outputs(sd_ctx, request, final_latents); auto result = decode_image_outputs(sd_ctx, request, final_latents);
if (result == nullptr) { if (result == nullptr) {
return nullptr; return nullptr;

View File

@ -251,7 +251,8 @@ public:
ggml_tensor* x, ggml_tensor* x,
ggml_tensor* past_bias = nullptr, ggml_tensor* past_bias = nullptr,
ggml_tensor* attention_mask = nullptr, ggml_tensor* attention_mask = nullptr,
ggml_tensor* relative_position_bucket = nullptr) { ggml_tensor* relative_position_bucket = nullptr,
const std::string& graph_cut_prefix = "") {
// x: [N, n_token, model_dim] // x: [N, n_token, model_dim]
for (int i = 0; i < num_layers; i++) { for (int i = 0; i < num_layers; i++) {
auto block = std::dynamic_pointer_cast<T5Block>(blocks["block." + std::to_string(i)]); auto block = std::dynamic_pointer_cast<T5Block>(blocks["block." + std::to_string(i)]);
@ -259,6 +260,9 @@ public:
auto ret = block->forward(ctx, x, past_bias, attention_mask, relative_position_bucket); auto ret = block->forward(ctx, x, past_bias, attention_mask, relative_position_bucket);
x = ret.first; x = ret.first;
past_bias = ret.second; past_bias = ret.second;
if (!graph_cut_prefix.empty()) {
sd::ggml_graph_cut::mark_graph_cut(x, graph_cut_prefix + ".block." + std::to_string(i), "x");
}
} }
auto final_layer_norm = std::dynamic_pointer_cast<T5LayerNorm>(blocks["final_layer_norm"]); auto final_layer_norm = std::dynamic_pointer_cast<T5LayerNorm>(blocks["final_layer_norm"]);
@ -305,7 +309,8 @@ public:
auto encoder = std::dynamic_pointer_cast<T5Stack>(blocks["encoder"]); auto encoder = std::dynamic_pointer_cast<T5Stack>(blocks["encoder"]);
auto x = shared->forward(ctx, input_ids); auto x = shared->forward(ctx, input_ids);
x = encoder->forward(ctx, x, past_bias, attention_mask, relative_position_bucket); sd::ggml_graph_cut::mark_graph_cut(x, "t5.prelude", "x");
x = encoder->forward(ctx, x, past_bias, attention_mask, relative_position_bucket, "t5");
return x; return x;
} }
}; };

View File

@ -815,11 +815,202 @@ namespace sd {
namespace ops { namespace ops {
enum class InterpolateMode { enum class InterpolateMode {
Nearest, Nearest,
NearestExact,
NearestMax, NearestMax,
NearestMin, NearestMin,
NearestAvg, NearestAvg,
Bilinear,
Bicubic,
Lanczos,
}; };
// True for modes resolved by per-axis nearest-neighbor index mapping
// (as opposed to the weighted 2D filter modes).
inline bool is_nearest_like_interpolate_mode(InterpolateMode mode) {
    switch (mode) {
        case InterpolateMode::Nearest:
        case InterpolateMode::NearestExact:
        case InterpolateMode::NearestMax:
        case InterpolateMode::NearestMin:
        case InterpolateMode::NearestAvg:
            return true;
        default:
            return false;
    }
}
// True for modes implemented as separable weighted filters over dims 0 and 1.
inline bool is_2d_filter_interpolate_mode(InterpolateMode mode) {
    switch (mode) {
        case InterpolateMode::Bilinear:
        case InterpolateMode::Bicubic:
        case InterpolateMode::Lanczos:
            return true;
        default:
            return false;
    }
}
// Map an output coordinate to its source index using the "nearest-exact"
// convention: round the half-pixel-centered source position to the nearest
// integer, then clamp into [0, input_size).
inline int64_t nearest_exact_interpolate_index(int64_t output_index,
                                               int64_t input_size,
                                               int64_t output_size) {
    const double scale = static_cast<double>(input_size) / static_cast<double>(output_size);
    const double src   = (static_cast<double>(output_index) + 0.5) * scale - 0.5;
    int64_t picked     = static_cast<int64_t>(std::floor(src + 0.5));
    if (picked < 0) {
        picked = 0;
    }
    if (picked > input_size - 1) {
        picked = input_size - 1;
    }
    return picked;
}
// Triangle (tent) filter: weight 1 at x == 0, falling linearly to 0 at |x| >= 1.
inline double linear_interpolate_weight(double x) {
    const double d = std::fabs(x);
    return (d >= 1.0) ? 0.0 : 1.0 - d;
}
// Keys cubic convolution kernel with a = -0.75 (PyTorch's bicubic convention).
// Weight is 1 at x == 0, 0 at |x| == 1, and exactly 0 for |x| >= 2.
inline double cubic_interpolate_weight(double x) {
    constexpr double a = -0.75;  // match PyTorch bicubic interpolation
    const double d     = std::abs(x);
    if (d >= 2.0) {
        return 0.0;
    }
    if (d > 1.0) {
        // Outer lobe: a*(d^3 - 5d^2 + 8d - 4), in Horner form.
        return ((a * d - 5.0 * a) * d + 8.0 * a) * d - 4.0 * a;
    }
    // Inner lobe: (a+2)d^3 - (a+3)d^2 + 1.
    return ((a + 2.0) * d - (a + 3.0)) * d * d + 1.0;
}
// Normalized sinc: sin(pi*x) / (pi*x), with the removable singularity at 0
// handled explicitly (|x| below 1e-12 returns 1).
inline double sinc(double x) {
    constexpr double pi = 3.14159265358979323846;
    if (std::abs(x) >= 1e-12) {
        const double t = pi * x;
        return std::sin(t) / t;
    }
    return 1.0;
}
// Lanczos-3 window: sinc(x) * sinc(x / 3) inside |x| < 3, zero outside.
inline double lanczos_interpolate_weight(double x) {
    constexpr double radius = 3.0;
    const double d          = std::abs(x);
    if (d >= radius) {
        return 0.0;
    }
    return sinc(d) * sinc(d / radius);
}
// One (source index, filter weight) pair contributing to a single output
// sample along one axis of a separable interpolation filter.
struct InterpolateContributor {
    int64_t index;   // input index along the axis (already clamped to bounds)
    double weight;   // filter weight applied to input[index]
};
// Build, for each of the output_size positions along one axis, the list of
// input indices and filter weights that contribute to it. The filter support
// radius depends on the mode (bilinear: 1, bicubic: 2, lanczos: 3) and is
// widened by the downscale factor when antialias is requested, matching the
// half-pixel-center convention.
//
// @param input_size  source extent along the axis (must be > 0)
// @param output_size target extent along the axis (must be > 0)
// @param mode        one of the 2D filter modes; other modes throw
// @param antialias   widen the kernel by max(1, input/output) for downscaling
// @return contributors[out] = weighted taps for output position `out`;
//         never empty per position (falls back to a single nearest tap)
inline std::vector<std::vector<InterpolateContributor>> make_interpolate_contributors(
    int64_t input_size,
    int64_t output_size,
    InterpolateMode mode,
    bool antialias) {
    std::vector<std::vector<InterpolateContributor>> contributors(static_cast<size_t>(output_size));
    const double scale        = static_cast<double>(input_size) / static_cast<double>(output_size);
    // When antialiasing a downscale (scale > 1), stretch the kernel so it
    // averages over all covered source samples; never shrink it below 1.
    const double filter_scale = antialias ? std::max(1.0, scale) : 1.0;
    for (int64_t out = 0; out < output_size; ++out) {
        // Half-pixel-centered source position for this output sample.
        const double center = (static_cast<double>(out) + 0.5) * scale - 0.5;
        int64_t start = 0;
        int64_t end   = 0;
        // Support window [center - support, center + support], per mode.
        if (mode == InterpolateMode::Bilinear) {
            const double support = filter_scale;
            start = static_cast<int64_t>(std::ceil(center - support));
            end   = static_cast<int64_t>(std::floor(center + support));
        } else if (mode == InterpolateMode::Bicubic) {
            const double support = 2.0 * filter_scale;
            start = static_cast<int64_t>(std::ceil(center - support));
            end   = static_cast<int64_t>(std::floor(center + support));
        } else if (mode == InterpolateMode::Lanczos) {
            const double support = 3.0 * filter_scale;
            start = static_cast<int64_t>(std::ceil(center - support));
            end   = static_cast<int64_t>(std::floor(center + support));
        } else {
            tensor_throw_invalid_argument("Unsupported 2D filter interpolate mode: mode=" +
                                          std::to_string(static_cast<int>(mode)));
        }
        double weight_sum = 0.0;
        std::vector<InterpolateContributor>& axis_contributors = contributors[static_cast<size_t>(out)];
        axis_contributors.reserve(static_cast<size_t>(end - start + 1));
        for (int64_t in = start; in <= end; ++in) {
            double weight = 0.0;
            if (mode == InterpolateMode::Bilinear) {
                weight = linear_interpolate_weight((center - static_cast<double>(in)) / filter_scale);
            } else if (mode == InterpolateMode::Bicubic) {
                weight = cubic_interpolate_weight((center - static_cast<double>(in)) / filter_scale);
            } else {
                weight = lanczos_interpolate_weight((center - static_cast<double>(in)) / filter_scale);
            }
            if (weight == 0.0) {
                continue;  // tap outside the kernel's non-zero region
            }
            // Edge handling: out-of-range taps are clamped to the border sample
            // (replicate padding) but keep their original weight.
            const int64_t clamped_index = std::min(std::max<int64_t>(in, 0), input_size - 1);
            axis_contributors.push_back({clamped_index, weight});
            weight_sum += weight;
        }
        // Normalize so weights sum to 1 where the raw kernel does not already
        // (stretched antialias kernels, and the windowed-sinc lanczos kernel).
        if ((antialias || mode == InterpolateMode::Lanczos) &&
            std::abs(weight_sum) > 1e-12) {
            for (auto& contributor : axis_contributors) {
                contributor.weight /= weight_sum;
            }
        }
        // Degenerate window (all taps had zero weight): fall back to the
        // nearest source sample with full weight.
        if (axis_contributors.empty()) {
            const int64_t nearest = std::min(
                std::max<int64_t>(static_cast<int64_t>(std::floor(center + 0.5)), 0),
                input_size - 1);
            axis_contributors.push_back({nearest, 1.0});
        }
    }
    return contributors;
}
// Resize dimensions 0 (width) and 1 (height) of `input` to `output_shape`
// using a separable 2D filter (bilinear / bicubic / lanczos). All trailing
// dimensions must match between input and output; the tensor is processed as
// plane_count independent width*height planes. Accumulation is done in double
// regardless of T to limit rounding error.
//
// @param input        source tensor, rank >= 2
// @param output_shape full target shape; only dims 0 and 1 may differ
// @param mode         one of the 2D filter modes
// @param antialias    widen kernels for downscaling (see make_interpolate_contributors)
// @return the resized tensor
// @throws via tensor_throw_invalid_argument on rank < 2 or a trailing-dim mismatch
template <typename T>
inline Tensor<T> interpolate_2d_filter(const Tensor<T>& input,
                                       const std::vector<int64_t>& output_shape,
                                       InterpolateMode mode,
                                       bool antialias) {
    if (input.dim() < 2) {
        tensor_throw_invalid_argument("2D filter interpolate requires rank >= 2: input_shape=" +
                                      tensor_shape_to_string(input.shape()) + ", output_shape=" +
                                      tensor_shape_to_string(output_shape));
    }
    // Only the two leading spatial dims may be resized by this path.
    for (size_t i = 2; i < output_shape.size(); ++i) {
        if (input.shape()[i] != output_shape[i]) {
            tensor_throw_invalid_argument("2D filter interpolate only supports resizing dimensions 0 and 1: input_shape=" +
                                          tensor_shape_to_string(input.shape()) + ", output_shape=" +
                                          tensor_shape_to_string(output_shape));
        }
    }
    Tensor<T> output(output_shape);
    const int64_t input_width   = input.shape()[0];
    const int64_t input_height  = input.shape()[1];
    const int64_t output_width  = output_shape[0];
    const int64_t output_height = output_shape[1];
    const int64_t input_plane   = input_width * input_height;
    const int64_t output_plane  = output_width * output_height;
    // Every trailing dimension is folded into a flat plane count.
    const int64_t plane_count   = input.numel() / input_plane;
    // Precompute per-axis taps once; reused for every plane (separable filter).
    auto x_contributors = make_interpolate_contributors(input_width, output_width, mode, antialias);
    auto y_contributors = make_interpolate_contributors(input_height, output_height, mode, antialias);
    for (int64_t plane = 0; plane < plane_count; ++plane) {
        const int64_t input_plane_offset  = plane * input_plane;
        const int64_t output_plane_offset = plane * output_plane;
        for (int64_t y = 0; y < output_height; ++y) {
            const auto& y_axis = y_contributors[static_cast<size_t>(y)];
            for (int64_t x = 0; x < output_width; ++x) {
                const auto& x_axis = x_contributors[static_cast<size_t>(x)];
                // Weighted sum over the outer product of the x and y taps.
                double value = 0.0;
                for (const auto& yc : y_axis) {
                    const int64_t input_row_offset = input_plane_offset + yc.index * input_width;
                    for (const auto& xc : x_axis) {
                        value += static_cast<double>(input.data()[input_row_offset + xc.index]) *
                                 xc.weight * yc.weight;
                    }
                }
                output.data()[output_plane_offset + y * output_width + x] = static_cast<T>(value);
            }
        }
    }
    return output;
}
inline int64_t normalize_slice_bound(int64_t index, int64_t dim_size) { inline int64_t normalize_slice_bound(int64_t index, int64_t dim_size) {
if (index < 0) { if (index < 0) {
index += dim_size; index += dim_size;
@ -1014,17 +1205,20 @@ namespace sd {
inline Tensor<T> interpolate(const Tensor<T>& input, inline Tensor<T> interpolate(const Tensor<T>& input,
std::vector<int64_t> output_shape, std::vector<int64_t> output_shape,
InterpolateMode mode = InterpolateMode::Nearest, InterpolateMode mode = InterpolateMode::Nearest,
bool align_corners = false) { bool align_corners = false,
const bool is_nearest_like_mode = (mode == InterpolateMode::Nearest || bool antialias = false) {
mode == InterpolateMode::NearestMax || const bool is_nearest_like_mode = is_nearest_like_interpolate_mode(mode);
mode == InterpolateMode::NearestMin || const bool is_2d_filter_mode = is_2d_filter_interpolate_mode(mode);
mode == InterpolateMode::NearestAvg); if (!is_nearest_like_mode && !is_2d_filter_mode) {
if (!is_nearest_like_mode) { tensor_throw_invalid_argument("Unsupported interpolate mode: mode=" +
tensor_throw_invalid_argument("Only nearest-like interpolate modes are implemented, got mode=" + std::to_string(static_cast<int>(mode)));
}
if (antialias && !is_2d_filter_mode) {
tensor_throw_invalid_argument("Tensor interpolate antialias requires a 2D filter mode: mode=" +
std::to_string(static_cast<int>(mode))); std::to_string(static_cast<int>(mode)));
} }
if (align_corners) { if (align_corners) {
tensor_throw_invalid_argument("align_corners is not supported for nearest-like interpolate: input_shape=" + tensor_throw_invalid_argument("align_corners is not supported for tensor interpolate: input_shape=" +
tensor_shape_to_string(input.shape()) + ", output_shape=" + tensor_shape_to_string(input.shape()) + ", output_shape=" +
tensor_shape_to_string(output_shape)); tensor_shape_to_string(output_shape));
} }
@ -1051,6 +1245,10 @@ namespace sd {
} }
} }
if (is_2d_filter_mode) {
return interpolate_2d_filter(input, output_shape, mode, antialias);
}
bool has_downsampling = false; bool has_downsampling = false;
for (int64_t i = 0; i < input.dim(); ++i) { for (int64_t i = 0; i < input.dim(); ++i) {
if (input.shape()[i] > output_shape[i]) { if (input.shape()[i] > output_shape[i]) {
@ -1060,13 +1258,21 @@ namespace sd {
} }
Tensor<T> output(std::move(output_shape)); Tensor<T> output(std::move(output_shape));
if (mode == InterpolateMode::Nearest || !has_downsampling) { if (mode == InterpolateMode::Nearest ||
mode == InterpolateMode::NearestExact ||
!has_downsampling) {
for (int64_t flat = 0; flat < output.numel(); ++flat) { for (int64_t flat = 0; flat < output.numel(); ++flat) {
std::vector<int64_t> output_coord = tensor_unravel_index(flat, output.shape()); std::vector<int64_t> output_coord = tensor_unravel_index(flat, output.shape());
std::vector<int64_t> input_coord(static_cast<size_t>(input.dim()), 0); std::vector<int64_t> input_coord(static_cast<size_t>(input.dim()), 0);
for (size_t i = 0; i < static_cast<size_t>(input.dim()); ++i) { for (size_t i = 0; i < static_cast<size_t>(input.dim()); ++i) {
if (mode == InterpolateMode::NearestExact) {
input_coord[i] = nearest_exact_interpolate_index(output_coord[i],
input.shape()[i],
output.shape()[i]);
} else {
input_coord[i] = output_coord[i] * input.shape()[i] / output.shape()[i]; input_coord[i] = output_coord[i] * input.shape()[i] / output.shape()[i];
} }
}
output[flat] = input.index(input_coord); output[flat] = input.index(input_coord);
} }
@ -1083,6 +1289,12 @@ namespace sd {
return T(0); return T(0);
case InterpolateMode::Nearest: case InterpolateMode::Nearest:
return T(0); return T(0);
case InterpolateMode::NearestExact:
return T(0);
case InterpolateMode::Bilinear:
case InterpolateMode::Bicubic:
case InterpolateMode::Lanczos:
break;
} }
tensor_throw_invalid_argument("Unsupported interpolate mode: mode=" + tensor_throw_invalid_argument("Unsupported interpolate mode: mode=" +
@ -1102,6 +1314,12 @@ namespace sd {
break; break;
case InterpolateMode::Nearest: case InterpolateMode::Nearest:
break; break;
case InterpolateMode::NearestExact:
break;
case InterpolateMode::Bilinear:
case InterpolateMode::Bicubic:
case InterpolateMode::Lanczos:
break;
} }
}; };
@ -1157,17 +1375,20 @@ namespace sd {
const std::optional<std::vector<int64_t>>& size, const std::optional<std::vector<int64_t>>& size,
const std::optional<std::vector<double>>& scale_factor, const std::optional<std::vector<double>>& scale_factor,
InterpolateMode mode = InterpolateMode::Nearest, InterpolateMode mode = InterpolateMode::Nearest,
bool align_corners = false) { bool align_corners = false,
const bool is_nearest_like_mode = (mode == InterpolateMode::Nearest || bool antialias = false) {
mode == InterpolateMode::NearestMax || const bool is_nearest_like_mode = is_nearest_like_interpolate_mode(mode);
mode == InterpolateMode::NearestMin || const bool is_2d_filter_mode = is_2d_filter_interpolate_mode(mode);
mode == InterpolateMode::NearestAvg); if (!is_nearest_like_mode && !is_2d_filter_mode) {
if (!is_nearest_like_mode) { tensor_throw_invalid_argument("Unsupported interpolate mode: mode=" +
tensor_throw_invalid_argument("Only nearest-like interpolate modes are implemented, got mode=" + std::to_string(static_cast<int>(mode)));
}
if (antialias && !is_2d_filter_mode) {
tensor_throw_invalid_argument("Tensor interpolate antialias requires a 2D filter mode: mode=" +
std::to_string(static_cast<int>(mode))); std::to_string(static_cast<int>(mode)));
} }
if (align_corners) { if (align_corners) {
tensor_throw_invalid_argument("align_corners is not supported for nearest-like interpolate: input_shape=" + tensor_throw_invalid_argument("align_corners is not supported for tensor interpolate: input_shape=" +
tensor_shape_to_string(input.shape())); tensor_shape_to_string(input.shape()));
} }
if (size.has_value() == scale_factor.has_value()) { if (size.has_value() == scale_factor.has_value()) {
@ -1211,7 +1432,7 @@ namespace sd {
} }
} }
return interpolate(input, std::move(output_shape), mode, align_corners); return interpolate(input, std::move(output_shape), mode, align_corners, antialias);
} }
template <typename T> template <typename T>
@ -1219,12 +1440,14 @@ namespace sd {
const std::optional<std::vector<int64_t>>& size, const std::optional<std::vector<int64_t>>& size,
double scale_factor, double scale_factor,
InterpolateMode mode = InterpolateMode::Nearest, InterpolateMode mode = InterpolateMode::Nearest,
bool align_corners = false) { bool align_corners = false,
bool antialias = false) {
return interpolate(input, return interpolate(input,
size, size,
std::vector<double>(size.has_value() ? size->size() : input.dim(), scale_factor), std::vector<double>(size.has_value() ? size->size() : input.dim(), scale_factor),
mode, mode,
align_corners); align_corners,
antialias);
} }
template <typename T> template <typename T>

View File

@ -62,7 +62,7 @@ void CLIPTokenizer::load_from_merges(const std::string& merges_utf8_str) {
} }
vocab.push_back(utf8_to_utf32("<|startoftext|>")); vocab.push_back(utf8_to_utf32("<|startoftext|>"));
vocab.push_back(utf8_to_utf32("<|endoftext|>")); vocab.push_back(utf8_to_utf32("<|endoftext|>"));
LOG_DEBUG("vocab size: %llu", vocab.size()); LOG_DEBUG("vocab size: %zu", vocab.size());
int i = 0; int i = 0;
for (const auto& token : vocab) { for (const auto& token : vocab) {
encoder[token] = i; encoder[token] = i;

View File

@ -28,7 +28,7 @@ void MistralTokenizer::load_from_merges(const std::string& merges_utf8_str, cons
byte_decoder[pair.second] = pair.first; byte_decoder[pair.second] = pair.first;
} }
std::vector<std::u32string> merges = split_utf32(merges_utf8_str); std::vector<std::u32string> merges = split_utf32(merges_utf8_str);
LOG_DEBUG("merges size %llu", merges.size()); LOG_DEBUG("merges size %zu", merges.size());
std::vector<std::pair<std::u32string, std::u32string>> merge_pairs; std::vector<std::pair<std::u32string, std::u32string>> merge_pairs;
for (const auto& merge : merges) { for (const auto& merge : merges) {
size_t space_pos = merge.find(' '); size_t space_pos = merge.find(' ');

View File

@ -11,7 +11,7 @@ void Qwen2Tokenizer::load_from_merges(const std::string& merges_utf8_str) {
} }
std::vector<std::u32string> merges = split_utf32(merges_utf8_str); std::vector<std::u32string> merges = split_utf32(merges_utf8_str);
LOG_DEBUG("merges size %llu", merges.size()); LOG_DEBUG("merges size %zu", merges.size());
std::vector<std::pair<std::u32string, std::u32string>> merge_pairs; std::vector<std::pair<std::u32string, std::u32string>> merge_pairs;
for (const auto& merge : merges) { for (const auto& merge : merges) {
size_t space_pos = merge.find(' '); size_t space_pos = merge.find(' ');

View File

@ -482,12 +482,14 @@ public:
emb = ggml_add(ctx->ggml_ctx, emb, label_emb); // [N, time_embed_dim] emb = ggml_add(ctx->ggml_ctx, emb, label_emb); // [N, time_embed_dim]
} }
// sd::ggml_graph_cut::mark_graph_cut(emb, "unet.prelude", "emb");
// input_blocks // input_blocks
std::vector<ggml_tensor*> hs; std::vector<ggml_tensor*> hs;
// input block 0 // input block 0
auto h = input_blocks_0_0->forward(ctx, x); auto h = input_blocks_0_0->forward(ctx, x);
sd::ggml_graph_cut::mark_graph_cut(h, "unet.input_blocks.0", "h");
ggml_set_name(h, "bench-start"); ggml_set_name(h, "bench-start");
hs.push_back(h); hs.push_back(h);
@ -505,6 +507,7 @@ public:
std::string name = "input_blocks." + std::to_string(input_block_idx) + ".1"; std::string name = "input_blocks." + std::to_string(input_block_idx) + ".1";
h = attention_layer_forward(name, ctx, h, context, num_video_frames); // [N, mult*model_channels, h, w] h = attention_layer_forward(name, ctx, h, context, num_video_frames); // [N, mult*model_channels, h, w]
} }
sd::ggml_graph_cut::mark_graph_cut(h, "unet.input_blocks." + std::to_string(input_block_idx), "h");
hs.push_back(h); hs.push_back(h);
} }
if (tiny_unet) { if (tiny_unet) {
@ -518,6 +521,7 @@ public:
auto block = std::dynamic_pointer_cast<DownSampleBlock>(blocks[name]); auto block = std::dynamic_pointer_cast<DownSampleBlock>(blocks[name]);
h = block->forward(ctx, h); // [N, mult*model_channels, h/(2^(i+1)), w/(2^(i+1))] h = block->forward(ctx, h); // [N, mult*model_channels, h/(2^(i+1)), w/(2^(i+1))]
// sd::ggml_graph_cut::mark_graph_cut(h, "unet.input_blocks." + std::to_string(input_block_idx), "h");
hs.push_back(h); hs.push_back(h);
} }
} }
@ -531,6 +535,7 @@ public:
h = resblock_forward("middle_block.2", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8] h = resblock_forward("middle_block.2", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8]
} }
} }
sd::ggml_graph_cut::mark_graph_cut(h, "unet.middle_block", "h");
if (controls.size() > 0) { if (controls.size() > 0) {
auto cs = ggml_ext_scale(ctx->ggml_ctx, controls[controls.size() - 1], control_strength, true); auto cs = ggml_ext_scale(ctx->ggml_ctx, controls[controls.size() - 1], control_strength, true);
h = ggml_add(ctx->ggml_ctx, h, cs); // middle control h = ggml_add(ctx->ggml_ctx, h, cs); // middle control
@ -581,6 +586,7 @@ public:
} }
output_block_idx += 1; output_block_idx += 1;
sd::ggml_graph_cut::mark_graph_cut(h, "unet.output_blocks." + std::to_string(output_block_idx - 1), "h");
} }
} }

View File

@ -1,50 +1,31 @@
#include "esrgan.hpp" #include "upscaler.h"
#include "ggml_extend.hpp" #include "ggml_extend.hpp"
#include "model.h" #include "model.h"
#include "stable-diffusion.h" #include "stable-diffusion.h"
#include "util.h" #include "util.h"
struct UpscalerGGML { UpscalerGGML::UpscalerGGML(int n_threads,
ggml_backend_t backend = nullptr; // general backend bool direct,
ggml_type model_data_type = GGML_TYPE_F16; int tile_size)
std::shared_ptr<ESRGAN> esrgan_upscaler;
std::string esrgan_path;
int n_threads;
bool direct = false;
int tile_size = 128;
UpscalerGGML(int n_threads,
bool direct = false,
int tile_size = 128)
: n_threads(n_threads), : n_threads(n_threads),
direct(direct), direct(direct),
tile_size(tile_size) { tile_size(tile_size) {
} }
bool load_from_file(const std::string& esrgan_path, void UpscalerGGML::set_max_graph_vram_bytes(size_t max_vram_bytes) {
max_graph_vram_bytes = max_vram_bytes;
if (esrgan_upscaler) {
esrgan_upscaler->set_max_graph_vram_bytes(max_vram_bytes);
}
}
bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
bool offload_params_to_cpu, bool offload_params_to_cpu,
int n_threads) { int n_threads) {
ggml_log_set(ggml_log_callback_default, nullptr); ggml_log_set(ggml_log_callback_default, nullptr);
#ifdef SD_USE_CUDA
LOG_DEBUG("Using CUDA backend"); backend = sd_get_default_backend();
backend = ggml_backend_cuda_init(0);
#endif
#ifdef SD_USE_METAL
LOG_DEBUG("Using Metal backend");
backend = ggml_backend_metal_init();
#endif
#ifdef SD_USE_VULKAN
LOG_DEBUG("Using Vulkan backend");
backend = ggml_backend_vk_init(0);
#endif
#ifdef SD_USE_OPENCL
LOG_DEBUG("Using OpenCL backend");
backend = ggml_backend_opencl_init();
#endif
#ifdef SD_USE_SYCL
LOG_DEBUG("Using SYCL backend");
backend = ggml_backend_sycl_init(0);
#endif
ModelLoader model_loader; ModelLoader model_loader;
if (!model_loader.init_from_file_and_convert_name(esrgan_path)) { if (!model_loader.init_from_file_and_convert_name(esrgan_path)) {
LOG_ERROR("init model loader from file failed: '%s'", esrgan_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", esrgan_path.c_str());
@ -56,6 +37,7 @@ struct UpscalerGGML {
} }
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type)); LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, tile_size, model_loader.get_tensor_storage_map()); esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, tile_size, model_loader.get_tensor_storage_map());
esrgan_upscaler->set_max_graph_vram_bytes(max_graph_vram_bytes);
if (direct) { if (direct) {
esrgan_upscaler->set_conv2d_direct_enabled(true); esrgan_upscaler->set_conv2d_direct_enabled(true);
} }
@ -65,7 +47,7 @@ struct UpscalerGGML {
return true; return true;
} }
sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor) { sd::Tensor<float> UpscalerGGML::upscale_tensor(const sd::Tensor<float>& input_tensor) {
sd::Tensor<float> upscaled; sd::Tensor<float> upscaled;
if (tile_size <= 0 || (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size)) { if (tile_size <= 0 || (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size)) {
upscaled = esrgan_upscaler->compute(n_threads, input_tensor); upscaled = esrgan_upscaler->compute(n_threads, input_tensor);
@ -98,7 +80,7 @@ struct UpscalerGGML {
return upscaled; return upscaled;
} }
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor) { sd_image_t UpscalerGGML::upscale(sd_image_t input_image, uint32_t upscale_factor) {
// upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth // upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth
sd_image_t upscaled_image = {0, 0, 0, nullptr}; sd_image_t upscaled_image = {0, 0, 0, nullptr};
int output_width = (int)input_image.width * esrgan_upscaler->scale; int output_width = (int)input_image.width * esrgan_upscaler->scale;
@ -119,7 +101,6 @@ struct UpscalerGGML {
upscaled_image = upscaled_data; upscaled_image = upscaled_data;
return upscaled_image; return upscaled_image;
} }
};
struct upscaler_ctx_t { struct upscaler_ctx_t {
UpscalerGGML* upscaler = nullptr; UpscalerGGML* upscaler = nullptr;

33
src/upscaler.h Normal file
View File

@ -0,0 +1,33 @@
#ifndef __SD_UPSCALER_H__
#define __SD_UPSCALER_H__
#include "esrgan.hpp"
#include "stable-diffusion.h"
#include "tensor.hpp"
#include <memory>
#include <string>
struct UpscalerGGML {
ggml_backend_t backend = nullptr; // general backend
ggml_type model_data_type = GGML_TYPE_F16;
std::shared_ptr<ESRGAN> esrgan_upscaler;
std::string esrgan_path;
int n_threads;
bool direct = false;
int tile_size = 128;
size_t max_graph_vram_bytes = 0;
UpscalerGGML(int n_threads,
bool direct = false,
int tile_size = 128);
bool load_from_file(const std::string& esrgan_path,
bool offload_params_to_cpu,
int n_threads);
void set_max_graph_vram_bytes(size_t max_vram_bytes);
sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor);
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor);
};
#endif // __SD_UPSCALER_H__

View File

@ -23,8 +23,9 @@
#include <unistd.h> #include <unistd.h>
#endif #endif
#include "ggml-cpu.h" #include "ggml-backend.h"
#include "ggml.h" #include "ggml.h"
#include "ggml_extend_backend.hpp"
#include "stable-diffusion.h" #include "stable-diffusion.h"
bool ends_with(const std::string& str, const std::string& ending) { bool ends_with(const std::string& str, const std::string& ending) {
@ -119,10 +120,10 @@ std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
filename.c_str(), filename.c_str(),
GENERIC_READ, GENERIC_READ,
FILE_SHARE_READ, FILE_SHARE_READ,
NULL, nullptr,
OPEN_EXISTING, OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL, FILE_ATTRIBUTE_NORMAL,
NULL); nullptr);
if (file_handle == INVALID_HANDLE_VALUE) { if (file_handle == INVALID_HANDLE_VALUE) {
return nullptr; return nullptr;
@ -136,16 +137,16 @@ std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
file_size = static_cast<size_t>(size.QuadPart); file_size = static_cast<size_t>(size.QuadPart);
HANDLE mapping_handle = CreateFileMapping(file_handle, NULL, PAGE_READONLY, 0, 0, NULL); HANDLE mapping_handle = CreateFileMapping(file_handle, nullptr, PAGE_READONLY, 0, 0, nullptr);
if (mapping_handle == NULL) { if (mapping_handle == nullptr) {
CloseHandle(file_handle); CloseHandle(file_handle);
return nullptr; return nullptr;
} }
mapped_data = MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, file_size); mapped_data = MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, file_size);
if (mapped_data == NULL) { if (mapped_data == nullptr) {
CloseHandle(mapping_handle); CloseHandle(mapping_handle);
CloseHandle(file_handle); CloseHandle(file_handle);
return nullptr; return nullptr;
@ -203,7 +204,7 @@ std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
size_t file_size = sb.st_size; size_t file_size = sb.st_size;
void* mapped_data = mmap(NULL, file_size, PROT_READ, mmap_flags, file_descriptor, 0); void* mapped_data = mmap(nullptr, file_size, PROT_READ, mmap_flags, file_descriptor, 0);
close(file_descriptor); close(file_descriptor);
@ -495,26 +496,6 @@ sd_progress_cb_t sd_get_progress_callback() {
void* sd_get_progress_callback_data() { void* sd_get_progress_callback_data() {
return sd_progress_cb_data; return sd_progress_cb_data;
} }
const char* sd_get_system_info() {
static char buffer[1024];
std::stringstream ss;
ss << "System Info: \n";
ss << " SSE3 = " << ggml_cpu_has_sse3() << " | ";
ss << " AVX = " << ggml_cpu_has_avx() << " | ";
ss << " AVX2 = " << ggml_cpu_has_avx2() << " | ";
ss << " AVX512 = " << ggml_cpu_has_avx512() << " | ";
ss << " AVX512_VBMI = " << ggml_cpu_has_avx512_vbmi() << " | ";
ss << " AVX512_VNNI = " << ggml_cpu_has_avx512_vnni() << " | ";
ss << " FMA = " << ggml_cpu_has_fma() << " | ";
ss << " NEON = " << ggml_cpu_has_neon() << " | ";
ss << " ARM_FMA = " << ggml_cpu_has_arm_fma() << " | ";
ss << " F16C = " << ggml_cpu_has_f16c() << " | ";
ss << " FP16_VA = " << ggml_cpu_has_fp16_va() << " | ";
ss << " WASM_SIMD = " << ggml_cpu_has_wasm_simd() << " | ";
ss << " VSX = " << ggml_cpu_has_vsx() << " | ";
snprintf(buffer, sizeof(buffer), "%s", ss.str().c_str());
return buffer;
}
sd_image_t tensor_to_sd_image(const sd::Tensor<float>& tensor, int frame_index) { sd_image_t tensor_to_sd_image(const sd::Tensor<float>& tensor, int frame_index) {
const auto& shape = tensor.shape(); const auto& shape = tensor.shape();
@ -524,17 +505,7 @@ sd_image_t tensor_to_sd_image(const sd::Tensor<float>& tensor, int frame_index)
int channel = static_cast<int>(shape[shape.size() == 5 ? 3 : 2]); int channel = static_cast<int>(shape[shape.size() == 5 ? 3 : 2]);
uint8_t* data = (uint8_t*)malloc(static_cast<size_t>(width * height * channel)); uint8_t* data = (uint8_t*)malloc(static_cast<size_t>(width * height * channel));
GGML_ASSERT(data != nullptr); GGML_ASSERT(data != nullptr);
preprocessing_tensor_frame_to_sd_image(tensor, frame_index, data);
for (int iw = 0; iw < width; ++iw) {
for (int ih = 0; ih < height; ++ih) {
for (int ic = 0; ic < channel; ++ic) {
float value = shape.size() == 5 ? tensor.index(iw, ih, frame_index, ic, 0)
: tensor.index(iw, ih, ic, frame_index);
value = std::clamp(value, 0.0f, 1.0f);
data[(ih * width + iw) * channel + ic] = static_cast<uint8_t>(std::round(value * 255.0f));
}
}
}
return { return {
static_cast<uint32_t>(width), static_cast<uint32_t>(width),
static_cast<uint32_t>(height), static_cast<uint32_t>(height),
@ -718,3 +689,100 @@ std::vector<std::pair<std::string, float>> parse_prompt_attention(const std::str
return res; return res;
} }
// test if the backend is a specific one, e.g. "CUDA", "ROCm", "Vulkan" etc.
bool sd_backend_is(ggml_backend_t backend, const std::string& name) {
if (!backend) {
return false;
}
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
if (!dev)
return false;
std::string dev_name = ggml_backend_dev_name(dev);
return dev_name.find(name) != std::string::npos;
}
ggml_backend_t sd_get_default_backend() {
ggml_backend_load_all_once();
static std::once_flag once;
std::call_once(once, []() {
size_t dev_count = ggml_backend_dev_count();
if (dev_count == 0) {
LOG_ERROR("No devices found!");
} else {
LOG_DEBUG("Found %zu backend devices:", dev_count);
for (size_t i = 0; i < dev_count; ++i) {
auto dev = ggml_backend_dev_get(i);
LOG_DEBUG("#%zu: %s", i, ggml_backend_dev_name(dev));
}
}
});
ggml_backend_t backend = nullptr;
const char* SD_VK_DEVICE = getenv("SD_VK_DEVICE");
if (SD_VK_DEVICE != nullptr) {
std::string sd_vk_device_str = SD_VK_DEVICE;
try {
unsigned long long device = std::stoull(sd_vk_device_str);
std::string vk_device_name = "Vulkan" + std::to_string(device);
if (backend_name_exists(vk_device_name)) {
LOG_INFO("Selecting %s as main device by env var SD_VK_DEVICE", vk_device_name.c_str());
backend = init_named_backend(vk_device_name);
if (!backend) {
LOG_WARN("Device %s requested by SD_VK_DEVICE failed to init. Falling back to the default device.", vk_device_name.c_str());
}
} else {
LOG_WARN("Device %s requested by SD_VK_DEVICE was not found. Falling back to the default device.", vk_device_name.c_str());
}
} catch (const std::invalid_argument&) {
LOG_WARN("SD_VK_DEVICE environment variable is not a valid integer (%s). Falling back to the default device.", SD_VK_DEVICE);
} catch (const std::out_of_range&) {
LOG_WARN("SD_VK_DEVICE environment variable value is out of range for `unsigned long long` type (%s). Falling back to the default device.", SD_VK_DEVICE);
}
}
if (!backend) {
std::string dev_name = get_default_backend_name();
backend = init_named_backend(dev_name);
if (!backend && !dev_name.empty()) {
LOG_WARN("device %s failed to init", dev_name.c_str());
}
}
if (!backend) {
LOG_WARN("loading CPU backend");
backend = ggml_backend_cpu_init();
}
if (ggml_backend_is_cpu(backend)) {
LOG_DEBUG("Using CPU backend");
}
return backend;
}
// namespace is needed to avoid conflicts with ggml_backend_extend.hpp
namespace ggml_cpu {
#include "ggml-cpu.h"
}
const char* sd_get_system_info() {
using namespace ggml_cpu;
static char buffer[1024];
std::stringstream ss;
ss << "System Info: \n";
ss << " SSE3 = " << ggml_cpu_has_sse3() << " | ";
ss << " AVX = " << ggml_cpu_has_avx() << " | ";
ss << " AVX2 = " << ggml_cpu_has_avx2() << " | ";
ss << " AVX512 = " << ggml_cpu_has_avx512() << " | ";
ss << " AVX512_VBMI = " << ggml_cpu_has_avx512_vbmi() << " | ";
ss << " AVX512_VNNI = " << ggml_cpu_has_avx512_vnni() << " | ";
ss << " FMA = " << ggml_cpu_has_fma() << " | ";
ss << " NEON = " << ggml_cpu_has_neon() << " | ";
ss << " ARM_FMA = " << ggml_cpu_has_arm_fma() << " | ";
ss << " F16C = " << ggml_cpu_has_f16c() << " | ";
ss << " FP16_VA = " << ggml_cpu_has_fp16_va() << " | ";
ss << " WASM_SIMD = " << ggml_cpu_has_wasm_simd() << " | ";
ss << " VSX = " << ggml_cpu_has_vsx() << " | ";
snprintf(buffer, sizeof(buffer), "%s", ss.str().c_str());
return buffer;
}

View File

@ -6,6 +6,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "ggml-backend.h"
#include "stable-diffusion.h" #include "stable-diffusion.h"
#include "tensor.hpp" #include "tensor.hpp"
@ -82,6 +83,10 @@ int sd_get_preview_interval();
bool sd_should_preview_denoised(); bool sd_should_preview_denoised();
bool sd_should_preview_noisy(); bool sd_should_preview_noisy();
// test if the backend is a specific one, e.g. "CUDA", "ROCm", "Vulkan" etc.
bool sd_backend_is(ggml_backend_t backend, const std::string& name);
ggml_backend_t sd_get_default_backend();
#define LOG_DEBUG(format, ...) log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__) #define LOG_DEBUG(format, ...) log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__)
#define LOG_INFO(format, ...) log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__) #define LOG_INFO(format, ...) log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__)
#define LOG_WARN(format, ...) log_printf(SD_LOG_WARN, __FILE__, __LINE__, format, ##__VA_ARGS__) #define LOG_WARN(format, ...) log_printf(SD_LOG_WARN, __FILE__, __LINE__, format, ##__VA_ARGS__)

View File

@ -142,9 +142,10 @@ public:
"vae encode compute failed while processing a tile"); "vae encode compute failed while processing a tile");
} else { } else {
output = _compute(n_threads, input, false); output = _compute(n_threads, input, false);
free_compute_buffer();
} }
free_compute_buffer();
if (output.empty()) { if (output.empty()) {
LOG_ERROR("vae encode compute failed"); LOG_ERROR("vae encode compute failed");
return {}; return {};

View File

@ -692,6 +692,7 @@ namespace WAN {
} else { } else {
x = conv1->forward(ctx, x); x = conv1->forward(ctx, x);
} }
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.encoder.prelude", "x");
// downsamples // downsamples
std::vector<int64_t> dims = {dim}; std::vector<int64_t> dims = {dim};
@ -717,12 +718,14 @@ namespace WAN {
x = layer->forward(ctx, x, b, feat_cache, feat_idx, chunk_idx); x = layer->forward(ctx, x, b, feat_cache, feat_idx, chunk_idx);
} }
} }
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.encoder.down." + std::to_string(i), "x");
} }
// middle // middle
x = middle_0->forward(ctx, x, b, feat_cache, feat_idx); x = middle_0->forward(ctx, x, b, feat_cache, feat_idx);
x = middle_1->forward(ctx, x, b); x = middle_1->forward(ctx, x, b);
x = middle_2->forward(ctx, x, b, feat_cache, feat_idx); x = middle_2->forward(ctx, x, b, feat_cache, feat_idx);
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.encoder.mid", "x");
// head // head
x = head_0->forward(ctx, x); x = head_0->forward(ctx, x);
@ -863,11 +866,13 @@ namespace WAN {
} else { } else {
x = conv1->forward(ctx, x); x = conv1->forward(ctx, x);
} }
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.decoder.prelude", "x");
// middle // middle
x = middle_0->forward(ctx, x, b, feat_cache, feat_idx); x = middle_0->forward(ctx, x, b, feat_cache, feat_idx);
x = middle_1->forward(ctx, x, b); x = middle_1->forward(ctx, x, b);
x = middle_2->forward(ctx, x, b, feat_cache, feat_idx); x = middle_2->forward(ctx, x, b, feat_cache, feat_idx);
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.decoder.mid", "x");
// upsamples // upsamples
std::vector<int64_t> dims = {dim_mult[dim_mult.size() - 1] * dim}; std::vector<int64_t> dims = {dim_mult[dim_mult.size() - 1] * dim};
@ -893,6 +898,7 @@ namespace WAN {
x = layer->forward(ctx, x, b, feat_cache, feat_idx, chunk_idx); x = layer->forward(ctx, x, b, feat_cache, feat_idx, chunk_idx);
} }
} }
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.decoder.up." + std::to_string(i), "x");
} }
// head // head
@ -1031,6 +1037,7 @@ namespace WAN {
if (wan2_2) { if (wan2_2) {
x = patchify(ctx->ggml_ctx, x, 2, b); x = patchify(ctx->ggml_ctx, x, 2, b);
} }
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.encode.prelude", "x");
auto encoder = std::dynamic_pointer_cast<Encoder3d>(blocks["encoder"]); auto encoder = std::dynamic_pointer_cast<Encoder3d>(blocks["encoder"]);
auto conv1 = std::dynamic_pointer_cast<CausalConv3d>(blocks["conv1"]); auto conv1 = std::dynamic_pointer_cast<CausalConv3d>(blocks["conv1"]);
@ -1051,6 +1058,7 @@ namespace WAN {
} }
out = conv1->forward(ctx, out); out = conv1->forward(ctx, out);
auto mu = ggml_ext_chunk(ctx->ggml_ctx, out, 2, 3)[0]; auto mu = ggml_ext_chunk(ctx->ggml_ctx, out, 2, 3)[0];
// sd::ggml_graph_cut::mark_graph_cut(mu, "wan_vae.encode.final", "mu");
clear_cache(); clear_cache();
return mu; return mu;
} }
@ -1068,6 +1076,7 @@ namespace WAN {
int64_t iter_ = z->ne[2]; int64_t iter_ = z->ne[2];
auto x = conv2->forward(ctx, z); auto x = conv2->forward(ctx, z);
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.decode.prelude", "x");
ggml_tensor* out; ggml_tensor* out;
for (int i = 0; i < iter_; i++) { for (int i = 0; i < iter_; i++) {
_conv_idx = 0; _conv_idx = 0;
@ -1083,6 +1092,7 @@ namespace WAN {
if (wan2_2) { if (wan2_2) {
out = unpatchify(ctx->ggml_ctx, out, 2, b); out = unpatchify(ctx->ggml_ctx, out, 2, b);
} }
// sd::ggml_graph_cut::mark_graph_cut(out, "wan_vae.decode.final", "out");
clear_cache(); clear_cache();
return out; return out;
} }
@ -1098,12 +1108,14 @@ namespace WAN {
auto conv2 = std::dynamic_pointer_cast<CausalConv3d>(blocks["conv2"]); auto conv2 = std::dynamic_pointer_cast<CausalConv3d>(blocks["conv2"]);
auto x = conv2->forward(ctx, z); auto x = conv2->forward(ctx, z);
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.decode_partial.prelude", "x");
auto in = ggml_ext_slice(ctx->ggml_ctx, x, 2, i, i + 1); // [b*c, 1, h, w] auto in = ggml_ext_slice(ctx->ggml_ctx, x, 2, i, i + 1); // [b*c, 1, h, w]
_conv_idx = 0; _conv_idx = 0;
auto out = decoder->forward(ctx, in, b, _feat_map, _conv_idx, i); auto out = decoder->forward(ctx, in, b, _feat_map, _conv_idx, i);
if (wan2_2) { if (wan2_2) {
out = unpatchify(ctx->ggml_ctx, out, 2, b); out = unpatchify(ctx->ggml_ctx, out, 2, b);
} }
// sd::ggml_graph_cut::mark_graph_cut(out, "wan_vae.decode_partial.final", "out");
return out; return out;
} }
}; };
@ -1984,6 +1996,13 @@ namespace WAN {
c = ggml_reshape_3d(ctx->ggml_ctx, c, c->ne[0] * c->ne[1] * c->ne[2], c->ne[3] / N, N); // [N, dim, t_len*h_len*w_len] c = ggml_reshape_3d(ctx->ggml_ctx, c, c->ne[0] * c->ne[1] * c->ne[2], c->ne[3] / N, N); // [N, dim, t_len*h_len*w_len]
c = ggml_ext_cont(ctx->ggml_ctx, ggml_ext_torch_permute(ctx->ggml_ctx, c, 1, 0, 2, 3)); // [N, t_len*h_len*w_len, dim] c = ggml_ext_cont(ctx->ggml_ctx, ggml_ext_torch_permute(ctx->ggml_ctx, c, 1, 0, 2, 3)); // [N, t_len*h_len*w_len, dim]
} }
sd::ggml_graph_cut::mark_graph_cut(x, "wan.prelude", "x");
// sd::ggml_graph_cut::mark_graph_cut(e, "wan.prelude", "e");
// sd::ggml_graph_cut::mark_graph_cut(e0, "wan.prelude", "e0");
// sd::ggml_graph_cut::mark_graph_cut(context, "wan.prelude", "context");
if (c != nullptr) {
sd::ggml_graph_cut::mark_graph_cut(c, "wan.prelude", "c");
}
auto x_orig = x; auto x_orig = x;
@ -2004,6 +2023,10 @@ namespace WAN {
c_skip = ggml_ext_scale(ctx->ggml_ctx, c_skip, vace_strength); c_skip = ggml_ext_scale(ctx->ggml_ctx, c_skip, vace_strength);
x = ggml_add(ctx->ggml_ctx, x, c_skip); x = ggml_add(ctx->ggml_ctx, x, c_skip);
} }
sd::ggml_graph_cut::mark_graph_cut(x, "wan.blocks." + std::to_string(i), "x");
if (c != nullptr) {
sd::ggml_graph_cut::mark_graph_cut(c, "wan.blocks." + std::to_string(i), "c");
}
} }
x = head->forward(ctx, x, e); // [N, t_len*h_len*w_len, pt*ph*pw*out_dim] x = head->forward(ctx, x, e); // [N, t_len*h_len*w_len, pt*ph*pw*out_dim]

View File

@ -31,10 +31,6 @@ namespace ZImage {
: head_dim(head_dim), num_heads(num_heads), num_kv_heads(num_kv_heads), qk_norm(qk_norm) { : head_dim(head_dim), num_heads(num_heads), num_kv_heads(num_kv_heads), qk_norm(qk_norm) {
blocks["qkv"] = std::make_shared<Linear>(hidden_size, (num_heads + num_kv_heads * 2) * head_dim, false); blocks["qkv"] = std::make_shared<Linear>(hidden_size, (num_heads + num_kv_heads * 2) * head_dim, false);
float scale = 1.f; float scale = 1.f;
#if GGML_USE_HIP
// Prevent NaN issues with certain ROCm setups
scale = 1.f / 16.f;
#endif
blocks["out"] = std::make_shared<Linear>(num_heads * head_dim, hidden_size, false, false, false, scale); blocks["out"] = std::make_shared<Linear>(num_heads * head_dim, hidden_size, false, false, false, scale);
if (qk_norm) { if (qk_norm) {
blocks["q_norm"] = std::make_shared<RMSNorm>(head_dim); blocks["q_norm"] = std::make_shared<RMSNorm>(head_dim);
@ -52,6 +48,10 @@ namespace ZImage {
auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]); auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
auto out_proj = std::dynamic_pointer_cast<Linear>(blocks["out"]); auto out_proj = std::dynamic_pointer_cast<Linear>(blocks["out"]);
if (sd_backend_is(ctx->backend, "ROCm")) {
out_proj->set_scale(1.f / 16.f);
}
auto qkv = qkv_proj->forward(ctx, x); // [N, n_token, (num_heads + num_kv_heads*2)*head_dim] auto qkv = qkv_proj->forward(ctx, x); // [N, n_token, (num_heads + num_kv_heads*2)*head_dim]
qkv = ggml_reshape_4d(ctx->ggml_ctx, qkv, head_dim, num_heads + num_kv_heads * 2, qkv->ne[1], qkv->ne[2]); // [N, n_token, num_heads + num_kv_heads*2, head_dim] qkv = ggml_reshape_4d(ctx->ggml_ctx, qkv, head_dim, num_heads + num_kv_heads * 2, qkv->ne[1], qkv->ne[2]); // [N, n_token, num_heads + num_kv_heads*2, head_dim]
@ -115,9 +115,7 @@ namespace ZImage {
bool force_prec_f32 = false; bool force_prec_f32 = false;
float scale = 1.f / 128.f; float scale = 1.f / 128.f;
#ifdef SD_USE_VULKAN
force_prec_f32 = true;
#endif
// The purpose of the scale here is to prevent NaN issues in certain situations. // The purpose of the scale here is to prevent NaN issues in certain situations.
// For example, when using CUDA but the weights are k-quants. // For example, when using CUDA but the weights are k-quants.
blocks["w2"] = std::make_shared<Linear>(hidden_dim, dim, false, false, force_prec_f32, scale); blocks["w2"] = std::make_shared<Linear>(hidden_dim, dim, false, false, force_prec_f32, scale);
@ -129,6 +127,10 @@ namespace ZImage {
auto w2 = std::dynamic_pointer_cast<Linear>(blocks["w2"]); auto w2 = std::dynamic_pointer_cast<Linear>(blocks["w2"]);
auto w3 = std::dynamic_pointer_cast<Linear>(blocks["w3"]); auto w3 = std::dynamic_pointer_cast<Linear>(blocks["w3"]);
if (sd_backend_is(ctx->backend, "Vulkan")) {
w2->set_force_prec_f32(true);
}
auto x1 = w1->forward(ctx, x); auto x1 = w1->forward(ctx, x);
auto x3 = w3->forward(ctx, x); auto x3 = w3->forward(ctx, x);
x = ggml_swiglu_split(ctx->ggml_ctx, x1, x3); x = ggml_swiglu_split(ctx->ggml_ctx, x1, x3);
@ -369,6 +371,9 @@ namespace ZImage {
auto txt = cap_embedder_1->forward(ctx, cap_embedder_0->forward(ctx, context)); // [N, n_txt_token, hidden_size] auto txt = cap_embedder_1->forward(ctx, cap_embedder_0->forward(ctx, context)); // [N, n_txt_token, hidden_size]
auto img = x_embedder->forward(ctx, x); // [N, n_img_token, hidden_size] auto img = x_embedder->forward(ctx, x); // [N, n_img_token, hidden_size]
sd::ggml_graph_cut::mark_graph_cut(txt, "z_image.prelude", "txt");
sd::ggml_graph_cut::mark_graph_cut(img, "z_image.prelude", "img");
sd::ggml_graph_cut::mark_graph_cut(t_emb, "z_image.prelude", "t_emb");
int64_t n_txt_pad_token = Rope::bound_mod(static_cast<int>(n_txt_token), SEQ_MULTI_OF); int64_t n_txt_pad_token = Rope::bound_mod(static_cast<int>(n_txt_token), SEQ_MULTI_OF);
if (n_txt_pad_token > 0) { if (n_txt_pad_token > 0) {
@ -391,20 +396,24 @@ namespace ZImage {
auto block = std::dynamic_pointer_cast<JointTransformerBlock>(blocks["context_refiner." + std::to_string(i)]); auto block = std::dynamic_pointer_cast<JointTransformerBlock>(blocks["context_refiner." + std::to_string(i)]);
txt = block->forward(ctx, txt, txt_pe, nullptr, nullptr); txt = block->forward(ctx, txt, txt_pe, nullptr, nullptr);
sd::ggml_graph_cut::mark_graph_cut(txt, "z_image.context_refiner." + std::to_string(i), "txt");
} }
for (int i = 0; i < z_image_params.num_refiner_layers; i++) { for (int i = 0; i < z_image_params.num_refiner_layers; i++) {
auto block = std::dynamic_pointer_cast<JointTransformerBlock>(blocks["noise_refiner." + std::to_string(i)]); auto block = std::dynamic_pointer_cast<JointTransformerBlock>(blocks["noise_refiner." + std::to_string(i)]);
img = block->forward(ctx, img, img_pe, nullptr, t_emb); img = block->forward(ctx, img, img_pe, nullptr, t_emb);
sd::ggml_graph_cut::mark_graph_cut(img, "z_image.noise_refiner." + std::to_string(i), "img");
} }
auto txt_img = ggml_concat(ctx->ggml_ctx, txt, img, 1); // [N, n_txt_token + n_txt_pad_token + n_img_token + n_img_pad_token, hidden_size] auto txt_img = ggml_concat(ctx->ggml_ctx, txt, img, 1); // [N, n_txt_token + n_txt_pad_token + n_img_token + n_img_pad_token, hidden_size]
sd::ggml_graph_cut::mark_graph_cut(txt_img, "z_image.prelude", "txt_img");
for (int i = 0; i < z_image_params.num_layers; i++) { for (int i = 0; i < z_image_params.num_layers; i++) {
auto block = std::dynamic_pointer_cast<JointTransformerBlock>(blocks["layers." + std::to_string(i)]); auto block = std::dynamic_pointer_cast<JointTransformerBlock>(blocks["layers." + std::to_string(i)]);
txt_img = block->forward(ctx, txt_img, pe, nullptr, t_emb); txt_img = block->forward(ctx, txt_img, pe, nullptr, t_emb);
sd::ggml_graph_cut::mark_graph_cut(txt_img, "z_image.layers." + std::to_string(i), "txt_img");
} }
txt_img = final_layer->forward(ctx, txt_img, t_emb); // [N, n_txt_token + n_txt_pad_token + n_img_token + n_img_pad_token, ph*pw*C] txt_img = final_layer->forward(ctx, txt_img, t_emb); // [N, n_txt_token + n_txt_pad_token + n_img_token + n_img_pad_token, ph*pw*C]