Compare commits

...

20 Commits

Author SHA1 Message Date
leejet
90e87bc846
feat: add max-vram based segmented param offload (#1476) 2026-05-06 21:56:02 +08:00
Wagner Bruna
586b6f1481
feat: adapt res samplers for flow models for eta > 0 (#1436) 2026-05-06 21:49:06 +08:00
fszontagh
9097ce5211
fix: skip empty MultiLoraAdapter when no LoRAs target a model (#1469) 2026-05-06 21:45:47 +08:00
leejet
3d6064b37e
perf: speed up tensor_to_sd_image conversion (#1466) 2026-04-30 01:13:56 +08:00
Wagner Bruna
b8079e253d
feat: transition from compile-time to runtime backend discovery (#1448)
Co-authored-by: Stéphane du Hamel <stephduh@live.fr>
Co-authored-by: Cyberhan123 <255542417@qq.com>
Co-authored-by: leejet <leejet714@gmail.com>
2026-04-29 23:26:57 +08:00
Wagner Bruna
331cfa5387
fix: release VAE compute buffer after tiled encoding (#1465) 2026-04-29 22:25:30 +08:00
Douglas Griffith
a81677f59c
docs: performance tips markup (#1460) 2026-04-27 22:55:30 +08:00
leejet
f40a707d0f
feat: add sdcpp-specific generation metadata to image outputs (#1462) 2026-04-27 22:43:13 +08:00
akleine
970c4a3312
chore: replace some NULL with nullptr + use "%zu" for printing some size_t data (#1457) 2026-04-27 22:42:57 +08:00
leejet
b8bdffc199
feat: add more built-in highres upscalers (#1456) 2026-04-23 22:17:58 +08:00
leejet
c97702e105
feat: add sd-webui style Hires. fix support (#1451) 2026-04-22 23:51:09 +08:00
leejet
44cca3d626
feat: support safetensors export in convert mode (#1444) 2026-04-20 00:22:11 +08:00
leejet
0a7ae07f94
feat: add restricted torch legacy checkpoint loading (#1443) 2026-04-19 23:09:43 +08:00
leejet
66143340b6
refactor: move model file IO into dedicated module (#1442) 2026-04-19 17:52:56 +08:00
Wagner Bruna
7023fc4cfb
fix: correct image to image DDIM and TCD (#1410) 2026-04-19 17:51:28 +08:00
Wagner Bruna
e77e4c46bf
feat: adapt LCM for flow models (#1413) 2026-04-19 17:49:46 +08:00
leejet
7d33d4b2dd
chore: enable MSVC parallel compilation with /MP (#1438) 2026-04-18 15:44:43 +08:00
leejet
3c99f700de
ci: skip docker image build job on pull requests (#1439) 2026-04-18 15:25:04 +08:00
leejet
4d626d24b2
feat(server): implement vid_gen async API and mode-aware capabilities (#1437) 2026-04-18 15:06:36 +08:00
Wagner Bruna
f3f69e2fbe
feat: add DPM++ (2S) Ancestral implementation for flow models (#1428) 2026-04-18 15:05:09 +08:00
72 changed files with 7846 additions and 1969 deletions

View File

@ -176,6 +176,7 @@ jobs:
build-and-push-docker-images:
name: Build and push container images
if: ${{ github.event_name != 'pull_request' }}
runs-on: ubuntu-latest
permissions:

View File

@ -11,6 +11,10 @@ endif()
if (MSVC)
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
add_compile_definitions(_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING)
add_compile_options(
$<$<COMPILE_LANGUAGE:C>:/MP>
$<$<COMPILE_LANGUAGE:CXX>:/MP>
)
endif()
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
@ -68,37 +72,31 @@ option(SD_USE_SYSTEM_GGML "sd: use system-installed GGML library" OFF
if(SD_CUDA)
message("-- Use CUDA as backend stable-diffusion")
set(GGML_CUDA ON)
add_definitions(-DSD_USE_CUDA)
endif()
if(SD_METAL)
message("-- Use Metal as backend stable-diffusion")
set(GGML_METAL ON)
add_definitions(-DSD_USE_METAL)
endif()
if (SD_VULKAN)
message("-- Use Vulkan as backend stable-diffusion")
set(GGML_VULKAN ON)
add_definitions(-DSD_USE_VULKAN)
endif ()
if (SD_OPENCL)
message("-- Use OpenCL as backend stable-diffusion")
set(GGML_OPENCL ON)
add_definitions(-DSD_USE_OPENCL)
endif ()
if (SD_HIPBLAS)
message("-- Use HIPBLAS as backend stable-diffusion")
set(GGML_HIP ON)
add_definitions(-DSD_USE_CUDA)
endif ()
if(SD_MUSA)
message("-- Use MUSA as backend stable-diffusion")
set(GGML_MUSA ON)
add_definitions(-DSD_USE_CUDA)
endif()
if(SD_WEBP)
@ -152,10 +150,12 @@ endif()
set(SD_LIB stable-diffusion)
file(GLOB SD_LIB_SOURCES
file(GLOB SD_LIB_SOURCES CONFIGURE_DEPENDS
"src/*.h"
"src/*.cpp"
"src/*.hpp"
"src/model_io/*.h"
"src/model_io/*.cpp"
"src/tokenizers/*.h"
"src/tokenizers/*.cpp"
"src/tokenizers/vocab/*.h"
@ -216,7 +216,6 @@ if(SD_SYCL)
message("-- Use SYCL as backend stable-diffusion")
set(GGML_SYCL ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl")
add_definitions(-DSD_USE_SYCL)
# disable fast-math on host, see:
# https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/fp-model-fp.html
if (WIN32)

View File

@ -77,9 +77,10 @@ API and command-line option may change frequently.***
- OpenCL
- SYCL
- Supported weight formats
- Pytorch checkpoint (`.ckpt` or `.pth`)
- Pytorch checkpoint (`.ckpt` or `.pth` or `.pt`)
- Safetensors (`.safetensors`)
- GGUF (`.gguf`)
- Convert mode supports converting model weights to `.gguf` or `.safetensors`
- Supported platforms
- Linux
- Mac OS

View File

@ -131,8 +131,6 @@ sd-cli -m model.safetensors -p "a cat" --cache-mode spectrum
| `warmup` | Steps to always compute before caching starts | 4 |
| `stop` | Stop caching at this fraction of total steps | 0.9 |
```
### Performance Tips
- Start with default thresholds and adjust based on output quality

View File

@ -4,26 +4,29 @@
usage: ./bin/sd-cli [options]
CLI Options:
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default:
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi, .webm, and animated .webp
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
every step)
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
--image <string> path to the image to inspect (for metadata mode)
--metadata-format <string> metadata output format, one of [text, json] (default: text)
--canny apply canny preprocessor (edge detection)
--convert-name convert tensor name (for convert mode)
-v, --verbose print extra info
--color colors the logging tags according to level
--taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae)
--preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs
--metadata-raw include raw hex previews for unparsed metadata payloads
--metadata-brief truncate long metadata text values in text output
--metadata-all include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert, metadata], default: img_gen
--preview preview method. must be one of the following [none, proj, tae, vae] (default is none)
-h, --help show this help message and exit
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image
sequences (default: ./output.png) (eg. output_%03d.png). Single-file video outputs
support .avi, .webm, and animated .webp
--image <string> path to the image to inspect (for metadata mode)
--metadata-format <string> metadata output format, one of [text, json] (default: text)
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support
.avi, .webm, and animated .webp
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file
(default is 1, meaning updating at every step)
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified
%d in output path, 1 otherwise)
--canny apply canny preprocessor (edge detection)
--convert-name convert tensor name (for convert mode)
-v, --verbose print extra info
--color colors the logging tags according to level
--taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae)
--preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs
--metadata-raw include raw hex previews for unparsed metadata payloads
--metadata-brief truncate long metadata text values in text output
--metadata-all include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert, metadata], default: img_gen
--preview preview method. must be one of the following [none, proj, tae, vae] (default is none)
-h, --help show this help message and exit
Context Options:
-m, --model <string> path to full model
@ -31,7 +34,8 @@ Context Options:
--clip_g <string> path to the clip-g text encoder
--clip_vision <string> path to the clip-vision encoder
--t5xxl <string> path to the t5xxl text encoder
--llm <string> path to the llm text encoder. For example: (qwenvl2.5 for qwen-image, mistral-small3.2 for flux2, ...)
--llm <string> path to the llm text encoder. For example: (qwenvl2.5 for qwen-image,
mistral-small3.2 for flux2, ...)
--llm_vision <string> path to the llm vit
--qwen2vl <string> alias of --llm. Deprecated.
--qwen2vl_vision <string> alias of --llm_vision. Deprecated.
@ -43,16 +47,18 @@ Context Options:
--control-net <string> path to control net model
--embd-dir <string> embeddings directory
--lora-model-dir <string> lora model directory
--hires-upscalers-dir <string> highres fix upscaler model directory
--tensor-type-rules <string> weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0")
--photo-maker <string> path to PHOTOMAKER model
--upscale-model <string> path to esrgan model.
-t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0, then threads will be set to the number of
CPU physical cores
-t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0,
then threads will be set to the number of CPU physical cores
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
--vae-tiling process vae in tiles to reduce memory usage
--max-vram <float> maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables
graph splitting
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
when needed
--mmap whether to memory-map model
--control-net-cpu keep controlnet in cpu (for low vram)
--clip-on-cpu keep clip in cpu (for low vram)
@ -67,20 +73,19 @@ Context Options:
--chroma-disable-dit-mask disable dit mask for chroma
--qwen-image-zero-cond-t enable zero_cond_t for qwen image
--chroma-enable-t5-mask enable t5 mask for chroma
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
type of the weight file
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K,
q4_K). If not specified, the default is the type of the weight file
--rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)
--sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow]
--lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights
contain any quantized parameters, the at_runtime mode will be used; otherwise,
immediately will be used.The immediately mode may have precision and
compatibility issues with quantized parameters, but it usually offers faster inference
speed and, in some cases, lower memory usage. The at_runtime mode, on the
other hand, is exactly the opposite.
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32)
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1
(overrides --vae-tile-size)
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow,
flux2_flow]
--lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is
auto. In auto mode, if the model weights contain any quantized parameters,
the at_runtime mode will be used; otherwise, immediately will be used.The
immediately mode may have precision and compatibility issues with quantized
parameters, but it usually offers faster inference speed and, in some cases,
lower memory usage. The at_runtime mode, on the other hand, is exactly the
opposite.
Generation Options:
-p, --prompt <string> the prompt to render
@ -89,69 +94,99 @@ Generation Options:
--end-img <string> path to the end image, required by flf2v
--mask <string> path to the mask image
--control-image <string> path to control image, control net
--control-video <string> path to control video frames, It must be a directory path. The video frames inside should be stored as images in
lexicographical (character) order. For example, if the control video path is
`frames`, the directory contain images such as 00.png, 01.png, ... etc.
--control-video <string> path to control video frames, It must be a directory path. The video frames
inside should be stored as images in lexicographical (character) order. For
example, if the control video path is `frames`, the directory contain images
such as 00.png, 01.png, ... etc.
--pm-id-images-dir <string> path to PHOTOMAKER input id images dir
--pm-id-embed-path <string> path to PHOTOMAKER v2 id embed
--hires-upscaler <string> highres fix upscaler, Lanczos, Nearest, Latent, Latent (nearest), Latent
(nearest-exact), Latent (antialiased), Latent (bicubic), Latent (bicubic
antialiased), or a model name under --hires-upscalers-dir (default: Latent)
-H, --height <int> image height, in pixel space (default: 512)
-W, --width <int> image width, in pixel space (default: 512)
--steps <int> number of sample steps (default: 20)
--high-noise-steps <int> (high noise) number of sample steps (default: -1 = auto)
--clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). <= 0 represents unspecified,
will be 1 for SD1.x, 2 for SD2.x
--clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer
(default: -1). <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
-b, --batch-count <int> batch count
--video-frames <int> video frames (default: 1)
--fps <int> fps (default: 24)
--timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for NitroSD-Realism around 250 and 500 for
NitroSD-Vibrant
--timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for
NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
--upscale-repeats <int> Run the ESRGAN upscaler this many times (default: 1)
--upscale-tile-size <int> tile size for ESRGAN upscaling (default: 128)
--hires-width <int> highres fix target width, 0 to use --hires-scale (default: 0)
--hires-height <int> highres fix target height, 0 to use --hires-scale (default: 0)
--hires-steps <int> highres fix second pass sample steps, 0 to reuse --steps (default: 0)
--hires-upscale-tile-size <int> highres fix upscaler tile size, reserved for model-backed upscalers (default:
128)
--cfg-scale <float> unconditional guidance scale: (default: 7.0)
--img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
--img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same
as --cfg-scale)
--guidance <float> distilled guidance scale for models with guidance input (default: 3.5)
--slg-scale <float> skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means disabled, a value of 2.5 is nice for sd3.5
medium
--slg-scale <float> skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means
disabled, a value of 2.5 is nice for sd3.5 medium
--skip-layer-start <float> SLG enabling point (default: 0.01)
--skip-layer-end <float> SLG disabling point (default: 0.2)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and
res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models
(default: same as --cfg-scale)
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input
(default: 3.5)
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default:
0)
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd,
res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--strength <float> strength for noising/unnoising (default: 0.75)
--pm-style-strength <float>
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if `--high-noise-steps` is set to -1
--pm-style-strength <float>
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full
destruction of information in init image
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if
`--high-noise-steps` is set to -1
--vace-strength <float> wan vace strength
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
--hires-scale <float> highres fix scale when target size is not set (default: 2.0)
--hires-denoising-strength <float> highres fix second pass denoising strength (default: 0.7)
--increase-ref-index automatically increase the indices of references images based on the order
they are listed (starting with 1).
--disable-auto-resize-ref-image disable auto resize of ref images
--disable-image-metadata do not embed generation metadata on image files
--vae-tiling process vae in tiles to reduce memory usage
--hires enable highres fix
-s, --seed RNG seed (default: 42, use random seed for < 0)
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a
otherwise)
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan,
euler_a otherwise
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
kl_optimal, lcm, bong_tangent], default: discrete
--sigmas custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0").
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m,
dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s,
er_sde] (default: euler for Flux/SD3/Wan, euler_a otherwise)
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a,
dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep,
res_2s, er_sde] default: euler for Flux/SD3/Wan, euler_a otherwise
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits,
smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default:
discrete
--sigmas custom sigma values for the sampler, comma-separated (e.g.,
"14.61,7.8,3.5,0.0").
--skip-layers layers to skip for SLG steps (default: [7,8,9])
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level),
'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET),
'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), 'spectrum' (UNET/DiT
Chebyshev+Taylor forecasting)
--cache-option named cache params (key=value format, comma-separated). easycache/ucache:
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=;
spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples:
"threshold=0.25" or "threshold=1.5,reset=0" or "w=0.4,window=2"
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit:
Fn=,Bn=,threshold=,warmup=; spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=.
Examples: "threshold=0.25" or "threshold=1.5,reset=0"
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g.,
"1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
--scm-policy SCM policy: 'dynamic' (default) or 'static'
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32)
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size
if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)
```
Metadata mode inspects PNG/JPEG container metadata without loading any model:

View File

@ -278,7 +278,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
bool valid = cli_params.resolve_and_validate();
if (valid && cli_params.mode != METADATA) {
valid = ctx_params.resolve_and_validate(cli_params.mode) &&
gen_params.resolve_and_validate(cli_params.mode, ctx_params.lora_model_dir);
gen_params.resolve_and_validate(cli_params.mode,
ctx_params.lora_model_dir,
ctx_params.hires_upscalers_dir);
}
if (!valid) {
@ -431,10 +433,11 @@ bool save_results(const SDCliParams& cli_params,
if (!img.data)
return false;
std::string params = gen_params.embed_image_metadata
? get_image_params(ctx_params, gen_params, gen_params.seed + idx)
: "";
const bool ok = write_image_to_file(path.string(), img.data, img.width, img.height, img.channel, params, 90);
const int64_t metadata_seed = cli_params.mode == VID_GEN ? gen_params.seed : gen_params.seed + idx;
std::string params = gen_params.embed_image_metadata
? get_image_params(ctx_params, gen_params, metadata_seed, cli_params.mode)
: "";
const bool ok = write_image_to_file(path.string(), img.data, img.width, img.height, img.channel, params, 90);
LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure");
return ok;
};
@ -688,6 +691,13 @@ int main(int argc, const char* argv[]) {
vae_decode_only = false;
}
if (gen_params.hires_enabled &&
(gen_params.resolved_hires_upscaler == SD_HIRES_UPSCALER_MODEL ||
gen_params.resolved_hires_upscaler == SD_HIRES_UPSCALER_LANCZOS ||
gen_params.resolved_hires_upscaler == SD_HIRES_UPSCALER_NEAREST)) {
vae_decode_only = false;
}
sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview);
SDImageVec results;

View File

@ -107,47 +107,60 @@ static bool is_absolute_path(const std::string& p) {
std::string ArgOptions::wrap_text(const std::string& text, size_t width, size_t indent) {
std::ostringstream oss;
size_t line_len = 0;
size_t pos = 0;
size_t line_len = 0;
while (pos < text.size()) {
// Preserve manual newlines
if (text[pos] == '\n') {
oss << '\n'
<< std::string(indent, ' ');
line_len = indent;
line_len = 0;
++pos;
continue;
}
// Add the character
oss << text[pos];
++line_len;
++pos;
if (std::isspace(static_cast<unsigned char>(text[pos]))) {
++pos;
continue;
}
// If the current line exceeds width, try to break at the last space
if (line_len >= width) {
std::string current = oss.str();
size_t back = current.size();
size_t word_start = pos;
while (pos < text.size() &&
text[pos] != '\n' &&
!std::isspace(static_cast<unsigned char>(text[pos]))) {
++pos;
}
// Find the last space (for a clean break)
while (back > 0 && current[back - 1] != ' ' && current[back - 1] != '\n')
--back;
// If found a space to break on
if (back > 0 && current[back - 1] != '\n') {
std::string before = current.substr(0, back - 1);
std::string after = current.substr(back);
oss.str("");
oss.clear();
oss << before << "\n"
<< std::string(indent, ' ') << after;
} else {
// If no space found, just break at width
oss << "\n"
<< std::string(indent, ' ');
std::string word = text.substr(word_start, pos - word_start);
while (!word.empty()) {
size_t separator_len = line_len == 0 ? 0 : 1;
if (line_len + separator_len + word.size() <= width) {
if (separator_len > 0) {
oss << ' ';
++line_len;
}
oss << word;
line_len += word.size();
word.clear();
continue;
}
if (line_len > 0) {
oss << '\n'
<< std::string(indent, ' ');
line_len = 0;
continue;
}
size_t chunk_len = std::min(width, word.size());
oss << word.substr(0, chunk_len);
line_len = chunk_len;
word.erase(0, chunk_len);
if (!word.empty()) {
oss << '\n'
<< std::string(indent, ' ');
line_len = 0;
}
line_len = indent;
}
}
@ -351,7 +364,10 @@ ArgOptions SDContextParams::get_options() {
"--lora-model-dir",
"lora model directory",
&lora_model_dir},
{"",
"--hires-upscalers-dir",
"highres fix upscaler model directory",
&hires_upscalers_dir},
{"",
"--tensor-type-rules",
"weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")",
@ -378,7 +394,12 @@ ArgOptions SDContextParams::get_options() {
&chroma_t5_mask_pad},
};
options.float_options = {};
options.float_options = {
{"",
"--max-vram",
"maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables graph splitting",
&max_vram},
};
options.bool_options = {
{"",
@ -649,10 +670,12 @@ std::string SDContextParams::to_string() const {
<< " wtype: " << sd_type_name(wtype) << ",\n"
<< " tensor_type_rules: \"" << tensor_type_rules << "\",\n"
<< " lora_model_dir: \"" << lora_model_dir << "\",\n"
<< " hires_upscalers_dir: \"" << hires_upscalers_dir << "\",\n"
<< " photo_maker_path: \"" << photo_maker_path << "\",\n"
<< " rng_type: " << sd_rng_type_name(rng_type) << ",\n"
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
<< " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n"
<< " max_vram: " << max_vram << ",\n"
<< " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n"
<< " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
<< " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n"
@ -727,6 +750,7 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool vae_decode_only, bool f
chroma_use_t5_mask,
chroma_t5_mask_pad,
qwen_image_zero_cond_t,
max_vram,
};
return sd_ctx_params;
}
@ -777,6 +801,12 @@ ArgOptions SDGenerationParams::get_options() {
"--pm-id-embed-path",
"path to PHOTOMAKER v2 id embed",
&pm_id_embed_path},
{"",
"--hires-upscaler",
"highres fix upscaler, Lanczos, Nearest, Latent, Latent (nearest), Latent (nearest-exact), "
"Latent (antialiased), Latent (bicubic), Latent (bicubic antialiased), or a model name "
"under --hires-upscalers-dir (default: Latent)",
&hires_upscaler},
};
options.int_options = {
@ -826,6 +856,22 @@ ArgOptions SDGenerationParams::get_options() {
"--upscale-tile-size",
"tile size for ESRGAN upscaling (default: 128)",
&upscale_tile_size},
{"",
"--hires-width",
"highres fix target width, 0 to use --hires-scale (default: 0)",
&hires_width},
{"",
"--hires-height",
"highres fix target height, 0 to use --hires-scale (default: 0)",
&hires_height},
{"",
"--hires-steps",
"highres fix second pass sample steps, 0 to reuse --steps (default: 0)",
&hires_steps},
{"",
"--hires-upscale-tile-size",
"highres fix upscaler tile size, reserved for model-backed upscalers (default: 128)",
&hires_upscale_tile_size},
};
options.float_options = {
@ -913,6 +959,14 @@ ArgOptions SDGenerationParams::get_options() {
"--vae-tile-overlap",
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
&vae_tiling_params.target_overlap},
{"",
"--hires-scale",
"highres fix scale when target size is not set (default: 2.0)",
&hires_scale},
{"",
"--hires-denoising-strength",
"highres fix second pass denoising strength (default: 0.7)",
&hires_denoising_strength},
};
options.bool_options = {
@ -936,6 +990,11 @@ ArgOptions SDGenerationParams::get_options() {
"process vae in tiles to reduce memory usage",
true,
&vae_tiling_params.enabled},
{"",
"--hires",
"enable highres fix",
true,
&hires_enabled},
};
auto on_seed_arg = [&](int argc, const char** argv, int index) {
@ -1424,6 +1483,37 @@ static bool parse_lora_json_field(const json& parent,
return true;
}
static bool resolve_model_file_from_dir(const std::string& model_name,
const std::string& model_dir,
const std::vector<std::string>& valid_ext,
const char* label,
std::string& resolved_path) {
if (model_dir.empty()) {
LOG_ERROR("%s directory is empty", label);
return false;
}
if (model_name.empty() ||
model_name.find('/') != std::string::npos ||
model_name.find('\\') != std::string::npos ||
fs::path(model_name).has_root_path() ||
fs::path(model_name).has_extension()) {
LOG_ERROR("%s must be a model name without path or extension: %s", label, model_name.c_str());
return false;
}
fs::path model_dir_path = model_dir;
for (const auto& ext : valid_ext) {
fs::path try_path = model_dir_path / (model_name + ext);
if (fs::exists(try_path) && fs::is_regular_file(try_path)) {
resolved_path = try_path.lexically_normal().string();
return true;
}
}
LOG_ERROR("can not find %s %s in %s", label, model_name.c_str(), model_dir_path.lexically_normal().string().c_str());
return false;
}
bool SDGenerationParams::from_json_str(
const std::string& json_str,
const std::function<std::string(const std::string&)>& lora_path_resolver) {
@ -1487,6 +1577,34 @@ bool SDGenerationParams::from_json_str(
load_if_exists("increase_ref_index", increase_ref_index);
load_if_exists("embed_image_metadata", embed_image_metadata);
if (j.contains("hires") && j["hires"].is_object()) {
const json& hires_json = j["hires"];
if (hires_json.contains("enabled") && hires_json["enabled"].is_boolean()) {
hires_enabled = hires_json["enabled"];
}
if (hires_json.contains("upscaler") && hires_json["upscaler"].is_string()) {
hires_upscaler = hires_json["upscaler"];
}
if (hires_json.contains("scale") && hires_json["scale"].is_number()) {
hires_scale = hires_json["scale"];
}
if (hires_json.contains("target_width") && hires_json["target_width"].is_number_integer()) {
hires_width = hires_json["target_width"];
}
if (hires_json.contains("target_height") && hires_json["target_height"].is_number_integer()) {
hires_height = hires_json["target_height"];
}
if (hires_json.contains("steps") && hires_json["steps"].is_number_integer()) {
hires_steps = hires_json["steps"];
}
if (hires_json.contains("denoising_strength") && hires_json["denoising_strength"].is_number()) {
hires_denoising_strength = hires_json["denoising_strength"];
}
if (hires_json.contains("upscale_tile_size") && hires_json["upscale_tile_size"].is_number_integer()) {
hires_upscale_tile_size = hires_json["upscale_tile_size"];
}
}
auto parse_sample_params_json = [&](const json& sample_json,
sd_sample_params_t& target_params,
std::vector<int>& target_skip_layers,
@ -1589,10 +1707,18 @@ bool SDGenerationParams::from_json_str(
LOG_ERROR("invalid init_image");
return false;
}
if (!parse_image_json_field(j, "end_image", 3, width, height, end_image)) {
LOG_ERROR("invalid end_image");
return false;
}
if (!parse_image_array_json_field(j, "ref_images", 3, width, height, ref_images)) {
LOG_ERROR("invalid ref_images");
return false;
}
if (!parse_image_array_json_field(j, "control_frames", 3, width, height, control_frames)) {
LOG_ERROR("invalid control_frames");
return false;
}
if (!parse_image_json_field(j, "mask_image", 1, width, height, mask_image)) {
LOG_ERROR("invalid mask_image");
return false;
@ -1792,7 +1918,7 @@ bool SDGenerationParams::initialize_cache_params() {
return true;
}
bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict) {
bool SDGenerationParams::resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict) {
if (high_noise_sample_params.sample_steps <= 0) {
high_noise_sample_params.sample_steps = -1;
}
@ -1811,6 +1937,27 @@ bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict)
sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100);
}
hires_upscaler_model_path.clear();
if (hires_enabled) {
if (hires_upscaler.empty()) {
hires_upscaler = "Latent";
}
resolved_hires_upscaler = str_to_sd_hires_upscaler(hires_upscaler.c_str());
if (resolved_hires_upscaler == SD_HIRES_UPSCALER_NONE) {
hires_enabled = false;
} else if (resolved_hires_upscaler == SD_HIRES_UPSCALER_COUNT) {
static const std::vector<std::string> valid_ext = {".gguf", ".safetensors", ".pt", ".pth"};
if (!resolve_model_file_from_dir(hires_upscaler,
hires_upscalers_dir,
valid_ext,
"hires upscaler",
hires_upscaler_model_path)) {
return false;
}
resolved_hires_upscaler = SD_HIRES_UPSCALER_MODEL;
}
}
prompt_with_lora = prompt;
if (!lora_model_dir.empty()) {
extract_and_remove_lora(lora_model_dir);
@ -1875,6 +2022,29 @@ bool SDGenerationParams::validate(SDMode mode) {
return false;
}
if (hires_enabled) {
if (hires_width < 0 || hires_height < 0) {
LOG_ERROR("error: hires target width and height must be >= 0");
return false;
}
if (hires_scale <= 0.f && hires_width <= 0 && hires_height <= 0) {
LOG_ERROR("error: hires scale must be positive when target size is not set");
return false;
}
if (hires_steps < 0) {
LOG_ERROR("error: hires steps must be >= 0");
return false;
}
if (hires_denoising_strength <= 0.f || hires_denoising_strength > 1.f) {
LOG_ERROR("error: hires denoising strength must be in (0.0, 1.0]");
return false;
}
if (hires_upscale_tile_size < 1) {
LOG_ERROR("error: hires upscale tile size must be positive");
return false;
}
}
if (mode == UPSCALE) {
if (init_image_path.length() == 0) {
LOG_ERROR("error: upscale mode needs an init image (--init-img)\n");
@ -1885,8 +2055,11 @@ bool SDGenerationParams::validate(SDMode mode) {
return true;
}
bool SDGenerationParams::resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict) {
if (!resolve(lora_model_dir, strict)) {
bool SDGenerationParams::resolve_and_validate(SDMode mode,
const std::string& lora_model_dir,
const std::string& hires_upscalers_dir,
bool strict) {
if (!resolve(lora_model_dir, hires_upscalers_dir, strict)) {
return false;
}
if (!validate(mode)) {
@ -1957,6 +2130,16 @@ sd_img_gen_params_t SDGenerationParams::to_sd_img_gen_params_t() {
params.pm_params = pm_params;
params.vae_tiling_params = vae_tiling_params;
params.cache = cache_params;
params.hires.enabled = hires_enabled;
params.hires.upscaler = resolved_hires_upscaler;
params.hires.model_path = hires_upscaler_model_path.empty() ? nullptr : hires_upscaler_model_path.c_str();
params.hires.scale = hires_scale;
params.hires.target_width = hires_width;
params.hires.target_height = hires_height;
params.hires.steps = hires_steps;
params.hires.denoising_strength = hires_denoising_strength;
params.hires.upscale_tile_size = hires_upscale_tile_size;
return params;
}
@ -2081,6 +2264,15 @@ std::string SDGenerationParams::to_string() const {
<< " seed: " << seed << ",\n"
<< " upscale_repeats: " << upscale_repeats << ",\n"
<< " upscale_tile_size: " << upscale_tile_size << ",\n"
<< " hires: { enabled: " << (hires_enabled ? "true" : "false")
<< ", upscaler: \"" << hires_upscaler << "\""
<< ", model_path: \"" << hires_upscaler_model_path << "\""
<< ", scale: " << hires_scale
<< ", target_width: " << hires_width
<< ", target_height: " << hires_height
<< ", steps: " << hires_steps
<< ", denoising_strength: " << hires_denoising_strength
<< ", upscale_tile_size: " << hires_upscale_tile_size << " },\n"
<< " vae_tiling_params: { "
<< vae_tiling_params.enabled << ", "
<< vae_tiling_params.tile_size_x << ", "
@ -2096,7 +2288,192 @@ std::string version_string() {
return std::string("stable-diffusion.cpp version ") + sd_version() + ", commit " + sd_commit();
}
std::string get_image_params(const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed) {
static std::string safe_json_string(const char* value) {
return value ? value : "";
}
static void set_json_basename_if_not_empty(json& target, const char* key, const std::string& path) {
if (!path.empty()) {
target[key] = sd_basename(path);
}
}
static json build_sampling_metadata_json(const sd_sample_params_t& sample_params,
const std::vector<int>& skip_layers,
const std::vector<float>* custom_sigmas = nullptr) {
json sampling = {
{"steps", sample_params.sample_steps},
{"eta", sample_params.eta},
{"shifted_timestep", sample_params.shifted_timestep},
{"flow_shift", sample_params.flow_shift},
{"guidance",
{
{"txt_cfg", sample_params.guidance.txt_cfg},
{"img_cfg", sample_params.guidance.img_cfg},
{"distilled_guidance", sample_params.guidance.distilled_guidance},
{"slg",
{
{"scale", sample_params.guidance.slg.scale},
{"layers", skip_layers},
{"start", sample_params.guidance.slg.layer_start},
{"end", sample_params.guidance.slg.layer_end},
}},
}},
};
if (sample_params.sample_method != SAMPLE_METHOD_COUNT) {
sampling["method"] = safe_json_string(sd_sample_method_name(sample_params.sample_method));
}
if (sample_params.scheduler != SCHEDULER_COUNT) {
sampling["scheduler"] = safe_json_string(sd_scheduler_name(sample_params.scheduler));
}
if (custom_sigmas != nullptr) {
sampling["custom_sigmas"] = *custom_sigmas;
}
return sampling;
}
std::string build_sdcpp_image_metadata_json(const SDContextParams& ctx_params,
const SDGenerationParams& gen_params,
int64_t seed,
SDMode mode) {
json root;
root["schema"] = "sdcpp.image.params/v1";
root["mode"] = mode == VID_GEN ? "vid_gen" : "img_gen";
root["generator"] = {
{"name", "stable-diffusion.cpp"},
{"version", safe_json_string(sd_version())},
{"commit", safe_json_string(sd_commit())},
};
root["seed"] = seed;
root["width"] = gen_params.get_resolved_width();
root["height"] = gen_params.get_resolved_height();
root["prompt"] = {
{"positive", gen_params.prompt},
{"negative", gen_params.negative_prompt},
};
root["sampling"] = build_sampling_metadata_json(gen_params.sample_params,
gen_params.skip_layers,
&gen_params.custom_sigmas);
json models;
set_json_basename_if_not_empty(models, "model", ctx_params.model_path);
set_json_basename_if_not_empty(models, "clip_l", ctx_params.clip_l_path);
set_json_basename_if_not_empty(models, "clip_g", ctx_params.clip_g_path);
set_json_basename_if_not_empty(models, "clip_vision", ctx_params.clip_vision_path);
set_json_basename_if_not_empty(models, "t5xxl", ctx_params.t5xxl_path);
set_json_basename_if_not_empty(models, "llm", ctx_params.llm_path);
set_json_basename_if_not_empty(models, "llm_vision", ctx_params.llm_vision_path);
set_json_basename_if_not_empty(models, "diffusion_model", ctx_params.diffusion_model_path);
set_json_basename_if_not_empty(models, "high_noise_diffusion_model", ctx_params.high_noise_diffusion_model_path);
set_json_basename_if_not_empty(models, "vae", ctx_params.vae_path);
set_json_basename_if_not_empty(models, "taesd", ctx_params.taesd_path);
set_json_basename_if_not_empty(models, "control_net", ctx_params.control_net_path);
root["models"] = std::move(models);
root["clip_skip"] = gen_params.clip_skip;
root["strength"] = gen_params.strength;
root["control_strength"] = gen_params.control_strength;
root["auto_resize_ref_image"] = gen_params.auto_resize_ref_image;
root["increase_ref_index"] = gen_params.increase_ref_index;
if (mode == VID_GEN) {
root["video"] = {
{"frame_count", gen_params.video_frames},
{"fps", gen_params.fps},
};
root["moe_boundary"] = gen_params.moe_boundary;
root["vace_strength"] = gen_params.vace_strength;
root["high_noise_sampling"] = build_sampling_metadata_json(gen_params.high_noise_sample_params,
gen_params.high_noise_skip_layers);
}
root["rng"] = safe_json_string(sd_rng_type_name(ctx_params.rng_type));
if (ctx_params.sampler_rng_type != RNG_TYPE_COUNT) {
root["sampler_rng"] = safe_json_string(sd_rng_type_name(ctx_params.sampler_rng_type));
}
json loras = json::array();
for (const auto& entry : gen_params.lora_map) {
loras.push_back({
{"name", sd_basename(entry.first)},
{"multiplier", entry.second},
{"is_high_noise", false},
});
}
for (const auto& entry : gen_params.high_noise_lora_map) {
loras.push_back({
{"name", sd_basename(entry.first)},
{"multiplier", entry.second},
{"is_high_noise", true},
});
}
if (!loras.empty()) {
root["loras"] = std::move(loras);
}
if (gen_params.hires_enabled) {
root["hires"] = {
{"enabled", gen_params.hires_enabled},
{"upscaler", gen_params.hires_upscaler},
{"model", gen_params.hires_upscaler_model_path.empty() ? "" : sd_basename(gen_params.hires_upscaler_model_path)},
{"scale", gen_params.hires_scale},
{"target_width", gen_params.hires_width},
{"target_height", gen_params.hires_height},
{"steps", gen_params.hires_steps},
{"denoising_strength", gen_params.hires_denoising_strength},
{"upscale_tile_size", gen_params.hires_upscale_tile_size},
};
}
if (gen_params.cache_params.mode != SD_CACHE_DISABLED) {
root["cache"] = {
{"requested_mode", gen_params.cache_mode},
{"requested_option", gen_params.cache_option},
{"mode", gen_params.cache_params.mode},
{"scm_mask", gen_params.scm_mask},
{"scm_policy_dynamic", gen_params.scm_policy_dynamic},
{"reuse_threshold", gen_params.cache_params.reuse_threshold},
{"start_percent", gen_params.cache_params.start_percent},
{"end_percent", gen_params.cache_params.end_percent},
{"error_decay_rate", gen_params.cache_params.error_decay_rate},
{"use_relative_threshold", gen_params.cache_params.use_relative_threshold},
{"reset_error_on_compute", gen_params.cache_params.reset_error_on_compute},
{"Fn_compute_blocks", gen_params.cache_params.Fn_compute_blocks},
{"Bn_compute_blocks", gen_params.cache_params.Bn_compute_blocks},
{"residual_diff_threshold", gen_params.cache_params.residual_diff_threshold},
{"max_warmup_steps", gen_params.cache_params.max_warmup_steps},
{"max_cached_steps", gen_params.cache_params.max_cached_steps},
{"max_continuous_cached_steps", gen_params.cache_params.max_continuous_cached_steps},
{"taylorseer_n_derivatives", gen_params.cache_params.taylorseer_n_derivatives},
{"taylorseer_skip_interval", gen_params.cache_params.taylorseer_skip_interval},
{"spectrum_w", gen_params.cache_params.spectrum_w},
{"spectrum_m", gen_params.cache_params.spectrum_m},
{"spectrum_lam", gen_params.cache_params.spectrum_lam},
{"spectrum_window_size", gen_params.cache_params.spectrum_window_size},
{"spectrum_flex_window", gen_params.cache_params.spectrum_flex_window},
{"spectrum_warmup_steps", gen_params.cache_params.spectrum_warmup_steps},
{"spectrum_stop_percent", gen_params.cache_params.spectrum_stop_percent},
};
}
if (gen_params.vae_tiling_params.enabled) {
root["vae_tiling"] = {
{"enabled", gen_params.vae_tiling_params.enabled},
{"tile_size_x", gen_params.vae_tiling_params.tile_size_x},
{"tile_size_y", gen_params.vae_tiling_params.tile_size_y},
{"target_overlap", gen_params.vae_tiling_params.target_overlap},
{"rel_size_x", gen_params.vae_tiling_params.rel_size_x},
{"rel_size_y", gen_params.vae_tiling_params.rel_size_y},
};
}
return root.dump();
}
std::string get_image_params(const SDContextParams& ctx_params,
const SDGenerationParams& gen_params,
int64_t seed,
SDMode mode) {
std::string parameter_string;
if (gen_params.prompt_with_lora.size() != 0) {
parameter_string += gen_params.prompt_with_lora + "\n";
@ -2109,7 +2486,7 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
parameter_string += "Steps: " + std::to_string(gen_params.sample_params.sample_steps) + ", ";
parameter_string += "CFG scale: " + std::to_string(gen_params.sample_params.guidance.txt_cfg) + ", ";
if (gen_params.sample_params.guidance.slg.scale != 0 && gen_params.skip_layers.size() != 0) {
parameter_string += "SLG scale: " + std::to_string(gen_params.sample_params.guidance.txt_cfg) + ", ";
parameter_string += "SLG scale: " + std::to_string(gen_params.sample_params.guidance.slg.scale) + ", ";
parameter_string += "Skip layers: [";
for (const auto& layer : gen_params.skip_layers) {
parameter_string += std::to_string(layer) + ", ";
@ -2154,6 +2531,14 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
if (gen_params.clip_skip != -1) {
parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", ";
}
if (gen_params.hires_enabled) {
parameter_string += "Hires upscale: " + gen_params.hires_upscaler + ", ";
parameter_string += "Hires scale: " + std::to_string(gen_params.hires_scale) + ", ";
parameter_string += "Hires resize: " + std::to_string(gen_params.hires_width) + "x" + std::to_string(gen_params.hires_height) + ", ";
parameter_string += "Hires steps: " + std::to_string(gen_params.hires_steps) + ", ";
parameter_string += "Denoising strength: " + std::to_string(gen_params.hires_denoising_strength) + ", ";
}
parameter_string += "Version: stable-diffusion.cpp";
parameter_string += ", SDCPP: " + build_sdcpp_image_metadata_json(ctx_params, gen_params, seed, mode);
return parameter_string;
}

View File

@ -101,6 +101,7 @@ struct SDContextParams {
sd_type_t wtype = SD_TYPE_COUNT;
std::string tensor_type_rules;
std::string lora_model_dir = ".";
std::string hires_upscalers_dir;
std::map<std::string, std::string> embedding_map;
std::vector<sd_embedding_t> embedding_vec;
@ -108,6 +109,7 @@ struct SDContextParams {
rng_type_t rng_type = CUDA_RNG;
rng_type_t sampler_rng_type = RNG_TYPE_COUNT;
bool offload_params_to_cpu = false;
float max_vram = 0.f;
bool enable_mmap = false;
bool control_net_cpu = false;
bool clip_on_cpu = false;
@ -190,12 +192,23 @@ struct SDGenerationParams {
int upscale_repeats = 1;
int upscale_tile_size = 128;
bool hires_enabled = false;
std::string hires_upscaler = "Latent";
std::string hires_upscaler_model_path;
float hires_scale = 2.f;
int hires_width = 0;
int hires_height = 0;
int hires_steps = 0;
float hires_denoising_strength = 0.7f;
int hires_upscale_tile_size = 128;
std::map<std::string, float> lora_map;
std::map<std::string, float> high_noise_lora_map;
// Derived and normalized fields.
std::string prompt_with_lora; // for metadata record only
std::vector<sd_lora_t> lora_vec;
sd_hires_upscaler_t resolved_hires_upscaler;
// Owned execution payload.
SDImageOwner init_image;
@ -225,15 +238,25 @@ struct SDGenerationParams {
void set_width_and_height_if_unset(int w, int h);
int get_resolved_width() const;
int get_resolved_height() const;
bool resolve(const std::string& lora_model_dir, bool strict = false);
bool resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict = false);
bool validate(SDMode mode);
bool resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict = false);
bool resolve_and_validate(SDMode mode,
const std::string& lora_model_dir,
const std::string& hires_upscalers_dir,
bool strict = false);
sd_img_gen_params_t to_sd_img_gen_params_t();
sd_vid_gen_params_t to_sd_vid_gen_params_t();
std::string to_string() const;
};
std::string version_string();
std::string get_image_params(const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed);
std::string build_sdcpp_image_metadata_json(const SDContextParams& ctx_params,
const SDGenerationParams& gen_params,
int64_t seed,
SDMode mode = IMG_GEN);
std::string get_image_params(const SDContextParams& ctx_params,
const SDGenerationParams& gen_params,
int64_t seed,
SDMode mode = IMG_GEN);
#endif // __EXAMPLES_COMMON_COMMON_H__

View File

@ -95,6 +95,57 @@ using WebPMuxPtr = std::unique_ptr<WebPMux, WebPMuxDeleter>;
using WebPAnimEncoderPtr = std::unique_ptr<WebPAnimEncoder, WebPAnimEncoderDeleter>;
#endif
#ifdef SD_USE_WEBM
class MemoryMkvWriter : public mkvmuxer::IMkvWriter {
public:
mkvmuxer::int32 Write(const void* buf, mkvmuxer::uint32 len) override {
if (buf == nullptr && len > 0) {
return -1;
}
const size_t end_pos = position_ + static_cast<size_t>(len);
if (end_pos > data_.size()) {
data_.resize(end_pos);
}
if (len > 0) {
memcpy(data_.data() + position_, buf, len);
}
position_ = end_pos;
return 0;
}
mkvmuxer::int64 Position() const override {
return static_cast<mkvmuxer::int64>(position_);
}
mkvmuxer::int32 Position(mkvmuxer::int64 position) override {
if (position < 0) {
return -1;
}
const size_t target = static_cast<size_t>(position);
if (target > data_.size()) {
data_.resize(target);
}
position_ = target;
return 0;
}
bool Seekable() const override {
return true;
}
void ElementStartNotify(mkvmuxer::uint64, mkvmuxer::int64) override {
}
const std::vector<uint8_t>& data() const {
return data_;
}
private:
std::vector<uint8_t> data_;
size_t position_ = 0;
};
#endif
bool read_binary_file_bytes(const char* path, std::vector<uint8_t>& data) {
std::ifstream fin(fs::path(path), std::ios::binary);
if (!fin) {
@ -570,6 +621,32 @@ void write_u16_le(FILE* f, uint16_t val) {
fwrite(&val, 2, 1, f);
}
void write_u32_le(std::vector<uint8_t>& data, uint32_t val) {
data.push_back(static_cast<uint8_t>(val & 0xFF));
data.push_back(static_cast<uint8_t>((val >> 8) & 0xFF));
data.push_back(static_cast<uint8_t>((val >> 16) & 0xFF));
data.push_back(static_cast<uint8_t>((val >> 24) & 0xFF));
}
void write_u16_le(std::vector<uint8_t>& data, uint16_t val) {
data.push_back(static_cast<uint8_t>(val & 0xFF));
data.push_back(static_cast<uint8_t>((val >> 8) & 0xFF));
}
void patch_u32_le(std::vector<uint8_t>& data, size_t offset, uint32_t val) {
if (offset + 4 > data.size()) {
return;
}
data[offset + 0] = static_cast<uint8_t>(val & 0xFF);
data[offset + 1] = static_cast<uint8_t>((val >> 8) & 0xFF);
data[offset + 2] = static_cast<uint8_t>((val >> 16) & 0xFF);
data[offset + 3] = static_cast<uint8_t>((val >> 24) & 0xFF);
}
void write_fourcc(std::vector<uint8_t>& data, const char* fourcc) {
data.insert(data.end(), fourcc, fourcc + 4);
}
EncodedImageFormat encoded_image_format_from_path(const std::string& path) {
std::string ext = fs::path(path).extension().string();
std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
@ -699,95 +776,96 @@ uint8_t* load_image_from_memory(const char* image_bytes,
return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel);
}
int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::vector<uint8_t> create_mjpg_avi_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) {
if (num_images == 0) {
fprintf(stderr, "Error: Image array is empty.\n");
return -1;
return {};
}
FilePtr file(fopen(filename, "wb"));
if (!file) {
perror("Error opening file for writing");
return -1;
}
FILE* f = file.get();
uint32_t width = images[0].width;
uint32_t height = images[0].height;
uint32_t channels = images[0].channel;
if (channels != 3 && channels != 4) {
fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
return -1;
return {};
}
fwrite("RIFF", 4, 1, f);
long riff_size_pos = ftell(f);
write_u32_le(f, 0);
fwrite("AVI ", 4, 1, f);
// stb_image_write changes JPEG sampling behavior above quality 90.
// MJPG AVI playback is more compatible when we keep the encoder on the
// <= 90 path.
const int mjpg_quality = std::clamp(quality, 1, 90);
fwrite("LIST", 4, 1, f);
write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
fwrite("hdrl", 4, 1, f);
std::vector<uint8_t> avi_data;
avi_data.reserve(static_cast<size_t>(num_images) * 1024);
fwrite("avih", 4, 1, f);
write_u32_le(f, 56);
write_u32_le(f, 1000000 / fps);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_u32_le(f, 0x110);
write_u32_le(f, num_images);
write_u32_le(f, 0);
write_u32_le(f, 1);
write_u32_le(f, width * height * 3);
write_u32_le(f, width);
write_u32_le(f, height);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_fourcc(avi_data, "RIFF");
const size_t riff_size_pos = avi_data.size();
write_u32_le(avi_data, 0);
write_fourcc(avi_data, "AVI ");
fwrite("LIST", 4, 1, f);
write_u32_le(f, 4 + 8 + 56 + 8 + 40);
fwrite("strl", 4, 1, f);
write_fourcc(avi_data, "LIST");
write_u32_le(avi_data, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
write_fourcc(avi_data, "hdrl");
fwrite("strh", 4, 1, f);
write_u32_le(f, 56);
fwrite("vids", 4, 1, f);
fwrite("MJPG", 4, 1, f);
write_u32_le(f, 0);
write_u16_le(f, 0);
write_u16_le(f, 0);
write_u32_le(f, 0);
write_u32_le(f, 1);
write_u32_le(f, fps);
write_u32_le(f, 0);
write_u32_le(f, num_images);
write_u32_le(f, width * height * 3);
write_u32_le(f, (uint32_t)-1);
write_u32_le(f, 0);
write_u16_le(f, 0);
write_u16_le(f, 0);
write_u16_le(f, 0);
write_u16_le(f, 0);
write_fourcc(avi_data, "avih");
write_u32_le(avi_data, 56);
write_u32_le(avi_data, 1000000 / fps);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0x110);
write_u32_le(avi_data, num_images);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 1);
write_u32_le(avi_data, width * height * 3);
write_u32_le(avi_data, width);
write_u32_le(avi_data, height);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0);
fwrite("strf", 4, 1, f);
write_u32_le(f, 40);
write_u32_le(f, 40);
write_u32_le(f, width);
write_u32_le(f, height);
write_u16_le(f, 1);
write_u16_le(f, 24);
fwrite("MJPG", 4, 1, f);
write_u32_le(f, width * height * 3);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_u32_le(f, 0);
write_fourcc(avi_data, "LIST");
write_u32_le(avi_data, 4 + 8 + 56 + 8 + 40);
write_fourcc(avi_data, "strl");
fwrite("LIST", 4, 1, f);
long movi_size_pos = ftell(f);
write_u32_le(f, 0);
fwrite("movi", 4, 1, f);
write_fourcc(avi_data, "strh");
write_u32_le(avi_data, 56);
write_fourcc(avi_data, "vids");
write_fourcc(avi_data, "MJPG");
write_u32_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 1);
write_u32_le(avi_data, fps);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, num_images);
write_u32_le(avi_data, width * height * 3);
write_u32_le(avi_data, static_cast<uint32_t>(-1));
write_u32_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_u16_le(avi_data, 0);
write_fourcc(avi_data, "strf");
write_u32_le(avi_data, 40);
write_u32_le(avi_data, 40);
write_u32_le(avi_data, width);
write_u32_le(avi_data, height);
write_u16_le(avi_data, 1);
write_u16_le(avi_data, 24);
write_fourcc(avi_data, "MJPG");
write_u32_le(avi_data, width * height * 3);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_u32_le(avi_data, 0);
write_fourcc(avi_data, "LIST");
const size_t movi_size_pos = avi_data.size();
write_u32_le(avi_data, 0);
write_fourcc(avi_data, "movi");
std::vector<avi_index_entry> index(static_cast<size_t>(num_images));
std::vector<uint8_t> jpeg_data;
@ -801,55 +879,61 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int
buffer->insert(buffer->end(), src, src + size);
};
if (!stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, quality)) {
if (!stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, mjpg_quality)) {
fprintf(stderr, "Error: Failed to encode JPEG frame.\n");
return -1;
return {};
}
fwrite("00dc", 4, 1, f);
write_u32_le(f, (uint32_t)jpeg_data.size());
index[i].offset = ftell(f) - 8;
index[i].size = (uint32_t)jpeg_data.size();
fwrite(jpeg_data.data(), 1, jpeg_data.size(), f);
index[i].offset = static_cast<uint32_t>(avi_data.size());
write_fourcc(avi_data, "00dc");
write_u32_le(avi_data, static_cast<uint32_t>(jpeg_data.size()));
index[i].size = (uint32_t)jpeg_data.size();
avi_data.insert(avi_data.end(), jpeg_data.begin(), jpeg_data.end());
if (jpeg_data.size() % 2) {
fputc(0, f);
avi_data.push_back(0);
}
}
long cur_pos = ftell(f);
long movi_size = cur_pos - movi_size_pos - 4;
fseek(f, movi_size_pos, SEEK_SET);
write_u32_le(f, movi_size);
fseek(f, cur_pos, SEEK_SET);
const size_t movi_size = avi_data.size() - movi_size_pos - 4;
patch_u32_le(avi_data, movi_size_pos, static_cast<uint32_t>(movi_size));
fwrite("idx1", 4, 1, f);
write_u32_le(f, num_images * 16);
write_fourcc(avi_data, "idx1");
write_u32_le(avi_data, num_images * 16);
for (int i = 0; i < num_images; i++) {
fwrite("00dc", 4, 1, f);
write_u32_le(f, 0x10);
write_u32_le(f, index[i].offset);
write_u32_le(f, index[i].size);
write_fourcc(avi_data, "00dc");
write_u32_le(avi_data, 0x10);
write_u32_le(avi_data, index[i].offset);
write_u32_le(avi_data, index[i].size);
}
cur_pos = ftell(f);
long file_size = cur_pos - riff_size_pos - 4;
fseek(f, riff_size_pos, SEEK_SET);
write_u32_le(f, file_size);
fseek(f, cur_pos, SEEK_SET);
const size_t file_size = avi_data.size() - riff_size_pos - 4;
patch_u32_le(avi_data, riff_size_pos, static_cast<uint32_t>(file_size));
return avi_data;
}
int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::vector<uint8_t> avi_data = create_mjpg_avi_from_sd_images_to_vector(images, num_images, fps, quality);
if (avi_data.empty()) {
return -1;
}
if (!write_binary_file_bytes(filename, avi_data)) {
perror("Error opening file for writing");
return -1;
}
return 0;
}
#ifdef SD_USE_WEBP
int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::vector<uint8_t> create_animated_webp_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) {
if (num_images == 0) {
fprintf(stderr, "Error: Image array is empty.\n");
return -1;
return {};
}
if (fps <= 0) {
fprintf(stderr, "Error: FPS must be positive.\n");
return -1;
return {};
}
const int width = static_cast<int>(images[0].width);
@ -857,14 +941,14 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
const int channels = static_cast<int>(images[0].channel);
if (channels != 1 && channels != 3 && channels != 4) {
fprintf(stderr, "Error: Unsupported channel count: %d\n", channels);
return -1;
return {};
}
WebPAnimEncoderOptions anim_options;
WebPConfig config;
if (!WebPAnimEncoderOptionsInit(&anim_options) || !WebPConfigInit(&config)) {
fprintf(stderr, "Error: Failed to initialize WebP animation encoder.\n");
return -1;
return {};
}
config.quality = static_cast<float>(quality);
@ -875,13 +959,13 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
}
if (!WebPValidateConfig(&config)) {
fprintf(stderr, "Error: Invalid WebP encoder configuration.\n");
return -1;
return {};
}
WebPAnimEncoderPtr enc(WebPAnimEncoderNew(width, height, &anim_options));
if (enc == nullptr) {
fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n");
return -1;
return {};
}
const int frame_duration_ms = std::max(1, static_cast<int>(std::lround(1000.0 / static_cast<double>(fps))));
@ -891,13 +975,13 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
const sd_image_t& image = images[i];
if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height) {
fprintf(stderr, "Error: Frame dimensions do not match.\n");
return -1;
return {};
}
WebPPictureGuard picture;
if (!picture.initialized) {
fprintf(stderr, "Error: Failed to initialize WebPPicture.\n");
return -1;
return {};
}
picture.picture.use_argb = 1;
picture.picture.width = width;
@ -921,12 +1005,12 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
if (!picture_ok) {
fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n");
return -1;
return {};
}
if (!WebPAnimEncoderAdd(enc.get(), &picture.picture, timestamp_ms, &config)) {
fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc.get()));
return -1;
return {};
}
timestamp_ms += frame_duration_ms;
@ -934,52 +1018,50 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
if (!WebPAnimEncoderAdd(enc.get(), nullptr, timestamp_ms, nullptr)) {
fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc.get()));
return -1;
return {};
}
WebPDataGuard webp_data;
if (!WebPAnimEncoderAssemble(enc.get(), &webp_data.data)) {
fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc.get()));
return -1;
return {};
}
FilePtr f(fopen(filename, "wb"));
if (!f) {
return std::vector<uint8_t>(webp_data.data.bytes, webp_data.data.bytes + webp_data.data.size);
}
int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::vector<uint8_t> webp_data = create_animated_webp_from_sd_images_to_vector(images, num_images, fps, quality);
if (webp_data.empty()) {
return -1;
}
if (!write_binary_file_bytes(filename, webp_data)) {
perror("Error opening file for writing");
return -1;
}
if (webp_data.data.size > 0 && fwrite(webp_data.data.bytes, 1, webp_data.data.size, f.get()) != webp_data.data.size) {
fprintf(stderr, "Error: Failed to write animated WebP file.\n");
return -1;
}
return 0;
}
#endif
#ifdef SD_USE_WEBM
int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::vector<uint8_t> create_webm_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) {
if (num_images == 0) {
fprintf(stderr, "Error: Image array is empty.\n");
return -1;
return {};
}
if (fps <= 0) {
fprintf(stderr, "Error: FPS must be positive.\n");
return -1;
return {};
}
const int width = static_cast<int>(images[0].width);
const int height = static_cast<int>(images[0].height);
if (width <= 0 || height <= 0) {
fprintf(stderr, "Error: Invalid frame dimensions.\n");
return -1;
return {};
}
mkvmuxer::MkvWriter writer;
if (!writer.Open(filename)) {
fprintf(stderr, "Error: Could not open WebM file for writing.\n");
return -1;
}
MemoryMkvWriter writer;
const int ret = [&]() -> int {
mkvmuxer::Segment segment;
@ -1045,30 +1127,63 @@ int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num
}
return 0;
}();
writer.Close();
return ret;
if (ret != 0) {
return {};
}
return writer.data();
}
int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::vector<uint8_t> webm_data = create_webm_from_sd_images_to_vector(images, num_images, fps, quality);
if (webm_data.empty()) {
return -1;
}
if (!write_binary_file_bytes(filename, webm_data)) {
perror("Error opening file for writing");
return -1;
}
return 0;
}
#endif
int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::string path = filename ? filename : "";
auto pos = path.find_last_of('.');
std::string ext = pos == std::string::npos ? "" : path.substr(pos);
for (char& ch : ext) {
ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
std::vector<uint8_t> create_video_from_sd_images_to_vector(const std::string& output_format,
sd_image_t* images,
int num_images,
int fps,
int quality) {
std::string format = output_format;
std::transform(format.begin(), format.end(), format.begin(),
[](unsigned char c) { return static_cast<char>(tolower(c)); });
if (!format.empty() && format[0] == '.') {
format.erase(format.begin());
}
#ifdef SD_USE_WEBM
if (ext == ".webm") {
return create_webm_from_sd_images(filename, images, num_images, fps, quality);
if (format == "webm") {
return create_webm_from_sd_images_to_vector(images, num_images, fps, quality);
}
#endif
#ifdef SD_USE_WEBP
if (ext == ".webp") {
return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality);
if (format == "webp") {
return create_animated_webp_from_sd_images_to_vector(images, num_images, fps, quality);
}
#endif
return create_mjpg_avi_from_sd_images(filename, images, num_images, fps, quality);
return create_mjpg_avi_from_sd_images_to_vector(images, num_images, fps, quality);
}
int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
std::string path = filename ? filename : "";
auto pos = path.find_last_of('.');
std::string ext = pos == std::string::npos ? "" : path.substr(pos);
std::vector<uint8_t> video_data = create_video_from_sd_images_to_vector(ext, images, num_images, fps, quality);
if (video_data.empty()) {
return -1;
}
if (!write_binary_file_bytes(filename, video_data)) {
perror("Error opening file for writing");
return -1;
}
return 0;
}

View File

@ -58,6 +58,10 @@ int create_mjpg_avi_from_sd_images(const char* filename,
int num_images,
int fps,
int quality = 90);
std::vector<uint8_t> create_mjpg_avi_from_sd_images_to_vector(sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#ifdef SD_USE_WEBP
int create_animated_webp_from_sd_images(const char* filename,
@ -65,6 +69,10 @@ int create_animated_webp_from_sd_images(const char* filename,
int num_images,
int fps,
int quality = 90);
std::vector<uint8_t> create_animated_webp_from_sd_images_to_vector(sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#endif
#ifdef SD_USE_WEBM
@ -73,6 +81,10 @@ int create_webm_from_sd_images(const char* filename,
int num_images,
int fps,
int quality = 90);
std::vector<uint8_t> create_webm_from_sd_images_to_vector(sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#endif
int create_video_from_sd_images(const char* filename,
@ -80,5 +92,10 @@ int create_video_from_sd_images(const char* filename,
int num_images,
int fps,
int quality = 90);
std::vector<uint8_t> create_video_from_sd_images_to_vector(const std::string& output_format,
sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#endif // __MEDIA_IO_H__

View File

@ -123,11 +123,11 @@ In this case, the server will load and serve the specified `index.html` file ins
usage: ./bin/sd-server [options]
Svr Options:
-l, --listen-ip <string> server listen ip (default: 127.0.0.1)
-l, --listen-ip <string> server listen ip (default: 127.0.0.1)
--serve-html-path <string> path to HTML file to serve at root (optional)
--listen-port <int> server listen port (default: 1234)
-v, --verbose print extra info
--color colors the logging tags according to level
--color colors the logging tags according to level
-h, --help show this help message and exit
Context Options:
@ -136,7 +136,8 @@ Context Options:
--clip_g <string> path to the clip-g text encoder
--clip_vision <string> path to the clip-vision encoder
--t5xxl <string> path to the t5xxl text encoder
--llm <string> path to the llm text encoder. For example: (qwenvl2.5 for qwen-image, mistral-small3.2 for flux2, ...)
--llm <string> path to the llm text encoder. For example: (qwenvl2.5 for qwen-image,
mistral-small3.2 for flux2, ...)
--llm_vision <string> path to the llm vit
--qwen2vl <string> alias of --llm. Deprecated.
--qwen2vl_vision <string> alias of --llm_vision. Deprecated.
@ -148,16 +149,18 @@ Context Options:
--control-net <string> path to control net model
--embd-dir <string> embeddings directory
--lora-model-dir <string> lora model directory
--hires-upscalers-dir <string> highres fix upscaler model directory
--tensor-type-rules <string> weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0")
--photo-maker <string> path to PHOTOMAKER model
--upscale-model <string> path to esrgan model.
-t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0, then threads will be set to the number of
CPU physical cores
-t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0,
then threads will be set to the number of CPU physical cores
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
--vae-tiling process vae in tiles to reduce memory usage
--max-vram <float> maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables
graph splitting
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
when needed
--mmap whether to memory-map model
--control-net-cpu keep controlnet in cpu (for low vram)
--clip-on-cpu keep clip in cpu (for low vram)
@ -172,20 +175,19 @@ Context Options:
--chroma-disable-dit-mask disable dit mask for chroma
--qwen-image-zero-cond-t enable zero_cond_t for qwen image
--chroma-enable-t5-mask enable t5 mask for chroma
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
type of the weight file
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K,
q4_K). If not specified, the default is the type of the weight file
--rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)
--sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow]
--lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights
contain any quantized parameters, the at_runtime mode will be used; otherwise,
immediately will be used.The immediately mode may have precision and
compatibility issues with quantized parameters, but it usually offers faster inference
speed and, in some cases, lower memory usage. The at_runtime mode, on the
other hand, is exactly the opposite.
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32)
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1
(overrides --vae-tile-size)
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow,
flux2_flow]
--lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is
auto. In auto mode, if the model weights contain any quantized parameters,
the at_runtime mode will be used; otherwise, immediately will be used.The
immediately mode may have precision and compatibility issues with quantized
parameters, but it usually offers faster inference speed and, in some cases,
lower memory usage. The at_runtime mode, on the other hand, is exactly the
opposite.
Default Generation Options:
-p, --prompt <string> the prompt to render
@ -194,65 +196,97 @@ Default Generation Options:
--end-img <string> path to the end image, required by flf2v
--mask <string> path to the mask image
--control-image <string> path to control image, control net
--control-video <string> path to control video frames, It must be a directory path. The video frames inside should be stored as images in
lexicographical (character) order. For example, if the control video path is
`frames`, the directory contain images such as 00.png, 01.png, ... etc.
--control-video <string> path to control video frames, It must be a directory path. The video frames
inside should be stored as images in lexicographical (character) order. For
example, if the control video path is `frames`, the directory contain images
such as 00.png, 01.png, ... etc.
--pm-id-images-dir <string> path to PHOTOMAKER input id images dir
--pm-id-embed-path <string> path to PHOTOMAKER v2 id embed
--hires-upscaler <string> highres fix upscaler, Lanczos, Nearest, Latent, Latent (nearest), Latent
(nearest-exact), Latent (antialiased), Latent (bicubic), Latent (bicubic
antialiased), or a model name under --hires-upscalers-dir (default: Latent)
-H, --height <int> image height, in pixel space (default: 512)
-W, --width <int> image width, in pixel space (default: 512)
--steps <int> number of sample steps (default: 20)
--high-noise-steps <int> (high noise) number of sample steps (default: -1 = auto)
--clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). <= 0 represents unspecified,
will be 1 for SD1.x, 2 for SD2.x
--clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer
(default: -1). <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
-b, --batch-count <int> batch count
--video-frames <int> video frames (default: 1)
--fps <int> fps (default: 24)
--timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for NitroSD-Realism around 250 and 500 for
NitroSD-Vibrant
--timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for
NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
--upscale-repeats <int> Run the ESRGAN upscaler this many times (default: 1)
--upscale-tile-size <int> tile size for ESRGAN upscaling (default: 128)
--hires-width <int> highres fix target width, 0 to use --hires-scale (default: 0)
--hires-height <int> highres fix target height, 0 to use --hires-scale (default: 0)
--hires-steps <int> highres fix second pass sample steps, 0 to reuse --steps (default: 0)
--hires-upscale-tile-size <int> highres fix upscaler tile size, reserved for model-backed upscalers (default:
128)
--cfg-scale <float> unconditional guidance scale: (default: 7.0)
--img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
--img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same
as --cfg-scale)
--guidance <float> distilled guidance scale for models with guidance input (default: 3.5)
--slg-scale <float> skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means disabled, a value of 2.5 is nice for sd3.5
medium
--slg-scale <float> skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means
disabled, a value of 2.5 is nice for sd3.5 medium
--skip-layer-start <float> SLG enabling point (default: 0.01)
--skip-layer-end <float> SLG disabling point (default: 0.2)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and
res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models
(default: same as --cfg-scale)
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input
(default: 3.5)
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default:
0)
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd,
res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
--strength <float> strength for noising/unnoising (default: 0.75)
--pm-style-strength <float>
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if `--high-noise-steps` is set to -1
--pm-style-strength <float>
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full
destruction of information in init image
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if
`--high-noise-steps` is set to -1
--vace-strength <float> wan vace strength
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
--hires-scale <float> highres fix scale when target size is not set (default: 2.0)
--hires-denoising-strength <float> highres fix second pass denoising strength (default: 0.7)
--increase-ref-index automatically increase the indices of references images based on the order
they are listed (starting with 1).
--disable-auto-resize-ref-image disable auto resize of ref images
--disable-image-metadata do not embed generation metadata on image files
--vae-tiling process vae in tiles to reduce memory usage
--hires enable highres fix
-s, --seed RNG seed (default: 42, use random seed for < 0)
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a
otherwise)
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan,
euler_a otherwise
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
kl_optimal, lcm, bong_tangent], default: discrete
--sigmas custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0").
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m,
dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s,
er_sde] (default: euler for Flux/SD3/Wan, euler_a otherwise)
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a,
dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep,
res_2s, er_sde] default: euler for Flux/SD3/Wan, euler_a otherwise
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits,
smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default:
discrete
--sigmas custom sigma values for the sampler, comma-separated (e.g.,
"14.61,7.8,3.5,0.0").
--skip-layers layers to skip for SLG steps (default: [7,8,9])
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), 'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET),
'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), 'spectrum' (UNET/DiT
Chebyshev+Taylor forecasting)
--cache-option named cache params (key=value format, comma-separated). easycache/ucache:
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=. Examples:
"threshold=0.25" or "threshold=1.5,reset=0"
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit:
Fn=,Bn=,threshold=,warmup=; spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=.
Examples: "threshold=0.25" or "threshold=1.5,reset=0"
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g.,
"1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
--scm-policy SCM policy: 'dynamic' (default) or 'static'
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32)
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size
if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)
```

View File

@ -9,7 +9,7 @@ The server currently exposes three API families:
- `sdcpp API` under `/sdcpp/v1/...`
The `sdcpp API` is the native API surface.
Its request schema is also the canonical schema for `sd_cpp_extra_args`.
Its request schema is the same schema used by `sd_cpp_extra_args`.
Global LoRA rule:
@ -38,6 +38,8 @@ Current generation-related endpoints include:
- `POST /sdapi/v1/txt2img`
- `POST /sdapi/v1/img2img`
- `GET /sdapi/v1/loras`
- `GET /sdapi/v1/upscalers`
- `GET /sdapi/v1/latent-upscale-modes`
- `GET /sdapi/v1/samplers`
- `GET /sdapi/v1/schedulers`
- `GET /sdapi/v1/sd-models`
@ -55,8 +57,6 @@ Current endpoints include:
- `POST /sdcpp/v1/jobs/{id}/cancel`
- `POST /sdcpp/v1/vid_gen`
`POST /sdcpp/v1/vid_gen` is currently exposed but returns `501 Not Implemented`.
## `sd_cpp_extra_args`
`sd_cpp_extra_args` is an extension mechanism for the compatibility APIs.
@ -79,12 +79,12 @@ Behavior:
- The JSON block is parsed using the same field rules as the `sdcpp API`.
- The block is removed from the final prompt before generation.
Intended use:
Supported use:
- extend `OpenAI API` requests with native `stable-diffusion.cpp` controls
- extend `sdapi` requests with native `stable-diffusion.cpp` controls
Not intended use:
Unsupported use:
- do not use `sd_cpp_extra_args` with `/sdcpp/v1/*`
@ -218,6 +218,13 @@ Currently supported request fields:
| `scheduler` | `string` | Scheduler name |
| `lora` | `array<object>` | Structured LoRA list |
| `extra_images` | `array<string>` | Base64 or data URL images |
| `enable_hr` | `boolean` | Enable highres fix for `txt2img` |
| `hr_upscaler` | `string` | `Lanczos`, `Nearest`, a latent mode such as `Latent (nearest-exact)`, or an upscaler model name from `/sdapi/v1/upscalers` |
| `hr_scale` | `number` | Highres scale when resize target is not set |
| `hr_resize_x` | `integer` | Highres target width, `0` to use scale |
| `hr_resize_y` | `integer` | Highres target height, `0` to use scale |
| `hr_steps` | `integer` | Highres second-pass sample steps, `0` to reuse `steps` |
| `denoising_strength` | `number` | Highres denoising strength for `txt2img` |
Native extension fields:
@ -243,6 +250,8 @@ Currently supported request fields:
| `inpainting_mask_invert` | `integer` or `boolean` | Treated as invert flag |
| `denoising_strength` | `number` | Clamped to `0.0..1.0` |
Highres fix fields are currently handled for `txt2img`; `img2img` uses `denoising_strength` as image-to-image strength.
Native extension fields:
- any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
@ -260,6 +269,8 @@ Response fields:
Currently exposed:
- `GET /sdapi/v1/loras`
- `GET /sdapi/v1/upscalers`
- `GET /sdapi/v1/latent-upscale-modes`
- `GET /sdapi/v1/samplers`
- `GET /sdapi/v1/schedulers`
- `GET /sdapi/v1/sd-models`
@ -274,6 +285,26 @@ Response fields:
| `[].name` | `string` | Display name derived from file stem |
| `[].path` | `string` | Relative path under the configured LoRA directory |
`GET /sdapi/v1/upscalers`
| Field | Type | Notes |
| --- | --- | --- |
| `[].name` | `string` | Built-in name or model stem |
| `[].model_name` | `string \| null` | Model family label for model-backed upscalers |
| `[].model_path` | `string \| null` | Absolute model path for model-backed upscalers |
| `[].model_url` | `string \| null` | Currently always null |
| `[].scale` | `integer` | Currently `4` |
Built-in entries include `None`, `Lanczos`, and `Nearest`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
`GET /sdapi/v1/latent-upscale-modes`
| Field | Type | Notes |
| --- | --- | --- |
| `[].name` | `string` | WebUI-compatible latent upscale mode name |
Built-in latent modes include `Latent`, `Latent (nearest)`, `Latent (nearest-exact)`, `Latent (antialiased)`, `Latent (bicubic)`, and `Latent (bicubic antialiased)`.
`GET /sdapi/v1/samplers`
| Field | Type | Notes |
@ -372,20 +403,26 @@ Field types:
Returns frontend-friendly capability metadata.
Typical contents:
The mode-aware fields are the primary interface. The top-level compatibility fields are deprecated mirrors kept for older clients.
| Field | Type |
| --- | --- |
| `model` | `object` |
| `defaults` | `object` |
| `loras` | `array<object>` |
| `samplers` | `array<string>` |
| `schedulers` | `array<string>` |
| `output_formats` | `array<string>` |
| `limits` | `object` |
| `features` | `object` |
Top-level fields:
Nested fields currently returned:
| Field | Type | Notes |
| --- | --- | --- |
| `model` | `object` | Loaded model metadata |
| `current_mode` | `string` | The native generation mode mirrored by top-level compatibility fields |
| `supported_modes` | `array<string>` | Supported native modes such as `img_gen` or `vid_gen` |
| `defaults` | `object` | Deprecated compatibility mirror of `defaults_by_mode[current_mode]` |
| `output_formats` | `array<string>` | Deprecated compatibility mirror of `output_formats_by_mode[current_mode]` |
| `features` | `object` | Deprecated compatibility mirror of `features_by_mode[current_mode]` |
| `defaults_by_mode` | `object` | Explicit defaults for each supported mode |
| `output_formats_by_mode` | `object` | Explicit output formats for each supported mode |
| `features_by_mode` | `object` | Explicit feature flags for each supported mode |
| `samplers` | `array<string>` | Available sampling methods |
| `schedulers` | `array<string>` | Available schedulers |
| `loras` | `array<object>` | Available LoRA entries |
| `upscalers` | `array<object>` | Available model-backed highres upscalers |
| `limits` | `object` | Shared queue and size limits |
`model`
@ -395,50 +432,24 @@ Nested fields currently returned:
| `model.stem` | `string` |
| `model.path` | `string` |
`defaults`
Compatibility rules:
- `defaults`, `output_formats`, and `features` are deprecated compatibility mirrors
- those three top-level fields always mirror `current_mode`
- `supported_modes`, `defaults_by_mode`, `output_formats_by_mode`, and `features_by_mode` are the mode-aware fields
Mode-aware objects:
| Field | Type |
| --- | --- |
| `defaults.prompt` | `string` |
| `defaults.negative_prompt` | `string` |
| `defaults.clip_skip` | `integer` |
| `defaults.width` | `integer` |
| `defaults.height` | `integer` |
| `defaults.strength` | `number` |
| `defaults.seed` | `integer` |
| `defaults.batch_count` | `integer` |
| `defaults.auto_resize_ref_image` | `boolean` |
| `defaults.increase_ref_index` | `boolean` |
| `defaults.control_strength` | `number` |
| `defaults.sample_params` | `object` |
| `defaults.sample_params.scheduler` | `string` |
| `defaults.sample_params.sample_method` | `string` |
| `defaults.sample_params.sample_steps` | `integer` |
| `defaults.sample_params.eta` | `number \| null` |
| `defaults.sample_params.shifted_timestep` | `integer` |
| `defaults.sample_params.flow_shift` | `number \| null` |
| `defaults.sample_params.guidance` | `object` |
| `defaults.sample_params.guidance.txt_cfg` | `number` |
| `defaults.sample_params.guidance.img_cfg` | `number \| null` |
| `defaults.sample_params.guidance.distilled_guidance` | `number` |
| `defaults.sample_params.guidance.slg` | `object` |
| `defaults.sample_params.guidance.slg.layers` | `array<integer>` |
| `defaults.sample_params.guidance.slg.layer_start` | `number` |
| `defaults.sample_params.guidance.slg.layer_end` | `number` |
| `defaults.sample_params.guidance.slg.scale` | `number` |
| `defaults.vae_tiling_params` | `object` |
| `defaults.vae_tiling_params.enabled` | `boolean` |
| `defaults.vae_tiling_params.tile_size_x` | `integer` |
| `defaults.vae_tiling_params.tile_size_y` | `integer` |
| `defaults.vae_tiling_params.target_overlap` | `number` |
| `defaults.vae_tiling_params.rel_size_x` | `number` |
| `defaults.vae_tiling_params.rel_size_y` | `number` |
| `defaults.cache_mode` | `string` |
| `defaults.cache_option` | `string` |
| `defaults.scm_mask` | `string` |
| `defaults.scm_policy_dynamic` | `boolean` |
| `defaults.output_format` | `string` |
| `defaults.output_compression` | `integer` |
| `defaults_by_mode.img_gen` | `object` |
| `defaults_by_mode.vid_gen` | `object` |
| `output_formats_by_mode.img_gen` | `array<string>` |
| `output_formats_by_mode.vid_gen` | `array<string>` |
| `features_by_mode.img_gen` | `object` |
| `features_by_mode.vid_gen` | `object` |
Shared nested fields:
`loras`
@ -447,6 +458,14 @@ Nested fields currently returned:
| `loras[].name` | `string` |
| `loras[].path` | `string` |
`upscalers`
| Field | Type | Notes |
| --- | --- | --- |
| `upscalers[].name` | `string` | Built-in name or model stem; use this value in `hires.upscaler` |
Built-in entries include `None`, `Lanczos`, `Nearest`, `Latent`, `Latent (nearest)`, `Latent (nearest-exact)`, `Latent (antialiased)`, `Latent (bicubic)`, and `Latent (bicubic antialiased)`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
`limits`
| Field | Type |
@ -458,19 +477,110 @@ Nested fields currently returned:
| `limits.max_batch_count` | `integer` |
| `limits.max_queue_size` | `integer` |
`features`
Shared default fields used by both `img_gen` and `vid_gen`:
| Field | Type |
| --- | --- |
| `features.init_image` | `boolean` |
| `features.mask_image` | `boolean` |
| `features.control_image` | `boolean` |
| `features.ref_images` | `boolean` |
| `features.lora` | `boolean` |
| `features.vae_tiling` | `boolean` |
| `features.cache` | `boolean` |
| `features.cancel_queued` | `boolean` |
| `features.cancel_generating` | `boolean` |
| `prompt` | `string` |
| `negative_prompt` | `string` |
| `clip_skip` | `integer` |
| `width` | `integer` |
| `height` | `integer` |
| `strength` | `number` |
| `seed` | `integer` |
| `sample_params` | `object` |
| `sample_params.scheduler` | `string` |
| `sample_params.sample_method` | `string` |
| `sample_params.sample_steps` | `integer` |
| `sample_params.eta` | `number \| null` |
| `sample_params.shifted_timestep` | `integer` |
| `sample_params.flow_shift` | `number \| null` |
| `sample_params.guidance.txt_cfg` | `number` |
| `sample_params.guidance.img_cfg` | `number \| null` |
| `sample_params.guidance.distilled_guidance` | `number` |
| `sample_params.guidance.slg.layers` | `array<integer>` |
| `sample_params.guidance.slg.layer_start` | `number` |
| `sample_params.guidance.slg.layer_end` | `number` |
| `sample_params.guidance.slg.scale` | `number` |
| `vae_tiling_params` | `object` |
| `vae_tiling_params.enabled` | `boolean` |
| `vae_tiling_params.tile_size_x` | `integer` |
| `vae_tiling_params.tile_size_y` | `integer` |
| `vae_tiling_params.target_overlap` | `number` |
| `vae_tiling_params.rel_size_x` | `number` |
| `vae_tiling_params.rel_size_y` | `number` |
| `cache_mode` | `string` |
| `cache_option` | `string` |
| `scm_mask` | `string` |
| `scm_policy_dynamic` | `boolean` |
| `output_format` | `string` |
| `output_compression` | `integer` |
`img_gen`-specific default fields:
| Field | Type |
| --- | --- |
| `batch_count` | `integer` |
| `auto_resize_ref_image` | `boolean` |
| `increase_ref_index` | `boolean` |
| `control_strength` | `number` |
| `hires` | `object` |
| `hires.enabled` | `boolean` |
| `hires.upscaler` | `string` |
| `hires.scale` | `number` |
| `hires.target_width` | `integer` |
| `hires.target_height` | `integer` |
| `hires.steps` | `integer` |
| `hires.denoising_strength` | `number` |
| `hires.upscale_tile_size` | `integer` |
`vid_gen`-specific default fields:
| Field | Type |
| --- | --- |
| `video_frames` | `integer` |
| `fps` | `integer` |
| `moe_boundary` | `number` |
| `vace_strength` | `number` |
| `high_noise_sample_params` | `object` |
| `high_noise_sample_params.scheduler` | `string` |
| `high_noise_sample_params.sample_method` | `string` |
| `high_noise_sample_params.sample_steps` | `integer` |
| `high_noise_sample_params.eta` | `number \| null` |
| `high_noise_sample_params.shifted_timestep` | `integer` |
| `high_noise_sample_params.flow_shift` | `number \| null` |
| `high_noise_sample_params.guidance.txt_cfg` | `number` |
| `high_noise_sample_params.guidance.img_cfg` | `number \| null` |
| `high_noise_sample_params.guidance.distilled_guidance` | `number` |
| `high_noise_sample_params.guidance.slg.layers` | `array<integer>` |
| `high_noise_sample_params.guidance.slg.layer_start` | `number` |
| `high_noise_sample_params.guidance.slg.layer_end` | `number` |
| `high_noise_sample_params.guidance.slg.scale` | `number` |
Fields returned in `features_by_mode.img_gen`:
- `init_image`
- `mask_image`
- `control_image`
- `ref_images`
- `lora`
- `vae_tiling`
- `hires`
- `cache`
- `cancel_queued`
- `cancel_generating`
Fields returned in `features_by_mode.vid_gen`:
- `init_image`
- `end_image`
- `control_frames`
- `high_noise_sample_params`
- `lora`
- `vae_tiling`
- `cache`
- `cancel_queued`
- `cancel_generating`
#### `POST /sdcpp/v1/img_gen`
@ -521,9 +631,7 @@ Typical status codes:
- `409 Conflict`
- `410 Gone`
### Canonical Request Schema
The `sdcpp API` request body is the canonical native schema.
### Request Body
Example:
@ -569,6 +677,16 @@ Example:
},
"lora": [],
"hires": {
"enabled": false,
"upscaler": "Latent",
"scale": 2.0,
"target_width": 0,
"target_height": 0,
"steps": 0,
"denoising_strength": 0.7,
"upscale_tile_size": 128
},
"vae_tiling_params": {
"enabled": false,
@ -612,7 +730,7 @@ Channel expectations:
If omitted or null:
- single-image fields map to an empty `sd_image_t`
- array fields map to `nullptr + count = 0`
- array fields map to an empty C-style array, represented as `pointer = nullptr` and `count = 0`
### Field Mapping Summary
@ -673,12 +791,23 @@ Other native fields:
| Field | Type |
| --- | --- |
| `hires` | `object` |
| `hires.enabled` | `boolean` |
| `hires.upscaler` | `string` |
| `hires.scale` | `number` |
| `hires.target_width` | `integer` |
| `hires.target_height` | `integer` |
| `hires.steps` | `integer` |
| `hires.denoising_strength` | `number` |
| `hires.upscale_tile_size` | `integer` |
| `vae_tiling_params` | `object` |
| `cache_mode` | `string` |
| `cache_option` | `string` |
| `scm_mask` | `string` |
| `scm_policy_dynamic` | `boolean` |
For `hires.upscaler`, use `Lanczos`, `Nearest`, `Latent`, `Latent (nearest)`, `Latent (nearest-exact)`, `Latent (antialiased)`, `Latent (bicubic)`, `Latent (bicubic antialiased)`, or an `upscalers[].name` value from `GET /sdcpp/v1/capabilities`. Model-backed upscalers are resolved as `--hires-upscalers-dir / (name + ext)` and must live directly in that directory.
HTTP-only output fields:
| Field | Type |
@ -686,11 +815,11 @@ HTTP-only output fields:
| `output_format` | `string` |
| `output_compression` | `integer` |
### Optional Field Semantics
### Optional Field Handling
Clients should preserve unset semantics for optional sampling fields.
Optional sampling fields may be omitted.
If a user has not explicitly provided one of these fields, the client should omit it instead of injecting a guessed fallback:
When omitted, backend defaults apply to these fields:
- `sample_params.scheduler`
- `sample_params.sample_method`
@ -766,29 +895,394 @@ Example cancelled job:
}
```
### Validation and Retention
### Submission Errors
Recommended behavior:
`POST /sdcpp/v1/img_gen` may return:
- malformed JSON returns `400`
- invalid image payloads return `400`
- invalid parameter structure returns `400`
- queue full returns `429` or `503`
- accepted runtime failures transition the job to `failed`
- unsupported in-progress cancellation may return `409`
- `202 Accepted` when the job is created
- `400 Bad Request` for an empty body, unsupported model mode, invalid JSON, or invalid generation parameters
- `429 Too Many Requests` when the job queue is full
- `500 Internal Server Error` for unexpected server exceptions during submission
Recommended retention controls:
### `vid_gen`
- pending job limit
- completed job TTL
- failed job TTL
The following section documents the native async contract for video generation.
### Future `vid_gen`
#### `POST /sdcpp/v1/vid_gen`
Future `vid_gen` should reuse the same async job model:
Submits an async video generation job.
- `POST /sdcpp/v1/vid_gen`
- `GET /sdcpp/v1/jobs/{id}`
- `POST /sdcpp/v1/jobs/{id}/cancel`
Successful submission returns `202 Accepted`.
Its request body should mirror `sd_vid_gen_params_t` in the same way that `img_gen` mirrors `sd_img_gen_params_t`.
Example response:
```json
{
"id": "job_01HTXYZVID",
"kind": "vid_gen",
"status": "queued",
"created": 1775401200,
"poll_url": "/sdcpp/v1/jobs/job_01HTXYZVID"
}
```
Response fields:
| Field | Type |
| --- | --- |
| `id` | `string` |
| `kind` | `string` |
| `status` | `string` |
| `created` | `integer` |
| `poll_url` | `string` |
### Request Body
Compared with `img_gen`, the `vid_gen` request body:
- `vid_gen` is a single video sequence job, so `batch_count` is not part of the request schema
- `ref_images`, `mask_image`, `control_image`, `control_strength`, and `embed_image_metadata` are not part of the request schema
- `vid_gen` adds `end_image`, `control_frames`, `high_noise_sample_params`, `video_frames`, `fps`, `moe_boundary`, and `vace_strength`
Example:
```json
{
"prompt": "a cat walking through a rainy alley",
"negative_prompt": "",
"clip_skip": -1,
"width": 832,
"height": 480,
"strength": 0.75,
"seed": -1,
"video_frames": 33,
"fps": 16,
"moe_boundary": 0.875,
"vace_strength": 1.0,
"init_image": null,
"end_image": null,
"control_frames": [],
"sample_params": {
"scheduler": "discrete",
"sample_method": "euler",
"sample_steps": 28,
"eta": 1.0,
"shifted_timestep": 0,
"custom_sigmas": [],
"flow_shift": 0.0,
"guidance": {
"txt_cfg": 7.0,
"img_cfg": 7.0,
"distilled_guidance": 3.5,
"slg": {
"layers": [7, 8, 9],
"layer_start": 0.01,
"layer_end": 0.2,
"scale": 0.0
}
}
},
"high_noise_sample_params": {
"scheduler": "discrete",
"sample_method": "euler",
"sample_steps": -1,
"eta": 1.0,
"shifted_timestep": 0,
"flow_shift": 0.0,
"guidance": {
"txt_cfg": 7.0,
"img_cfg": 7.0,
"distilled_guidance": 3.5,
"slg": {
"layers": [7, 8, 9],
"layer_start": 0.01,
"layer_end": 0.2,
"scale": 0.0
}
}
},
"lora": [],
"vae_tiling_params": {
"enabled": false,
"tile_size_x": 0,
"tile_size_y": 0,
"target_overlap": 0.5,
"rel_size_x": 0.0,
"rel_size_y": 0.0
},
"cache_mode": "disabled",
"cache_option": "",
"scm_mask": "",
"scm_policy_dynamic": true,
"output_format": "webm",
"output_compression": 100
}
```
### LoRA Rules
- The server only accepts explicit LoRA entries from the `lora` field.
- Prompt-embedded `<lora:...>` tags are intentionally unsupported.
- `lora[].is_high_noise` controls whether a LoRA applies only to the high-noise stage.
### Image and Frame Encoding Rules
Any image field accepts:
- a raw base64 string, or
- a data URL such as `data:image/png;base64,...`
Channel expectations:
- `init_image`: 3 channels
- `end_image`: 3 channels
- `control_frames[]`: 3 channels
Frame ordering rules:
- `control_frames[]` order is the conditioning frame order
- `control_frames[]` is preserved in request order
If omitted or null:
- single-image fields map to an empty `sd_image_t`
- array fields map to an empty C-style array, represented as `pointer = nullptr` and `count = 0`
### Field Mapping Summary
Top-level scalar fields:
| Field | Type |
| --- | --- |
| `prompt` | `string` |
| `negative_prompt` | `string` |
| `clip_skip` | `integer` |
| `width` | `integer` |
| `height` | `integer` |
| `strength` | `number` |
| `seed` | `integer` |
| `video_frames` | `integer` |
| `fps` | `integer` |
| `moe_boundary` | `number` |
| `vace_strength` | `number` |
Image and frame fields:
| Field | Type |
| --- | --- |
| `init_image` | `string \| null` |
| `end_image` | `string \| null` |
| `control_frames` | `array<string>` |
LoRA fields:
| Field | Type |
| --- | --- |
| `lora[].path` | `string` |
| `lora[].multiplier` | `number` |
| `lora[].is_high_noise` | `boolean` |
Sampling fields:
| Field | Type |
| --- | --- |
| `sample_params.scheduler` | `string` |
| `sample_params.sample_method` | `string` |
| `sample_params.sample_steps` | `integer` |
| `sample_params.eta` | `number` |
| `sample_params.shifted_timestep` | `integer` |
| `sample_params.custom_sigmas` | `array<number>` |
| `sample_params.flow_shift` | `number` |
| `sample_params.guidance.txt_cfg` | `number` |
| `sample_params.guidance.img_cfg` | `number` |
| `sample_params.guidance.distilled_guidance` | `number` |
| `sample_params.guidance.slg.layers` | `array<integer>` |
| `sample_params.guidance.slg.layer_start` | `number` |
| `sample_params.guidance.slg.layer_end` | `number` |
| `sample_params.guidance.slg.scale` | `number` |
High-noise sampling fields:
| Field | Type |
| --- | --- |
| `high_noise_sample_params.scheduler` | `string` |
| `high_noise_sample_params.sample_method` | `string` |
| `high_noise_sample_params.sample_steps` | `integer` |
| `high_noise_sample_params.eta` | `number` |
| `high_noise_sample_params.shifted_timestep` | `integer` |
| `high_noise_sample_params.flow_shift` | `number` |
| `high_noise_sample_params.guidance.txt_cfg` | `number` |
| `high_noise_sample_params.guidance.img_cfg` | `number` |
| `high_noise_sample_params.guidance.distilled_guidance` | `number` |
| `high_noise_sample_params.guidance.slg.layers` | `array<integer>` |
| `high_noise_sample_params.guidance.slg.layer_start` | `number` |
| `high_noise_sample_params.guidance.slg.layer_end` | `number` |
| `high_noise_sample_params.guidance.slg.scale` | `number` |
Other native fields:
| Field | Type |
| --- | --- |
| `vae_tiling_params` | `object` |
| `cache_mode` | `string` |
| `cache_option` | `string` |
| `scm_mask` | `string` |
| `scm_policy_dynamic` | `boolean` |
HTTP-only output fields:
| Field | Type |
| --- | --- |
| `output_format` | `string` |
| `output_compression` | `integer` |
For `vid_gen`, `output_format` and `output_compression` control container encoding.
`fps` is request metadata for the generated sequence and is echoed in the completed job result.
Allowed `output_format` values:
- `webm`
- `webp`
- `avi`
Output format behavior:
- `output_format` defaults to `webm`
- `webp` means animated WebP
- `avi` means MJPG AVI
- `webm` requires the server to be built with WebM support; otherwise the request returns `400`
### Result Payload
Completed jobs return one encoded container payload, not a list of per-frame images.
Result fields:
- `result.b64_json` contains the whole encoded container file as base64
- `result.mime_type` identifies the media type
- `result.output_format` echoes the selected container format
- `result.fps` echoes the effective playback FPS
- `result.frame_count` reports the actual decoded frame count used to build the container
Expected MIME types:
| `output_format` | `mime_type` |
| --- | --- |
| `webm` | `video/webm` |
| `webp` | `image/webp` |
| `avi` | `video/x-msvideo` |
### Optional Field Handling
Optional sampling fields may be omitted.
When omitted, backend defaults apply to these fields:
- `sample_params.scheduler`
- `sample_params.sample_method`
- `sample_params.eta`
- `sample_params.flow_shift`
- `sample_params.guidance.img_cfg`
- `high_noise_sample_params.scheduler`
- `high_noise_sample_params.sample_method`
- `high_noise_sample_params.eta`
- `high_noise_sample_params.flow_shift`
- `high_noise_sample_params.guidance.img_cfg`
`high_noise_sample_params` may also be omitted entirely.
### Frame Count Semantics
`video_frames` is the requested target length, but the current core video path internally normalizes the effective frame count to the largest `4n + 1` value that does not exceed the requested count.
Examples:
- `video_frames = 33` stays `33`
- `video_frames = 34` becomes `33`
- `video_frames = 32` becomes `29`
The completed job payload includes the actual decoded `frame_count`.
### Completion Result
Example completed job:
```json
{
"id": "job_01HTXYZVID",
"kind": "vid_gen",
"status": "completed",
"created": 1775401200,
"started": 1775401203,
"completed": 1775401215,
"queue_position": 0,
"result": {
"output_format": "webm",
"mime_type": "video/webm",
"fps": 16,
"frame_count": 33,
"b64_json": "GkXfo59ChoEBQveBAULygQRC84EIQo..."
},
"error": null
}
```
The response returns the encoded `.webm`, animated `.webp`, or `.avi` container payload directly.
### Failure Result
Example failed job:
```json
{
"id": "job_01HTXYZVID",
"kind": "vid_gen",
"status": "failed",
"created": 1775401200,
"started": 1775401203,
"completed": 1775401204,
"queue_position": 0,
"result": null,
"error": {
"code": "generation_failed",
"message": "generate_video returned no results"
}
}
```
### Cancelled Result
Example cancelled job:
```json
{
"id": "job_01HTXYZVID",
"kind": "vid_gen",
"status": "cancelled",
"created": 1775401200,
"started": null,
"completed": 1775401202,
"queue_position": 0,
"result": null,
"error": {
"code": "cancelled",
"message": "job cancelled by client"
}
}
```
### Submission Errors
`POST /sdcpp/v1/vid_gen` may return:
- `202 Accepted` when the job is created
- `400 Bad Request` for an empty body, unsupported model mode, invalid JSON, invalid generation parameters, or an unsupported output format
- `429 Too Many Requests` when the job queue is full
- `500 Internal Server Error` for unexpected server exceptions during submission

View File

@ -95,8 +95,12 @@ bool cancel_queued_job(AsyncJobManager& manager, AsyncGenerationJob& job) {
job.status = AsyncJobStatus::Cancelled;
job.completed_at = unix_timestamp_now();
job.result_images_b64.clear();
job.error_code = "cancelled";
job.error_message = "job cancelled by client";
job.result_media_b64.clear();
job.result_media_mime_type.clear();
job.result_frame_count = 0;
job.result_fps = 0;
job.error_code = "cancelled";
job.error_message = "job cancelled by client";
return true;
}
@ -122,14 +126,24 @@ json make_async_job_json(const AsyncJobManager& manager, const AsyncGenerationJo
}
if (job.status == AsyncJobStatus::Completed) {
json images = json::array();
for (size_t i = 0; i < job.result_images_b64.size(); ++i) {
images.push_back({{"index", i}, {"b64_json", job.result_images_b64[i]}});
if (job.kind == AsyncJobKind::VidGen) {
result["result"] = {
{"output_format", job.vid_gen.output_format},
{"mime_type", job.result_media_mime_type},
{"fps", job.result_fps},
{"frame_count", job.result_frame_count},
{"b64_json", job.result_media_b64},
};
} else {
json images = json::array();
for (size_t i = 0; i < job.result_images_b64.size(); ++i) {
images.push_back({{"index", i}, {"b64_json", job.result_images_b64[i]}});
}
result["result"] = {
{"output_format", job.img_gen.output_format},
{"images", images},
};
}
result["result"] = {
{"output_format", job.img_gen.output_format},
{"images", images},
};
result["error"] = nullptr;
} else if (job.status == AsyncJobStatus::Failed ||
job.status == AsyncJobStatus::Cancelled) {
@ -156,16 +170,15 @@ bool execute_img_gen_job(ServerRuntime& runtime,
sd_img_gen_params_t params = job.img_gen.to_sd_img_gen_params_t();
SDImageVec results;
int num_results = 0;
{
std::lock_guard<std::mutex> lock(*runtime.sd_ctx_mutex);
sd_image_t* raw_results = generate_image(runtime.sd_ctx, &params);
num_results = params.batch_count;
results.adopt(raw_results, num_results);
results.adopt(raw_results, params.batch_count);
}
if (results.empty() || num_results <= 0) {
const int num_results = results.count();
if (num_results <= 0) {
error_message = "generate_image returned no results";
return false;
}
@ -208,6 +221,47 @@ bool execute_img_gen_job(ServerRuntime& runtime,
return true;
}
bool execute_vid_gen_job(ServerRuntime& runtime,
AsyncGenerationJob& job,
std::string& output_media_b64,
std::string& output_media_mime_type,
int& output_frame_count,
int& output_fps,
std::string& error_message) {
sd_vid_gen_params_t params = job.vid_gen.to_sd_vid_gen_params_t();
SDImageVec results;
int num_results = 0;
{
std::lock_guard<std::mutex> lock(*runtime.sd_ctx_mutex);
sd_image_t* raw_results = generate_video(runtime.sd_ctx, &params, &num_results);
results.adopt(raw_results, num_results);
}
num_results = results.count();
if (num_results <= 0) {
error_message = "generate_video returned no results";
return false;
}
std::vector<uint8_t> video_bytes = create_video_from_sd_images_to_vector(job.vid_gen.output_format,
results.data(),
num_results,
job.vid_gen.gen_params.fps,
job.vid_gen.output_compression);
if (video_bytes.empty()) {
error_message = "failed to encode generated video container";
return false;
}
output_media_b64 = base64_encode(video_bytes);
output_media_mime_type = video_mime_type(job.vid_gen.output_format);
output_frame_count = num_results;
output_fps = job.vid_gen.gen_params.fps;
return true;
}
void async_job_worker(ServerRuntime& runtime) {
AsyncJobManager& manager = *runtime.async_job_manager;
@ -240,11 +294,23 @@ void async_job_worker(ServerRuntime& runtime) {
}
std::vector<std::string> output_images;
std::string output_media_b64;
std::string output_media_mime_type;
int output_frame_count = 0;
int output_fps = 0;
std::string error_message;
bool ok = false;
if (job->kind == AsyncJobKind::ImgGen) {
ok = execute_img_gen_job(runtime, *job, output_images, error_message);
} else if (job->kind == AsyncJobKind::VidGen) {
ok = execute_vid_gen_job(runtime,
*job,
output_media_b64,
output_media_mime_type,
output_frame_count,
output_fps,
error_message);
} else {
error_message = "unsupported job kind";
}
@ -258,8 +324,12 @@ void async_job_worker(ServerRuntime& runtime) {
job->completed_at = unix_timestamp_now();
if (ok) {
job->status = AsyncJobStatus::Completed;
job->result_images_b64 = std::move(output_images);
job->status = AsyncJobStatus::Completed;
job->result_images_b64 = std::move(output_images);
job->result_media_b64 = std::move(output_media_b64);
job->result_media_mime_type = std::move(output_media_mime_type);
job->result_frame_count = output_frame_count;
job->result_fps = output_fps;
job->error_code.clear();
job->error_message.clear();
} else {
@ -267,6 +337,10 @@ void async_job_worker(ServerRuntime& runtime) {
job->error_code = "generation_failed";
job->error_message = error_message.empty() ? "unknown generation error" : error_message;
job->result_images_b64.clear();
job->result_media_b64.clear();
job->result_media_mime_type.clear();
job->result_frame_count = 0;
job->result_fps = 0;
}
purge_expired_jobs(manager);

View File

@ -36,7 +36,12 @@ struct AsyncGenerationJob {
int64_t started_at = 0;
int64_t completed_at = 0;
ImgGenJobRequest img_gen;
VidGenJobRequest vid_gen;
std::vector<std::string> result_images_b64;
std::string result_media_b64;
std::string result_media_mime_type;
int result_frame_count = 0;
int result_fps = 0;
std::string error_code;
std::string error_message;
};
@ -63,4 +68,11 @@ bool execute_img_gen_job(ServerRuntime& runtime,
AsyncGenerationJob& job,
std::vector<std::string>& output_images,
std::string& error_message);
bool execute_vid_gen_job(ServerRuntime& runtime,
AsyncGenerationJob& job,
std::string& output_media_b64,
std::string& output_media_mime_type,
int& output_frame_count,
int& output_fps,
std::string& error_message);
void async_job_worker(ServerRuntime& runtime);

@ -1 +1 @@
Subproject commit 740475a7a6794dc07fb23e8ec5dc56e7e80aa8c1
Subproject commit 797ccf80825cc035508ba9b599b2a21953e7f835

View File

@ -48,7 +48,9 @@ static void parse_args(int argc,
if (!svr_params.resolve_and_validate() ||
!ctx_params.resolve_and_validate(IMG_GEN) ||
!default_gen_params.resolve_and_validate(IMG_GEN, ctx_params.lora_model_dir)) {
!default_gen_params.resolve_and_validate(IMG_GEN,
ctx_params.lora_model_dir,
ctx_params.hires_upscalers_dir)) {
print_usage(argv[0], options_vec);
exit(1);
}
@ -95,6 +97,8 @@ int main(int argc, const char** argv) {
std::vector<LoraEntry> lora_cache;
std::mutex lora_mutex;
std::vector<UpscalerEntry> upscaler_cache;
std::mutex upscaler_mutex;
AsyncJobManager async_job_manager;
ServerRuntime runtime = {
sd_ctx.get(),
@ -104,6 +108,8 @@ int main(int argc, const char** argv) {
&default_gen_params,
&lora_cache,
&lora_mutex,
&upscaler_cache,
&upscaler_mutex,
&async_job_manager,
};

View File

@ -70,7 +70,7 @@ static bool build_openai_generation_request(const httplib::Request& req,
}
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid params";
return false;
}
@ -212,7 +212,7 @@ static bool build_openai_edit_request(const httplib::Request& req,
}
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid params";
return false;
}
@ -253,6 +253,12 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
svr.Post("/v1/images/generations", [runtime](const httplib::Request& req, httplib::Response& res) {
try {
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
res.status = 400;
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
return;
}
ImgGenJobRequest request;
std::string error_message;
if (!build_openai_generation_request(req, *runtime, request, error_message)) {
@ -319,6 +325,12 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
svr.Post("/v1/images/edits", [runtime](const httplib::Request& req, httplib::Response& res) {
try {
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
res.status = 400;
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
return;
}
ImgGenJobRequest request;
std::string error_message;
if (!build_openai_edit_request(req, *runtime, request, error_message)) {

View File

@ -1,6 +1,7 @@
#include "routes.h"
#include <algorithm>
#include <cctype>
#include <cstring>
#include <regex>
#include <string_view>
@ -35,14 +36,20 @@ static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
return {};
}
static std::string lower_ascii(std::string value) {
std::transform(value.begin(), value.end(), value.begin(), [](unsigned char c) {
return static_cast<char>(std::tolower(c));
});
return value;
}
static enum sample_method_t get_sdapi_sample_method(std::string name) {
enum sample_method_t result = str_to_sample_method(name.c_str());
if (result != SAMPLE_METHOD_COUNT) {
return result;
}
std::transform(name.begin(), name.end(), name.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
name = lower_ascii(name);
static const std::unordered_map<std::string_view, sample_method_t> hardcoded{
{"euler a", EULER_A_SAMPLE_METHOD},
{"k_euler_a", EULER_A_SAMPLE_METHOD},
@ -114,6 +121,18 @@ static bool build_sdapi_img_gen_request(const json& j,
request.gen_params.width = j.value("width", -1);
request.gen_params.height = j.value("height", -1);
if (!img2img && j.value("enable_hr", false)) {
request.gen_params.hires_enabled = true;
request.gen_params.hires_scale = j.value("hr_scale", request.gen_params.hires_scale);
request.gen_params.hires_width = j.value("hr_resize_x", request.gen_params.hires_width);
request.gen_params.hires_height = j.value("hr_resize_y", request.gen_params.hires_height);
request.gen_params.hires_steps = j.value("hr_steps", request.gen_params.hires_steps);
request.gen_params.hires_denoising_strength =
j.value("denoising_strength", request.gen_params.hires_denoising_strength);
request.gen_params.hires_upscaler = j.value("hr_upscaler", request.gen_params.hires_upscaler);
}
std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt);
if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) {
error_message = "invalid sd_cpp_extra_args";
@ -228,7 +247,7 @@ static bool build_sdapi_img_gen_request(const json& j,
}
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid params";
return false;
}
@ -246,6 +265,11 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
res.set_content(R"({"error":"empty body"})", "application/json");
return;
}
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
res.status = 400;
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
return;
}
json j = json::parse(req.body);
ImgGenJobRequest request;
@ -342,6 +366,52 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
res.set_content(result.dump(), "application/json");
});
svr.Get("/sdapi/v1/upscalers", [runtime](const httplib::Request&, httplib::Response& res) {
refresh_upscaler_cache(*runtime);
auto make_builtin = [](const char* name) {
json item;
item["name"] = name;
item["model_name"] = nullptr;
item["model_path"] = nullptr;
item["model_url"] = nullptr;
item["scale"] = 4;
return item;
};
json result = json::array();
result.push_back(make_builtin("None"));
result.push_back(make_builtin("Lanczos"));
result.push_back(make_builtin("Nearest"));
{
std::lock_guard<std::mutex> lock(*runtime->upscaler_mutex);
for (const auto& e : *runtime->upscaler_cache) {
json item;
item["name"] = e.name;
item["model_name"] = e.model_name;
item["model_path"] = e.fullpath;
item["model_url"] = nullptr;
item["scale"] = e.scale;
result.push_back(item);
}
}
res.set_content(result.dump(), "application/json");
});
svr.Get("/sdapi/v1/latent-upscale-modes", [](const httplib::Request&, httplib::Response& res) {
json result = json::array({
{{"name", "Latent"}},
{{"name", "Latent (nearest)"}},
{{"name", "Latent (nearest-exact)"}},
{{"name", "Latent (antialiased)"}},
{{"name", "Latent (bicubic)"}},
{{"name", "Latent (bicubic antialiased)"}},
});
res.set_content(result.dump(), "application/json");
});
svr.Get("/sdapi/v1/samplers", [runtime](const httplib::Request&, httplib::Response& res) {
std::vector<std::string> sampler_names;
sampler_names.push_back("default");

View File

@ -75,48 +75,33 @@ static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
return {};
}
static json make_capabilities_json(ServerRuntime& runtime) {
refresh_lora_cache(runtime);
AsyncJobManager& manager = *runtime.async_job_manager;
const auto& defaults = *runtime.default_gen_params;
const auto& sample_params = defaults.sample_params;
const auto& guidance = sample_params.guidance;
const fs::path model_path = resolve_display_model_path(runtime);
json samplers = json::array();
json schedulers = json::array();
json output_formats = json::array({"png", "jpeg"});
json available_loras = json::array();
for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
samplers.push_back(sd_sample_method_name((sample_method_t)i));
}
for (int i = 0; i < SCHEDULER_COUNT; ++i) {
schedulers.push_back(sd_scheduler_name((scheduler_t)i));
}
#ifdef SD_USE_WEBP
output_formats.push_back("webp");
#endif
{
std::lock_guard<std::mutex> lock(*runtime.lora_mutex);
for (const auto& entry : *runtime.lora_cache) {
available_loras.push_back({
{"name", entry.name},
{"path", entry.path},
});
}
}
json result;
result["model"] = {
{"name", model_path.filename().u8string()},
{"stem", model_path.stem().u8string()},
{"path", model_path.u8string()},
static json make_sample_params_json(const sd_sample_params_t& sample_params, const std::vector<int>& skip_layers) {
const auto& guidance = sample_params.guidance;
return {
{"scheduler", capability_scheduler_name(sample_params.scheduler)},
{"sample_method", capability_sample_method_name(sample_params.sample_method)},
{"sample_steps", sample_params.sample_steps},
{"eta", finite_number_or_null(sample_params.eta)},
{"shifted_timestep", sample_params.shifted_timestep},
{"flow_shift", finite_number_or_null(sample_params.flow_shift)},
{"guidance",
{
{"txt_cfg", guidance.txt_cfg},
{"img_cfg", finite_number_or_null(guidance.img_cfg)},
{"distilled_guidance", guidance.distilled_guidance},
{"slg",
{
{"layers", skip_layers},
{"layer_start", guidance.slg.layer_start},
{"layer_end", guidance.slg.layer_end},
{"scale", guidance.slg.scale},
}},
}},
};
result["defaults"] = {
}
static json make_img_gen_defaults_json(const SDGenerationParams& defaults, const std::string& output_format) {
return {
{"prompt", defaults.prompt},
{"negative_prompt", defaults.negative_prompt},
{"clip_skip", defaults.clip_skip},
@ -128,59 +113,228 @@ static json make_capabilities_json(ServerRuntime& runtime) {
{"auto_resize_ref_image", defaults.auto_resize_ref_image},
{"increase_ref_index", defaults.increase_ref_index},
{"control_strength", defaults.control_strength},
{"sample_params",
{"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)},
{"hires",
{
{"scheduler", capability_scheduler_name(sample_params.scheduler)},
{"sample_method", capability_sample_method_name(sample_params.sample_method)},
{"sample_steps", sample_params.sample_steps},
{"eta", finite_number_or_null(sample_params.eta)},
{"shifted_timestep", sample_params.shifted_timestep},
{"flow_shift", finite_number_or_null(sample_params.flow_shift)},
{"guidance",
{
{"txt_cfg", guidance.txt_cfg},
{"img_cfg", finite_number_or_null(guidance.img_cfg)},
{"distilled_guidance", guidance.distilled_guidance},
{"slg",
{
{"layers", defaults.skip_layers},
{"layer_start", guidance.slg.layer_start},
{"layer_end", guidance.slg.layer_end},
{"scale", guidance.slg.scale},
}},
}},
{"enabled", defaults.hires_enabled},
{"upscaler", defaults.hires_upscaler},
{"scale", defaults.hires_scale},
{"target_width", defaults.hires_width},
{"target_height", defaults.hires_height},
{"steps", defaults.hires_steps},
{"denoising_strength", defaults.hires_denoising_strength},
{"upscale_tile_size", defaults.hires_upscale_tile_size},
}},
{"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
{"cache_mode", defaults.cache_mode},
{"cache_option", defaults.cache_option},
{"scm_mask", defaults.scm_mask},
{"scm_policy_dynamic", defaults.scm_policy_dynamic},
{"output_format", "png"},
{"output_format", output_format},
{"output_compression", 100},
};
result["limits"] = {
{"min_width", 64},
{"max_width", 4096},
{"min_height", 64},
{"max_height", 4096},
{"max_batch_count", 8},
{"max_queue_size", manager.max_pending_jobs},
}
static json make_vid_gen_defaults_json(const SDGenerationParams& defaults, const std::string& output_format) {
return {
{"prompt", defaults.prompt},
{"negative_prompt", defaults.negative_prompt},
{"clip_skip", defaults.clip_skip},
{"width", defaults.width > 0 ? defaults.width : 512},
{"height", defaults.height > 0 ? defaults.height : 512},
{"strength", defaults.strength},
{"seed", defaults.seed},
{"video_frames", defaults.video_frames},
{"fps", defaults.fps},
{"moe_boundary", defaults.moe_boundary},
{"vace_strength", defaults.vace_strength},
{"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)},
{"high_noise_sample_params", make_sample_params_json(defaults.high_noise_sample_params, defaults.high_noise_skip_layers)},
{"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
{"cache_mode", defaults.cache_mode},
{"cache_option", defaults.cache_option},
{"scm_mask", defaults.scm_mask},
{"scm_policy_dynamic", defaults.scm_policy_dynamic},
{"output_format", output_format},
{"output_compression", 100},
};
result["samplers"] = samplers;
result["schedulers"] = schedulers;
result["output_formats"] = output_formats;
result["features"] = {
{"init_image", true},
{"mask_image", true},
{"control_image", true},
{"ref_images", true},
{"lora", true},
{"vae_tiling", true},
{"cache", true},
}
static json make_img_gen_features_json() {
return {
{"init_image", true},
{"mask_image", true},
{"control_image", true},
{"ref_images", true},
{"lora", true},
{"vae_tiling", true},
{"hires", true},
{"cache", true},
{"cancel_queued", true},
{"cancel_generating", false},
};
}
static json make_vid_gen_features_json() {
return {
{"init_image", true},
{"end_image", true},
{"control_frames", true},
{"high_noise_sample_params", true},
{"lora", true},
{"vae_tiling", true},
{"cache", true},
{"cancel_queued", true},
{"cancel_generating", false},
};
}
static json make_capabilities_json(ServerRuntime& runtime) {
refresh_lora_cache(runtime);
refresh_upscaler_cache(runtime);
AsyncJobManager& manager = *runtime.async_job_manager;
const auto& defaults = *runtime.default_gen_params;
const fs::path model_path = resolve_display_model_path(runtime);
const bool supports_img = runtime_supports_generation_mode(runtime, IMG_GEN);
const bool supports_vid = runtime_supports_generation_mode(runtime, VID_GEN);
json samplers = json::array();
json schedulers = json::array();
json image_output_formats = supported_img_output_formats();
json video_output_formats = supported_vid_output_formats();
json available_loras = json::array();
json available_upscalers = json::array();
json supported_modes = json::array();
for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
samplers.push_back(sd_sample_method_name((sample_method_t)i));
}
for (int i = 0; i < SCHEDULER_COUNT; ++i) {
schedulers.push_back(sd_scheduler_name((scheduler_t)i));
}
{
std::lock_guard<std::mutex> lock(*runtime.lora_mutex);
for (const auto& entry : *runtime.lora_cache) {
available_loras.push_back({
{"name", entry.name},
{"path", entry.path},
});
}
}
available_upscalers.push_back({
{"name", "None"},
});
available_upscalers.push_back({
{"name", "Lanczos"},
});
available_upscalers.push_back({
{"name", "Nearest"},
});
available_upscalers.push_back({
{"name", "Latent"},
});
available_upscalers.push_back({
{"name", "Latent (nearest)"},
});
available_upscalers.push_back({
{"name", "Latent (nearest-exact)"},
});
available_upscalers.push_back({
{"name", "Latent (antialiased)"},
});
available_upscalers.push_back({
{"name", "Latent (bicubic)"},
});
available_upscalers.push_back({
{"name", "Latent (bicubic antialiased)"},
});
{
std::lock_guard<std::mutex> lock(*runtime.upscaler_mutex);
for (const auto& entry : *runtime.upscaler_cache) {
available_upscalers.push_back({
{"name", entry.name},
});
}
}
if (supports_img) {
supported_modes.push_back("img_gen");
}
if (supports_vid) {
supported_modes.push_back("vid_gen");
}
std::string default_img_output_format = "png";
std::string default_vid_output_format = "avi";
if (!image_output_formats.empty()) {
default_img_output_format = image_output_formats[0].get<std::string>();
}
if (!video_output_formats.empty()) {
default_vid_output_format = video_output_formats[0].get<std::string>();
}
json defaults_by_mode = json::object();
json output_formats_by_mode = json::object();
json features_by_mode = json::object();
if (supports_img) {
defaults_by_mode["img_gen"] = make_img_gen_defaults_json(defaults, default_img_output_format);
output_formats_by_mode["img_gen"] = image_output_formats;
features_by_mode["img_gen"] = make_img_gen_features_json();
}
if (supports_vid) {
defaults_by_mode["vid_gen"] = make_vid_gen_defaults_json(defaults, default_vid_output_format);
output_formats_by_mode["vid_gen"] = video_output_formats;
features_by_mode["vid_gen"] = make_vid_gen_features_json();
}
json top_level_defaults = json::object();
json top_level_output_formats = json::array();
json top_level_features = {
{"cancel_queued", true},
{"cancel_generating", false},
};
result["loras"] = available_loras;
std::string current_mode = "";
if (supports_img) {
current_mode = "img_gen";
top_level_defaults = defaults_by_mode["img_gen"];
top_level_output_formats = output_formats_by_mode["img_gen"];
top_level_features = features_by_mode["img_gen"];
} else if (supports_vid) {
current_mode = "vid_gen";
top_level_defaults = defaults_by_mode["vid_gen"];
top_level_output_formats = output_formats_by_mode["vid_gen"];
top_level_features = features_by_mode["vid_gen"];
}
json result;
result["model"] = {
{"name", model_path.filename().u8string()},
{"stem", model_path.stem().u8string()},
{"path", model_path.u8string()},
};
result["current_mode"] = current_mode;
result["supported_modes"] = supported_modes;
result["defaults"] = top_level_defaults;
result["defaults_by_mode"] = defaults_by_mode;
result["limits"] = {
{"min_width", 64},
{"max_width", 4096},
{"min_height", 64},
{"max_height", 4096},
{"max_batch_count", 8},
{"max_queue_size", manager.max_pending_jobs},
};
result["samplers"] = samplers;
result["schedulers"] = schedulers;
result["output_formats"] = top_level_output_formats;
result["output_formats_by_mode"] = output_formats_by_mode;
result["features"] = top_level_features;
result["features_by_mode"] = features_by_mode;
result["loras"] = available_loras;
result["upscalers"] = available_upscalers;
return result;
}
@ -204,7 +358,34 @@ static bool parse_img_gen_request(const json& body,
return false;
}
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid generation parameters";
return false;
}
return true;
}
static bool parse_vid_gen_request(const json& body,
ServerRuntime& runtime,
VidGenJobRequest& request,
std::string& error_message) {
request.gen_params = *runtime.default_gen_params;
refresh_lora_cache(runtime);
if (!request.gen_params.from_json_str(body.dump(), [&](const std::string& path) {
return get_lora_full_path(runtime, path);
})) {
error_message = "invalid generation parameters";
return false;
}
std::string output_format = body.value("output_format", "webm");
int output_compression = body.value("output_compression", 100);
if (!assign_output_options(request, output_format, output_compression, error_message)) {
return false;
}
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(VID_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid generation parameters";
return false;
}
@ -226,6 +407,11 @@ void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
res.set_content(R"({"error":"empty body"})", "application/json");
return;
}
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
res.status = 400;
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
return;
}
json body = json::parse(req.body);
ImgGenJobRequest request;
@ -276,9 +462,66 @@ void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
}
});
svr.Post("/sdcpp/v1/vid_gen", [](const httplib::Request&, httplib::Response& res) {
res.status = 501;
res.set_content(R"({"error":"vid_gen is reserved and not implemented yet"})", "application/json");
svr.Post("/sdcpp/v1/vid_gen", [runtime](const httplib::Request& req, httplib::Response& res) {
try {
if (req.body.empty()) {
res.status = 400;
res.set_content(R"({"error":"empty body"})", "application/json");
return;
}
if (!runtime_supports_generation_mode(*runtime, VID_GEN)) {
res.status = 400;
res.set_content(json({{"error", unsupported_generation_mode_error(VID_GEN)}}).dump(), "application/json");
return;
}
json body = json::parse(req.body);
VidGenJobRequest request;
std::string error_message;
if (!parse_vid_gen_request(body, *runtime, request, error_message)) {
res.status = 400;
res.set_content(json({{"error", error_message}}).dump(), "application/json");
return;
}
AsyncJobManager& manager = *runtime->async_job_manager;
std::shared_ptr<AsyncGenerationJob> job = std::make_shared<AsyncGenerationJob>();
job->kind = AsyncJobKind::VidGen;
job->status = AsyncJobStatus::Queued;
job->created_at = unix_timestamp_now();
job->vid_gen = std::move(request);
{
std::lock_guard<std::mutex> lock(manager.mutex);
purge_expired_jobs(manager);
if (count_pending_jobs(manager) >= manager.max_pending_jobs) {
res.status = 429;
res.set_content(R"({"error":"job queue is full"})", "application/json");
return;
}
job->id = make_async_job_id(manager);
manager.jobs[job->id] = job;
manager.queue.push_back(job->id);
}
manager.cv.notify_one();
json out;
out["id"] = job->id;
out["kind"] = async_job_kind_name(job->kind);
out["status"] = async_job_status_name(job->status);
out["created"] = job->created_at;
out["poll_url"] = "/sdcpp/v1/jobs/" + job->id;
res.status = 202;
res.set_content(out.dump(), "application/json");
} catch (const json::parse_error& e) {
res.status = 400;
res.set_content(json({{"error", "invalid json"}, {"message", e.what()}}).dump(), "application/json");
} catch (const std::exception& e) {
res.status = 500;
res.set_content(json({{"error", "server_error"}, {"message", e.what()}}).dump(), "application/json");
}
});
svr.Get(R"(/sdcpp/v1/jobs/([A-Za-z0-9_\-]+))", [runtime](const httplib::Request& req, httplib::Response& res) {

View File

@ -1,6 +1,7 @@
#include "runtime.h"
#include <algorithm>
#include <cctype>
#include <chrono>
#include <cstdlib>
#include <filesystem>
@ -13,6 +14,18 @@
namespace fs = std::filesystem;
static std::string lower_ascii(std::string value) {
std::transform(value.begin(), value.end(), value.begin(), [](unsigned char c) {
return static_cast<char>(std::tolower(c));
});
return value;
}
static bool is_supported_model_ext(const fs::path& p) {
auto ext = lower_ascii(p.extension().string());
return ext == ".gguf" || ext == ".pt" || ext == ".pth" || ext == ".safetensors";
}
static const std::string k_base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
@ -45,6 +58,44 @@ std::string normalize_output_format(std::string output_format) {
return output_format;
}
std::vector<std::string> supported_img_output_formats(bool allow_webp) {
std::vector<std::string> formats = {"png", "jpeg"};
#ifdef SD_USE_WEBP
if (allow_webp) {
formats.push_back("webp");
}
#else
(void)allow_webp;
#endif
return formats;
}
std::vector<std::string> supported_vid_output_formats() {
std::vector<std::string> formats;
#ifdef SD_USE_WEBM
formats.push_back("webm");
#endif
#ifdef SD_USE_WEBP
formats.push_back("webp");
#endif
formats.push_back("avi");
return formats;
}
static std::string valid_vid_output_formats_message() {
const std::vector<std::string> formats = supported_vid_output_formats();
std::string message = "invalid output_format, must be one of [";
for (size_t i = 0; i < formats.size(); ++i) {
if (i > 0) {
message += ", ";
}
message += formats[i];
}
message += "]";
return message;
}
bool assign_output_options(ImgGenJobRequest& request,
std::string output_format,
int output_compression,
@ -53,19 +104,88 @@ bool assign_output_options(ImgGenJobRequest& request,
request.output_format = normalize_output_format(std::move(output_format));
request.output_compression = std::clamp(output_compression, 0, 100);
const bool valid_format = request.output_format == "png" ||
request.output_format == "jpeg" ||
(allow_webp && request.output_format == "webp");
const std::vector<std::string> valid_formats = supported_img_output_formats(allow_webp);
const bool valid_format = std::find(valid_formats.begin(),
valid_formats.end(),
request.output_format) != valid_formats.end();
if (!valid_format) {
error_message = allow_webp
? "invalid output_format, must be one of [png, jpeg, webp]"
: "invalid output_format, must be one of [png, jpeg]";
error_message = "invalid output_format, must be one of [";
for (size_t i = 0; i < valid_formats.size(); ++i) {
if (i > 0) {
error_message += ", ";
}
error_message += valid_formats[i];
}
error_message += "]";
return false;
}
return true;
}
bool assign_output_options(VidGenJobRequest& request,
std::string output_format,
int output_compression,
std::string& error_message) {
request.output_format = normalize_output_format(std::move(output_format));
request.output_compression = std::clamp(output_compression, 0, 100);
if (request.output_format == "avi") {
return true;
}
if (request.output_format == "webm") {
#ifdef SD_USE_WEBM
return true;
#else
error_message = valid_vid_output_formats_message();
return false;
#endif
}
if (request.output_format == "webp") {
#ifdef SD_USE_WEBP
return true;
#else
error_message = valid_vid_output_formats_message();
return false;
#endif
}
error_message = valid_vid_output_formats_message();
return false;
}
std::string video_mime_type(const std::string& output_format) {
if (output_format == "webm") {
return "video/webm";
}
if (output_format == "webp") {
return "image/webp";
}
return "video/x-msvideo";
}
bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode) {
if (mode == VID_GEN) {
return sd_ctx_supports_video_generation(runtime.sd_ctx);
}
if (mode == IMG_GEN) {
return sd_ctx_supports_image_generation(runtime.sd_ctx);
}
return true;
}
std::string unsupported_generation_mode_error(SDMode mode) {
if (mode == VID_GEN) {
return "loaded model does not support vid_gen";
}
if (mode == IMG_GEN) {
return "loaded model does not support img_gen";
}
return "loaded model does not support requested mode";
}
ArgOptions SDSvrParams::get_options() {
ArgOptions options;
@ -134,20 +254,12 @@ void refresh_lora_cache(ServerRuntime& rt) {
fs::path lora_dir = rt.ctx_params->lora_model_dir;
if (fs::exists(lora_dir) && fs::is_directory(lora_dir)) {
auto is_lora_ext = [](const fs::path& p) {
auto ext = p.extension().string();
std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) {
return static_cast<char>(std::tolower(c));
});
return ext == ".gguf" || ext == ".pt" || ext == ".pth" || ext == ".safetensors";
};
for (auto& entry : fs::recursive_directory_iterator(lora_dir)) {
if (!entry.is_regular_file()) {
continue;
}
const fs::path& p = entry.path();
if (!is_lora_ext(p)) {
if (!is_supported_model_ext(p)) {
continue;
}
@ -179,6 +291,40 @@ std::string get_lora_full_path(ServerRuntime& rt, const std::string& path) {
return it != rt.lora_cache->end() ? it->fullpath : "";
}
void refresh_upscaler_cache(ServerRuntime& rt) {
std::vector<UpscalerEntry> new_cache;
fs::path upscaler_dir = rt.ctx_params->hires_upscalers_dir;
if (fs::exists(upscaler_dir) && fs::is_directory(upscaler_dir)) {
for (auto& entry : fs::directory_iterator(upscaler_dir)) {
if (!entry.is_regular_file()) {
continue;
}
const fs::path& p = entry.path();
if (!is_supported_model_ext(p)) {
continue;
}
UpscalerEntry upscaler_entry;
upscaler_entry.name = p.stem().u8string();
upscaler_entry.fullpath = fs::absolute(p).lexically_normal().u8string();
upscaler_entry.model_name = "ESRGAN_4x";
upscaler_entry.path = p.filename().u8string();
new_cache.push_back(std::move(upscaler_entry));
}
}
std::sort(new_cache.begin(), new_cache.end(), [](const UpscalerEntry& a, const UpscalerEntry& b) {
return a.name < b.name;
});
{
std::lock_guard<std::mutex> lock(*rt.upscaler_mutex);
*rt.upscaler_cache = std::move(new_cache);
}
}
int64_t unix_timestamp_now() {
return std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch())

View File

@ -37,6 +37,14 @@ struct LoraEntry {
std::string fullpath;
};
struct UpscalerEntry {
std::string name;
std::string path;
std::string fullpath;
std::string model_name;
int scale = 4;
};
struct ServerRuntime {
sd_ctx_t* sd_ctx;
std::mutex* sd_ctx_mutex;
@ -45,6 +53,8 @@ struct ServerRuntime {
const SDGenerationParams* default_gen_params;
std::vector<LoraEntry>* lora_cache;
std::mutex* lora_mutex;
std::vector<UpscalerEntry>* upscaler_cache;
std::mutex* upscaler_mutex;
AsyncJobManager* async_job_manager;
};
@ -58,13 +68,33 @@ struct ImgGenJobRequest {
}
};
struct VidGenJobRequest {
SDGenerationParams gen_params;
std::string output_format = "webm";
int output_compression = 100;
sd_vid_gen_params_t to_sd_vid_gen_params_t() {
return gen_params.to_sd_vid_gen_params_t();
}
};
std::string base64_encode(const std::vector<uint8_t>& bytes);
std::string normalize_output_format(std::string output_format);
std::vector<std::string> supported_img_output_formats(bool allow_webp = true);
std::vector<std::string> supported_vid_output_formats();
bool assign_output_options(ImgGenJobRequest& request,
std::string output_format,
int output_compression,
bool allow_webp,
std::string& error_message);
bool assign_output_options(VidGenJobRequest& request,
std::string output_format,
int output_compression,
std::string& error_message);
std::string video_mime_type(const std::string& output_format);
bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode);
std::string unsupported_generation_mode_error(SDMode mode);
void refresh_lora_cache(ServerRuntime& rt);
std::string get_lora_full_path(ServerRuntime& rt, const std::string& path);
void refresh_upscaler_cache(ServerRuntime& rt);
int64_t unix_timestamp_now();

View File

@ -1,5 +1,5 @@
for f in src/*.cpp src/*.h src/*.hpp src/tokenizers/*.h src/tokenizers/*.cpp src/tokenizers/vocab/*.h src/tokenizers/vocab/*.cpp \
examples/cli/*.cpp examples/cli/*.h examples/server/*.cpp \
src/model_io/*.h src/model_io/*.cpp examples/cli/*.cpp examples/cli/*.h examples/server/*.cpp \
examples/common/*.hpp examples/common/*.h examples/common/*.cpp; do
[[ "$f" == vocab* ]] && continue
echo "formatting '$f'"

View File

@ -203,6 +203,7 @@ typedef struct {
bool chroma_use_t5_mask;
int chroma_t5_mask_pad;
bool qwen_image_zero_cond_t;
float max_vram;
} sd_ctx_params_t;
typedef struct {
@ -289,6 +290,32 @@ typedef struct {
const char* path;
} sd_lora_t;
enum sd_hires_upscaler_t {
SD_HIRES_UPSCALER_NONE,
SD_HIRES_UPSCALER_LATENT,
SD_HIRES_UPSCALER_LATENT_NEAREST,
SD_HIRES_UPSCALER_LATENT_NEAREST_EXACT,
SD_HIRES_UPSCALER_LATENT_ANTIALIASED,
SD_HIRES_UPSCALER_LATENT_BICUBIC,
SD_HIRES_UPSCALER_LATENT_BICUBIC_ANTIALIASED,
SD_HIRES_UPSCALER_LANCZOS,
SD_HIRES_UPSCALER_NEAREST,
SD_HIRES_UPSCALER_MODEL,
SD_HIRES_UPSCALER_COUNT,
};
typedef struct {
bool enabled;
enum sd_hires_upscaler_t upscaler;
const char* model_path;
float scale;
int target_width;
int target_height;
int steps;
float denoising_strength;
int upscale_tile_size;
} sd_hires_params_t;
typedef struct {
const sd_lora_t* loras;
uint32_t lora_count;
@ -312,6 +339,7 @@ typedef struct {
sd_pm_params_t pm_params;
sd_tiling_params_t vae_tiling_params;
sd_cache_params_t cache;
sd_hires_params_t hires;
} sd_img_gen_params_t;
typedef struct {
@ -348,6 +376,8 @@ SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy, void* data);
SD_API int32_t sd_get_num_physical_cores();
SD_API const char* sd_get_system_info();
SD_API bool sd_ctx_supports_image_generation(const sd_ctx_t* sd_ctx);
SD_API bool sd_ctx_supports_video_generation(const sd_ctx_t* sd_ctx);
SD_API const char* sd_type_name(enum sd_type_t type);
SD_API enum sd_type_t str_to_sd_type(const char* str);
@ -363,8 +393,11 @@ SD_API const char* sd_preview_name(enum preview_t preview);
SD_API enum preview_t str_to_preview(const char* str);
SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode);
SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str);
SD_API const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler);
SD_API enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str);
SD_API void sd_cache_params_init(sd_cache_params_t* cache_params);
SD_API void sd_hires_params_init(sd_hires_params_t* hires_params);
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);

View File

@ -499,9 +499,15 @@ namespace Anima {
encoder_hidden_states = adapted_context;
}
sd::ggml_graph_cut::mark_graph_cut(x, "anima.prelude", "x");
sd::ggml_graph_cut::mark_graph_cut(embedded_timestep, "anima.prelude", "embedded_timestep");
sd::ggml_graph_cut::mark_graph_cut(temb, "anima.prelude", "temb");
sd::ggml_graph_cut::mark_graph_cut(encoder_hidden_states, "anima.prelude", "context");
for (int i = 0; i < num_layers; i++) {
auto block = std::dynamic_pointer_cast<TransformerBlock>(blocks["blocks." + std::to_string(i)]);
x = block->forward(ctx, x, encoder_hidden_states, embedded_timestep, temb, image_pe);
sd::ggml_graph_cut::mark_graph_cut(x, "anima.blocks." + std::to_string(i), "x");
}
x = final_layer->forward(ctx, x, embedded_timestep, temb); // [N, h*w, ph*pw*C]

View File

@ -328,6 +328,7 @@ public:
auto conv_out = std::dynamic_pointer_cast<Conv2d>(blocks["conv_out"]);
auto h = conv_in->forward(ctx, x); // [N, ch, h, w]
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.encoder.prelude", "h");
// downsampling
size_t num_resolutions = ch_mult.size();
@ -337,12 +338,14 @@ public:
auto down_block = std::dynamic_pointer_cast<ResnetBlock>(blocks[name]);
h = down_block->forward(ctx, h);
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.encoder.down." + std::to_string(i) + ".block." + std::to_string(j), "h");
}
if (i != num_resolutions - 1) {
std::string name = "down." + std::to_string(i) + ".downsample";
auto down_sample = std::dynamic_pointer_cast<DownSampleBlock>(blocks[name]);
h = down_sample->forward(ctx, h);
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.encoder.down." + std::to_string(i) + ".downsample", "h");
}
}
@ -350,6 +353,7 @@ public:
h = mid_block_1->forward(ctx, h);
h = mid_attn_1->forward(ctx, h);
h = mid_block_2->forward(ctx, h); // [N, block_in, h, w]
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.encoder.mid", "h");
// end
h = norm_out->forward(ctx, h);
@ -450,6 +454,7 @@ public:
// conv_in
auto h = conv_in->forward(ctx, z); // [N, block_in, h, w]
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.decoder.prelude", "h");
// middle
h = mid_block_1->forward(ctx, h);
@ -457,6 +462,7 @@ public:
h = mid_attn_1->forward(ctx, h);
h = mid_block_2->forward(ctx, h); // [N, block_in, h, w]
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.decoder.mid", "h");
// upsampling
int num_resolutions = static_cast<int>(ch_mult.size());
@ -466,12 +472,14 @@ public:
auto up_block = std::dynamic_pointer_cast<ResnetBlock>(blocks[name]);
h = up_block->forward(ctx, h);
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.decoder.up." + std::to_string(i) + ".block." + std::to_string(j), "h");
}
if (i != 0) {
std::string name = "up." + std::to_string(i) + ".upsample";
auto up_sample = std::dynamic_pointer_cast<UpSampleBlock>(blocks[name]);
h = up_sample->forward(ctx, h);
// sd::ggml_graph_cut::mark_graph_cut(h, "vae.decoder.up." + std::to_string(i) + ".upsample", "h");
}
}
@ -599,6 +607,7 @@ public:
if (use_quant) {
auto post_quant_conv = std::dynamic_pointer_cast<Conv2d>(blocks["post_quant_conv"]);
z = post_quant_conv->forward(ctx, z); // [N, z_channels, h, w]
// sd::ggml_graph_cut::mark_graph_cut(z, "vae.decode.prelude", "z");
}
auto decoder = std::dynamic_pointer_cast<Decoder>(blocks["decoder"]);
@ -616,6 +625,7 @@ public:
if (use_quant) {
auto quant_conv = std::dynamic_pointer_cast<Conv2d>(blocks["quant_conv"]);
z = quant_conv->forward(ctx, z); // [N, 2*embed_dim, h/8, w/8]
// sd::ggml_graph_cut::mark_graph_cut(z, "vae.encode.final", "z");
}
if (sd_version_uses_flux2_vae(version)) {
z = ggml_ext_chunk(ctx->ggml_ctx, z, 2, 2)[0];

View File

@ -95,8 +95,9 @@ public:
ggml_tensor* forward(GGMLRunnerContext* ctx,
ggml_tensor* x,
ggml_tensor* mask = nullptr,
int clip_skip = -1) {
ggml_tensor* mask = nullptr,
int clip_skip = -1,
const std::string& graph_cut_prefix = "") {
// x: [N, n_token, d_model]
int layer_idx = n_layer - 1;
// LOG_DEBUG("clip_skip %d", clip_skip);
@ -112,6 +113,9 @@ public:
std::string name = "layers." + std::to_string(i);
auto layer = std::dynamic_pointer_cast<CLIPLayer>(blocks[name]);
x = layer->forward(ctx, x, mask); // [N, n_token, d_model]
if (!graph_cut_prefix.empty()) {
sd::ggml_graph_cut::mark_graph_cut(x, graph_cut_prefix + ".layers." + std::to_string(i), "x");
}
// LOG_DEBUG("layer %d", i);
}
return x;
@ -304,7 +308,8 @@ public:
auto final_layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["final_layer_norm"]);
auto x = embeddings->forward(ctx, input_ids, tkn_embeddings); // [N, n_token, hidden_size]
x = encoder->forward(ctx, x, mask, return_pooled ? -1 : clip_skip);
sd::ggml_graph_cut::mark_graph_cut(x, "clip_text.prelude", "x");
x = encoder->forward(ctx, x, mask, return_pooled ? -1 : clip_skip, "clip_text");
if (return_pooled || with_final_ln) {
x = final_layer_norm->forward(ctx, x);
}
@ -368,7 +373,8 @@ public:
auto x = embeddings->forward(ctx, pixel_values); // [N, num_positions, embed_dim]
x = pre_layernorm->forward(ctx, x);
x = encoder->forward(ctx, x, nullptr, clip_skip);
sd::ggml_graph_cut::mark_graph_cut(x, "clip_vision.prelude", "x");
x = encoder->forward(ctx, x, nullptr, clip_skip, "clip_vision");
auto last_hidden_state = x;

View File

@ -1,7 +1,9 @@
#ifndef __COMMON_BLOCK_HPP__
#define __COMMON_BLOCK_HPP__
#include "ggml-backend.h"
#include "ggml_extend.hpp"
#include "util.h"
class DownSampleBlock : public GGMLBlock {
protected:
@ -248,9 +250,6 @@ public:
float scale = 1.f;
if (precision_fix) {
scale = 1.f / 128.f;
#ifdef SD_USE_VULKAN
force_prec_f32 = true;
#endif
}
// The purpose of the scale here is to prevent NaN issues in certain situations.
// For example, when using Vulkan without enabling force_prec_f32,
@ -264,6 +263,9 @@ public:
auto net_0 = std::dynamic_pointer_cast<UnaryBlock>(blocks["net.0"]);
auto net_2 = std::dynamic_pointer_cast<Linear>(blocks["net.2"]);
if (sd_backend_is(ctx->backend, "Vulkan")) {
net_2->set_force_prec_f32(true);
}
x = net_0->forward(ctx, x); // [ne3, ne2, ne1, inner_dim]
x = net_2->forward(ctx, x); // [ne3, ne2, ne1, dim_out]

View File

@ -85,7 +85,8 @@ public:
virtual void free_params_buffer() = 0;
virtual void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) = 0;
virtual size_t get_params_buffer_size() = 0;
virtual void set_flash_attention_enabled(bool enabled) = 0;
virtual void set_max_graph_vram_bytes(size_t max_vram_bytes) {}
virtual void set_flash_attention_enabled(bool enabled) = 0;
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) {}
virtual std::tuple<SDCondition, std::vector<bool>> get_learned_condition_with_trigger(int n_threads,
const ConditionerParams& conditioner_params) {
@ -165,6 +166,13 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
return buffer_size;
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
text_model->set_max_graph_vram_bytes(max_vram_bytes);
if (sd_version_is_sdxl(version)) {
text_model2->set_max_graph_vram_bytes(max_vram_bytes);
}
}
void set_flash_attention_enabled(bool enabled) override {
text_model->set_flash_attention_enabled(enabled);
if (sd_version_is_sdxl(version)) {
@ -781,6 +789,18 @@ struct SD3CLIPEmbedder : public Conditioner {
return buffer_size;
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
if (clip_l) {
clip_l->set_max_graph_vram_bytes(max_vram_bytes);
}
if (clip_g) {
clip_g->set_max_graph_vram_bytes(max_vram_bytes);
}
if (t5) {
t5->set_max_graph_vram_bytes(max_vram_bytes);
}
}
void set_flash_attention_enabled(bool enabled) override {
if (clip_l) {
clip_l->set_flash_attention_enabled(enabled);
@ -1124,6 +1144,15 @@ struct FluxCLIPEmbedder : public Conditioner {
return buffer_size;
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
if (clip_l) {
clip_l->set_max_graph_vram_bytes(max_vram_bytes);
}
if (t5) {
t5->set_max_graph_vram_bytes(max_vram_bytes);
}
}
void set_flash_attention_enabled(bool enabled) override {
if (clip_l) {
clip_l->set_flash_attention_enabled(enabled);
@ -1349,6 +1378,12 @@ struct T5CLIPEmbedder : public Conditioner {
return buffer_size;
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
if (t5) {
t5->set_max_graph_vram_bytes(max_vram_bytes);
}
}
void set_flash_attention_enabled(bool enabled) override {
if (t5) {
t5->set_flash_attention_enabled(enabled);
@ -1525,6 +1560,10 @@ struct AnimaConditioner : public Conditioner {
return llm->get_params_buffer_size();
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
llm->set_max_graph_vram_bytes(max_vram_bytes);
}
void set_flash_attention_enabled(bool enabled) override {
llm->set_flash_attention_enabled(enabled);
}
@ -1657,6 +1696,10 @@ struct LLMEmbedder : public Conditioner {
return buffer_size;
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
llm->set_max_graph_vram_bytes(max_vram_bytes);
}
void set_flash_attention_enabled(bool enabled) override {
llm->set_flash_attention_enabled(enabled);
}

138
src/convert.cpp Normal file
View File

@ -0,0 +1,138 @@
#include <cstring>
#include <mutex>
#include <regex>
#include <vector>
#include "model.h"
#include "model_io/gguf_io.h"
#include "model_io/safetensors_io.h"
#include "util.h"
#include "ggml-cpu.h"
static ggml_type get_export_tensor_type(ModelLoader& model_loader,
const TensorStorage& tensor_storage,
ggml_type type,
const TensorTypeRules& tensor_type_rules) {
const std::string& name = tensor_storage.name;
ggml_type tensor_type = tensor_storage.type;
ggml_type dst_type = type;
for (const auto& tensor_type_rule : tensor_type_rules) {
std::regex pattern(tensor_type_rule.first);
if (std::regex_search(name, pattern)) {
dst_type = tensor_type_rule.second;
break;
}
}
if (model_loader.tensor_should_be_converted(tensor_storage, dst_type)) {
tensor_type = dst_type;
}
return tensor_type;
}
static bool load_tensors_for_export(ModelLoader& model_loader,
ggml_context* ggml_ctx,
ggml_type type,
const TensorTypeRules& tensor_type_rules,
std::vector<TensorWriteInfo>& tensors) {
std::mutex tensor_mutex;
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
const std::string& name = tensor_storage.name;
ggml_type tensor_type = get_export_tensor_type(model_loader, tensor_storage, type, tensor_type_rules);
std::lock_guard<std::mutex> lock(tensor_mutex);
ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
if (tensor == nullptr) {
LOG_ERROR("ggml_new_tensor failed");
return false;
}
ggml_set_name(tensor, name.c_str());
if (!tensor->data) {
GGML_ASSERT(ggml_nelements(tensor) == 0);
// Avoid crashing writers by setting a dummy pointer for zero-sized tensors.
LOG_DEBUG("setting dummy pointer for zero-sized tensor %s", name.c_str());
tensor->data = ggml_get_mem_buffer(ggml_ctx);
}
TensorWriteInfo write_info;
write_info.tensor = tensor;
write_info.n_dims = tensor_storage.n_dims;
for (int i = 0; i < tensor_storage.n_dims; ++i) {
write_info.ne[i] = tensor_storage.ne[i];
}
*dst_tensor = tensor;
tensors.push_back(std::move(write_info));
return true;
};
bool success = model_loader.load_tensors(on_new_tensor_cb);
LOG_INFO("load tensors done");
return success;
}
bool convert(const char* input_path,
const char* vae_path,
const char* output_path,
sd_type_t output_type,
const char* tensor_type_rules,
bool convert_name) {
ModelLoader model_loader;
if (!model_loader.init_from_file(input_path)) {
LOG_ERROR("init model loader from file failed: '%s'", input_path);
return false;
}
if (vae_path != nullptr && strlen(vae_path) > 0) {
if (!model_loader.init_from_file(vae_path, "vae.")) {
LOG_ERROR("init model loader from file failed: '%s'", vae_path);
return false;
}
}
if (convert_name) {
model_loader.convert_tensors_name();
}
ggml_type type = (ggml_type)output_type;
bool output_is_safetensors = ends_with(output_path, ".safetensors");
TensorTypeRules type_rules = parse_tensor_type_rules(tensor_type_rules);
auto backend = ggml_backend_cpu_init();
size_t mem_size = 1 * 1024 * 1024; // for padding
mem_size += model_loader.get_tensor_storage_map().size() * ggml_tensor_overhead();
mem_size += model_loader.get_params_mem_size(backend, type);
LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
ggml_context* ggml_ctx = ggml_init({mem_size, nullptr, false});
if (ggml_ctx == nullptr) {
LOG_ERROR("ggml_init failed for converter");
ggml_backend_free(backend);
return false;
}
std::vector<TensorWriteInfo> tensors;
bool success = load_tensors_for_export(model_loader, ggml_ctx, type, type_rules, tensors);
ggml_backend_free(backend);
std::string error;
if (success) {
if (output_is_safetensors) {
success = write_safetensors_file(output_path, tensors, &error);
} else {
success = write_gguf_file(output_path, tensors, &error);
}
}
if (!success && !error.empty()) {
LOG_ERROR("%s", error.c_str());
}
ggml_free(ggml_ctx);
return success;
}

View File

@ -808,6 +808,18 @@ static std::tuple<float, float, float> get_ancestral_step_flow(float sigma_from,
return {sigma_down, sigma_up, alpha_scale};
}
static std::tuple<float, float, float> get_ancestral_step(float sigma_from,
float sigma_to,
float eta,
bool is_flow_denoiser) {
if (is_flow_denoiser) {
return get_ancestral_step_flow(sigma_from, sigma_to, eta);
} else {
auto [sigma_down, sigma_up] = get_ancestral_step(sigma_from, sigma_to, eta);
return {sigma_down, sigma_up, 1.0f};
}
}
static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
sd::Tensor<float> x,
const std::vector<float>& sigmas,
@ -970,6 +982,100 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
return x;
}
static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
sd::Tensor<float> x,
const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng,
float eta = 1.0f) {
int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) {
float sigma = sigmas[i];
float sigma_to = sigmas[i + 1];
bool opt_first_step = (1.0 - sigma < 1e-6);
auto denoised_opt = model(x, sigma, (opt_first_step ? 1 : -1) * (i + 1));
if (denoised_opt.empty()) {
return {};
}
sd::Tensor<float> denoised = std::move(denoised_opt);
if (sigma_to == 0.0f) {
// Euler method (final step, no noise)
// sigma_to == 0 --> sigma_down = 0, so:
// x + d * (sigma_down - sigma)
// = x + ((x - denoised) / sigma) * (sigma_down - sigma)
// = x + ((x - denoised) / sigma) * ( 0 - sigma)
// = x + ((x - denoised) ) * -1
// = x -x + denoised
x = denoised;
} else {
auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step_flow(sigma, sigma_to, eta);
sd::Tensor<float> D_i;
if (opt_first_step) {
// the reformulated exp_s calc already accounts for this, but we can avoid
// a redundant model call for the typical sigma 1 at the first step:
// exp_s = sqrt((1-sigma)/sigma * (1-sigma_down)/sigma_down)
// = sqrt((1- 1)/ 1 * (1-sigma_down)/sigma_down)
// = 0
// so sigma_s = 1 = sigma, and sigma_s_i_ratio = sigma_s / sigma = 1
// u = (x*sigma_s_i_ratio)+(denoised*(1.0f-sigma_s_i_ratio))
// = (x*1)+(denoised*0) = x
// so D_i = model(u, sigma_s, i + 1)
// = model(x, sigma, i + 1)
// = denoised
D_i = denoised;
} else {
float sigma_s;
// ref implementation would be:
// auto lambda_fn = [](float sigma) -> float {
// return std::log((1.0f - sigma) / sigma); };
// auto sigma_fn = [](float lbda) -> float {
// return 1.0f / (std::exp(lbda) + 1.0f); };
// t_i = lambda_fn(sigma);
// t_down = lambda_fn(sigma_down);
// float r = 0.5f;
// h = t_down - t_i;
// s = t_i + r * h;
// sigma_s = sigma_fn(s);
// assuming r is constant, we sidestep the singularity at sigma -> 1 by:
// s = 0.5 * (lambda_fn(sigma) + lambda_fn(sigma_down))
// = 0.5 * (log((1-sigma)/sigma) + log((1-sigma_down)/sigma_down))
// = 0.5 * log(((1-sigma)/sigma) * ((1-sigma_down)/sigma_down))
// = log(sqrt (((1-sigma)/sigma) * ((1-sigma_down)/sigma_down)))
// so exp(s) = sqrt((1-sigma)/sigma * (1-sigma_down)/sigma_down)
// and sigma_s = sigma_fn(s) = 1.0f / (exp(s) + 1.0f)
float exp_s = std::sqrt(((1 - sigma) / sigma) * ((1 - sigma_down) / sigma_down));
sigma_s = 1.0f / (exp_s + 1.0f);
float sigma_s_i_ratio = sigma_s / sigma;
sd::Tensor<float> u = (x * sigma_s_i_ratio) + (denoised * (1.0f - sigma_s_i_ratio));
auto denoised2_opt = model(u, sigma_s, i + 1);
if (denoised2_opt.empty()) {
return {};
}
D_i = std::move(denoised2_opt);
}
float sigma_down_i_ratio = sigma_down / sigma;
x = (x * sigma_down_i_ratio) + (D_i * (1.0f - sigma_down_i_ratio));
if (sigma_to > 0.0f && eta > 0.0f) {
x = alpha_scale * x + sd::Tensor<float>::randn_like(x, rng) * sigma_up;
}
}
}
return x;
}
static sd::Tensor<float> sample_dpmpp_2m(denoise_cb_t model,
sd::Tensor<float> x,
const std::vector<float>& sigmas) {
@ -1041,7 +1147,8 @@ static sd::Tensor<float> sample_dpmpp_2m_v2(denoise_cb_t model,
static sd::Tensor<float> sample_lcm(denoise_cb_t model,
sd::Tensor<float> x,
const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng) {
std::shared_ptr<RNG> rng,
bool is_flow_denoiser) {
int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) {
auto denoised_opt = model(x, sigmas[i], i + 1);
@ -1050,6 +1157,9 @@ static sd::Tensor<float> sample_lcm(denoise_cb_t model,
}
x = std::move(denoised_opt);
if (sigmas[i + 1] > 0) {
if (is_flow_denoiser) {
x *= (1 - sigmas[i + 1]);
}
x += sd::Tensor<float>::randn_like(x, rng) * sigmas[i + 1];
}
}
@ -1149,6 +1259,7 @@ static sd::Tensor<float> sample_res_multistep(denoise_cb_t model,
sd::Tensor<float> x,
const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng,
bool is_flow_denoiser,
float eta) {
sd::Tensor<float> old_denoised = x;
bool have_old_sigma = false;
@ -1180,7 +1291,8 @@ static sd::Tensor<float> sample_res_multistep(denoise_cb_t model,
float sigma_from = sigmas[i];
float sigma_to = sigmas[i + 1];
auto [sigma_down, sigma_up] = get_ancestral_step(sigma_from, sigma_to, eta);
auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma_from, sigma_to, eta, is_flow_denoiser);
if (sigma_down == 0.0f || !have_old_sigma) {
x += ((x - denoised) / sigma_from) * (sigma_down - sigma_from);
@ -1207,7 +1319,10 @@ static sd::Tensor<float> sample_res_multistep(denoise_cb_t model,
x = sigma_fn(h) * x + h * (b1 * denoised + b2 * old_denoised);
}
if (sigmas[i + 1] > 0 && sigma_up > 0.0f) {
if (sigma_to > 0.0f && sigma_up > 0.0f) {
if (is_flow_denoiser) {
x *= alpha_scale;
}
x += sd::Tensor<float>::randn_like(x, rng) * sigma_up;
}
@ -1222,6 +1337,7 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
sd::Tensor<float> x,
const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng,
bool is_flow_denoiser,
float eta) {
const float c2 = 0.5f;
auto t_fn = [](float sigma) -> float { return -logf(sigma); };
@ -1250,7 +1366,7 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
}
sd::Tensor<float> denoised = std::move(denoised_opt);
auto [sigma_down, sigma_up] = get_ancestral_step(sigma_from, sigma_to, eta);
auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma_from, sigma_to, eta, is_flow_denoiser);
sd::Tensor<float> x0 = x;
if (sigma_down == 0.0f || sigma_from == 0.0f) {
@ -1279,7 +1395,10 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
x = x0 + h * (b1 * eps1 + b2 * eps2);
}
if (sigmas[i + 1] > 0 && sigma_up > 0.0f) {
if (sigma_to > 0.0f && sigma_up > 0.0f) {
if (is_flow_denoiser) {
x *= alpha_scale;
}
x += sd::Tensor<float>::randn_like(x, rng) * sigma_up;
}
}
@ -1425,32 +1544,10 @@ static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
const std::vector<float>& sigmas,
std::shared_ptr<RNG> rng,
float eta) {
float beta_start = 0.00085f;
float beta_end = 0.0120f;
std::vector<double> alphas_cumprod(TIMESTEPS);
std::vector<double> compvis_sigmas(TIMESTEPS);
for (int i = 0; i < TIMESTEPS; i++) {
alphas_cumprod[i] =
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
(1.0f -
std::pow(sqrtf(beta_start) +
(sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)),
2));
compvis_sigmas[i] =
std::sqrt((1 - alphas_cumprod[i]) / alphas_cumprod[i]);
}
int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) {
int timestep = static_cast<int>(roundf(TIMESTEPS - i * ((float)TIMESTEPS / steps))) - 1;
int prev_timestep = timestep - TIMESTEPS / steps;
float sigma = static_cast<float>(compvis_sigmas[timestep]);
if (i == 0) {
x *= std::sqrt(sigma * sigma + 1) / sigma;
} else {
x *= std::sqrt(sigma * sigma + 1);
}
float sigma = sigmas[i];
float sigma_to = sigmas[i + 1];
auto model_output_opt = model(x, sigma, i + 1);
if (model_output_opt.empty()) {
@ -1459,8 +1556,8 @@ static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
sd::Tensor<float> model_output = std::move(model_output_opt);
model_output = (x - model_output) * (1.0f / sigma);
float alpha_prod_t = static_cast<float>(alphas_cumprod[timestep]);
float alpha_prod_t_prev = static_cast<float>(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]);
float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f);
float beta_prod_t = 1.0f - alpha_prod_t;
sd::Tensor<float> pred_original_sample = ((x / std::sqrt(sigma * sigma + 1)) -
@ -1472,11 +1569,11 @@ static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
(1.0f - alpha_prod_t / alpha_prod_t_prev);
float std_dev_t = eta * std::sqrt(variance);
x = std::sqrt(alpha_prod_t_prev) * pred_original_sample +
std::sqrt(1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) * model_output;
x = pred_original_sample +
std::sqrt((1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) / alpha_prod_t_prev) * model_output;
if (eta > 0) {
x += std_dev_t * sd::Tensor<float>::randn_like(x, rng);
x += std_dev_t / std::sqrt(alpha_prod_t_prev) * sd::Tensor<float>::randn_like(x, rng);
}
}
return x;
@ -1503,19 +1600,26 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
std::sqrt((1 - alphas_cumprod[i]) / alphas_cumprod[i]);
}
int original_steps = 50;
int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) {
int timestep = TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor(i * ((float)original_steps / steps));
int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps));
int timestep_s = (int)floor((1 - eta) * prev_timestep);
float sigma = static_cast<float>(compvis_sigmas[timestep]);
if (i == 0) {
x *= std::sqrt(sigma * sigma + 1) / sigma;
} else {
x *= std::sqrt(sigma * sigma + 1);
auto get_timestep_from_sigma = [&](float s) -> int {
auto it = std::lower_bound(compvis_sigmas.begin(), compvis_sigmas.end(), s);
if (it == compvis_sigmas.begin())
return 0;
if (it == compvis_sigmas.end())
return TIMESTEPS - 1;
int idx_high = static_cast<int>(std::distance(compvis_sigmas.begin(), it));
int idx_low = idx_high - 1;
if (std::abs(compvis_sigmas[idx_high] - s) < std::abs(compvis_sigmas[idx_low] - s)) {
return idx_high;
}
return idx_low;
};
int steps = static_cast<int>(sigmas.size()) - 1;
for (int i = 0; i < steps; i++) {
float sigma_to = sigmas[i + 1];
int prev_timestep = get_timestep_from_sigma(sigma_to);
int timestep_s = (int)floor((1 - eta) * prev_timestep);
float sigma = sigmas[i];
auto model_output_opt = model(x, sigma, i + 1);
if (model_output_opt.empty()) {
@ -1524,9 +1628,9 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
sd::Tensor<float> model_output = std::move(model_output_opt);
model_output = (x - model_output) * (1.0f / sigma);
float alpha_prod_t = static_cast<float>(alphas_cumprod[timestep]);
float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
float beta_prod_t = 1.0f - alpha_prod_t;
float alpha_prod_t_prev = static_cast<float>(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]);
float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f);
float alpha_prod_s = static_cast<float>(alphas_cumprod[timestep_s]);
float beta_prod_s = 1.0f - alpha_prod_s;
@ -1534,12 +1638,12 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
std::sqrt(beta_prod_t) * model_output) *
(1.0f / std::sqrt(alpha_prod_t));
x = std::sqrt(alpha_prod_s) * pred_original_sample +
std::sqrt(beta_prod_s) * model_output;
x = std::sqrt(alpha_prod_s / alpha_prod_t_prev) * pred_original_sample +
std::sqrt(beta_prod_s / alpha_prod_t_prev) * model_output;
if (eta > 0 && i != steps - 1) {
if (eta > 0 && sigma_to > 0.0f) {
x = std::sqrt(alpha_prod_t_prev / alpha_prod_s) * x +
std::sqrt(1.0f - alpha_prod_t_prev / alpha_prod_s) * sd::Tensor<float>::randn_like(x, rng);
std::sqrt(1.0f / alpha_prod_t_prev - 1.0f / alpha_prod_s) * sd::Tensor<float>::randn_like(x, rng);
}
}
return x;
@ -1566,21 +1670,24 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
case DPM2_SAMPLE_METHOD:
return sample_dpm2(model, std::move(x), sigmas);
case DPMPP2S_A_SAMPLE_METHOD:
return sample_dpmpp_2s_ancestral(model, std::move(x), sigmas, rng, eta);
if (is_flow_denoiser)
return sample_dpmpp_2s_ancestral_flow(model, std::move(x), sigmas, rng, eta);
else
return sample_dpmpp_2s_ancestral(model, std::move(x), sigmas, rng, eta);
case DPMPP2M_SAMPLE_METHOD:
return sample_dpmpp_2m(model, std::move(x), sigmas);
case DPMPP2Mv2_SAMPLE_METHOD:
return sample_dpmpp_2m_v2(model, std::move(x), sigmas);
case LCM_SAMPLE_METHOD:
return sample_lcm(model, std::move(x), sigmas, rng);
return sample_lcm(model, std::move(x), sigmas, rng, is_flow_denoiser);
case IPNDM_SAMPLE_METHOD:
return sample_ipndm(model, std::move(x), sigmas);
case IPNDM_V_SAMPLE_METHOD:
return sample_ipndm_v(model, std::move(x), sigmas);
case RES_MULTISTEP_SAMPLE_METHOD:
return sample_res_multistep(model, std::move(x), sigmas, rng, eta);
return sample_res_multistep(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
case RES_2S_SAMPLE_METHOD:
return sample_res_2s(model, std::move(x), sigmas, rng, eta);
return sample_res_2s(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
case ER_SDE_SAMPLE_METHOD:
return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
case DDIM_TRAILING_SAMPLE_METHOD:

View File

@ -49,6 +49,7 @@ struct DiffusionModel {
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter){};
virtual int64_t get_adm_in_channels() = 0;
virtual void set_flash_attention_enabled(bool enabled) = 0;
virtual void set_max_graph_vram_bytes(size_t max_vram_bytes) = 0;
virtual void set_circular_axes(bool circular_x, bool circular_y) = 0;
};
@ -98,6 +99,10 @@ struct UNetModel : public DiffusionModel {
unet.set_flash_attention_enabled(enabled);
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
unet.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override {
unet.set_circular_axes(circular_x, circular_y);
}
@ -164,6 +169,10 @@ struct MMDiTModel : public DiffusionModel {
mmdit.set_flash_attention_enabled(enabled);
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
mmdit.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override {
mmdit.set_circular_axes(circular_x, circular_y);
}
@ -229,6 +238,10 @@ struct FluxModel : public DiffusionModel {
flux.set_flash_attention_enabled(enabled);
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
flux.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override {
flux.set_circular_axes(circular_x, circular_y);
}
@ -299,6 +312,10 @@ struct AnimaModel : public DiffusionModel {
anima.set_flash_attention_enabled(enabled);
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
anima.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override {
anima.set_circular_axes(circular_x, circular_y);
}
@ -364,6 +381,10 @@ struct WanModel : public DiffusionModel {
wan.set_flash_attention_enabled(enabled);
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
wan.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override {
wan.set_circular_axes(circular_x, circular_y);
}
@ -433,6 +454,10 @@ struct QwenImageModel : public DiffusionModel {
qwen_image.set_flash_attention_enabled(enabled);
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
qwen_image.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override {
qwen_image.set_circular_axes(circular_x, circular_y);
}
@ -499,6 +524,10 @@ struct ZImageModel : public DiffusionModel {
z_image.set_flash_attention_enabled(enabled);
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
z_image.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override {
z_image.set_circular_axes(circular_x, circular_y);
}
@ -564,6 +593,10 @@ struct ErnieImageModel : public DiffusionModel {
ernie_image.set_flash_attention_enabled(enabled);
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
ernie_image.set_max_graph_vram_bytes(max_vram_bytes);
}
void set_circular_axes(bool circular_x, bool circular_y) override {
ernie_image.set_circular_axes(circular_x, circular_y);
}

View File

@ -295,7 +295,9 @@ namespace ErnieImage {
auto c = time_embedding->forward(ctx, sample); // [N, hidden_size]
auto mod_params = adaLN_mod->forward(ctx, ggml_silu(ctx->ggml_ctx, c)); // [N, 6 * hidden_size]
auto chunks = ggml_ext_chunk(ctx->ggml_ctx, mod_params, 6, 0);
sd::ggml_graph_cut::mark_graph_cut(hidden_states, "ernie_image.prelude", "hidden_states");
// sd::ggml_graph_cut::mark_graph_cut(mod_params, "ernie_image.prelude", "mod_params");
auto chunks = ggml_ext_chunk(ctx->ggml_ctx, mod_params, 6, 0);
std::vector<ggml_tensor*> temb;
temb.reserve(6);
for (auto chunk : chunks) {
@ -305,6 +307,7 @@ namespace ErnieImage {
for (int i = 0; i < params.num_layers; i++) {
auto layer = std::dynamic_pointer_cast<ErnieImageSharedAdaLNBlock>(blocks["layers." + std::to_string(i)]);
hidden_states = layer->forward(ctx, hidden_states, pe, temb);
sd::ggml_graph_cut::mark_graph_cut(hidden_states, "ernie_image.layers." + std::to_string(i), "hidden_states");
}
hidden_states = final_norm->forward(ctx, hidden_states, c);

View File

@ -124,27 +124,33 @@ public:
auto conv_hr = std::dynamic_pointer_cast<Conv2d>(blocks["conv_hr"]);
auto conv_last = std::dynamic_pointer_cast<Conv2d>(blocks["conv_last"]);
auto feat = conv_first->forward(ctx, x);
auto feat = conv_first->forward(ctx, x);
sd::ggml_graph_cut::mark_graph_cut(feat, "esrgan.prelude", "feat");
auto body_feat = feat;
for (int i = 0; i < num_block; i++) {
std::string name = "body." + std::to_string(i);
auto block = std::dynamic_pointer_cast<RRDB>(blocks[name]);
body_feat = block->forward(ctx, body_feat);
sd::ggml_graph_cut::mark_graph_cut(body_feat, "esrgan.body." + std::to_string(i), "feat");
}
body_feat = conv_body->forward(ctx, body_feat);
feat = ggml_add(ctx->ggml_ctx, feat, body_feat);
sd::ggml_graph_cut::mark_graph_cut(feat, "esrgan.body.out", "feat");
// upsample
if (scale >= 2) {
auto conv_up1 = std::dynamic_pointer_cast<Conv2d>(blocks["conv_up1"]);
feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx->ggml_ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
sd::ggml_graph_cut::mark_graph_cut(feat, "esrgan.up1", "feat");
if (scale == 4) {
auto conv_up2 = std::dynamic_pointer_cast<Conv2d>(blocks["conv_up2"]);
feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx->ggml_ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
sd::ggml_graph_cut::mark_graph_cut(feat, "esrgan.up2", "feat");
}
}
// for all scales
auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat)));
sd::ggml_graph_cut::mark_graph_cut(out, "esrgan.final", "out");
return out;
}
};

View File

@ -928,6 +928,9 @@ namespace Flux {
}
txt = txt_in->forward(ctx, txt);
sd::ggml_graph_cut::mark_graph_cut(img, "flux.prelude", "img");
sd::ggml_graph_cut::mark_graph_cut(txt, "flux.prelude", "txt");
sd::ggml_graph_cut::mark_graph_cut(vec, "flux.prelude", "vec");
for (int i = 0; i < params.depth; i++) {
if (skip_layers.size() > 0 && std::find(skip_layers.begin(), skip_layers.end(), i) != skip_layers.end()) {
@ -939,6 +942,8 @@ namespace Flux {
auto img_txt = block->forward(ctx, img, txt, vec, pe, txt_img_mask, ds_img_mods, ds_txt_mods);
img = img_txt.first; // [N, n_img_token, hidden_size]
txt = img_txt.second; // [N, n_txt_token, hidden_size]
sd::ggml_graph_cut::mark_graph_cut(img, "flux.double_blocks." + std::to_string(i), "img");
sd::ggml_graph_cut::mark_graph_cut(txt, "flux.double_blocks." + std::to_string(i), "txt");
}
auto txt_img = ggml_concat(ctx->ggml_ctx, txt, img, 1); // [N, n_txt_token + n_img_token, hidden_size]
@ -949,6 +954,7 @@ namespace Flux {
auto block = std::dynamic_pointer_cast<SingleStreamBlock>(blocks["single_blocks." + std::to_string(i)]);
txt_img = block->forward(ctx, txt_img, vec, pe, txt_img_mask, ss_mods);
sd::ggml_graph_cut::mark_graph_cut(txt_img, "flux.single_blocks." + std::to_string(i), "txt_img");
}
img = ggml_view_3d(ctx->ggml_ctx,

File diff suppressed because it is too large Load Diff

298
src/ggml_extend_backend.hpp Normal file
View File

@ -0,0 +1,298 @@
#ifndef __GGML_EXTEND_BACKEND_HPP__
#define __GGML_EXTEND_BACKEND_HPP__
#include <cstring>
#include <mutex>
#include "ggml-backend.h"
#include "ggml.h"
#ifndef __STATIC_INLINE__
#define __STATIC_INLINE__ static inline
#endif
inline void ggml_backend_load_all_once() {
// If the registry already has devices and the CPU backend is present,
// assume either static registration or explicit host-side preloading has
// completed and avoid rescanning the default paths.
if (ggml_backend_dev_count() > 0 && ggml_backend_reg_by_name("CPU") != nullptr) {
return;
}
// In dynamic-backend mode the backend modules are discovered at runtime,
// so we must load them before asking for the CPU backend or its proc table.
// If the host preloaded only a subset of backends, allow one default-path
// scan so missing modules can still be discovered.
static std::once_flag once;
std::call_once(once, []() {
if (ggml_backend_dev_count() > 0 && ggml_backend_reg_by_name("CPU") != nullptr) {
return;
}
ggml_backend_load_all();
});
}
// Do not gate this branch on GGML_CPU or GGML_CPU_ALL_VARIANTS:
// those are CMake options used to configure ggml itself, but they are not
// exported as PUBLIC compile definitions to stable-diffusion in backend-DL mode.
// In practice, this target can reliably see GGML_BACKEND_DL, but not whether
// the CPU backend was compiled as a loadable module. We therefore use runtime
// backend discovery instead of compile-time assumptions.
__STATIC_INLINE__ ggml_backend_reg_t ggml_backend_cpu_reg() {
ggml_backend_reg_t reg = ggml_backend_reg_by_name("CPU");
if (reg != nullptr) {
return reg;
}
ggml_backend_load_all_once();
return ggml_backend_reg_by_name("CPU");
}
__STATIC_INLINE__ ggml_backend_reg_t ggml_backend_reg_from_backend(ggml_backend_t backend) {
if (backend != nullptr) {
ggml_backend_dev_t device = ggml_backend_get_device(backend);
if (device != nullptr) {
return ggml_backend_dev_backend_reg(device);
}
}
return ggml_backend_cpu_reg();
}
__STATIC_INLINE__ ggml_backend_t ggml_backend_cpu_init() {
ggml_backend_t backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
if (backend != nullptr) {
return backend;
}
ggml_backend_load_all_once();
return ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
}
__STATIC_INLINE__ bool ggml_backend_is_cpu(ggml_backend_t backend) {
if (backend == nullptr) {
return false;
}
ggml_backend_dev_t device = ggml_backend_get_device(backend);
if (device != nullptr) {
return ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_CPU;
}
const char* backend_name = ggml_backend_name(backend);
return backend_name != nullptr && std::strcmp(backend_name, "CPU") == 0;
}
__STATIC_INLINE__ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
ggml_backend_reg_t reg = ggml_backend_reg_from_backend(backend_cpu);
if (reg == nullptr) {
return;
}
auto fn = reinterpret_cast<ggml_backend_set_n_threads_t>(ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads"));
if (fn != nullptr) {
fn(backend_cpu, n_threads);
}
}
using __ggml_backend_cpu_set_threadpool_t = void (*)(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
__STATIC_INLINE__ void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) {
ggml_backend_reg_t reg = ggml_backend_reg_from_backend(backend_cpu);
if (reg == nullptr) {
return;
}
auto fn = reinterpret_cast<__ggml_backend_cpu_set_threadpool_t>(ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_set_threadpool"));
if (fn != nullptr) {
fn(backend_cpu, threadpool);
}
}
__STATIC_INLINE__ void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void* abort_callback_data) {
ggml_backend_reg_t reg = ggml_backend_reg_from_backend(backend_cpu);
if (reg == nullptr) {
return;
}
auto fn = reinterpret_cast<ggml_backend_set_abort_callback_t>(ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_abort_callback"));
if (fn != nullptr) {
fn(backend_cpu, abort_callback, abort_callback_data);
}
}
__STATIC_INLINE__ ggml_backend_buffer_t ggml_backend_tensor_buffer(const struct ggml_tensor* tensor) {
if (tensor == nullptr) {
return nullptr;
}
return tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
}
__STATIC_INLINE__ bool ggml_backend_tensor_is_host_accessible(const struct ggml_tensor* tensor) {
if (tensor == nullptr || tensor->data == nullptr) {
return false;
}
ggml_backend_buffer_t buffer = ggml_backend_tensor_buffer(tensor);
return buffer == nullptr || ggml_backend_buffer_is_host(buffer);
}
__STATIC_INLINE__ size_t ggml_backend_tensor_offset(const struct ggml_tensor* tensor, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
return (size_t)(i0 * tensor->nb[0] + i1 * tensor->nb[1] + i2 * tensor->nb[2] + i3 * tensor->nb[3]);
}
template <typename T>
__STATIC_INLINE__ void ggml_backend_tensor_write_scalar(const struct ggml_tensor* tensor, int64_t i0, int64_t i1, int64_t i2, int64_t i3, T value) {
const size_t offset = ggml_backend_tensor_offset(tensor, i0, i1, i2, i3);
if (ggml_backend_tensor_is_host_accessible(tensor)) {
auto* dst = reinterpret_cast<T*>(reinterpret_cast<char*>(tensor->data) + offset);
*dst = value;
return;
}
ggml_backend_tensor_set(const_cast<struct ggml_tensor*>(tensor), &value, offset, sizeof(T));
}
__STATIC_INLINE__ void ggml_set_f32_nd(const struct ggml_tensor* tensor, int64_t i0, int64_t i1, int64_t i2, int64_t i3, float value) {
switch (tensor->type) {
case GGML_TYPE_I8:
ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, static_cast<int8_t>(value));
break;
case GGML_TYPE_I16:
ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, static_cast<int16_t>(value));
break;
case GGML_TYPE_I32:
ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, static_cast<int32_t>(value));
break;
case GGML_TYPE_F16:
ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, ggml_fp32_to_fp16(value));
break;
case GGML_TYPE_BF16:
ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, ggml_fp32_to_bf16(value));
break;
case GGML_TYPE_F32:
ggml_backend_tensor_write_scalar(tensor, i0, i1, i2, i3, value);
break;
default:
GGML_ABORT("fatal error");
}
}
__STATIC_INLINE__ void ggml_set_f32_1d(const struct ggml_tensor* tensor, int i, float value) {
if (!ggml_is_contiguous(tensor)) {
int64_t id[4] = {0, 0, 0, 0};
ggml_unravel_index(tensor, i, &id[0], &id[1], &id[2], &id[3]);
ggml_set_f32_nd(tensor, id[0], id[1], id[2], id[3], value);
return;
}
switch (tensor->type) {
case GGML_TYPE_I8:
ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, static_cast<int8_t>(value));
break;
case GGML_TYPE_I16:
ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, static_cast<int16_t>(value));
break;
case GGML_TYPE_I32:
ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, static_cast<int32_t>(value));
break;
case GGML_TYPE_F16:
ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, ggml_fp32_to_fp16(value));
break;
case GGML_TYPE_BF16:
ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, ggml_fp32_to_bf16(value));
break;
case GGML_TYPE_F32:
ggml_backend_tensor_write_scalar(tensor, i, 0, 0, 0, value);
break;
default:
GGML_ABORT("fatal error");
}
}
__STATIC_INLINE__ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context* ctx, struct ggml_cgraph* cgraph, int n_threads) {
(void)ctx;
// The legacy ggml_graph_compute_with_ctx() symbol lives in ggml-cpu, but
// the backend proc table does not expose it in GGML_BACKEND_DL mode.
// Recreate the old behavior by initializing the CPU backend explicitly and
// executing the graph through the generic backend API.
ggml_backend_t backend = ggml_backend_cpu_init();
if (backend == nullptr) {
return GGML_STATUS_ALLOC_FAILED;
}
ggml_backend_cpu_set_n_threads(backend, n_threads);
const enum ggml_status status = ggml_backend_graph_compute(backend, cgraph);
ggml_backend_free(backend);
return status;
}
__STATIC_INLINE__ ggml_tensor* ggml_set_f32(struct ggml_tensor* tensor, float value) {
GGML_ASSERT(tensor != nullptr);
if (ggml_backend_tensor_is_host_accessible(tensor) && ggml_is_contiguous(tensor)) {
const int64_t nelements = ggml_nelements(tensor);
switch (tensor->type) {
case GGML_TYPE_I8: {
auto* data = reinterpret_cast<int8_t*>(tensor->data);
const int8_t v = static_cast<int8_t>(value);
for (int64_t i = 0; i < nelements; ++i) {
data[i] = v;
}
} break;
case GGML_TYPE_I16: {
auto* data = reinterpret_cast<int16_t*>(tensor->data);
const int16_t v = static_cast<int16_t>(value);
for (int64_t i = 0; i < nelements; ++i) {
data[i] = v;
}
} break;
case GGML_TYPE_I32: {
auto* data = reinterpret_cast<int32_t*>(tensor->data);
const int32_t v = static_cast<int32_t>(value);
for (int64_t i = 0; i < nelements; ++i) {
data[i] = v;
}
} break;
case GGML_TYPE_F16: {
auto* data = reinterpret_cast<ggml_fp16_t*>(tensor->data);
const ggml_fp16_t v = ggml_fp32_to_fp16(value);
for (int64_t i = 0; i < nelements; ++i) {
data[i] = v;
}
} break;
case GGML_TYPE_BF16: {
auto* data = reinterpret_cast<ggml_bf16_t*>(tensor->data);
const ggml_bf16_t v = ggml_fp32_to_bf16(value);
for (int64_t i = 0; i < nelements; ++i) {
data[i] = v;
}
} break;
case GGML_TYPE_F32: {
auto* data = reinterpret_cast<float*>(tensor->data);
for (int64_t i = 0; i < nelements; ++i) {
data[i] = value;
}
} break;
default:
GGML_ABORT("fatal error");
}
return tensor;
}
const int64_t nelements = ggml_nelements(tensor);
for (int64_t i = 0; i < nelements; ++i) {
ggml_set_f32_1d(tensor, static_cast<int>(i), value);
}
return tensor;
}
#endif

676
src/ggml_graph_cut.cpp Normal file
View File

@ -0,0 +1,676 @@
#include "ggml_graph_cut.h"
#include <algorithm>
#include <cstring>
#include <map>
#include <set>
#include <sstream>
#include <stack>
#include <unordered_map>
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "util.h"
#include "../ggml/src/ggml-impl.h"
namespace sd::ggml_graph_cut {
static std::string graph_cut_tensor_display_name(const ggml_tensor* tensor) {
if (tensor == nullptr) {
return "<null>";
}
if (tensor->name[0] != '\0') {
return tensor->name;
}
return sd_format("<tensor@%p>", (const void*)tensor);
}
static int graph_leaf_index(ggml_cgraph* gf, const ggml_tensor* tensor) {
GGML_ASSERT(gf != nullptr);
GGML_ASSERT(tensor != nullptr);
for (int i = 0; i < gf->n_leafs; ++i) {
if (gf->leafs[i] == tensor) {
return i;
}
}
return -1;
}
static bool is_params_tensor(const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const ggml_tensor* tensor) {
if (tensor == nullptr) {
return false;
}
return params_tensor_set.find(tensor) != params_tensor_set.end();
}
static Plan::InputShape input_shape(const ggml_tensor* tensor) {
Plan::InputShape shape;
if (tensor == nullptr) {
return shape;
}
shape.type = tensor->type;
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
shape.ne[static_cast<size_t>(i)] = tensor->ne[i];
}
return shape;
}
static size_t graph_cut_segment_vram_bytes(const Segment& segment) {
return segment.compute_buffer_size +
segment.input_param_bytes +
segment.input_previous_cut_bytes +
segment.output_bytes;
}
static Segment make_segment_seed(const Plan& plan,
size_t start_segment_index,
size_t end_segment_index) {
GGML_ASSERT(start_segment_index < plan.segments.size());
GGML_ASSERT(end_segment_index < plan.segments.size());
GGML_ASSERT(start_segment_index <= end_segment_index);
Segment seed;
const auto& start_segment = plan.segments[start_segment_index];
const auto& target_segment = plan.segments[end_segment_index];
std::unordered_set<int> seen_output_node_indices;
for (size_t seg_idx = start_segment_index; seg_idx <= end_segment_index; ++seg_idx) {
for (int output_node_index : plan.segments[seg_idx].output_node_indices) {
if (seen_output_node_indices.insert(output_node_index).second) {
seed.output_node_indices.push_back(output_node_index);
}
}
}
if (start_segment_index == end_segment_index) {
seed.group_name = target_segment.group_name;
} else {
seed.group_name = sd_format("%s..%s",
start_segment.group_name.c_str(),
target_segment.group_name.c_str());
}
return seed;
}
static void build_segment(ggml_cgraph* gf,
Plan& plan,
Segment& segment,
const std::unordered_map<const ggml_tensor*, int>& producer_index,
std::unordered_set<int>& available_cut_output_node_indices,
ggml_backend_t backend,
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const char* log_desc) {
std::set<int> internal_nodes;
std::unordered_set<const ggml_tensor*> input_seen;
std::vector<Segment::InputRef> input_refs;
std::stack<ggml_tensor*> work_stack;
for (int output_node_index : segment.output_node_indices) {
ggml_tensor* output = ggml_graph_node(gf, output_node_index);
if (output != nullptr) {
work_stack.push(output);
}
}
while (!work_stack.empty()) {
ggml_tensor* tensor = work_stack.top();
work_stack.pop();
if (tensor == nullptr) {
continue;
}
auto producer_it = producer_index.find(tensor);
if (producer_it == producer_index.end()) {
if (input_seen.insert(tensor).second) {
Segment::InputRef input_ref;
input_ref.type = is_params_tensor(params_tensor_set, tensor) ? Segment::INPUT_PARAM : Segment::INPUT_EXTERNAL;
input_ref.display_name = graph_cut_tensor_display_name(tensor);
input_ref.leaf_index = graph_leaf_index(gf, tensor);
input_refs.push_back(std::move(input_ref));
}
continue;
}
int node_idx = producer_it->second;
if (available_cut_output_node_indices.find(node_idx) != available_cut_output_node_indices.end()) {
if (input_seen.insert(tensor).second) {
Segment::InputRef input_ref;
input_ref.type = Segment::INPUT_PREVIOUS_CUT;
input_ref.display_name = graph_cut_tensor_display_name(tensor);
input_ref.node_index = node_idx;
input_refs.push_back(std::move(input_ref));
}
continue;
}
if (!internal_nodes.insert(node_idx).second) {
continue;
}
ggml_tensor* node = ggml_graph_node(gf, node_idx);
for (int src_idx = 0; src_idx < GGML_MAX_SRC; ++src_idx) {
if (node->src[src_idx] != nullptr) {
work_stack.push(node->src[src_idx]);
}
}
}
if (!internal_nodes.empty()) {
segment.internal_node_indices.assign(internal_nodes.begin(), internal_nodes.end());
}
std::sort(input_refs.begin(),
input_refs.end(),
[](const Segment::InputRef& a, const Segment::InputRef& b) {
if (a.type != b.type) {
return a.type < b.type;
}
return a.display_name < b.display_name;
});
segment.input_refs = input_refs;
for (const auto& input : input_refs) {
ggml_tensor* current_input = input_tensor(gf, input);
size_t tensor_bytes = current_input == nullptr
? 0
: (input.type == Segment::INPUT_PREVIOUS_CUT
? cache_tensor_bytes(current_input)
: ggml_nbytes(current_input));
switch (input.type) {
case Segment::INPUT_PREVIOUS_CUT:
segment.input_previous_cut_bytes += tensor_bytes;
break;
case Segment::INPUT_PARAM:
segment.input_param_bytes += tensor_bytes;
break;
case Segment::INPUT_EXTERNAL:
default:
segment.input_external_bytes += tensor_bytes;
break;
}
}
for (int output_node_index : segment.output_node_indices) {
ggml_tensor* output = ggml_graph_node(gf, output_node_index);
segment.output_bytes += cache_tensor_bytes(output);
}
segment.compute_buffer_size = measure_segment_compute_buffer(backend, gf, segment, log_desc);
for (int output_node_index : segment.output_node_indices) {
available_cut_output_node_indices.insert(output_node_index);
}
plan.segments.push_back(std::move(segment));
}
bool is_graph_cut_tensor(const ggml_tensor* tensor) {
if (tensor == nullptr || tensor->name[0] == '\0') {
return false;
}
return std::strncmp(tensor->name, GGML_RUNNER_CUT_PREFIX, std::strlen(GGML_RUNNER_CUT_PREFIX)) == 0;
}
std::string make_graph_cut_name(const std::string& group, const std::string& output) {
return std::string(GGML_RUNNER_CUT_PREFIX) + group + "|" + output;
}
void mark_graph_cut(ggml_tensor* tensor, const std::string& group, const std::string& output) {
if (tensor == nullptr) {
return;
}
auto name = make_graph_cut_name(group, output);
ggml_set_name(tensor, name.c_str());
}
int leaf_count(ggml_cgraph* gf) {
GGML_ASSERT(gf != nullptr);
return gf->n_leafs;
}
ggml_tensor* leaf_tensor(ggml_cgraph* gf, int leaf_index) {
GGML_ASSERT(gf != nullptr);
if (leaf_index < 0 || leaf_index >= gf->n_leafs) {
return nullptr;
}
return gf->leafs[leaf_index];
}
ggml_backend_buffer_t tensor_buffer(const ggml_tensor* tensor) {
if (tensor == nullptr) {
return nullptr;
}
return tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
}
ggml_tensor* cache_source_tensor(ggml_tensor* tensor) {
if (tensor == nullptr) {
return nullptr;
}
return tensor->view_src ? tensor->view_src : tensor;
}
size_t cache_tensor_bytes(const ggml_tensor* tensor) {
if (tensor == nullptr) {
return 0;
}
const ggml_tensor* cache_src = tensor->view_src ? tensor->view_src : tensor;
return ggml_nbytes(cache_src);
}
bool plan_matches_graph(ggml_cgraph* gf, const Plan& plan) {
GGML_ASSERT(gf != nullptr);
if (ggml_graph_n_nodes(gf) != plan.n_nodes || gf->n_leafs != plan.n_leafs) {
return false;
}
for (const auto& input_shape_ref : plan.input_shapes) {
if (input_shape_ref.leaf_index < 0 || input_shape_ref.leaf_index >= gf->n_leafs) {
return false;
}
ggml_tensor* leaf = gf->leafs[input_shape_ref.leaf_index];
if (leaf == nullptr || input_shape_ref.type != leaf->type) {
return false;
}
for (int d = 0; d < GGML_MAX_DIMS; ++d) {
if (input_shape_ref.ne[static_cast<size_t>(d)] != leaf->ne[d]) {
return false;
}
}
}
return true;
}
ggml_tensor* output_tensor(ggml_cgraph* gf, const Segment& segment, size_t output_index) {
GGML_ASSERT(gf != nullptr);
if (output_index >= segment.output_node_indices.size()) {
return nullptr;
}
int node_index = segment.output_node_indices[output_index];
if (node_index < 0 || node_index >= ggml_graph_n_nodes(gf)) {
return nullptr;
}
return ggml_graph_node(gf, node_index);
}
ggml_tensor* input_tensor(ggml_cgraph* gf, const Segment::InputRef& input_ref) {
GGML_ASSERT(gf != nullptr);
if (input_ref.type == Segment::INPUT_PREVIOUS_CUT) {
if (input_ref.node_index < 0 || input_ref.node_index >= ggml_graph_n_nodes(gf)) {
return nullptr;
}
return ggml_graph_node(gf, input_ref.node_index);
}
if (input_ref.leaf_index < 0 || input_ref.leaf_index >= gf->n_leafs) {
return nullptr;
}
return leaf_tensor(gf, input_ref.leaf_index);
}
std::vector<ggml_tensor*> param_tensors(ggml_cgraph* gf, const Segment& segment) {
GGML_ASSERT(gf != nullptr);
std::vector<ggml_tensor*> tensors;
std::unordered_set<ggml_tensor*> seen_tensors;
tensors.reserve(segment.input_refs.size());
seen_tensors.reserve(segment.input_refs.size());
for (const auto& input_ref : segment.input_refs) {
if (input_ref.type != Segment::INPUT_PARAM) {
continue;
}
ggml_tensor* tensor = input_tensor(gf, input_ref);
if (tensor == nullptr) {
continue;
}
if (seen_tensors.insert(tensor).second) {
tensors.push_back(tensor);
}
}
return tensors;
}
std::vector<ggml_tensor*> runtime_param_tensors(ggml_cgraph* gf, const Segment& segment, const char* log_desc) {
std::vector<ggml_tensor*> tensors = param_tensors(gf, segment);
std::vector<ggml_tensor*> filtered_tensors;
filtered_tensors.reserve(tensors.size());
for (ggml_tensor* tensor : tensors) {
if (tensor_buffer(tensor) == nullptr) {
LOG_WARN("%s graph cut skipping param input without buffer: segment=%s tensor=%s",
log_desc == nullptr ? "unknown" : log_desc,
segment.group_name.c_str(),
tensor->name);
continue;
}
filtered_tensors.push_back(tensor);
}
return filtered_tensors;
}
std::unordered_set<std::string> collect_future_input_names(ggml_cgraph* gf,
const Plan& plan,
size_t current_segment_index) {
GGML_ASSERT(gf != nullptr);
std::unordered_set<std::string> future_input_names;
for (size_t seg_idx = current_segment_index + 1; seg_idx < plan.segments.size(); ++seg_idx) {
const auto& segment = plan.segments[seg_idx];
for (const auto& input_ref : segment.input_refs) {
if (input_ref.type != Segment::INPUT_PREVIOUS_CUT) {
continue;
}
ggml_tensor* current_input = input_tensor(gf, input_ref);
if (current_input != nullptr && current_input->name[0] != '\0') {
future_input_names.insert(current_input->name);
}
}
}
return future_input_names;
}
ggml_cgraph* build_segment_graph(ggml_cgraph* gf,
const Segment& segment,
ggml_context** graph_ctx_out) {
GGML_ASSERT(gf != nullptr);
GGML_ASSERT(graph_ctx_out != nullptr);
const size_t graph_size = segment.internal_node_indices.size() + segment.input_refs.size() + 8;
ggml_init_params params = {
/*.mem_size =*/ggml_graph_overhead_custom(graph_size, false) + 1024,
/*.mem_buffer =*/nullptr,
/*.no_alloc =*/true,
};
ggml_context* graph_ctx = ggml_init(params);
GGML_ASSERT(graph_ctx != nullptr);
ggml_cgraph* segment_graph = ggml_new_graph_custom(graph_ctx, graph_size, false);
GGML_ASSERT(segment_graph != nullptr);
for (const auto& input : segment.input_refs) {
ggml_tensor* current_input = input_tensor(gf, input);
if (current_input == nullptr) {
continue;
}
GGML_ASSERT(segment_graph->n_leafs < segment_graph->size);
segment_graph->leafs[segment_graph->n_leafs++] = current_input;
}
for (int output_node_index : segment.output_node_indices) {
ggml_tensor* output = ggml_graph_node(gf, output_node_index);
if (output == nullptr) {
continue;
}
ggml_set_output(output);
}
for (int node_idx : segment.internal_node_indices) {
ggml_graph_add_node(segment_graph, ggml_graph_node(gf, node_idx));
}
*graph_ctx_out = graph_ctx;
return segment_graph;
}
size_t measure_segment_compute_buffer(ggml_backend_t backend,
ggml_cgraph* gf,
const Segment& segment,
const char* log_desc) {
GGML_ASSERT(backend != nullptr);
GGML_ASSERT(gf != nullptr);
if (segment.internal_node_indices.empty()) {
return 0;
}
ggml_context* graph_ctx = nullptr;
ggml_cgraph* segment_graph = build_segment_graph(gf, segment, &graph_ctx);
ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend));
size_t sizes[1] = {0};
ggml_gallocr_reserve_n_size(
allocr,
segment_graph,
nullptr,
nullptr,
sizes);
size_t buffer_size = sizes[0];
ggml_gallocr_free(allocr);
ggml_free(graph_ctx);
return buffer_size;
}
Plan build_plan(ggml_backend_t backend,
ggml_cgraph* gf,
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const char* log_desc) {
GGML_ASSERT(backend != nullptr);
GGML_ASSERT(gf != nullptr);
Plan plan;
plan.available = true;
const int n_nodes = ggml_graph_n_nodes(gf);
if (n_nodes <= 0) {
return plan;
}
plan.n_nodes = n_nodes;
plan.n_leafs = gf->n_leafs;
for (int i = 0; i < gf->n_leafs; ++i) {
ggml_tensor* leaf = gf->leafs[i];
if (is_params_tensor(params_tensor_set, leaf)) {
continue;
}
auto shape = input_shape(leaf);
shape.leaf_index = i;
plan.input_shapes.push_back(shape);
}
std::unordered_map<const ggml_tensor*, int> producer_index;
producer_index.reserve(static_cast<size_t>(n_nodes));
for (int i = 0; i < n_nodes; ++i) {
producer_index[ggml_graph_node(gf, i)] = i;
}
std::vector<Segment> grouped_segments;
std::unordered_map<std::string, size_t> group_to_segment;
for (int i = 0; i < n_nodes; ++i) {
ggml_tensor* node = ggml_graph_node(gf, i);
if (!is_graph_cut_tensor(node)) {
continue;
}
plan.has_cuts = true;
std::string full_name(node->name);
std::string payload = full_name.substr(std::strlen(GGML_RUNNER_CUT_PREFIX));
size_t sep = payload.find('|');
std::string group = sep == std::string::npos ? payload : payload.substr(0, sep);
auto it = group_to_segment.find(group);
if (it == group_to_segment.end()) {
Segment segment;
segment.group_name = group;
segment.output_node_indices.push_back(i);
group_to_segment[group] = grouped_segments.size();
grouped_segments.push_back(std::move(segment));
} else {
auto& segment = grouped_segments[it->second];
segment.output_node_indices.push_back(i);
}
}
if (!plan.has_cuts) {
return plan;
}
std::unordered_set<int> available_cut_output_node_indices;
available_cut_output_node_indices.reserve(static_cast<size_t>(n_nodes));
for (auto& segment : grouped_segments) {
build_segment(gf,
plan,
segment,
producer_index,
available_cut_output_node_indices,
backend,
params_tensor_set,
log_desc);
}
ggml_tensor* final_output = ggml_graph_node(gf, -1);
if (final_output != nullptr && available_cut_output_node_indices.find(n_nodes - 1) == available_cut_output_node_indices.end()) {
Segment final_segment;
final_segment.group_name = "ggml_runner.final";
final_segment.output_node_indices.push_back(n_nodes - 1);
build_segment(gf,
plan,
final_segment,
producer_index,
available_cut_output_node_indices,
backend,
params_tensor_set,
log_desc);
}
return plan;
}
Plan apply_max_vram_budget(ggml_cgraph* gf,
const Plan& base_plan,
size_t max_graph_vram_bytes,
ggml_backend_t backend,
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const char* log_desc) {
GGML_ASSERT(backend != nullptr);
GGML_ASSERT(gf != nullptr);
int64_t t_budget_begin = ggml_time_ms();
if (max_graph_vram_bytes == 0 || !base_plan.has_cuts || base_plan.segments.size() <= 1) {
return base_plan;
}
const int n_nodes = ggml_graph_n_nodes(gf);
std::unordered_map<const ggml_tensor*, int> producer_index;
producer_index.reserve(static_cast<size_t>(n_nodes));
for (int i = 0; i < n_nodes; ++i) {
producer_index[ggml_graph_node(gf, i)] = i;
}
Plan merged_plan;
merged_plan.available = true;
merged_plan.has_cuts = base_plan.has_cuts;
merged_plan.valid = base_plan.valid;
merged_plan.n_nodes = base_plan.n_nodes;
merged_plan.n_leafs = base_plan.n_leafs;
std::unordered_set<int> available_cut_output_node_indices;
available_cut_output_node_indices.reserve(static_cast<size_t>(n_nodes));
size_t start_segment_index = 0;
while (start_segment_index < base_plan.segments.size()) {
Plan single_plan;
auto single_available_cut_output_node_indices = available_cut_output_node_indices;
auto single_seed = make_segment_seed(base_plan,
start_segment_index,
start_segment_index);
build_segment(gf,
single_plan,
single_seed,
producer_index,
single_available_cut_output_node_indices,
backend,
params_tensor_set,
log_desc);
GGML_ASSERT(!single_plan.segments.empty());
size_t best_end_segment_index = start_segment_index;
bool can_merge_next_segment = graph_cut_segment_vram_bytes(single_plan.segments.back()) <= max_graph_vram_bytes;
while (can_merge_next_segment && best_end_segment_index + 1 < base_plan.segments.size()) {
const size_t next_end_segment_index = best_end_segment_index + 1;
Plan candidate_plan;
auto candidate_available_cut_output_node_indices = available_cut_output_node_indices;
auto candidate_seed = make_segment_seed(base_plan,
start_segment_index,
next_end_segment_index);
build_segment(gf,
candidate_plan,
candidate_seed,
producer_index,
candidate_available_cut_output_node_indices,
backend,
params_tensor_set,
log_desc);
GGML_ASSERT(!candidate_plan.segments.empty());
const auto& candidate_segment = candidate_plan.segments.back();
if (graph_cut_segment_vram_bytes(candidate_segment) > max_graph_vram_bytes) {
break;
}
best_end_segment_index = next_end_segment_index;
}
auto best_seed = make_segment_seed(base_plan,
start_segment_index,
best_end_segment_index);
build_segment(gf,
merged_plan,
best_seed,
producer_index,
available_cut_output_node_indices,
backend,
params_tensor_set,
log_desc);
start_segment_index = best_end_segment_index + 1;
}
if (log_desc != nullptr && merged_plan.segments.size() != base_plan.segments.size()) {
LOG_INFO("%s graph cut max_vram=%.2f MB merged %zu segments -> %zu segments",
log_desc,
max_graph_vram_bytes / 1024.0 / 1024.0,
base_plan.segments.size(),
merged_plan.segments.size());
}
if (log_desc != nullptr) {
LOG_INFO("%s graph cut max_vram budget merge took %lld ms",
log_desc,
ggml_time_ms() - t_budget_begin);
}
return merged_plan;
}
Plan resolve_plan(ggml_backend_t backend,
ggml_cgraph* gf,
PlanCache* cache,
size_t max_graph_vram_bytes,
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const char* log_desc) {
GGML_ASSERT(backend != nullptr);
GGML_ASSERT(gf != nullptr);
GGML_ASSERT(cache != nullptr);
int64_t t_prepare_begin = ggml_time_ms();
Plan base_plan;
int64_t t_plan_begin = ggml_time_ms();
if (cache->graph_cut_plan.available && plan_matches_graph(gf, cache->graph_cut_plan)) {
base_plan = cache->graph_cut_plan;
} else {
base_plan = build_plan(backend, gf, params_tensor_set, log_desc);
cache->graph_cut_plan = base_plan;
cache->graph_cut_plan.available = true;
cache->budgeted_graph_cut_plan.available = false;
if (log_desc != nullptr) {
LOG_INFO("%s build cached graph cut plan done (taking %lld ms)", log_desc, ggml_time_ms() - t_plan_begin);
}
}
Plan resolved_plan = base_plan;
if (max_graph_vram_bytes > 0 && base_plan.has_cuts) {
if (cache->budgeted_graph_cut_plan.available &&
cache->budgeted_graph_cut_plan_max_vram_bytes == max_graph_vram_bytes &&
plan_matches_graph(gf, cache->budgeted_graph_cut_plan)) {
resolved_plan = cache->budgeted_graph_cut_plan;
} else {
resolved_plan = apply_max_vram_budget(gf,
base_plan,
max_graph_vram_bytes,
backend,
params_tensor_set,
log_desc);
cache->budgeted_graph_cut_plan = resolved_plan;
cache->budgeted_graph_cut_plan.available = true;
cache->budgeted_graph_cut_plan_max_vram_bytes = max_graph_vram_bytes;
}
}
return resolved_plan;
}
} // namespace sd::ggml_graph_cut

104
src/ggml_graph_cut.h Normal file
View File

@ -0,0 +1,104 @@
#ifndef __SD_GGML_GRAPH_CUT_H__
#define __SD_GGML_GRAPH_CUT_H__
#include <array>
#include <string>
#include <unordered_set>
#include <vector>
#include "ggml-backend.h"
#include "ggml.h"
namespace sd::ggml_graph_cut {
struct Segment {
enum InputType {
INPUT_EXTERNAL = 0,
INPUT_PREVIOUS_CUT,
INPUT_PARAM,
};
struct InputRef {
InputType type = INPUT_EXTERNAL;
std::string display_name;
int leaf_index = -1;
int node_index = -1;
};
size_t compute_buffer_size = 0;
size_t output_bytes = 0;
size_t input_external_bytes = 0;
size_t input_previous_cut_bytes = 0;
size_t input_param_bytes = 0;
std::string group_name;
std::vector<int> internal_node_indices;
std::vector<int> output_node_indices;
std::vector<InputRef> input_refs;
};
struct Plan {
struct InputShape {
int leaf_index = -1;
ggml_type type = GGML_TYPE_COUNT;
std::array<int64_t, GGML_MAX_DIMS> ne = {0, 0, 0, 0};
};
bool available = false;
bool has_cuts = false;
bool valid = true;
int n_nodes = 0;
int n_leafs = 0;
std::vector<InputShape> input_shapes;
std::vector<Segment> segments;
};
struct PlanCache {
Plan graph_cut_plan;
Plan budgeted_graph_cut_plan;
size_t budgeted_graph_cut_plan_max_vram_bytes = 0;
};
static constexpr const char* GGML_RUNNER_CUT_PREFIX = "ggml_runner_cut:";
bool is_graph_cut_tensor(const ggml_tensor* tensor);
std::string make_graph_cut_name(const std::string& group, const std::string& output);
void mark_graph_cut(ggml_tensor* tensor, const std::string& group, const std::string& output);
int leaf_count(ggml_cgraph* gf);
ggml_tensor* leaf_tensor(ggml_cgraph* gf, int leaf_index);
ggml_backend_buffer_t tensor_buffer(const ggml_tensor* tensor);
ggml_tensor* cache_source_tensor(ggml_tensor* tensor);
size_t cache_tensor_bytes(const ggml_tensor* tensor);
bool plan_matches_graph(ggml_cgraph* gf, const Plan& plan);
ggml_tensor* output_tensor(ggml_cgraph* gf, const Segment& segment, size_t output_index);
ggml_tensor* input_tensor(ggml_cgraph* gf, const Segment::InputRef& input_ref);
std::vector<ggml_tensor*> param_tensors(ggml_cgraph* gf, const Segment& segment);
std::vector<ggml_tensor*> runtime_param_tensors(ggml_cgraph* gf, const Segment& segment, const char* log_desc);
std::unordered_set<std::string> collect_future_input_names(ggml_cgraph* gf,
const Plan& plan,
size_t current_segment_index);
ggml_cgraph* build_segment_graph(ggml_cgraph* gf,
const Segment& segment,
ggml_context** graph_ctx_out);
size_t measure_segment_compute_buffer(ggml_backend_t backend,
ggml_cgraph* gf,
const Segment& segment,
const char* log_desc);
Plan build_plan(ggml_backend_t backend,
ggml_cgraph* gf,
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const char* log_desc);
Plan apply_max_vram_budget(ggml_cgraph* gf,
const Plan& base_plan,
size_t max_graph_vram_bytes,
ggml_backend_t backend,
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const char* log_desc);
Plan resolve_plan(ggml_backend_t backend,
ggml_cgraph* gf,
PlanCache* cache,
size_t max_graph_vram_bytes,
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
const char* log_desc);
} // namespace sd::ggml_graph_cut
#endif

View File

@ -346,6 +346,7 @@ namespace LLM {
auto merger = std::dynamic_pointer_cast<PatchMerger>(blocks["merger"]);
auto x = patch_embed->forward(ctx, pixel_values);
sd::ggml_graph_cut::mark_graph_cut(x, "llm.vision.prelude", "x");
x = ggml_reshape_4d(ctx->ggml_ctx, x, x->ne[0] * spatial_merge_size * spatial_merge_size, x->ne[1] / spatial_merge_size / spatial_merge_size, x->ne[2], x->ne[3]);
x = ggml_get_rows(ctx->ggml_ctx, x, window_index);
@ -359,9 +360,11 @@ namespace LLM {
mask = nullptr;
}
x = block->forward(ctx, x, pe, mask);
sd::ggml_graph_cut::mark_graph_cut(x, "llm.vision.blocks." + std::to_string(i), "x");
}
x = merger->forward(ctx, x);
sd::ggml_graph_cut::mark_graph_cut(x, "llm.vision.final", "x");
x = ggml_get_rows(ctx->ggml_ctx, x, window_inverse_index);
@ -506,6 +509,7 @@ namespace LLM {
auto norm = std::dynamic_pointer_cast<RMSNorm>(blocks["norm"]);
auto x = embed_tokens->forward(ctx, input_ids);
sd::ggml_graph_cut::mark_graph_cut(x, "llm.text.prelude", "x");
std::vector<ggml_tensor*> intermediate_outputs;
@ -552,6 +556,10 @@ namespace LLM {
auto block = std::dynamic_pointer_cast<TransformerBlock>(blocks["layers." + std::to_string(i)]);
x = block->forward(ctx, x, input_pos, attention_mask);
if (out_layers.size() > 1) {
x = ggml_cont(ctx->ggml_ctx, x);
}
sd::ggml_graph_cut::mark_graph_cut(x, "llm.text.layers." + std::to_string(i), "x");
if (out_layers.find(i + 1) != out_layers.end()) {
intermediate_outputs.push_back(x);
}

View File

@ -129,7 +129,7 @@ struct LoraModel : public GGMLRunner {
}
}
ggml_tensor* get_lora_weight_diff(const std::string& model_tensor_name, ggml_context* ctx) {
ggml_tensor* get_lora_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_backend_t backend) {
ggml_tensor* updown = nullptr;
int index = 0;
while (true) {
@ -152,17 +152,17 @@ struct LoraModel : public GGMLRunner {
auto iter = lora_tensors.find(lora_up_name);
if (iter != lora_tensors.end()) {
lora_up = ggml_ext_cast_f32(ctx, iter->second);
lora_up = ggml_ext_cast_f32(ctx, backend, iter->second);
}
iter = lora_tensors.find(lora_mid_name);
if (iter != lora_tensors.end()) {
lora_mid = ggml_ext_cast_f32(ctx, iter->second);
lora_mid = ggml_ext_cast_f32(ctx, backend, iter->second);
}
iter = lora_tensors.find(lora_down_name);
if (iter != lora_tensors.end()) {
lora_down = ggml_ext_cast_f32(ctx, iter->second);
lora_down = ggml_ext_cast_f32(ctx, backend, iter->second);
}
if (lora_up == nullptr || lora_down == nullptr) {
@ -208,7 +208,7 @@ struct LoraModel : public GGMLRunner {
return updown;
}
ggml_tensor* get_raw_weight_diff(const std::string& model_tensor_name, ggml_context* ctx) {
ggml_tensor* get_raw_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_backend_t backend) {
ggml_tensor* updown = nullptr;
int index = 0;
while (true) {
@ -225,7 +225,7 @@ struct LoraModel : public GGMLRunner {
auto iter = lora_tensors.find(diff_name);
if (iter != lora_tensors.end()) {
curr_updown = ggml_ext_cast_f32(ctx, iter->second);
curr_updown = ggml_ext_cast_f32(ctx, backend, iter->second);
} else {
break;
}
@ -248,7 +248,7 @@ struct LoraModel : public GGMLRunner {
return updown;
}
ggml_tensor* get_loha_weight_diff(const std::string& model_tensor_name, ggml_context* ctx) {
ggml_tensor* get_loha_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_backend_t backend) {
ggml_tensor* updown = nullptr;
int index = 0;
while (true) {
@ -276,33 +276,33 @@ struct LoraModel : public GGMLRunner {
auto iter = lora_tensors.find(hada_1_down_name);
if (iter != lora_tensors.end()) {
hada_1_down = ggml_ext_cast_f32(ctx, iter->second);
hada_1_down = ggml_ext_cast_f32(ctx, backend, iter->second);
}
iter = lora_tensors.find(hada_1_up_name);
if (iter != lora_tensors.end()) {
hada_1_up = ggml_ext_cast_f32(ctx, iter->second);
hada_1_up = ggml_ext_cast_f32(ctx, backend, iter->second);
}
iter = lora_tensors.find(hada_1_mid_name);
if (iter != lora_tensors.end()) {
hada_1_mid = ggml_ext_cast_f32(ctx, iter->second);
hada_1_mid = ggml_ext_cast_f32(ctx, backend, iter->second);
hada_1_up = ggml_cont(ctx, ggml_transpose(ctx, hada_1_up));
}
iter = lora_tensors.find(hada_2_down_name);
if (iter != lora_tensors.end()) {
hada_2_down = ggml_ext_cast_f32(ctx, iter->second);
hada_2_down = ggml_ext_cast_f32(ctx, backend, iter->second);
}
iter = lora_tensors.find(hada_2_up_name);
if (iter != lora_tensors.end()) {
hada_2_up = ggml_ext_cast_f32(ctx, iter->second);
hada_2_up = ggml_ext_cast_f32(ctx, backend, iter->second);
}
iter = lora_tensors.find(hada_2_mid_name);
if (iter != lora_tensors.end()) {
hada_2_mid = ggml_ext_cast_f32(ctx, iter->second);
hada_2_mid = ggml_ext_cast_f32(ctx, backend, iter->second);
hada_2_up = ggml_cont(ctx, ggml_transpose(ctx, hada_2_up));
}
@ -351,7 +351,7 @@ struct LoraModel : public GGMLRunner {
return updown;
}
ggml_tensor* get_lokr_weight_diff(const std::string& model_tensor_name, ggml_context* ctx) {
ggml_tensor* get_lokr_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_backend_t backend) {
ggml_tensor* updown = nullptr;
int index = 0;
while (true) {
@ -378,24 +378,24 @@ struct LoraModel : public GGMLRunner {
auto iter = lora_tensors.find(lokr_w1_name);
if (iter != lora_tensors.end()) {
lokr_w1 = ggml_ext_cast_f32(ctx, iter->second);
lokr_w1 = ggml_ext_cast_f32(ctx, backend, iter->second);
}
iter = lora_tensors.find(lokr_w2_name);
if (iter != lora_tensors.end()) {
lokr_w2 = ggml_ext_cast_f32(ctx, iter->second);
lokr_w2 = ggml_ext_cast_f32(ctx, backend, iter->second);
}
int64_t rank = 1;
if (lokr_w1 == nullptr) {
iter = lora_tensors.find(lokr_w1_a_name);
if (iter != lora_tensors.end()) {
lokr_w1_a = ggml_ext_cast_f32(ctx, iter->second);
lokr_w1_a = ggml_ext_cast_f32(ctx, backend, iter->second);
}
iter = lora_tensors.find(lokr_w1_b_name);
if (iter != lora_tensors.end()) {
lokr_w1_b = ggml_ext_cast_f32(ctx, iter->second);
lokr_w1_b = ggml_ext_cast_f32(ctx, backend, iter->second);
}
if (lokr_w1_a == nullptr || lokr_w1_b == nullptr) {
@ -410,12 +410,12 @@ struct LoraModel : public GGMLRunner {
if (lokr_w2 == nullptr) {
iter = lora_tensors.find(lokr_w2_a_name);
if (iter != lora_tensors.end()) {
lokr_w2_a = ggml_ext_cast_f32(ctx, iter->second);
lokr_w2_a = ggml_ext_cast_f32(ctx, backend, iter->second);
}
iter = lora_tensors.find(lokr_w2_b_name);
if (iter != lora_tensors.end()) {
lokr_w2_b = ggml_ext_cast_f32(ctx, iter->second);
lokr_w2_b = ggml_ext_cast_f32(ctx, backend, iter->second);
}
if (lokr_w2_a == nullptr || lokr_w2_b == nullptr) {
@ -468,23 +468,23 @@ struct LoraModel : public GGMLRunner {
return updown;
}
ggml_tensor* get_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_tensor* model_tensor, bool with_lora_and_lokr = true) {
ggml_tensor* get_weight_diff(const std::string& model_tensor_name, ggml_backend_t backend, ggml_context* ctx, ggml_tensor* model_tensor, bool with_lora_and_lokr = true) {
// lora
ggml_tensor* diff = nullptr;
if (with_lora_and_lokr) {
diff = get_lora_weight_diff(model_tensor_name, ctx);
diff = get_lora_weight_diff(model_tensor_name, ctx, backend);
}
// diff
if (diff == nullptr) {
diff = get_raw_weight_diff(model_tensor_name, ctx);
diff = get_raw_weight_diff(model_tensor_name, ctx, backend);
}
// loha
if (diff == nullptr) {
diff = get_loha_weight_diff(model_tensor_name, ctx);
diff = get_loha_weight_diff(model_tensor_name, ctx, backend);
}
// lokr
if (diff == nullptr && with_lora_and_lokr) {
diff = get_lokr_weight_diff(model_tensor_name, ctx);
diff = get_lokr_weight_diff(model_tensor_name, ctx, backend);
}
if (diff != nullptr) {
if (ggml_nelements(diff) < ggml_nelements(model_tensor)) {
@ -502,6 +502,7 @@ struct LoraModel : public GGMLRunner {
}
ggml_tensor* get_out_diff(ggml_context* ctx,
ggml_backend_t backend,
ggml_tensor* x,
WeightAdapter::ForwardParams forward_params,
const std::string& model_tensor_name) {
@ -590,7 +591,7 @@ struct LoraModel : public GGMLRunner {
}
scale_value *= multiplier;
auto curr_out_diff = ggml_ext_lokr_forward(ctx, x, lokr_w1, lokr_w1_a, lokr_w1_b, lokr_w2, lokr_w2_a, lokr_w2_b, is_conv2d, forward_params.conv2d, scale_value);
auto curr_out_diff = ggml_ext_lokr_forward(ctx, backend, x, lokr_w1, lokr_w1_a, lokr_w1_b, lokr_w2, lokr_w2_a, lokr_w2_b, is_conv2d, forward_params.conv2d, scale_value);
if (out_diff == nullptr) {
out_diff = curr_out_diff;
} else {
@ -761,7 +762,7 @@ struct LoraModel : public GGMLRunner {
ggml_tensor* model_tensor = it.second;
// lora
ggml_tensor* diff = get_weight_diff(model_tensor_name, compute_ctx, model_tensor);
ggml_tensor* diff = get_weight_diff(model_tensor_name, runtime_backend, compute_ctx, model_tensor);
if (diff == nullptr) {
continue;
}
@ -774,7 +775,7 @@ struct LoraModel : public GGMLRunner {
ggml_tensor* final_tensor;
if (model_tensor->type != GGML_TYPE_F32 && model_tensor->type != GGML_TYPE_F16) {
final_tensor = ggml_ext_cast_f32(compute_ctx, model_tensor);
final_tensor = ggml_ext_cast_f32(compute_ctx, runtime_backend, model_tensor);
final_tensor = ggml_add_inplace(compute_ctx, final_tensor, diff);
final_tensor = ggml_cpy(compute_ctx, final_tensor, model_tensor);
} else {
@ -841,34 +842,35 @@ public:
: lora_models(lora_models) {
}
ggml_tensor* patch_weight(ggml_context* ctx, ggml_tensor* weight, const std::string& weight_name, bool with_lora_and_lokr) {
ggml_tensor* patch_weight(ggml_context* ctx, ggml_backend_t backend, ggml_tensor* weight, const std::string& weight_name, bool with_lora_and_lokr) {
for (auto& lora_model : lora_models) {
ggml_tensor* diff = lora_model->get_weight_diff(weight_name, ctx, weight, with_lora_and_lokr);
ggml_tensor* diff = lora_model->get_weight_diff(weight_name, backend, ctx, weight, with_lora_and_lokr);
if (diff == nullptr) {
continue;
}
if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) {
weight = ggml_ext_cast_f32(ctx, weight);
weight = ggml_ext_cast_f32(ctx, backend, weight);
}
weight = ggml_add(ctx, weight, diff);
}
return weight;
}
ggml_tensor* patch_weight(ggml_context* ctx, ggml_tensor* weight, const std::string& weight_name) override {
return patch_weight(ctx, weight, weight_name, true);
ggml_tensor* patch_weight(ggml_context* ctx, ggml_backend_t backend, ggml_tensor* weight, const std::string& weight_name) override {
return patch_weight(ctx, backend, weight, weight_name, true);
}
ggml_tensor* forward_with_lora(ggml_context* ctx,
ggml_backend_t backend,
ggml_tensor* x,
ggml_tensor* w,
ggml_tensor* b,
const std::string& prefix,
WeightAdapter::ForwardParams forward_params) override {
w = patch_weight(ctx, w, prefix + "weight", false);
w = patch_weight(ctx, backend, w, prefix + "weight", false);
if (b) {
b = patch_weight(ctx, b, prefix + "bias", false);
b = patch_weight(ctx, backend, b, prefix + "bias", false);
}
ggml_tensor* out;
if (forward_params.op_type == ForwardParams::op_type_t::OP_LINEAR) {
@ -890,7 +892,7 @@ public:
forward_params.conv2d.scale);
}
for (auto& lora_model : lora_models) {
ggml_tensor* out_diff = lora_model->get_out_diff(ctx, x, forward_params, prefix + "weight");
ggml_tensor* out_diff = lora_model->get_out_diff(ctx, backend, x, forward_params, prefix + "weight");
if (out_diff == nullptr) {
continue;
}

View File

@ -767,6 +767,8 @@ public:
auto context_x = block->forward(ctx, context, x, c_mod);
context = context_x.first;
x = context_x.second;
sd::ggml_graph_cut::mark_graph_cut(context, "mmdit.joint_blocks." + std::to_string(i), "context");
sd::ggml_graph_cut::mark_graph_cut(x, "mmdit.joint_blocks." + std::to_string(i), "x");
}
x = final_layer->forward(ctx, x, c_mod); // (N, T, patch_size ** 2 * out_channels)
@ -809,6 +811,11 @@ public:
context = context_embedder->forward(ctx, context); // [N, L, D] aka [N, L, 1536]
}
sd::ggml_graph_cut::mark_graph_cut(x, "mmdit.prelude", "x");
sd::ggml_graph_cut::mark_graph_cut(c, "mmdit.prelude", "c");
if (context != nullptr) {
sd::ggml_graph_cut::mark_graph_cut(context, "mmdit.prelude", "context");
}
x = forward_core_with_concat(ctx, x, c, context, skip_layers); // (N, H*W, patch_size ** 2 * out_channels)

View File

@ -2,6 +2,7 @@
#include <atomic>
#include <chrono>
#include <cstdarg>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <mutex>
@ -12,64 +13,21 @@
#include <unordered_map>
#include <vector>
#include "gguf_reader.hpp"
#include "model.h"
#include "model_io/gguf_io.h"
#include "model_io/safetensors_io.h"
#include "model_io/torch_legacy_io.h"
#include "model_io/torch_zip_io.h"
#include "stable-diffusion.h"
#include "util.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-cpu.h"
#include "ggml.h"
#include "ggml_extend_backend.hpp"
#include "zip.h"
#include "name_conversion.h"
#include "stable-diffusion.h"
#ifdef SD_USE_METAL
#include "ggml-metal.h"
#endif
#ifdef SD_USE_VULKAN
#include "ggml-vulkan.h"
#endif
#ifdef SD_USE_OPENCL
#include "ggml-opencl.h"
#endif
#define ST_HEADER_SIZE_LEN 8
uint64_t read_u64(uint8_t* buffer) {
// little endian
uint64_t value = 0;
value |= static_cast<int64_t>(buffer[7]) << 56;
value |= static_cast<int64_t>(buffer[6]) << 48;
value |= static_cast<int64_t>(buffer[5]) << 40;
value |= static_cast<int64_t>(buffer[4]) << 32;
value |= static_cast<int64_t>(buffer[3]) << 24;
value |= static_cast<int64_t>(buffer[2]) << 16;
value |= static_cast<int64_t>(buffer[1]) << 8;
value |= static_cast<int64_t>(buffer[0]);
return value;
}
int32_t read_int(uint8_t* buffer) {
// little endian
int value = 0;
value |= buffer[3] << 24;
value |= buffer[2] << 16;
value |= buffer[1] << 8;
value |= buffer[0];
return value;
}
uint16_t read_short(uint8_t* buffer) {
// little endian
uint16_t value = 0;
value |= buffer[1] << 8;
value |= buffer[0];
return value;
}
/*================================================= Preprocess ==================================================*/
@ -110,7 +68,7 @@ const char* unused_tensors[] = {
"first_stage_model.bn.",
};
bool is_unused_tensor(std::string name) {
bool is_unused_tensor(const std::string& name) {
for (size_t i = 0; i < sizeof(unused_tensors) / sizeof(const char*); i++) {
if (starts_with(name, unused_tensors[i])) {
return true;
@ -250,79 +208,6 @@ void ModelLoader::add_tensor_storage(const TensorStorage& tensor_storage) {
tensor_storage_map[tensor_storage.name] = tensor_storage;
}
bool is_zip_file(const std::string& file_path) {
zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
if (zip == nullptr) {
return false;
}
zip_close(zip);
return true;
}
bool is_gguf_file(const std::string& file_path) {
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open()) {
return false;
}
char magic[4];
file.read(magic, sizeof(magic));
if (!file) {
return false;
}
for (uint32_t i = 0; i < sizeof(magic); i++) {
if (magic[i] != GGUF_MAGIC[i]) {
return false;
}
}
return true;
}
bool is_safetensors_file(const std::string& file_path) {
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open()) {
return false;
}
// get file size
file.seekg(0, file.end);
size_t file_size_ = file.tellg();
file.seekg(0, file.beg);
// read header size
if (file_size_ <= ST_HEADER_SIZE_LEN) {
return false;
}
uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
if (!file) {
return false;
}
size_t header_size_ = read_u64(header_size_buf);
if (header_size_ >= file_size_ || header_size_ <= 2) {
return false;
}
// read header
std::vector<char> header_buf;
header_buf.resize(header_size_ + 1);
header_buf[header_size_] = '\0';
file.read(header_buf.data(), header_size_);
if (!file) {
return false;
}
try {
nlohmann::json header_ = nlohmann::json::parse(header_buf.data());
} catch (const std::exception&) {
return false;
}
return true;
}
bool ModelLoader::init_from_file(const std::string& file_path, const std::string& prefix) {
if (is_directory(file_path)) {
LOG_INFO("load %s using diffusers format", file_path.c_str());
@ -333,9 +218,12 @@ bool ModelLoader::init_from_file(const std::string& file_path, const std::string
} else if (is_safetensors_file(file_path)) {
LOG_INFO("load %s using safetensors format", file_path.c_str());
return init_from_safetensors_file(file_path, prefix);
} else if (is_zip_file(file_path)) {
LOG_INFO("load %s using checkpoint format", file_path.c_str());
return init_from_ckpt_file(file_path, prefix);
} else if (is_torch_zip_file(file_path)) {
LOG_INFO("load %s using torch zip format", file_path.c_str());
return init_from_torch_zip_file(file_path, prefix);
} else if (init_from_torch_legacy_file(file_path, prefix)) {
LOG_INFO("load %s using torch legacy format", file_path.c_str());
return true;
} else {
if (file_exists(file_path)) {
LOG_WARN("unknown format %s", file_path.c_str());
@ -375,242 +263,121 @@ bool ModelLoader::init_from_file_and_convert_name(const std::string& file_path,
bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::string& prefix) {
LOG_DEBUG("init from '%s'", file_path.c_str());
std::vector<TensorStorage> tensor_storages;
std::string error;
if (!read_gguf_file(file_path, tensor_storages, &error)) {
LOG_ERROR("%s", error.c_str());
return false;
}
file_paths_.push_back(file_path);
size_t file_index = file_paths_.size() - 1;
gguf_context* ctx_gguf_ = nullptr;
ggml_context* ctx_meta_ = nullptr;
for (auto& tensor_storage : tensor_storages) {
// LOG_DEBUG("%s", tensor_storage.name.c_str());
ctx_gguf_ = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta_});
if (!ctx_gguf_) {
LOG_ERROR("failed to open '%s' with gguf_init_from_file. Try to open it with GGUFReader.", file_path.c_str());
GGUFReader gguf_reader;
if (!gguf_reader.load(file_path)) {
LOG_ERROR("failed to open '%s' with GGUFReader.", file_path.c_str());
return false;
if (!starts_with(tensor_storage.name, prefix)) {
tensor_storage.name = prefix + tensor_storage.name;
}
size_t data_offset = gguf_reader.data_offset();
for (const auto& gguf_tensor_info : gguf_reader.tensors()) {
std::string name = gguf_tensor_info.name;
if (!starts_with(name, prefix)) {
name = prefix + name;
}
TensorStorage tensor_storage(
name,
gguf_tensor_info.type,
gguf_tensor_info.shape.data(),
static_cast<int>(gguf_tensor_info.shape.size()),
file_index,
data_offset + gguf_tensor_info.offset);
// LOG_DEBUG("%s %s", name.c_str(), tensor_storage.to_string().c_str());
add_tensor_storage(tensor_storage);
}
return true;
}
int n_tensors = static_cast<int>(gguf_get_n_tensors(ctx_gguf_));
size_t total_size = 0;
size_t data_offset = gguf_get_data_offset(ctx_gguf_);
for (int i = 0; i < n_tensors; i++) {
std::string name = gguf_get_tensor_name(ctx_gguf_, i);
ggml_tensor* dummy = ggml_get_tensor(ctx_meta_, name.c_str());
size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf_, i);
// LOG_DEBUG("%s", name.c_str());
if (!starts_with(name, prefix)) {
name = prefix + name;
}
TensorStorage tensor_storage(name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset);
GGML_ASSERT(ggml_nbytes(dummy) == tensor_storage.nbytes());
tensor_storage.file_index = file_index;
add_tensor_storage(tensor_storage);
}
gguf_free(ctx_gguf_);
ggml_free(ctx_meta_);
return true;
}
/*================================================= SafeTensorsModelLoader ==================================================*/
ggml_type str_to_ggml_type(const std::string& dtype) {
ggml_type ttype = GGML_TYPE_COUNT;
if (dtype == "F16") {
ttype = GGML_TYPE_F16;
} else if (dtype == "BF16") {
ttype = GGML_TYPE_BF16;
} else if (dtype == "F32") {
ttype = GGML_TYPE_F32;
} else if (dtype == "F64") {
ttype = GGML_TYPE_F32;
} else if (dtype == "F8_E4M3") {
ttype = GGML_TYPE_F16;
} else if (dtype == "F8_E5M2") {
ttype = GGML_TYPE_F16;
} else if (dtype == "I64") {
ttype = GGML_TYPE_I32;
}
return ttype;
}
// https://huggingface.co/docs/safetensors/index
bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const std::string& prefix) {
LOG_DEBUG("init from '%s', prefix = '%s'", file_path.c_str(), prefix.c_str());
std::vector<TensorStorage> tensor_storages;
std::string error;
if (!read_safetensors_file(file_path, tensor_storages, &error)) {
LOG_ERROR("%s", error.c_str());
return false;
}
file_paths_.push_back(file_path);
size_t file_index = file_paths_.size() - 1;
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open()) {
LOG_ERROR("failed to open '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
// get file size
file.seekg(0, file.end);
size_t file_size_ = file.tellg();
file.seekg(0, file.beg);
// read header size
if (file_size_ <= ST_HEADER_SIZE_LEN) {
LOG_ERROR("invalid safetensor file '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
if (!file) {
LOG_ERROR("read safetensors header size failed: '%s'", file_path.c_str());
return false;
}
size_t header_size_ = read_u64(header_size_buf);
if (header_size_ >= file_size_) {
LOG_ERROR("invalid safetensor file '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
// read header
std::vector<char> header_buf;
header_buf.resize(header_size_ + 1);
header_buf[header_size_] = '\0';
file.read(header_buf.data(), header_size_);
if (!file) {
LOG_ERROR("read safetensors header failed: '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
nlohmann::json header_;
try {
header_ = nlohmann::json::parse(header_buf.data());
} catch (const std::exception&) {
LOG_ERROR("parsing safetensors header failed", file_path.c_str());
file_paths_.pop_back();
return false;
}
for (auto& item : header_.items()) {
std::string name = item.key();
nlohmann::json tensor_info = item.value();
// LOG_DEBUG("%s %s\n", name.c_str(), tensor_info.dump().c_str());
if (name == "__metadata__") {
for (auto& tensor_storage : tensor_storages) {
if (is_unused_tensor(tensor_storage.name)) {
continue;
}
if (is_unused_tensor(name)) {
continue;
}
std::string dtype = tensor_info["dtype"];
nlohmann::json shape = tensor_info["shape"];
if (dtype == "U8") {
continue;
}
size_t begin = tensor_info["data_offsets"][0].get<size_t>();
size_t end = tensor_info["data_offsets"][1].get<size_t>();
ggml_type type = str_to_ggml_type(dtype);
if (type == GGML_TYPE_COUNT) {
LOG_ERROR("unsupported dtype '%s' (tensor '%s')", dtype.c_str(), name.c_str());
return false;
}
if (shape.size() > SD_MAX_DIMS) {
LOG_ERROR("invalid tensor '%s'", name.c_str());
return false;
}
int n_dims = (int)shape.size();
int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
for (int i = 0; i < n_dims; i++) {
ne[i] = shape[i].get<int64_t>();
}
if (n_dims == 5) {
n_dims = 4;
ne[0] = ne[0] * ne[1];
ne[1] = ne[2];
ne[2] = ne[3];
ne[3] = ne[4];
}
// ggml_n_dims returns 1 for scalars
if (n_dims == 0) {
n_dims = 1;
}
if (!starts_with(name, prefix)) {
name = prefix + name;
}
TensorStorage tensor_storage(name, type, ne, n_dims, file_index, ST_HEADER_SIZE_LEN + header_size_ + begin);
tensor_storage.reverse_ne();
size_t tensor_data_size = end - begin;
bool tensor_size_ok;
if (dtype == "F8_E4M3") {
tensor_storage.is_f8_e4m3 = true;
// f8 -> f16
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
} else if (dtype == "F8_E5M2") {
tensor_storage.is_f8_e5m2 = true;
// f8 -> f16
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
} else if (dtype == "F64") {
tensor_storage.is_f64 = true;
// f64 -> f32
tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
} else if (dtype == "I64") {
tensor_storage.is_i64 = true;
// i64 -> i32
tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
} else {
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size);
}
if (!tensor_size_ok) {
LOG_ERROR("size mismatch for tensor '%s' (%s)\n", name.c_str(), dtype.c_str());
return false;
if (!starts_with(tensor_storage.name, prefix)) {
tensor_storage.name = prefix + tensor_storage.name;
}
tensor_storage.file_index = file_index;
add_tensor_storage(tensor_storage);
// LOG_DEBUG("%s %s", tensor_storage.to_string().c_str(), dtype.c_str());
// LOG_DEBUG("%s", tensor_storage.to_string().c_str());
}
return true;
}
/*================================================= TorchLegacyModelLoader ==================================================*/
bool ModelLoader::init_from_torch_legacy_file(const std::string& file_path, const std::string& prefix) {
LOG_DEBUG("init from torch legacy '%s'", file_path.c_str());
std::vector<TensorStorage> tensor_storages;
std::string error;
if (!read_torch_legacy_file(file_path, tensor_storages, &error)) {
if ((!error.empty()) && (ends_with(file_path, ".pt") || ends_with(file_path, ".pth"))) {
LOG_WARN("%s", error.c_str());
}
return false;
}
file_paths_.push_back(file_path);
size_t file_index = file_paths_.size() - 1;
for (auto& tensor_storage : tensor_storages) {
if (is_unused_tensor(tensor_storage.name)) {
continue;
}
if (!starts_with(tensor_storage.name, prefix)) {
tensor_storage.name = prefix + tensor_storage.name;
}
tensor_storage.file_index = file_index;
add_tensor_storage(tensor_storage);
}
return true;
}
/*================================================= TorchZipModelLoader ==================================================*/
bool ModelLoader::init_from_torch_zip_file(const std::string& file_path, const std::string& prefix) {
LOG_DEBUG("init from '%s'", file_path.c_str());
std::vector<TensorStorage> tensor_storages;
std::string error;
if (!read_torch_zip_file(file_path, tensor_storages, &error)) {
LOG_ERROR("%s", error.c_str());
return false;
}
file_paths_.push_back(file_path);
size_t file_index = file_paths_.size() - 1;
for (auto& tensor_storage : tensor_storages) {
if (!starts_with(tensor_storage.name, prefix)) {
tensor_storage.name = prefix + tensor_storage.name;
}
tensor_storage.file_index = file_index;
add_tensor_storage(tensor_storage);
// LOG_DEBUG("%s", tensor_storage.to_string().c_str());
}
return true;
@ -642,367 +409,6 @@ bool ModelLoader::init_from_diffusers_file(const std::string& file_path, const s
return true;
}
/*================================================= CkptModelLoader ==================================================*/
// $ python -m pickletools sd-v1-4/archive/data.pkl | head -n 100
// 0: \x80 PROTO 2
// 2: } EMPTY_DICT
// 3: q BINPUT 0
// 5: ( MARK
// 6: X BINUNICODE 'epoch'
// 16: q BINPUT 1
// 18: K BININT1 6
// 20: X BINUNICODE 'global_step'
// 36: q BINPUT 2
// 38: J BININT 470000
// 43: X BINUNICODE 'pytorch-lightning_version'
// 73: q BINPUT 3
// 75: X BINUNICODE '1.4.2'
// 85: q BINPUT 4
// 87: X BINUNICODE 'state_dict'
// 102: q BINPUT 5
// 104: } EMPTY_DICT
// 105: q BINPUT 6
// 107: ( MARK
// 108: X BINUNICODE 'betas'
// 118: q BINPUT 7
// 120: c GLOBAL 'torch._utils _rebuild_tensor_v2'
// 153: q BINPUT 8
// 155: ( MARK
// 156: ( MARK
// 157: X BINUNICODE 'storage'
// 169: q BINPUT 9
// 171: c GLOBAL 'torch FloatStorage'
// 191: q BINPUT 10
// 193: X BINUNICODE '0'
// 199: q BINPUT 11
// 201: X BINUNICODE 'cpu'
// 209: q BINPUT 12
// 211: M BININT2 1000
// 214: t TUPLE (MARK at 156)
// 215: q BINPUT 13
// 217: Q BINPERSID
// 218: K BININT1 0
// 220: M BININT2 1000
// ...............................
// 3201: q BINPUT 250
// 3203: R REDUCE
// 3204: q BINPUT 251
// 3206: X BINUNICODE 'model.diffusion_model.input_blocks.1.1.proj_in.weight'
// 3264: q BINPUT 252
// 3266: h BINGET 8
// 3268: ( MARK
// 3269: ( MARK
// 3270: h BINGET 9
// 3272: h BINGET 10
// 3274: X BINUNICODE '30'
// 3281: q BINPUT 253
// 3283: h BINGET 12
// 3285: J BININT 102400
// 3290: t TUPLE (MARK at 3269)
// 3291: q BINPUT 254
// 3293: Q BINPERSID
// 3294: K BININT1 0
// 3296: ( MARK
// 3297: M BININT2 320
// 3300: M BININT2 320
// 3303: K BININT1 1
// 3305: K BININT1 1
// 3307: t TUPLE (MARK at 3296)
// 3308: q BINPUT 255
// 3310: ( MARK
// 3311: M BININT2 320
// 3314: K BININT1 1
// 3316: K BININT1 1
// 3318: K BININT1 1
// 3320: t TUPLE (MARK at 3310)
// 3321: r LONG_BINPUT 256
// 3326: \x89 NEWFALSE
// 3327: h BINGET 16
// 3329: ) EMPTY_TUPLE
// 3330: R REDUCE
// 3331: r LONG_BINPUT 257
// 3336: t TUPLE (MARK at 3268)
// 3337: r LONG_BINPUT 258
// 3342: R REDUCE
// 3343: r LONG_BINPUT 259
// 3348: X BINUNICODE 'model.diffusion_model.input_blocks.1.1.proj_in.bias'
// 3404: r LONG_BINPUT 260
// 3409: h BINGET 8
// 3411: ( MARK
// 3412: ( MARK
// 3413: h BINGET 9
// 3415: h BINGET 10
// 3417: X BINUNICODE '31'
struct PickleTensorReader {
enum ReadPhase {
READ_NAME,
READ_DATA,
CHECK_SIZE,
READ_DIMENS
};
ReadPhase phase = READ_NAME;
size_t entry_size = 0;
int32_t nelements = 0;
TensorStorage tensor_storage;
static ggml_type global_type; // all pickle_tensors data type
static bool read_global_type;
bool read_int_value(uint32_t value) {
if (phase == CHECK_SIZE) {
if (entry_size == value * ggml_type_size(tensor_storage.type)) {
nelements = value;
phase = READ_DIMENS;
return true;
} else {
phase = READ_NAME;
}
} else if (phase == READ_DIMENS) {
if (tensor_storage.n_dims + 1 > SD_MAX_DIMS) { // too many dimens
phase = READ_NAME;
tensor_storage.n_dims = 0;
}
if (nelements % value == 0) {
tensor_storage.ne[tensor_storage.n_dims] = value;
tensor_storage.n_dims++;
}
}
return false;
}
void read_global(const std::string& str) {
if (str == "FloatStorage") {
if (read_global_type) {
global_type = GGML_TYPE_F32;
read_global_type = false;
}
tensor_storage.type = GGML_TYPE_F32;
} else if (str == "HalfStorage") {
if (read_global_type) {
global_type = GGML_TYPE_F16;
read_global_type = false;
}
tensor_storage.type = GGML_TYPE_F16;
}
}
void read_string(const std::string& str, zip_t* zip, std::string dir) {
if (str == "storage") {
read_global_type = true;
} else if (str != "state_dict") {
if (phase == READ_DATA) {
std::string entry_name = dir + "data/" + std::string(str);
size_t i, n = zip_entries_total(zip);
for (i = 0; i < n; ++i) {
zip_entry_openbyindex(zip, i);
{
std::string name = zip_entry_name(zip);
if (name == entry_name) {
tensor_storage.index_in_zip = (int)i;
entry_size = zip_entry_size(zip);
zip_entry_close(zip);
break;
}
}
zip_entry_close(zip);
}
phase = entry_size > 0 ? CHECK_SIZE : READ_NAME;
}
if (!read_global_type && phase == READ_NAME) {
tensor_storage.name = str;
phase = READ_DATA;
tensor_storage.type = global_type;
}
}
}
};
ggml_type PickleTensorReader::global_type = GGML_TYPE_F32; // all pickle_tensors data type
bool PickleTensorReader::read_global_type = false;
int find_char(uint8_t* buffer, int len, char c) {
for (int pos = 0; pos < len; pos++) {
if (buffer[pos] == c) {
return pos;
}
}
return -1;
}
#define MAX_STRING_BUFFER 512
bool ModelLoader::parse_data_pkl(uint8_t* buffer,
size_t buffer_size,
zip_t* zip,
std::string dir,
size_t file_index,
const std::string prefix) {
uint8_t* buffer_end = buffer + buffer_size;
if (buffer[0] == 0x80) { // proto
if (buffer[1] != 2) {
LOG_ERROR("Unsupported protocol\n");
return false;
}
buffer += 2; // 0x80 and version
char string_buffer[MAX_STRING_BUFFER];
bool finish = false;
PickleTensorReader reader;
// read pickle binary file
while (!finish && buffer < buffer_end) {
uint8_t opcode = *buffer;
buffer++;
// https://github.com/python/cpython/blob/3.7/Lib/pickletools.py#L1048
// https://github.com/python/cpython/blob/main/Lib/pickle.py#L105
switch (opcode) {
case '}': // EMPTY_DICT = b'}' # push empty dict
break;
case ']': // EMPTY_LIST = b']' # push empty list
break;
// skip unused sections
case 'h': // BINGET = b'h' # " " " " " " ; " " 1-byte arg
case 'q': // BINPUT = b'q' # " " " " " ; " " 1-byte arg
case 'Q': // BINPERSID = b'Q' # " " " ; " " " " stack
buffer++;
break;
case 'r': // LONG_BINPUT = b'r' # " " " " " ; " " 4-byte arg
buffer += 4;
break;
case 0x95: // FRAME = b'\x95' # indicate the beginning of a new frame
buffer += 8;
break;
case 0x94: // MEMOIZE = b'\x94' # store top of the stack in memo
break;
case '(': // MARK = b'(' # push special markobject on stack
break;
case 'K': // BININT1 = b'K' # push 1-byte unsigned int
{
uint8_t value = *buffer;
if (reader.read_int_value(value)) {
buffer++;
}
buffer++;
} break;
case 'M': // BININT2 = b'M' # push 2-byte unsigned int
{
uint16_t value = read_short(buffer);
if (reader.read_int_value(value)) {
buffer++;
}
buffer += 2;
} break;
case 'J': // BININT = b'J' # push four-byte signed int
{
const int32_t value = read_int(buffer);
if (reader.read_int_value(value)) {
buffer++; // skip tuple after read num_elements
}
buffer += 4;
} break;
case 'X': // BINUNICODE = b'X' # " " " ; counted UTF-8 string argument
{
const int32_t len = read_int(buffer);
buffer += 4;
memset(string_buffer, 0, MAX_STRING_BUFFER);
if (len > MAX_STRING_BUFFER) {
LOG_WARN("tensor name very large");
}
memcpy(string_buffer, buffer, len < MAX_STRING_BUFFER ? len : (MAX_STRING_BUFFER - 1));
buffer += len;
reader.read_string(string_buffer, zip, dir);
} break;
case 0x8C: // SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes
{
const int8_t len = *buffer;
buffer++;
memset(string_buffer, 0, MAX_STRING_BUFFER);
memcpy(string_buffer, buffer, len);
buffer += len;
// printf("String: '%s'\n", string_buffer);
} break;
case 'c': // GLOBAL = b'c' # push self.find_class(modname, name); 2 string args
{
int len = find_char(buffer, MAX_STRING_BUFFER, '\n');
buffer += len + 1;
len = find_char(buffer, MAX_STRING_BUFFER, '\n');
memset(string_buffer, 0, MAX_STRING_BUFFER);
memcpy(string_buffer, buffer, len);
buffer += len + 1;
reader.read_global(string_buffer);
} break;
case 0x86: // TUPLE2 = b'\x86' # build 2-tuple from two topmost stack items
case 0x85: // TUPLE1 = b'\x85' # build 1-tuple from stack top
case 't': // TUPLE = b't' # build tuple from topmost stack items
if (reader.phase == PickleTensorReader::READ_DIMENS) {
reader.tensor_storage.reverse_ne();
reader.tensor_storage.file_index = file_index;
// if(strcmp(prefix.c_str(), "scarlett") == 0)
// printf(" ZIP got tensor %s \n ", reader.tensor_storage.name.c_str());
std::string name = reader.tensor_storage.name;
if (!starts_with(name, prefix)) {
name = prefix + name;
}
reader.tensor_storage.name = name;
add_tensor_storage(reader.tensor_storage);
// LOG_DEBUG("%s", reader.tensor_storage.name.c_str());
// reset
reader = PickleTensorReader();
}
break;
case '.': // STOP = b'.' # every pickle ends with STOP
finish = true;
break;
default:
break;
}
}
}
return true;
}
bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::string& prefix) {
LOG_DEBUG("init from '%s'", file_path.c_str());
file_paths_.push_back(file_path);
size_t file_index = file_paths_.size() - 1;
zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
if (zip == nullptr) {
LOG_ERROR("failed to open '%s'", file_path.c_str());
return false;
}
int n = (int)zip_entries_total(zip);
for (int i = 0; i < n; ++i) {
zip_entry_openbyindex(zip, i);
{
std::string name = zip_entry_name(zip);
size_t pos = name.find("data.pkl");
if (pos != std::string::npos) {
std::string dir = name.substr(0, pos);
printf("ZIP %d, name = %s, dir = %s \n", i, name.c_str(), dir.c_str());
void* pkl_data = nullptr;
size_t pkl_size;
zip_entry_read(zip, &pkl_data, &pkl_size);
// LOG_DEBUG("%lld", pkl_size);
parse_data_pkl((uint8_t*)pkl_data, pkl_size, zip, dir, file_index, prefix);
free(pkl_data);
}
}
zip_entry_close(zip);
}
zip_close(zip);
return true;
}
SDVersion ModelLoader::get_sd_version() {
TensorStorage token_embedding_weight, input_block_weight;
@ -1268,8 +674,8 @@ std::map<ggml_type, uint32_t> ModelLoader::get_vae_wtype_stat() {
return wtype_stat;
}
static std::vector<std::pair<std::string, ggml_type>> parse_tensor_type_rules(const std::string& tensor_type_rules) {
std::vector<std::pair<std::string, ggml_type>> result;
TensorTypeRules parse_tensor_type_rules(const std::string& tensor_type_rules) {
TensorTypeRules result;
for (const auto& item : split_string(tensor_type_rules, ',')) {
if (item.size() == 0)
continue;
@ -1702,76 +1108,6 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage
return false;
}
bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type, const std::string& tensor_type_rules_str) {
auto backend = ggml_backend_cpu_init();
size_t mem_size = 1 * 1024 * 1024; // for padding
mem_size += tensor_storage_map.size() * ggml_tensor_overhead();
mem_size += get_params_mem_size(backend, type);
LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
ggml_context* ggml_ctx = ggml_init({mem_size, nullptr, false});
gguf_context* gguf_ctx = gguf_init_empty();
auto tensor_type_rules = parse_tensor_type_rules(tensor_type_rules_str);
std::mutex tensor_mutex;
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
const std::string& name = tensor_storage.name;
ggml_type tensor_type = tensor_storage.type;
ggml_type dst_type = type;
for (const auto& tensor_type_rule : tensor_type_rules) {
std::regex pattern(tensor_type_rule.first);
if (std::regex_search(name, pattern)) {
dst_type = tensor_type_rule.second;
break;
}
}
if (tensor_should_be_converted(tensor_storage, dst_type)) {
tensor_type = dst_type;
}
std::lock_guard<std::mutex> lock(tensor_mutex);
ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
if (tensor == nullptr) {
LOG_ERROR("ggml_new_tensor failed");
return false;
}
ggml_set_name(tensor, name.c_str());
// LOG_DEBUG("%s %d %s %d[%d %d %d %d] %d[%d %d %d %d]", name.c_str(),
// ggml_nbytes(tensor), ggml_type_name(tensor_type),
// tensor_storage.n_dims,
// tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
// tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
if (!tensor->data) {
GGML_ASSERT(ggml_nelements(tensor) == 0);
// avoid crashing the gguf writer by setting a dummy pointer for zero-sized tensors
LOG_DEBUG("setting dummy pointer for zero-sized tensor %s", name.c_str());
tensor->data = ggml_get_mem_buffer(ggml_ctx);
}
*dst_tensor = tensor;
gguf_add_tensor(gguf_ctx, tensor);
return true;
};
bool success = load_tensors(on_new_tensor_cb);
ggml_backend_free(backend);
LOG_INFO("load tensors done");
LOG_INFO("trying to save tensors to %s", file_path.c_str());
if (success) {
gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
}
ggml_free(ggml_ctx);
gguf_free(gguf_ctx);
return success;
}
int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type) {
size_t alignment = 128;
if (backend != nullptr) {
@ -1791,29 +1127,3 @@ int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type)
return mem_size;
}
bool convert(const char* input_path,
const char* vae_path,
const char* output_path,
sd_type_t output_type,
const char* tensor_type_rules,
bool convert_name) {
ModelLoader model_loader;
if (!model_loader.init_from_file(input_path)) {
LOG_ERROR("init model loader from file failed: '%s'", input_path);
return false;
}
if (vae_path != nullptr && strlen(vae_path) > 0) {
if (!model_loader.init_from_file(vae_path, "vae.")) {
LOG_ERROR("init model loader from file failed: '%s'", vae_path);
return false;
}
}
if (convert_name) {
model_loader.convert_tensors_name();
}
bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type, tensor_type_rules);
return success;
}

View File

@ -5,20 +5,13 @@
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "ggml-backend.h"
#include "ggml.h"
#include "gguf.h"
#include "json.hpp"
#include "model_io/tensor_storage.h"
#include "ordered_map.hpp"
#include "zip.h"
#define SD_MAX_DIMS 5
enum SDVersion {
VERSION_SD1,
@ -195,116 +188,10 @@ enum PMVersion {
PM_VERSION_2,
};
struct TensorStorage {
std::string name;
ggml_type type = GGML_TYPE_F32;
ggml_type expected_type = GGML_TYPE_COUNT;
bool is_f8_e4m3 = false;
bool is_f8_e5m2 = false;
bool is_f64 = false;
bool is_i64 = false;
int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
int n_dims = 0;
size_t file_index = 0;
int index_in_zip = -1; // >= means stored in a zip file
uint64_t offset = 0; // offset in file
TensorStorage() = default;
TensorStorage(std::string name, ggml_type type, const int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
: name(std::move(name)), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
for (int i = 0; i < n_dims; i++) {
this->ne[i] = ne[i];
}
}
int64_t nelements() const {
int64_t n = 1;
for (int i = 0; i < SD_MAX_DIMS; i++) {
n *= ne[i];
}
return n;
}
int64_t nbytes() const {
return nelements() * ggml_type_size(type) / ggml_blck_size(type);
}
int64_t nbytes_to_read() const {
if (is_f8_e4m3 || is_f8_e5m2) {
return nbytes() / 2;
} else if (is_f64 || is_i64) {
return nbytes() * 2;
} else {
return nbytes();
}
}
void unsqueeze() {
if (n_dims == 2) {
n_dims = 4;
ne[3] = ne[1];
ne[2] = ne[0];
ne[1] = 1;
ne[0] = 1;
}
}
std::vector<TensorStorage> chunk(size_t n) {
std::vector<TensorStorage> chunks;
uint64_t chunk_size = nbytes_to_read() / n;
// printf("%d/%d\n", chunk_size, nbytes_to_read());
reverse_ne();
for (size_t i = 0; i < n; i++) {
TensorStorage chunk_i = *this;
chunk_i.ne[0] = ne[0] / n;
chunk_i.offset = offset + i * chunk_size;
chunk_i.reverse_ne();
chunks.push_back(chunk_i);
}
reverse_ne();
return chunks;
}
void reverse_ne() {
int64_t new_ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
for (int i = 0; i < n_dims; i++) {
new_ne[i] = ne[n_dims - 1 - i];
}
for (int i = 0; i < n_dims; i++) {
ne[i] = new_ne[i];
}
}
std::string to_string() const {
std::stringstream ss;
const char* type_name = ggml_type_name(type);
if (is_f8_e4m3) {
type_name = "f8_e4m3";
} else if (is_f8_e5m2) {
type_name = "f8_e5m2";
} else if (is_f64) {
type_name = "f64";
} else if (is_i64) {
type_name = "i64";
}
ss << name << " | " << type_name << " | ";
ss << n_dims << " [";
for (int i = 0; i < SD_MAX_DIMS; i++) {
ss << ne[i];
if (i != SD_MAX_DIMS - 1) {
ss << ", ";
}
}
ss << "]";
return ss.str();
}
};
typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_cb_t;
typedef OrderedMap<std::string, TensorStorage> String2TensorStorage;
using TensorTypeRules = std::vector<std::pair<std::string, ggml_type>>;
TensorTypeRules parse_tensor_type_rules(const std::string& tensor_type_rules);
class ModelLoader {
protected:
@ -314,16 +201,10 @@ protected:
void add_tensor_storage(const TensorStorage& tensor_storage);
bool parse_data_pkl(uint8_t* buffer,
size_t buffer_size,
zip_t* zip,
std::string dir,
size_t file_index,
const std::string prefix);
bool init_from_gguf_file(const std::string& file_path, const std::string& prefix = "");
bool init_from_safetensors_file(const std::string& file_path, const std::string& prefix = "");
bool init_from_ckpt_file(const std::string& file_path, const std::string& prefix = "");
bool init_from_torch_zip_file(const std::string& file_path, const std::string& prefix = "");
bool init_from_torch_legacy_file(const std::string& file_path, const std::string& prefix = "");
bool init_from_diffusers_file(const std::string& file_path, const std::string& prefix = "");
public:
@ -353,7 +234,6 @@ public:
return names;
}
bool save_to_gguf_file(const std::string& file_path, ggml_type type, const std::string& tensor_type_rules);
bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type);
int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
~ModelLoader() = default;

57
src/model_io/binary_io.h Normal file
View File

@ -0,0 +1,57 @@
#ifndef __SD_MODEL_IO_BINARY_IO_H__
#define __SD_MODEL_IO_BINARY_IO_H__
#include <cstdint>
#include <ostream>
namespace model_io {
inline int32_t read_int(const uint8_t* buffer) {
uint32_t value = 0;
value |= static_cast<uint32_t>(buffer[3]) << 24;
value |= static_cast<uint32_t>(buffer[2]) << 16;
value |= static_cast<uint32_t>(buffer[1]) << 8;
value |= static_cast<uint32_t>(buffer[0]);
return static_cast<int32_t>(value);
}
inline uint16_t read_short(const uint8_t* buffer) {
uint16_t value = 0;
value |= static_cast<uint16_t>(buffer[1]) << 8;
value |= static_cast<uint16_t>(buffer[0]);
return value;
}
inline uint64_t read_u64(const uint8_t* buffer) {
uint64_t value = 0;
value |= static_cast<uint64_t>(buffer[7]) << 56;
value |= static_cast<uint64_t>(buffer[6]) << 48;
value |= static_cast<uint64_t>(buffer[5]) << 40;
value |= static_cast<uint64_t>(buffer[4]) << 32;
value |= static_cast<uint64_t>(buffer[3]) << 24;
value |= static_cast<uint64_t>(buffer[2]) << 16;
value |= static_cast<uint64_t>(buffer[1]) << 8;
value |= static_cast<uint64_t>(buffer[0]);
return value;
}
inline void write_u64(std::ostream& stream, uint64_t value) {
uint8_t buffer[8];
for (int i = 0; i < 8; ++i) {
buffer[i] = static_cast<uint8_t>((value >> (8 * i)) & 0xFF);
}
stream.write((const char*)buffer, sizeof(buffer));
}
inline int find_char(const uint8_t* buffer, int len, char c) {
for (int pos = 0; pos < len; pos++) {
if (buffer[pos] == (uint8_t)c) {
return pos;
}
}
return -1;
}
} // namespace model_io
#endif // __SD_MODEL_IO_BINARY_IO_H__

123
src/model_io/gguf_io.cpp Normal file
View File

@ -0,0 +1,123 @@
#include "gguf_io.h"
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>
#include "gguf.h"
#include "gguf_reader_ext.h"
#include "util.h"
static void set_error(std::string* error, const std::string& message) {
if (error != nullptr) {
*error = message;
}
}
bool is_gguf_file(const std::string& file_path) {
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open()) {
return false;
}
char magic[4];
file.read(magic, sizeof(magic));
if (!file) {
return false;
}
for (uint32_t i = 0; i < sizeof(magic); i++) {
if (magic[i] != GGUF_MAGIC[i]) {
return false;
}
}
return true;
}
bool read_gguf_file(const std::string& file_path,
std::vector<TensorStorage>& tensor_storages,
std::string* error) {
tensor_storages.clear();
gguf_context* ctx_gguf_ = nullptr;
ggml_context* ctx_meta_ = nullptr;
ctx_gguf_ = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta_});
if (!ctx_gguf_) {
GGUFReader gguf_reader;
if (!gguf_reader.load(file_path)) {
set_error(error, "failed to open '" + file_path + "' with GGUFReader");
return false;
}
size_t data_offset = gguf_reader.data_offset();
for (const auto& gguf_tensor_info : gguf_reader.tensors()) {
TensorStorage tensor_storage(
gguf_tensor_info.name,
gguf_tensor_info.type,
gguf_tensor_info.shape.data(),
static_cast<int>(gguf_tensor_info.shape.size()),
0,
data_offset + gguf_tensor_info.offset);
tensor_storages.push_back(tensor_storage);
}
return true;
}
int n_tensors = static_cast<int>(gguf_get_n_tensors(ctx_gguf_));
size_t data_offset = gguf_get_data_offset(ctx_gguf_);
for (int i = 0; i < n_tensors; i++) {
std::string name = gguf_get_tensor_name(ctx_gguf_, i);
ggml_tensor* dummy = ggml_get_tensor(ctx_meta_, name.c_str());
size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf_, i);
TensorStorage tensor_storage(name, dummy->type, dummy->ne, ggml_n_dims(dummy), 0, offset);
if (ggml_nbytes(dummy) != tensor_storage.nbytes()) {
gguf_free(ctx_gguf_);
ggml_free(ctx_meta_);
set_error(error, "size mismatch for tensor '" + name + "'");
return false;
}
tensor_storages.push_back(tensor_storage);
}
gguf_free(ctx_gguf_);
ggml_free(ctx_meta_);
return true;
}
bool write_gguf_file(const std::string& file_path,
const std::vector<TensorWriteInfo>& tensors,
std::string* error) {
gguf_context* gguf_ctx = gguf_init_empty();
if (gguf_ctx == nullptr) {
set_error(error, "gguf_init_empty failed");
return false;
}
for (const TensorWriteInfo& write_tensor : tensors) {
ggml_tensor* tensor = write_tensor.tensor;
if (tensor == nullptr) {
set_error(error, "null tensor cannot be written to GGUF");
gguf_free(gguf_ctx);
return false;
}
gguf_add_tensor(gguf_ctx, tensor);
}
LOG_INFO("trying to save tensors to %s", file_path.c_str());
bool success = gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
if (!success) {
set_error(error, "failed to write GGUF file '" + file_path + "'");
}
gguf_free(gguf_ctx);
return success;
}

17
src/model_io/gguf_io.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef __SD_MODEL_IO_GGUF_IO_H__
#define __SD_MODEL_IO_GGUF_IO_H__
#include <string>
#include <vector>
#include "tensor_storage.h"
bool is_gguf_file(const std::string& file_path);
bool read_gguf_file(const std::string& file_path,
std::vector<TensorStorage>& tensor_storages,
std::string* error = nullptr);
bool write_gguf_file(const std::string& file_path,
const std::vector<TensorWriteInfo>& tensors,
std::string* error = nullptr);
#endif // __SD_MODEL_IO_GGUF_IO_H__

View File

@ -1,5 +1,5 @@
#ifndef __GGUF_READER_HPP__
#define __GGUF_READER_HPP__
#ifndef __SD_MODEL_IO_GGUF_READER_EXT_H__
#define __SD_MODEL_IO_GGUF_READER_EXT_H__
#include <cstdint>
#include <fstream>
@ -231,4 +231,4 @@ public:
size_t data_offset() const { return data_offset_; }
};
#endif // __GGUF_READER_HPP__
#endif // __SD_MODEL_IO_GGUF_READER_EXT_H__

1064
src/model_io/pickle_io.cpp Normal file

File diff suppressed because it is too large Load Diff

21
src/model_io/pickle_io.h Normal file
View File

@ -0,0 +1,21 @@
#ifndef __SD_MODEL_IO_PICKLE_IO_H__
#define __SD_MODEL_IO_PICKLE_IO_H__
#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>
#include "tensor_storage.h"
bool skip_pickle_object(const uint8_t* buffer, size_t buffer_size, size_t* object_size);
bool pickle_object_is_torch_magic_number(const uint8_t* buffer, size_t buffer_size);
bool parse_pickle_uint32_object(const uint8_t* buffer, size_t buffer_size, uint32_t* value);
bool parse_torch_state_dict_pickle(const uint8_t* buffer,
size_t buffer_size,
std::vector<TensorStorage>& tensor_storages,
std::unordered_map<std::string, uint64_t>& storage_nbytes,
std::string* error = nullptr);
#endif // __SD_MODEL_IO_PICKLE_IO_H__

View File

@ -0,0 +1,316 @@
#include "safetensors_io.h"
#include <cstdint>
#include <exception>
#include <fstream>
#include <string>
#include <vector>
#include "binary_io.h"
#include "json.hpp"
#include "util.h"
static constexpr size_t ST_HEADER_SIZE_LEN = 8;
static void set_error(std::string* error, const std::string& message) {
if (error != nullptr) {
*error = message;
}
}
bool is_safetensors_file(const std::string& file_path) {
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open()) {
return false;
}
// get file size
file.seekg(0, file.end);
size_t file_size_ = file.tellg();
file.seekg(0, file.beg);
// read header size
if (file_size_ <= ST_HEADER_SIZE_LEN) {
return false;
}
uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
if (!file) {
return false;
}
size_t header_size_ = model_io::read_u64(header_size_buf);
if (header_size_ >= file_size_ || header_size_ <= 2) {
return false;
}
// read header
std::vector<char> header_buf;
header_buf.resize(header_size_ + 1);
header_buf[header_size_] = '\0';
file.read(header_buf.data(), header_size_);
if (!file) {
return false;
}
try {
nlohmann::json header_ = nlohmann::json::parse(header_buf.data());
} catch (const std::exception&) {
return false;
}
return true;
}
static ggml_type safetensors_dtype_to_ggml_type(const std::string& dtype) {
ggml_type ttype = GGML_TYPE_COUNT;
if (dtype == "F16") {
ttype = GGML_TYPE_F16;
} else if (dtype == "BF16") {
ttype = GGML_TYPE_BF16;
} else if (dtype == "F32") {
ttype = GGML_TYPE_F32;
} else if (dtype == "F64") {
ttype = GGML_TYPE_F32;
} else if (dtype == "F8_E4M3") {
ttype = GGML_TYPE_F16;
} else if (dtype == "F8_E5M2") {
ttype = GGML_TYPE_F16;
} else if (dtype == "I32") {
ttype = GGML_TYPE_I32;
} else if (dtype == "I64") {
ttype = GGML_TYPE_I32;
}
return ttype;
}
// https://huggingface.co/docs/safetensors/index
bool read_safetensors_file(const std::string& file_path,
std::vector<TensorStorage>& tensor_storages,
std::string* error) {
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open()) {
set_error(error, "failed to open '" + file_path + "'");
return false;
}
// get file size
file.seekg(0, file.end);
size_t file_size_ = file.tellg();
file.seekg(0, file.beg);
// read header size
if (file_size_ <= ST_HEADER_SIZE_LEN) {
set_error(error, "invalid safetensor file '" + file_path + "'");
return false;
}
uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
if (!file) {
set_error(error, "read safetensors header size failed: '" + file_path + "'");
return false;
}
size_t header_size_ = model_io::read_u64(header_size_buf);
if (header_size_ >= file_size_) {
set_error(error, "invalid safetensor file '" + file_path + "'");
return false;
}
// read header
std::vector<char> header_buf;
header_buf.resize(header_size_ + 1);
header_buf[header_size_] = '\0';
file.read(header_buf.data(), header_size_);
if (!file) {
set_error(error, "read safetensors header failed: '" + file_path + "'");
return false;
}
nlohmann::json header_;
try {
header_ = nlohmann::json::parse(header_buf.data());
} catch (const std::exception&) {
set_error(error, "parsing safetensors header failed: '" + file_path + "'");
return false;
}
tensor_storages.clear();
for (auto& item : header_.items()) {
std::string name = item.key();
nlohmann::json tensor_info = item.value();
// LOG_DEBUG("%s %s\n", name.c_str(), tensor_info.dump().c_str());
if (name == "__metadata__") {
continue;
}
std::string dtype = tensor_info["dtype"];
nlohmann::json shape = tensor_info["shape"];
if (dtype == "U8") {
continue;
}
size_t begin = tensor_info["data_offsets"][0].get<size_t>();
size_t end = tensor_info["data_offsets"][1].get<size_t>();
ggml_type type = safetensors_dtype_to_ggml_type(dtype);
if (type == GGML_TYPE_COUNT) {
set_error(error, "unsupported dtype '" + dtype + "' (tensor '" + name + "')");
return false;
}
if (shape.size() > SD_MAX_DIMS) {
set_error(error, "invalid tensor '" + name + "'");
return false;
}
int n_dims = (int)shape.size();
int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
for (int i = 0; i < n_dims; i++) {
ne[i] = shape[i].get<int64_t>();
}
if (n_dims == 5) {
n_dims = 4;
ne[0] = ne[0] * ne[1];
ne[1] = ne[2];
ne[2] = ne[3];
ne[3] = ne[4];
}
// ggml_n_dims returns 1 for scalars
if (n_dims == 0) {
n_dims = 1;
}
TensorStorage tensor_storage(name, type, ne, n_dims, 0, ST_HEADER_SIZE_LEN + header_size_ + begin);
tensor_storage.reverse_ne();
size_t tensor_data_size = end - begin;
bool tensor_size_ok;
if (dtype == "F8_E4M3") {
tensor_storage.is_f8_e4m3 = true;
// f8 -> f16
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
} else if (dtype == "F8_E5M2") {
tensor_storage.is_f8_e5m2 = true;
// f8 -> f16
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
} else if (dtype == "F64") {
tensor_storage.is_f64 = true;
// f64 -> f32
tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
} else if (dtype == "I64") {
tensor_storage.is_i64 = true;
// i64 -> i32
tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
} else {
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size);
}
if (!tensor_size_ok) {
set_error(error, "size mismatch for tensor '" + name + "' (" + dtype + ")");
return false;
}
tensor_storages.push_back(tensor_storage);
// LOG_DEBUG("%s %s", tensor_storage.to_string().c_str(), dtype.c_str());
}
return true;
}
static bool ggml_type_to_safetensors_dtype(ggml_type type, std::string* dtype) {
switch (type) {
case GGML_TYPE_F16:
*dtype = "F16";
return true;
case GGML_TYPE_BF16:
*dtype = "BF16";
return true;
case GGML_TYPE_F32:
*dtype = "F32";
return true;
case GGML_TYPE_I32:
*dtype = "I32";
return true;
default:
return false;
}
}
bool write_safetensors_file(const std::string& file_path,
const std::vector<TensorWriteInfo>& tensors,
std::string* error) {
nlohmann::ordered_json header = nlohmann::ordered_json::object();
uint64_t data_offset = 0;
for (const TensorWriteInfo& write_tensor : tensors) {
ggml_tensor* tensor = write_tensor.tensor;
if (tensor == nullptr) {
set_error(error, "null tensor cannot be written to safetensors");
return false;
}
const std::string name = ggml_get_name(tensor);
std::string dtype;
if (!ggml_type_to_safetensors_dtype(tensor->type, &dtype)) {
set_error(error,
"unsupported safetensors dtype '" + std::string(ggml_type_name(tensor->type)) +
"' for tensor '" + name + "'");
return false;
}
const uint64_t tensor_nbytes = ggml_nbytes(tensor);
nlohmann::ordered_json json_tensor_info = nlohmann::ordered_json::object();
json_tensor_info["dtype"] = dtype;
nlohmann::ordered_json shape = nlohmann::ordered_json::array();
for (int i = 0; i < write_tensor.n_dims; ++i) {
shape.push_back(write_tensor.ne[write_tensor.n_dims - 1 - i]);
}
json_tensor_info["shape"] = shape;
nlohmann::ordered_json data_offsets = nlohmann::ordered_json::array();
data_offsets.push_back(data_offset);
data_offsets.push_back(data_offset + tensor_nbytes);
json_tensor_info["data_offsets"] = data_offsets;
header[name] = json_tensor_info;
data_offset += tensor_nbytes;
}
const std::string header_str = header.dump();
std::ofstream file(file_path, std::ios::binary);
if (!file.is_open()) {
set_error(error, "failed to open '" + file_path + "' for writing");
return false;
}
LOG_INFO("trying to save tensors to %s", file_path.c_str());
model_io::write_u64(file, header_str.size());
file.write(header_str.data(), header_str.size());
if (!file) {
set_error(error, "failed to write safetensors header to '" + file_path + "'");
return false;
}
for (const TensorWriteInfo& write_tensor : tensors) {
ggml_tensor* tensor = write_tensor.tensor;
const std::string name = ggml_get_name(tensor);
const size_t tensor_nbytes = ggml_nbytes(tensor);
file.write((const char*)tensor->data, tensor_nbytes);
if (!file) {
set_error(error,
"failed to write tensor '" + name + "' to '" + file_path + "'");
return false;
}
}
return true;
}

View File

@ -0,0 +1,17 @@
#ifndef __SD_MODEL_IO_SAFETENSORS_IO_H__
#define __SD_MODEL_IO_SAFETENSORS_IO_H__
#include <string>
#include <vector>
#include "tensor_storage.h"
bool is_safetensors_file(const std::string& file_path);
bool read_safetensors_file(const std::string& file_path,
std::vector<TensorStorage>& tensor_storages,
std::string* error = nullptr);
bool write_safetensors_file(const std::string& file_path,
const std::vector<TensorWriteInfo>& tensors,
std::string* error = nullptr);
#endif // __SD_MODEL_IO_SAFETENSORS_IO_H__

View File

@ -0,0 +1,132 @@
#ifndef __SD_TENSOR_STORAGE_H__
#define __SD_TENSOR_STORAGE_H__
#include <cstddef>
#include <cstdint>
#include <functional>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "ggml.h"
#define SD_MAX_DIMS 5
struct TensorStorage {
std::string name;
ggml_type type = GGML_TYPE_F32;
ggml_type expected_type = GGML_TYPE_COUNT;
bool is_f8_e4m3 = false;
bool is_f8_e5m2 = false;
bool is_f64 = false;
bool is_i64 = false;
int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
int n_dims = 0;
std::string storage_key;
size_t file_index = 0;
int index_in_zip = -1; // >= means stored in a zip file
uint64_t offset = 0; // offset in file
TensorStorage() = default;
TensorStorage(std::string name, ggml_type type, const int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
: name(std::move(name)), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
for (int i = 0; i < n_dims; i++) {
this->ne[i] = ne[i];
}
}
int64_t nelements() const {
int64_t n = 1;
for (int i = 0; i < SD_MAX_DIMS; i++) {
n *= ne[i];
}
return n;
}
int64_t nbytes() const {
return nelements() * ggml_type_size(type) / ggml_blck_size(type);
}
int64_t nbytes_to_read() const {
if (is_f8_e4m3 || is_f8_e5m2) {
return nbytes() / 2;
} else if (is_f64 || is_i64) {
return nbytes() * 2;
} else {
return nbytes();
}
}
void unsqueeze() {
if (n_dims == 2) {
n_dims = 4;
ne[3] = ne[1];
ne[2] = ne[0];
ne[1] = 1;
ne[0] = 1;
}
}
std::vector<TensorStorage> chunk(size_t n) {
std::vector<TensorStorage> chunks;
uint64_t chunk_size = nbytes_to_read() / n;
// printf("%d/%d\n", chunk_size, nbytes_to_read());
reverse_ne();
for (size_t i = 0; i < n; i++) {
TensorStorage chunk_i = *this;
chunk_i.ne[0] = ne[0] / n;
chunk_i.offset = offset + i * chunk_size;
chunk_i.reverse_ne();
chunks.push_back(chunk_i);
}
reverse_ne();
return chunks;
}
void reverse_ne() {
int64_t new_ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
for (int i = 0; i < n_dims; i++) {
new_ne[i] = ne[n_dims - 1 - i];
}
for (int i = 0; i < n_dims; i++) {
ne[i] = new_ne[i];
}
}
std::string to_string() const {
std::stringstream ss;
const char* type_name = ggml_type_name(type);
if (is_f8_e4m3) {
type_name = "f8_e4m3";
} else if (is_f8_e5m2) {
type_name = "f8_e5m2";
} else if (is_f64) {
type_name = "f64";
} else if (is_i64) {
type_name = "i64";
}
ss << name << " | " << type_name << " | ";
ss << n_dims << " [";
for (int i = 0; i < SD_MAX_DIMS; i++) {
ss << ne[i];
if (i != SD_MAX_DIMS - 1) {
ss << ", ";
}
}
ss << "]";
return ss.str();
}
};
struct TensorWriteInfo {
int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
int n_dims = 0;
ggml_tensor* tensor = nullptr;
};
typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_cb_t;
#endif // __SD_TENSOR_STORAGE_H__

View File

@ -0,0 +1,252 @@
#include "torch_legacy_io.h"
#include <algorithm>
#include <cstdint>
#include <fstream>
#include <string>
#include <unordered_map>
#include <vector>
#include "pickle_io.h"
#include "util.h"
// torch.save format background:
//
// - Before PyTorch 1.6.0, torch.save used this legacy non-zip format by
// default.
// - Since PyTorch 1.6.0, torch.save defaults to an uncompressed ZIP64 archive
// containing data.pkl, data/, version, and, since PyTorch 2.1.0, byteorder.
// - The old format can still be produced explicitly with:
// torch.save(obj, path, _use_new_zipfile_serialization=False)
//
// Whether obj is a state_dict or a whole nn.Module does not change the outer
// container format selected by torch.save. It changes the pickled object inside:
//
// - state_dict: usually an OrderedDict[str, Tensor]. pickle_io.cpp supports a
// restricted subset of this layout because tensor metadata and raw storages
// can be recovered without executing pickle callables.
// - whole module/checkpoint object: arbitrary Python object graph. This may
// require importing user classes and executing pickle GLOBAL/REDUCE rebuild
// logic, so it is intentionally not supported here.
//
// Legacy non-zip PyTorch files are not a single pickle object:
//
// 1. pickle object: PyTorch legacy magic number
// 2. pickle object: legacy protocol version, expected to be 1001
// 3. pickle object: sys_info metadata, ignored by this reader
// 4. pickle object: state_dict metadata, parsed by pickle_io.cpp
// 5. pickle object: serialized storage key list, skipped here
// 6. raw storage data payloads
// - PyTorch writes storages after the pickles, ordered by storage key
// - each storage has an 8-byte legacy storage header followed by raw bytes
static constexpr size_t LEGACY_STORAGE_HEADER_SIZE = 8;
static void set_error(std::string* error, const std::string& message) {
if (error != nullptr) {
*error = message;
}
}
static std::string bytes_to_hex(const std::vector<uint8_t>& bytes) {
static const char* hex = "0123456789ABCDEF";
std::string result;
result.reserve(bytes.size() * 3);
for (size_t i = 0; i < bytes.size(); ++i) {
if (i > 0) {
result.push_back('-');
}
result.push_back(hex[(bytes[i] >> 4) & 0x0F]);
result.push_back(hex[bytes[i] & 0x0F]);
}
return result;
}
static bool is_probably_tar_file(const std::vector<uint8_t>& header) {
return header.size() >= 262 &&
header[257] == 'u' &&
header[258] == 's' &&
header[259] == 't' &&
header[260] == 'a' &&
header[261] == 'r';
}
static std::string torch_legacy_diagnostics(const std::string& file_path, const std::vector<uint8_t>& buffer) {
if (!ends_with(file_path, ".pt") && !ends_with(file_path, ".pth")) {
return "";
}
if (buffer.empty()) {
return "unsupported PyTorch file '" + file_path + "': empty file";
}
size_t short_len = std::min<size_t>(buffer.size(), 32);
std::vector<uint8_t> short_header(buffer.begin(), buffer.begin() + short_len);
const bool raw_pickle = buffer[0] == 0x80;
const bool tar_file = is_probably_tar_file(buffer);
std::string message = "unsupported PyTorch file '" + file_path + "': first bytes " +
bytes_to_hex(short_header) +
", raw_pickle=" + (raw_pickle ? "true" : "false") +
", tar=" + (tar_file ? "true" : "false");
if (raw_pickle) {
message += "; raw pickle did not match the restricted state_dict layouts currently supported";
} else if (tar_file) {
message += "; legacy tar PyTorch checkpoints are not supported yet";
}
return message;
}
bool read_torch_legacy_file(const std::string& file_path,
std::vector<TensorStorage>& tensor_storages,
std::string* error) {
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open()) {
set_error(error, "failed to open '" + file_path + "'");
return false;
}
file.seekg(0, file.end);
size_t file_size = (size_t)file.tellg();
file.seekg(0, file.beg);
if (file_size == 0) {
set_error(error, "empty file '" + file_path + "'");
return false;
}
std::vector<uint8_t> buffer(file_size);
file.read((char*)buffer.data(), file_size);
if (!file) {
set_error(error, "failed to read '" + file_path + "'");
return false;
}
auto finalize_tensor_offsets = [&](size_t storage_data_offset,
const std::unordered_map<std::string, uint64_t>& legacy_storage_map) -> bool {
if (storage_data_offset > file_size) {
return false;
}
std::vector<std::string> storage_keys;
storage_keys.reserve(legacy_storage_map.size());
for (const auto& [storage_key, _] : legacy_storage_map) {
storage_keys.push_back(storage_key);
}
std::sort(storage_keys.begin(), storage_keys.end());
std::unordered_map<std::string, uint64_t> storage_offsets;
uint64_t current_offset = storage_data_offset;
for (const auto& storage_key : storage_keys) {
auto it = legacy_storage_map.find(storage_key);
if (it == legacy_storage_map.end()) {
return false;
}
if (current_offset + LEGACY_STORAGE_HEADER_SIZE + it->second > file_size) {
return false;
}
storage_offsets[storage_key] = current_offset + LEGACY_STORAGE_HEADER_SIZE;
current_offset += LEGACY_STORAGE_HEADER_SIZE + it->second;
}
for (auto& tensor_storage : tensor_storages) {
if (tensor_storage.storage_key.empty()) {
continue;
}
auto it_offset = storage_offsets.find(tensor_storage.storage_key);
auto it_size = legacy_storage_map.find(tensor_storage.storage_key);
if (it_offset == storage_offsets.end() || it_size == legacy_storage_map.end()) {
return false;
}
uint64_t base_offset = it_offset->second;
uint64_t storage_nbytes = it_size->second;
uint64_t tensor_nbytes = tensor_storage.nbytes_to_read();
if (tensor_storage.offset + tensor_nbytes > storage_nbytes) {
return false;
}
tensor_storage.offset = base_offset + tensor_storage.offset;
tensor_storage.storage_key.clear();
}
return true;
};
auto parse_state_dict_at = [&](size_t state_dict_offset, size_t state_dict_size, size_t* storage_data_offset) -> bool {
tensor_storages.clear();
std::unordered_map<std::string, uint64_t> legacy_storage_map;
if (!parse_torch_state_dict_pickle(buffer.data() + state_dict_offset,
state_dict_size,
tensor_storages,
legacy_storage_map,
error)) {
return false;
}
size_t offset_after_state_dict = state_dict_offset + state_dict_size;
size_t storage_keys_size = 0;
if (!skip_pickle_object(buffer.data() + offset_after_state_dict,
buffer.size() - offset_after_state_dict,
&storage_keys_size)) {
return false;
}
*storage_data_offset = offset_after_state_dict + storage_keys_size;
return finalize_tensor_offsets(*storage_data_offset, legacy_storage_map);
};
size_t object_size_1 = 0;
size_t offset = 0;
if (skip_pickle_object(buffer.data(), buffer.size(), &object_size_1) &&
pickle_object_is_torch_magic_number(buffer.data(), object_size_1)) {
offset += object_size_1;
size_t object_size_2 = 0;
if (!skip_pickle_object(buffer.data() + offset, buffer.size() - offset, &object_size_2)) {
set_error(error, torch_legacy_diagnostics(file_path, buffer));
return false;
}
uint32_t protocol_version = 0;
if (!parse_pickle_uint32_object(buffer.data() + offset, object_size_2, &protocol_version) || protocol_version != 1001) {
set_error(error, torch_legacy_diagnostics(file_path, buffer));
return false;
}
offset += object_size_2;
size_t object_size_3 = 0;
if (!skip_pickle_object(buffer.data() + offset, buffer.size() - offset, &object_size_3)) {
set_error(error, torch_legacy_diagnostics(file_path, buffer));
return false;
}
offset += object_size_3;
size_t state_dict_size = 0;
if (!skip_pickle_object(buffer.data() + offset, buffer.size() - offset, &state_dict_size)) {
set_error(error, torch_legacy_diagnostics(file_path, buffer));
return false;
}
size_t storage_data_offset = 0;
if (parse_state_dict_at(offset, state_dict_size, &storage_data_offset)) {
return true;
}
if (error != nullptr && error->empty()) {
set_error(error, torch_legacy_diagnostics(file_path, buffer));
}
return false;
}
size_t state_dict_size = 0;
if (skip_pickle_object(buffer.data(), buffer.size(), &state_dict_size)) {
size_t storage_data_offset = 0;
if (parse_state_dict_at(0, state_dict_size, &storage_data_offset)) {
return true;
}
}
if (error != nullptr && error->empty()) {
set_error(error, torch_legacy_diagnostics(file_path, buffer));
}
return false;
}

View File

@ -0,0 +1,13 @@
#ifndef __SD_MODEL_IO_TORCH_LEGACY_IO_H__
#define __SD_MODEL_IO_TORCH_LEGACY_IO_H__
#include <string>
#include <vector>
#include "tensor_storage.h"
bool read_torch_legacy_file(const std::string& file_path,
std::vector<TensorStorage>& tensor_storages,
std::string* error = nullptr);
#endif // __SD_MODEL_IO_TORCH_LEGACY_IO_H__

View File

@ -0,0 +1,140 @@
#include "torch_zip_io.h"
#include <cstdint>
#include <cstdlib>
#include <string>
#include <unordered_map>
#include <vector>
#include "pickle_io.h"
#include "zip.h"
static void set_error(std::string* error, const std::string& message) {
if (error != nullptr) {
*error = message;
}
}
bool is_torch_zip_file(const std::string& file_path) {
zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
if (zip == nullptr) {
return false;
}
zip_close(zip);
return true;
}
static bool find_zip_entry(zip_t* zip, const std::string& entry_name, int* index, uint64_t* size) {
size_t n = zip_entries_total(zip);
for (size_t i = 0; i < n; ++i) {
zip_entry_openbyindex(zip, i);
std::string name = zip_entry_name(zip);
if (name == entry_name) {
*index = (int)i;
*size = zip_entry_size(zip);
zip_entry_close(zip);
return true;
}
zip_entry_close(zip);
}
return false;
}
static bool parse_zip_data_pkl(const uint8_t* buffer,
size_t buffer_size,
zip_t* zip,
const std::string& dir,
std::vector<TensorStorage>& tensor_storages,
std::string* error) {
std::vector<TensorStorage> parsed_tensors;
std::unordered_map<std::string, uint64_t> storage_nbytes;
if (!parse_torch_state_dict_pickle(buffer, buffer_size, parsed_tensors, storage_nbytes, error)) {
if (error != nullptr && error->empty()) {
*error = "failed to parse torch zip pickle metadata";
}
return false;
}
for (auto& tensor_storage : parsed_tensors) {
if (tensor_storage.storage_key.empty()) {
set_error(error, "tensor '" + tensor_storage.name + "' has no storage key");
return false;
}
const std::string entry_name = dir + "data/" + tensor_storage.storage_key;
int zip_index = -1;
uint64_t entry_size = 0;
if (!find_zip_entry(zip, entry_name, &zip_index, &entry_size)) {
set_error(error, "storage entry '" + entry_name + "' was not found");
return false;
}
auto it_storage_size = storage_nbytes.find(tensor_storage.storage_key);
if (it_storage_size != storage_nbytes.end() && entry_size < it_storage_size->second) {
set_error(error, "storage entry '" + entry_name + "' is smaller than pickle metadata");
return false;
}
uint64_t tensor_nbytes = tensor_storage.nbytes_to_read();
if (tensor_storage.offset + tensor_nbytes > entry_size) {
set_error(error, "tensor '" + tensor_storage.name + "' exceeds storage entry '" + entry_name + "'");
return false;
}
tensor_storage.index_in_zip = zip_index;
tensor_storage.storage_key.clear();
tensor_storages.push_back(tensor_storage);
}
return true;
}
bool read_torch_zip_file(const std::string& file_path,
std::vector<TensorStorage>& tensor_storages,
std::string* error) {
zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
if (zip == nullptr) {
set_error(error, "failed to open '" + file_path + "'");
return false;
}
tensor_storages.clear();
bool success = true;
bool found_data_pkl = false;
int n = (int)zip_entries_total(zip);
for (int i = 0; i < n; ++i) {
zip_entry_openbyindex(zip, i);
std::string name = zip_entry_name(zip);
size_t pos = name.find("data.pkl");
if (pos != std::string::npos) {
found_data_pkl = true;
std::string dir = name.substr(0, pos);
void* pkl_data = nullptr;
size_t pkl_size = 0;
zip_entry_read(zip, &pkl_data, &pkl_size);
if (pkl_data == nullptr || pkl_size == 0) {
set_error(error, "failed to read '" + name + "' from '" + file_path + "'");
success = false;
} else if (!parse_zip_data_pkl((const uint8_t*)pkl_data, pkl_size, zip, dir, tensor_storages, error)) {
success = false;
}
free(pkl_data);
}
zip_entry_close(zip);
if (!success) {
break;
}
}
if (success && !found_data_pkl) {
set_error(error, "data.pkl was not found in '" + file_path + "'");
success = false;
}
zip_close(zip);
return success;
}

View File

@ -0,0 +1,14 @@
#ifndef __SD_MODEL_IO_TORCH_ZIP_IO_H__
#define __SD_MODEL_IO_TORCH_ZIP_IO_H__
#include <string>
#include <vector>
#include "tensor_storage.h"
bool is_torch_zip_file(const std::string& file_path);
bool read_torch_zip_file(const std::string& file_path,
std::vector<TensorStorage>& tensor_storages,
std::string* error = nullptr);
#endif // __SD_MODEL_IO_TORCH_ZIP_IO_H__

View File

@ -24,6 +24,75 @@ static inline void preprocessing_set_4d(sd::Tensor<float>& tensor, float value,
tensor.values()[static_cast<size_t>(preprocessing_offset_4d(tensor, i0, i1, i2, i3))] = value;
}
static inline uint8_t preprocessing_float_to_u8(float value) {
if (value <= 0.0f) {
return 0;
}
if (value >= 1.0f) {
return 255;
}
return static_cast<uint8_t>(value * 255.0f + 0.5f);
}
static inline void preprocessing_tensor_frame_to_sd_image(const sd::Tensor<float>& tensor, int frame_index, uint8_t* image_data) {
const auto& shape = tensor.shape();
GGML_ASSERT(shape.size() == 4 || shape.size() == 5);
GGML_ASSERT(image_data != nullptr);
const int width = static_cast<int>(shape[0]);
const int height = static_cast<int>(shape[1]);
const int channel = static_cast<int>(shape[shape.size() == 5 ? 3 : 2]);
const size_t pixels = static_cast<size_t>(width) * static_cast<size_t>(height);
const float* src = tensor.data();
if (shape.size() == 4) {
GGML_ASSERT(frame_index >= 0 && frame_index < shape[3]);
const size_t frame_stride = pixels * static_cast<size_t>(channel);
const float* frame_ptr = src + static_cast<size_t>(frame_index) * frame_stride;
if (channel == 3) {
const float* c0 = frame_ptr;
const float* c1 = frame_ptr + pixels;
const float* c2 = frame_ptr + pixels * 2;
for (size_t i = 0; i < pixels; ++i) {
image_data[i * 3 + 0] = preprocessing_float_to_u8(c0[i]);
image_data[i * 3 + 1] = preprocessing_float_to_u8(c1[i]);
image_data[i * 3 + 2] = preprocessing_float_to_u8(c2[i]);
}
return;
}
for (size_t i = 0; i < pixels; ++i) {
for (int c = 0; c < channel; ++c) {
image_data[i * static_cast<size_t>(channel) + static_cast<size_t>(c)] =
preprocessing_float_to_u8(frame_ptr[i + pixels * static_cast<size_t>(c)]);
}
}
return;
}
GGML_ASSERT(frame_index >= 0 && frame_index < shape[2]);
const size_t channel_stride = pixels * static_cast<size_t>(shape[2]);
const float* frame_ptr = src + static_cast<size_t>(frame_index) * pixels;
if (channel == 3) {
const float* c0 = frame_ptr;
const float* c1 = frame_ptr + channel_stride;
const float* c2 = frame_ptr + channel_stride * 2;
for (size_t i = 0; i < pixels; ++i) {
image_data[i * 3 + 0] = preprocessing_float_to_u8(c0[i]);
image_data[i * 3 + 1] = preprocessing_float_to_u8(c1[i]);
image_data[i * 3 + 2] = preprocessing_float_to_u8(c2[i]);
}
return;
}
for (size_t i = 0; i < pixels; ++i) {
for (int c = 0; c < channel; ++c) {
image_data[i * static_cast<size_t>(channel) + static_cast<size_t>(c)] =
preprocessing_float_to_u8(frame_ptr[i + channel_stride * static_cast<size_t>(c)]);
}
}
}
static inline sd::Tensor<float> sd_image_to_preprocessing_tensor(sd_image_t image) {
sd::Tensor<float> tensor({static_cast<int64_t>(image.width), static_cast<int64_t>(image.height), static_cast<int64_t>(image.channel), 1});
for (uint32_t y = 0; y < image.height; ++y) {
@ -39,20 +108,7 @@ static inline sd::Tensor<float> sd_image_to_preprocessing_tensor(sd_image_t imag
static inline void preprocessing_tensor_to_sd_image(const sd::Tensor<float>& tensor, uint8_t* image_data) {
GGML_ASSERT(tensor.dim() == 4);
GGML_ASSERT(tensor.shape()[3] == 1);
GGML_ASSERT(image_data != nullptr);
int width = static_cast<int>(tensor.shape()[0]);
int height = static_cast<int>(tensor.shape()[1]);
int channel = static_cast<int>(tensor.shape()[2]);
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
for (int c = 0; c < channel; ++c) {
float value = preprocessing_get_4d(tensor, x, y, c, 0);
value = std::min(1.0f, std::max(0.0f, value));
image_data[(y * width + x) * channel + c] = static_cast<uint8_t>(std::round(value * 255.0f));
}
}
}
preprocessing_tensor_frame_to_sd_image(tensor, 0, image_data);
}
static inline sd::Tensor<float> gaussian_kernel_tensor(int kernel_size) {

View File

@ -95,9 +95,7 @@ namespace Qwen {
float scale = 1.f / 32.f;
bool force_prec_f32 = false;
#ifdef SD_USE_VULKAN
force_prec_f32 = true;
#endif
// The purpose of the scale here is to prevent NaN issues in certain situations.
// For example when using CUDA but the weights are k-quants (not all prompts).
blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, force_prec_f32, scale));
@ -124,6 +122,10 @@ namespace Qwen {
auto to_v = std::dynamic_pointer_cast<Linear>(blocks["to_v"]);
auto to_out_0 = std::dynamic_pointer_cast<Linear>(blocks["to_out.0"]);
if (sd_backend_is(ctx->backend, "Vulkan")) {
to_out_0->set_force_prec_f32(true);
}
auto norm_added_q = std::dynamic_pointer_cast<UnaryBlock>(blocks["norm_added_q"]);
auto norm_added_k = std::dynamic_pointer_cast<UnaryBlock>(blocks["norm_added_k"]);
@ -410,6 +412,9 @@ namespace Qwen {
auto img = img_in->forward(ctx, x);
auto txt = txt_norm->forward(ctx, context);
txt = txt_in->forward(ctx, txt);
sd::ggml_graph_cut::mark_graph_cut(img, "qwen_image.prelude", "img");
sd::ggml_graph_cut::mark_graph_cut(txt, "qwen_image.prelude", "txt");
// sd::ggml_graph_cut::mark_graph_cut(t_emb, "qwen_image.prelude", "t_emb");
for (int i = 0; i < params.num_layers; i++) {
auto block = std::dynamic_pointer_cast<QwenImageTransformerBlock>(blocks["transformer_blocks." + std::to_string(i)]);
@ -417,6 +422,8 @@ namespace Qwen {
auto result = block->forward(ctx, img, txt, t_emb, pe, modulate_index);
img = result.first;
txt = result.second;
sd::ggml_graph_cut::mark_graph_cut(img, "qwen_image.transformer_blocks." + std::to_string(i), "img");
sd::ggml_graph_cut::mark_graph_cut(txt, "qwen_image.transformer_blocks." + std::to_string(i), "txt");
}
if (params.zero_cond_t) {

View File

@ -17,6 +17,7 @@
#include "pmid.hpp"
#include "sample-cache.h"
#include "tae.hpp"
#include "upscaler.h"
#include "vae.hpp"
#include "latent-preview.h"
@ -143,6 +144,7 @@ public:
std::string taesd_path;
sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0, 0};
bool offload_params_to_cpu = false;
float max_vram = 0.f;
bool use_pmid = false;
bool is_using_v_parameterization = false;
@ -171,60 +173,7 @@ public:
}
void init_backend() {
#ifdef SD_USE_CUDA
LOG_DEBUG("Using CUDA backend");
backend = ggml_backend_cuda_init(0);
#endif
#ifdef SD_USE_METAL
LOG_DEBUG("Using Metal backend");
backend = ggml_backend_metal_init();
#endif
#ifdef SD_USE_VULKAN
LOG_DEBUG("Using Vulkan backend");
size_t device = 0;
const int device_count = ggml_backend_vk_get_device_count();
if (device_count) {
const char* SD_VK_DEVICE = getenv("SD_VK_DEVICE");
if (SD_VK_DEVICE != nullptr) {
std::string sd_vk_device_str = SD_VK_DEVICE;
try {
device = std::stoull(sd_vk_device_str);
} catch (const std::invalid_argument&) {
LOG_WARN("SD_VK_DEVICE environment variable is not a valid integer (%s). Falling back to device 0.", SD_VK_DEVICE);
device = 0;
} catch (const std::out_of_range&) {
LOG_WARN("SD_VK_DEVICE environment variable value is out of range for `unsigned long long` type (%s). Falling back to device 0.", SD_VK_DEVICE);
device = 0;
}
if (device >= device_count) {
LOG_WARN("Cannot find targeted vulkan device (%llu). Falling back to device 0.", device);
device = 0;
}
}
LOG_INFO("Vulkan: Using device %llu", device);
backend = ggml_backend_vk_init(device);
}
if (!backend) {
LOG_WARN("Failed to initialize Vulkan backend");
}
#endif
#ifdef SD_USE_OPENCL
LOG_DEBUG("Using OpenCL backend");
// ggml_log_set(ggml_log_callback_default, nullptr); // Optional ggml logs
backend = ggml_backend_opencl_init();
if (!backend) {
LOG_WARN("Failed to initialize OpenCL backend");
}
#endif
#ifdef SD_USE_SYCL
LOG_DEBUG("Using SYCL backend");
backend = ggml_backend_sycl_init(0);
#endif
if (!backend) {
LOG_DEBUG("Using CPU backend");
backend = ggml_backend_cpu_init();
}
backend = sd_get_default_backend();
}
std::shared_ptr<RNG> get_rng(rng_type_t rng_type) {
@ -242,6 +191,7 @@ public:
vae_decode_only = sd_ctx_params->vae_decode_only;
free_params_immediately = sd_ctx_params->free_params_immediately;
offload_params_to_cpu = sd_ctx_params->offload_params_to_cpu;
max_vram = sd_ctx_params->max_vram;
bool use_tae = false;
@ -427,6 +377,10 @@ public:
bool clip_on_cpu = sd_ctx_params->keep_clip_on_cpu;
const size_t max_graph_vram_bytes = max_vram <= 0.f
? 0
: static_cast<size_t>(static_cast<double>(max_vram) * 1024.0 * 1024.0 * 1024.0);
{
clip_backend = backend;
if (clip_on_cpu && !ggml_backend_is_cpu(backend)) {
@ -516,6 +470,7 @@ public:
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend,
offload_params_to_cpu,
tensor_storage_map);
clip_vision->set_max_graph_vram_bytes(max_graph_vram_bytes);
clip_vision->alloc_params_buffer();
clip_vision->get_param_tensors(tensors);
}
@ -592,9 +547,11 @@ public:
}
}
cond_stage_model->set_max_graph_vram_bytes(max_graph_vram_bytes);
cond_stage_model->alloc_params_buffer();
cond_stage_model->get_param_tensors(tensors);
diffusion_model->set_max_graph_vram_bytes(max_graph_vram_bytes);
diffusion_model->alloc_params_buffer();
diffusion_model->get_param_tensors(tensors);
@ -603,6 +560,7 @@ public:
}
if (high_noise_diffusion_model) {
high_noise_diffusion_model->set_max_graph_vram_bytes(max_graph_vram_bytes);
high_noise_diffusion_model->alloc_params_buffer();
high_noise_diffusion_model->get_param_tensors(tensors);
}
@ -675,16 +633,19 @@ public:
} else if (use_tae && !tae_preview_only) {
LOG_INFO("using TAE for encoding / decoding");
first_stage_model = create_tae();
first_stage_model->set_max_graph_vram_bytes(max_graph_vram_bytes);
first_stage_model->alloc_params_buffer();
first_stage_model->get_param_tensors(tensors, "tae");
} else {
LOG_INFO("using VAE for encoding / decoding");
first_stage_model = create_vae();
first_stage_model->set_max_graph_vram_bytes(max_graph_vram_bytes);
first_stage_model->alloc_params_buffer();
first_stage_model->get_param_tensors(tensors, "first_stage_model");
if (use_tae && tae_preview_only) {
LOG_INFO("using TAE for preview");
preview_vae = create_tae();
preview_vae->set_max_graph_vram_bytes(max_graph_vram_bytes);
preview_vae->alloc_params_buffer();
preview_vae->get_param_tensors(tensors, "tae");
}
@ -1156,8 +1117,13 @@ public:
cond_stage_lora_models.push_back(lora);
}
}
auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(cond_stage_lora_models);
cond_stage_model->set_weight_adapter(multi_lora_adapter);
// Only attach the adapter when there are LoRAs targeting the cond_stage model.
// An empty MultiLoraAdapter still routes every linear/conv through
// forward_with_lora() instead of the direct kernel path — slower for no benefit.
if (!cond_stage_lora_models.empty()) {
auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(cond_stage_lora_models);
cond_stage_model->set_weight_adapter(multi_lora_adapter);
}
}
if (diffusion_model) {
std::vector<std::shared_ptr<LoraModel>> lora_models;
@ -1188,10 +1154,12 @@ public:
diffusion_lora_models.push_back(lora);
}
}
auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(diffusion_lora_models);
diffusion_model->set_weight_adapter(multi_lora_adapter);
if (high_noise_diffusion_model) {
high_noise_diffusion_model->set_weight_adapter(multi_lora_adapter);
if (!diffusion_lora_models.empty()) {
auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(diffusion_lora_models);
diffusion_model->set_weight_adapter(multi_lora_adapter);
if (high_noise_diffusion_model) {
high_noise_diffusion_model->set_weight_adapter(multi_lora_adapter);
}
}
}
@ -1224,8 +1192,10 @@ public:
first_stage_lora_models.push_back(lora);
}
}
auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(first_stage_lora_models);
first_stage_model->set_weight_adapter(multi_lora_adapter);
if (!first_stage_lora_models.empty()) {
auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(first_stage_lora_models);
first_stage_model->set_weight_adapter(multi_lora_adapter);
}
}
}
@ -2113,6 +2083,35 @@ enum lora_apply_mode_t str_to_lora_apply_mode(const char* str) {
return LORA_APPLY_MODE_COUNT;
}
const char* hires_upscaler_to_str[] = {
"None",
"Latent",
"Latent (nearest)",
"Latent (nearest-exact)",
"Latent (antialiased)",
"Latent (bicubic)",
"Latent (bicubic antialiased)",
"Lanczos",
"Nearest",
"Model",
};
const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler) {
if (upscaler >= SD_HIRES_UPSCALER_NONE && upscaler < SD_HIRES_UPSCALER_COUNT) {
return hires_upscaler_to_str[upscaler];
}
return NONE_STR;
}
enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str) {
for (int i = 0; i < SD_HIRES_UPSCALER_COUNT; i++) {
if (!strcmp(str, hires_upscaler_to_str[i])) {
return (enum sd_hires_upscaler_t)i;
}
}
return SD_HIRES_UPSCALER_COUNT;
}
void sd_cache_params_init(sd_cache_params_t* cache_params) {
*cache_params = {};
cache_params->mode = SD_CACHE_DISABLED;
@ -2141,6 +2140,19 @@ void sd_cache_params_init(sd_cache_params_t* cache_params) {
cache_params->spectrum_stop_percent = 0.9f;
}
void sd_hires_params_init(sd_hires_params_t* hires_params) {
*hires_params = {};
hires_params->enabled = false;
hires_params->upscaler = SD_HIRES_UPSCALER_LATENT;
hires_params->model_path = nullptr;
hires_params->scale = 2.0f;
hires_params->target_width = 0;
hires_params->target_height = 0;
hires_params->steps = 0;
hires_params->denoising_strength = 0.7f;
hires_params->upscale_tile_size = 128;
}
void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
*sd_ctx_params = {};
sd_ctx_params->vae_decode_only = true;
@ -2152,6 +2164,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
sd_ctx_params->prediction = PREDICTION_COUNT;
sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO;
sd_ctx_params->offload_params_to_cpu = false;
sd_ctx_params->max_vram = 0.f;
sd_ctx_params->enable_mmap = false;
sd_ctx_params->keep_clip_on_cpu = false;
sd_ctx_params->keep_control_net_on_cpu = false;
@ -2193,6 +2206,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
"sampler_rng_type: %s\n"
"prediction: %s\n"
"offload_params_to_cpu: %s\n"
"max_vram: %.3f\n"
"keep_clip_on_cpu: %s\n"
"keep_control_net_on_cpu: %s\n"
"keep_vae_on_cpu: %s\n"
@ -2225,6 +2239,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
sd_rng_type_name(sd_ctx_params->sampler_rng_type),
sd_prediction_name(sd_ctx_params->prediction),
BOOL_STR(sd_ctx_params->offload_params_to_cpu),
sd_ctx_params->max_vram,
BOOL_STR(sd_ctx_params->keep_clip_on_cpu),
BOOL_STR(sd_ctx_params->keep_control_net_on_cpu),
BOOL_STR(sd_ctx_params->keep_vae_on_cpu),
@ -2310,6 +2325,7 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) {
sd_img_gen_params->pm_params = {nullptr, 0, nullptr, 20.f};
sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
sd_cache_params_init(&sd_img_gen_params->cache);
sd_hires_params_init(&sd_img_gen_params->hires);
}
char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
@ -2336,7 +2352,8 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
"increase_ref_index: %s\n"
"control_strength: %.2f\n"
"photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
"VAE tiling: %s\n",
"VAE tiling: %s\n"
"hires: {enabled=%s, upscaler=%s, model_path=%s, scale=%.2f, target=%dx%d, steps=%d, denoising_strength=%.2f}\n",
SAFE_STR(sd_img_gen_params->prompt),
SAFE_STR(sd_img_gen_params->negative_prompt),
sd_img_gen_params->clip_skip,
@ -2353,7 +2370,15 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
sd_img_gen_params->pm_params.style_strength,
sd_img_gen_params->pm_params.id_images_count,
SAFE_STR(sd_img_gen_params->pm_params.id_embed_path),
BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled));
BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled),
BOOL_STR(sd_img_gen_params->hires.enabled),
sd_hires_upscaler_name(sd_img_gen_params->hires.upscaler),
SAFE_STR(sd_img_gen_params->hires.model_path),
sd_img_gen_params->hires.scale,
sd_img_gen_params->hires.target_width,
sd_img_gen_params->hires.target_height,
sd_img_gen_params->hires.steps,
sd_img_gen_params->hires.denoising_strength);
const char* cache_mode_str = "disabled";
if (sd_img_gen_params->cache.mode == SD_CACHE_EASYCACHE) {
cache_mode_str = "easycache";
@ -2390,6 +2415,14 @@ struct sd_ctx_t {
StableDiffusionGGML* sd = nullptr;
};
static bool sd_version_supports_video_generation(SDVersion version) {
return version == VERSION_SVD || sd_version_is_wan(version);
}
static bool sd_version_supports_image_generation(SDVersion version) {
return !sd_version_supports_video_generation(version);
}
sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params) {
sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t));
if (sd_ctx == nullptr) {
@ -2419,6 +2452,20 @@ void free_sd_ctx(sd_ctx_t* sd_ctx) {
free(sd_ctx);
}
SD_API bool sd_ctx_supports_image_generation(const sd_ctx_t* sd_ctx) {
if (sd_ctx == nullptr || sd_ctx->sd == nullptr) {
return false;
}
return sd_version_supports_image_generation(sd_ctx->sd->version);
}
SD_API bool sd_ctx_supports_video_generation(const sd_ctx_t* sd_ctx) {
if (sd_ctx == nullptr || sd_ctx->sd == nullptr) {
return false;
}
return sd_version_supports_video_generation(sd_ctx->sd->version);
}
enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx) {
if (sd_ctx != nullptr && sd_ctx->sd != nullptr) {
if (sd_version_is_dit(sd_ctx->sd->version)) {
@ -2435,8 +2482,10 @@ enum scheduler_t sd_get_default_scheduler(const sd_ctx_t* sd_ctx, enum sample_me
return EXPONENTIAL_SCHEDULER;
}
}
if (sample_method == LCM_SAMPLE_METHOD) {
if (sample_method == LCM_SAMPLE_METHOD || sample_method == TCD_SAMPLE_METHOD) {
return LCM_SCHEDULER;
} else if (sample_method == DDIM_TRAILING_SAMPLE_METHOD) {
return SIMPLE_SCHEDULER;
}
return DISCRETE_SCHEDULER;
}
@ -2510,6 +2559,7 @@ struct GenerationRequest {
sd_guidance_params_t guidance = {};
sd_guidance_params_t high_noise_guidance = {};
sd_pm_params_t pm_params = {};
sd_hires_params_t hires = {};
int frames = -1;
float vace_strength = 1.f;
@ -2531,6 +2581,7 @@ struct GenerationRequest {
auto_resize_ref_image = sd_img_gen_params->auto_resize_ref_image;
guidance = sd_img_gen_params->sample_params.guidance;
pm_params = sd_img_gen_params->pm_params;
hires = sd_img_gen_params->hires;
cache_params = &sd_img_gen_params->cache;
resolve(sd_ctx);
}
@ -2553,26 +2604,76 @@ struct GenerationRequest {
}
void align_generation_request_size() {
align_image_size(&width, &height, "generation request");
}
void align_image_size(int* target_width, int* target_height, const char* label) {
int spatial_multiple = vae_scale_factor * diffusion_model_down_factor;
int width_offset = align_up_offset(width, spatial_multiple);
int height_offset = align_up_offset(height, spatial_multiple);
int width_offset = align_up_offset(*target_width, spatial_multiple);
int height_offset = align_up_offset(*target_height, spatial_multiple);
if (width_offset <= 0 && height_offset <= 0) {
return;
}
int original_width = width;
int original_height = height;
int original_width = *target_width;
int original_height = *target_height;
width += width_offset;
height += height_offset;
LOG_WARN("align up %dx%d to %dx%d (multiple=%d)",
*target_width += width_offset;
*target_height += height_offset;
LOG_WARN("align %s up %dx%d to %dx%d (multiple=%d)",
label,
original_width,
original_height,
width,
height,
*target_width,
*target_height,
spatial_multiple);
}
void resolve_hires() {
if (!hires.enabled) {
return;
}
if (hires.upscaler == SD_HIRES_UPSCALER_NONE) {
hires.enabled = false;
return;
}
if (hires.upscaler < SD_HIRES_UPSCALER_NONE || hires.upscaler >= SD_HIRES_UPSCALER_COUNT) {
LOG_WARN("hires upscaler '%d' is invalid, disabling hires", hires.upscaler);
hires.enabled = false;
return;
}
if (hires.upscaler == SD_HIRES_UPSCALER_MODEL && strlen(SAFE_STR(hires.model_path)) == 0) {
LOG_WARN("hires model upscaler requires a model path, disabling hires");
hires.enabled = false;
return;
}
if (hires.scale <= 0.f && hires.target_width <= 0 && hires.target_height <= 0) {
LOG_WARN("hires scale must be positive when no target size is set, disabling hires");
hires.enabled = false;
return;
}
hires.denoising_strength = std::clamp(hires.denoising_strength, 0.0001f, 1.f);
hires.steps = std::max(0, hires.steps);
if (hires.target_width > 0 && hires.target_height > 0) {
// pass
} else if (hires.target_width > 0) {
hires.target_height = hires.target_width;
} else if (hires.target_height > 0) {
hires.target_width = hires.target_height;
} else {
hires.target_width = static_cast<int>(std::round(width * hires.scale));
hires.target_height = static_cast<int>(std::round(height * hires.scale));
}
if (hires.target_width <= 0 || hires.target_height <= 0) {
LOG_WARN("hires target size is not positive, disabling hires");
hires.enabled = false;
return;
}
align_image_size(&hires.target_width, &hires.target_height, "hires target");
}
static void resolve_guidance(sd_ctx_t* sd_ctx,
sd_guidance_params_t* guidance,
bool* use_uncond,
@ -2613,6 +2714,7 @@ struct GenerationRequest {
void resolve(sd_ctx_t* sd_ctx) {
align_generation_request_size();
resolve_hires();
seed = resolve_seed(seed);
resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_cond);
@ -3103,7 +3205,7 @@ static sd_image_t* decode_image_outputs(sd_ctx_t* sd_ctx,
}
decoded_images.push_back(std::move(image));
int64_t t2 = ggml_time_ms();
LOG_INFO("latent %" PRId64 " decoded, taking %.2fs", i + 1, (t2 - t1) * 1.0f / 1000);
LOG_INFO("latent %zu decoded, taking %.2fs", i + 1, (t2 - t1) * 1.0f / 1000);
}
int64_t t4 = ggml_time_ms();
@ -3125,6 +3227,135 @@ static sd_image_t* decode_image_outputs(sd_ctx_t* sd_ctx,
return result_images;
}
static sd::Tensor<float> upscale_hires_latent(sd_ctx_t* sd_ctx,
const sd::Tensor<float>& latent,
const GenerationRequest& request,
UpscalerGGML* upscaler) {
auto get_hires_latent_target_shape = [&]() {
std::vector<int64_t> target_shape = latent.shape();
if (target_shape.size() < 2) {
target_shape.clear();
return target_shape;
}
target_shape[0] = request.hires.target_width / request.vae_scale_factor;
target_shape[1] = request.hires.target_height / request.vae_scale_factor;
return target_shape;
};
if (request.hires.upscaler == SD_HIRES_UPSCALER_LATENT ||
request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_NEAREST ||
request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_NEAREST_EXACT ||
request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_ANTIALIASED ||
request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_BICUBIC ||
request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_BICUBIC_ANTIALIASED) {
std::vector<int64_t> target_shape = get_hires_latent_target_shape();
if (target_shape.empty()) {
LOG_ERROR("latent has invalid shape for hires upscale");
return {};
}
sd::ops::InterpolateMode mode = sd::ops::InterpolateMode::Nearest;
bool antialias = false;
switch (request.hires.upscaler) {
case SD_HIRES_UPSCALER_LATENT:
mode = sd::ops::InterpolateMode::Bilinear;
break;
case SD_HIRES_UPSCALER_LATENT_NEAREST:
mode = sd::ops::InterpolateMode::Nearest;
break;
case SD_HIRES_UPSCALER_LATENT_NEAREST_EXACT:
mode = sd::ops::InterpolateMode::NearestExact;
break;
case SD_HIRES_UPSCALER_LATENT_ANTIALIASED:
mode = sd::ops::InterpolateMode::Bilinear;
antialias = true;
break;
case SD_HIRES_UPSCALER_LATENT_BICUBIC:
mode = sd::ops::InterpolateMode::Bicubic;
break;
case SD_HIRES_UPSCALER_LATENT_BICUBIC_ANTIALIASED:
mode = sd::ops::InterpolateMode::Bicubic;
antialias = true;
break;
default:
break;
}
LOG_INFO("hires %s upscale %" PRId64 "x%" PRId64 " -> %" PRId64 "x%" PRId64,
sd_hires_upscaler_name(request.hires.upscaler),
latent.shape()[0],
latent.shape()[1],
target_shape[0],
target_shape[1]);
return sd::ops::interpolate(latent, target_shape, mode, false, antialias);
} else if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL ||
request.hires.upscaler == SD_HIRES_UPSCALER_LANCZOS ||
request.hires.upscaler == SD_HIRES_UPSCALER_NEAREST) {
if (sd_ctx->sd->vae_decode_only) {
LOG_ERROR("hires %s upscaler requires VAE encoder weights; create the context with vae_decode_only=false",
sd_hires_upscaler_name(request.hires.upscaler));
return {};
}
if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL && upscaler == nullptr) {
LOG_ERROR("hires model upscaler context is null");
return {};
}
sd::Tensor<float> decoded = sd_ctx->sd->decode_first_stage(latent);
if (decoded.empty()) {
LOG_ERROR("decode_first_stage failed before hires %s upscale",
sd_hires_upscaler_name(request.hires.upscaler));
return {};
}
sd::Tensor<float> upscaled_tensor;
if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL) {
upscaled_tensor = upscaler->upscale_tensor(decoded);
if (upscaled_tensor.empty()) {
LOG_ERROR("hires model upscale failed");
return {};
}
if (upscaled_tensor.shape()[0] != request.hires.target_width ||
upscaled_tensor.shape()[1] != request.hires.target_height) {
upscaled_tensor = sd::ops::interpolate(upscaled_tensor,
{request.hires.target_width,
request.hires.target_height,
upscaled_tensor.shape()[2],
upscaled_tensor.shape()[3]});
}
} else {
sd::ops::InterpolateMode mode = request.hires.upscaler == SD_HIRES_UPSCALER_LANCZOS
? sd::ops::InterpolateMode::Lanczos
: sd::ops::InterpolateMode::Nearest;
LOG_INFO("hires %s image upscale %" PRId64 "x%" PRId64 " -> %dx%d",
sd_hires_upscaler_name(request.hires.upscaler),
decoded.shape()[0],
decoded.shape()[1],
request.hires.target_width,
request.hires.target_height);
upscaled_tensor = sd::ops::interpolate(decoded,
{request.hires.target_width,
request.hires.target_height,
decoded.shape()[2],
decoded.shape()[3]},
mode);
upscaled_tensor = sd::ops::clamp(upscaled_tensor, 0.0f, 1.0f);
}
sd::Tensor<float> upscaled_latent = sd_ctx->sd->encode_first_stage(upscaled_tensor);
if (upscaled_latent.empty()) {
LOG_ERROR("encode_first_stage failed after hires %s upscale",
sd_hires_upscaler_name(request.hires.upscaler));
}
return upscaled_latent;
}
LOG_ERROR("unsupported hires upscaler '%s'", sd_hires_upscaler_name(request.hires.upscaler));
return {};
}
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params) {
if (sd_ctx == nullptr || sd_img_gen_params == nullptr) {
return nullptr;
@ -3212,14 +3443,143 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
}
return nullptr;
}
if (sd_ctx->sd->free_params_immediately) {
if (sd_ctx->sd->free_params_immediately && !request.hires.enabled) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
int64_t denoise_end = ggml_time_ms();
LOG_INFO("generating %" PRId64 " latent images completed, taking %.2fs",
LOG_INFO("generating %zu latent images completed, taking %.2fs",
final_latents.size(),
(denoise_end - denoise_start) * 1.0f / 1000);
if (request.hires.enabled && request.hires.target_width > 0) {
LOG_INFO("hires fix: upscaling to %dx%d", request.hires.target_width, request.hires.target_height);
std::unique_ptr<UpscalerGGML> hires_upscaler;
if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL) {
LOG_INFO("hires fix: loading model upscaler from '%s'", request.hires.model_path);
hires_upscaler = std::make_unique<UpscalerGGML>(sd_ctx->sd->n_threads,
false,
request.hires.upscale_tile_size);
const size_t max_graph_vram_bytes = sd_ctx->sd->max_vram <= 0.f
? 0
: static_cast<size_t>(static_cast<double>(sd_ctx->sd->max_vram) * 1024.0 * 1024.0 * 1024.0);
hires_upscaler->set_max_graph_vram_bytes(max_graph_vram_bytes);
if (!hires_upscaler->load_from_file(request.hires.model_path,
sd_ctx->sd->offload_params_to_cpu,
sd_ctx->sd->n_threads)) {
LOG_ERROR("load hires model upscaler failed");
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
return nullptr;
}
}
int hires_steps = request.hires.steps > 0 ? request.hires.steps : plan.sample_steps;
// sd-webui behavior: scale up total steps so trimming by denoising_strength yields exactly hires_steps effective steps,
// unlike img2img which trims from a fixed step count
hires_steps = static_cast<int>(hires_steps / request.hires.denoising_strength);
std::vector<float> hires_sigmas = sd_ctx->sd->denoiser->get_sigmas(
hires_steps,
sd_ctx->sd->get_image_seq_len(request.hires.target_height, request.hires.target_width),
sd_img_gen_params->sample_params.scheduler,
sd_ctx->sd->version);
size_t t_enc = static_cast<size_t>(hires_steps * request.hires.denoising_strength);
if (t_enc >= static_cast<size_t>(hires_steps)) {
t_enc = static_cast<size_t>(hires_steps) - 1;
}
std::vector<float> hires_sigma_sched(hires_sigmas.begin() + hires_steps - static_cast<int>(t_enc) - 1,
hires_sigmas.end());
LOG_INFO("hires fix: %d steps, denoising_strength=%.2f, sigma_sched_size=%zu",
hires_steps,
request.hires.denoising_strength,
hires_sigma_sched.size());
std::vector<sd::Tensor<float>> hires_final_latents;
int64_t hires_denoise_start = ggml_time_ms();
for (int b = 0; b < (int)final_latents.size(); b++) {
int64_t cur_seed = request.seed + b;
sd_ctx->sd->rng->manual_seed(cur_seed);
sd_ctx->sd->sampler_rng->manual_seed(cur_seed);
sd::Tensor<float> upscaled = upscale_hires_latent(sd_ctx,
final_latents[b],
request,
hires_upscaler.get());
if (upscaled.empty()) {
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
return nullptr;
}
sd::Tensor<float> noise = sd::randn_like<float>(upscaled, sd_ctx->sd->rng);
sd::Tensor<float> hires_denoise_mask;
if (!latents.denoise_mask.empty()) {
std::vector<int64_t> mask_shape = latents.denoise_mask.shape();
mask_shape[0] = upscaled.shape()[0];
mask_shape[1] = upscaled.shape()[1];
hires_denoise_mask = sd::ops::interpolate(latents.denoise_mask,
mask_shape,
sd::ops::InterpolateMode::NearestMax);
}
int64_t hires_sample_start = ggml_time_ms();
sd::Tensor<float> x_0 = sd_ctx->sd->sample(sd_ctx->sd->diffusion_model,
true,
upscaled,
std::move(noise),
embeds.cond,
embeds.uncond,
embeds.img_cond,
embeds.id_cond,
latents.control_image,
request.control_strength,
request.guidance,
plan.eta,
request.shifted_timestep,
plan.sample_method,
sd_ctx->sd->is_flow_denoiser(),
hires_sigma_sched,
plan.start_merge_step,
latents.ref_latents,
request.increase_ref_index,
hires_denoise_mask,
sd::Tensor<float>(),
1.f,
request.cache_params);
int64_t hires_sample_end = ggml_time_ms();
if (!x_0.empty()) {
LOG_INFO("hires sampling %d/%d completed, taking %.2fs",
b + 1,
(int)final_latents.size(),
(hires_sample_end - hires_sample_start) * 1.0f / 1000);
hires_final_latents.push_back(std::move(x_0));
continue;
}
LOG_ERROR("hires sampling for image %d/%d failed after %.2fs",
b + 1,
(int)final_latents.size(),
(hires_sample_end - hires_sample_start) * 1.0f / 1000);
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
return nullptr;
}
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
int64_t hires_denoise_end = ggml_time_ms();
LOG_INFO("hires fix completed, taking %.2fs", (hires_denoise_end - hires_denoise_start) * 1.0f / 1000);
final_latents = std::move(hires_final_latents);
}
auto result = decode_image_outputs(sd_ctx, request, final_latents);
if (result == nullptr) {
return nullptr;

View File

@ -251,7 +251,8 @@ public:
ggml_tensor* x,
ggml_tensor* past_bias = nullptr,
ggml_tensor* attention_mask = nullptr,
ggml_tensor* relative_position_bucket = nullptr) {
ggml_tensor* relative_position_bucket = nullptr,
const std::string& graph_cut_prefix = "") {
// x: [N, n_token, model_dim]
for (int i = 0; i < num_layers; i++) {
auto block = std::dynamic_pointer_cast<T5Block>(blocks["block." + std::to_string(i)]);
@ -259,6 +260,9 @@ public:
auto ret = block->forward(ctx, x, past_bias, attention_mask, relative_position_bucket);
x = ret.first;
past_bias = ret.second;
if (!graph_cut_prefix.empty()) {
sd::ggml_graph_cut::mark_graph_cut(x, graph_cut_prefix + ".block." + std::to_string(i), "x");
}
}
auto final_layer_norm = std::dynamic_pointer_cast<T5LayerNorm>(blocks["final_layer_norm"]);
@ -305,7 +309,8 @@ public:
auto encoder = std::dynamic_pointer_cast<T5Stack>(blocks["encoder"]);
auto x = shared->forward(ctx, input_ids);
x = encoder->forward(ctx, x, past_bias, attention_mask, relative_position_bucket);
sd::ggml_graph_cut::mark_graph_cut(x, "t5.prelude", "x");
x = encoder->forward(ctx, x, past_bias, attention_mask, relative_position_bucket, "t5");
return x;
}
};

View File

@ -815,11 +815,202 @@ namespace sd {
namespace ops {
enum class InterpolateMode {
Nearest,
NearestExact,
NearestMax,
NearestMin,
NearestAvg,
Bilinear,
Bicubic,
Lanczos,
};
inline bool is_nearest_like_interpolate_mode(InterpolateMode mode) {
return mode == InterpolateMode::Nearest ||
mode == InterpolateMode::NearestExact ||
mode == InterpolateMode::NearestMax ||
mode == InterpolateMode::NearestMin ||
mode == InterpolateMode::NearestAvg;
}
inline bool is_2d_filter_interpolate_mode(InterpolateMode mode) {
return mode == InterpolateMode::Bilinear ||
mode == InterpolateMode::Bicubic ||
mode == InterpolateMode::Lanczos;
}
inline int64_t nearest_exact_interpolate_index(int64_t output_index,
int64_t input_size,
int64_t output_size) {
const double scale = static_cast<double>(input_size) / static_cast<double>(output_size);
const double center = (static_cast<double>(output_index) + 0.5) * scale - 0.5;
return std::min(std::max<int64_t>(static_cast<int64_t>(std::floor(center + 0.5)), 0), input_size - 1);
}
inline double linear_interpolate_weight(double x) {
x = std::abs(x);
return x < 1.0 ? 1.0 - x : 0.0;
}
inline double cubic_interpolate_weight(double x) {
constexpr double a = -0.75; // Match PyTorch bicubic interpolation.
x = std::abs(x);
if (x <= 1.0) {
return ((a + 2.0) * x - (a + 3.0)) * x * x + 1.0;
}
if (x < 2.0) {
return ((a * x - 5.0 * a) * x + 8.0 * a) * x - 4.0 * a;
}
return 0.0;
}
inline double sinc(double x) {
constexpr double pi = 3.14159265358979323846;
if (std::abs(x) < 1e-12) {
return 1.0;
}
const double pix = pi * x;
return std::sin(pix) / pix;
}
inline double lanczos_interpolate_weight(double x) {
constexpr double radius = 3.0;
x = std::abs(x);
if (x >= radius) {
return 0.0;
}
return sinc(x) * sinc(x / radius);
}
struct InterpolateContributor {
int64_t index;
double weight;
};
inline std::vector<std::vector<InterpolateContributor>> make_interpolate_contributors(
int64_t input_size,
int64_t output_size,
InterpolateMode mode,
bool antialias) {
std::vector<std::vector<InterpolateContributor>> contributors(static_cast<size_t>(output_size));
const double scale = static_cast<double>(input_size) / static_cast<double>(output_size);
const double filter_scale = antialias ? std::max(1.0, scale) : 1.0;
for (int64_t out = 0; out < output_size; ++out) {
const double center = (static_cast<double>(out) + 0.5) * scale - 0.5;
int64_t start = 0;
int64_t end = 0;
if (mode == InterpolateMode::Bilinear) {
const double support = filter_scale;
start = static_cast<int64_t>(std::ceil(center - support));
end = static_cast<int64_t>(std::floor(center + support));
} else if (mode == InterpolateMode::Bicubic) {
const double support = 2.0 * filter_scale;
start = static_cast<int64_t>(std::ceil(center - support));
end = static_cast<int64_t>(std::floor(center + support));
} else if (mode == InterpolateMode::Lanczos) {
const double support = 3.0 * filter_scale;
start = static_cast<int64_t>(std::ceil(center - support));
end = static_cast<int64_t>(std::floor(center + support));
} else {
tensor_throw_invalid_argument("Unsupported 2D filter interpolate mode: mode=" +
std::to_string(static_cast<int>(mode)));
}
double weight_sum = 0.0;
std::vector<InterpolateContributor>& axis_contributors = contributors[static_cast<size_t>(out)];
axis_contributors.reserve(static_cast<size_t>(end - start + 1));
for (int64_t in = start; in <= end; ++in) {
double weight = 0.0;
if (mode == InterpolateMode::Bilinear) {
weight = linear_interpolate_weight((center - static_cast<double>(in)) / filter_scale);
} else if (mode == InterpolateMode::Bicubic) {
weight = cubic_interpolate_weight((center - static_cast<double>(in)) / filter_scale);
} else {
weight = lanczos_interpolate_weight((center - static_cast<double>(in)) / filter_scale);
}
if (weight == 0.0) {
continue;
}
const int64_t clamped_index = std::min(std::max<int64_t>(in, 0), input_size - 1);
axis_contributors.push_back({clamped_index, weight});
weight_sum += weight;
}
if ((antialias || mode == InterpolateMode::Lanczos) &&
std::abs(weight_sum) > 1e-12) {
for (auto& contributor : axis_contributors) {
contributor.weight /= weight_sum;
}
}
if (axis_contributors.empty()) {
const int64_t nearest = std::min(
std::max<int64_t>(static_cast<int64_t>(std::floor(center + 0.5)), 0),
input_size - 1);
axis_contributors.push_back({nearest, 1.0});
}
}
return contributors;
}
template <typename T>
inline Tensor<T> interpolate_2d_filter(const Tensor<T>& input,
const std::vector<int64_t>& output_shape,
InterpolateMode mode,
bool antialias) {
if (input.dim() < 2) {
tensor_throw_invalid_argument("2D filter interpolate requires rank >= 2: input_shape=" +
tensor_shape_to_string(input.shape()) + ", output_shape=" +
tensor_shape_to_string(output_shape));
}
for (size_t i = 2; i < output_shape.size(); ++i) {
if (input.shape()[i] != output_shape[i]) {
tensor_throw_invalid_argument("2D filter interpolate only supports resizing dimensions 0 and 1: input_shape=" +
tensor_shape_to_string(input.shape()) + ", output_shape=" +
tensor_shape_to_string(output_shape));
}
}
Tensor<T> output(output_shape);
const int64_t input_width = input.shape()[0];
const int64_t input_height = input.shape()[1];
const int64_t output_width = output_shape[0];
const int64_t output_height = output_shape[1];
const int64_t input_plane = input_width * input_height;
const int64_t output_plane = output_width * output_height;
const int64_t plane_count = input.numel() / input_plane;
auto x_contributors = make_interpolate_contributors(input_width, output_width, mode, antialias);
auto y_contributors = make_interpolate_contributors(input_height, output_height, mode, antialias);
for (int64_t plane = 0; plane < plane_count; ++plane) {
const int64_t input_plane_offset = plane * input_plane;
const int64_t output_plane_offset = plane * output_plane;
for (int64_t y = 0; y < output_height; ++y) {
const auto& y_axis = y_contributors[static_cast<size_t>(y)];
for (int64_t x = 0; x < output_width; ++x) {
const auto& x_axis = x_contributors[static_cast<size_t>(x)];
double value = 0.0;
for (const auto& yc : y_axis) {
const int64_t input_row_offset = input_plane_offset + yc.index * input_width;
for (const auto& xc : x_axis) {
value += static_cast<double>(input.data()[input_row_offset + xc.index]) *
xc.weight * yc.weight;
}
}
output.data()[output_plane_offset + y * output_width + x] = static_cast<T>(value);
}
}
}
return output;
}
inline int64_t normalize_slice_bound(int64_t index, int64_t dim_size) {
if (index < 0) {
index += dim_size;
@ -1014,17 +1205,20 @@ namespace sd {
inline Tensor<T> interpolate(const Tensor<T>& input,
std::vector<int64_t> output_shape,
InterpolateMode mode = InterpolateMode::Nearest,
bool align_corners = false) {
const bool is_nearest_like_mode = (mode == InterpolateMode::Nearest ||
mode == InterpolateMode::NearestMax ||
mode == InterpolateMode::NearestMin ||
mode == InterpolateMode::NearestAvg);
if (!is_nearest_like_mode) {
tensor_throw_invalid_argument("Only nearest-like interpolate modes are implemented, got mode=" +
bool align_corners = false,
bool antialias = false) {
const bool is_nearest_like_mode = is_nearest_like_interpolate_mode(mode);
const bool is_2d_filter_mode = is_2d_filter_interpolate_mode(mode);
if (!is_nearest_like_mode && !is_2d_filter_mode) {
tensor_throw_invalid_argument("Unsupported interpolate mode: mode=" +
std::to_string(static_cast<int>(mode)));
}
if (antialias && !is_2d_filter_mode) {
tensor_throw_invalid_argument("Tensor interpolate antialias requires a 2D filter mode: mode=" +
std::to_string(static_cast<int>(mode)));
}
if (align_corners) {
tensor_throw_invalid_argument("align_corners is not supported for nearest-like interpolate: input_shape=" +
tensor_throw_invalid_argument("align_corners is not supported for tensor interpolate: input_shape=" +
tensor_shape_to_string(input.shape()) + ", output_shape=" +
tensor_shape_to_string(output_shape));
}
@ -1051,6 +1245,10 @@ namespace sd {
}
}
if (is_2d_filter_mode) {
return interpolate_2d_filter(input, output_shape, mode, antialias);
}
bool has_downsampling = false;
for (int64_t i = 0; i < input.dim(); ++i) {
if (input.shape()[i] > output_shape[i]) {
@ -1060,12 +1258,20 @@ namespace sd {
}
Tensor<T> output(std::move(output_shape));
if (mode == InterpolateMode::Nearest || !has_downsampling) {
if (mode == InterpolateMode::Nearest ||
mode == InterpolateMode::NearestExact ||
!has_downsampling) {
for (int64_t flat = 0; flat < output.numel(); ++flat) {
std::vector<int64_t> output_coord = tensor_unravel_index(flat, output.shape());
std::vector<int64_t> input_coord(static_cast<size_t>(input.dim()), 0);
for (size_t i = 0; i < static_cast<size_t>(input.dim()); ++i) {
input_coord[i] = output_coord[i] * input.shape()[i] / output.shape()[i];
if (mode == InterpolateMode::NearestExact) {
input_coord[i] = nearest_exact_interpolate_index(output_coord[i],
input.shape()[i],
output.shape()[i]);
} else {
input_coord[i] = output_coord[i] * input.shape()[i] / output.shape()[i];
}
}
output[flat] = input.index(input_coord);
}
@ -1083,6 +1289,12 @@ namespace sd {
return T(0);
case InterpolateMode::Nearest:
return T(0);
case InterpolateMode::NearestExact:
return T(0);
case InterpolateMode::Bilinear:
case InterpolateMode::Bicubic:
case InterpolateMode::Lanczos:
break;
}
tensor_throw_invalid_argument("Unsupported interpolate mode: mode=" +
@ -1102,6 +1314,12 @@ namespace sd {
break;
case InterpolateMode::Nearest:
break;
case InterpolateMode::NearestExact:
break;
case InterpolateMode::Bilinear:
case InterpolateMode::Bicubic:
case InterpolateMode::Lanczos:
break;
}
};
@ -1157,17 +1375,20 @@ namespace sd {
const std::optional<std::vector<int64_t>>& size,
const std::optional<std::vector<double>>& scale_factor,
InterpolateMode mode = InterpolateMode::Nearest,
bool align_corners = false) {
const bool is_nearest_like_mode = (mode == InterpolateMode::Nearest ||
mode == InterpolateMode::NearestMax ||
mode == InterpolateMode::NearestMin ||
mode == InterpolateMode::NearestAvg);
if (!is_nearest_like_mode) {
tensor_throw_invalid_argument("Only nearest-like interpolate modes are implemented, got mode=" +
bool align_corners = false,
bool antialias = false) {
const bool is_nearest_like_mode = is_nearest_like_interpolate_mode(mode);
const bool is_2d_filter_mode = is_2d_filter_interpolate_mode(mode);
if (!is_nearest_like_mode && !is_2d_filter_mode) {
tensor_throw_invalid_argument("Unsupported interpolate mode: mode=" +
std::to_string(static_cast<int>(mode)));
}
if (antialias && !is_2d_filter_mode) {
tensor_throw_invalid_argument("Tensor interpolate antialias requires a 2D filter mode: mode=" +
std::to_string(static_cast<int>(mode)));
}
if (align_corners) {
tensor_throw_invalid_argument("align_corners is not supported for nearest-like interpolate: input_shape=" +
tensor_throw_invalid_argument("align_corners is not supported for tensor interpolate: input_shape=" +
tensor_shape_to_string(input.shape()));
}
if (size.has_value() == scale_factor.has_value()) {
@ -1211,7 +1432,7 @@ namespace sd {
}
}
return interpolate(input, std::move(output_shape), mode, align_corners);
return interpolate(input, std::move(output_shape), mode, align_corners, antialias);
}
template <typename T>
@ -1219,12 +1440,14 @@ namespace sd {
const std::optional<std::vector<int64_t>>& size,
double scale_factor,
InterpolateMode mode = InterpolateMode::Nearest,
bool align_corners = false) {
bool align_corners = false,
bool antialias = false) {
return interpolate(input,
size,
std::vector<double>(size.has_value() ? size->size() : input.dim(), scale_factor),
mode,
align_corners);
align_corners,
antialias);
}
template <typename T>

View File

@ -62,7 +62,7 @@ void CLIPTokenizer::load_from_merges(const std::string& merges_utf8_str) {
}
vocab.push_back(utf8_to_utf32("<|startoftext|>"));
vocab.push_back(utf8_to_utf32("<|endoftext|>"));
LOG_DEBUG("vocab size: %llu", vocab.size());
LOG_DEBUG("vocab size: %zu", vocab.size());
int i = 0;
for (const auto& token : vocab) {
encoder[token] = i;

View File

@ -28,7 +28,7 @@ void MistralTokenizer::load_from_merges(const std::string& merges_utf8_str, cons
byte_decoder[pair.second] = pair.first;
}
std::vector<std::u32string> merges = split_utf32(merges_utf8_str);
LOG_DEBUG("merges size %llu", merges.size());
LOG_DEBUG("merges size %zu", merges.size());
std::vector<std::pair<std::u32string, std::u32string>> merge_pairs;
for (const auto& merge : merges) {
size_t space_pos = merge.find(' ');

View File

@ -11,7 +11,7 @@ void Qwen2Tokenizer::load_from_merges(const std::string& merges_utf8_str) {
}
std::vector<std::u32string> merges = split_utf32(merges_utf8_str);
LOG_DEBUG("merges size %llu", merges.size());
LOG_DEBUG("merges size %zu", merges.size());
std::vector<std::pair<std::u32string, std::u32string>> merge_pairs;
for (const auto& merge : merges) {
size_t space_pos = merge.find(' ');

View File

@ -482,12 +482,14 @@ public:
emb = ggml_add(ctx->ggml_ctx, emb, label_emb); // [N, time_embed_dim]
}
// sd::ggml_graph_cut::mark_graph_cut(emb, "unet.prelude", "emb");
// input_blocks
std::vector<ggml_tensor*> hs;
// input block 0
auto h = input_blocks_0_0->forward(ctx, x);
sd::ggml_graph_cut::mark_graph_cut(h, "unet.input_blocks.0", "h");
ggml_set_name(h, "bench-start");
hs.push_back(h);
@ -505,6 +507,7 @@ public:
std::string name = "input_blocks." + std::to_string(input_block_idx) + ".1";
h = attention_layer_forward(name, ctx, h, context, num_video_frames); // [N, mult*model_channels, h, w]
}
sd::ggml_graph_cut::mark_graph_cut(h, "unet.input_blocks." + std::to_string(input_block_idx), "h");
hs.push_back(h);
}
if (tiny_unet) {
@ -518,6 +521,7 @@ public:
auto block = std::dynamic_pointer_cast<DownSampleBlock>(blocks[name]);
h = block->forward(ctx, h); // [N, mult*model_channels, h/(2^(i+1)), w/(2^(i+1))]
// sd::ggml_graph_cut::mark_graph_cut(h, "unet.input_blocks." + std::to_string(input_block_idx), "h");
hs.push_back(h);
}
}
@ -531,6 +535,7 @@ public:
h = resblock_forward("middle_block.2", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8]
}
}
sd::ggml_graph_cut::mark_graph_cut(h, "unet.middle_block", "h");
if (controls.size() > 0) {
auto cs = ggml_ext_scale(ctx->ggml_ctx, controls[controls.size() - 1], control_strength, true);
h = ggml_add(ctx->ggml_ctx, h, cs); // middle control
@ -581,6 +586,7 @@ public:
}
output_block_idx += 1;
sd::ggml_graph_cut::mark_graph_cut(h, "unet.output_blocks." + std::to_string(output_block_idx - 1), "h");
}
}

View File

@ -1,125 +1,106 @@
#include "esrgan.hpp"
#include "upscaler.h"
#include "ggml_extend.hpp"
#include "model.h"
#include "stable-diffusion.h"
#include "util.h"
struct UpscalerGGML {
ggml_backend_t backend = nullptr; // general backend
ggml_type model_data_type = GGML_TYPE_F16;
std::shared_ptr<ESRGAN> esrgan_upscaler;
std::string esrgan_path;
int n_threads;
bool direct = false;
int tile_size = 128;
UpscalerGGML::UpscalerGGML(int n_threads,
bool direct,
int tile_size)
: n_threads(n_threads),
direct(direct),
tile_size(tile_size) {
}
UpscalerGGML(int n_threads,
bool direct = false,
int tile_size = 128)
: n_threads(n_threads),
direct(direct),
tile_size(tile_size) {
void UpscalerGGML::set_max_graph_vram_bytes(size_t max_vram_bytes) {
max_graph_vram_bytes = max_vram_bytes;
if (esrgan_upscaler) {
esrgan_upscaler->set_max_graph_vram_bytes(max_vram_bytes);
}
}
bool load_from_file(const std::string& esrgan_path,
bool offload_params_to_cpu,
int n_threads) {
ggml_log_set(ggml_log_callback_default, nullptr);
#ifdef SD_USE_CUDA
LOG_DEBUG("Using CUDA backend");
backend = ggml_backend_cuda_init(0);
#endif
#ifdef SD_USE_METAL
LOG_DEBUG("Using Metal backend");
backend = ggml_backend_metal_init();
#endif
#ifdef SD_USE_VULKAN
LOG_DEBUG("Using Vulkan backend");
backend = ggml_backend_vk_init(0);
#endif
#ifdef SD_USE_OPENCL
LOG_DEBUG("Using OpenCL backend");
backend = ggml_backend_opencl_init();
#endif
#ifdef SD_USE_SYCL
LOG_DEBUG("Using SYCL backend");
backend = ggml_backend_sycl_init(0);
#endif
ModelLoader model_loader;
if (!model_loader.init_from_file_and_convert_name(esrgan_path)) {
LOG_ERROR("init model loader from file failed: '%s'", esrgan_path.c_str());
}
model_loader.set_wtype_override(model_data_type);
if (!backend) {
LOG_DEBUG("Using CPU backend");
backend = ggml_backend_cpu_init();
}
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, tile_size, model_loader.get_tensor_storage_map());
if (direct) {
esrgan_upscaler->set_conv2d_direct_enabled(true);
}
if (!esrgan_upscaler->load_from_file(esrgan_path, n_threads)) {
return false;
}
return true;
bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
bool offload_params_to_cpu,
int n_threads) {
ggml_log_set(ggml_log_callback_default, nullptr);
backend = sd_get_default_backend();
ModelLoader model_loader;
if (!model_loader.init_from_file_and_convert_name(esrgan_path)) {
LOG_ERROR("init model loader from file failed: '%s'", esrgan_path.c_str());
}
sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor) {
sd::Tensor<float> upscaled;
if (tile_size <= 0 || (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size)) {
upscaled = esrgan_upscaler->compute(n_threads, input_tensor);
} else {
auto on_processing = [&](const sd::Tensor<float>& input_tile) -> sd::Tensor<float> {
auto output_tile = esrgan_upscaler->compute(n_threads, input_tile);
if (output_tile.empty()) {
LOG_ERROR("esrgan compute failed while processing a tile");
return {};
}
return output_tile;
};
upscaled = process_tiles_2d(input_tensor,
static_cast<int>(input_tensor.shape()[0] * esrgan_upscaler->scale),
static_cast<int>(input_tensor.shape()[1] * esrgan_upscaler->scale),
esrgan_upscaler->scale,
tile_size,
tile_size,
0.25f,
false,
false,
on_processing);
}
esrgan_upscaler->free_compute_buffer();
if (upscaled.empty()) {
LOG_ERROR("esrgan compute failed");
return {};
}
return upscaled;
model_loader.set_wtype_override(model_data_type);
if (!backend) {
LOG_DEBUG("Using CPU backend");
backend = ggml_backend_cpu_init();
}
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, tile_size, model_loader.get_tensor_storage_map());
esrgan_upscaler->set_max_graph_vram_bytes(max_graph_vram_bytes);
if (direct) {
esrgan_upscaler->set_conv2d_direct_enabled(true);
}
if (!esrgan_upscaler->load_from_file(esrgan_path, n_threads)) {
return false;
}
return true;
}
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor) {
// upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth
sd_image_t upscaled_image = {0, 0, 0, nullptr};
int output_width = (int)input_image.width * esrgan_upscaler->scale;
int output_height = (int)input_image.height * esrgan_upscaler->scale;
LOG_INFO("upscaling from (%i x %i) to (%i x %i)",
input_image.width, input_image.height, output_width, output_height);
sd::Tensor<float> UpscalerGGML::upscale_tensor(const sd::Tensor<float>& input_tensor) {
sd::Tensor<float> upscaled;
if (tile_size <= 0 || (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size)) {
upscaled = esrgan_upscaler->compute(n_threads, input_tensor);
} else {
auto on_processing = [&](const sd::Tensor<float>& input_tile) -> sd::Tensor<float> {
auto output_tile = esrgan_upscaler->compute(n_threads, input_tile);
if (output_tile.empty()) {
LOG_ERROR("esrgan compute failed while processing a tile");
return {};
}
return output_tile;
};
sd::Tensor<float> input_tensor = sd_image_to_tensor(input_image);
sd::Tensor<float> upscaled;
int64_t t0 = ggml_time_ms();
upscaled = upscale_tensor(input_tensor);
if (upscaled.empty()) {
return upscaled_image;
}
sd_image_t upscaled_data = tensor_to_sd_image(upscaled);
int64_t t3 = ggml_time_ms();
LOG_INFO("input_image_tensor upscaled, taking %.2fs", (t3 - t0) / 1000.0f);
upscaled_image = upscaled_data;
upscaled = process_tiles_2d(input_tensor,
static_cast<int>(input_tensor.shape()[0] * esrgan_upscaler->scale),
static_cast<int>(input_tensor.shape()[1] * esrgan_upscaler->scale),
esrgan_upscaler->scale,
tile_size,
tile_size,
0.25f,
false,
false,
on_processing);
}
esrgan_upscaler->free_compute_buffer();
if (upscaled.empty()) {
LOG_ERROR("esrgan compute failed");
return {};
}
return upscaled;
}
sd_image_t UpscalerGGML::upscale(sd_image_t input_image, uint32_t upscale_factor) {
// upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth
sd_image_t upscaled_image = {0, 0, 0, nullptr};
int output_width = (int)input_image.width * esrgan_upscaler->scale;
int output_height = (int)input_image.height * esrgan_upscaler->scale;
LOG_INFO("upscaling from (%i x %i) to (%i x %i)",
input_image.width, input_image.height, output_width, output_height);
sd::Tensor<float> input_tensor = sd_image_to_tensor(input_image);
sd::Tensor<float> upscaled;
int64_t t0 = ggml_time_ms();
upscaled = upscale_tensor(input_tensor);
if (upscaled.empty()) {
return upscaled_image;
}
};
sd_image_t upscaled_data = tensor_to_sd_image(upscaled);
int64_t t3 = ggml_time_ms();
LOG_INFO("input_image_tensor upscaled, taking %.2fs", (t3 - t0) / 1000.0f);
upscaled_image = upscaled_data;
return upscaled_image;
}
struct upscaler_ctx_t {
UpscalerGGML* upscaler = nullptr;

33
src/upscaler.h Normal file
View File

@ -0,0 +1,33 @@
#ifndef __SD_UPSCALER_H__
#define __SD_UPSCALER_H__
#include "esrgan.hpp"
#include "stable-diffusion.h"
#include "tensor.hpp"
#include <memory>
#include <string>
struct UpscalerGGML {
ggml_backend_t backend = nullptr; // general backend
ggml_type model_data_type = GGML_TYPE_F16;
std::shared_ptr<ESRGAN> esrgan_upscaler;
std::string esrgan_path;
int n_threads;
bool direct = false;
int tile_size = 128;
size_t max_graph_vram_bytes = 0;
UpscalerGGML(int n_threads,
bool direct = false,
int tile_size = 128);
bool load_from_file(const std::string& esrgan_path,
bool offload_params_to_cpu,
int n_threads);
void set_max_graph_vram_bytes(size_t max_vram_bytes);
sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor);
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor);
};
#endif // __SD_UPSCALER_H__

View File

@ -23,8 +23,9 @@
#include <unistd.h>
#endif
#include "ggml-cpu.h"
#include "ggml-backend.h"
#include "ggml.h"
#include "ggml_extend_backend.hpp"
#include "stable-diffusion.h"
bool ends_with(const std::string& str, const std::string& ending) {
@ -119,10 +120,10 @@ std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
filename.c_str(),
GENERIC_READ,
FILE_SHARE_READ,
NULL,
nullptr,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
nullptr);
if (file_handle == INVALID_HANDLE_VALUE) {
return nullptr;
@ -136,16 +137,16 @@ std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
file_size = static_cast<size_t>(size.QuadPart);
HANDLE mapping_handle = CreateFileMapping(file_handle, NULL, PAGE_READONLY, 0, 0, NULL);
HANDLE mapping_handle = CreateFileMapping(file_handle, nullptr, PAGE_READONLY, 0, 0, nullptr);
if (mapping_handle == NULL) {
if (mapping_handle == nullptr) {
CloseHandle(file_handle);
return nullptr;
}
mapped_data = MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, file_size);
if (mapped_data == NULL) {
if (mapped_data == nullptr) {
CloseHandle(mapping_handle);
CloseHandle(file_handle);
return nullptr;
@ -203,7 +204,7 @@ std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
size_t file_size = sb.st_size;
void* mapped_data = mmap(NULL, file_size, PROT_READ, mmap_flags, file_descriptor, 0);
void* mapped_data = mmap(nullptr, file_size, PROT_READ, mmap_flags, file_descriptor, 0);
close(file_descriptor);
@ -495,26 +496,6 @@ sd_progress_cb_t sd_get_progress_callback() {
void* sd_get_progress_callback_data() {
return sd_progress_cb_data;
}
const char* sd_get_system_info() {
static char buffer[1024];
std::stringstream ss;
ss << "System Info: \n";
ss << " SSE3 = " << ggml_cpu_has_sse3() << " | ";
ss << " AVX = " << ggml_cpu_has_avx() << " | ";
ss << " AVX2 = " << ggml_cpu_has_avx2() << " | ";
ss << " AVX512 = " << ggml_cpu_has_avx512() << " | ";
ss << " AVX512_VBMI = " << ggml_cpu_has_avx512_vbmi() << " | ";
ss << " AVX512_VNNI = " << ggml_cpu_has_avx512_vnni() << " | ";
ss << " FMA = " << ggml_cpu_has_fma() << " | ";
ss << " NEON = " << ggml_cpu_has_neon() << " | ";
ss << " ARM_FMA = " << ggml_cpu_has_arm_fma() << " | ";
ss << " F16C = " << ggml_cpu_has_f16c() << " | ";
ss << " FP16_VA = " << ggml_cpu_has_fp16_va() << " | ";
ss << " WASM_SIMD = " << ggml_cpu_has_wasm_simd() << " | ";
ss << " VSX = " << ggml_cpu_has_vsx() << " | ";
snprintf(buffer, sizeof(buffer), "%s", ss.str().c_str());
return buffer;
}
sd_image_t tensor_to_sd_image(const sd::Tensor<float>& tensor, int frame_index) {
const auto& shape = tensor.shape();
@ -524,17 +505,7 @@ sd_image_t tensor_to_sd_image(const sd::Tensor<float>& tensor, int frame_index)
int channel = static_cast<int>(shape[shape.size() == 5 ? 3 : 2]);
uint8_t* data = (uint8_t*)malloc(static_cast<size_t>(width * height * channel));
GGML_ASSERT(data != nullptr);
for (int iw = 0; iw < width; ++iw) {
for (int ih = 0; ih < height; ++ih) {
for (int ic = 0; ic < channel; ++ic) {
float value = shape.size() == 5 ? tensor.index(iw, ih, frame_index, ic, 0)
: tensor.index(iw, ih, ic, frame_index);
value = std::clamp(value, 0.0f, 1.0f);
data[(ih * width + iw) * channel + ic] = static_cast<uint8_t>(std::round(value * 255.0f));
}
}
}
preprocessing_tensor_frame_to_sd_image(tensor, frame_index, data);
return {
static_cast<uint32_t>(width),
static_cast<uint32_t>(height),
@ -718,3 +689,100 @@ std::vector<std::pair<std::string, float>> parse_prompt_attention(const std::str
return res;
}
// test if the backend is a specific one, e.g. "CUDA", "ROCm", "Vulkan" etc.
bool sd_backend_is(ggml_backend_t backend, const std::string& name) {
if (!backend) {
return false;
}
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
if (!dev)
return false;
std::string dev_name = ggml_backend_dev_name(dev);
return dev_name.find(name) != std::string::npos;
}
ggml_backend_t sd_get_default_backend() {
ggml_backend_load_all_once();
static std::once_flag once;
std::call_once(once, []() {
size_t dev_count = ggml_backend_dev_count();
if (dev_count == 0) {
LOG_ERROR("No devices found!");
} else {
LOG_DEBUG("Found %zu backend devices:", dev_count);
for (size_t i = 0; i < dev_count; ++i) {
auto dev = ggml_backend_dev_get(i);
LOG_DEBUG("#%zu: %s", i, ggml_backend_dev_name(dev));
}
}
});
ggml_backend_t backend = nullptr;
const char* SD_VK_DEVICE = getenv("SD_VK_DEVICE");
if (SD_VK_DEVICE != nullptr) {
std::string sd_vk_device_str = SD_VK_DEVICE;
try {
unsigned long long device = std::stoull(sd_vk_device_str);
std::string vk_device_name = "Vulkan" + std::to_string(device);
if (backend_name_exists(vk_device_name)) {
LOG_INFO("Selecting %s as main device by env var SD_VK_DEVICE", vk_device_name.c_str());
backend = init_named_backend(vk_device_name);
if (!backend) {
LOG_WARN("Device %s requested by SD_VK_DEVICE failed to init. Falling back to the default device.", vk_device_name.c_str());
}
} else {
LOG_WARN("Device %s requested by SD_VK_DEVICE was not found. Falling back to the default device.", vk_device_name.c_str());
}
} catch (const std::invalid_argument&) {
LOG_WARN("SD_VK_DEVICE environment variable is not a valid integer (%s). Falling back to the default device.", SD_VK_DEVICE);
} catch (const std::out_of_range&) {
LOG_WARN("SD_VK_DEVICE environment variable value is out of range for `unsigned long long` type (%s). Falling back to the default device.", SD_VK_DEVICE);
}
}
if (!backend) {
std::string dev_name = get_default_backend_name();
backend = init_named_backend(dev_name);
if (!backend && !dev_name.empty()) {
LOG_WARN("device %s failed to init", dev_name.c_str());
}
}
if (!backend) {
LOG_WARN("loading CPU backend");
backend = ggml_backend_cpu_init();
}
if (ggml_backend_is_cpu(backend)) {
LOG_DEBUG("Using CPU backend");
}
return backend;
}
// namespace is needed to avoid conflicts with ggml_backend_extend.hpp
namespace ggml_cpu {
#include "ggml-cpu.h"
}
const char* sd_get_system_info() {
using namespace ggml_cpu;
static char buffer[1024];
std::stringstream ss;
ss << "System Info: \n";
ss << " SSE3 = " << ggml_cpu_has_sse3() << " | ";
ss << " AVX = " << ggml_cpu_has_avx() << " | ";
ss << " AVX2 = " << ggml_cpu_has_avx2() << " | ";
ss << " AVX512 = " << ggml_cpu_has_avx512() << " | ";
ss << " AVX512_VBMI = " << ggml_cpu_has_avx512_vbmi() << " | ";
ss << " AVX512_VNNI = " << ggml_cpu_has_avx512_vnni() << " | ";
ss << " FMA = " << ggml_cpu_has_fma() << " | ";
ss << " NEON = " << ggml_cpu_has_neon() << " | ";
ss << " ARM_FMA = " << ggml_cpu_has_arm_fma() << " | ";
ss << " F16C = " << ggml_cpu_has_f16c() << " | ";
ss << " FP16_VA = " << ggml_cpu_has_fp16_va() << " | ";
ss << " WASM_SIMD = " << ggml_cpu_has_wasm_simd() << " | ";
ss << " VSX = " << ggml_cpu_has_vsx() << " | ";
snprintf(buffer, sizeof(buffer), "%s", ss.str().c_str());
return buffer;
}

View File

@ -6,6 +6,7 @@
#include <string>
#include <vector>
#include "ggml-backend.h"
#include "stable-diffusion.h"
#include "tensor.hpp"
@ -82,6 +83,10 @@ int sd_get_preview_interval();
bool sd_should_preview_denoised();
bool sd_should_preview_noisy();
// test if the backend is a specific one, e.g. "CUDA", "ROCm", "Vulkan" etc.
bool sd_backend_is(ggml_backend_t backend, const std::string& name);
ggml_backend_t sd_get_default_backend();
#define LOG_DEBUG(format, ...) log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__)
#define LOG_INFO(format, ...) log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__)
#define LOG_WARN(format, ...) log_printf(SD_LOG_WARN, __FILE__, __LINE__, format, ##__VA_ARGS__)

View File

@ -142,9 +142,10 @@ public:
"vae encode compute failed while processing a tile");
} else {
output = _compute(n_threads, input, false);
free_compute_buffer();
}
free_compute_buffer();
if (output.empty()) {
LOG_ERROR("vae encode compute failed");
return {};

View File

@ -692,6 +692,7 @@ namespace WAN {
} else {
x = conv1->forward(ctx, x);
}
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.encoder.prelude", "x");
// downsamples
std::vector<int64_t> dims = {dim};
@ -717,12 +718,14 @@ namespace WAN {
x = layer->forward(ctx, x, b, feat_cache, feat_idx, chunk_idx);
}
}
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.encoder.down." + std::to_string(i), "x");
}
// middle
x = middle_0->forward(ctx, x, b, feat_cache, feat_idx);
x = middle_1->forward(ctx, x, b);
x = middle_2->forward(ctx, x, b, feat_cache, feat_idx);
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.encoder.mid", "x");
// head
x = head_0->forward(ctx, x);
@ -863,11 +866,13 @@ namespace WAN {
} else {
x = conv1->forward(ctx, x);
}
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.decoder.prelude", "x");
// middle
x = middle_0->forward(ctx, x, b, feat_cache, feat_idx);
x = middle_1->forward(ctx, x, b);
x = middle_2->forward(ctx, x, b, feat_cache, feat_idx);
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.decoder.mid", "x");
// upsamples
std::vector<int64_t> dims = {dim_mult[dim_mult.size() - 1] * dim};
@ -893,6 +898,7 @@ namespace WAN {
x = layer->forward(ctx, x, b, feat_cache, feat_idx, chunk_idx);
}
}
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.decoder.up." + std::to_string(i), "x");
}
// head
@ -1031,6 +1037,7 @@ namespace WAN {
if (wan2_2) {
x = patchify(ctx->ggml_ctx, x, 2, b);
}
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.encode.prelude", "x");
auto encoder = std::dynamic_pointer_cast<Encoder3d>(blocks["encoder"]);
auto conv1 = std::dynamic_pointer_cast<CausalConv3d>(blocks["conv1"]);
@ -1051,6 +1058,7 @@ namespace WAN {
}
out = conv1->forward(ctx, out);
auto mu = ggml_ext_chunk(ctx->ggml_ctx, out, 2, 3)[0];
// sd::ggml_graph_cut::mark_graph_cut(mu, "wan_vae.encode.final", "mu");
clear_cache();
return mu;
}
@ -1068,6 +1076,7 @@ namespace WAN {
int64_t iter_ = z->ne[2];
auto x = conv2->forward(ctx, z);
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.decode.prelude", "x");
ggml_tensor* out;
for (int i = 0; i < iter_; i++) {
_conv_idx = 0;
@ -1083,6 +1092,7 @@ namespace WAN {
if (wan2_2) {
out = unpatchify(ctx->ggml_ctx, out, 2, b);
}
// sd::ggml_graph_cut::mark_graph_cut(out, "wan_vae.decode.final", "out");
clear_cache();
return out;
}
@ -1097,13 +1107,15 @@ namespace WAN {
auto decoder = std::dynamic_pointer_cast<Decoder3d>(blocks["decoder"]);
auto conv2 = std::dynamic_pointer_cast<CausalConv3d>(blocks["conv2"]);
auto x = conv2->forward(ctx, z);
auto x = conv2->forward(ctx, z);
// sd::ggml_graph_cut::mark_graph_cut(x, "wan_vae.decode_partial.prelude", "x");
auto in = ggml_ext_slice(ctx->ggml_ctx, x, 2, i, i + 1); // [b*c, 1, h, w]
_conv_idx = 0;
auto out = decoder->forward(ctx, in, b, _feat_map, _conv_idx, i);
if (wan2_2) {
out = unpatchify(ctx->ggml_ctx, out, 2, b);
}
// sd::ggml_graph_cut::mark_graph_cut(out, "wan_vae.decode_partial.final", "out");
return out;
}
};
@ -1984,6 +1996,13 @@ namespace WAN {
c = ggml_reshape_3d(ctx->ggml_ctx, c, c->ne[0] * c->ne[1] * c->ne[2], c->ne[3] / N, N); // [N, dim, t_len*h_len*w_len]
c = ggml_ext_cont(ctx->ggml_ctx, ggml_ext_torch_permute(ctx->ggml_ctx, c, 1, 0, 2, 3)); // [N, t_len*h_len*w_len, dim]
}
sd::ggml_graph_cut::mark_graph_cut(x, "wan.prelude", "x");
// sd::ggml_graph_cut::mark_graph_cut(e, "wan.prelude", "e");
// sd::ggml_graph_cut::mark_graph_cut(e0, "wan.prelude", "e0");
// sd::ggml_graph_cut::mark_graph_cut(context, "wan.prelude", "context");
if (c != nullptr) {
sd::ggml_graph_cut::mark_graph_cut(c, "wan.prelude", "c");
}
auto x_orig = x;
@ -2004,6 +2023,10 @@ namespace WAN {
c_skip = ggml_ext_scale(ctx->ggml_ctx, c_skip, vace_strength);
x = ggml_add(ctx->ggml_ctx, x, c_skip);
}
sd::ggml_graph_cut::mark_graph_cut(x, "wan.blocks." + std::to_string(i), "x");
if (c != nullptr) {
sd::ggml_graph_cut::mark_graph_cut(c, "wan.blocks." + std::to_string(i), "c");
}
}
x = head->forward(ctx, x, e); // [N, t_len*h_len*w_len, pt*ph*pw*out_dim]

View File

@ -31,10 +31,6 @@ namespace ZImage {
: head_dim(head_dim), num_heads(num_heads), num_kv_heads(num_kv_heads), qk_norm(qk_norm) {
blocks["qkv"] = std::make_shared<Linear>(hidden_size, (num_heads + num_kv_heads * 2) * head_dim, false);
float scale = 1.f;
#if GGML_USE_HIP
// Prevent NaN issues with certain ROCm setups
scale = 1.f / 16.f;
#endif
blocks["out"] = std::make_shared<Linear>(num_heads * head_dim, hidden_size, false, false, false, scale);
if (qk_norm) {
blocks["q_norm"] = std::make_shared<RMSNorm>(head_dim);
@ -52,6 +48,10 @@ namespace ZImage {
auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
auto out_proj = std::dynamic_pointer_cast<Linear>(blocks["out"]);
if (sd_backend_is(ctx->backend, "ROCm")) {
out_proj->set_scale(1.f / 16.f);
}
auto qkv = qkv_proj->forward(ctx, x); // [N, n_token, (num_heads + num_kv_heads*2)*head_dim]
qkv = ggml_reshape_4d(ctx->ggml_ctx, qkv, head_dim, num_heads + num_kv_heads * 2, qkv->ne[1], qkv->ne[2]); // [N, n_token, num_heads + num_kv_heads*2, head_dim]
@ -115,9 +115,7 @@ namespace ZImage {
bool force_prec_f32 = false;
float scale = 1.f / 128.f;
#ifdef SD_USE_VULKAN
force_prec_f32 = true;
#endif
// The purpose of the scale here is to prevent NaN issues in certain situations.
// For example, when using CUDA but the weights are k-quants.
blocks["w2"] = std::make_shared<Linear>(hidden_dim, dim, false, false, force_prec_f32, scale);
@ -129,6 +127,10 @@ namespace ZImage {
auto w2 = std::dynamic_pointer_cast<Linear>(blocks["w2"]);
auto w3 = std::dynamic_pointer_cast<Linear>(blocks["w3"]);
if (sd_backend_is(ctx->backend, "Vulkan")) {
w2->set_force_prec_f32(true);
}
auto x1 = w1->forward(ctx, x);
auto x3 = w3->forward(ctx, x);
x = ggml_swiglu_split(ctx->ggml_ctx, x1, x3);
@ -369,6 +371,9 @@ namespace ZImage {
auto txt = cap_embedder_1->forward(ctx, cap_embedder_0->forward(ctx, context)); // [N, n_txt_token, hidden_size]
auto img = x_embedder->forward(ctx, x); // [N, n_img_token, hidden_size]
sd::ggml_graph_cut::mark_graph_cut(txt, "z_image.prelude", "txt");
sd::ggml_graph_cut::mark_graph_cut(img, "z_image.prelude", "img");
sd::ggml_graph_cut::mark_graph_cut(t_emb, "z_image.prelude", "t_emb");
int64_t n_txt_pad_token = Rope::bound_mod(static_cast<int>(n_txt_token), SEQ_MULTI_OF);
if (n_txt_pad_token > 0) {
@ -391,20 +396,24 @@ namespace ZImage {
auto block = std::dynamic_pointer_cast<JointTransformerBlock>(blocks["context_refiner." + std::to_string(i)]);
txt = block->forward(ctx, txt, txt_pe, nullptr, nullptr);
sd::ggml_graph_cut::mark_graph_cut(txt, "z_image.context_refiner." + std::to_string(i), "txt");
}
for (int i = 0; i < z_image_params.num_refiner_layers; i++) {
auto block = std::dynamic_pointer_cast<JointTransformerBlock>(blocks["noise_refiner." + std::to_string(i)]);
img = block->forward(ctx, img, img_pe, nullptr, t_emb);
sd::ggml_graph_cut::mark_graph_cut(img, "z_image.noise_refiner." + std::to_string(i), "img");
}
auto txt_img = ggml_concat(ctx->ggml_ctx, txt, img, 1); // [N, n_txt_token + n_txt_pad_token + n_img_token + n_img_pad_token, hidden_size]
sd::ggml_graph_cut::mark_graph_cut(txt_img, "z_image.prelude", "txt_img");
for (int i = 0; i < z_image_params.num_layers; i++) {
auto block = std::dynamic_pointer_cast<JointTransformerBlock>(blocks["layers." + std::to_string(i)]);
txt_img = block->forward(ctx, txt_img, pe, nullptr, t_emb);
sd::ggml_graph_cut::mark_graph_cut(txt_img, "z_image.layers." + std::to_string(i), "txt_img");
}
txt_img = final_layer->forward(ctx, txt_img, t_emb); // [N, n_txt_token + n_txt_pad_token + n_img_token + n_img_pad_token, ph*pw*C]