mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-03-23 17:58:58 +00:00
Compare commits
4 Commits
5792c66879
...
aaa8a51bd8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aaa8a51bd8 | ||
|
|
ba35dd734e | ||
|
|
d41f5fff69 | ||
|
|
810ef0cf76 |
@ -4,11 +4,12 @@
|
||||
usage: ./bin/sd-cli [options]
|
||||
|
||||
CLI Options:
|
||||
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)
|
||||
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
|
||||
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default:
|
||||
./output.png) (eg. output_%03d.png)
|
||||
--preview-path <string> path to write preview image to (default: ./preview.png)
|
||||
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
|
||||
every step)
|
||||
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
|
||||
--canny apply canny preprocessor (edge detection)
|
||||
--convert-name convert tensor name (for convert mode)
|
||||
-v, --verbose print extra info
|
||||
@ -59,6 +60,7 @@ Context Options:
|
||||
--circularx enable circular RoPE wrapping on x-axis (width) only
|
||||
--circulary enable circular RoPE wrapping on y-axis (height) only
|
||||
--chroma-disable-dit-mask disable dit mask for chroma
|
||||
--qwen-image-zero-cond-t enable zero_cond_t for qwen image
|
||||
--chroma-enable-t5-mask enable t5 mask for chroma
|
||||
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
|
||||
type of the weight file
|
||||
@ -107,7 +109,7 @@ Generation Options:
|
||||
medium
|
||||
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
||||
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
||||
--eta <float> eta in DDIM, only for DDIM/TCD/res_multistep/res_2s (default: 0)
|
||||
--eta <float> eta in DDIM, only for DDIM and TCD (default: 0)
|
||||
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
||||
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale (default: 7.0)
|
||||
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
||||
@ -115,7 +117,7 @@ Generation Options:
|
||||
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models (default: 0)
|
||||
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
|
||||
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
|
||||
--high-noise-eta <float> (high noise) eta in DDIM, only for DDIM/TCD/res_multistep/res_2s (default: 0)
|
||||
--high-noise-eta <float> (high noise) eta in DDIM, only for DDIM and TCD (default: 0)
|
||||
--strength <float> strength for noising/unnoising (default: 0.75)
|
||||
--pm-style-strength <float>
|
||||
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
|
||||
@ -124,10 +126,12 @@ Generation Options:
|
||||
--increase-ref-index automatically increase the indices of reference images based on the order they are listed (starting with 1).
|
||||
--disable-auto-resize-ref-image disable auto resize of ref images
|
||||
-s, --seed RNG seed (default: 42, use random seed for < 0)
|
||||
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd,
|
||||
res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a otherwise)
|
||||
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
||||
tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan, euler_a otherwise
|
||||
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
||||
tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
|
||||
otherwise)
|
||||
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
|
||||
ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
|
||||
euler_a otherwise
|
||||
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
|
||||
kl_optimal, lcm, bong_tangent], default: discrete
|
||||
--sigmas custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0").
|
||||
|
||||
@ -4,12 +4,12 @@
|
||||
usage: ./bin/sd-server [options]
|
||||
|
||||
Svr Options:
|
||||
-l, --listen-ip <string> server listen ip (default: 127.0.0.1)
|
||||
--listen-port <int> server listen port (default: 1234)
|
||||
--serve-html-path <string> path to HTML file to serve at root (optional)
|
||||
-v, --verbose print extra info
|
||||
--color colors the logging tags according to level
|
||||
-h, --help show this help message and exit
|
||||
-l, --listen-ip <string> server listen ip (default: 127.0.0.1)
|
||||
--serve-html-path <string> path to HTML file to serve at root (optional)
|
||||
--listen-port <int> server listen port (default: 1234)
|
||||
-v, --verbose print extra info
|
||||
--color colors the logging tags according to level
|
||||
-h, --help show this help message and exit
|
||||
|
||||
Context Options:
|
||||
-m, --model <string> path to full model
|
||||
@ -39,10 +39,10 @@ Context Options:
|
||||
--vae-tiling process vae in tiles to reduce memory usage
|
||||
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
|
||||
--mmap whether to memory-map model
|
||||
--control-net-cpu keep controlnet in cpu (for low vram)
|
||||
--clip-on-cpu keep clip in cpu (for low vram)
|
||||
--vae-on-cpu keep vae in cpu (for low vram)
|
||||
--mmap whether to memory-map model
|
||||
--fa use flash attention
|
||||
--diffusion-fa use flash attention in the diffusion model only
|
||||
--diffusion-conv-direct use ggml_conv2d_direct in the diffusion model
|
||||
@ -51,6 +51,7 @@ Context Options:
|
||||
--circularx enable circular RoPE wrapping on x-axis (width) only
|
||||
--circulary enable circular RoPE wrapping on y-axis (height) only
|
||||
--chroma-disable-dit-mask disable dit mask for chroma
|
||||
--qwen-image-zero-cond-t enable zero_cond_t for qwen image
|
||||
--chroma-enable-t5-mask enable t5 mask for chroma
|
||||
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
|
||||
type of the weight file
|
||||
@ -99,7 +100,7 @@ Default Generation Options:
|
||||
medium
|
||||
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
||||
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
||||
--eta <float> eta in DDIM, only for DDIM/TCD/res_multistep/res_2s (default: 0)
|
||||
--eta <float> eta in DDIM, only for DDIM and TCD (default: 0)
|
||||
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
||||
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale (default: 7.0)
|
||||
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
||||
@ -107,7 +108,7 @@ Default Generation Options:
|
||||
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models (default: 0)
|
||||
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
|
||||
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
|
||||
--high-noise-eta <float> (high noise) eta in DDIM, only for DDIM/TCD/res_multistep/res_2s (default: 0)
|
||||
--high-noise-eta <float> (high noise) eta in DDIM, only for DDIM and TCD (default: 0)
|
||||
--strength <float> strength for noising/unnoising (default: 0.75)
|
||||
--pm-style-strength <float>
|
||||
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
|
||||
@ -116,10 +117,12 @@ Default Generation Options:
|
||||
--increase-ref-index automatically increase the indices of reference images based on the order they are listed (starting with 1).
|
||||
--disable-auto-resize-ref-image disable auto resize of ref images
|
||||
-s, --seed RNG seed (default: 42, use random seed for < 0)
|
||||
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd,
|
||||
res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a otherwise)
|
||||
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
||||
tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan, euler_a otherwise
|
||||
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
||||
tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
|
||||
otherwise)
|
||||
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
|
||||
ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
|
||||
euler_a otherwise
|
||||
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
|
||||
kl_optimal, lcm, bong_tangent], default: discrete
|
||||
--sigmas custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0").
|
||||
|
||||
@ -345,7 +345,7 @@ int main(int argc, const char** argv) {
|
||||
auto get_lora_full_path = [&](const std::string& path) -> std::string {
|
||||
std::lock_guard<std::mutex> lock(lora_mutex);
|
||||
auto it = std::find_if(lora_cache.begin(), lora_cache.end(),
|
||||
[&](const LoraEntry& e) { return e.path == path; });
|
||||
[&](const LoraEntry& e) { return e.path == path; });
|
||||
return (it != lora_cache.end()) ? it->fullpath : "";
|
||||
};
|
||||
|
||||
@ -567,7 +567,7 @@ int main(int argc, const char** argv) {
|
||||
|
||||
std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(prompt);
|
||||
|
||||
size_t image_count = req.form.get_file_count("image[]");
|
||||
size_t image_count = req.form.get_file_count("image[]");
|
||||
bool has_legacy_image = req.form.has_file("image");
|
||||
if (image_count == 0 && !has_legacy_image) {
|
||||
res.status = 400;
|
||||
|
||||
25
src/flux.hpp
25
src/flux.hpp
@ -103,11 +103,13 @@ namespace Flux {
|
||||
auto norm = std::dynamic_pointer_cast<QKNorm>(blocks["norm"]);
|
||||
|
||||
auto qkv = qkv_proj->forward(ctx, x);
|
||||
auto qkv_vec = ggml_ext_chunk(ctx->ggml_ctx, qkv, 3, 0, true);
|
||||
int64_t head_dim = qkv_vec[0]->ne[0] / num_heads;
|
||||
auto q = ggml_reshape_4d(ctx->ggml_ctx, qkv_vec[0], head_dim, num_heads, qkv_vec[0]->ne[1], qkv_vec[0]->ne[2]);
|
||||
auto k = ggml_reshape_4d(ctx->ggml_ctx, qkv_vec[1], head_dim, num_heads, qkv_vec[1]->ne[1], qkv_vec[1]->ne[2]);
|
||||
auto v = ggml_reshape_4d(ctx->ggml_ctx, qkv_vec[2], head_dim, num_heads, qkv_vec[2]->ne[1], qkv_vec[2]->ne[2]);
|
||||
int64_t head_dim = qkv->ne[0] / 3 / num_heads;
|
||||
auto q = ggml_view_4d(ctx->ggml_ctx, qkv, head_dim, num_heads, qkv->ne[1], qkv->ne[2],
|
||||
qkv->nb[0] * head_dim, qkv->nb[1], qkv->nb[2], 0);
|
||||
auto k = ggml_view_4d(ctx->ggml_ctx, qkv, head_dim, num_heads, qkv->ne[1], qkv->ne[2],
|
||||
qkv->nb[0] * head_dim, qkv->nb[1], qkv->nb[2], (qkv->nb[0]) * qkv->ne[0] / 3);
|
||||
auto v = ggml_view_4d(ctx->ggml_ctx, qkv, head_dim, num_heads, qkv->ne[1], qkv->ne[2],
|
||||
qkv->nb[0] * head_dim, qkv->nb[1], qkv->nb[2], (qkv->nb[0]) * 2 * qkv->ne[0] / 3);
|
||||
q = norm->query_norm(ctx, q);
|
||||
k = norm->key_norm(ctx, k);
|
||||
return {q, k, v};
|
||||
@ -491,15 +493,14 @@ namespace Flux {
|
||||
auto x_mod = Flux::modulate(ctx->ggml_ctx, pre_norm->forward(ctx, x), mod.shift, mod.scale);
|
||||
auto qkv_mlp = linear1->forward(ctx, x_mod); // [N, n_token, hidden_size * 3 + mlp_hidden_dim*mlp_mult_factor]
|
||||
|
||||
auto q = ggml_view_3d(ctx->ggml_ctx, qkv_mlp, hidden_size, qkv_mlp->ne[1], qkv_mlp->ne[2], qkv_mlp->nb[1], qkv_mlp->nb[2], 0);
|
||||
auto k = ggml_view_3d(ctx->ggml_ctx, qkv_mlp, hidden_size, qkv_mlp->ne[1], qkv_mlp->ne[2], qkv_mlp->nb[1], qkv_mlp->nb[2], hidden_size * qkv_mlp->nb[0]);
|
||||
auto v = ggml_view_3d(ctx->ggml_ctx, qkv_mlp, hidden_size, qkv_mlp->ne[1], qkv_mlp->ne[2], qkv_mlp->nb[1], qkv_mlp->nb[2], hidden_size * 2 * qkv_mlp->nb[0]);
|
||||
|
||||
int64_t head_dim = hidden_size / num_heads;
|
||||
|
||||
q = ggml_reshape_4d(ctx->ggml_ctx, ggml_cont(ctx->ggml_ctx, q), head_dim, num_heads, q->ne[1], q->ne[2]); // [N, n_token, n_head, d_head]
|
||||
k = ggml_reshape_4d(ctx->ggml_ctx, ggml_cont(ctx->ggml_ctx, k), head_dim, num_heads, k->ne[1], k->ne[2]); // [N, n_token, n_head, d_head]
|
||||
v = ggml_reshape_4d(ctx->ggml_ctx, ggml_cont(ctx->ggml_ctx, v), head_dim, num_heads, v->ne[1], v->ne[2]); // [N, n_token, n_head, d_head]
|
||||
auto q = ggml_view_4d(ctx->ggml_ctx, qkv_mlp, head_dim, num_heads, qkv_mlp->ne[1], qkv_mlp->ne[2],
|
||||
qkv_mlp->nb[0] * head_dim, qkv_mlp->nb[1], qkv_mlp->nb[2], 0);
|
||||
auto k = ggml_view_4d(ctx->ggml_ctx, qkv_mlp, head_dim, num_heads, qkv_mlp->ne[1], qkv_mlp->ne[2],
|
||||
qkv_mlp->nb[0] * head_dim, qkv_mlp->nb[1], qkv_mlp->nb[2], (qkv_mlp->nb[0]) * hidden_size);
|
||||
auto v = ggml_view_4d(ctx->ggml_ctx, qkv_mlp, head_dim, num_heads, qkv_mlp->ne[1], qkv_mlp->ne[2],
|
||||
qkv_mlp->nb[0] * head_dim, qkv_mlp->nb[1], qkv_mlp->nb[2], (qkv_mlp->nb[0]) * 2 * hidden_size);
|
||||
|
||||
q = norm->query_norm(ctx, q);
|
||||
k = norm->key_norm(ctx, k);
|
||||
|
||||
@ -1219,6 +1219,11 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_zeros(struct ggml_context* ctx,
|
||||
return ggml_ext_full(ctx, 0.f, ne0, ne1, ne2, ne3);
|
||||
}
|
||||
|
||||
__STATIC_INLINE__ struct ggml_tensor* ggml_ext_zeros_like(struct ggml_context* ctx,
|
||||
struct ggml_tensor* x) {
|
||||
return ggml_ext_zeros(ctx, x->ne[0], x->ne[1], x->ne[2], x->ne[3]);
|
||||
}
|
||||
|
||||
__STATIC_INLINE__ struct ggml_tensor* ggml_ext_ones(struct ggml_context* ctx,
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
@ -1227,6 +1232,11 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_ones(struct ggml_context* ctx,
|
||||
return ggml_ext_full(ctx, 1.f, ne0, ne1, ne2, ne3);
|
||||
}
|
||||
|
||||
__STATIC_INLINE__ struct ggml_tensor* ggml_ext_ones_like(struct ggml_context* ctx,
|
||||
struct ggml_tensor* x) {
|
||||
return ggml_ext_ones(ctx, x->ne[0], x->ne[1], x->ne[2], x->ne[3]);
|
||||
}
|
||||
|
||||
__STATIC_INLINE__ ggml_tensor* ggml_ext_cast_f32(ggml_context* ctx, ggml_tensor* a) {
|
||||
#ifdef SD_USE_VULKAN
|
||||
auto zero_index = ggml_get_tensor(ctx, "ggml_runner_build_in_tensor:zero_int");
|
||||
|
||||
@ -404,7 +404,7 @@ namespace Qwen {
|
||||
|
||||
auto t_emb = time_text_embed->forward(ctx, timestep);
|
||||
if (params.zero_cond_t) {
|
||||
auto t_emb_0 = time_text_embed->forward(ctx, ggml_ext_zeros(ctx->ggml_ctx, timestep->ne[0], timestep->ne[1], timestep->ne[2], timestep->ne[3]));
|
||||
auto t_emb_0 = time_text_embed->forward(ctx, ggml_ext_zeros_like(ctx->ggml_ctx, timestep));
|
||||
t_emb = ggml_concat(ctx->ggml_ctx, t_emb, t_emb_0, 1);
|
||||
}
|
||||
auto img = img_in->forward(ctx, x);
|
||||
|
||||
@ -1098,6 +1098,18 @@ public:
|
||||
cond_stage_lora_models.clear();
|
||||
diffusion_lora_models.clear();
|
||||
first_stage_lora_models.clear();
|
||||
if (cond_stage_model) {
|
||||
cond_stage_model->set_weight_adapter(nullptr);
|
||||
}
|
||||
if (diffusion_model) {
|
||||
diffusion_model->set_weight_adapter(nullptr);
|
||||
}
|
||||
if (high_noise_diffusion_model) {
|
||||
high_noise_diffusion_model->set_weight_adapter(nullptr);
|
||||
}
|
||||
if (first_stage_model) {
|
||||
first_stage_model->set_weight_adapter(nullptr);
|
||||
}
|
||||
if (lora_state.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user