From 80ecc326950bd44e43e2fcd60876a57836117d7d Mon Sep 17 00:00:00 2001 From: leejet Date: Wed, 15 Oct 2025 21:36:58 +0800 Subject: [PATCH] add --force-sdxl-vae-conv-scale option --- examples/cli/main.cpp | 5 +++++ stable-diffusion.cpp | 8 ++++++-- stable-diffusion.h | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index b1d83a0..474d433 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -128,6 +128,7 @@ struct SDParams { float flow_shift = INFINITY; sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f}; + bool force_sdxl_vae_conv_scale = false; SDParams() { sd_sample_params_init(&sample_params); @@ -194,6 +195,7 @@ void print_params(SDParams params) { printf(" seed: %zd\n", params.seed); printf(" batch_count: %d\n", params.batch_count); printf(" vae_tiling: %s\n", params.vae_tiling_params.enabled ? "true" : "false"); + printf(" force_sdxl_vae_conv_scale: %s\n", params.force_sdxl_vae_conv_scale ? "true" : "false"); printf(" upscale_repeats: %d\n", params.upscale_repeats); printf(" chroma_use_dit_mask: %s\n", params.chroma_use_dit_mask ? "true" : "false"); printf(" chroma_use_t5_mask: %s\n", params.chroma_use_t5_mask ? "true" : "false"); @@ -287,6 +289,7 @@ void print_usage(int argc, const char* argv[]) { printf(" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n"); printf(" --vae-relative-tile-size [X]x[Y] relative tile size for vae tiling, in fraction of image size if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)\n"); printf(" --vae-tile-overlap OVERLAP tile overlap for vae tiling, in fraction of tile size (default: 0.5)\n"); + printf(" --force-sdxl-vae-conv-scale force use of conv scale on sdxl vae\n"); printf(" --vae-on-cpu keep vae in cpu (for low vram)\n"); printf(" --clip-on-cpu keep clip in cpu (for low vram)\n"); printf(" --diffusion-fa use flash attention in the diffusion model (for low vram)\n"); @@ -557,6 +560,7 @@ void parse_args(int argc, const char** argv, SDParams& params) { options.bool_options = { {"", "--vae-tiling", "", true, ¶ms.vae_tiling_params.enabled}, + {"", "--force-sdxl-vae-conv-scale", "", true, ¶ms.force_sdxl_vae_conv_scale}, {"", "--offload-to-cpu", "", true, ¶ms.offload_params_to_cpu}, {"", "--control-net-cpu", "", true, ¶ms.control_net_cpu}, {"", "--clip-on-cpu", "", true, ¶ms.clip_on_cpu}, @@ -1361,6 +1365,7 @@ int main(int argc, const char* argv[]) { params.diffusion_flash_attn, params.diffusion_conv_direct, params.vae_conv_direct, + params.force_sdxl_vae_conv_scale, params.chroma_use_dit_mask, params.chroma_use_t5_mask, params.chroma_t5_mask_pad, diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 3de9314..8071f6f 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -510,9 +510,13 @@ public: LOG_INFO("Using Conv2d direct in the vae model"); first_stage_model->enable_conv2d_direct(); } - if (version == VERSION_SDXL && strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0) { + if (version == VERSION_SDXL && + (strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0 || sd_ctx_params->force_sdxl_vae_conv_scale)) { float vae_conv_2d_scale = 1.f / 32.f; - LOG_WARN("No VAE specified with --vae, using Conv2D scale %.3f", vae_conv_2d_scale); + LOG_WARN( + "No VAE specified with --vae or --force-sdxl-vae-conv-scale flag set, " + "using Conv2D scale %.3f", + vae_conv_2d_scale); first_stage_model->set_conv2d_scale(vae_conv_2d_scale); } first_stage_model->alloc_params_buffer(); diff --git a/stable-diffusion.h b/stable-diffusion.h index 1d3ed85..4d6af69 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -153,6 +153,7 @@ typedef struct { bool diffusion_flash_attn; bool diffusion_conv_direct; bool vae_conv_direct; + bool force_sdxl_vae_conv_scale; bool chroma_use_dit_mask; bool chroma_use_t5_mask; int chroma_t5_mask_pad;