add --force-sdxl-vae-conv-scale option

This commit is contained in:
leejet 2025-10-15 21:36:58 +08:00
parent 1d13041aa2
commit 80ecc32695
3 changed files with 12 additions and 2 deletions

View File

@ -128,6 +128,7 @@ struct SDParams {
float flow_shift = INFINITY; float flow_shift = INFINITY;
sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f}; sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
bool force_sdxl_vae_conv_scale = false;
SDParams() { SDParams() {
sd_sample_params_init(&sample_params); sd_sample_params_init(&sample_params);
@ -194,6 +195,7 @@ void print_params(SDParams params) {
printf(" seed: %zd\n", params.seed); printf(" seed: %zd\n", params.seed);
printf(" batch_count: %d\n", params.batch_count); printf(" batch_count: %d\n", params.batch_count);
printf(" vae_tiling: %s\n", params.vae_tiling_params.enabled ? "true" : "false"); printf(" vae_tiling: %s\n", params.vae_tiling_params.enabled ? "true" : "false");
printf(" force_sdxl_vae_conv_scale: %s\n", params.force_sdxl_vae_conv_scale ? "true" : "false");
printf(" upscale_repeats: %d\n", params.upscale_repeats); printf(" upscale_repeats: %d\n", params.upscale_repeats);
printf(" chroma_use_dit_mask: %s\n", params.chroma_use_dit_mask ? "true" : "false"); printf(" chroma_use_dit_mask: %s\n", params.chroma_use_dit_mask ? "true" : "false");
printf(" chroma_use_t5_mask: %s\n", params.chroma_use_t5_mask ? "true" : "false"); printf(" chroma_use_t5_mask: %s\n", params.chroma_use_t5_mask ? "true" : "false");
@ -287,6 +289,7 @@ void print_usage(int argc, const char* argv[]) {
printf(" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n"); printf(" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n");
printf(" --vae-relative-tile-size [X]x[Y] relative tile size for vae tiling, in fraction of image size if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)\n"); printf(" --vae-relative-tile-size [X]x[Y] relative tile size for vae tiling, in fraction of image size if < 1, in number of tiles per dim if >=1 (overrides --vae-tile-size)\n");
printf(" --vae-tile-overlap OVERLAP tile overlap for vae tiling, in fraction of tile size (default: 0.5)\n"); printf(" --vae-tile-overlap OVERLAP tile overlap for vae tiling, in fraction of tile size (default: 0.5)\n");
printf(" --force-sdxl-vae-conv-scale force use of conv scale on sdxl vae\n");
printf(" --vae-on-cpu keep vae in cpu (for low vram)\n"); printf(" --vae-on-cpu keep vae in cpu (for low vram)\n");
printf(" --clip-on-cpu keep clip in cpu (for low vram)\n"); printf(" --clip-on-cpu keep clip in cpu (for low vram)\n");
printf(" --diffusion-fa use flash attention in the diffusion model (for low vram)\n"); printf(" --diffusion-fa use flash attention in the diffusion model (for low vram)\n");
@ -557,6 +560,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
options.bool_options = { options.bool_options = {
{"", "--vae-tiling", "", true, &params.vae_tiling_params.enabled}, {"", "--vae-tiling", "", true, &params.vae_tiling_params.enabled},
{"", "--force-sdxl-vae-conv-scale", "", true, &params.force_sdxl_vae_conv_scale},
{"", "--offload-to-cpu", "", true, &params.offload_params_to_cpu}, {"", "--offload-to-cpu", "", true, &params.offload_params_to_cpu},
{"", "--control-net-cpu", "", true, &params.control_net_cpu}, {"", "--control-net-cpu", "", true, &params.control_net_cpu},
{"", "--clip-on-cpu", "", true, &params.clip_on_cpu}, {"", "--clip-on-cpu", "", true, &params.clip_on_cpu},
@ -1361,6 +1365,7 @@ int main(int argc, const char* argv[]) {
params.diffusion_flash_attn, params.diffusion_flash_attn,
params.diffusion_conv_direct, params.diffusion_conv_direct,
params.vae_conv_direct, params.vae_conv_direct,
params.force_sdxl_vae_conv_scale,
params.chroma_use_dit_mask, params.chroma_use_dit_mask,
params.chroma_use_t5_mask, params.chroma_use_t5_mask,
params.chroma_t5_mask_pad, params.chroma_t5_mask_pad,

View File

@ -510,9 +510,13 @@ public:
LOG_INFO("Using Conv2d direct in the vae model"); LOG_INFO("Using Conv2d direct in the vae model");
first_stage_model->enable_conv2d_direct(); first_stage_model->enable_conv2d_direct();
} }
if (version == VERSION_SDXL && strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0) { if (version == VERSION_SDXL &&
(strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0 || sd_ctx_params->force_sdxl_vae_conv_scale)) {
float vae_conv_2d_scale = 1.f / 32.f; float vae_conv_2d_scale = 1.f / 32.f;
LOG_WARN("No VAE specified with --vae, using Conv2D scale %.3f", vae_conv_2d_scale); LOG_WARN(
"No VAE specified with --vae or --force-sdxl-vae-conv-scale flag set, "
"using Conv2D scale %.3f",
vae_conv_2d_scale);
first_stage_model->set_conv2d_scale(vae_conv_2d_scale); first_stage_model->set_conv2d_scale(vae_conv_2d_scale);
} }
first_stage_model->alloc_params_buffer(); first_stage_model->alloc_params_buffer();

View File

@ -153,6 +153,7 @@ typedef struct {
bool diffusion_flash_attn; bool diffusion_flash_attn;
bool diffusion_conv_direct; bool diffusion_conv_direct;
bool vae_conv_direct; bool vae_conv_direct;
bool force_sdxl_vae_conv_scale;
bool chroma_use_dit_mask; bool chroma_use_dit_mask;
bool chroma_use_t5_mask; bool chroma_use_t5_mask;
int chroma_t5_mask_pad; int chroma_t5_mask_pad;