From 196bb895fd0af69ff424f86252ce73f8b02bef7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Fri, 12 Dec 2025 02:39:09 +0100 Subject: [PATCH] patch size consistent with Flux1 --- flux.hpp | 1 - stable-diffusion.cpp | 11 +---------- vae.hpp | 4 ++-- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/flux.hpp b/flux.hpp index fc30987..df3c4c8 100644 --- a/flux.hpp +++ b/flux.hpp @@ -1309,7 +1309,6 @@ namespace Flux { } else if (sd_version_is_longcat(version)) { flux_params.context_in_dim = 3584; flux_params.vec_in_dim = 0; - flux_params.patch_size = 1; } for (auto pair : tensor_storage_map) { std::string tensor_name = pair.first; diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 1e8f04a..f89e426 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -1346,12 +1346,6 @@ public: latent_rgb_bias = flux2_latent_rgb_bias; patch_sz = 2; } - } else if (dim == 64) { - if (sd_version_is_flux(version) || sd_version_is_z_image(version) || sd_version_is_longcat(version)) { - latent_rgb_proj = flux_latent_rgb_proj; - latent_rgb_bias = flux_latent_rgb_bias; - patch_sz = 2; - } } else if (dim == 48) { if (sd_version_is_wan(version)) { latent_rgb_proj = wan_22_latent_rgb_proj; @@ -1916,7 +1910,7 @@ public: int vae_scale_factor = 8; if (version == VERSION_WAN2_2_TI2V) { vae_scale_factor = 16; - } else if (sd_version_is_flux2(version) || sd_version_is_longcat(version)) { + } else if (sd_version_is_flux2(version)) { vae_scale_factor = 16; } else if (version == VERSION_CHROMA_RADIANCE) { vae_scale_factor = 1; @@ -1945,8 +1939,6 @@ public: latent_channel = 3; } else if (sd_version_is_flux2(version)) { latent_channel = 128; - } else if (sd_version_is_longcat(version)) { - latent_channel = 64; } else { latent_channel = 16; } @@ -2247,7 +2239,6 @@ public: sd_version_is_qwen_image(version) || sd_version_is_wan(version) || sd_version_is_flux2(version) || - sd_version_is_longcat(version) || version == VERSION_CHROMA_RADIANCE) { latent = vae_output; } else if (version == VERSION_SD1_PIX2PIX) { diff --git a/vae.hpp b/vae.hpp index 740a565..ad5db1b 100644 --- a/vae.hpp +++ b/vae.hpp @@ -553,7 +553,7 @@ public: struct ggml_tensor* decode(GGMLRunnerContext* ctx, struct ggml_tensor* z) { // z: [N, z_channels, h, w] - if (sd_version_is_flux2(version) || sd_version_is_longcat(version)) { + if (sd_version_is_flux2(version)) { // [N, C*p*p, h, w] -> [N, C, h*p, w*p] int64_t p = 2; @@ -592,7 +592,7 @@ public: auto quant_conv = std::dynamic_pointer_cast(blocks["quant_conv"]); z = quant_conv->forward(ctx, z); // [N, 2*embed_dim, h/8, w/8] } - if (sd_version_is_flux2(version) || sd_version_is_longcat(version)) { + if (sd_version_is_flux2(version)) { z = ggml_ext_chunk(ctx->ggml_ctx, z, 2, 2)[0]; // [N, C, H, W] -> [N, C*p*p, H/p, W/p]