feat: make Wan2.2 5B FLF2V work (#1110)

This commit is contained in:
leejet 2026-06-02 23:16:09 +08:00 committed by GitHub
parent 9c7f9a20b3
commit 2d40a8b2ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -4880,6 +4880,17 @@ static std::optional<ImageGenerationLatents> prepare_video_generation_latents(sd
latents.denoise_mask = sd::full<float>({latents.init_latent.shape()[0], latents.init_latent.shape()[1], latents.init_latent.shape()[2], 1, 1}, 1.f);
sd::ops::fill_slice(&latents.denoise_mask, 2, 0, init_image_latent.shape()[2], 0.0f);
if (!end_image.empty()) {
auto end_img = end_image.reshape({end_image.shape()[0], end_image.shape()[1], 1, end_image.shape()[2], 1});
auto end_image_latent = sd_ctx->sd->encode_first_stage(end_img); // [b, c, 1, h/vae_scale_factor, w/vae_scale_factor]
if (end_image_latent.empty()) {
LOG_ERROR("failed to encode end video frame");
return std::nullopt;
}
sd::ops::slice_assign(&latents.init_latent, 2, latents.init_latent.shape()[2] - 1, latents.init_latent.shape()[2], end_image_latent);
sd::ops::fill_slice(&latents.denoise_mask, 2, latents.init_latent.shape()[2] - 1, latents.init_latent.shape()[2], 0.0f);
}
int64_t t2 = ggml_time_ms();
LOG_INFO("encode_first_stage completed, taking %" PRId64 " ms", t2 - t1);
} else if (sd_ctx->sd->diffusion_model->get_desc() == "Wan2.1-VACE-1.3B" ||