add --vace-strength option

This commit is contained in:
leejet 2025-09-08 23:02:38 +08:00
parent 4b9bf2b513
commit 53aeb555bd
5 changed files with 30 additions and 22 deletions

View File

@ -14,7 +14,7 @@ struct DiffusionParams {
struct ggml_tensor* y = NULL;
struct ggml_tensor* guidance = NULL;
std::vector<ggml_tensor*> ref_latents = {};
bool increase_ref_index = false;
bool increase_ref_index = false;
int num_video_frames = -1;
std::vector<struct ggml_tensor*> controls = {};
float control_strength = 0.f;

View File

@ -91,10 +91,10 @@ struct SDParams {
std::vector<int> high_noise_skip_layers = {7, 8, 9};
sd_sample_params_t high_noise_sample_params;
float moe_boundary = 0.875f;
int video_frames = 1;
int fps = 16;
float moe_boundary = 0.875f;
int video_frames = 1;
int fps = 16;
float vace_strength = 1.f;
float strength = 0.75f;
float control_strength = 0.9f;
@ -186,6 +186,7 @@ void print_params(SDParams params) {
printf(" chroma_use_t5_mask: %s\n", params.chroma_use_t5_mask ? "true" : "false");
printf(" chroma_t5_mask_pad: %d\n", params.chroma_t5_mask_pad);
printf(" video_frames: %d\n", params.video_frames);
printf(" vace_strength: %.2f\n", params.vace_strength);
printf(" fps: %d\n", params.fps);
free(sample_params_str);
free(high_noise_sample_params_str);
@ -288,6 +289,7 @@ void print_usage(int argc, const char* argv[]) {
printf(" --moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. (default: 0.875)\n");
printf(" only enabled if `--high-noise-steps` is set to -1\n");
printf(" --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)\n");
printf(" --vace-strength wan vace strength\n");
printf(" -v, --verbose print extra info\n");
}
@ -523,6 +525,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
{"", "--control-strength", "", &params.control_strength},
{"", "--moe-boundary", "", &params.moe_boundary},
{"", "--flow-shift", "", &params.flow_shift},
{"", "--vace-strength", "", &params.vace_strength},
};
options.bool_options = {
@ -1244,6 +1247,7 @@ int main(int argc, const char* argv[]) {
params.strength,
params.seed,
params.video_frames,
params.vace_strength,
};
results = generate_video(sd_ctx, &vid_gen_params, &num_results);

View File

@ -1119,15 +1119,15 @@ public:
}
DiffusionParams diffusion_params;
diffusion_params.x = noised_input;
diffusion_params.timesteps = timesteps;
diffusion_params.guidance = guidance_tensor;
diffusion_params.ref_latents = ref_latents;
diffusion_params.x = noised_input;
diffusion_params.timesteps = timesteps;
diffusion_params.guidance = guidance_tensor;
diffusion_params.ref_latents = ref_latents;
diffusion_params.increase_ref_index = increase_ref_index;
diffusion_params.controls = controls;
diffusion_params.control_strength = control_strength;
diffusion_params.vace_context = vace_context;
diffusion_params.vace_strength = vace_strength;
diffusion_params.controls = controls;
diffusion_params.control_strength = control_strength;
diffusion_params.vace_context = vace_context;
diffusion_params.vace_strength = vace_strength;
if (start_merge_step == -1 || step <= start_merge_step) {
// cond
@ -1728,6 +1728,7 @@ void sd_vid_gen_params_init(sd_vid_gen_params_t* sd_vid_gen_params) {
sd_vid_gen_params->seed = -1;
sd_vid_gen_params->video_frames = 6;
sd_vid_gen_params->moe_boundary = 0.875f;
sd_vid_gen_params->vace_strength = 1.f;
}
struct sd_ctx_t {
@ -2644,7 +2645,8 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
{},
false,
denoise_mask,
vace_context);
vace_context,
sd_vid_gen_params->vace_strength);
int64_t sampling_end = ggml_time_ms();
LOG_INFO("sampling(high noise) completed, taking %.2fs", (sampling_end - sampling_start) * 1.0f / 1000);
@ -2678,7 +2680,8 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
{},
false,
denoise_mask,
vace_context);
vace_context,
sd_vid_gen_params->vace_strength);
int64_t sampling_end = ggml_time_ms();
LOG_INFO("sampling completed, taking %.2fs", (sampling_end - sampling_start) * 1.0f / 1000);

View File

@ -211,6 +211,7 @@ typedef struct {
float strength;
int64_t seed;
int video_frames;
float vace_strength;
} sd_vid_gen_params_t;
typedef struct sd_ctx_t sd_ctx_t;

14
wan.hpp
View File

@ -1533,12 +1533,12 @@ namespace WAN {
}
virtual struct ggml_tensor* forward(struct ggml_context* ctx,
ggml_backend_t backend,
struct ggml_tensor* x,
struct ggml_tensor* e,
struct ggml_tensor* pe,
struct ggml_tensor* context,
int64_t context_img_len = 257) {
ggml_backend_t backend,
struct ggml_tensor* x,
struct ggml_tensor* e,
struct ggml_tensor* pe,
struct ggml_tensor* context,
int64_t context_img_len = 257) {
// x: [N, n_token, dim]
// e: [N, 6, dim] or [N, T, 6, dim]
// context: [N, context_img_len + context_txt_len, dim]
@ -1610,7 +1610,7 @@ namespace WAN {
}
std::pair<ggml_tensor*, ggml_tensor*> forward(struct ggml_context* ctx,
ggml_backend_t backend,
ggml_backend_t backend,
struct ggml_tensor* c,
struct ggml_tensor* x,
struct ggml_tensor* e,