mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-13 05:48:56 +00:00
add --vace-strength option
This commit is contained in:
parent
4b9bf2b513
commit
53aeb555bd
@ -14,7 +14,7 @@ struct DiffusionParams {
|
|||||||
struct ggml_tensor* y = NULL;
|
struct ggml_tensor* y = NULL;
|
||||||
struct ggml_tensor* guidance = NULL;
|
struct ggml_tensor* guidance = NULL;
|
||||||
std::vector<ggml_tensor*> ref_latents = {};
|
std::vector<ggml_tensor*> ref_latents = {};
|
||||||
bool increase_ref_index = false;
|
bool increase_ref_index = false;
|
||||||
int num_video_frames = -1;
|
int num_video_frames = -1;
|
||||||
std::vector<struct ggml_tensor*> controls = {};
|
std::vector<struct ggml_tensor*> controls = {};
|
||||||
float control_strength = 0.f;
|
float control_strength = 0.f;
|
||||||
|
|||||||
@ -91,10 +91,10 @@ struct SDParams {
|
|||||||
std::vector<int> high_noise_skip_layers = {7, 8, 9};
|
std::vector<int> high_noise_skip_layers = {7, 8, 9};
|
||||||
sd_sample_params_t high_noise_sample_params;
|
sd_sample_params_t high_noise_sample_params;
|
||||||
|
|
||||||
float moe_boundary = 0.875f;
|
float moe_boundary = 0.875f;
|
||||||
|
int video_frames = 1;
|
||||||
int video_frames = 1;
|
int fps = 16;
|
||||||
int fps = 16;
|
float vace_strength = 1.f;
|
||||||
|
|
||||||
float strength = 0.75f;
|
float strength = 0.75f;
|
||||||
float control_strength = 0.9f;
|
float control_strength = 0.9f;
|
||||||
@ -186,6 +186,7 @@ void print_params(SDParams params) {
|
|||||||
printf(" chroma_use_t5_mask: %s\n", params.chroma_use_t5_mask ? "true" : "false");
|
printf(" chroma_use_t5_mask: %s\n", params.chroma_use_t5_mask ? "true" : "false");
|
||||||
printf(" chroma_t5_mask_pad: %d\n", params.chroma_t5_mask_pad);
|
printf(" chroma_t5_mask_pad: %d\n", params.chroma_t5_mask_pad);
|
||||||
printf(" video_frames: %d\n", params.video_frames);
|
printf(" video_frames: %d\n", params.video_frames);
|
||||||
|
printf(" vace_strength: %.2f\n", params.vace_strength);
|
||||||
printf(" fps: %d\n", params.fps);
|
printf(" fps: %d\n", params.fps);
|
||||||
free(sample_params_str);
|
free(sample_params_str);
|
||||||
free(high_noise_sample_params_str);
|
free(high_noise_sample_params_str);
|
||||||
@ -288,6 +289,7 @@ void print_usage(int argc, const char* argv[]) {
|
|||||||
printf(" --moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. (default: 0.875)\n");
|
printf(" --moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. (default: 0.875)\n");
|
||||||
printf(" only enabled if `--high-noise-steps` is set to -1\n");
|
printf(" only enabled if `--high-noise-steps` is set to -1\n");
|
||||||
printf(" --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)\n");
|
printf(" --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)\n");
|
||||||
|
printf(" --vace-strength wan vace strength\n");
|
||||||
printf(" -v, --verbose print extra info\n");
|
printf(" -v, --verbose print extra info\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -523,6 +525,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||||||
{"", "--control-strength", "", &params.control_strength},
|
{"", "--control-strength", "", &params.control_strength},
|
||||||
{"", "--moe-boundary", "", &params.moe_boundary},
|
{"", "--moe-boundary", "", &params.moe_boundary},
|
||||||
{"", "--flow-shift", "", &params.flow_shift},
|
{"", "--flow-shift", "", &params.flow_shift},
|
||||||
|
{"", "--vace-strength", "", &params.vace_strength},
|
||||||
};
|
};
|
||||||
|
|
||||||
options.bool_options = {
|
options.bool_options = {
|
||||||
@ -1244,6 +1247,7 @@ int main(int argc, const char* argv[]) {
|
|||||||
params.strength,
|
params.strength,
|
||||||
params.seed,
|
params.seed,
|
||||||
params.video_frames,
|
params.video_frames,
|
||||||
|
params.vace_strength,
|
||||||
};
|
};
|
||||||
|
|
||||||
results = generate_video(sd_ctx, &vid_gen_params, &num_results);
|
results = generate_video(sd_ctx, &vid_gen_params, &num_results);
|
||||||
|
|||||||
@ -1119,15 +1119,15 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
DiffusionParams diffusion_params;
|
DiffusionParams diffusion_params;
|
||||||
diffusion_params.x = noised_input;
|
diffusion_params.x = noised_input;
|
||||||
diffusion_params.timesteps = timesteps;
|
diffusion_params.timesteps = timesteps;
|
||||||
diffusion_params.guidance = guidance_tensor;
|
diffusion_params.guidance = guidance_tensor;
|
||||||
diffusion_params.ref_latents = ref_latents;
|
diffusion_params.ref_latents = ref_latents;
|
||||||
diffusion_params.increase_ref_index = increase_ref_index;
|
diffusion_params.increase_ref_index = increase_ref_index;
|
||||||
diffusion_params.controls = controls;
|
diffusion_params.controls = controls;
|
||||||
diffusion_params.control_strength = control_strength;
|
diffusion_params.control_strength = control_strength;
|
||||||
diffusion_params.vace_context = vace_context;
|
diffusion_params.vace_context = vace_context;
|
||||||
diffusion_params.vace_strength = vace_strength;
|
diffusion_params.vace_strength = vace_strength;
|
||||||
|
|
||||||
if (start_merge_step == -1 || step <= start_merge_step) {
|
if (start_merge_step == -1 || step <= start_merge_step) {
|
||||||
// cond
|
// cond
|
||||||
@ -1728,6 +1728,7 @@ void sd_vid_gen_params_init(sd_vid_gen_params_t* sd_vid_gen_params) {
|
|||||||
sd_vid_gen_params->seed = -1;
|
sd_vid_gen_params->seed = -1;
|
||||||
sd_vid_gen_params->video_frames = 6;
|
sd_vid_gen_params->video_frames = 6;
|
||||||
sd_vid_gen_params->moe_boundary = 0.875f;
|
sd_vid_gen_params->moe_boundary = 0.875f;
|
||||||
|
sd_vid_gen_params->vace_strength = 1.f;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct sd_ctx_t {
|
struct sd_ctx_t {
|
||||||
@ -2644,7 +2645,8 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||||||
{},
|
{},
|
||||||
false,
|
false,
|
||||||
denoise_mask,
|
denoise_mask,
|
||||||
vace_context);
|
vace_context,
|
||||||
|
sd_vid_gen_params->vace_strength);
|
||||||
|
|
||||||
int64_t sampling_end = ggml_time_ms();
|
int64_t sampling_end = ggml_time_ms();
|
||||||
LOG_INFO("sampling(high noise) completed, taking %.2fs", (sampling_end - sampling_start) * 1.0f / 1000);
|
LOG_INFO("sampling(high noise) completed, taking %.2fs", (sampling_end - sampling_start) * 1.0f / 1000);
|
||||||
@ -2678,7 +2680,8 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||||||
{},
|
{},
|
||||||
false,
|
false,
|
||||||
denoise_mask,
|
denoise_mask,
|
||||||
vace_context);
|
vace_context,
|
||||||
|
sd_vid_gen_params->vace_strength);
|
||||||
|
|
||||||
int64_t sampling_end = ggml_time_ms();
|
int64_t sampling_end = ggml_time_ms();
|
||||||
LOG_INFO("sampling completed, taking %.2fs", (sampling_end - sampling_start) * 1.0f / 1000);
|
LOG_INFO("sampling completed, taking %.2fs", (sampling_end - sampling_start) * 1.0f / 1000);
|
||||||
|
|||||||
@ -211,6 +211,7 @@ typedef struct {
|
|||||||
float strength;
|
float strength;
|
||||||
int64_t seed;
|
int64_t seed;
|
||||||
int video_frames;
|
int video_frames;
|
||||||
|
float vace_strength;
|
||||||
} sd_vid_gen_params_t;
|
} sd_vid_gen_params_t;
|
||||||
|
|
||||||
typedef struct sd_ctx_t sd_ctx_t;
|
typedef struct sd_ctx_t sd_ctx_t;
|
||||||
|
|||||||
14
wan.hpp
14
wan.hpp
@ -1533,12 +1533,12 @@ namespace WAN {
|
|||||||
}
|
}
|
||||||
|
|
||||||
virtual struct ggml_tensor* forward(struct ggml_context* ctx,
|
virtual struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||||
ggml_backend_t backend,
|
ggml_backend_t backend,
|
||||||
struct ggml_tensor* x,
|
struct ggml_tensor* x,
|
||||||
struct ggml_tensor* e,
|
struct ggml_tensor* e,
|
||||||
struct ggml_tensor* pe,
|
struct ggml_tensor* pe,
|
||||||
struct ggml_tensor* context,
|
struct ggml_tensor* context,
|
||||||
int64_t context_img_len = 257) {
|
int64_t context_img_len = 257) {
|
||||||
// x: [N, n_token, dim]
|
// x: [N, n_token, dim]
|
||||||
// e: [N, 6, dim] or [N, T, 6, dim]
|
// e: [N, 6, dim] or [N, T, 6, dim]
|
||||||
// context: [N, context_img_len + context_txt_len, dim]
|
// context: [N, context_img_len + context_txt_len, dim]
|
||||||
@ -1610,7 +1610,7 @@ namespace WAN {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::pair<ggml_tensor*, ggml_tensor*> forward(struct ggml_context* ctx,
|
std::pair<ggml_tensor*, ggml_tensor*> forward(struct ggml_context* ctx,
|
||||||
ggml_backend_t backend,
|
ggml_backend_t backend,
|
||||||
struct ggml_tensor* c,
|
struct ggml_tensor* c,
|
||||||
struct ggml_tensor* x,
|
struct ggml_tensor* x,
|
||||||
struct ggml_tensor* e,
|
struct ggml_tensor* e,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user