mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-13 05:48:56 +00:00
add --vace-strength option
This commit is contained in:
parent
4b9bf2b513
commit
53aeb555bd
@ -14,7 +14,7 @@ struct DiffusionParams {
|
||||
struct ggml_tensor* y = NULL;
|
||||
struct ggml_tensor* guidance = NULL;
|
||||
std::vector<ggml_tensor*> ref_latents = {};
|
||||
bool increase_ref_index = false;
|
||||
bool increase_ref_index = false;
|
||||
int num_video_frames = -1;
|
||||
std::vector<struct ggml_tensor*> controls = {};
|
||||
float control_strength = 0.f;
|
||||
|
||||
@ -91,10 +91,10 @@ struct SDParams {
|
||||
std::vector<int> high_noise_skip_layers = {7, 8, 9};
|
||||
sd_sample_params_t high_noise_sample_params;
|
||||
|
||||
float moe_boundary = 0.875f;
|
||||
|
||||
int video_frames = 1;
|
||||
int fps = 16;
|
||||
float moe_boundary = 0.875f;
|
||||
int video_frames = 1;
|
||||
int fps = 16;
|
||||
float vace_strength = 1.f;
|
||||
|
||||
float strength = 0.75f;
|
||||
float control_strength = 0.9f;
|
||||
@ -186,6 +186,7 @@ void print_params(SDParams params) {
|
||||
printf(" chroma_use_t5_mask: %s\n", params.chroma_use_t5_mask ? "true" : "false");
|
||||
printf(" chroma_t5_mask_pad: %d\n", params.chroma_t5_mask_pad);
|
||||
printf(" video_frames: %d\n", params.video_frames);
|
||||
printf(" vace_strength: %.2f\n", params.vace_strength);
|
||||
printf(" fps: %d\n", params.fps);
|
||||
free(sample_params_str);
|
||||
free(high_noise_sample_params_str);
|
||||
@ -288,6 +289,7 @@ void print_usage(int argc, const char* argv[]) {
|
||||
printf(" --moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. (default: 0.875)\n");
|
||||
printf(" only enabled if `--high-noise-steps` is set to -1\n");
|
||||
printf(" --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto)\n");
|
||||
printf(" --vace-strength wan vace strength\n");
|
||||
printf(" -v, --verbose print extra info\n");
|
||||
}
|
||||
|
||||
@ -523,6 +525,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
||||
{"", "--control-strength", "", ¶ms.control_strength},
|
||||
{"", "--moe-boundary", "", ¶ms.moe_boundary},
|
||||
{"", "--flow-shift", "", ¶ms.flow_shift},
|
||||
{"", "--vace-strength", "", ¶ms.vace_strength},
|
||||
};
|
||||
|
||||
options.bool_options = {
|
||||
@ -1244,6 +1247,7 @@ int main(int argc, const char* argv[]) {
|
||||
params.strength,
|
||||
params.seed,
|
||||
params.video_frames,
|
||||
params.vace_strength,
|
||||
};
|
||||
|
||||
results = generate_video(sd_ctx, &vid_gen_params, &num_results);
|
||||
|
||||
@ -1119,15 +1119,15 @@ public:
|
||||
}
|
||||
|
||||
DiffusionParams diffusion_params;
|
||||
diffusion_params.x = noised_input;
|
||||
diffusion_params.timesteps = timesteps;
|
||||
diffusion_params.guidance = guidance_tensor;
|
||||
diffusion_params.ref_latents = ref_latents;
|
||||
diffusion_params.x = noised_input;
|
||||
diffusion_params.timesteps = timesteps;
|
||||
diffusion_params.guidance = guidance_tensor;
|
||||
diffusion_params.ref_latents = ref_latents;
|
||||
diffusion_params.increase_ref_index = increase_ref_index;
|
||||
diffusion_params.controls = controls;
|
||||
diffusion_params.control_strength = control_strength;
|
||||
diffusion_params.vace_context = vace_context;
|
||||
diffusion_params.vace_strength = vace_strength;
|
||||
diffusion_params.controls = controls;
|
||||
diffusion_params.control_strength = control_strength;
|
||||
diffusion_params.vace_context = vace_context;
|
||||
diffusion_params.vace_strength = vace_strength;
|
||||
|
||||
if (start_merge_step == -1 || step <= start_merge_step) {
|
||||
// cond
|
||||
@ -1728,6 +1728,7 @@ void sd_vid_gen_params_init(sd_vid_gen_params_t* sd_vid_gen_params) {
|
||||
sd_vid_gen_params->seed = -1;
|
||||
sd_vid_gen_params->video_frames = 6;
|
||||
sd_vid_gen_params->moe_boundary = 0.875f;
|
||||
sd_vid_gen_params->vace_strength = 1.f;
|
||||
}
|
||||
|
||||
struct sd_ctx_t {
|
||||
@ -2644,7 +2645,8 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
||||
{},
|
||||
false,
|
||||
denoise_mask,
|
||||
vace_context);
|
||||
vace_context,
|
||||
sd_vid_gen_params->vace_strength);
|
||||
|
||||
int64_t sampling_end = ggml_time_ms();
|
||||
LOG_INFO("sampling(high noise) completed, taking %.2fs", (sampling_end - sampling_start) * 1.0f / 1000);
|
||||
@ -2678,7 +2680,8 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
||||
{},
|
||||
false,
|
||||
denoise_mask,
|
||||
vace_context);
|
||||
vace_context,
|
||||
sd_vid_gen_params->vace_strength);
|
||||
|
||||
int64_t sampling_end = ggml_time_ms();
|
||||
LOG_INFO("sampling completed, taking %.2fs", (sampling_end - sampling_start) * 1.0f / 1000);
|
||||
|
||||
@ -211,6 +211,7 @@ typedef struct {
|
||||
float strength;
|
||||
int64_t seed;
|
||||
int video_frames;
|
||||
float vace_strength;
|
||||
} sd_vid_gen_params_t;
|
||||
|
||||
typedef struct sd_ctx_t sd_ctx_t;
|
||||
|
||||
14
wan.hpp
14
wan.hpp
@ -1533,12 +1533,12 @@ namespace WAN {
|
||||
}
|
||||
|
||||
virtual struct ggml_tensor* forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* e,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* context,
|
||||
int64_t context_img_len = 257) {
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* e,
|
||||
struct ggml_tensor* pe,
|
||||
struct ggml_tensor* context,
|
||||
int64_t context_img_len = 257) {
|
||||
// x: [N, n_token, dim]
|
||||
// e: [N, 6, dim] or [N, T, 6, dim]
|
||||
// context: [N, context_img_len + context_txt_len, dim]
|
||||
@ -1610,7 +1610,7 @@ namespace WAN {
|
||||
}
|
||||
|
||||
std::pair<ggml_tensor*, ggml_tensor*> forward(struct ggml_context* ctx,
|
||||
ggml_backend_t backend,
|
||||
ggml_backend_t backend,
|
||||
struct ggml_tensor* c,
|
||||
struct ggml_tensor* x,
|
||||
struct ggml_tensor* e,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user