mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-03-24 02:08:51 +00:00
Compare commits
No commits in common. "0752cc9d3a8ddee25b72ca65d061a28310d97c02" and "c9cd49701a91cb10a07f07512d1bb5b966f05c6e" have entirely different histories.
0752cc9d3a
...
c9cd49701a
@ -44,6 +44,7 @@ Context Options:
|
|||||||
CPU physical cores
|
CPU physical cores
|
||||||
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
||||||
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
|
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
|
||||||
|
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
||||||
--vae-tiling process vae in tiles to reduce memory usage
|
--vae-tiling process vae in tiles to reduce memory usage
|
||||||
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
|
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
|
||||||
@ -108,7 +109,6 @@ Generation Options:
|
|||||||
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
||||||
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
||||||
--eta <float> eta in DDIM, only for DDIM/TCD/res_multistep/res_2s (default: 0)
|
--eta <float> eta in DDIM, only for DDIM/TCD/res_multistep/res_2s (default: 0)
|
||||||
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
|
||||||
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
|
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
|
||||||
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
||||||
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
|
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
|
||||||
|
|||||||
@ -581,6 +581,10 @@ struct SDContextParams {
|
|||||||
"--vae-tile-overlap",
|
"--vae-tile-overlap",
|
||||||
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
|
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
|
||||||
&vae_tiling_params.target_overlap},
|
&vae_tiling_params.target_overlap},
|
||||||
|
{"",
|
||||||
|
"--flow-shift",
|
||||||
|
"shift value for Flow models like SD3.x or WAN (default: auto)",
|
||||||
|
&flow_shift},
|
||||||
};
|
};
|
||||||
|
|
||||||
options.bool_options = {
|
options.bool_options = {
|
||||||
@ -899,6 +903,7 @@ struct SDContextParams {
|
|||||||
<< " photo_maker_path: \"" << photo_maker_path << "\",\n"
|
<< " photo_maker_path: \"" << photo_maker_path << "\",\n"
|
||||||
<< " rng_type: " << sd_rng_type_name(rng_type) << ",\n"
|
<< " rng_type: " << sd_rng_type_name(rng_type) << ",\n"
|
||||||
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
|
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
|
||||||
|
<< " flow_shift: " << (std::isinf(flow_shift) ? "INF" : std::to_string(flow_shift)) << "\n"
|
||||||
<< " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n"
|
<< " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n"
|
||||||
<< " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n"
|
<< " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n"
|
||||||
<< " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
|
<< " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
|
||||||
@ -981,6 +986,7 @@ struct SDContextParams {
|
|||||||
chroma_use_t5_mask,
|
chroma_use_t5_mask,
|
||||||
chroma_t5_mask_pad,
|
chroma_t5_mask_pad,
|
||||||
qwen_image_zero_cond_t,
|
qwen_image_zero_cond_t,
|
||||||
|
flow_shift,
|
||||||
};
|
};
|
||||||
return sd_ctx_params;
|
return sd_ctx_params;
|
||||||
}
|
}
|
||||||
@ -1200,10 +1206,6 @@ struct SDGenerationParams {
|
|||||||
"--eta",
|
"--eta",
|
||||||
"eta in DDIM, only for DDIM and TCD (default: 0)",
|
"eta in DDIM, only for DDIM and TCD (default: 0)",
|
||||||
&sample_params.eta},
|
&sample_params.eta},
|
||||||
{"",
|
|
||||||
"--flow-shift",
|
|
||||||
"shift value for Flow models like SD3.x or WAN (default: auto)",
|
|
||||||
&sample_params.flow_shift},
|
|
||||||
{"",
|
{"",
|
||||||
"--high-noise-cfg-scale",
|
"--high-noise-cfg-scale",
|
||||||
"(high noise) unconditional guidance scale: (default: 7.0)",
|
"(high noise) unconditional guidance scale: (default: 7.0)",
|
||||||
@ -1604,7 +1606,6 @@ struct SDGenerationParams {
|
|||||||
load_if_exists("cfg_scale", sample_params.guidance.txt_cfg);
|
load_if_exists("cfg_scale", sample_params.guidance.txt_cfg);
|
||||||
load_if_exists("img_cfg_scale", sample_params.guidance.img_cfg);
|
load_if_exists("img_cfg_scale", sample_params.guidance.img_cfg);
|
||||||
load_if_exists("guidance", sample_params.guidance.distilled_guidance);
|
load_if_exists("guidance", sample_params.guidance.distilled_guidance);
|
||||||
load_if_exists("flow_shift", sample_params.flow_shift);
|
|
||||||
|
|
||||||
auto load_sampler_if_exists = [&](const char* key, enum sample_method_t& out) {
|
auto load_sampler_if_exists = [&](const char* key, enum sample_method_t& out) {
|
||||||
if (j.contains(key) && j[key].is_string()) {
|
if (j.contains(key) && j[key].is_string()) {
|
||||||
|
|||||||
@ -36,6 +36,7 @@ Context Options:
|
|||||||
CPU physical cores
|
CPU physical cores
|
||||||
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
||||||
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
|
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
|
||||||
|
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
||||||
--vae-tiling process vae in tiles to reduce memory usage
|
--vae-tiling process vae in tiles to reduce memory usage
|
||||||
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
|
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
|
||||||
@ -100,7 +101,6 @@ Default Generation Options:
|
|||||||
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
||||||
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
||||||
--eta <float> eta in DDIM, only for DDIM/TCD/res_multistep/res_2s (default: 0)
|
--eta <float> eta in DDIM, only for DDIM/TCD/res_multistep/res_2s (default: 0)
|
||||||
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
|
||||||
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
|
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
|
||||||
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
||||||
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
|
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
|
||||||
|
|||||||
@ -201,6 +201,7 @@ typedef struct {
|
|||||||
bool chroma_use_t5_mask;
|
bool chroma_use_t5_mask;
|
||||||
int chroma_t5_mask_pad;
|
int chroma_t5_mask_pad;
|
||||||
bool qwen_image_zero_cond_t;
|
bool qwen_image_zero_cond_t;
|
||||||
|
float flow_shift;
|
||||||
} sd_ctx_params_t;
|
} sd_ctx_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -234,7 +235,6 @@ typedef struct {
|
|||||||
int shifted_timestep;
|
int shifted_timestep;
|
||||||
float* custom_sigmas;
|
float* custom_sigmas;
|
||||||
int custom_sigmas_count;
|
int custom_sigmas_count;
|
||||||
float flow_shift;
|
|
||||||
} sd_sample_params_t;
|
} sd_sample_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|||||||
@ -657,8 +657,9 @@ struct DiscreteFlowDenoiser : public Denoiser {
|
|||||||
|
|
||||||
float sigma_data = 1.0f;
|
float sigma_data = 1.0f;
|
||||||
|
|
||||||
DiscreteFlowDenoiser(float shift = 3.0f) {
|
DiscreteFlowDenoiser(float shift = 3.0f)
|
||||||
set_shift(shift);
|
: shift(shift) {
|
||||||
|
set_parameters();
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_parameters() {
|
void set_parameters() {
|
||||||
@ -667,11 +668,6 @@ struct DiscreteFlowDenoiser : public Denoiser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_shift(float shift) {
|
|
||||||
this->shift = shift;
|
|
||||||
set_parameters();
|
|
||||||
}
|
|
||||||
|
|
||||||
float sigma_min() override {
|
float sigma_min() override {
|
||||||
return sigmas[0];
|
return sigmas[0];
|
||||||
}
|
}
|
||||||
@ -714,8 +710,34 @@ float flux_time_shift(float mu, float sigma, float t) {
|
|||||||
return ::expf(mu) / (::expf(mu) + ::powf((1.0f / t - 1.0f), sigma));
|
return ::expf(mu) / (::expf(mu) + ::powf((1.0f / t - 1.0f), sigma));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct FluxFlowDenoiser : public DiscreteFlowDenoiser {
|
struct FluxFlowDenoiser : public Denoiser {
|
||||||
FluxFlowDenoiser() = default;
|
float sigmas[TIMESTEPS];
|
||||||
|
float shift = 1.15f;
|
||||||
|
|
||||||
|
float sigma_data = 1.0f;
|
||||||
|
|
||||||
|
FluxFlowDenoiser(float shift = 1.15f) {
|
||||||
|
set_parameters(shift);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_shift(float shift) {
|
||||||
|
this->shift = shift;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_parameters(float shift) {
|
||||||
|
set_shift(shift);
|
||||||
|
for (int i = 0; i < TIMESTEPS; i++) {
|
||||||
|
sigmas[i] = t_to_sigma(static_cast<float>(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float sigma_min() override {
|
||||||
|
return sigmas[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
float sigma_max() override {
|
||||||
|
return sigmas[TIMESTEPS - 1];
|
||||||
|
}
|
||||||
|
|
||||||
float sigma_to_t(float sigma) override {
|
float sigma_to_t(float sigma) override {
|
||||||
return sigma;
|
return sigma;
|
||||||
@ -725,6 +747,26 @@ struct FluxFlowDenoiser : public DiscreteFlowDenoiser {
|
|||||||
t = t + 1;
|
t = t + 1;
|
||||||
return flux_time_shift(shift, 1.0f, t / TIMESTEPS);
|
return flux_time_shift(shift, 1.0f, t / TIMESTEPS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<float> get_scalings(float sigma) override {
|
||||||
|
float c_skip = 1.0f;
|
||||||
|
float c_out = -sigma;
|
||||||
|
float c_in = 1.0f;
|
||||||
|
return {c_skip, c_out, c_in};
|
||||||
|
}
|
||||||
|
|
||||||
|
// this function will modify noise/latent
|
||||||
|
ggml_tensor* noise_scaling(float sigma, ggml_tensor* noise, ggml_tensor* latent) override {
|
||||||
|
ggml_ext_tensor_scale_inplace(noise, sigma);
|
||||||
|
ggml_ext_tensor_scale_inplace(latent, 1.0f - sigma);
|
||||||
|
ggml_ext_tensor_add_inplace(latent, noise);
|
||||||
|
return latent;
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent) override {
|
||||||
|
ggml_ext_tensor_scale_inplace(latent, 1.0f / (1.0f - sigma));
|
||||||
|
return latent;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Flux2FlowDenoiser : public FluxFlowDenoiser {
|
struct Flux2FlowDenoiser : public FluxFlowDenoiser {
|
||||||
|
|||||||
@ -115,7 +115,6 @@ public:
|
|||||||
int n_threads = -1;
|
int n_threads = -1;
|
||||||
float scale_factor = 0.18215f;
|
float scale_factor = 0.18215f;
|
||||||
float shift_factor = 0.f;
|
float shift_factor = 0.f;
|
||||||
float default_flow_shift = INFINITY;
|
|
||||||
|
|
||||||
std::shared_ptr<Conditioner> cond_stage_model;
|
std::shared_ptr<Conditioner> cond_stage_model;
|
||||||
std::shared_ptr<FrozenCLIPVisionEmbedder> clip_vision; // for svd or wan2.1 i2v
|
std::shared_ptr<FrozenCLIPVisionEmbedder> clip_vision; // for svd or wan2.1 i2v
|
||||||
@ -882,6 +881,7 @@ public:
|
|||||||
// init denoiser
|
// init denoiser
|
||||||
{
|
{
|
||||||
prediction_t pred_type = sd_ctx_params->prediction;
|
prediction_t pred_type = sd_ctx_params->prediction;
|
||||||
|
float flow_shift = sd_ctx_params->flow_shift;
|
||||||
|
|
||||||
if (pred_type == PREDICTION_COUNT) {
|
if (pred_type == PREDICTION_COUNT) {
|
||||||
if (sd_version_is_sd2(version)) {
|
if (sd_version_is_sd2(version)) {
|
||||||
@ -906,19 +906,22 @@ public:
|
|||||||
sd_version_is_qwen_image(version) ||
|
sd_version_is_qwen_image(version) ||
|
||||||
sd_version_is_z_image(version)) {
|
sd_version_is_z_image(version)) {
|
||||||
pred_type = FLOW_PRED;
|
pred_type = FLOW_PRED;
|
||||||
if (sd_version_is_wan(version)) {
|
if (flow_shift == INFINITY) {
|
||||||
default_flow_shift = 5.f;
|
if (sd_version_is_wan(version)) {
|
||||||
} else {
|
flow_shift = 5.f;
|
||||||
default_flow_shift = 3.f;
|
} else {
|
||||||
|
flow_shift = 3.f;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (sd_version_is_flux(version)) {
|
} else if (sd_version_is_flux(version)) {
|
||||||
pred_type = FLUX_FLOW_PRED;
|
pred_type = FLUX_FLOW_PRED;
|
||||||
|
|
||||||
default_flow_shift = 1.0f; // TODO: validate
|
if (flow_shift == INFINITY) {
|
||||||
for (const auto& [name, tensor_storage] : tensor_storage_map) {
|
flow_shift = 1.0f; // TODO: validate
|
||||||
if (starts_with(name, "model.diffusion_model.guidance_in.in_layer.weight")) {
|
for (const auto& [name, tensor_storage] : tensor_storage_map) {
|
||||||
default_flow_shift = 1.15f;
|
if (starts_with(name, "model.diffusion_model.guidance_in.in_layer.weight")) {
|
||||||
break;
|
flow_shift = 1.15f;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (sd_version_is_flux2(version)) {
|
} else if (sd_version_is_flux2(version)) {
|
||||||
@ -942,12 +945,12 @@ public:
|
|||||||
break;
|
break;
|
||||||
case FLOW_PRED: {
|
case FLOW_PRED: {
|
||||||
LOG_INFO("running in FLOW mode");
|
LOG_INFO("running in FLOW mode");
|
||||||
denoiser = std::make_shared<DiscreteFlowDenoiser>();
|
denoiser = std::make_shared<DiscreteFlowDenoiser>(flow_shift);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FLUX_FLOW_PRED: {
|
case FLUX_FLOW_PRED: {
|
||||||
LOG_INFO("running in Flux FLOW mode");
|
LOG_INFO("running in Flux FLOW mode");
|
||||||
denoiser = std::make_shared<FluxFlowDenoiser>();
|
denoiser = std::make_shared<FluxFlowDenoiser>(flow_shift);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FLUX2_FLOW_PRED: {
|
case FLUX2_FLOW_PRED: {
|
||||||
@ -2708,16 +2711,6 @@ public:
|
|||||||
ggml_ext_tensor_clamp_inplace(result, 0.0f, 1.0f);
|
ggml_ext_tensor_clamp_inplace(result, 0.0f, 1.0f);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_flow_shift(float flow_shift = INFINITY) {
|
|
||||||
auto flow_denoiser = std::dynamic_pointer_cast<DiscreteFlowDenoiser>(denoiser);
|
|
||||||
if (flow_denoiser) {
|
|
||||||
if (flow_shift == INFINITY) {
|
|
||||||
flow_shift = default_flow_shift;
|
|
||||||
}
|
|
||||||
flow_denoiser->set_shift(flow_shift);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*================================================= SD API ==================================================*/
|
/*================================================= SD API ==================================================*/
|
||||||
@ -2938,6 +2931,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
|
|||||||
sd_ctx_params->chroma_use_dit_mask = true;
|
sd_ctx_params->chroma_use_dit_mask = true;
|
||||||
sd_ctx_params->chroma_use_t5_mask = false;
|
sd_ctx_params->chroma_use_t5_mask = false;
|
||||||
sd_ctx_params->chroma_t5_mask_pad = 1;
|
sd_ctx_params->chroma_t5_mask_pad = 1;
|
||||||
|
sd_ctx_params->flow_shift = INFINITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
|
char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
|
||||||
@ -3029,7 +3023,6 @@ void sd_sample_params_init(sd_sample_params_t* sample_params) {
|
|||||||
sample_params->sample_steps = 20;
|
sample_params->sample_steps = 20;
|
||||||
sample_params->custom_sigmas = nullptr;
|
sample_params->custom_sigmas = nullptr;
|
||||||
sample_params->custom_sigmas_count = 0;
|
sample_params->custom_sigmas_count = 0;
|
||||||
sample_params->flow_shift = INFINITY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
|
char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
|
||||||
@ -3050,8 +3043,7 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
|
|||||||
"sample_method: %s, "
|
"sample_method: %s, "
|
||||||
"sample_steps: %d, "
|
"sample_steps: %d, "
|
||||||
"eta: %.2f, "
|
"eta: %.2f, "
|
||||||
"shifted_timestep: %d, "
|
"shifted_timestep: %d)",
|
||||||
"flow_shift: %.2f)",
|
|
||||||
sample_params->guidance.txt_cfg,
|
sample_params->guidance.txt_cfg,
|
||||||
std::isfinite(sample_params->guidance.img_cfg)
|
std::isfinite(sample_params->guidance.img_cfg)
|
||||||
? sample_params->guidance.img_cfg
|
? sample_params->guidance.img_cfg
|
||||||
@ -3065,8 +3057,7 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
|
|||||||
sd_sample_method_name(sample_params->sample_method),
|
sd_sample_method_name(sample_params->sample_method),
|
||||||
sample_params->sample_steps,
|
sample_params->sample_steps,
|
||||||
sample_params->eta,
|
sample_params->eta,
|
||||||
sample_params->shifted_timestep,
|
sample_params->shifted_timestep);
|
||||||
sample_params->flow_shift);
|
|
||||||
|
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
@ -3537,8 +3528,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||||||
|
|
||||||
size_t t0 = ggml_time_ms();
|
size_t t0 = ggml_time_ms();
|
||||||
|
|
||||||
sd_ctx->sd->set_flow_shift(sd_img_gen_params->sample_params.flow_shift);
|
|
||||||
|
|
||||||
// Apply lora
|
// Apply lora
|
||||||
sd_ctx->sd->apply_loras(sd_img_gen_params->loras, sd_img_gen_params->lora_count);
|
sd_ctx->sd->apply_loras(sd_img_gen_params->loras, sd_img_gen_params->lora_count);
|
||||||
|
|
||||||
@ -3814,8 +3803,6 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||||||
}
|
}
|
||||||
LOG_INFO("generate_video %dx%dx%d", width, height, frames);
|
LOG_INFO("generate_video %dx%dx%d", width, height, frames);
|
||||||
|
|
||||||
sd_ctx->sd->set_flow_shift(sd_vid_gen_params->sample_params.flow_shift);
|
|
||||||
|
|
||||||
enum sample_method_t sample_method = sd_vid_gen_params->sample_params.sample_method;
|
enum sample_method_t sample_method = sd_vid_gen_params->sample_params.sample_method;
|
||||||
if (sample_method == SAMPLE_METHOD_COUNT) {
|
if (sample_method == SAMPLE_METHOD_COUNT) {
|
||||||
sample_method = sd_get_default_sample_method(sd_ctx);
|
sample_method = sd_get_default_sample_method(sd_ctx);
|
||||||
|
|||||||
@ -141,7 +141,7 @@ public:
|
|||||||
v = ggml_reshape_3d(ctx->ggml_ctx, v, c, h * w, n); // [N, h * w, in_channels]
|
v = ggml_reshape_3d(ctx->ggml_ctx, v, c, h * w, n); // [N, h * w, in_channels]
|
||||||
}
|
}
|
||||||
|
|
||||||
h_ = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, 1, nullptr, false, ctx->flash_attn_enabled);
|
h_ = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, 1, nullptr, true, ctx->flash_attn_enabled);
|
||||||
|
|
||||||
if (use_linear) {
|
if (use_linear) {
|
||||||
h_ = proj_out->forward(ctx, h_); // [N, h * w, in_channels]
|
h_ = proj_out->forward(ctx, h_); // [N, h * w, in_channels]
|
||||||
|
|||||||
@ -572,8 +572,8 @@ namespace WAN {
|
|||||||
auto v = qkv_vec[2];
|
auto v = qkv_vec[2];
|
||||||
v = ggml_reshape_3d(ctx->ggml_ctx, v, h * w, c, n); // [t, c, h * w]
|
v = ggml_reshape_3d(ctx->ggml_ctx, v, h * w, c, n); // [t, c, h * w]
|
||||||
|
|
||||||
v = ggml_cont(ctx->ggml_ctx, ggml_ext_torch_permute(ctx->ggml_ctx, v, 1, 0, 2, 3)); // [t, h * w, c]
|
v = ggml_cont(ctx->ggml_ctx, ggml_ext_torch_permute(ctx->ggml_ctx, v, 1, 0, 2, 3)); // [t, h * w, c]
|
||||||
x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, 1, nullptr, false, ctx->flash_attn_enabled); // [t, h * w, c]
|
x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, 1, nullptr, true, ctx->flash_attn_enabled); // [t, h * w, c]
|
||||||
|
|
||||||
x = ggml_ext_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 1, 0, 2, 3)); // [t, c, h * w]
|
x = ggml_ext_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 1, 0, 2, 3)); // [t, c, h * w]
|
||||||
x = ggml_reshape_4d(ctx->ggml_ctx, x, w, h, c, n); // [t, c, h, w]
|
x = ggml_reshape_4d(ctx->ggml_ctx, x, w, h, c, n); // [t, c, h, w]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user