mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-31 17:53:35 +00:00
Compare commits
No commits in common. "79426d578ee976fe4b43887be229a3a050f7dd4c" and "0ebe6fe118f125665939b27c89f34ed38716bff8" have entirely different histories.
79426d578e
...
0ebe6fe118
9
.github/workflows/build.yml
vendored
9
.github/workflows/build.yml
vendored
@ -296,10 +296,6 @@ jobs:
|
|||||||
pattern: sd-*
|
pattern: sd-*
|
||||||
merge-multiple: true
|
merge-multiple: true
|
||||||
|
|
||||||
- name: Get commit count
|
|
||||||
id: commit_count
|
|
||||||
run: echo "count=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
- name: Get commit hash
|
- name: Get commit hash
|
||||||
id: commit
|
id: commit
|
||||||
uses: pr-mpt/actions-commit-hash@v2
|
uses: pr-mpt/actions-commit-hash@v2
|
||||||
@ -310,10 +306,7 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
with:
|
with:
|
||||||
tag_name: >
|
tag_name: ${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}
|
||||||
${{ github.ref_name == 'master' &&
|
|
||||||
format('release_{0}_{1}', steps.commit_count.outputs.count, steps.commit.outputs.short) ||
|
|
||||||
format('{0}-{1}', env.BRANCH_NAME, steps.commit.outputs.short) }}
|
|
||||||
|
|
||||||
- name: Upload release
|
- name: Upload release
|
||||||
id: upload_release
|
id: upload_release
|
||||||
|
|||||||
@ -326,10 +326,9 @@ arguments:
|
|||||||
--skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])
|
--skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])
|
||||||
--skip-layer-start START SLG enabling point: (default: 0.01)
|
--skip-layer-start START SLG enabling point: (default: 0.01)
|
||||||
--skip-layer-end END SLG disabling point: (default: 0.2)
|
--skip-layer-end END SLG disabling point: (default: 0.2)
|
||||||
--scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)
|
--scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)
|
||||||
--sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
|
--sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
|
||||||
sampling method (default: "euler" for Flux/SD3/Wan, "euler_a" otherwise)
|
sampling method (default: "euler" for Flux/SD3/Wan, "euler_a" otherwise)
|
||||||
--timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
|
|
||||||
--steps STEPS number of sample steps (default: 20)
|
--steps STEPS number of sample steps (default: 20)
|
||||||
--high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)
|
--high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)
|
||||||
--high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
|
--high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
|
||||||
@ -340,7 +339,7 @@ arguments:
|
|||||||
--high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])
|
--high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])
|
||||||
--high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)
|
--high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)
|
||||||
--high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)
|
--high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)
|
||||||
--high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)
|
--high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)
|
||||||
--high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
|
--high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
|
||||||
(high noise) sampling method (default: "euler_a")
|
(high noise) sampling method (default: "euler_a")
|
||||||
--high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)
|
--high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)
|
||||||
@ -353,7 +352,7 @@ arguments:
|
|||||||
--rng {std_default, cuda} RNG (default: cuda)
|
--rng {std_default, cuda} RNG (default: cuda)
|
||||||
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
|
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
|
||||||
-b, --batch-count COUNT number of images to generate
|
-b, --batch-count COUNT number of images to generate
|
||||||
--clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
|
--clip-skip N ignore last_dot_pos layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
|
||||||
<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
|
<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
|
||||||
--vae-tiling process vae in tiles to reduce memory usage
|
--vae-tiling process vae in tiles to reduce memory usage
|
||||||
--vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)
|
--vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)
|
||||||
|
|||||||
69
denoiser.hpp
69
denoiser.hpp
@ -232,25 +232,6 @@ struct GITSSchedule : SigmaSchedule {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SGMUniformSchedule : SigmaSchedule {
|
|
||||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min_in, float sigma_max_in, t_to_sigma_t t_to_sigma_func) override {
|
|
||||||
std::vector<float> result;
|
|
||||||
if (n == 0) {
|
|
||||||
result.push_back(0.0f);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
result.reserve(n + 1);
|
|
||||||
int t_max = TIMESTEPS - 1;
|
|
||||||
int t_min = 0;
|
|
||||||
std::vector<float> timesteps = linear_space(static_cast<float>(t_max), static_cast<float>(t_min), n + 1);
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
result.push_back(t_to_sigma_func(timesteps[i]));
|
|
||||||
}
|
|
||||||
result.push_back(0.0f);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct KarrasSchedule : SigmaSchedule {
|
struct KarrasSchedule : SigmaSchedule {
|
||||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
|
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
|
||||||
// These *COULD* be function arguments here,
|
// These *COULD* be function arguments here,
|
||||||
@ -270,35 +251,6 @@ struct KarrasSchedule : SigmaSchedule {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SimpleSchedule : SigmaSchedule {
|
|
||||||
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
|
|
||||||
std::vector<float> result_sigmas;
|
|
||||||
|
|
||||||
if (n == 0) {
|
|
||||||
return result_sigmas;
|
|
||||||
}
|
|
||||||
|
|
||||||
result_sigmas.reserve(n + 1);
|
|
||||||
|
|
||||||
int model_sigmas_len = TIMESTEPS;
|
|
||||||
|
|
||||||
float step_factor = static_cast<float>(model_sigmas_len) / static_cast<float>(n);
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < n; ++i) {
|
|
||||||
int offset_from_start_of_py_array = static_cast<int>(static_cast<float>(i) * step_factor);
|
|
||||||
int timestep_index = model_sigmas_len - 1 - offset_from_start_of_py_array;
|
|
||||||
|
|
||||||
if (timestep_index < 0) {
|
|
||||||
timestep_index = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
result_sigmas.push_back(t_to_sigma(static_cast<float>(timestep_index)));
|
|
||||||
}
|
|
||||||
result_sigmas.push_back(0.0f);
|
|
||||||
return result_sigmas;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Close to Beta Schedule, but incredibly simple in code.
|
// Close to Beta Schedule, but incredibly simple in code.
|
||||||
struct SmoothStepSchedule : SigmaSchedule {
|
struct SmoothStepSchedule : SigmaSchedule {
|
||||||
static constexpr float smoothstep(float x) {
|
static constexpr float smoothstep(float x) {
|
||||||
@ -770,6 +722,7 @@ static void sample_k_diffusion(sample_method_t method,
|
|||||||
} break;
|
} break;
|
||||||
case DPMPP2S_A: {
|
case DPMPP2S_A: {
|
||||||
struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x);
|
struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x);
|
||||||
|
struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x);
|
||||||
struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x);
|
struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x);
|
||||||
|
|
||||||
for (int i = 0; i < steps; i++) {
|
for (int i = 0; i < steps; i++) {
|
||||||
@ -784,15 +737,22 @@ static void sample_k_diffusion(sample_method_t method,
|
|||||||
auto sigma_fn = [](float t) -> float { return exp(-t); };
|
auto sigma_fn = [](float t) -> float { return exp(-t); };
|
||||||
|
|
||||||
if (sigma_down == 0) {
|
if (sigma_down == 0) {
|
||||||
// d = (x - denoised) / sigmas[i];
|
// Euler step
|
||||||
// dt = sigma_down - sigmas[i];
|
float* vec_d = (float*)d->data;
|
||||||
// x += d * dt;
|
|
||||||
// => x = denoised
|
|
||||||
float* vec_x = (float*)x->data;
|
float* vec_x = (float*)x->data;
|
||||||
float* vec_denoised = (float*)denoised->data;
|
float* vec_denoised = (float*)denoised->data;
|
||||||
|
|
||||||
for (int j = 0; j < ggml_nelements(x); j++) {
|
for (int j = 0; j < ggml_nelements(d); j++) {
|
||||||
vec_x[j] = vec_denoised[j];
|
vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: If sigma_down == 0, isn't this wrong?
|
||||||
|
// But
|
||||||
|
// https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/sampling.py#L525
|
||||||
|
// has this exactly the same way.
|
||||||
|
float dt = sigma_down - sigmas[i];
|
||||||
|
for (int j = 0; j < ggml_nelements(d); j++) {
|
||||||
|
vec_x[j] = vec_x[j] + vec_d[j] * dt;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// DPM-Solver++(2S)
|
// DPM-Solver++(2S)
|
||||||
@ -801,6 +761,7 @@ static void sample_k_diffusion(sample_method_t method,
|
|||||||
float h = t_next - t;
|
float h = t_next - t;
|
||||||
float s = t + 0.5f * h;
|
float s = t + 0.5f * h;
|
||||||
|
|
||||||
|
float* vec_d = (float*)d->data;
|
||||||
float* vec_x = (float*)x->data;
|
float* vec_x = (float*)x->data;
|
||||||
float* vec_x2 = (float*)x2->data;
|
float* vec_x2 = (float*)x2->data;
|
||||||
float* vec_denoised = (float*)denoised->data;
|
float* vec_denoised = (float*)denoised->data;
|
||||||
|
|||||||
@ -248,10 +248,9 @@ void print_usage(int argc, const char* argv[]) {
|
|||||||
printf(" --skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])\n");
|
printf(" --skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])\n");
|
||||||
printf(" --skip-layer-start START SLG enabling point: (default: 0.01)\n");
|
printf(" --skip-layer-start START SLG enabling point: (default: 0.01)\n");
|
||||||
printf(" --skip-layer-end END SLG disabling point: (default: 0.2)\n");
|
printf(" --skip-layer-end END SLG disabling point: (default: 0.2)\n");
|
||||||
printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
|
printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)\n");
|
||||||
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
|
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
|
||||||
printf(" sampling method (default: \"euler\" for Flux/SD3/Wan, \"euler_a\" otherwise)\n");
|
printf(" sampling method (default: \"euler\" for Flux/SD3/Wan, \"euler_a\" otherwise)\n");
|
||||||
printf(" --timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant\n");
|
|
||||||
printf(" --steps STEPS number of sample steps (default: 20)\n");
|
printf(" --steps STEPS number of sample steps (default: 20)\n");
|
||||||
printf(" --high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)\n");
|
printf(" --high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)\n");
|
||||||
printf(" --high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)\n");
|
printf(" --high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)\n");
|
||||||
@ -262,7 +261,7 @@ void print_usage(int argc, const char* argv[]) {
|
|||||||
printf(" --high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])\n");
|
printf(" --high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])\n");
|
||||||
printf(" --high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)\n");
|
printf(" --high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)\n");
|
||||||
printf(" --high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)\n");
|
printf(" --high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)\n");
|
||||||
printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
|
printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)\n");
|
||||||
printf(" --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
|
printf(" --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
|
||||||
printf(" (high noise) sampling method (default: \"euler_a\")\n");
|
printf(" (high noise) sampling method (default: \"euler_a\")\n");
|
||||||
printf(" --high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)\n");
|
printf(" --high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)\n");
|
||||||
@ -275,7 +274,7 @@ void print_usage(int argc, const char* argv[]) {
|
|||||||
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
|
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
|
||||||
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
|
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
|
||||||
printf(" -b, --batch-count COUNT number of images to generate\n");
|
printf(" -b, --batch-count COUNT number of images to generate\n");
|
||||||
printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
|
printf(" --clip-skip N ignore last_dot_pos layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
|
||||||
printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
|
printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
|
||||||
printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
|
printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
|
||||||
printf(" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n");
|
printf(" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n");
|
||||||
@ -521,7 +520,6 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||||||
{"", "--chroma-t5-mask-pad", "", &params.chroma_t5_mask_pad},
|
{"", "--chroma-t5-mask-pad", "", &params.chroma_t5_mask_pad},
|
||||||
{"", "--video-frames", "", &params.video_frames},
|
{"", "--video-frames", "", &params.video_frames},
|
||||||
{"", "--fps", "", &params.fps},
|
{"", "--fps", "", &params.fps},
|
||||||
{"", "--timestep-shift", "", &params.sample_params.shifted_timestep},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
options.float_options = {
|
options.float_options = {
|
||||||
@ -877,11 +875,6 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.sample_params.shifted_timestep < 0 || params.sample_params.shifted_timestep > 1000) {
|
|
||||||
fprintf(stderr, "error: timestep-shift must be between 0 and 1000\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (params.upscale_repeats < 1) {
|
if (params.upscale_repeats < 1) {
|
||||||
fprintf(stderr, "error: upscale multiplier must be at least 1\n");
|
fprintf(stderr, "error: upscale multiplier must be at least 1\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
|
|||||||
@ -747,16 +747,6 @@ public:
|
|||||||
denoiser->scheduler = std::make_shared<GITSSchedule>();
|
denoiser->scheduler = std::make_shared<GITSSchedule>();
|
||||||
denoiser->scheduler->version = version;
|
denoiser->scheduler->version = version;
|
||||||
break;
|
break;
|
||||||
case SGM_UNIFORM:
|
|
||||||
LOG_INFO("Running with SGM Uniform schedule");
|
|
||||||
denoiser->scheduler = std::make_shared<SGMUniformSchedule>();
|
|
||||||
denoiser->scheduler->version = version;
|
|
||||||
break;
|
|
||||||
case SIMPLE:
|
|
||||||
LOG_INFO("Running with Simple schedule");
|
|
||||||
denoiser->scheduler = std::make_shared<SimpleSchedule>();
|
|
||||||
denoiser->scheduler->version = version;
|
|
||||||
break;
|
|
||||||
case SMOOTHSTEP:
|
case SMOOTHSTEP:
|
||||||
LOG_INFO("Running with SmoothStep scheduler");
|
LOG_INFO("Running with SmoothStep scheduler");
|
||||||
denoiser->scheduler = std::make_shared<SmoothStepSchedule>();
|
denoiser->scheduler = std::make_shared<SmoothStepSchedule>();
|
||||||
@ -1043,7 +1033,6 @@ public:
|
|||||||
float control_strength,
|
float control_strength,
|
||||||
sd_guidance_params_t guidance,
|
sd_guidance_params_t guidance,
|
||||||
float eta,
|
float eta,
|
||||||
int shifted_timestep,
|
|
||||||
sample_method_t method,
|
sample_method_t method,
|
||||||
const std::vector<float>& sigmas,
|
const std::vector<float>& sigmas,
|
||||||
int start_merge_step,
|
int start_merge_step,
|
||||||
@ -1053,10 +1042,6 @@ public:
|
|||||||
ggml_tensor* denoise_mask = NULL,
|
ggml_tensor* denoise_mask = NULL,
|
||||||
ggml_tensor* vace_context = NULL,
|
ggml_tensor* vace_context = NULL,
|
||||||
float vace_strength = 1.f) {
|
float vace_strength = 1.f) {
|
||||||
if (shifted_timestep > 0 && !sd_version_is_sdxl(version)) {
|
|
||||||
LOG_WARN("timestep shifting is only supported for SDXL models!");
|
|
||||||
shifted_timestep = 0;
|
|
||||||
}
|
|
||||||
std::vector<int> skip_layers(guidance.slg.layers, guidance.slg.layers + guidance.slg.layer_count);
|
std::vector<int> skip_layers(guidance.slg.layers, guidance.slg.layers + guidance.slg.layer_count);
|
||||||
|
|
||||||
float cfg_scale = guidance.txt_cfg;
|
float cfg_scale = guidance.txt_cfg;
|
||||||
@ -1117,17 +1102,7 @@ public:
|
|||||||
float c_in = scaling[2];
|
float c_in = scaling[2];
|
||||||
|
|
||||||
float t = denoiser->sigma_to_t(sigma);
|
float t = denoiser->sigma_to_t(sigma);
|
||||||
std::vector<float> timesteps_vec;
|
std::vector<float> timesteps_vec(1, t); // [N, ]
|
||||||
if (shifted_timestep > 0 && sd_version_is_sdxl(version)) {
|
|
||||||
float shifted_t_float = t * (float(shifted_timestep) / float(TIMESTEPS));
|
|
||||||
int64_t shifted_t = static_cast<int64_t>(roundf(shifted_t_float));
|
|
||||||
shifted_t = std::max((int64_t)0, std::min((int64_t)(TIMESTEPS - 1), shifted_t));
|
|
||||||
LOG_DEBUG("shifting timestep from %.2f to %" PRId64 " (sigma: %.4f)", t, shifted_t, sigma);
|
|
||||||
timesteps_vec.assign(1, (float)shifted_t);
|
|
||||||
} else {
|
|
||||||
timesteps_vec.assign(1, t);
|
|
||||||
}
|
|
||||||
|
|
||||||
timesteps_vec = process_timesteps(timesteps_vec, init_latent, denoise_mask);
|
timesteps_vec = process_timesteps(timesteps_vec, init_latent, denoise_mask);
|
||||||
auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec);
|
auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec);
|
||||||
std::vector<float> guidance_vec(1, guidance.distilled_guidance);
|
std::vector<float> guidance_vec(1, guidance.distilled_guidance);
|
||||||
@ -1225,19 +1200,6 @@ public:
|
|||||||
float* vec_input = (float*)input->data;
|
float* vec_input = (float*)input->data;
|
||||||
float* positive_data = (float*)out_cond->data;
|
float* positive_data = (float*)out_cond->data;
|
||||||
int ne_elements = (int)ggml_nelements(denoised);
|
int ne_elements = (int)ggml_nelements(denoised);
|
||||||
|
|
||||||
if (shifted_timestep > 0 && sd_version_is_sdxl(version)) {
|
|
||||||
int64_t shifted_t_idx = static_cast<int64_t>(roundf(timesteps_vec[0]));
|
|
||||||
float shifted_sigma = denoiser->t_to_sigma((float)shifted_t_idx);
|
|
||||||
std::vector<float> shifted_scaling = denoiser->get_scalings(shifted_sigma);
|
|
||||||
float shifted_c_skip = shifted_scaling[0];
|
|
||||||
float shifted_c_out = shifted_scaling[1];
|
|
||||||
float shifted_c_in = shifted_scaling[2];
|
|
||||||
|
|
||||||
c_skip = shifted_c_skip * c_in / shifted_c_in;
|
|
||||||
c_out = shifted_c_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < ne_elements; i++) {
|
for (int i = 0; i < ne_elements; i++) {
|
||||||
float latent_result = positive_data[i];
|
float latent_result = positive_data[i];
|
||||||
if (has_unconditioned) {
|
if (has_unconditioned) {
|
||||||
@ -1260,7 +1222,6 @@ public:
|
|||||||
// denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
|
// denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
|
||||||
vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip;
|
vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip;
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t t1 = ggml_time_us();
|
int64_t t1 = ggml_time_us();
|
||||||
if (step > 0) {
|
if (step > 0) {
|
||||||
pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f);
|
pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f);
|
||||||
@ -1362,15 +1323,15 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!use_tiny_autoencoder) {
|
if (!use_tiny_autoencoder) {
|
||||||
|
float tile_overlap;
|
||||||
|
int tile_size_x, tile_size_y;
|
||||||
|
// multiply tile size for encode to keep the compute buffer size consistent
|
||||||
|
get_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params, W, H, 1.30539f);
|
||||||
|
|
||||||
|
LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y);
|
||||||
|
|
||||||
process_vae_input_tensor(x);
|
process_vae_input_tensor(x);
|
||||||
if (vae_tiling_params.enabled && !encode_video) {
|
if (vae_tiling_params.enabled && !encode_video) {
|
||||||
float tile_overlap;
|
|
||||||
int tile_size_x, tile_size_y;
|
|
||||||
// multiply tile size for encode to keep the compute buffer size consistent
|
|
||||||
get_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params, W, H, 1.30539f);
|
|
||||||
|
|
||||||
LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y);
|
|
||||||
|
|
||||||
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
||||||
first_stage_model->compute(n_threads, in, false, &out, work_ctx);
|
first_stage_model->compute(n_threads, in, false, &out, work_ctx);
|
||||||
};
|
};
|
||||||
@ -1507,15 +1468,15 @@ public:
|
|||||||
}
|
}
|
||||||
int64_t t0 = ggml_time_ms();
|
int64_t t0 = ggml_time_ms();
|
||||||
if (!use_tiny_autoencoder) {
|
if (!use_tiny_autoencoder) {
|
||||||
|
float tile_overlap;
|
||||||
|
int tile_size_x, tile_size_y;
|
||||||
|
get_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params, x->ne[0], x->ne[1]);
|
||||||
|
|
||||||
|
LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y);
|
||||||
|
|
||||||
process_latent_out(x);
|
process_latent_out(x);
|
||||||
// x = load_tensor_from_file(work_ctx, "wan_vae_z.bin");
|
// x = load_tensor_from_file(work_ctx, "wan_vae_z.bin");
|
||||||
if (vae_tiling_params.enabled && !decode_video) {
|
if (vae_tiling_params.enabled && !decode_video) {
|
||||||
float tile_overlap;
|
|
||||||
int tile_size_x, tile_size_y;
|
|
||||||
get_tile_sizes(tile_size_x, tile_size_y, tile_overlap, vae_tiling_params, x->ne[0], x->ne[1]);
|
|
||||||
|
|
||||||
LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y);
|
|
||||||
|
|
||||||
// split latent in 32x32 tiles and compute in several steps
|
// split latent in 32x32 tiles and compute in several steps
|
||||||
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
||||||
first_stage_model->compute(n_threads, in, true, &out, NULL);
|
first_stage_model->compute(n_threads, in, true, &out, NULL);
|
||||||
@ -1627,8 +1588,6 @@ const char* schedule_to_str[] = {
|
|||||||
"exponential",
|
"exponential",
|
||||||
"ays",
|
"ays",
|
||||||
"gits",
|
"gits",
|
||||||
"sgm_uniform",
|
|
||||||
"simple",
|
|
||||||
"smoothstep",
|
"smoothstep",
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1761,8 +1720,7 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
|
|||||||
"scheduler: %s, "
|
"scheduler: %s, "
|
||||||
"sample_method: %s, "
|
"sample_method: %s, "
|
||||||
"sample_steps: %d, "
|
"sample_steps: %d, "
|
||||||
"eta: %.2f, "
|
"eta: %.2f)",
|
||||||
"shifted_timestep: %d)",
|
|
||||||
sample_params->guidance.txt_cfg,
|
sample_params->guidance.txt_cfg,
|
||||||
sample_params->guidance.img_cfg,
|
sample_params->guidance.img_cfg,
|
||||||
sample_params->guidance.distilled_guidance,
|
sample_params->guidance.distilled_guidance,
|
||||||
@ -1773,8 +1731,7 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
|
|||||||
sd_schedule_name(sample_params->scheduler),
|
sd_schedule_name(sample_params->scheduler),
|
||||||
sd_sample_method_name(sample_params->sample_method),
|
sd_sample_method_name(sample_params->sample_method),
|
||||||
sample_params->sample_steps,
|
sample_params->sample_steps,
|
||||||
sample_params->eta,
|
sample_params->eta);
|
||||||
sample_params->shifted_timestep);
|
|
||||||
|
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
@ -1906,7 +1863,6 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||||||
int clip_skip,
|
int clip_skip,
|
||||||
sd_guidance_params_t guidance,
|
sd_guidance_params_t guidance,
|
||||||
float eta,
|
float eta,
|
||||||
int shifted_timestep,
|
|
||||||
int width,
|
int width,
|
||||||
int height,
|
int height,
|
||||||
enum sample_method_t sample_method,
|
enum sample_method_t sample_method,
|
||||||
@ -2145,7 +2101,6 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||||||
control_strength,
|
control_strength,
|
||||||
guidance,
|
guidance,
|
||||||
eta,
|
eta,
|
||||||
shifted_timestep,
|
|
||||||
sample_method,
|
sample_method,
|
||||||
sigmas,
|
sigmas,
|
||||||
start_merge_step,
|
start_merge_step,
|
||||||
@ -2439,7 +2394,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||||||
sd_img_gen_params->clip_skip,
|
sd_img_gen_params->clip_skip,
|
||||||
sd_img_gen_params->sample_params.guidance,
|
sd_img_gen_params->sample_params.guidance,
|
||||||
sd_img_gen_params->sample_params.eta,
|
sd_img_gen_params->sample_params.eta,
|
||||||
sd_img_gen_params->sample_params.shifted_timestep,
|
|
||||||
width,
|
width,
|
||||||
height,
|
height,
|
||||||
sample_method,
|
sample_method,
|
||||||
@ -2780,7 +2734,6 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||||||
0,
|
0,
|
||||||
sd_vid_gen_params->high_noise_sample_params.guidance,
|
sd_vid_gen_params->high_noise_sample_params.guidance,
|
||||||
sd_vid_gen_params->high_noise_sample_params.eta,
|
sd_vid_gen_params->high_noise_sample_params.eta,
|
||||||
sd_vid_gen_params->high_noise_sample_params.shifted_timestep,
|
|
||||||
sd_vid_gen_params->high_noise_sample_params.sample_method,
|
sd_vid_gen_params->high_noise_sample_params.sample_method,
|
||||||
high_noise_sigmas,
|
high_noise_sigmas,
|
||||||
-1,
|
-1,
|
||||||
@ -2816,7 +2769,6 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||||||
0,
|
0,
|
||||||
sd_vid_gen_params->sample_params.guidance,
|
sd_vid_gen_params->sample_params.guidance,
|
||||||
sd_vid_gen_params->sample_params.eta,
|
sd_vid_gen_params->sample_params.eta,
|
||||||
sd_vid_gen_params->sample_params.shifted_timestep,
|
|
||||||
sd_vid_gen_params->sample_params.sample_method,
|
sd_vid_gen_params->sample_params.sample_method,
|
||||||
sigmas,
|
sigmas,
|
||||||
-1,
|
-1,
|
||||||
|
|||||||
@ -58,8 +58,6 @@ enum scheduler_t {
|
|||||||
EXPONENTIAL,
|
EXPONENTIAL,
|
||||||
AYS,
|
AYS,
|
||||||
GITS,
|
GITS,
|
||||||
SGM_UNIFORM,
|
|
||||||
SIMPLE,
|
|
||||||
SMOOTHSTEP,
|
SMOOTHSTEP,
|
||||||
SCHEDULE_COUNT
|
SCHEDULE_COUNT
|
||||||
};
|
};
|
||||||
@ -185,7 +183,6 @@ typedef struct {
|
|||||||
enum sample_method_t sample_method;
|
enum sample_method_t sample_method;
|
||||||
int sample_steps;
|
int sample_steps;
|
||||||
float eta;
|
float eta;
|
||||||
int shifted_timestep;
|
|
||||||
} sd_sample_params_t;
|
} sd_sample_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user