mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-13 05:48:56 +00:00
Compare commits
4 Commits
23de7fc44a
...
7dac89ad75
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7dac89ad75 | ||
|
|
9251756086 | ||
|
|
ecf5db97ae | ||
|
|
ea46fd6948 |
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@ -146,7 +146,7 @@ jobs:
|
|||||||
sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip
|
sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip
|
||||||
|
|
||||||
windows-latest-cmake:
|
windows-latest-cmake:
|
||||||
runs-on: windows-2019
|
runs-on: windows-2025
|
||||||
|
|
||||||
env:
|
env:
|
||||||
VULKAN_VERSION: 1.3.261.1
|
VULKAN_VERSION: 1.3.261.1
|
||||||
|
|||||||
@ -57,7 +57,7 @@ public:
|
|||||||
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
|
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
|
||||||
|
|
||||||
x = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST); // [N, channels, h*2, w*2]
|
x = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST); // [N, channels, h*2, w*2]
|
||||||
x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]
|
x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
59
denoiser.hpp
59
denoiser.hpp
@ -168,24 +168,21 @@ struct AYSSchedule : SigmaSchedule {
|
|||||||
std::vector<float> inputs;
|
std::vector<float> inputs;
|
||||||
std::vector<float> results(n + 1);
|
std::vector<float> results(n + 1);
|
||||||
|
|
||||||
switch (version) {
|
if (sd_version_is_sd2((SDVersion)version)) {
|
||||||
case VERSION_SD2: /* fallthrough */
|
LOG_WARN("AYS not designed for SD2.X models");
|
||||||
LOG_WARN("AYS not designed for SD2.X models");
|
} /* fallthrough */
|
||||||
case VERSION_SD1:
|
else if (sd_version_is_sd1((SDVersion)version)) {
|
||||||
LOG_INFO("AYS using SD1.5 noise levels");
|
LOG_INFO("AYS using SD1.5 noise levels");
|
||||||
inputs = noise_levels[0];
|
inputs = noise_levels[0];
|
||||||
break;
|
} else if (sd_version_is_sdxl((SDVersion)version)) {
|
||||||
case VERSION_SDXL:
|
LOG_INFO("AYS using SDXL noise levels");
|
||||||
LOG_INFO("AYS using SDXL noise levels");
|
inputs = noise_levels[1];
|
||||||
inputs = noise_levels[1];
|
} else if (version == VERSION_SVD) {
|
||||||
break;
|
LOG_INFO("AYS using SVD noise levels");
|
||||||
case VERSION_SVD:
|
inputs = noise_levels[2];
|
||||||
LOG_INFO("AYS using SVD noise levels");
|
} else {
|
||||||
inputs = noise_levels[2];
|
LOG_ERROR("Version not compatable with AYS scheduler");
|
||||||
break;
|
return results;
|
||||||
default:
|
|
||||||
LOG_ERROR("Version not compatable with AYS scheduler");
|
|
||||||
return results;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Stretches those pre-calculated reference levels out to the desired
|
/* Stretches those pre-calculated reference levels out to the desired
|
||||||
@ -346,6 +343,32 @@ struct CompVisVDenoiser : public CompVisDenoiser {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct EDMVDenoiser : public CompVisVDenoiser {
|
||||||
|
float min_sigma = 0.002;
|
||||||
|
float max_sigma = 120.0;
|
||||||
|
|
||||||
|
EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0)
|
||||||
|
: min_sigma(min_sigma), max_sigma(max_sigma) {
|
||||||
|
schedule = std::make_shared<ExponentialSchedule>();
|
||||||
|
}
|
||||||
|
|
||||||
|
float t_to_sigma(float t) {
|
||||||
|
return std::exp(t * 4 / (float)TIMESTEPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
float sigma_to_t(float s) {
|
||||||
|
return 0.25 * std::log(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
float sigma_min() {
|
||||||
|
return min_sigma;
|
||||||
|
}
|
||||||
|
|
||||||
|
float sigma_max() {
|
||||||
|
return max_sigma;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
float time_snr_shift(float alpha, float t) {
|
float time_snr_shift(float alpha, float t) {
|
||||||
if (alpha == 1.0f) {
|
if (alpha == 1.0f) {
|
||||||
return t;
|
return t;
|
||||||
|
|||||||
@ -602,6 +602,8 @@ typedef std::function<void(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;
|
|||||||
|
|
||||||
// Tiling
|
// Tiling
|
||||||
__STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
|
__STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
|
||||||
|
output = ggml_set_f32(output, 0);
|
||||||
|
|
||||||
int input_width = (int)input->ne[0];
|
int input_width = (int)input->ne[0];
|
||||||
int input_height = (int)input->ne[1];
|
int input_height = (int)input->ne[1];
|
||||||
int output_width = (int)output->ne[0];
|
int output_width = (int)output->ne[0];
|
||||||
|
|||||||
@ -103,6 +103,9 @@ public:
|
|||||||
bool vae_tiling = false;
|
bool vae_tiling = false;
|
||||||
bool stacked_id = false;
|
bool stacked_id = false;
|
||||||
|
|
||||||
|
bool is_using_v_parameterization = false;
|
||||||
|
bool is_using_edm_v_parameterization = false;
|
||||||
|
|
||||||
std::map<std::string, struct ggml_tensor*> tensors;
|
std::map<std::string, struct ggml_tensor*> tensors;
|
||||||
|
|
||||||
std::string lora_model_dir;
|
std::string lora_model_dir;
|
||||||
@ -543,12 +546,17 @@ public:
|
|||||||
LOG_INFO("loading model from '%s' completed, taking %.2fs", model_path.c_str(), (t1 - t0) * 1.0f / 1000);
|
LOG_INFO("loading model from '%s' completed, taking %.2fs", model_path.c_str(), (t1 - t0) * 1.0f / 1000);
|
||||||
|
|
||||||
// check is_using_v_parameterization_for_sd2
|
// check is_using_v_parameterization_for_sd2
|
||||||
bool is_using_v_parameterization = false;
|
|
||||||
if (sd_version_is_sd2(version)) {
|
if (sd_version_is_sd2(version)) {
|
||||||
if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
|
if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
|
||||||
is_using_v_parameterization = true;
|
is_using_v_parameterization = true;
|
||||||
}
|
}
|
||||||
} else if (sd_version_is_sdxl(version)) {
|
} else if (sd_version_is_sdxl(version)) {
|
||||||
|
if (model_loader.tensor_storages_types.find("edm_vpred.sigma_max") != model_loader.tensor_storages_types.end()) {
|
||||||
|
// CosXL models
|
||||||
|
// TODO: get sigma_min and sigma_max values from file
|
||||||
|
is_using_edm_v_parameterization = true;
|
||||||
|
}
|
||||||
if (model_loader.tensor_storages_types.find("v_pred") != model_loader.tensor_storages_types.end()) {
|
if (model_loader.tensor_storages_types.find("v_pred") != model_loader.tensor_storages_types.end()) {
|
||||||
is_using_v_parameterization = true;
|
is_using_v_parameterization = true;
|
||||||
}
|
}
|
||||||
@ -573,6 +581,9 @@ public:
|
|||||||
} else if (is_using_v_parameterization) {
|
} else if (is_using_v_parameterization) {
|
||||||
LOG_INFO("running in v-prediction mode");
|
LOG_INFO("running in v-prediction mode");
|
||||||
denoiser = std::make_shared<CompVisVDenoiser>();
|
denoiser = std::make_shared<CompVisVDenoiser>();
|
||||||
|
} else if (is_using_edm_v_parameterization) {
|
||||||
|
LOG_INFO("running in v-prediction EDM mode");
|
||||||
|
denoiser = std::make_shared<EDMVDenoiser>();
|
||||||
} else {
|
} else {
|
||||||
LOG_INFO("running in eps-prediction mode");
|
LOG_INFO("running in eps-prediction mode");
|
||||||
}
|
}
|
||||||
@ -1396,7 +1407,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
|
|||||||
SDCondition uncond;
|
SDCondition uncond;
|
||||||
if (cfg_scale != 1.0) {
|
if (cfg_scale != 1.0) {
|
||||||
bool force_zero_embeddings = false;
|
bool force_zero_embeddings = false;
|
||||||
if (sd_version_is_sdxl(sd_ctx->sd->version) && negative_prompt.size() == 0) {
|
if (sd_version_is_sdxl(sd_ctx->sd->version) && negative_prompt.size() == 0 && !sd_ctx->sd->is_using_edm_v_parameterization) {
|
||||||
force_zero_embeddings = true;
|
force_zero_embeddings = true;
|
||||||
}
|
}
|
||||||
uncond = sd_ctx->sd->cond_stage_model->get_learned_condition(work_ctx,
|
uncond = sd_ctx->sd->cond_stage_model->get_learned_condition(work_ctx,
|
||||||
@ -1555,6 +1566,29 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
|
|||||||
return result_images;
|
return result_images;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Allocates the initial latent tensor for a generation run and fills it
 * with a model-dependent constant.
 *
 * The tensor is F32 with dimensions (width / 8, height / 8, channels, 1),
 * allocated from work_ctx. SD3 and Flux models get 16 channels; every other
 * version gets 4.
 *
 * @param sd_ctx   context whose model version selects channel count and fill value
 * @param work_ctx ggml context the tensor is allocated in
 * @param width    image width in pixels (integer-divided by 8)
 * @param height   image height in pixels (integer-divided by 8)
 * @return the newly created, constant-filled latent tensor
 */
ggml_tensor* generate_init_latent(sd_ctx_t* sd_ctx,
                                  ggml_context* work_ctx,
                                  int width,
                                  int height) {
    const auto model_version = sd_ctx->sd->version;
    const bool is_sd3        = sd_version_is_sd3(model_version);
    // Flux is only consulted when the model is not SD3, mirroring an if / else-if chain.
    const bool is_flux = !is_sd3 && sd_version_is_flux(model_version);

    const int channels      = (is_sd3 || is_flux) ? 16 : 4;
    const int latent_width  = width / 8;
    const int latent_height = height / 8;

    // Per-model fill constant; presumably the latent-space offset for each
    // VAE — TODO confirm against the model definitions.
    float fill_value = 0.f;
    if (is_sd3) {
        fill_value = 0.0609f;
    } else if (is_flux) {
        fill_value = 0.1159f;
    }

    ggml_tensor* latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, latent_width, latent_height, channels, 1);
    ggml_set_f32(latent, fill_value);
    return latent;
}
|
||||||
|
|
||||||
sd_image_t* txt2img(sd_ctx_t* sd_ctx,
|
sd_image_t* txt2img(sd_ctx_t* sd_ctx,
|
||||||
const char* prompt_c_str,
|
const char* prompt_c_str,
|
||||||
const char* negative_prompt_c_str,
|
const char* negative_prompt_c_str,
|
||||||
@ -1611,27 +1645,12 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
|
|||||||
|
|
||||||
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
|
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
|
||||||
|
|
||||||
int C = 4;
|
|
||||||
if (sd_version_is_sd3(sd_ctx->sd->version)) {
|
|
||||||
C = 16;
|
|
||||||
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
|
|
||||||
C = 16;
|
|
||||||
}
|
|
||||||
int W = width / 8;
|
|
||||||
int H = height / 8;
|
|
||||||
ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
|
|
||||||
if (sd_version_is_sd3(sd_ctx->sd->version)) {
|
|
||||||
ggml_set_f32(init_latent, 0.0609f);
|
|
||||||
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
|
|
||||||
ggml_set_f32(init_latent, 0.1159f);
|
|
||||||
} else {
|
|
||||||
ggml_set_f32(init_latent, 0.f);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sd_version_is_inpaint(sd_ctx->sd->version)) {
|
if (sd_version_is_inpaint(sd_ctx->sd->version)) {
|
||||||
LOG_WARN("This is an inpainting model, this should only be used in img2img mode with a mask");
|
LOG_WARN("This is an inpainting model, this should only be used in img2img mode with a mask");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ggml_tensor* init_latent = generate_init_latent(sd_ctx, work_ctx, width, height);
|
||||||
|
|
||||||
sd_image_t* result_images = generate_image(sd_ctx,
|
sd_image_t* result_images = generate_image(sd_ctx,
|
||||||
work_ctx,
|
work_ctx,
|
||||||
init_latent,
|
init_latent,
|
||||||
@ -2035,23 +2054,6 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
|
|||||||
}
|
}
|
||||||
sd_ctx->sd->rng->manual_seed(seed);
|
sd_ctx->sd->rng->manual_seed(seed);
|
||||||
|
|
||||||
int C = 4;
|
|
||||||
if (sd_version_is_sd3(sd_ctx->sd->version)) {
|
|
||||||
C = 16;
|
|
||||||
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
|
|
||||||
C = 16;
|
|
||||||
}
|
|
||||||
int W = width / 8;
|
|
||||||
int H = height / 8;
|
|
||||||
ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
|
|
||||||
if (sd_version_is_sd3(sd_ctx->sd->version)) {
|
|
||||||
ggml_set_f32(init_latent, 0.0609f);
|
|
||||||
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
|
|
||||||
ggml_set_f32(init_latent, 0.1159f);
|
|
||||||
} else {
|
|
||||||
ggml_set_f32(init_latent, 0.f);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t t0 = ggml_time_ms();
|
size_t t0 = ggml_time_ms();
|
||||||
|
|
||||||
std::vector<struct ggml_tensor*> ref_latents;
|
std::vector<struct ggml_tensor*> ref_latents;
|
||||||
@ -2074,6 +2076,8 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
|
|||||||
|
|
||||||
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
|
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
|
||||||
|
|
||||||
|
ggml_tensor* init_latent = generate_init_latent(sd_ctx, work_ctx, width, height);
|
||||||
|
|
||||||
sd_image_t* result_images = generate_image(sd_ctx,
|
sd_image_t* result_images = generate_image(sd_ctx,
|
||||||
work_ctx,
|
work_ctx,
|
||||||
init_latent,
|
init_latent,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user