mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-12 13:28:37 +00:00
Compare commits
2 Commits
d939f6e86a
...
8823dc48bc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8823dc48bc | ||
|
|
1ac5a616de |
@ -156,9 +156,10 @@ struct ESRGAN : public GGMLRunner {
|
||||
|
||||
// Constructs the ESRGAN runner.
//   backend / offload_params_to_cpu: forwarded unchanged to the GGMLRunner base.
//   tile_size: stored on this instance (defaults to 128).
//   tensor_storage_map: accepted but not referenced in this constructor body.
ESRGAN(ggml_backend_t backend,
|
||||
bool offload_params_to_cpu,
|
||||
int tile_size = 128,
|
||||
const String2TensorStorage& tensor_storage_map = {})
|
||||
: GGMLRunner(backend, offload_params_to_cpu) {
|
||||
// rrdb_net will be created in load_from_file
|
||||
this->tile_size = tile_size;
|
||||
}
|
||||
|
||||
std::string get_desc() override {
|
||||
|
||||
@ -1079,7 +1079,8 @@ struct SDGenerationParams {
|
||||
std::string pm_id_embed_path;
|
||||
float pm_style_strength = 20.f;
|
||||
|
||||
int upscale_repeats = 1;
|
||||
int upscale_repeats = 1;
|
||||
int upscale_tile_size = 128;
|
||||
|
||||
std::map<std::string, float> lora_map;
|
||||
std::map<std::string, float> high_noise_lora_map;
|
||||
@ -1176,6 +1177,10 @@ struct SDGenerationParams {
|
||||
"--upscale-repeats",
|
||||
"Run the ESRGAN upscaler this many times (default: 1)",
|
||||
&upscale_repeats},
|
||||
{"",
|
||||
"--upscale-tile-size",
|
||||
"tile size for ESRGAN upscaling (default: 128)",
|
||||
&upscale_tile_size},
|
||||
};
|
||||
|
||||
options.float_options = {
|
||||
@ -1635,6 +1640,10 @@ struct SDGenerationParams {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (upscale_tile_size < 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mode == UPSCALE) {
|
||||
if (init_image_path.length() == 0) {
|
||||
fprintf(stderr, "error: upscale mode needs an init image (--init-img)\n");
|
||||
@ -1720,6 +1729,7 @@ struct SDGenerationParams {
|
||||
<< " control_strength: " << control_strength << ",\n"
|
||||
<< " seed: " << seed << ",\n"
|
||||
<< " upscale_repeats: " << upscale_repeats << ",\n"
|
||||
<< " upscale_tile_size: " << upscale_tile_size << ",\n"
|
||||
<< "}";
|
||||
free(sample_params_str);
|
||||
free(high_noise_sample_params_str);
|
||||
@ -2336,7 +2346,8 @@ int main(int argc, const char* argv[]) {
|
||||
upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(ctx_params.esrgan_path.c_str(),
|
||||
ctx_params.offload_params_to_cpu,
|
||||
ctx_params.diffusion_conv_direct,
|
||||
ctx_params.n_threads);
|
||||
ctx_params.n_threads,
|
||||
gen_params.upscale_tile_size);
|
||||
|
||||
if (upscaler_ctx == nullptr) {
|
||||
printf("new_upscaler_ctx failed\n");
|
||||
|
||||
@ -60,6 +60,14 @@
|
||||
#define SD_UNUSED(x) (void)(x)
|
||||
#endif
|
||||
|
||||
// Returns the amount that must be added to `n` to reach the next multiple of
// `multiple`; the result is 0 when `n` is already aligned.
//
// `multiple` is expected to be positive. A zero `multiple` would make the
// modulo below undefined behaviour (division by zero), so it is treated as
// "no alignment requested" and yields an offset of 0.
__STATIC_INLINE__ int align_up_offset(int n, int multiple) {
    if (multiple == 0) {
        return 0;
    }
    // The outer modulo folds the "already aligned" case (multiple - 0) back to 0.
    return (multiple - n % multiple) % multiple;
}
|
||||
|
||||
// Rounds `n` up to a multiple of `multiple` by adding the padding reported by
// align_up_offset(); an already aligned `n` is returned unchanged.
__STATIC_INLINE__ int align_up(int n, int multiple) {
    const int padding = align_up_offset(n, multiple);
    return n + padding;
}
|
||||
|
||||
__STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const char* text, void*) {
|
||||
switch (level) {
|
||||
case GGML_LOG_LEVEL_DEBUG:
|
||||
|
||||
@ -1898,6 +1898,18 @@ public:
|
||||
return vae_scale_factor;
|
||||
}
|
||||
|
||||
int get_diffusion_model_down_factor() {
|
||||
int down_factor = 8; // unet
|
||||
if (sd_version_is_dit(version)) {
|
||||
if (sd_version_is_wan(version)) {
|
||||
down_factor = 2;
|
||||
} else {
|
||||
down_factor = 1;
|
||||
}
|
||||
}
|
||||
return down_factor;
|
||||
}
|
||||
|
||||
int get_latent_channel() {
|
||||
int latent_channel = 4;
|
||||
if (sd_version_is_dit(version)) {
|
||||
@ -3133,22 +3145,19 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
||||
sd_ctx->sd->vae_tiling_params = sd_img_gen_params->vae_tiling_params;
|
||||
int width = sd_img_gen_params->width;
|
||||
int height = sd_img_gen_params->height;
|
||||
int vae_scale_factor = sd_ctx->sd->get_vae_scale_factor();
|
||||
if (sd_version_is_dit(sd_ctx->sd->version)) {
|
||||
if (width % 16 || height % 16) {
|
||||
LOG_ERROR("Image dimensions must be must be a multiple of 16 on each axis for %s models. (Got %dx%d)",
|
||||
model_version_to_str[sd_ctx->sd->version],
|
||||
width,
|
||||
height);
|
||||
return nullptr;
|
||||
}
|
||||
} else if (width % 64 || height % 64) {
|
||||
LOG_ERROR("Image dimensions must be must be a multiple of 64 on each axis for %s models. (Got %dx%d)",
|
||||
model_version_to_str[sd_ctx->sd->version],
|
||||
width,
|
||||
height);
|
||||
return nullptr;
|
||||
|
||||
int vae_scale_factor = sd_ctx->sd->get_vae_scale_factor();
|
||||
int diffusion_model_down_factor = sd_ctx->sd->get_diffusion_model_down_factor();
|
||||
int spatial_multiple = vae_scale_factor * diffusion_model_down_factor;
|
||||
|
||||
int width_offset = align_up_offset(width, spatial_multiple);
|
||||
int height_offset = align_up_offset(height, spatial_multiple);
|
||||
if (width_offset > 0 || height_offset > 0) {
|
||||
width += width_offset;
|
||||
height += height_offset;
|
||||
LOG_WARN("align up %dx%d to %dx%d (multiple=%d)", sd_img_gen_params->width, sd_img_gen_params->height, width, height, spatial_multiple);
|
||||
}
|
||||
|
||||
LOG_DEBUG("generate_image %dx%d", width, height);
|
||||
if (sd_ctx == nullptr || sd_img_gen_params == nullptr) {
|
||||
return nullptr;
|
||||
@ -3422,9 +3431,19 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
||||
int frames = sd_vid_gen_params->video_frames;
|
||||
frames = (frames - 1) / 4 * 4 + 1;
|
||||
int sample_steps = sd_vid_gen_params->sample_params.sample_steps;
|
||||
LOG_INFO("generate_video %dx%dx%d", width, height, frames);
|
||||
|
||||
int vae_scale_factor = sd_ctx->sd->get_vae_scale_factor();
|
||||
int vae_scale_factor = sd_ctx->sd->get_vae_scale_factor();
|
||||
int diffusion_model_down_factor = sd_ctx->sd->get_diffusion_model_down_factor();
|
||||
int spatial_multiple = vae_scale_factor * diffusion_model_down_factor;
|
||||
|
||||
int width_offset = align_up_offset(width, spatial_multiple);
|
||||
int height_offset = align_up_offset(height, spatial_multiple);
|
||||
if (width_offset > 0 || height_offset > 0) {
|
||||
width += width_offset;
|
||||
height += height_offset;
|
||||
LOG_WARN("align up %dx%d to %dx%d (multiple=%d)", sd_vid_gen_params->width, sd_vid_gen_params->height, width, height, spatial_multiple);
|
||||
}
|
||||
LOG_INFO("generate_video %dx%dx%d", width, height, frames);
|
||||
|
||||
enum sample_method_t sample_method = sd_vid_gen_params->sample_params.sample_method;
|
||||
if (sample_method == SAMPLE_METHOD_COUNT) {
|
||||
|
||||
@ -347,7 +347,8 @@ typedef struct upscaler_ctx_t upscaler_ctx_t;
|
||||
SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
|
||||
bool offload_params_to_cpu,
|
||||
bool direct,
|
||||
int n_threads);
|
||||
int n_threads,
|
||||
int tile_size);
|
||||
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
|
||||
|
||||
SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx,
|
||||
|
||||
16
upscaler.cpp
16
upscaler.cpp
@ -9,12 +9,15 @@ struct UpscalerGGML {
|
||||
std::shared_ptr<ESRGAN> esrgan_upscaler;
|
||||
std::string esrgan_path;
|
||||
int n_threads;
|
||||
bool direct = false;
|
||||
bool direct = false;
|
||||
int tile_size = 128;
|
||||
|
||||
UpscalerGGML(int n_threads,
|
||||
bool direct = false)
|
||||
bool direct = false,
|
||||
int tile_size = 128)
|
||||
: n_threads(n_threads),
|
||||
direct(direct) {
|
||||
direct(direct),
|
||||
tile_size(tile_size) {
|
||||
}
|
||||
|
||||
bool load_from_file(const std::string& esrgan_path,
|
||||
@ -51,7 +54,7 @@ struct UpscalerGGML {
|
||||
backend = ggml_backend_cpu_init();
|
||||
}
|
||||
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
|
||||
esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, model_loader.get_tensor_storage_map());
|
||||
esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, tile_size, model_loader.get_tensor_storage_map());
|
||||
if (direct) {
|
||||
esrgan_upscaler->set_conv2d_direct_enabled(true);
|
||||
}
|
||||
@ -113,14 +116,15 @@ struct upscaler_ctx_t {
|
||||
upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
||||
bool offload_params_to_cpu,
|
||||
bool direct,
|
||||
int n_threads) {
|
||||
int n_threads,
|
||||
int tile_size) {
|
||||
upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t));
|
||||
if (upscaler_ctx == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
std::string esrgan_path(esrgan_path_c_str);
|
||||
|
||||
upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct);
|
||||
upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct, tile_size);
|
||||
if (upscaler_ctx->upscaler == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user