mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-12 13:28:37 +00:00
* add wan vae suppport * add wan model support * add umt5 support * add wan2.1 t2i support * make flash attn work with wan * make wan a little faster * add wan2.1 t2v support * add wan gguf support * add offload params to cpu support * add wan2.1 i2v support * crop image before resize * set default fps to 16 * add diff lora support * fix wan2.1 i2v * introduce sd_sample_params_t * add wan2.2 t2v support * add wan2.2 14B i2v support * add wan2.2 ti2v support * add high noise lora support * sync: update ggml submodule url * avoid build failure on linux * avoid build failure * update ggml * update ggml * fix sd_version_is_wan * update ggml, fix cpu im2col_3d * fix ggml_nn_attention_ext mask * add cache support to ggml runner * fix the issue of illegal memory access * unify image loading processing * add wan2.1/2.2 FLF2V support * fix end_image mask * update to latest ggml * add GGUFReader * update docs
148 lines
5.3 KiB
C++
148 lines
5.3 KiB
C++
#include <new>

#include "esrgan.hpp"
#include "ggml_extend.hpp"
#include "model.h"
#include "stable-diffusion.h"
|
|
|
|
struct UpscalerGGML {
|
|
ggml_backend_t backend = NULL; // general backend
|
|
ggml_type model_data_type = GGML_TYPE_F16;
|
|
std::shared_ptr<ESRGAN> esrgan_upscaler;
|
|
std::string esrgan_path;
|
|
int n_threads;
|
|
bool direct = false;
|
|
|
|
UpscalerGGML(int n_threads,
|
|
bool direct = false)
|
|
: n_threads(n_threads),
|
|
direct(direct) {
|
|
}
|
|
|
|
bool load_from_file(const std::string& esrgan_path,
|
|
bool offload_params_to_cpu) {
|
|
#ifdef SD_USE_CUDA
|
|
LOG_DEBUG("Using CUDA backend");
|
|
backend = ggml_backend_cuda_init(0);
|
|
#endif
|
|
#ifdef SD_USE_METAL
|
|
LOG_DEBUG("Using Metal backend");
|
|
ggml_log_set(ggml_log_callback_default, nullptr);
|
|
backend = ggml_backend_metal_init();
|
|
#endif
|
|
#ifdef SD_USE_VULKAN
|
|
LOG_DEBUG("Using Vulkan backend");
|
|
backend = ggml_backend_vk_init(0);
|
|
#endif
|
|
#ifdef SD_USE_OPENCL
|
|
LOG_DEBUG("Using OpenCL backend");
|
|
backend = ggml_backend_opencl_init();
|
|
#endif
|
|
#ifdef SD_USE_SYCL
|
|
LOG_DEBUG("Using SYCL backend");
|
|
backend = ggml_backend_sycl_init(0);
|
|
#endif
|
|
ModelLoader model_loader;
|
|
if (!model_loader.init_from_file(esrgan_path)) {
|
|
LOG_ERROR("init model loader from file failed: '%s'", esrgan_path.c_str());
|
|
}
|
|
model_loader.set_wtype_override(model_data_type);
|
|
if (!backend) {
|
|
LOG_DEBUG("Using CPU backend");
|
|
backend = ggml_backend_cpu_init();
|
|
}
|
|
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
|
|
esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, model_loader.tensor_storages_types);
|
|
if (direct) {
|
|
esrgan_upscaler->enable_conv2d_direct();
|
|
}
|
|
if (!esrgan_upscaler->load_from_file(esrgan_path)) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor) {
|
|
// upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth
|
|
sd_image_t upscaled_image = {0, 0, 0, NULL};
|
|
int output_width = (int)input_image.width * esrgan_upscaler->scale;
|
|
int output_height = (int)input_image.height * esrgan_upscaler->scale;
|
|
LOG_INFO("upscaling from (%i x %i) to (%i x %i)",
|
|
input_image.width, input_image.height, output_width, output_height);
|
|
|
|
struct ggml_init_params params;
|
|
params.mem_size = output_width * output_height * 3 * sizeof(float) * 2;
|
|
params.mem_size += 2 * ggml_tensor_overhead();
|
|
params.mem_buffer = NULL;
|
|
params.no_alloc = false;
|
|
|
|
// draft context
|
|
struct ggml_context* upscale_ctx = ggml_init(params);
|
|
if (!upscale_ctx) {
|
|
LOG_ERROR("ggml_init() failed");
|
|
return upscaled_image;
|
|
}
|
|
LOG_DEBUG("upscale work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
|
|
ggml_tensor* input_image_tensor = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, input_image.width, input_image.height, 3, 1);
|
|
sd_image_to_tensor(input_image.data, input_image_tensor);
|
|
|
|
ggml_tensor* upscaled = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, output_width, output_height, 3, 1);
|
|
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
|
esrgan_upscaler->compute(n_threads, in, &out);
|
|
};
|
|
int64_t t0 = ggml_time_ms();
|
|
sd_tiling(input_image_tensor, upscaled, esrgan_upscaler->scale, esrgan_upscaler->tile_size, 0.25f, on_tiling);
|
|
esrgan_upscaler->free_compute_buffer();
|
|
ggml_tensor_clamp(upscaled, 0.f, 1.f);
|
|
uint8_t* upscaled_data = sd_tensor_to_image(upscaled);
|
|
ggml_free(upscale_ctx);
|
|
int64_t t3 = ggml_time_ms();
|
|
LOG_INFO("input_image_tensor upscaled, taking %.2fs", (t3 - t0) / 1000.0f);
|
|
upscaled_image = {
|
|
(uint32_t)output_width,
|
|
(uint32_t)output_height,
|
|
3,
|
|
upscaled_data,
|
|
};
|
|
return upscaled_image;
|
|
}
|
|
};
|
|
|
|
struct upscaler_ctx_t {
|
|
UpscalerGGML* upscaler = NULL;
|
|
};
|
|
|
|
// Allocate and initialize an upscaler context from the ESRGAN model at
// `esrgan_path_c_str`. Returns NULL on any failure (bad path, allocation
// failure, or model load failure); on success the caller releases the
// context with free_upscaler_ctx().
upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
                                 bool offload_params_to_cpu,
                                 bool direct,
                                 int n_threads) {
    // fix: constructing std::string from NULL is undefined behavior
    if (esrgan_path_c_str == NULL) {
        return NULL;
    }
    upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t));
    if (upscaler_ctx == NULL) {
        return NULL;
    }
    std::string esrgan_path(esrgan_path_c_str);

    // fix: plain `new` throws on failure, which made the NULL check below
    // dead code and leaked upscaler_ctx on bad_alloc escaping this C-style
    // API; nothrow keeps failure reporting as a NULL return
    upscaler_ctx->upscaler = new (std::nothrow) UpscalerGGML(n_threads, direct);
    if (upscaler_ctx->upscaler == NULL) {
        free(upscaler_ctx);  // fix: previously leaked the context on this path
        return NULL;
    }

    if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, offload_params_to_cpu)) {
        delete upscaler_ctx->upscaler;
        upscaler_ctx->upscaler = NULL;
        free(upscaler_ctx);
        return NULL;
    }
    return upscaler_ctx;
}
|
|
|
|
// C-API wrapper: forward to UpscalerGGML::upscale().
// fix: guard against a NULL or half-initialized context (e.g. the caller
// ignored a NULL from new_upscaler_ctx()) instead of crashing; the empty
// image matches the member function's own failure sentinel.
sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor) {
    if (upscaler_ctx == NULL || upscaler_ctx->upscaler == NULL) {
        sd_image_t empty_image = {0, 0, 0, NULL};
        return empty_image;
    }
    return upscaler_ctx->upscaler->upscale(input_image, upscale_factor);
}
|
|
|
|
// Release a context created by new_upscaler_ctx().
// fix: tolerate a NULL argument like free()/delete do — the old code
// dereferenced upscaler_ctx unconditionally.
void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx) {
    if (upscaler_ctx == NULL) {
        return;
    }
    if (upscaler_ctx->upscaler != NULL) {
        delete upscaler_ctx->upscaler;
        upscaler_ctx->upscaler = NULL;
    }
    free(upscaler_ctx);
}
|