mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-13 05:48:56 +00:00
Merge branch 'master' into wan
This commit is contained in:
commit
b05b2b29a3
@ -33,6 +33,7 @@ option(SD_SYCL "sd: sycl backend" OFF)
|
|||||||
option(SD_MUSA "sd: musa backend" OFF)
|
option(SD_MUSA "sd: musa backend" OFF)
|
||||||
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
|
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
|
||||||
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
|
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
|
||||||
|
option(SD_USE_SYSTEM_GGML "sd: use system-installed GGML library" OFF)
|
||||||
#option(SD_BUILD_SERVER "sd: build server example" ON)
|
#option(SD_BUILD_SERVER "sd: build server example" ON)
|
||||||
|
|
||||||
if(SD_CUDA)
|
if(SD_CUDA)
|
||||||
@ -118,13 +119,23 @@ endif()
|
|||||||
|
|
||||||
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||||
|
|
||||||
# see https://github.com/ggerganov/ggml/pull/682
|
if (NOT SD_USE_SYSTEM_GGML)
|
||||||
add_definitions(-DGGML_MAX_NAME=128)
|
# see https://github.com/ggerganov/ggml/pull/682
|
||||||
|
add_definitions(-DGGML_MAX_NAME=128)
|
||||||
|
endif()
|
||||||
|
|
||||||
# deps
|
# deps
|
||||||
# Only add ggml if it hasn't been added yet
|
# Only add ggml if it hasn't been added yet
|
||||||
if (NOT TARGET ggml)
|
if (NOT TARGET ggml)
|
||||||
add_subdirectory(ggml)
|
if (SD_USE_SYSTEM_GGML)
|
||||||
|
find_package(ggml REQUIRED)
|
||||||
|
if (NOT ggml_FOUND)
|
||||||
|
message(FATAL_ERROR "System-installed GGML library not found.")
|
||||||
|
endif()
|
||||||
|
add_library(ggml ALIAS ggml::ggml)
|
||||||
|
else()
|
||||||
|
add_subdirectory(ggml)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_subdirectory(thirdparty)
|
add_subdirectory(thirdparty)
|
||||||
|
|||||||
@ -341,6 +341,10 @@ arguments:
|
|||||||
--diffusion-fa use flash attention in the diffusion model (for low vram)
|
--diffusion-fa use flash attention in the diffusion model (for low vram)
|
||||||
Might lower quality, since it implies converting k and v to f16.
|
Might lower quality, since it implies converting k and v to f16.
|
||||||
This might crash if it is not supported by the backend.
|
This might crash if it is not supported by the backend.
|
||||||
|
--diffusion-conv-direct use Conv2d direct in the diffusion model
|
||||||
|
This might crash if it is not supported by the backend.
|
||||||
|
--vae-conv-direct use Conv2d direct in the vae model (should improve the performance)
|
||||||
|
This might crash if it is not supported by the backend.
|
||||||
--control-net-cpu keep controlnet in cpu (for low vram)
|
--control-net-cpu keep controlnet in cpu (for low vram)
|
||||||
--canny apply canny preprocessor (edge detection)
|
--canny apply canny preprocessor (edge detection)
|
||||||
--color colors the logging tags according to level
|
--color colors the logging tags according to level
|
||||||
|
|||||||
11
control.hpp
11
control.hpp
@ -324,6 +324,17 @@ struct ControlNet : public GGMLRunner {
|
|||||||
control_net.init(params_ctx, tensor_types, "");
|
control_net.init(params_ctx, tensor_types, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void enable_conv2d_direct() {
|
||||||
|
std::vector<GGMLBlock*> blocks;
|
||||||
|
control_net.get_all_blocks(blocks);
|
||||||
|
for (auto block : blocks) {
|
||||||
|
if (block->get_desc() == "Conv2d") {
|
||||||
|
auto conv_block = (Conv2d*)block;
|
||||||
|
conv_block->enable_direct();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
~ControlNet() {
|
~ControlNet() {
|
||||||
free_control_ctx();
|
free_control_ctx();
|
||||||
}
|
}
|
||||||
|
|||||||
11
esrgan.hpp
11
esrgan.hpp
@ -149,6 +149,17 @@ struct ESRGAN : public GGMLRunner {
|
|||||||
rrdb_net.init(params_ctx, tensor_types, "");
|
rrdb_net.init(params_ctx, tensor_types, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void enable_conv2d_direct() {
|
||||||
|
std::vector<GGMLBlock*> blocks;
|
||||||
|
rrdb_net.get_all_blocks(blocks);
|
||||||
|
for (auto block : blocks) {
|
||||||
|
if (block->get_desc() == "Conv2d") {
|
||||||
|
auto conv_block = (Conv2d*)block;
|
||||||
|
conv_block->enable_direct();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string get_desc() {
|
std::string get_desc() {
|
||||||
return "esrgan";
|
return "esrgan";
|
||||||
}
|
}
|
||||||
|
|||||||
@ -103,6 +103,8 @@ struct SDParams {
|
|||||||
bool clip_on_cpu = false;
|
bool clip_on_cpu = false;
|
||||||
bool vae_on_cpu = false;
|
bool vae_on_cpu = false;
|
||||||
bool diffusion_flash_attn = false;
|
bool diffusion_flash_attn = false;
|
||||||
|
bool diffusion_conv_direct = false;
|
||||||
|
bool vae_conv_direct = false;
|
||||||
bool canny_preprocess = false;
|
bool canny_preprocess = false;
|
||||||
bool color = false;
|
bool color = false;
|
||||||
int upscale_repeats = 1;
|
int upscale_repeats = 1;
|
||||||
@ -153,6 +155,8 @@ void print_params(SDParams params) {
|
|||||||
printf(" control_net_cpu: %s\n", params.control_net_cpu ? "true" : "false");
|
printf(" control_net_cpu: %s\n", params.control_net_cpu ? "true" : "false");
|
||||||
printf(" vae decoder on cpu:%s\n", params.vae_on_cpu ? "true" : "false");
|
printf(" vae decoder on cpu:%s\n", params.vae_on_cpu ? "true" : "false");
|
||||||
printf(" diffusion flash attention:%s\n", params.diffusion_flash_attn ? "true" : "false");
|
printf(" diffusion flash attention:%s\n", params.diffusion_flash_attn ? "true" : "false");
|
||||||
|
printf(" diffusion Conv2d direct:%s\n", params.diffusion_conv_direct ? "true" : "false");
|
||||||
|
printf(" vae Conv2d direct:%s\n", params.vae_conv_direct ? "true" : "false");
|
||||||
printf(" strength(control): %.2f\n", params.control_strength);
|
printf(" strength(control): %.2f\n", params.control_strength);
|
||||||
printf(" prompt: %s\n", params.prompt.c_str());
|
printf(" prompt: %s\n", params.prompt.c_str());
|
||||||
printf(" negative_prompt: %s\n", params.negative_prompt.c_str());
|
printf(" negative_prompt: %s\n", params.negative_prompt.c_str());
|
||||||
@ -255,6 +259,10 @@ void print_usage(int argc, const char* argv[]) {
|
|||||||
printf(" --diffusion-fa use flash attention in the diffusion model (for low vram)\n");
|
printf(" --diffusion-fa use flash attention in the diffusion model (for low vram)\n");
|
||||||
printf(" Might lower quality, since it implies converting k and v to f16.\n");
|
printf(" Might lower quality, since it implies converting k and v to f16.\n");
|
||||||
printf(" This might crash if it is not supported by the backend.\n");
|
printf(" This might crash if it is not supported by the backend.\n");
|
||||||
|
printf(" --diffusion-conv-direct use Conv2d direct in the diffusion model");
|
||||||
|
printf(" This might crash if it is not supported by the backend.\n");
|
||||||
|
printf(" --vae-conv-direct use Conv2d direct in the vae model (should improve the performance)");
|
||||||
|
printf(" This might crash if it is not supported by the backend.\n");
|
||||||
printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
|
printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
|
||||||
printf(" --canny apply canny preprocessor (edge detection)\n");
|
printf(" --canny apply canny preprocessor (edge detection)\n");
|
||||||
printf(" --color colors the logging tags according to level\n");
|
printf(" --color colors the logging tags according to level\n");
|
||||||
@ -495,6 +503,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||||||
{"", "--clip-on-cpu", "", true, &params.clip_on_cpu},
|
{"", "--clip-on-cpu", "", true, &params.clip_on_cpu},
|
||||||
{"", "--vae-on-cpu", "", true, &params.vae_on_cpu},
|
{"", "--vae-on-cpu", "", true, &params.vae_on_cpu},
|
||||||
{"", "--diffusion-fa", "", true, &params.diffusion_flash_attn},
|
{"", "--diffusion-fa", "", true, &params.diffusion_flash_attn},
|
||||||
|
{"", "--diffusion-conv-direct", "", true, &params.diffusion_conv_direct},
|
||||||
|
{"", "--vae-conv-direct", "", true, &params.vae_conv_direct},
|
||||||
{"", "--canny", "", true, &params.canny_preprocess},
|
{"", "--canny", "", true, &params.canny_preprocess},
|
||||||
{"-v", "--verbos", "", true, &params.verbose},
|
{"-v", "--verbos", "", true, &params.verbose},
|
||||||
{"", "--color", "", true, &params.color},
|
{"", "--color", "", true, &params.color},
|
||||||
@ -1077,6 +1087,8 @@ int main(int argc, const char* argv[]) {
|
|||||||
params.control_net_cpu,
|
params.control_net_cpu,
|
||||||
params.vae_on_cpu,
|
params.vae_on_cpu,
|
||||||
params.diffusion_flash_attn,
|
params.diffusion_flash_attn,
|
||||||
|
params.diffusion_conv_direct,
|
||||||
|
params.vae_conv_direct,
|
||||||
params.chroma_use_dit_mask,
|
params.chroma_use_dit_mask,
|
||||||
params.chroma_use_t5_mask,
|
params.chroma_use_t5_mask,
|
||||||
params.chroma_t5_mask_pad,
|
params.chroma_t5_mask_pad,
|
||||||
@ -1184,6 +1196,7 @@ int main(int argc, const char* argv[]) {
|
|||||||
if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) {
|
if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) {
|
||||||
upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(),
|
upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(),
|
||||||
params.offload_params_to_cpu,
|
params.offload_params_to_cpu,
|
||||||
|
params.diffusion_conv_direct,
|
||||||
params.n_threads);
|
params.n_threads);
|
||||||
|
|
||||||
if (upscaler_ctx == NULL) {
|
if (upscaler_ctx == NULL) {
|
||||||
|
|||||||
2
ggml
2
ggml
@ -1 +1 @@
|
|||||||
Subproject commit 089530bb72e70aa9f9ecb98137dfd891c2be20c1
|
Subproject commit 9caa235fe8e7e0ed0cbb599c54ec1cf07a9b7b73
|
||||||
@ -56,6 +56,8 @@
|
|||||||
#define __STATIC_INLINE__ static inline
|
#define __STATIC_INLINE__ static inline
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static_assert(GGML_MAX_NAME >= 128, "GGML_MAX_NAME must be at least 128");
|
||||||
|
|
||||||
// n-mode tensor-matrix product
|
// n-mode tensor-matrix product
|
||||||
// example: 2-mode product
|
// example: 2-mode product
|
||||||
// A: [ne03, k, ne01, ne00]
|
// A: [ne03, k, ne01, ne00]
|
||||||
@ -839,6 +841,27 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d(struct ggml_context* ctx,
|
|||||||
|
|
||||||
// w: [OC*IC, KD, KH, KW]
|
// w: [OC*IC, KD, KH, KW]
|
||||||
// x: [N*IC, ID, IH, IW]
|
// x: [N*IC, ID, IH, IW]
|
||||||
|
__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d_direct(struct ggml_context* ctx,
|
||||||
|
struct ggml_tensor* x,
|
||||||
|
struct ggml_tensor* w,
|
||||||
|
struct ggml_tensor* b,
|
||||||
|
int s0 = 1,
|
||||||
|
int s1 = 1,
|
||||||
|
int p0 = 0,
|
||||||
|
int p1 = 0,
|
||||||
|
int d0 = 1,
|
||||||
|
int d1 = 1) {
|
||||||
|
x = ggml_conv_2d_direct(ctx, w, x, s0, s1, p0, p1, d0, d1);
|
||||||
|
if (b != NULL) {
|
||||||
|
b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
|
||||||
|
// b = ggml_repeat(ctx, b, x);
|
||||||
|
x = ggml_add(ctx, x, b);
|
||||||
|
}
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// w: [OC,IC, KD, 1 * 1]
|
||||||
|
// x: [N, IC, IH, IW]
|
||||||
// b: [OC,]
|
// b: [OC,]
|
||||||
// result: [N*OC, OD, OH, OW]
|
// result: [N*OC, OD, OH, OW]
|
||||||
__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_3d(struct ggml_context* ctx,
|
__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_3d(struct ggml_context* ctx,
|
||||||
@ -1607,6 +1630,19 @@ public:
|
|||||||
tensors[prefix + pair.first] = pair.second;
|
tensors[prefix + pair.first] = pair.second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual std::string get_desc() {
|
||||||
|
return "GGMLBlock";
|
||||||
|
}
|
||||||
|
|
||||||
|
void get_all_blocks(std::vector<GGMLBlock*>& result) {
|
||||||
|
result.push_back(this);
|
||||||
|
for (auto& block_iter : blocks) {
|
||||||
|
if (block_iter.second) {
|
||||||
|
block_iter.second->get_all_blocks(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class UnaryBlock : public GGMLBlock {
|
class UnaryBlock : public GGMLBlock {
|
||||||
@ -1703,6 +1739,7 @@ protected:
|
|||||||
std::pair<int, int> padding;
|
std::pair<int, int> padding;
|
||||||
std::pair<int, int> dilation;
|
std::pair<int, int> dilation;
|
||||||
bool bias;
|
bool bias;
|
||||||
|
bool direct = false;
|
||||||
|
|
||||||
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
|
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
|
||||||
enum ggml_type wtype = GGML_TYPE_F16;
|
enum ggml_type wtype = GGML_TYPE_F16;
|
||||||
@ -1729,13 +1766,25 @@ public:
|
|||||||
dilation(dilation),
|
dilation(dilation),
|
||||||
bias(bias) {}
|
bias(bias) {}
|
||||||
|
|
||||||
|
void enable_direct() {
|
||||||
|
direct = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string get_desc() {
|
||||||
|
return "Conv2d";
|
||||||
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
|
||||||
struct ggml_tensor* w = params["weight"];
|
struct ggml_tensor* w = params["weight"];
|
||||||
struct ggml_tensor* b = NULL;
|
struct ggml_tensor* b = NULL;
|
||||||
if (bias) {
|
if (bias) {
|
||||||
b = params["bias"];
|
b = params["bias"];
|
||||||
}
|
}
|
||||||
return ggml_nn_conv_2d(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
|
if (direct) {
|
||||||
|
return ggml_nn_conv_2d_direct(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
|
||||||
|
} else {
|
||||||
|
return ggml_nn_conv_2d(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -428,6 +428,10 @@ public:
|
|||||||
model_loader.tensor_storages_types,
|
model_loader.tensor_storages_types,
|
||||||
version,
|
version,
|
||||||
sd_ctx_params->diffusion_flash_attn);
|
sd_ctx_params->diffusion_flash_attn);
|
||||||
|
if (sd_ctx_params->diffusion_conv_direct) {
|
||||||
|
LOG_INFO("Using Conv2d direct in the diffusion model");
|
||||||
|
std::dynamic_pointer_cast<UNetModel>(diffusion_model)->unet.enable_conv2d_direct();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cond_stage_model->alloc_params_buffer();
|
cond_stage_model->alloc_params_buffer();
|
||||||
@ -465,6 +469,10 @@ public:
|
|||||||
vae_decode_only,
|
vae_decode_only,
|
||||||
false,
|
false,
|
||||||
version);
|
version);
|
||||||
|
if (sd_ctx_params->vae_conv_direct) {
|
||||||
|
LOG_INFO("Using Conv2d direct in the vae model");
|
||||||
|
first_stage_model->enable_conv2d_direct();
|
||||||
|
}
|
||||||
first_stage_model->alloc_params_buffer();
|
first_stage_model->alloc_params_buffer();
|
||||||
first_stage_model->get_param_tensors(tensors, "first_stage_model");
|
first_stage_model->get_param_tensors(tensors, "first_stage_model");
|
||||||
} else {
|
} else {
|
||||||
@ -474,6 +482,10 @@ public:
|
|||||||
"decoder.layers",
|
"decoder.layers",
|
||||||
vae_decode_only,
|
vae_decode_only,
|
||||||
version);
|
version);
|
||||||
|
if (sd_ctx_params->vae_conv_direct) {
|
||||||
|
LOG_INFO("Using Conv2d direct in the tae model");
|
||||||
|
tae_first_stage->enable_conv2d_direct();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// first_stage_model->get_param_tensors(tensors, "first_stage_model.");
|
// first_stage_model->get_param_tensors(tensors, "first_stage_model.");
|
||||||
|
|
||||||
@ -489,6 +501,10 @@ public:
|
|||||||
offload_params_to_cpu,
|
offload_params_to_cpu,
|
||||||
model_loader.tensor_storages_types,
|
model_loader.tensor_storages_types,
|
||||||
version);
|
version);
|
||||||
|
if (sd_ctx_params->diffusion_conv_direct) {
|
||||||
|
LOG_INFO("Using Conv2d direct in the control net");
|
||||||
|
control_net->enable_conv2d_direct();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strstr(SAFE_STR(sd_ctx_params->stacked_id_embed_dir), "v2")) {
|
if (strstr(SAFE_STR(sd_ctx_params->stacked_id_embed_dir), "v2")) {
|
||||||
|
|||||||
@ -136,6 +136,8 @@ typedef struct {
|
|||||||
bool keep_control_net_on_cpu;
|
bool keep_control_net_on_cpu;
|
||||||
bool keep_vae_on_cpu;
|
bool keep_vae_on_cpu;
|
||||||
bool diffusion_flash_attn;
|
bool diffusion_flash_attn;
|
||||||
|
bool diffusion_conv_direct;
|
||||||
|
bool vae_conv_direct;
|
||||||
bool chroma_use_dit_mask;
|
bool chroma_use_dit_mask;
|
||||||
bool chroma_use_t5_mask;
|
bool chroma_use_t5_mask;
|
||||||
int chroma_t5_mask_pad;
|
int chroma_t5_mask_pad;
|
||||||
@ -245,6 +247,7 @@ typedef struct upscaler_ctx_t upscaler_ctx_t;
|
|||||||
|
|
||||||
SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
|
SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
|
||||||
bool offload_params_to_cpu,
|
bool offload_params_to_cpu,
|
||||||
|
bool direct,
|
||||||
int n_threads);
|
int n_threads);
|
||||||
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
|
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
|
||||||
|
|
||||||
|
|||||||
11
tae.hpp
11
tae.hpp
@ -207,6 +207,17 @@ struct TinyAutoEncoder : public GGMLRunner {
|
|||||||
taesd.init(params_ctx, tensor_types, prefix);
|
taesd.init(params_ctx, tensor_types, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void enable_conv2d_direct() {
|
||||||
|
std::vector<GGMLBlock*> blocks;
|
||||||
|
taesd.get_all_blocks(blocks);
|
||||||
|
for (auto block : blocks) {
|
||||||
|
if (block->get_desc() == "Conv2d") {
|
||||||
|
auto conv_block = (Conv2d*)block;
|
||||||
|
conv_block->enable_direct();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string get_desc() {
|
std::string get_desc() {
|
||||||
return "taesd";
|
return "taesd";
|
||||||
}
|
}
|
||||||
|
|||||||
12
unet.hpp
12
unet.hpp
@ -547,6 +547,18 @@ struct UNetModelRunner : public GGMLRunner {
|
|||||||
unet.init(params_ctx, tensor_types, prefix);
|
unet.init(params_ctx, tensor_types, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void enable_conv2d_direct() {
|
||||||
|
std::vector<GGMLBlock*> blocks;
|
||||||
|
unet.get_all_blocks(blocks);
|
||||||
|
for (auto block : blocks) {
|
||||||
|
if (block->get_desc() == "Conv2d") {
|
||||||
|
LOG_DEBUG("block %s", block->get_desc().c_str());
|
||||||
|
auto conv_block = (Conv2d*)block;
|
||||||
|
conv_block->enable_direct();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string get_desc() {
|
std::string get_desc() {
|
||||||
return "unet";
|
return "unet";
|
||||||
}
|
}
|
||||||
|
|||||||
13
upscaler.cpp
13
upscaler.cpp
@ -9,9 +9,12 @@ struct UpscalerGGML {
|
|||||||
std::shared_ptr<ESRGAN> esrgan_upscaler;
|
std::shared_ptr<ESRGAN> esrgan_upscaler;
|
||||||
std::string esrgan_path;
|
std::string esrgan_path;
|
||||||
int n_threads;
|
int n_threads;
|
||||||
|
bool direct = false;
|
||||||
|
|
||||||
UpscalerGGML(int n_threads)
|
UpscalerGGML(int n_threads,
|
||||||
: n_threads(n_threads) {
|
bool direct = false)
|
||||||
|
: n_threads(n_threads),
|
||||||
|
direct(direct) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool load_from_file(const std::string& esrgan_path,
|
bool load_from_file(const std::string& esrgan_path,
|
||||||
@ -48,6 +51,9 @@ struct UpscalerGGML {
|
|||||||
}
|
}
|
||||||
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
|
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
|
||||||
esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, model_loader.tensor_storages_types);
|
esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, model_loader.tensor_storages_types);
|
||||||
|
if (direct) {
|
||||||
|
esrgan_upscaler->enable_conv2d_direct();
|
||||||
|
}
|
||||||
if (!esrgan_upscaler->load_from_file(esrgan_path)) {
|
if (!esrgan_upscaler->load_from_file(esrgan_path)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -106,6 +112,7 @@ struct upscaler_ctx_t {
|
|||||||
|
|
||||||
upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
||||||
bool offload_params_to_cpu,
|
bool offload_params_to_cpu,
|
||||||
|
bool direct,
|
||||||
int n_threads) {
|
int n_threads) {
|
||||||
upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t));
|
upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t));
|
||||||
if (upscaler_ctx == NULL) {
|
if (upscaler_ctx == NULL) {
|
||||||
@ -113,7 +120,7 @@ upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
|||||||
}
|
}
|
||||||
std::string esrgan_path(esrgan_path_c_str);
|
std::string esrgan_path(esrgan_path_c_str);
|
||||||
|
|
||||||
upscaler_ctx->upscaler = new UpscalerGGML(n_threads);
|
upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct);
|
||||||
if (upscaler_ctx->upscaler == NULL) {
|
if (upscaler_ctx->upscaler == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|||||||
11
vae.hpp
11
vae.hpp
@ -546,6 +546,17 @@ struct AutoEncoderKL : public VAE {
|
|||||||
ae.init(params_ctx, tensor_types, prefix);
|
ae.init(params_ctx, tensor_types, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void enable_conv2d_direct() {
|
||||||
|
std::vector<GGMLBlock*> blocks;
|
||||||
|
ae.get_all_blocks(blocks);
|
||||||
|
for (auto block : blocks) {
|
||||||
|
if (block->get_desc() == "Conv2d") {
|
||||||
|
auto conv_block = (Conv2d*)block;
|
||||||
|
conv_block->enable_direct();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string get_desc() {
|
std::string get_desc() {
|
||||||
return "vae";
|
return "vae";
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user