Merge branch 'master' into qwen_image

This commit is contained in:
leejet 2025-10-12 23:20:50 +08:00
commit 2ae762356f
7 changed files with 317 additions and 108 deletions

View File

@ -286,7 +286,7 @@ usage: ./bin/sd [arguments]
arguments: arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-M, --mode [MODE] run mode, one of: [img_gen, vid_gen, convert], default: img_gen -M, --mode [MODE] run mode, one of: [img_gen, vid_gen, upscale, convert], default: img_gen
-t, --threads N number of threads to use during computation (default: -1) -t, --threads N number of threads to use during computation (default: -1)
If threads <= 0, then threads will be set to the number of CPU physical cores If threads <= 0, then threads will be set to the number of CPU physical cores
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
@ -302,7 +302,7 @@ arguments:
--taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality) --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
--control-net [CONTROL_PATH] path to control net model --control-net [CONTROL_PATH] path to control net model
--embd-dir [EMBEDDING_PATH] path to embeddings --embd-dir [EMBEDDING_PATH] path to embeddings
--upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now --upscale-model [ESRGAN_PATH] path to esrgan model. For img_gen mode, upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now
--upscale-repeats Run the ESRGAN upscaler this many times (default 1) --upscale-repeats Run the ESRGAN upscaler this many times (default 1)
--type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K) --type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)
If not specified, the default is the type of the weight file If not specified, the default is the type of the weight file

View File

@ -83,39 +83,44 @@ public:
class RRDBNet : public GGMLBlock { class RRDBNet : public GGMLBlock {
protected: protected:
int scale = 4; // default RealESRGAN_x4plus_anime_6B int scale = 4;
int num_block = 6; // default RealESRGAN_x4plus_anime_6B int num_block = 23;
int num_in_ch = 3; int num_in_ch = 3;
int num_out_ch = 3; int num_out_ch = 3;
int num_feat = 64; // default RealESRGAN_x4plus_anime_6B int num_feat = 64;
int num_grow_ch = 32; // default RealESRGAN_x4plus_anime_6B int num_grow_ch = 32;
public: public:
RRDBNet() { RRDBNet(int scale, int num_block, int num_in_ch, int num_out_ch, int num_feat, int num_grow_ch)
: scale(scale), num_block(num_block), num_in_ch(num_in_ch), num_out_ch(num_out_ch), num_feat(num_feat), num_grow_ch(num_grow_ch) {
blocks["conv_first"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_in_ch, num_feat, {3, 3}, {1, 1}, {1, 1})); blocks["conv_first"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_in_ch, num_feat, {3, 3}, {1, 1}, {1, 1}));
for (int i = 0; i < num_block; i++) { for (int i = 0; i < num_block; i++) {
std::string name = "body." + std::to_string(i); std::string name = "body." + std::to_string(i);
blocks[name] = std::shared_ptr<GGMLBlock>(new RRDB(num_feat, num_grow_ch)); blocks[name] = std::shared_ptr<GGMLBlock>(new RRDB(num_feat, num_grow_ch));
} }
blocks["conv_body"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1})); blocks["conv_body"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1}));
// upsample if (scale >= 2) {
blocks["conv_up1"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1})); blocks["conv_up1"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1}));
}
if (scale == 4) {
blocks["conv_up2"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1})); blocks["conv_up2"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1}));
}
blocks["conv_hr"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1})); blocks["conv_hr"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1}));
blocks["conv_last"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat, num_out_ch, {3, 3}, {1, 1}, {1, 1})); blocks["conv_last"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat, num_out_ch, {3, 3}, {1, 1}, {1, 1}));
} }
int get_scale() { return scale; }
int get_num_block() { return num_block; }
struct ggml_tensor* lrelu(struct ggml_context* ctx, struct ggml_tensor* x) { struct ggml_tensor* lrelu(struct ggml_context* ctx, struct ggml_tensor* x) {
return ggml_leaky_relu(ctx, x, 0.2f, true); return ggml_leaky_relu(ctx, x, 0.2f, true);
} }
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
// x: [n, num_in_ch, h, w] // x: [n, num_in_ch, h, w]
// return: [n, num_out_ch, h*4, w*4] // return: [n, num_out_ch, h*scale, w*scale]
auto conv_first = std::dynamic_pointer_cast<Conv2d>(blocks["conv_first"]); auto conv_first = std::dynamic_pointer_cast<Conv2d>(blocks["conv_first"]);
auto conv_body = std::dynamic_pointer_cast<Conv2d>(blocks["conv_body"]); auto conv_body = std::dynamic_pointer_cast<Conv2d>(blocks["conv_body"]);
auto conv_up1 = std::dynamic_pointer_cast<Conv2d>(blocks["conv_up1"]);
auto conv_up2 = std::dynamic_pointer_cast<Conv2d>(blocks["conv_up2"]);
auto conv_hr = std::dynamic_pointer_cast<Conv2d>(blocks["conv_hr"]); auto conv_hr = std::dynamic_pointer_cast<Conv2d>(blocks["conv_hr"]);
auto conv_last = std::dynamic_pointer_cast<Conv2d>(blocks["conv_last"]); auto conv_last = std::dynamic_pointer_cast<Conv2d>(blocks["conv_last"]);
@ -130,15 +135,22 @@ public:
body_feat = conv_body->forward(ctx, body_feat); body_feat = conv_body->forward(ctx, body_feat);
feat = ggml_add(ctx, feat, body_feat); feat = ggml_add(ctx, feat, body_feat);
// upsample // upsample
if (scale >= 2) {
auto conv_up1 = std::dynamic_pointer_cast<Conv2d>(blocks["conv_up1"]);
feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2, GGML_SCALE_MODE_NEAREST))); feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
if (scale == 4) {
auto conv_up2 = std::dynamic_pointer_cast<Conv2d>(blocks["conv_up2"]);
feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2, GGML_SCALE_MODE_NEAREST))); feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
}
}
// for all scales
auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat))); auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat)));
return out; return out;
} }
}; };
struct ESRGAN : public GGMLRunner { struct ESRGAN : public GGMLRunner {
RRDBNet rrdb_net; std::unique_ptr<RRDBNet> rrdb_net;
int scale = 4; int scale = 4;
int tile_size = 128; // avoid cuda OOM for 4gb VRAM int tile_size = 128; // avoid cuda OOM for 4gb VRAM
@ -146,12 +158,14 @@ struct ESRGAN : public GGMLRunner {
bool offload_params_to_cpu, bool offload_params_to_cpu,
const String2GGMLType& tensor_types = {}) const String2GGMLType& tensor_types = {})
: GGMLRunner(backend, offload_params_to_cpu) { : GGMLRunner(backend, offload_params_to_cpu) {
rrdb_net.init(params_ctx, tensor_types, ""); // rrdb_net will be created in load_from_file
} }
void enable_conv2d_direct() { void enable_conv2d_direct() {
if (!rrdb_net)
return;
std::vector<GGMLBlock*> blocks; std::vector<GGMLBlock*> blocks;
rrdb_net.get_all_blocks(blocks); rrdb_net->get_all_blocks(blocks);
for (auto block : blocks) { for (auto block : blocks) {
if (block->get_desc() == "Conv2d") { if (block->get_desc() == "Conv2d") {
auto conv_block = (Conv2d*)block; auto conv_block = (Conv2d*)block;
@ -167,31 +181,185 @@ struct ESRGAN : public GGMLRunner {
bool load_from_file(const std::string& file_path, int n_threads) { bool load_from_file(const std::string& file_path, int n_threads) {
LOG_INFO("loading esrgan from '%s'", file_path.c_str()); LOG_INFO("loading esrgan from '%s'", file_path.c_str());
alloc_params_buffer();
std::map<std::string, ggml_tensor*> esrgan_tensors;
rrdb_net.get_param_tensors(esrgan_tensors);
ModelLoader model_loader; ModelLoader model_loader;
if (!model_loader.init_from_file(file_path)) { if (!model_loader.init_from_file(file_path)) {
LOG_ERROR("init esrgan model loader from file failed: '%s'", file_path.c_str()); LOG_ERROR("init esrgan model loader from file failed: '%s'", file_path.c_str());
return false; return false;
} }
bool success = model_loader.load_tensors(esrgan_tensors, {}, n_threads); // Get tensor names
auto tensor_names = model_loader.get_tensor_names();
// Detect if it's ESRGAN format
bool is_ESRGAN = std::find(tensor_names.begin(), tensor_names.end(), "model.0.weight") != tensor_names.end();
// Detect parameters from tensor names
int detected_num_block = 0;
if (is_ESRGAN) {
for (const auto& name : tensor_names) {
if (name.find("model.1.sub.") == 0) {
size_t first_dot = name.find('.', 12);
if (first_dot != std::string::npos) {
size_t second_dot = name.find('.', first_dot + 1);
if (second_dot != std::string::npos && name.substr(first_dot + 1, 3) == "RDB") {
try {
int idx = std::stoi(name.substr(12, first_dot - 12));
detected_num_block = std::max(detected_num_block, idx + 1);
} catch (...) {
}
}
}
}
}
} else {
// Original format
for (const auto& name : tensor_names) {
if (name.find("body.") == 0) {
size_t pos = name.find('.', 5);
if (pos != std::string::npos) {
try {
int idx = std::stoi(name.substr(5, pos - 5));
detected_num_block = std::max(detected_num_block, idx + 1);
} catch (...) {
}
}
}
}
}
int detected_scale = 4; // default
if (is_ESRGAN) {
// For ESRGAN format, detect scale by highest model number
int max_model_num = 0;
for (const auto& name : tensor_names) {
if (name.find("model.") == 0) {
size_t dot_pos = name.find('.', 6);
if (dot_pos != std::string::npos) {
try {
int num = std::stoi(name.substr(6, dot_pos - 6));
max_model_num = std::max(max_model_num, num);
} catch (...) {
}
}
}
}
if (max_model_num <= 4) {
detected_scale = 1;
} else if (max_model_num <= 7) {
detected_scale = 2;
} else {
detected_scale = 4;
}
} else {
// Original format
bool has_conv_up2 = std::any_of(tensor_names.begin(), tensor_names.end(), [](const std::string& name) {
return name == "conv_up2.weight";
});
bool has_conv_up1 = std::any_of(tensor_names.begin(), tensor_names.end(), [](const std::string& name) {
return name == "conv_up1.weight";
});
if (has_conv_up2) {
detected_scale = 4;
} else if (has_conv_up1) {
detected_scale = 2;
} else {
detected_scale = 1;
}
}
int detected_num_in_ch = 3;
int detected_num_out_ch = 3;
int detected_num_feat = 64;
int detected_num_grow_ch = 32;
// Create RRDBNet with detected parameters
rrdb_net = std::make_unique<RRDBNet>(detected_scale, detected_num_block, detected_num_in_ch, detected_num_out_ch, detected_num_feat, detected_num_grow_ch);
rrdb_net->init(params_ctx, {}, "");
alloc_params_buffer();
std::map<std::string, ggml_tensor*> esrgan_tensors;
rrdb_net->get_param_tensors(esrgan_tensors);
bool success;
if (is_ESRGAN) {
// Build name mapping for ESRGAN format
std::map<std::string, std::string> expected_to_model;
expected_to_model["conv_first.weight"] = "model.0.weight";
expected_to_model["conv_first.bias"] = "model.0.bias";
for (int i = 0; i < detected_num_block; i++) {
for (int j = 1; j <= 3; j++) {
for (int k = 1; k <= 5; k++) {
std::string expected_weight = "body." + std::to_string(i) + ".rdb" + std::to_string(j) + ".conv" + std::to_string(k) + ".weight";
std::string model_weight = "model.1.sub." + std::to_string(i) + ".RDB" + std::to_string(j) + ".conv" + std::to_string(k) + ".0.weight";
expected_to_model[expected_weight] = model_weight;
std::string expected_bias = "body." + std::to_string(i) + ".rdb" + std::to_string(j) + ".conv" + std::to_string(k) + ".bias";
std::string model_bias = "model.1.sub." + std::to_string(i) + ".RDB" + std::to_string(j) + ".conv" + std::to_string(k) + ".0.bias";
expected_to_model[expected_bias] = model_bias;
}
}
}
if (detected_scale == 1) {
expected_to_model["conv_body.weight"] = "model.1.sub." + std::to_string(detected_num_block) + ".weight";
expected_to_model["conv_body.bias"] = "model.1.sub." + std::to_string(detected_num_block) + ".bias";
expected_to_model["conv_hr.weight"] = "model.2.weight";
expected_to_model["conv_hr.bias"] = "model.2.bias";
expected_to_model["conv_last.weight"] = "model.4.weight";
expected_to_model["conv_last.bias"] = "model.4.bias";
} else {
expected_to_model["conv_body.weight"] = "model.1.sub." + std::to_string(detected_num_block) + ".weight";
expected_to_model["conv_body.bias"] = "model.1.sub." + std::to_string(detected_num_block) + ".bias";
if (detected_scale >= 2) {
expected_to_model["conv_up1.weight"] = "model.3.weight";
expected_to_model["conv_up1.bias"] = "model.3.bias";
}
if (detected_scale == 4) {
expected_to_model["conv_up2.weight"] = "model.6.weight";
expected_to_model["conv_up2.bias"] = "model.6.bias";
expected_to_model["conv_hr.weight"] = "model.8.weight";
expected_to_model["conv_hr.bias"] = "model.8.bias";
expected_to_model["conv_last.weight"] = "model.10.weight";
expected_to_model["conv_last.bias"] = "model.10.bias";
} else if (detected_scale == 2) {
expected_to_model["conv_hr.weight"] = "model.5.weight";
expected_to_model["conv_hr.bias"] = "model.5.bias";
expected_to_model["conv_last.weight"] = "model.7.weight";
expected_to_model["conv_last.bias"] = "model.7.bias";
}
}
std::map<std::string, ggml_tensor*> model_tensors;
for (auto& p : esrgan_tensors) {
auto it = expected_to_model.find(p.first);
if (it != expected_to_model.end()) {
model_tensors[it->second] = p.second;
}
}
success = model_loader.load_tensors(model_tensors, {}, n_threads);
} else {
success = model_loader.load_tensors(esrgan_tensors, {}, n_threads);
}
if (!success) { if (!success) {
LOG_ERROR("load esrgan tensors from model loader failed"); LOG_ERROR("load esrgan tensors from model loader failed");
return false; return false;
} }
LOG_INFO("esrgan model loaded"); scale = rrdb_net->get_scale();
LOG_INFO("esrgan model loaded with scale=%d, num_block=%d", scale, detected_num_block);
return success; return success;
} }
struct ggml_cgraph* build_graph(struct ggml_tensor* x) { struct ggml_cgraph* build_graph(struct ggml_tensor* x) {
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx); if (!rrdb_net)
return nullptr;
constexpr int kGraphNodes = 1 << 16; // 65k
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, kGraphNodes, /*grads*/ false);
x = to_backend(x); x = to_backend(x);
struct ggml_tensor* out = rrdb_net.forward(compute_ctx, x); struct ggml_tensor* out = rrdb_net->forward(compute_ctx, x);
ggml_build_forward_expand(gf, out); ggml_build_forward_expand(gf, out);
return gf; return gf;
} }

View File

@ -41,13 +41,15 @@ const char* modes_str[] = {
"img_gen", "img_gen",
"vid_gen", "vid_gen",
"convert", "convert",
"upscale",
}; };
#define SD_ALL_MODES_STR "img_gen, vid_gen, convert" #define SD_ALL_MODES_STR "img_gen, vid_gen, convert, upscale"
enum SDMode { enum SDMode {
IMG_GEN, IMG_GEN,
VID_GEN, VID_GEN,
CONVERT, CONVERT,
UPSCALE,
MODE_COUNT MODE_COUNT
}; };
@ -206,7 +208,7 @@ void print_usage(int argc, const char* argv[]) {
printf("\n"); printf("\n");
printf("arguments:\n"); printf("arguments:\n");
printf(" -h, --help show this help message and exit\n"); printf(" -h, --help show this help message and exit\n");
printf(" -M, --mode [MODE] run mode, one of: [img_gen, vid_gen, convert], default: img_gen\n"); printf(" -M, --mode [MODE] run mode, one of: [img_gen, vid_gen, upscale, convert], default: img_gen\n");
printf(" -t, --threads N number of threads to use during computation (default: -1)\n"); printf(" -t, --threads N number of threads to use during computation (default: -1)\n");
printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n"); printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
printf(" --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed\n"); printf(" --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed\n");
@ -222,7 +224,7 @@ void print_usage(int argc, const char* argv[]) {
printf(" --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n"); printf(" --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n");
printf(" --control-net [CONTROL_PATH] path to control net model\n"); printf(" --control-net [CONTROL_PATH] path to control net model\n");
printf(" --embd-dir [EMBEDDING_PATH] path to embeddings\n"); printf(" --embd-dir [EMBEDDING_PATH] path to embeddings\n");
printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now\n"); printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. For img_gen mode, upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now\n");
printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n"); printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n");
printf(" --type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)\n"); printf(" --type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)\n");
printf(" If not specified, the default is the type of the weight file\n"); printf(" If not specified, the default is the type of the weight file\n");
@ -821,13 +823,13 @@ void parse_args(int argc, const char** argv, SDParams& params) {
params.n_threads = get_num_physical_cores(); params.n_threads = get_num_physical_cores();
} }
if (params.mode != CONVERT && params.mode != VID_GEN && params.prompt.length() == 0) { if ((params.mode == IMG_GEN || params.mode == VID_GEN) && params.prompt.length() == 0) {
fprintf(stderr, "error: the following arguments are required: prompt\n"); fprintf(stderr, "error: the following arguments are required: prompt\n");
print_usage(argc, argv); print_usage(argc, argv);
exit(1); exit(1);
} }
if (params.model_path.length() == 0 && params.diffusion_model_path.length() == 0) { if (params.mode != UPSCALE && params.model_path.length() == 0 && params.diffusion_model_path.length() == 0) {
fprintf(stderr, "error: the following arguments are required: model_path/diffusion_model\n"); fprintf(stderr, "error: the following arguments are required: model_path/diffusion_model\n");
print_usage(argc, argv); print_usage(argc, argv);
exit(1); exit(1);
@ -887,6 +889,17 @@ void parse_args(int argc, const char** argv, SDParams& params) {
exit(1); exit(1);
} }
if (params.mode == UPSCALE) {
if (params.esrgan_path.length() == 0) {
fprintf(stderr, "error: upscale mode needs an upscaler model (--upscale-model)\n");
exit(1);
}
if (params.init_image_path.length() == 0) {
fprintf(stderr, "error: upscale mode needs an init image (--init-img)\n");
exit(1);
}
}
if (params.seed < 0) { if (params.seed < 0) {
srand((int)time(NULL)); srand((int)time(NULL));
params.seed = rand(); params.seed = rand();
@ -897,14 +910,6 @@ void parse_args(int argc, const char** argv, SDParams& params) {
params.output_path = "output.gguf"; params.output_path = "output.gguf";
} }
} }
if (!isfinite(params.sample_params.guidance.img_cfg)) {
params.sample_params.guidance.img_cfg = params.sample_params.guidance.txt_cfg;
}
if (!isfinite(params.high_noise_sample_params.guidance.img_cfg)) {
params.high_noise_sample_params.guidance.img_cfg = params.high_noise_sample_params.guidance.txt_cfg;
}
} }
static std::string sd_basename(const std::string& path) { static std::string sd_basename(const std::string& path) {
@ -1357,6 +1362,21 @@ int main(int argc, const char* argv[]) {
params.flow_shift, params.flow_shift,
}; };
sd_image_t* results = nullptr;
int num_results = 0;
if (params.mode == UPSCALE) {
num_results = 1;
results = (sd_image_t*)calloc(num_results, sizeof(sd_image_t));
if (results == NULL) {
printf("failed to allocate results array\n");
release_all_resources();
return 1;
}
results[0] = init_image;
init_image.data = NULL;
} else {
sd_ctx_t* sd_ctx = new_sd_ctx(&sd_ctx_params); sd_ctx_t* sd_ctx = new_sd_ctx(&sd_ctx_params);
if (sd_ctx == NULL) { if (sd_ctx == NULL) {
@ -1369,8 +1389,6 @@ int main(int argc, const char* argv[]) {
params.sample_params.sample_method = sd_get_default_sample_method(sd_ctx); params.sample_params.sample_method = sd_get_default_sample_method(sd_ctx);
} }
sd_image_t* results;
int num_results = 1;
if (params.mode == IMG_GEN) { if (params.mode == IMG_GEN) {
sd_img_gen_params_t img_gen_params = { sd_img_gen_params_t img_gen_params = {
params.prompt.c_str(), params.prompt.c_str(),
@ -1429,6 +1447,9 @@ int main(int argc, const char* argv[]) {
return 1; return 1;
} }
free_sd_ctx(sd_ctx);
}
int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth
if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) { if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) {
upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(), upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(),
@ -1439,7 +1460,7 @@ int main(int argc, const char* argv[]) {
if (upscaler_ctx == NULL) { if (upscaler_ctx == NULL) {
printf("new_upscaler_ctx failed\n"); printf("new_upscaler_ctx failed\n");
} else { } else {
for (int i = 0; i < params.batch_count; i++) { for (int i = 0; i < num_results; i++) {
if (results[i].data == NULL) { if (results[i].data == NULL) {
continue; continue;
} }
@ -1525,7 +1546,6 @@ int main(int argc, const char* argv[]) {
results[i].data = NULL; results[i].data = NULL;
} }
free(results); free(results);
free_sd_ctx(sd_ctx);
release_all_resources(); release_all_resources();

View File

@ -269,6 +269,14 @@ public:
std::set<std::string> ignore_tensors = {}, std::set<std::string> ignore_tensors = {},
int n_threads = 0); int n_threads = 0);
// Collects the `name` of every entry in `tensor_storages`, in iteration order.
// Returns a freshly built vector; the loader itself is not modified.
std::vector<std::string> get_tensor_names() const {
    std::vector<std::string> names;
    // The element count is known up front, so size the buffer once instead of
    // letting push_back regrow (and re-move the strings) several times.
    names.reserve(tensor_storages.size());
    for (const auto& ts : tensor_storages) {
        names.push_back(ts.name);
    }
    return names;
}
bool save_to_gguf_file(const std::string& file_path, ggml_type type, const std::string& tensor_type_rules); bool save_to_gguf_file(const std::string& file_path, ggml_type type, const std::string& tensor_type_rules);
bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type); bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type);
int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT); int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);

View File

@ -1096,7 +1096,7 @@ public:
std::vector<int> skip_layers(guidance.slg.layers, guidance.slg.layers + guidance.slg.layer_count); std::vector<int> skip_layers(guidance.slg.layers, guidance.slg.layers + guidance.slg.layer_count);
float cfg_scale = guidance.txt_cfg; float cfg_scale = guidance.txt_cfg;
float img_cfg_scale = guidance.img_cfg; float img_cfg_scale = isfinite(guidance.img_cfg) ? guidance.img_cfg : guidance.txt_cfg;
float slg_scale = guidance.slg.scale; float slg_scale = guidance.slg.scale;
if (img_cfg_scale != cfg_scale && !sd_version_is_inpaint_or_unet_edit(version)) { if (img_cfg_scale != cfg_scale && !sd_version_is_inpaint_or_unet_edit(version)) {
@ -1835,7 +1835,9 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
"eta: %.2f, " "eta: %.2f, "
"shifted_timestep: %d)", "shifted_timestep: %d)",
sample_params->guidance.txt_cfg, sample_params->guidance.txt_cfg,
sample_params->guidance.img_cfg, isfinite(sample_params->guidance.img_cfg)
? sample_params->guidance.img_cfg
: sample_params->guidance.txt_cfg,
sample_params->guidance.distilled_guidance, sample_params->guidance.distilled_guidance,
sample_params->guidance.slg.layer_count, sample_params->guidance.slg.layer_count,
sample_params->guidance.slg.layer_start, sample_params->guidance.slg.layer_start,
@ -1996,7 +1998,9 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
seed = rand(); seed = rand();
} }
print_ggml_tensor(init_latent, true, "init"); if (!isfinite(guidance.img_cfg)) {
guidance.img_cfg = guidance.txt_cfg;
}
// for (auto v : sigmas) { // for (auto v : sigmas) {
// std::cout << v << " "; // std::cout << v << " ";

View File

@ -284,6 +284,8 @@ SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx,
sd_image_t input_image, sd_image_t input_image,
uint32_t upscale_factor); uint32_t upscale_factor);
SD_API int get_upscale_factor(upscaler_ctx_t* upscaler_ctx);
SD_API bool convert(const char* input_path, SD_API bool convert(const char* input_path,
const char* vae_path, const char* vae_path,
const char* output_path, const char* output_path,

View File

@ -138,6 +138,13 @@ sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_
return upscaler_ctx->upscaler->upscale(input_image, upscale_factor); return upscaler_ctx->upscaler->upscale(input_image, upscale_factor);
} }
// Returns the `scale` field of the ESRGAN upscaler reachable from `upscaler_ctx`.
// Yields 1 when the context, its upscaler, or the ESRGAN upscaler is null.
int get_upscale_factor(upscaler_ctx_t* upscaler_ctx) {
    if (upscaler_ctx == NULL) {
        return 1;
    }
    if (upscaler_ctx->upscaler == NULL) {
        return 1;
    }
    const auto& esrgan = upscaler_ctx->upscaler->esrgan_upscaler;
    if (esrgan == NULL) {
        return 1;
    }
    return esrgan->scale;
}
void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx) { void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx) {
if (upscaler_ctx->upscaler != NULL) { if (upscaler_ctx->upscaler != NULL) {
delete upscaler_ctx->upscaler; delete upscaler_ctx->upscaler;