mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-05-08 08:18:51 +00:00
feat: add sd-webui style Hires. fix support
This commit is contained in:
parent
44cca3d626
commit
f709e0e189
@ -278,7 +278,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
|
|||||||
bool valid = cli_params.resolve_and_validate();
|
bool valid = cli_params.resolve_and_validate();
|
||||||
if (valid && cli_params.mode != METADATA) {
|
if (valid && cli_params.mode != METADATA) {
|
||||||
valid = ctx_params.resolve_and_validate(cli_params.mode) &&
|
valid = ctx_params.resolve_and_validate(cli_params.mode) &&
|
||||||
gen_params.resolve_and_validate(cli_params.mode, ctx_params.lora_model_dir);
|
gen_params.resolve_and_validate(cli_params.mode,
|
||||||
|
ctx_params.lora_model_dir,
|
||||||
|
ctx_params.hires_upscalers_dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!valid) {
|
if (!valid) {
|
||||||
@ -688,6 +690,10 @@ int main(int argc, const char* argv[]) {
|
|||||||
vae_decode_only = false;
|
vae_decode_only = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (gen_params.hires_enabled && !gen_params.hires_upscaler_model_path.empty()) {
|
||||||
|
vae_decode_only = false;
|
||||||
|
}
|
||||||
|
|
||||||
sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview);
|
sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview);
|
||||||
|
|
||||||
SDImageVec results;
|
SDImageVec results;
|
||||||
|
|||||||
@ -351,7 +351,10 @@ ArgOptions SDContextParams::get_options() {
|
|||||||
"--lora-model-dir",
|
"--lora-model-dir",
|
||||||
"lora model directory",
|
"lora model directory",
|
||||||
&lora_model_dir},
|
&lora_model_dir},
|
||||||
|
{"",
|
||||||
|
"--hires-upscalers-dir",
|
||||||
|
"highres fix upscaler model directory",
|
||||||
|
&hires_upscalers_dir},
|
||||||
{"",
|
{"",
|
||||||
"--tensor-type-rules",
|
"--tensor-type-rules",
|
||||||
"weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")",
|
"weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")",
|
||||||
@ -649,6 +652,7 @@ std::string SDContextParams::to_string() const {
|
|||||||
<< " wtype: " << sd_type_name(wtype) << ",\n"
|
<< " wtype: " << sd_type_name(wtype) << ",\n"
|
||||||
<< " tensor_type_rules: \"" << tensor_type_rules << "\",\n"
|
<< " tensor_type_rules: \"" << tensor_type_rules << "\",\n"
|
||||||
<< " lora_model_dir: \"" << lora_model_dir << "\",\n"
|
<< " lora_model_dir: \"" << lora_model_dir << "\",\n"
|
||||||
|
<< " hires_upscalers_dir: \"" << hires_upscalers_dir << "\",\n"
|
||||||
<< " photo_maker_path: \"" << photo_maker_path << "\",\n"
|
<< " photo_maker_path: \"" << photo_maker_path << "\",\n"
|
||||||
<< " rng_type: " << sd_rng_type_name(rng_type) << ",\n"
|
<< " rng_type: " << sd_rng_type_name(rng_type) << ",\n"
|
||||||
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
|
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
|
||||||
@ -777,6 +781,10 @@ ArgOptions SDGenerationParams::get_options() {
|
|||||||
"--pm-id-embed-path",
|
"--pm-id-embed-path",
|
||||||
"path to PHOTOMAKER v2 id embed",
|
"path to PHOTOMAKER v2 id embed",
|
||||||
&pm_id_embed_path},
|
&pm_id_embed_path},
|
||||||
|
{"",
|
||||||
|
"--hires-upscaler",
|
||||||
|
"highres fix upscaler, Latent (nearest) or a model name/path under --hires-upscalers-dir (default: Latent (nearest))",
|
||||||
|
&hires_upscaler},
|
||||||
};
|
};
|
||||||
|
|
||||||
options.int_options = {
|
options.int_options = {
|
||||||
@ -826,6 +834,22 @@ ArgOptions SDGenerationParams::get_options() {
|
|||||||
"--upscale-tile-size",
|
"--upscale-tile-size",
|
||||||
"tile size for ESRGAN upscaling (default: 128)",
|
"tile size for ESRGAN upscaling (default: 128)",
|
||||||
&upscale_tile_size},
|
&upscale_tile_size},
|
||||||
|
{"",
|
||||||
|
"--hires-width",
|
||||||
|
"highres fix target width, 0 to use --hires-scale (default: 0)",
|
||||||
|
&hires_width},
|
||||||
|
{"",
|
||||||
|
"--hires-height",
|
||||||
|
"highres fix target height, 0 to use --hires-scale (default: 0)",
|
||||||
|
&hires_height},
|
||||||
|
{"",
|
||||||
|
"--hires-steps",
|
||||||
|
"highres fix second pass sample steps, 0 to reuse --steps (default: 0)",
|
||||||
|
&hires_steps},
|
||||||
|
{"",
|
||||||
|
"--hires-upscale-tile-size",
|
||||||
|
"highres fix upscaler tile size, reserved for model-backed upscalers (default: 128)",
|
||||||
|
&hires_upscale_tile_size},
|
||||||
};
|
};
|
||||||
|
|
||||||
options.float_options = {
|
options.float_options = {
|
||||||
@ -913,6 +937,14 @@ ArgOptions SDGenerationParams::get_options() {
|
|||||||
"--vae-tile-overlap",
|
"--vae-tile-overlap",
|
||||||
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
|
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
|
||||||
&vae_tiling_params.target_overlap},
|
&vae_tiling_params.target_overlap},
|
||||||
|
{"",
|
||||||
|
"--hires-scale",
|
||||||
|
"highres fix scale when target size is not set (default: 2.0)",
|
||||||
|
&hires_scale},
|
||||||
|
{"",
|
||||||
|
"--hires-denoising-strength",
|
||||||
|
"highres fix second pass denoising strength (default: 0.7)",
|
||||||
|
&hires_denoising_strength},
|
||||||
};
|
};
|
||||||
|
|
||||||
options.bool_options = {
|
options.bool_options = {
|
||||||
@ -936,6 +968,11 @@ ArgOptions SDGenerationParams::get_options() {
|
|||||||
"process vae in tiles to reduce memory usage",
|
"process vae in tiles to reduce memory usage",
|
||||||
true,
|
true,
|
||||||
&vae_tiling_params.enabled},
|
&vae_tiling_params.enabled},
|
||||||
|
{"",
|
||||||
|
"--hires",
|
||||||
|
"enable highres fix",
|
||||||
|
true,
|
||||||
|
&hires_enabled},
|
||||||
};
|
};
|
||||||
|
|
||||||
auto on_seed_arg = [&](int argc, const char** argv, int index) {
|
auto on_seed_arg = [&](int argc, const char** argv, int index) {
|
||||||
@ -1424,6 +1461,37 @@ static bool parse_lora_json_field(const json& parent,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool resolve_model_file_from_dir(const std::string& model_name,
|
||||||
|
const std::string& model_dir,
|
||||||
|
const std::vector<std::string>& valid_ext,
|
||||||
|
const char* label,
|
||||||
|
std::string& resolved_path) {
|
||||||
|
if (model_dir.empty()) {
|
||||||
|
LOG_ERROR("%s directory is empty", label);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (model_name.empty() ||
|
||||||
|
model_name.find('/') != std::string::npos ||
|
||||||
|
model_name.find('\\') != std::string::npos ||
|
||||||
|
fs::path(model_name).has_root_path() ||
|
||||||
|
fs::path(model_name).has_extension()) {
|
||||||
|
LOG_ERROR("%s must be a model name without path or extension: %s", label, model_name.c_str());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
fs::path model_dir_path = model_dir;
|
||||||
|
for (const auto& ext : valid_ext) {
|
||||||
|
fs::path try_path = model_dir_path / (model_name + ext);
|
||||||
|
if (fs::exists(try_path) && fs::is_regular_file(try_path)) {
|
||||||
|
resolved_path = try_path.lexically_normal().string();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_ERROR("can not find %s %s in %s", label, model_name.c_str(), model_dir_path.lexically_normal().string().c_str());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
bool SDGenerationParams::from_json_str(
|
bool SDGenerationParams::from_json_str(
|
||||||
const std::string& json_str,
|
const std::string& json_str,
|
||||||
const std::function<std::string(const std::string&)>& lora_path_resolver) {
|
const std::function<std::string(const std::string&)>& lora_path_resolver) {
|
||||||
@ -1487,6 +1555,34 @@ bool SDGenerationParams::from_json_str(
|
|||||||
load_if_exists("increase_ref_index", increase_ref_index);
|
load_if_exists("increase_ref_index", increase_ref_index);
|
||||||
load_if_exists("embed_image_metadata", embed_image_metadata);
|
load_if_exists("embed_image_metadata", embed_image_metadata);
|
||||||
|
|
||||||
|
if (j.contains("hires") && j["hires"].is_object()) {
|
||||||
|
const json& hires_json = j["hires"];
|
||||||
|
if (hires_json.contains("enabled") && hires_json["enabled"].is_boolean()) {
|
||||||
|
hires_enabled = hires_json["enabled"];
|
||||||
|
}
|
||||||
|
if (hires_json.contains("upscaler") && hires_json["upscaler"].is_string()) {
|
||||||
|
hires_upscaler = hires_json["upscaler"];
|
||||||
|
}
|
||||||
|
if (hires_json.contains("scale") && hires_json["scale"].is_number()) {
|
||||||
|
hires_scale = hires_json["scale"];
|
||||||
|
}
|
||||||
|
if (hires_json.contains("target_width") && hires_json["target_width"].is_number_integer()) {
|
||||||
|
hires_width = hires_json["target_width"];
|
||||||
|
}
|
||||||
|
if (hires_json.contains("target_height") && hires_json["target_height"].is_number_integer()) {
|
||||||
|
hires_height = hires_json["target_height"];
|
||||||
|
}
|
||||||
|
if (hires_json.contains("steps") && hires_json["steps"].is_number_integer()) {
|
||||||
|
hires_steps = hires_json["steps"];
|
||||||
|
}
|
||||||
|
if (hires_json.contains("denoising_strength") && hires_json["denoising_strength"].is_number()) {
|
||||||
|
hires_denoising_strength = hires_json["denoising_strength"];
|
||||||
|
}
|
||||||
|
if (hires_json.contains("upscale_tile_size") && hires_json["upscale_tile_size"].is_number_integer()) {
|
||||||
|
hires_upscale_tile_size = hires_json["upscale_tile_size"];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto parse_sample_params_json = [&](const json& sample_json,
|
auto parse_sample_params_json = [&](const json& sample_json,
|
||||||
sd_sample_params_t& target_params,
|
sd_sample_params_t& target_params,
|
||||||
std::vector<int>& target_skip_layers,
|
std::vector<int>& target_skip_layers,
|
||||||
@ -1800,7 +1896,7 @@ bool SDGenerationParams::initialize_cache_params() {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict) {
|
bool SDGenerationParams::resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict) {
|
||||||
if (high_noise_sample_params.sample_steps <= 0) {
|
if (high_noise_sample_params.sample_steps <= 0) {
|
||||||
high_noise_sample_params.sample_steps = -1;
|
high_noise_sample_params.sample_steps = -1;
|
||||||
}
|
}
|
||||||
@ -1819,6 +1915,27 @@ bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict)
|
|||||||
sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100);
|
sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hires_upscaler_model_path.clear();
|
||||||
|
if (hires_enabled) {
|
||||||
|
if (hires_upscaler.empty()) {
|
||||||
|
hires_upscaler = "Latent (nearest)";
|
||||||
|
}
|
||||||
|
resolved_hires_upscaler = str_to_sd_hires_upscaler(hires_upscaler.c_str());
|
||||||
|
if (resolved_hires_upscaler == SD_HIRES_UPSCALER_NONE) {
|
||||||
|
hires_enabled = false;
|
||||||
|
} else if (resolved_hires_upscaler == SD_HIRES_UPSCALER_COUNT) {
|
||||||
|
static const std::vector<std::string> valid_ext = {".gguf", ".safetensors", ".pt", ".pth"};
|
||||||
|
if (!resolve_model_file_from_dir(hires_upscaler,
|
||||||
|
hires_upscalers_dir,
|
||||||
|
valid_ext,
|
||||||
|
"hires upscaler",
|
||||||
|
hires_upscaler_model_path)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
resolved_hires_upscaler = SD_HIRES_UPSCALER_MODEL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
prompt_with_lora = prompt;
|
prompt_with_lora = prompt;
|
||||||
if (!lora_model_dir.empty()) {
|
if (!lora_model_dir.empty()) {
|
||||||
extract_and_remove_lora(lora_model_dir);
|
extract_and_remove_lora(lora_model_dir);
|
||||||
@ -1883,6 +2000,29 @@ bool SDGenerationParams::validate(SDMode mode) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (hires_enabled) {
|
||||||
|
if (hires_width < 0 || hires_height < 0) {
|
||||||
|
LOG_ERROR("error: hires target width and height must be >= 0");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (hires_scale <= 0.f && hires_width <= 0 && hires_height <= 0) {
|
||||||
|
LOG_ERROR("error: hires scale must be positive when target size is not set");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (hires_steps < 0) {
|
||||||
|
LOG_ERROR("error: hires steps must be >= 0");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (hires_denoising_strength <= 0.f || hires_denoising_strength > 1.f) {
|
||||||
|
LOG_ERROR("error: hires denoising strength must be in (0.0, 1.0]");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (hires_upscale_tile_size < 1) {
|
||||||
|
LOG_ERROR("error: hires upscale tile size must be positive");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (mode == UPSCALE) {
|
if (mode == UPSCALE) {
|
||||||
if (init_image_path.length() == 0) {
|
if (init_image_path.length() == 0) {
|
||||||
LOG_ERROR("error: upscale mode needs an init image (--init-img)\n");
|
LOG_ERROR("error: upscale mode needs an init image (--init-img)\n");
|
||||||
@ -1893,8 +2033,11 @@ bool SDGenerationParams::validate(SDMode mode) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SDGenerationParams::resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict) {
|
bool SDGenerationParams::resolve_and_validate(SDMode mode,
|
||||||
if (!resolve(lora_model_dir, strict)) {
|
const std::string& lora_model_dir,
|
||||||
|
const std::string& hires_upscalers_dir,
|
||||||
|
bool strict) {
|
||||||
|
if (!resolve(lora_model_dir, hires_upscalers_dir, strict)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!validate(mode)) {
|
if (!validate(mode)) {
|
||||||
@ -1965,6 +2108,16 @@ sd_img_gen_params_t SDGenerationParams::to_sd_img_gen_params_t() {
|
|||||||
params.pm_params = pm_params;
|
params.pm_params = pm_params;
|
||||||
params.vae_tiling_params = vae_tiling_params;
|
params.vae_tiling_params = vae_tiling_params;
|
||||||
params.cache = cache_params;
|
params.cache = cache_params;
|
||||||
|
|
||||||
|
params.hires.enabled = hires_enabled;
|
||||||
|
params.hires.upscaler = resolved_hires_upscaler;
|
||||||
|
params.hires.model_path = hires_upscaler_model_path.empty() ? nullptr : hires_upscaler_model_path.c_str();
|
||||||
|
params.hires.scale = hires_scale;
|
||||||
|
params.hires.target_width = hires_width;
|
||||||
|
params.hires.target_height = hires_height;
|
||||||
|
params.hires.steps = hires_steps;
|
||||||
|
params.hires.denoising_strength = hires_denoising_strength;
|
||||||
|
params.hires.upscale_tile_size = hires_upscale_tile_size;
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2089,6 +2242,15 @@ std::string SDGenerationParams::to_string() const {
|
|||||||
<< " seed: " << seed << ",\n"
|
<< " seed: " << seed << ",\n"
|
||||||
<< " upscale_repeats: " << upscale_repeats << ",\n"
|
<< " upscale_repeats: " << upscale_repeats << ",\n"
|
||||||
<< " upscale_tile_size: " << upscale_tile_size << ",\n"
|
<< " upscale_tile_size: " << upscale_tile_size << ",\n"
|
||||||
|
<< " hires: { enabled: " << (hires_enabled ? "true" : "false")
|
||||||
|
<< ", upscaler: \"" << hires_upscaler << "\""
|
||||||
|
<< ", model_path: \"" << hires_upscaler_model_path << "\""
|
||||||
|
<< ", scale: " << hires_scale
|
||||||
|
<< ", target_width: " << hires_width
|
||||||
|
<< ", target_height: " << hires_height
|
||||||
|
<< ", steps: " << hires_steps
|
||||||
|
<< ", denoising_strength: " << hires_denoising_strength
|
||||||
|
<< ", upscale_tile_size: " << hires_upscale_tile_size << " },\n"
|
||||||
<< " vae_tiling_params: { "
|
<< " vae_tiling_params: { "
|
||||||
<< vae_tiling_params.enabled << ", "
|
<< vae_tiling_params.enabled << ", "
|
||||||
<< vae_tiling_params.tile_size_x << ", "
|
<< vae_tiling_params.tile_size_x << ", "
|
||||||
@ -2162,6 +2324,13 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
|
|||||||
if (gen_params.clip_skip != -1) {
|
if (gen_params.clip_skip != -1) {
|
||||||
parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", ";
|
parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", ";
|
||||||
}
|
}
|
||||||
|
if (gen_params.hires_enabled) {
|
||||||
|
parameter_string += "Hires upscale: " + gen_params.hires_upscaler + ", ";
|
||||||
|
parameter_string += "Hires scale: " + std::to_string(gen_params.hires_scale) + ", ";
|
||||||
|
parameter_string += "Hires resize: " + std::to_string(gen_params.hires_width) + "x" + std::to_string(gen_params.hires_height) + ", ";
|
||||||
|
parameter_string += "Hires steps: " + std::to_string(gen_params.hires_steps) + ", ";
|
||||||
|
parameter_string += "Denoising strength: " + std::to_string(gen_params.hires_denoising_strength) + ", ";
|
||||||
|
}
|
||||||
parameter_string += "Version: stable-diffusion.cpp";
|
parameter_string += "Version: stable-diffusion.cpp";
|
||||||
return parameter_string;
|
return parameter_string;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -101,6 +101,7 @@ struct SDContextParams {
|
|||||||
sd_type_t wtype = SD_TYPE_COUNT;
|
sd_type_t wtype = SD_TYPE_COUNT;
|
||||||
std::string tensor_type_rules;
|
std::string tensor_type_rules;
|
||||||
std::string lora_model_dir = ".";
|
std::string lora_model_dir = ".";
|
||||||
|
std::string hires_upscalers_dir;
|
||||||
|
|
||||||
std::map<std::string, std::string> embedding_map;
|
std::map<std::string, std::string> embedding_map;
|
||||||
std::vector<sd_embedding_t> embedding_vec;
|
std::vector<sd_embedding_t> embedding_vec;
|
||||||
@ -190,12 +191,23 @@ struct SDGenerationParams {
|
|||||||
int upscale_repeats = 1;
|
int upscale_repeats = 1;
|
||||||
int upscale_tile_size = 128;
|
int upscale_tile_size = 128;
|
||||||
|
|
||||||
|
bool hires_enabled = false;
|
||||||
|
std::string hires_upscaler = "Latent (nearest)";
|
||||||
|
std::string hires_upscaler_model_path;
|
||||||
|
float hires_scale = 2.f;
|
||||||
|
int hires_width = 0;
|
||||||
|
int hires_height = 0;
|
||||||
|
int hires_steps = 0;
|
||||||
|
float hires_denoising_strength = 0.7f;
|
||||||
|
int hires_upscale_tile_size = 128;
|
||||||
|
|
||||||
std::map<std::string, float> lora_map;
|
std::map<std::string, float> lora_map;
|
||||||
std::map<std::string, float> high_noise_lora_map;
|
std::map<std::string, float> high_noise_lora_map;
|
||||||
|
|
||||||
// Derived and normalized fields.
|
// Derived and normalized fields.
|
||||||
std::string prompt_with_lora; // for metadata record only
|
std::string prompt_with_lora; // for metadata record only
|
||||||
std::vector<sd_lora_t> lora_vec;
|
std::vector<sd_lora_t> lora_vec;
|
||||||
|
sd_hires_upscaler_t resolved_hires_upscaler;
|
||||||
|
|
||||||
// Owned execution payload.
|
// Owned execution payload.
|
||||||
SDImageOwner init_image;
|
SDImageOwner init_image;
|
||||||
@ -225,9 +237,12 @@ struct SDGenerationParams {
|
|||||||
void set_width_and_height_if_unset(int w, int h);
|
void set_width_and_height_if_unset(int w, int h);
|
||||||
int get_resolved_width() const;
|
int get_resolved_width() const;
|
||||||
int get_resolved_height() const;
|
int get_resolved_height() const;
|
||||||
bool resolve(const std::string& lora_model_dir, bool strict = false);
|
bool resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict = false);
|
||||||
bool validate(SDMode mode);
|
bool validate(SDMode mode);
|
||||||
bool resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict = false);
|
bool resolve_and_validate(SDMode mode,
|
||||||
|
const std::string& lora_model_dir,
|
||||||
|
const std::string& hires_upscalers_dir,
|
||||||
|
bool strict = false);
|
||||||
sd_img_gen_params_t to_sd_img_gen_params_t();
|
sd_img_gen_params_t to_sd_img_gen_params_t();
|
||||||
sd_vid_gen_params_t to_sd_vid_gen_params_t();
|
sd_vid_gen_params_t to_sd_vid_gen_params_t();
|
||||||
std::string to_string() const;
|
std::string to_string() const;
|
||||||
|
|||||||
@ -38,6 +38,8 @@ Current generation-related endpoints include:
|
|||||||
- `POST /sdapi/v1/txt2img`
|
- `POST /sdapi/v1/txt2img`
|
||||||
- `POST /sdapi/v1/img2img`
|
- `POST /sdapi/v1/img2img`
|
||||||
- `GET /sdapi/v1/loras`
|
- `GET /sdapi/v1/loras`
|
||||||
|
- `GET /sdapi/v1/upscalers`
|
||||||
|
- `GET /sdapi/v1/latent-upscale-modes`
|
||||||
- `GET /sdapi/v1/samplers`
|
- `GET /sdapi/v1/samplers`
|
||||||
- `GET /sdapi/v1/schedulers`
|
- `GET /sdapi/v1/schedulers`
|
||||||
- `GET /sdapi/v1/sd-models`
|
- `GET /sdapi/v1/sd-models`
|
||||||
@ -216,6 +218,13 @@ Currently supported request fields:
|
|||||||
| `scheduler` | `string` | Scheduler name |
|
| `scheduler` | `string` | Scheduler name |
|
||||||
| `lora` | `array<object>` | Structured LoRA list |
|
| `lora` | `array<object>` | Structured LoRA list |
|
||||||
| `extra_images` | `array<string>` | Base64 or data URL images |
|
| `extra_images` | `array<string>` | Base64 or data URL images |
|
||||||
|
| `enable_hr` | `boolean` | Enable highres fix for `txt2img` |
|
||||||
|
| `hr_upscaler` | `string` | `Latent (nearest)` or an upscaler model name from `/sdapi/v1/upscalers` |
|
||||||
|
| `hr_scale` | `number` | Highres scale when resize target is not set |
|
||||||
|
| `hr_resize_x` | `integer` | Highres target width, `0` to use scale |
|
||||||
|
| `hr_resize_y` | `integer` | Highres target height, `0` to use scale |
|
||||||
|
| `hr_steps` | `integer` | Highres second-pass sample steps, `0` to reuse `steps` |
|
||||||
|
| `denoising_strength` | `number` | Highres denoising strength for `txt2img` |
|
||||||
|
|
||||||
Native extension fields:
|
Native extension fields:
|
||||||
|
|
||||||
@ -241,6 +250,8 @@ Currently supported request fields:
|
|||||||
| `inpainting_mask_invert` | `integer` or `boolean` | Treated as invert flag |
|
| `inpainting_mask_invert` | `integer` or `boolean` | Treated as invert flag |
|
||||||
| `denoising_strength` | `number` | Clamped to `0.0..1.0` |
|
| `denoising_strength` | `number` | Clamped to `0.0..1.0` |
|
||||||
|
|
||||||
|
Highres fix fields are currently handled for `txt2img`; `img2img` uses `denoising_strength` as image-to-image strength.
|
||||||
|
|
||||||
Native extension fields:
|
Native extension fields:
|
||||||
|
|
||||||
- any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
|
- any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
|
||||||
@ -258,6 +269,8 @@ Response fields:
|
|||||||
Currently exposed:
|
Currently exposed:
|
||||||
|
|
||||||
- `GET /sdapi/v1/loras`
|
- `GET /sdapi/v1/loras`
|
||||||
|
- `GET /sdapi/v1/upscalers`
|
||||||
|
- `GET /sdapi/v1/latent-upscale-modes`
|
||||||
- `GET /sdapi/v1/samplers`
|
- `GET /sdapi/v1/samplers`
|
||||||
- `GET /sdapi/v1/schedulers`
|
- `GET /sdapi/v1/schedulers`
|
||||||
- `GET /sdapi/v1/sd-models`
|
- `GET /sdapi/v1/sd-models`
|
||||||
@ -272,6 +285,24 @@ Response fields:
|
|||||||
| `[].name` | `string` | Display name derived from file stem |
|
| `[].name` | `string` | Display name derived from file stem |
|
||||||
| `[].path` | `string` | Relative path under the configured LoRA directory |
|
| `[].path` | `string` | Relative path under the configured LoRA directory |
|
||||||
|
|
||||||
|
`GET /sdapi/v1/upscalers`
|
||||||
|
|
||||||
|
| Field | Type | Notes |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `[].name` | `string` | Built-in name or model stem |
|
||||||
|
| `[].model_name` | `string \| null` | Model family label for model-backed upscalers |
|
||||||
|
| `[].model_path` | `string \| null` | Absolute model path for model-backed upscalers |
|
||||||
|
| `[].model_url` | `string \| null` | Currently always null |
|
||||||
|
| `[].scale` | `integer` | Currently `4` |
|
||||||
|
|
||||||
|
Built-in entries include `None`, `Lanczos`, and `Nearest`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
|
||||||
|
|
||||||
|
`GET /sdapi/v1/latent-upscale-modes`
|
||||||
|
|
||||||
|
| Field | Type | Notes |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `[].name` | `string` | WebUI-compatible latent upscale mode name |
|
||||||
|
|
||||||
`GET /sdapi/v1/samplers`
|
`GET /sdapi/v1/samplers`
|
||||||
|
|
||||||
| Field | Type | Notes |
|
| Field | Type | Notes |
|
||||||
@ -388,6 +419,7 @@ Top-level fields:
|
|||||||
| `samplers` | `array<string>` | Available sampling methods |
|
| `samplers` | `array<string>` | Available sampling methods |
|
||||||
| `schedulers` | `array<string>` | Available schedulers |
|
| `schedulers` | `array<string>` | Available schedulers |
|
||||||
| `loras` | `array<object>` | Available LoRA entries |
|
| `loras` | `array<object>` | Available LoRA entries |
|
||||||
|
| `upscalers` | `array<object>` | Available highres upscalers (built-in and model-backed) |
|
||||||
| `limits` | `object` | Shared queue and size limits |
|
| `limits` | `object` | Shared queue and size limits |
|
||||||
|
|
||||||
`model`
|
`model`
|
||||||
@ -424,6 +456,14 @@ Shared nested fields:
|
|||||||
| `loras[].name` | `string` |
|
| `loras[].name` | `string` |
|
||||||
| `loras[].path` | `string` |
|
| `loras[].path` | `string` |
|
||||||
|
|
||||||
|
`upscalers`
|
||||||
|
|
||||||
|
| Field | Type | Notes |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `upscalers[].name` | `string` | Built-in name or model stem; use this value in `hires.upscaler` |
|
||||||
|
|
||||||
|
Built-in entries include `None` and `Latent (nearest)`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
|
||||||
|
|
||||||
`limits`
|
`limits`
|
||||||
|
|
||||||
| Field | Type |
|
| Field | Type |
|
||||||
@ -482,6 +522,15 @@ Shared default fields used by both `img_gen` and `vid_gen`:
|
|||||||
| `auto_resize_ref_image` | `boolean` |
|
| `auto_resize_ref_image` | `boolean` |
|
||||||
| `increase_ref_index` | `boolean` |
|
| `increase_ref_index` | `boolean` |
|
||||||
| `control_strength` | `number` |
|
| `control_strength` | `number` |
|
||||||
|
| `hires` | `object` |
|
||||||
|
| `hires.enabled` | `boolean` |
|
||||||
|
| `hires.upscaler` | `string` |
|
||||||
|
| `hires.scale` | `number` |
|
||||||
|
| `hires.target_width` | `integer` |
|
||||||
|
| `hires.target_height` | `integer` |
|
||||||
|
| `hires.steps` | `integer` |
|
||||||
|
| `hires.denoising_strength` | `number` |
|
||||||
|
| `hires.upscale_tile_size` | `integer` |
|
||||||
|
|
||||||
`vid_gen`-specific default fields:
|
`vid_gen`-specific default fields:
|
||||||
|
|
||||||
@ -514,6 +563,7 @@ Fields returned in `features_by_mode.img_gen`:
|
|||||||
- `ref_images`
|
- `ref_images`
|
||||||
- `lora`
|
- `lora`
|
||||||
- `vae_tiling`
|
- `vae_tiling`
|
||||||
|
- `hires`
|
||||||
- `cache`
|
- `cache`
|
||||||
- `cancel_queued`
|
- `cancel_queued`
|
||||||
- `cancel_generating`
|
- `cancel_generating`
|
||||||
@ -625,6 +675,16 @@ Example:
|
|||||||
},
|
},
|
||||||
|
|
||||||
"lora": [],
|
"lora": [],
|
||||||
|
"hires": {
|
||||||
|
"enabled": false,
|
||||||
|
"upscaler": "Latent (nearest)",
|
||||||
|
"scale": 2.0,
|
||||||
|
"target_width": 0,
|
||||||
|
"target_height": 0,
|
||||||
|
"steps": 0,
|
||||||
|
"denoising_strength": 0.7,
|
||||||
|
"upscale_tile_size": 128
|
||||||
|
},
|
||||||
|
|
||||||
"vae_tiling_params": {
|
"vae_tiling_params": {
|
||||||
"enabled": false,
|
"enabled": false,
|
||||||
@ -729,12 +789,23 @@ Other native fields:
|
|||||||
|
|
||||||
| Field | Type |
|
| Field | Type |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
|
| `hires` | `object` |
|
||||||
|
| `hires.enabled` | `boolean` |
|
||||||
|
| `hires.upscaler` | `string` |
|
||||||
|
| `hires.scale` | `number` |
|
||||||
|
| `hires.target_width` | `integer` |
|
||||||
|
| `hires.target_height` | `integer` |
|
||||||
|
| `hires.steps` | `integer` |
|
||||||
|
| `hires.denoising_strength` | `number` |
|
||||||
|
| `hires.upscale_tile_size` | `integer` |
|
||||||
| `vae_tiling_params` | `object` |
|
| `vae_tiling_params` | `object` |
|
||||||
| `cache_mode` | `string` |
|
| `cache_mode` | `string` |
|
||||||
| `cache_option` | `string` |
|
| `cache_option` | `string` |
|
||||||
| `scm_mask` | `string` |
|
| `scm_mask` | `string` |
|
||||||
| `scm_policy_dynamic` | `boolean` |
|
| `scm_policy_dynamic` | `boolean` |
|
||||||
|
|
||||||
|
For `hires.upscaler`, use `Latent (nearest)` for latent upscale or an `upscalers[].name` value from `GET /sdcpp/v1/capabilities`. Model-backed upscalers are resolved as `--hires-upscalers-dir / (name + ext)` and must live directly in that directory.
|
||||||
|
|
||||||
HTTP-only output fields:
|
HTTP-only output fields:
|
||||||
|
|
||||||
| Field | Type |
|
| Field | Type |
|
||||||
|
|||||||
@ -48,7 +48,9 @@ static void parse_args(int argc,
|
|||||||
|
|
||||||
if (!svr_params.resolve_and_validate() ||
|
if (!svr_params.resolve_and_validate() ||
|
||||||
!ctx_params.resolve_and_validate(IMG_GEN) ||
|
!ctx_params.resolve_and_validate(IMG_GEN) ||
|
||||||
!default_gen_params.resolve_and_validate(IMG_GEN, ctx_params.lora_model_dir)) {
|
!default_gen_params.resolve_and_validate(IMG_GEN,
|
||||||
|
ctx_params.lora_model_dir,
|
||||||
|
ctx_params.hires_upscalers_dir)) {
|
||||||
print_usage(argv[0], options_vec);
|
print_usage(argv[0], options_vec);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@ -95,6 +97,8 @@ int main(int argc, const char** argv) {
|
|||||||
|
|
||||||
std::vector<LoraEntry> lora_cache;
|
std::vector<LoraEntry> lora_cache;
|
||||||
std::mutex lora_mutex;
|
std::mutex lora_mutex;
|
||||||
|
std::vector<UpscalerEntry> upscaler_cache;
|
||||||
|
std::mutex upscaler_mutex;
|
||||||
AsyncJobManager async_job_manager;
|
AsyncJobManager async_job_manager;
|
||||||
ServerRuntime runtime = {
|
ServerRuntime runtime = {
|
||||||
sd_ctx.get(),
|
sd_ctx.get(),
|
||||||
@ -104,6 +108,8 @@ int main(int argc, const char** argv) {
|
|||||||
&default_gen_params,
|
&default_gen_params,
|
||||||
&lora_cache,
|
&lora_cache,
|
||||||
&lora_mutex,
|
&lora_mutex,
|
||||||
|
&upscaler_cache,
|
||||||
|
&upscaler_mutex,
|
||||||
&async_job_manager,
|
&async_job_manager,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -70,7 +70,7 @@ static bool build_openai_generation_request(const httplib::Request& req,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
|
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
|
||||||
error_message = "invalid params";
|
error_message = "invalid params";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -212,7 +212,7 @@ static bool build_openai_edit_request(const httplib::Request& req,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
|
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
|
||||||
error_message = "invalid params";
|
error_message = "invalid params";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
#include "routes.h"
|
#include "routes.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cctype>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
@ -35,14 +36,20 @@ static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns a copy of `value` in which every byte has been mapped through std::tolower.
static std::string lower_ascii(std::string value) {
    for (char& ch : value) {
        // Convert via unsigned char first so std::tolower never receives a negative int.
        const unsigned char byte = static_cast<unsigned char>(ch);
        ch                       = static_cast<char>(std::tolower(byte));
    }
    return value;
}
|
||||||
|
|
||||||
static enum sample_method_t get_sdapi_sample_method(std::string name) {
|
static enum sample_method_t get_sdapi_sample_method(std::string name) {
|
||||||
enum sample_method_t result = str_to_sample_method(name.c_str());
|
enum sample_method_t result = str_to_sample_method(name.c_str());
|
||||||
if (result != SAMPLE_METHOD_COUNT) {
|
if (result != SAMPLE_METHOD_COUNT) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::transform(name.begin(), name.end(), name.begin(),
|
name = lower_ascii(name);
|
||||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
|
||||||
static const std::unordered_map<std::string_view, sample_method_t> hardcoded{
|
static const std::unordered_map<std::string_view, sample_method_t> hardcoded{
|
||||||
{"euler a", EULER_A_SAMPLE_METHOD},
|
{"euler a", EULER_A_SAMPLE_METHOD},
|
||||||
{"k_euler_a", EULER_A_SAMPLE_METHOD},
|
{"k_euler_a", EULER_A_SAMPLE_METHOD},
|
||||||
@ -114,6 +121,18 @@ static bool build_sdapi_img_gen_request(const json& j,
|
|||||||
request.gen_params.width = j.value("width", -1);
|
request.gen_params.width = j.value("width", -1);
|
||||||
request.gen_params.height = j.value("height", -1);
|
request.gen_params.height = j.value("height", -1);
|
||||||
|
|
||||||
|
if (!img2img && j.value("enable_hr", false)) {
|
||||||
|
request.gen_params.hires_enabled = true;
|
||||||
|
request.gen_params.hires_scale = j.value("hr_scale", request.gen_params.hires_scale);
|
||||||
|
request.gen_params.hires_width = j.value("hr_resize_x", request.gen_params.hires_width);
|
||||||
|
request.gen_params.hires_height = j.value("hr_resize_y", request.gen_params.hires_height);
|
||||||
|
request.gen_params.hires_steps = j.value("hr_steps", request.gen_params.hires_steps);
|
||||||
|
request.gen_params.hires_denoising_strength =
|
||||||
|
j.value("denoising_strength", request.gen_params.hires_denoising_strength);
|
||||||
|
|
||||||
|
request.gen_params.hires_upscaler = j.value("hr_upscaler", request.gen_params.hires_upscaler);
|
||||||
|
}
|
||||||
|
|
||||||
std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt);
|
std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt);
|
||||||
if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) {
|
if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) {
|
||||||
error_message = "invalid sd_cpp_extra_args";
|
error_message = "invalid sd_cpp_extra_args";
|
||||||
@ -228,7 +247,7 @@ static bool build_sdapi_img_gen_request(const json& j,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
|
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
|
||||||
error_message = "invalid params";
|
error_message = "invalid params";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -347,6 +366,45 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
|||||||
res.set_content(result.dump(), "application/json");
|
res.set_content(result.dump(), "application/json");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
svr.Get("/sdapi/v1/upscalers", [runtime](const httplib::Request&, httplib::Response& res) {
|
||||||
|
refresh_upscaler_cache(*runtime);
|
||||||
|
|
||||||
|
auto make_builtin = [](const char* name) {
|
||||||
|
json item;
|
||||||
|
item["name"] = name;
|
||||||
|
item["model_name"] = nullptr;
|
||||||
|
item["model_path"] = nullptr;
|
||||||
|
item["model_url"] = nullptr;
|
||||||
|
item["scale"] = 4;
|
||||||
|
return item;
|
||||||
|
};
|
||||||
|
|
||||||
|
json result = json::array();
|
||||||
|
result.push_back(make_builtin("None"));
|
||||||
|
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(*runtime->upscaler_mutex);
|
||||||
|
for (const auto& e : *runtime->upscaler_cache) {
|
||||||
|
json item;
|
||||||
|
item["name"] = e.name;
|
||||||
|
item["model_name"] = e.model_name;
|
||||||
|
item["model_path"] = e.fullpath;
|
||||||
|
item["model_url"] = nullptr;
|
||||||
|
item["scale"] = e.scale;
|
||||||
|
result.push_back(item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res.set_content(result.dump(), "application/json");
|
||||||
|
});
|
||||||
|
|
||||||
|
svr.Get("/sdapi/v1/latent-upscale-modes", [](const httplib::Request&, httplib::Response& res) {
|
||||||
|
json result = json::array({
|
||||||
|
{{"name", "Latent (nearest)"}},
|
||||||
|
});
|
||||||
|
res.set_content(result.dump(), "application/json");
|
||||||
|
});
|
||||||
|
|
||||||
svr.Get("/sdapi/v1/samplers", [runtime](const httplib::Request&, httplib::Response& res) {
|
svr.Get("/sdapi/v1/samplers", [runtime](const httplib::Request&, httplib::Response& res) {
|
||||||
std::vector<std::string> sampler_names;
|
std::vector<std::string> sampler_names;
|
||||||
sampler_names.push_back("default");
|
sampler_names.push_back("default");
|
||||||
|
|||||||
@ -114,6 +114,17 @@ static json make_img_gen_defaults_json(const SDGenerationParams& defaults, const
|
|||||||
{"increase_ref_index", defaults.increase_ref_index},
|
{"increase_ref_index", defaults.increase_ref_index},
|
||||||
{"control_strength", defaults.control_strength},
|
{"control_strength", defaults.control_strength},
|
||||||
{"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)},
|
{"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)},
|
||||||
|
{"hires",
|
||||||
|
{
|
||||||
|
{"enabled", defaults.hires_enabled},
|
||||||
|
{"upscaler", defaults.hires_upscaler},
|
||||||
|
{"scale", defaults.hires_scale},
|
||||||
|
{"target_width", defaults.hires_width},
|
||||||
|
{"target_height", defaults.hires_height},
|
||||||
|
{"steps", defaults.hires_steps},
|
||||||
|
{"denoising_strength", defaults.hires_denoising_strength},
|
||||||
|
{"upscale_tile_size", defaults.hires_upscale_tile_size},
|
||||||
|
}},
|
||||||
{"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
|
{"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
|
||||||
{"cache_mode", defaults.cache_mode},
|
{"cache_mode", defaults.cache_mode},
|
||||||
{"cache_option", defaults.cache_option},
|
{"cache_option", defaults.cache_option},
|
||||||
@ -157,6 +168,7 @@ static json make_img_gen_features_json() {
|
|||||||
{"ref_images", true},
|
{"ref_images", true},
|
||||||
{"lora", true},
|
{"lora", true},
|
||||||
{"vae_tiling", true},
|
{"vae_tiling", true},
|
||||||
|
{"hires", true},
|
||||||
{"cache", true},
|
{"cache", true},
|
||||||
{"cancel_queued", true},
|
{"cancel_queued", true},
|
||||||
{"cancel_generating", false},
|
{"cancel_generating", false},
|
||||||
@ -179,6 +191,7 @@ static json make_vid_gen_features_json() {
|
|||||||
|
|
||||||
static json make_capabilities_json(ServerRuntime& runtime) {
|
static json make_capabilities_json(ServerRuntime& runtime) {
|
||||||
refresh_lora_cache(runtime);
|
refresh_lora_cache(runtime);
|
||||||
|
refresh_upscaler_cache(runtime);
|
||||||
|
|
||||||
AsyncJobManager& manager = *runtime.async_job_manager;
|
AsyncJobManager& manager = *runtime.async_job_manager;
|
||||||
const auto& defaults = *runtime.default_gen_params;
|
const auto& defaults = *runtime.default_gen_params;
|
||||||
@ -190,6 +203,7 @@ static json make_capabilities_json(ServerRuntime& runtime) {
|
|||||||
json image_output_formats = supported_img_output_formats();
|
json image_output_formats = supported_img_output_formats();
|
||||||
json video_output_formats = supported_vid_output_formats();
|
json video_output_formats = supported_vid_output_formats();
|
||||||
json available_loras = json::array();
|
json available_loras = json::array();
|
||||||
|
json available_upscalers = json::array();
|
||||||
json supported_modes = json::array();
|
json supported_modes = json::array();
|
||||||
|
|
||||||
for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
|
for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
|
||||||
@ -210,6 +224,21 @@ static json make_capabilities_json(ServerRuntime& runtime) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
available_upscalers.push_back({
|
||||||
|
{"name", "None"},
|
||||||
|
});
|
||||||
|
available_upscalers.push_back({
|
||||||
|
{"name", "Latent (nearest)"},
|
||||||
|
});
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(*runtime.upscaler_mutex);
|
||||||
|
for (const auto& entry : *runtime.upscaler_cache) {
|
||||||
|
available_upscalers.push_back({
|
||||||
|
{"name", entry.name},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (supports_img) {
|
if (supports_img) {
|
||||||
supported_modes.push_back("img_gen");
|
supported_modes.push_back("img_gen");
|
||||||
}
|
}
|
||||||
@ -284,6 +313,7 @@ static json make_capabilities_json(ServerRuntime& runtime) {
|
|||||||
result["features"] = top_level_features;
|
result["features"] = top_level_features;
|
||||||
result["features_by_mode"] = features_by_mode;
|
result["features_by_mode"] = features_by_mode;
|
||||||
result["loras"] = available_loras;
|
result["loras"] = available_loras;
|
||||||
|
result["upscalers"] = available_upscalers;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -307,7 +337,7 @@ static bool parse_img_gen_request(const json& body,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
|
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
|
||||||
error_message = "invalid generation parameters";
|
error_message = "invalid generation parameters";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -334,7 +364,7 @@ static bool parse_vid_gen_request(const json& body,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||||
if (!request.gen_params.resolve_and_validate(VID_GEN, "", true)) {
|
if (!request.gen_params.resolve_and_validate(VID_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
|
||||||
error_message = "invalid generation parameters";
|
error_message = "invalid generation parameters";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
#include "runtime.h"
|
#include "runtime.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cctype>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
@ -13,6 +14,18 @@
|
|||||||
|
|
||||||
namespace fs = std::filesystem;
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
// Lower-cases `value` byte by byte with std::tolower and returns the result.
static std::string lower_ascii(std::string value) {
    for (std::string::size_type i = 0; i < value.size(); ++i) {
        // std::tolower takes an int; go through unsigned char to keep the argument non-negative.
        const unsigned char byte = static_cast<unsigned char>(value[i]);
        value[i]                 = static_cast<char>(std::tolower(byte));
    }
    return value;
}
|
||||||
|
|
||||||
|
// Reports whether the extension of `p`, compared case-insensitively, is one of the
// model file extensions this server lists: .gguf, .pt, .pth or .safetensors.
static bool is_supported_model_ext(const fs::path& p) {
    static const char* const known_exts[] = {".gguf", ".pt", ".pth", ".safetensors"};

    const std::string ext = lower_ascii(p.extension().string());
    for (const char* candidate : known_exts) {
        if (ext == candidate) {
            return true;
        }
    }
    return false;
}
|
||||||
|
|
||||||
static const std::string k_base64_chars =
|
static const std::string k_base64_chars =
|
||||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
"abcdefghijklmnopqrstuvwxyz"
|
"abcdefghijklmnopqrstuvwxyz"
|
||||||
@ -241,20 +254,12 @@ void refresh_lora_cache(ServerRuntime& rt) {
|
|||||||
|
|
||||||
fs::path lora_dir = rt.ctx_params->lora_model_dir;
|
fs::path lora_dir = rt.ctx_params->lora_model_dir;
|
||||||
if (fs::exists(lora_dir) && fs::is_directory(lora_dir)) {
|
if (fs::exists(lora_dir) && fs::is_directory(lora_dir)) {
|
||||||
auto is_lora_ext = [](const fs::path& p) {
|
|
||||||
auto ext = p.extension().string();
|
|
||||||
std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) {
|
|
||||||
return static_cast<char>(std::tolower(c));
|
|
||||||
});
|
|
||||||
return ext == ".gguf" || ext == ".pt" || ext == ".pth" || ext == ".safetensors";
|
|
||||||
};
|
|
||||||
|
|
||||||
for (auto& entry : fs::recursive_directory_iterator(lora_dir)) {
|
for (auto& entry : fs::recursive_directory_iterator(lora_dir)) {
|
||||||
if (!entry.is_regular_file()) {
|
if (!entry.is_regular_file()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const fs::path& p = entry.path();
|
const fs::path& p = entry.path();
|
||||||
if (!is_lora_ext(p)) {
|
if (!is_supported_model_ext(p)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -286,6 +291,40 @@ std::string get_lora_full_path(ServerRuntime& rt, const std::string& path) {
|
|||||||
return it != rt.lora_cache->end() ? it->fullpath : "";
|
return it != rt.lora_cache->end() ? it->fullpath : "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Rebuilds the upscaler cache from the files directly inside
// rt.ctx_params->hires_upscalers_dir (sub-directories are not descended into).
// The scan runs without holding the mutex; only the final swap into
// *rt.upscaler_cache is done under *rt.upscaler_mutex. A missing or
// non-directory path yields an empty cache.
void refresh_upscaler_cache(ServerRuntime& rt) {
    std::vector<UpscalerEntry> scanned;

    const fs::path dir      = rt.ctx_params->hires_upscalers_dir;
    const bool dir_is_valid = fs::exists(dir) && fs::is_directory(dir);
    if (dir_is_valid) {
        for (const auto& dir_entry : fs::directory_iterator(dir)) {
            // Keep only regular files that carry a recognised model extension.
            if (!dir_entry.is_regular_file() || !is_supported_model_ext(dir_entry.path())) {
                continue;
            }
            const fs::path& file = dir_entry.path();

            UpscalerEntry item;
            item.model_name = "ESRGAN_4x";
            item.name       = file.stem().u8string();
            item.path       = file.filename().u8string();
            item.fullpath   = fs::absolute(file).lexically_normal().u8string();
            scanned.push_back(std::move(item));
        }
    }

    // Sort by display name so the listing order is stable across refreshes.
    const auto by_name = [](const UpscalerEntry& lhs, const UpscalerEntry& rhs) {
        return lhs.name < rhs.name;
    };
    std::sort(scanned.begin(), scanned.end(), by_name);

    std::lock_guard<std::mutex> guard(*rt.upscaler_mutex);
    *rt.upscaler_cache = std::move(scanned);
}
|
||||||
|
|
||||||
int64_t unix_timestamp_now() {
|
int64_t unix_timestamp_now() {
|
||||||
return std::chrono::duration_cast<std::chrono::seconds>(
|
return std::chrono::duration_cast<std::chrono::seconds>(
|
||||||
std::chrono::system_clock::now().time_since_epoch())
|
std::chrono::system_clock::now().time_since_epoch())
|
||||||
|
|||||||
@ -37,6 +37,14 @@ struct LoraEntry {
|
|||||||
std::string fullpath;
|
std::string fullpath;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// One upscaler model file found by refresh_upscaler_cache().
struct UpscalerEntry {
    std::string name;        // file name without its extension; the cache is sorted by this
    std::string path;        // file name including its extension
    std::string fullpath;    // absolute, lexically normalised path to the file
    std::string model_name;  // refresh_upscaler_cache() currently always stores "ESRGAN_4x"
    int scale{4};            // scale reported to API clients; defaults to 4
};
|
||||||
|
|
||||||
struct ServerRuntime {
|
struct ServerRuntime {
|
||||||
sd_ctx_t* sd_ctx;
|
sd_ctx_t* sd_ctx;
|
||||||
std::mutex* sd_ctx_mutex;
|
std::mutex* sd_ctx_mutex;
|
||||||
@ -45,6 +53,8 @@ struct ServerRuntime {
|
|||||||
const SDGenerationParams* default_gen_params;
|
const SDGenerationParams* default_gen_params;
|
||||||
std::vector<LoraEntry>* lora_cache;
|
std::vector<LoraEntry>* lora_cache;
|
||||||
std::mutex* lora_mutex;
|
std::mutex* lora_mutex;
|
||||||
|
std::vector<UpscalerEntry>* upscaler_cache;
|
||||||
|
std::mutex* upscaler_mutex;
|
||||||
AsyncJobManager* async_job_manager;
|
AsyncJobManager* async_job_manager;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -86,4 +96,5 @@ bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode)
|
|||||||
std::string unsupported_generation_mode_error(SDMode mode);
|
std::string unsupported_generation_mode_error(SDMode mode);
|
||||||
void refresh_lora_cache(ServerRuntime& rt);
|
void refresh_lora_cache(ServerRuntime& rt);
|
||||||
std::string get_lora_full_path(ServerRuntime& rt, const std::string& path);
|
std::string get_lora_full_path(ServerRuntime& rt, const std::string& path);
|
||||||
|
void refresh_upscaler_cache(ServerRuntime& rt);
|
||||||
int64_t unix_timestamp_now();
|
int64_t unix_timestamp_now();
|
||||||
|
|||||||
@ -289,6 +289,25 @@ typedef struct {
|
|||||||
const char* path;
|
const char* path;
|
||||||
} sd_lora_t;
|
} sd_lora_t;
|
||||||
|
|
||||||
|
// Upscaling method used by the hires pass. The numeric values double as
// indices into the upscaler name table, so they must stay contiguous from 0.
enum sd_hires_upscaler_t {
    SD_HIRES_UPSCALER_NONE           = 0,  // no upscaler; the hires pass is disabled
    SD_HIRES_UPSCALER_LATENT_NEAREST = 1,  // nearest-neighbour resize of the latent
    SD_HIRES_UPSCALER_MODEL          = 2,  // upscaler model loaded from a model path
    SD_HIRES_UPSCALER_COUNT          = 3,  // number of real values; also the "not found" result
};
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
bool enabled;
|
||||||
|
enum sd_hires_upscaler_t upscaler;
|
||||||
|
const char* model_path;
|
||||||
|
float scale;
|
||||||
|
int target_width;
|
||||||
|
int target_height;
|
||||||
|
int steps;
|
||||||
|
float denoising_strength;
|
||||||
|
int upscale_tile_size;
|
||||||
|
} sd_hires_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const sd_lora_t* loras;
|
const sd_lora_t* loras;
|
||||||
uint32_t lora_count;
|
uint32_t lora_count;
|
||||||
@ -312,6 +331,7 @@ typedef struct {
|
|||||||
sd_pm_params_t pm_params;
|
sd_pm_params_t pm_params;
|
||||||
sd_tiling_params_t vae_tiling_params;
|
sd_tiling_params_t vae_tiling_params;
|
||||||
sd_cache_params_t cache;
|
sd_cache_params_t cache;
|
||||||
|
sd_hires_params_t hires;
|
||||||
} sd_img_gen_params_t;
|
} sd_img_gen_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -365,8 +385,11 @@ SD_API const char* sd_preview_name(enum preview_t preview);
|
|||||||
SD_API enum preview_t str_to_preview(const char* str);
|
SD_API enum preview_t str_to_preview(const char* str);
|
||||||
SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode);
|
SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode);
|
||||||
SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str);
|
SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str);
|
||||||
|
SD_API const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler);
|
||||||
|
SD_API enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str);
|
||||||
|
|
||||||
SD_API void sd_cache_params_init(sd_cache_params_t* cache_params);
|
SD_API void sd_cache_params_init(sd_cache_params_t* cache_params);
|
||||||
|
SD_API void sd_hires_params_init(sd_hires_params_t* hires_params);
|
||||||
|
|
||||||
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
|
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
|
||||||
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);
|
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);
|
||||||
|
|||||||
@ -17,6 +17,7 @@
|
|||||||
#include "pmid.hpp"
|
#include "pmid.hpp"
|
||||||
#include "sample-cache.h"
|
#include "sample-cache.h"
|
||||||
#include "tae.hpp"
|
#include "tae.hpp"
|
||||||
|
#include "upscaler.h"
|
||||||
#include "vae.hpp"
|
#include "vae.hpp"
|
||||||
|
|
||||||
#include "latent-preview.h"
|
#include "latent-preview.h"
|
||||||
@ -2113,6 +2114,28 @@ enum lora_apply_mode_t str_to_lora_apply_mode(const char* str) {
|
|||||||
return LORA_APPLY_MODE_COUNT;
|
return LORA_APPLY_MODE_COUNT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Display names for sd_hires_upscaler_t, indexed by enum value.
// Keep the order in sync with the enum declaration.
const char* hires_upscaler_to_str[] = {
    /* SD_HIRES_UPSCALER_NONE           */ "None",
    /* SD_HIRES_UPSCALER_LATENT_NEAREST */ "Latent (nearest)",
    /* SD_HIRES_UPSCALER_MODEL          */ "Model",
};
|
||||||
|
|
||||||
|
const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler) {
|
||||||
|
if (upscaler < SD_HIRES_UPSCALER_COUNT) {
|
||||||
|
return hires_upscaler_to_str[upscaler];
|
||||||
|
}
|
||||||
|
return NONE_STR;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str) {
|
||||||
|
for (int i = 0; i < SD_HIRES_UPSCALER_COUNT; i++) {
|
||||||
|
if (!strcmp(str, hires_upscaler_to_str[i])) {
|
||||||
|
return (enum sd_hires_upscaler_t)i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return SD_HIRES_UPSCALER_COUNT;
|
||||||
|
}
|
||||||
|
|
||||||
void sd_cache_params_init(sd_cache_params_t* cache_params) {
|
void sd_cache_params_init(sd_cache_params_t* cache_params) {
|
||||||
*cache_params = {};
|
*cache_params = {};
|
||||||
cache_params->mode = SD_CACHE_DISABLED;
|
cache_params->mode = SD_CACHE_DISABLED;
|
||||||
@ -2141,6 +2164,19 @@ void sd_cache_params_init(sd_cache_params_t* cache_params) {
|
|||||||
cache_params->spectrum_stop_percent = 0.9f;
|
cache_params->spectrum_stop_percent = 0.9f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resets *hires_params to the library defaults: hires disabled, latent-nearest
// upscaler, no model path, 2x scale, no explicit target size, 0 steps,
// denoising strength 0.7 and an upscale tile size of 128.
void sd_hires_params_init(sd_hires_params_t* hires_params) {
    sd_hires_params_t defaults  = {};
    defaults.enabled            = false;
    defaults.upscaler           = SD_HIRES_UPSCALER_LATENT_NEAREST;
    defaults.model_path         = nullptr;
    defaults.scale              = 2.0f;
    defaults.target_width       = 0;
    defaults.target_height      = 0;
    defaults.steps              = 0;
    defaults.denoising_strength = 0.7f;
    defaults.upscale_tile_size  = 128;

    *hires_params = defaults;
}
|
||||||
|
|
||||||
void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
|
void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
|
||||||
*sd_ctx_params = {};
|
*sd_ctx_params = {};
|
||||||
sd_ctx_params->vae_decode_only = true;
|
sd_ctx_params->vae_decode_only = true;
|
||||||
@ -2310,6 +2346,7 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) {
|
|||||||
sd_img_gen_params->pm_params = {nullptr, 0, nullptr, 20.f};
|
sd_img_gen_params->pm_params = {nullptr, 0, nullptr, 20.f};
|
||||||
sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
|
sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
|
||||||
sd_cache_params_init(&sd_img_gen_params->cache);
|
sd_cache_params_init(&sd_img_gen_params->cache);
|
||||||
|
sd_hires_params_init(&sd_img_gen_params->hires);
|
||||||
}
|
}
|
||||||
|
|
||||||
char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
||||||
@ -2336,7 +2373,8 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
|||||||
"increase_ref_index: %s\n"
|
"increase_ref_index: %s\n"
|
||||||
"control_strength: %.2f\n"
|
"control_strength: %.2f\n"
|
||||||
"photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
|
"photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
|
||||||
"VAE tiling: %s\n",
|
"VAE tiling: %s\n"
|
||||||
|
"hires: {enabled=%s, upscaler=%s, model_path=%s, scale=%.2f, target=%dx%d, steps=%d, denoising_strength=%.2f}\n",
|
||||||
SAFE_STR(sd_img_gen_params->prompt),
|
SAFE_STR(sd_img_gen_params->prompt),
|
||||||
SAFE_STR(sd_img_gen_params->negative_prompt),
|
SAFE_STR(sd_img_gen_params->negative_prompt),
|
||||||
sd_img_gen_params->clip_skip,
|
sd_img_gen_params->clip_skip,
|
||||||
@ -2353,7 +2391,15 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
|||||||
sd_img_gen_params->pm_params.style_strength,
|
sd_img_gen_params->pm_params.style_strength,
|
||||||
sd_img_gen_params->pm_params.id_images_count,
|
sd_img_gen_params->pm_params.id_images_count,
|
||||||
SAFE_STR(sd_img_gen_params->pm_params.id_embed_path),
|
SAFE_STR(sd_img_gen_params->pm_params.id_embed_path),
|
||||||
BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled));
|
BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled),
|
||||||
|
BOOL_STR(sd_img_gen_params->hires.enabled),
|
||||||
|
sd_hires_upscaler_name(sd_img_gen_params->hires.upscaler),
|
||||||
|
SAFE_STR(sd_img_gen_params->hires.model_path),
|
||||||
|
sd_img_gen_params->hires.scale,
|
||||||
|
sd_img_gen_params->hires.target_width,
|
||||||
|
sd_img_gen_params->hires.target_height,
|
||||||
|
sd_img_gen_params->hires.steps,
|
||||||
|
sd_img_gen_params->hires.denoising_strength);
|
||||||
const char* cache_mode_str = "disabled";
|
const char* cache_mode_str = "disabled";
|
||||||
if (sd_img_gen_params->cache.mode == SD_CACHE_EASYCACHE) {
|
if (sd_img_gen_params->cache.mode == SD_CACHE_EASYCACHE) {
|
||||||
cache_mode_str = "easycache";
|
cache_mode_str = "easycache";
|
||||||
@ -2534,6 +2580,7 @@ struct GenerationRequest {
|
|||||||
sd_guidance_params_t guidance = {};
|
sd_guidance_params_t guidance = {};
|
||||||
sd_guidance_params_t high_noise_guidance = {};
|
sd_guidance_params_t high_noise_guidance = {};
|
||||||
sd_pm_params_t pm_params = {};
|
sd_pm_params_t pm_params = {};
|
||||||
|
sd_hires_params_t hires = {};
|
||||||
int frames = -1;
|
int frames = -1;
|
||||||
float vace_strength = 1.f;
|
float vace_strength = 1.f;
|
||||||
|
|
||||||
@ -2555,6 +2602,7 @@ struct GenerationRequest {
|
|||||||
auto_resize_ref_image = sd_img_gen_params->auto_resize_ref_image;
|
auto_resize_ref_image = sd_img_gen_params->auto_resize_ref_image;
|
||||||
guidance = sd_img_gen_params->sample_params.guidance;
|
guidance = sd_img_gen_params->sample_params.guidance;
|
||||||
pm_params = sd_img_gen_params->pm_params;
|
pm_params = sd_img_gen_params->pm_params;
|
||||||
|
hires = sd_img_gen_params->hires;
|
||||||
cache_params = &sd_img_gen_params->cache;
|
cache_params = &sd_img_gen_params->cache;
|
||||||
resolve(sd_ctx);
|
resolve(sd_ctx);
|
||||||
}
|
}
|
||||||
@ -2577,26 +2625,76 @@ struct GenerationRequest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void align_generation_request_size() {
|
void align_generation_request_size() {
|
||||||
|
align_image_size(&width, &height, "generation request");
|
||||||
|
}
|
||||||
|
|
||||||
|
void align_image_size(int* target_width, int* target_height, const char* label) {
|
||||||
int spatial_multiple = vae_scale_factor * diffusion_model_down_factor;
|
int spatial_multiple = vae_scale_factor * diffusion_model_down_factor;
|
||||||
int width_offset = align_up_offset(width, spatial_multiple);
|
int width_offset = align_up_offset(*target_width, spatial_multiple);
|
||||||
int height_offset = align_up_offset(height, spatial_multiple);
|
int height_offset = align_up_offset(*target_height, spatial_multiple);
|
||||||
if (width_offset <= 0 && height_offset <= 0) {
|
if (width_offset <= 0 && height_offset <= 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int original_width = width;
|
int original_width = *target_width;
|
||||||
int original_height = height;
|
int original_height = *target_height;
|
||||||
|
|
||||||
width += width_offset;
|
*target_width += width_offset;
|
||||||
height += height_offset;
|
*target_height += height_offset;
|
||||||
LOG_WARN("align up %dx%d to %dx%d (multiple=%d)",
|
LOG_WARN("align %s up %dx%d to %dx%d (multiple=%d)",
|
||||||
|
label,
|
||||||
original_width,
|
original_width,
|
||||||
original_height,
|
original_height,
|
||||||
width,
|
*target_width,
|
||||||
height,
|
*target_height,
|
||||||
spatial_multiple);
|
spatial_multiple);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void resolve_hires() {
|
||||||
|
if (!hires.enabled) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (hires.upscaler == SD_HIRES_UPSCALER_NONE) {
|
||||||
|
hires.enabled = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (hires.upscaler < SD_HIRES_UPSCALER_NONE && hires.upscaler >= SD_HIRES_UPSCALER_COUNT) {
|
||||||
|
LOG_WARN("hires upscaler '%d' is invalid, disabling hires", hires.upscaler);
|
||||||
|
hires.enabled = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (hires.upscaler == SD_HIRES_UPSCALER_MODEL && strlen(SAFE_STR(hires.model_path)) == 0) {
|
||||||
|
LOG_WARN("hires model upscaler requires a model path, disabling hires");
|
||||||
|
hires.enabled = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (hires.scale <= 0.f && hires.target_width <= 0 && hires.target_height <= 0) {
|
||||||
|
LOG_WARN("hires scale must be positive when no target size is set, disabling hires");
|
||||||
|
hires.enabled = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
hires.denoising_strength = std::clamp(hires.denoising_strength, 0.0001f, 1.f);
|
||||||
|
hires.steps = std::max(0, hires.steps);
|
||||||
|
|
||||||
|
if (hires.target_width > 0 && hires.target_height > 0) {
|
||||||
|
// pass
|
||||||
|
} else if (hires.target_width > 0) {
|
||||||
|
hires.target_height = hires.target_width;
|
||||||
|
} else if (hires.target_height > 0) {
|
||||||
|
hires.target_width = hires.target_height;
|
||||||
|
} else {
|
||||||
|
hires.target_width = static_cast<int>(std::round(width * hires.scale));
|
||||||
|
hires.target_height = static_cast<int>(std::round(height * hires.scale));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hires.target_width <= 0 || hires.target_height <= 0) {
|
||||||
|
LOG_WARN("hires target size is not positive, disabling hires");
|
||||||
|
hires.enabled = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
align_image_size(&hires.target_width, &hires.target_height, "hires target");
|
||||||
|
}
|
||||||
|
|
||||||
static void resolve_guidance(sd_ctx_t* sd_ctx,
|
static void resolve_guidance(sd_ctx_t* sd_ctx,
|
||||||
sd_guidance_params_t* guidance,
|
sd_guidance_params_t* guidance,
|
||||||
bool* use_uncond,
|
bool* use_uncond,
|
||||||
@ -2637,6 +2735,7 @@ struct GenerationRequest {
|
|||||||
|
|
||||||
void resolve(sd_ctx_t* sd_ctx) {
|
void resolve(sd_ctx_t* sd_ctx) {
|
||||||
align_generation_request_size();
|
align_generation_request_size();
|
||||||
|
resolve_hires();
|
||||||
seed = resolve_seed(seed);
|
seed = resolve_seed(seed);
|
||||||
|
|
||||||
resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_cond);
|
resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_cond);
|
||||||
@ -3149,6 +3248,67 @@ static sd_image_t* decode_image_outputs(sd_ctx_t* sd_ctx,
|
|||||||
return result_images;
|
return result_images;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static sd::Tensor<float> upscale_hires_latent(sd_ctx_t* sd_ctx,
|
||||||
|
const sd::Tensor<float>& latent,
|
||||||
|
const GenerationRequest& request,
|
||||||
|
UpscalerGGML* upscaler) {
|
||||||
|
if (request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_NEAREST) {
|
||||||
|
std::vector<int64_t> target_shape = latent.shape();
|
||||||
|
if (target_shape.size() < 2) {
|
||||||
|
LOG_ERROR("latent has invalid shape for hires upscale");
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
target_shape[0] = request.hires.target_width / request.vae_scale_factor;
|
||||||
|
target_shape[1] = request.hires.target_height / request.vae_scale_factor;
|
||||||
|
|
||||||
|
LOG_INFO("hires latent upscale %" PRId64 "x%" PRId64 " -> %" PRId64 "x%" PRId64,
|
||||||
|
latent.shape()[0],
|
||||||
|
latent.shape()[1],
|
||||||
|
target_shape[0],
|
||||||
|
target_shape[1]);
|
||||||
|
return sd::ops::interpolate(latent, target_shape, sd::ops::InterpolateMode::Nearest);
|
||||||
|
} else if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL) {
|
||||||
|
if (upscaler == nullptr) {
|
||||||
|
LOG_ERROR("hires model upscaler context is null");
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
if (sd_ctx->sd->vae_decode_only) {
|
||||||
|
LOG_ERROR("hires model upscaler requires VAE encoder weights; create the context with vae_decode_only=false");
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
sd::Tensor<float> decoded = sd_ctx->sd->decode_first_stage(latent);
|
||||||
|
if (decoded.empty()) {
|
||||||
|
LOG_ERROR("decode_first_stage failed before hires model upscale");
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
sd::Tensor<float> upscaled_tensor = upscaler->upscale_tensor(decoded);
|
||||||
|
if (upscaled_tensor.empty()) {
|
||||||
|
LOG_ERROR("hires model upscale failed");
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (upscaled_tensor.shape()[0] != request.hires.target_width ||
|
||||||
|
upscaled_tensor.shape()[1] != request.hires.target_height) {
|
||||||
|
upscaled_tensor = sd::ops::interpolate(upscaled_tensor,
|
||||||
|
{request.hires.target_width,
|
||||||
|
request.hires.target_height,
|
||||||
|
upscaled_tensor.shape()[2],
|
||||||
|
upscaled_tensor.shape()[3]});
|
||||||
|
}
|
||||||
|
|
||||||
|
sd::Tensor<float> upscaled_latent = sd_ctx->sd->encode_first_stage(upscaled_tensor);
|
||||||
|
if (upscaled_latent.empty()) {
|
||||||
|
LOG_ERROR("encode_first_stage failed after hires model upscale");
|
||||||
|
}
|
||||||
|
return upscaled_latent;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_ERROR("unsupported hires upscaler '%s'", sd_hires_upscaler_name(request.hires.upscaler));
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params) {
|
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params) {
|
||||||
if (sd_ctx == nullptr || sd_img_gen_params == nullptr) {
|
if (sd_ctx == nullptr || sd_img_gen_params == nullptr) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -3236,7 +3396,7 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
|
|||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
if (sd_ctx->sd->free_params_immediately) {
|
if (sd_ctx->sd->free_params_immediately && !request.hires.enabled) {
|
||||||
sd_ctx->sd->diffusion_model->free_params_buffer();
|
sd_ctx->sd->diffusion_model->free_params_buffer();
|
||||||
}
|
}
|
||||||
int64_t denoise_end = ggml_time_ms();
|
int64_t denoise_end = ggml_time_ms();
|
||||||
@ -3244,6 +3404,131 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
|
|||||||
final_latents.size(),
|
final_latents.size(),
|
||||||
(denoise_end - denoise_start) * 1.0f / 1000);
|
(denoise_end - denoise_start) * 1.0f / 1000);
|
||||||
|
|
||||||
|
if (request.hires.enabled && request.hires.target_width > 0) {
|
||||||
|
LOG_INFO("hires fix: upscaling to %dx%d", request.hires.target_width, request.hires.target_height);
|
||||||
|
|
||||||
|
std::unique_ptr<UpscalerGGML> hires_upscaler;
|
||||||
|
if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL) {
|
||||||
|
LOG_INFO("hires fix: loading model upscaler from '%s'", request.hires.model_path);
|
||||||
|
hires_upscaler = std::make_unique<UpscalerGGML>(sd_ctx->sd->n_threads,
|
||||||
|
false,
|
||||||
|
request.hires.upscale_tile_size);
|
||||||
|
if (!hires_upscaler->load_from_file(request.hires.model_path,
|
||||||
|
sd_ctx->sd->offload_params_to_cpu,
|
||||||
|
sd_ctx->sd->n_threads)) {
|
||||||
|
LOG_ERROR("load hires model upscaler failed");
|
||||||
|
if (sd_ctx->sd->free_params_immediately) {
|
||||||
|
sd_ctx->sd->diffusion_model->free_params_buffer();
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int hires_steps = request.hires.steps > 0 ? request.hires.steps : plan.sample_steps;
|
||||||
|
|
||||||
|
// sd-webui behavior: scale up total steps so trimming by denoising_strength yields exactly hires_steps effective steps,
|
||||||
|
// unlike img2img which trims from a fixed step count
|
||||||
|
hires_steps = static_cast<int>(hires_steps / request.hires.denoising_strength);
|
||||||
|
|
||||||
|
std::vector<float> hires_sigmas = sd_ctx->sd->denoiser->get_sigmas(
|
||||||
|
hires_steps,
|
||||||
|
sd_ctx->sd->get_image_seq_len(request.hires.target_height, request.hires.target_width),
|
||||||
|
sd_img_gen_params->sample_params.scheduler,
|
||||||
|
sd_ctx->sd->version);
|
||||||
|
|
||||||
|
size_t t_enc = static_cast<size_t>(hires_steps * request.hires.denoising_strength);
|
||||||
|
if (t_enc >= static_cast<size_t>(hires_steps)) {
|
||||||
|
t_enc = static_cast<size_t>(hires_steps) - 1;
|
||||||
|
}
|
||||||
|
std::vector<float> hires_sigma_sched(hires_sigmas.begin() + hires_steps - static_cast<int>(t_enc) - 1,
|
||||||
|
hires_sigmas.end());
|
||||||
|
LOG_INFO("hires fix: %d steps, denoising_strength=%.2f, sigma_sched_size=%zu",
|
||||||
|
hires_steps,
|
||||||
|
request.hires.denoising_strength,
|
||||||
|
hires_sigma_sched.size());
|
||||||
|
|
||||||
|
std::vector<sd::Tensor<float>> hires_final_latents;
|
||||||
|
int64_t hires_denoise_start = ggml_time_ms();
|
||||||
|
for (int b = 0; b < (int)final_latents.size(); b++) {
|
||||||
|
int64_t cur_seed = request.seed + b;
|
||||||
|
sd_ctx->sd->rng->manual_seed(cur_seed);
|
||||||
|
sd_ctx->sd->sampler_rng->manual_seed(cur_seed);
|
||||||
|
|
||||||
|
sd::Tensor<float> upscaled = upscale_hires_latent(sd_ctx,
|
||||||
|
final_latents[b],
|
||||||
|
request,
|
||||||
|
hires_upscaler.get());
|
||||||
|
if (upscaled.empty()) {
|
||||||
|
if (sd_ctx->sd->free_params_immediately) {
|
||||||
|
sd_ctx->sd->diffusion_model->free_params_buffer();
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
sd::Tensor<float> noise = sd::randn_like<float>(upscaled, sd_ctx->sd->rng);
|
||||||
|
|
||||||
|
sd::Tensor<float> hires_denoise_mask;
|
||||||
|
if (!latents.denoise_mask.empty()) {
|
||||||
|
std::vector<int64_t> mask_shape = latents.denoise_mask.shape();
|
||||||
|
mask_shape[0] = upscaled.shape()[0];
|
||||||
|
mask_shape[1] = upscaled.shape()[1];
|
||||||
|
hires_denoise_mask = sd::ops::interpolate(latents.denoise_mask,
|
||||||
|
mask_shape,
|
||||||
|
sd::ops::InterpolateMode::NearestMax);
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t hires_sample_start = ggml_time_ms();
|
||||||
|
sd::Tensor<float> x_0 = sd_ctx->sd->sample(sd_ctx->sd->diffusion_model,
|
||||||
|
true,
|
||||||
|
upscaled,
|
||||||
|
std::move(noise),
|
||||||
|
embeds.cond,
|
||||||
|
embeds.uncond,
|
||||||
|
embeds.img_cond,
|
||||||
|
embeds.id_cond,
|
||||||
|
latents.control_image,
|
||||||
|
request.control_strength,
|
||||||
|
request.guidance,
|
||||||
|
plan.eta,
|
||||||
|
request.shifted_timestep,
|
||||||
|
plan.sample_method,
|
||||||
|
sd_ctx->sd->is_flow_denoiser(),
|
||||||
|
hires_sigma_sched,
|
||||||
|
plan.start_merge_step,
|
||||||
|
latents.ref_latents,
|
||||||
|
request.increase_ref_index,
|
||||||
|
hires_denoise_mask,
|
||||||
|
sd::Tensor<float>(),
|
||||||
|
1.f,
|
||||||
|
request.cache_params);
|
||||||
|
int64_t hires_sample_end = ggml_time_ms();
|
||||||
|
if (!x_0.empty()) {
|
||||||
|
LOG_INFO("hires sampling %d/%d completed, taking %.2fs",
|
||||||
|
b + 1,
|
||||||
|
(int)final_latents.size(),
|
||||||
|
(hires_sample_end - hires_sample_start) * 1.0f / 1000);
|
||||||
|
hires_final_latents.push_back(std::move(x_0));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_ERROR("hires sampling for image %d/%d failed after %.2fs",
|
||||||
|
b + 1,
|
||||||
|
(int)final_latents.size(),
|
||||||
|
(hires_sample_end - hires_sample_start) * 1.0f / 1000);
|
||||||
|
if (sd_ctx->sd->free_params_immediately) {
|
||||||
|
sd_ctx->sd->diffusion_model->free_params_buffer();
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
if (sd_ctx->sd->free_params_immediately) {
|
||||||
|
sd_ctx->sd->diffusion_model->free_params_buffer();
|
||||||
|
}
|
||||||
|
int64_t hires_denoise_end = ggml_time_ms();
|
||||||
|
LOG_INFO("hires fix completed, taking %.2fs", (hires_denoise_end - hires_denoise_start) * 1.0f / 1000);
|
||||||
|
|
||||||
|
final_latents = std::move(hires_final_latents);
|
||||||
|
}
|
||||||
|
|
||||||
auto result = decode_image_outputs(sd_ctx, request, final_latents);
|
auto result = decode_image_outputs(sd_ctx, request, final_latents);
|
||||||
if (result == nullptr) {
|
if (result == nullptr) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|||||||
@ -1,27 +1,18 @@
|
|||||||
#include "esrgan.hpp"
|
#include "upscaler.h"
|
||||||
#include "ggml_extend.hpp"
|
#include "ggml_extend.hpp"
|
||||||
#include "model.h"
|
#include "model.h"
|
||||||
#include "stable-diffusion.h"
|
#include "stable-diffusion.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
struct UpscalerGGML {
|
UpscalerGGML::UpscalerGGML(int n_threads,
|
||||||
ggml_backend_t backend = nullptr; // general backend
|
bool direct,
|
||||||
ggml_type model_data_type = GGML_TYPE_F16;
|
int tile_size)
|
||||||
std::shared_ptr<ESRGAN> esrgan_upscaler;
|
|
||||||
std::string esrgan_path;
|
|
||||||
int n_threads;
|
|
||||||
bool direct = false;
|
|
||||||
int tile_size = 128;
|
|
||||||
|
|
||||||
UpscalerGGML(int n_threads,
|
|
||||||
bool direct = false,
|
|
||||||
int tile_size = 128)
|
|
||||||
: n_threads(n_threads),
|
: n_threads(n_threads),
|
||||||
direct(direct),
|
direct(direct),
|
||||||
tile_size(tile_size) {
|
tile_size(tile_size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool load_from_file(const std::string& esrgan_path,
|
bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
|
||||||
bool offload_params_to_cpu,
|
bool offload_params_to_cpu,
|
||||||
int n_threads) {
|
int n_threads) {
|
||||||
ggml_log_set(ggml_log_callback_default, nullptr);
|
ggml_log_set(ggml_log_callback_default, nullptr);
|
||||||
@ -63,9 +54,9 @@ struct UpscalerGGML {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor) {
|
sd::Tensor<float> UpscalerGGML::upscale_tensor(const sd::Tensor<float>& input_tensor) {
|
||||||
sd::Tensor<float> upscaled;
|
sd::Tensor<float> upscaled;
|
||||||
if (tile_size <= 0 || (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size)) {
|
if (tile_size <= 0 || (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size)) {
|
||||||
upscaled = esrgan_upscaler->compute(n_threads, input_tensor);
|
upscaled = esrgan_upscaler->compute(n_threads, input_tensor);
|
||||||
@ -96,9 +87,9 @@ struct UpscalerGGML {
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
return upscaled;
|
return upscaled;
|
||||||
}
|
}
|
||||||
|
|
||||||
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor) {
|
sd_image_t UpscalerGGML::upscale(sd_image_t input_image, uint32_t upscale_factor) {
|
||||||
// upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth
|
// upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth
|
||||||
sd_image_t upscaled_image = {0, 0, 0, nullptr};
|
sd_image_t upscaled_image = {0, 0, 0, nullptr};
|
||||||
int output_width = (int)input_image.width * esrgan_upscaler->scale;
|
int output_width = (int)input_image.width * esrgan_upscaler->scale;
|
||||||
@ -118,8 +109,7 @@ struct UpscalerGGML {
|
|||||||
LOG_INFO("input_image_tensor upscaled, taking %.2fs", (t3 - t0) / 1000.0f);
|
LOG_INFO("input_image_tensor upscaled, taking %.2fs", (t3 - t0) / 1000.0f);
|
||||||
upscaled_image = upscaled_data;
|
upscaled_image = upscaled_data;
|
||||||
return upscaled_image;
|
return upscaled_image;
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
struct upscaler_ctx_t {
|
struct upscaler_ctx_t {
|
||||||
UpscalerGGML* upscaler = nullptr;
|
UpscalerGGML* upscaler = nullptr;
|
||||||
|
|||||||
31
src/upscaler.h
Normal file
31
src/upscaler.h
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#ifndef __SD_UPSCALER_H__
|
||||||
|
#define __SD_UPSCALER_H__
|
||||||
|
|
||||||
|
#include "esrgan.hpp"
|
||||||
|
#include "stable-diffusion.h"
|
||||||
|
#include "tensor.hpp"
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
struct UpscalerGGML {
|
||||||
|
ggml_backend_t backend = nullptr; // general backend
|
||||||
|
ggml_type model_data_type = GGML_TYPE_F16;
|
||||||
|
std::shared_ptr<ESRGAN> esrgan_upscaler;
|
||||||
|
std::string esrgan_path;
|
||||||
|
int n_threads;
|
||||||
|
bool direct = false;
|
||||||
|
int tile_size = 128;
|
||||||
|
|
||||||
|
UpscalerGGML(int n_threads,
|
||||||
|
bool direct = false,
|
||||||
|
int tile_size = 128);
|
||||||
|
|
||||||
|
bool load_from_file(const std::string& esrgan_path,
|
||||||
|
bool offload_params_to_cpu,
|
||||||
|
int n_threads);
|
||||||
|
sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor);
|
||||||
|
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // __SD_UPSCALER_H__
|
||||||
Loading…
x
Reference in New Issue
Block a user