feat: add sd-webui style Hires. fix support

This commit is contained in:
leejet 2026-04-22 23:02:21 +08:00
parent 44cca3d626
commit f709e0e189
14 changed files with 870 additions and 136 deletions

View File

@ -278,7 +278,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
bool valid = cli_params.resolve_and_validate(); bool valid = cli_params.resolve_and_validate();
if (valid && cli_params.mode != METADATA) { if (valid && cli_params.mode != METADATA) {
valid = ctx_params.resolve_and_validate(cli_params.mode) && valid = ctx_params.resolve_and_validate(cli_params.mode) &&
gen_params.resolve_and_validate(cli_params.mode, ctx_params.lora_model_dir); gen_params.resolve_and_validate(cli_params.mode,
ctx_params.lora_model_dir,
ctx_params.hires_upscalers_dir);
} }
if (!valid) { if (!valid) {
@ -688,6 +690,10 @@ int main(int argc, const char* argv[]) {
vae_decode_only = false; vae_decode_only = false;
} }
if (gen_params.hires_enabled && !gen_params.hires_upscaler_model_path.empty()) {
vae_decode_only = false;
}
sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview); sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview);
SDImageVec results; SDImageVec results;

View File

@ -351,7 +351,10 @@ ArgOptions SDContextParams::get_options() {
"--lora-model-dir", "--lora-model-dir",
"lora model directory", "lora model directory",
&lora_model_dir}, &lora_model_dir},
{"",
"--hires-upscalers-dir",
"highres fix upscaler model directory",
&hires_upscalers_dir},
{"", {"",
"--tensor-type-rules", "--tensor-type-rules",
"weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")", "weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")",
@ -649,6 +652,7 @@ std::string SDContextParams::to_string() const {
<< " wtype: " << sd_type_name(wtype) << ",\n" << " wtype: " << sd_type_name(wtype) << ",\n"
<< " tensor_type_rules: \"" << tensor_type_rules << "\",\n" << " tensor_type_rules: \"" << tensor_type_rules << "\",\n"
<< " lora_model_dir: \"" << lora_model_dir << "\",\n" << " lora_model_dir: \"" << lora_model_dir << "\",\n"
<< " hires_upscalers_dir: \"" << hires_upscalers_dir << "\",\n"
<< " photo_maker_path: \"" << photo_maker_path << "\",\n" << " photo_maker_path: \"" << photo_maker_path << "\",\n"
<< " rng_type: " << sd_rng_type_name(rng_type) << ",\n" << " rng_type: " << sd_rng_type_name(rng_type) << ",\n"
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n" << " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
@ -777,6 +781,10 @@ ArgOptions SDGenerationParams::get_options() {
"--pm-id-embed-path", "--pm-id-embed-path",
"path to PHOTOMAKER v2 id embed", "path to PHOTOMAKER v2 id embed",
&pm_id_embed_path}, &pm_id_embed_path},
{"",
"--hires-upscaler",
"highres fix upscaler, Latent (nearest) or a model name/path under --hires-upscalers-dir (default: Latent (nearest))",
&hires_upscaler},
}; };
options.int_options = { options.int_options = {
@ -826,6 +834,22 @@ ArgOptions SDGenerationParams::get_options() {
"--upscale-tile-size", "--upscale-tile-size",
"tile size for ESRGAN upscaling (default: 128)", "tile size for ESRGAN upscaling (default: 128)",
&upscale_tile_size}, &upscale_tile_size},
{"",
"--hires-width",
"highres fix target width, 0 to use --hires-scale (default: 0)",
&hires_width},
{"",
"--hires-height",
"highres fix target height, 0 to use --hires-scale (default: 0)",
&hires_height},
{"",
"--hires-steps",
"highres fix second pass sample steps, 0 to reuse --steps (default: 0)",
&hires_steps},
{"",
"--hires-upscale-tile-size",
"highres fix upscaler tile size, reserved for model-backed upscalers (default: 128)",
&hires_upscale_tile_size},
}; };
options.float_options = { options.float_options = {
@ -913,6 +937,14 @@ ArgOptions SDGenerationParams::get_options() {
"--vae-tile-overlap", "--vae-tile-overlap",
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)", "tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
&vae_tiling_params.target_overlap}, &vae_tiling_params.target_overlap},
{"",
"--hires-scale",
"highres fix scale when target size is not set (default: 2.0)",
&hires_scale},
{"",
"--hires-denoising-strength",
"highres fix second pass denoising strength (default: 0.7)",
&hires_denoising_strength},
}; };
options.bool_options = { options.bool_options = {
@ -936,6 +968,11 @@ ArgOptions SDGenerationParams::get_options() {
"process vae in tiles to reduce memory usage", "process vae in tiles to reduce memory usage",
true, true,
&vae_tiling_params.enabled}, &vae_tiling_params.enabled},
{"",
"--hires",
"enable highres fix",
true,
&hires_enabled},
}; };
auto on_seed_arg = [&](int argc, const char** argv, int index) { auto on_seed_arg = [&](int argc, const char** argv, int index) {
@ -1424,6 +1461,37 @@ static bool parse_lora_json_field(const json& parent,
return true; return true;
} }
static bool resolve_model_file_from_dir(const std::string& model_name,
const std::string& model_dir,
const std::vector<std::string>& valid_ext,
const char* label,
std::string& resolved_path) {
if (model_dir.empty()) {
LOG_ERROR("%s directory is empty", label);
return false;
}
if (model_name.empty() ||
model_name.find('/') != std::string::npos ||
model_name.find('\\') != std::string::npos ||
fs::path(model_name).has_root_path() ||
fs::path(model_name).has_extension()) {
LOG_ERROR("%s must be a model name without path or extension: %s", label, model_name.c_str());
return false;
}
fs::path model_dir_path = model_dir;
for (const auto& ext : valid_ext) {
fs::path try_path = model_dir_path / (model_name + ext);
if (fs::exists(try_path) && fs::is_regular_file(try_path)) {
resolved_path = try_path.lexically_normal().string();
return true;
}
}
LOG_ERROR("can not find %s %s in %s", label, model_name.c_str(), model_dir_path.lexically_normal().string().c_str());
return false;
}
bool SDGenerationParams::from_json_str( bool SDGenerationParams::from_json_str(
const std::string& json_str, const std::string& json_str,
const std::function<std::string(const std::string&)>& lora_path_resolver) { const std::function<std::string(const std::string&)>& lora_path_resolver) {
@ -1487,6 +1555,34 @@ bool SDGenerationParams::from_json_str(
load_if_exists("increase_ref_index", increase_ref_index); load_if_exists("increase_ref_index", increase_ref_index);
load_if_exists("embed_image_metadata", embed_image_metadata); load_if_exists("embed_image_metadata", embed_image_metadata);
if (j.contains("hires") && j["hires"].is_object()) {
const json& hires_json = j["hires"];
if (hires_json.contains("enabled") && hires_json["enabled"].is_boolean()) {
hires_enabled = hires_json["enabled"];
}
if (hires_json.contains("upscaler") && hires_json["upscaler"].is_string()) {
hires_upscaler = hires_json["upscaler"];
}
if (hires_json.contains("scale") && hires_json["scale"].is_number()) {
hires_scale = hires_json["scale"];
}
if (hires_json.contains("target_width") && hires_json["target_width"].is_number_integer()) {
hires_width = hires_json["target_width"];
}
if (hires_json.contains("target_height") && hires_json["target_height"].is_number_integer()) {
hires_height = hires_json["target_height"];
}
if (hires_json.contains("steps") && hires_json["steps"].is_number_integer()) {
hires_steps = hires_json["steps"];
}
if (hires_json.contains("denoising_strength") && hires_json["denoising_strength"].is_number()) {
hires_denoising_strength = hires_json["denoising_strength"];
}
if (hires_json.contains("upscale_tile_size") && hires_json["upscale_tile_size"].is_number_integer()) {
hires_upscale_tile_size = hires_json["upscale_tile_size"];
}
}
auto parse_sample_params_json = [&](const json& sample_json, auto parse_sample_params_json = [&](const json& sample_json,
sd_sample_params_t& target_params, sd_sample_params_t& target_params,
std::vector<int>& target_skip_layers, std::vector<int>& target_skip_layers,
@ -1800,7 +1896,7 @@ bool SDGenerationParams::initialize_cache_params() {
return true; return true;
} }
bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict) { bool SDGenerationParams::resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict) {
if (high_noise_sample_params.sample_steps <= 0) { if (high_noise_sample_params.sample_steps <= 0) {
high_noise_sample_params.sample_steps = -1; high_noise_sample_params.sample_steps = -1;
} }
@ -1819,6 +1915,27 @@ bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict)
sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100); sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100);
} }
hires_upscaler_model_path.clear();
if (hires_enabled) {
if (hires_upscaler.empty()) {
hires_upscaler = "Latent (nearest)";
}
resolved_hires_upscaler = str_to_sd_hires_upscaler(hires_upscaler.c_str());
if (resolved_hires_upscaler == SD_HIRES_UPSCALER_NONE) {
hires_enabled = false;
} else if (resolved_hires_upscaler == SD_HIRES_UPSCALER_COUNT) {
static const std::vector<std::string> valid_ext = {".gguf", ".safetensors", ".pt", ".pth"};
if (!resolve_model_file_from_dir(hires_upscaler,
hires_upscalers_dir,
valid_ext,
"hires upscaler",
hires_upscaler_model_path)) {
return false;
}
resolved_hires_upscaler = SD_HIRES_UPSCALER_MODEL;
}
}
prompt_with_lora = prompt; prompt_with_lora = prompt;
if (!lora_model_dir.empty()) { if (!lora_model_dir.empty()) {
extract_and_remove_lora(lora_model_dir); extract_and_remove_lora(lora_model_dir);
@ -1883,6 +2000,29 @@ bool SDGenerationParams::validate(SDMode mode) {
return false; return false;
} }
if (hires_enabled) {
if (hires_width < 0 || hires_height < 0) {
LOG_ERROR("error: hires target width and height must be >= 0");
return false;
}
if (hires_scale <= 0.f && hires_width <= 0 && hires_height <= 0) {
LOG_ERROR("error: hires scale must be positive when target size is not set");
return false;
}
if (hires_steps < 0) {
LOG_ERROR("error: hires steps must be >= 0");
return false;
}
if (hires_denoising_strength <= 0.f || hires_denoising_strength > 1.f) {
LOG_ERROR("error: hires denoising strength must be in (0.0, 1.0]");
return false;
}
if (hires_upscale_tile_size < 1) {
LOG_ERROR("error: hires upscale tile size must be positive");
return false;
}
}
if (mode == UPSCALE) { if (mode == UPSCALE) {
if (init_image_path.length() == 0) { if (init_image_path.length() == 0) {
LOG_ERROR("error: upscale mode needs an init image (--init-img)\n"); LOG_ERROR("error: upscale mode needs an init image (--init-img)\n");
@ -1893,8 +2033,11 @@ bool SDGenerationParams::validate(SDMode mode) {
return true; return true;
} }
bool SDGenerationParams::resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict) { bool SDGenerationParams::resolve_and_validate(SDMode mode,
if (!resolve(lora_model_dir, strict)) { const std::string& lora_model_dir,
const std::string& hires_upscalers_dir,
bool strict) {
if (!resolve(lora_model_dir, hires_upscalers_dir, strict)) {
return false; return false;
} }
if (!validate(mode)) { if (!validate(mode)) {
@ -1965,6 +2108,16 @@ sd_img_gen_params_t SDGenerationParams::to_sd_img_gen_params_t() {
params.pm_params = pm_params; params.pm_params = pm_params;
params.vae_tiling_params = vae_tiling_params; params.vae_tiling_params = vae_tiling_params;
params.cache = cache_params; params.cache = cache_params;
params.hires.enabled = hires_enabled;
params.hires.upscaler = resolved_hires_upscaler;
params.hires.model_path = hires_upscaler_model_path.empty() ? nullptr : hires_upscaler_model_path.c_str();
params.hires.scale = hires_scale;
params.hires.target_width = hires_width;
params.hires.target_height = hires_height;
params.hires.steps = hires_steps;
params.hires.denoising_strength = hires_denoising_strength;
params.hires.upscale_tile_size = hires_upscale_tile_size;
return params; return params;
} }
@ -2089,6 +2242,15 @@ std::string SDGenerationParams::to_string() const {
<< " seed: " << seed << ",\n" << " seed: " << seed << ",\n"
<< " upscale_repeats: " << upscale_repeats << ",\n" << " upscale_repeats: " << upscale_repeats << ",\n"
<< " upscale_tile_size: " << upscale_tile_size << ",\n" << " upscale_tile_size: " << upscale_tile_size << ",\n"
<< " hires: { enabled: " << (hires_enabled ? "true" : "false")
<< ", upscaler: \"" << hires_upscaler << "\""
<< ", model_path: \"" << hires_upscaler_model_path << "\""
<< ", scale: " << hires_scale
<< ", target_width: " << hires_width
<< ", target_height: " << hires_height
<< ", steps: " << hires_steps
<< ", denoising_strength: " << hires_denoising_strength
<< ", upscale_tile_size: " << hires_upscale_tile_size << " },\n"
<< " vae_tiling_params: { " << " vae_tiling_params: { "
<< vae_tiling_params.enabled << ", " << vae_tiling_params.enabled << ", "
<< vae_tiling_params.tile_size_x << ", " << vae_tiling_params.tile_size_x << ", "
@ -2162,6 +2324,13 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
if (gen_params.clip_skip != -1) { if (gen_params.clip_skip != -1) {
parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", "; parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", ";
} }
if (gen_params.hires_enabled) {
parameter_string += "Hires upscale: " + gen_params.hires_upscaler + ", ";
parameter_string += "Hires scale: " + std::to_string(gen_params.hires_scale) + ", ";
parameter_string += "Hires resize: " + std::to_string(gen_params.hires_width) + "x" + std::to_string(gen_params.hires_height) + ", ";
parameter_string += "Hires steps: " + std::to_string(gen_params.hires_steps) + ", ";
parameter_string += "Denoising strength: " + std::to_string(gen_params.hires_denoising_strength) + ", ";
}
parameter_string += "Version: stable-diffusion.cpp"; parameter_string += "Version: stable-diffusion.cpp";
return parameter_string; return parameter_string;
} }

View File

@ -101,6 +101,7 @@ struct SDContextParams {
sd_type_t wtype = SD_TYPE_COUNT; sd_type_t wtype = SD_TYPE_COUNT;
std::string tensor_type_rules; std::string tensor_type_rules;
std::string lora_model_dir = "."; std::string lora_model_dir = ".";
std::string hires_upscalers_dir;
std::map<std::string, std::string> embedding_map; std::map<std::string, std::string> embedding_map;
std::vector<sd_embedding_t> embedding_vec; std::vector<sd_embedding_t> embedding_vec;
@ -190,12 +191,23 @@ struct SDGenerationParams {
int upscale_repeats = 1; int upscale_repeats = 1;
int upscale_tile_size = 128; int upscale_tile_size = 128;
bool hires_enabled = false;
std::string hires_upscaler = "Latent (nearest)";
std::string hires_upscaler_model_path;
float hires_scale = 2.f;
int hires_width = 0;
int hires_height = 0;
int hires_steps = 0;
float hires_denoising_strength = 0.7f;
int hires_upscale_tile_size = 128;
std::map<std::string, float> lora_map; std::map<std::string, float> lora_map;
std::map<std::string, float> high_noise_lora_map; std::map<std::string, float> high_noise_lora_map;
// Derived and normalized fields. // Derived and normalized fields.
std::string prompt_with_lora; // for metadata record only std::string prompt_with_lora; // for metadata record only
std::vector<sd_lora_t> lora_vec; std::vector<sd_lora_t> lora_vec;
// Value-initialized so it is never read in an indeterminate state: resolve()
// assigns it only when hires is enabled, while to_sd_img_gen_params_t()
// copies it unconditionally.
sd_hires_upscaler_t resolved_hires_upscaler{};
// Owned execution payload. // Owned execution payload.
SDImageOwner init_image; SDImageOwner init_image;
@ -225,9 +237,12 @@ struct SDGenerationParams {
void set_width_and_height_if_unset(int w, int h); void set_width_and_height_if_unset(int w, int h);
int get_resolved_width() const; int get_resolved_width() const;
int get_resolved_height() const; int get_resolved_height() const;
bool resolve(const std::string& lora_model_dir, bool strict = false); bool resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict = false);
bool validate(SDMode mode); bool validate(SDMode mode);
bool resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict = false); bool resolve_and_validate(SDMode mode,
const std::string& lora_model_dir,
const std::string& hires_upscalers_dir,
bool strict = false);
sd_img_gen_params_t to_sd_img_gen_params_t(); sd_img_gen_params_t to_sd_img_gen_params_t();
sd_vid_gen_params_t to_sd_vid_gen_params_t(); sd_vid_gen_params_t to_sd_vid_gen_params_t();
std::string to_string() const; std::string to_string() const;

View File

@ -38,6 +38,8 @@ Current generation-related endpoints include:
- `POST /sdapi/v1/txt2img` - `POST /sdapi/v1/txt2img`
- `POST /sdapi/v1/img2img` - `POST /sdapi/v1/img2img`
- `GET /sdapi/v1/loras` - `GET /sdapi/v1/loras`
- `GET /sdapi/v1/upscalers`
- `GET /sdapi/v1/latent-upscale-modes`
- `GET /sdapi/v1/samplers` - `GET /sdapi/v1/samplers`
- `GET /sdapi/v1/schedulers` - `GET /sdapi/v1/schedulers`
- `GET /sdapi/v1/sd-models` - `GET /sdapi/v1/sd-models`
@ -216,6 +218,13 @@ Currently supported request fields:
| `scheduler` | `string` | Scheduler name | | `scheduler` | `string` | Scheduler name |
| `lora` | `array<object>` | Structured LoRA list | | `lora` | `array<object>` | Structured LoRA list |
| `extra_images` | `array<string>` | Base64 or data URL images | | `extra_images` | `array<string>` | Base64 or data URL images |
| `enable_hr` | `boolean` | Enable highres fix for `txt2img` |
| `hr_upscaler` | `string` | `Latent (nearest)` or an upscaler model name from `/sdapi/v1/upscalers` |
| `hr_scale` | `number` | Highres scale when resize target is not set |
| `hr_resize_x` | `integer` | Highres target width, `0` to use scale |
| `hr_resize_y` | `integer` | Highres target height, `0` to use scale |
| `hr_steps` | `integer` | Highres second-pass sample steps, `0` to reuse `steps` |
| `denoising_strength` | `number` | Highres denoising strength for `txt2img` |
Native extension fields: Native extension fields:
@ -241,6 +250,8 @@ Currently supported request fields:
| `inpainting_mask_invert` | `integer` or `boolean` | Treated as invert flag | | `inpainting_mask_invert` | `integer` or `boolean` | Treated as invert flag |
| `denoising_strength` | `number` | Clamped to `0.0..1.0` | | `denoising_strength` | `number` | Clamped to `0.0..1.0` |
Highres fix fields are currently handled for `txt2img`; `img2img` uses `denoising_strength` as image-to-image strength.
Native extension fields: Native extension fields:
- any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt` - any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
@ -258,6 +269,8 @@ Response fields:
Currently exposed: Currently exposed:
- `GET /sdapi/v1/loras` - `GET /sdapi/v1/loras`
- `GET /sdapi/v1/upscalers`
- `GET /sdapi/v1/latent-upscale-modes`
- `GET /sdapi/v1/samplers` - `GET /sdapi/v1/samplers`
- `GET /sdapi/v1/schedulers` - `GET /sdapi/v1/schedulers`
- `GET /sdapi/v1/sd-models` - `GET /sdapi/v1/sd-models`
@ -272,6 +285,24 @@ Response fields:
| `[].name` | `string` | Display name derived from file stem | | `[].name` | `string` | Display name derived from file stem |
| `[].path` | `string` | Relative path under the configured LoRA directory | | `[].path` | `string` | Relative path under the configured LoRA directory |
`GET /sdapi/v1/upscalers`
| Field | Type | Notes |
| --- | --- | --- |
| `[].name` | `string` | Built-in name or model stem |
| `[].model_name` | `string \| null` | Model family label for model-backed upscalers |
| `[].model_path` | `string \| null` | Absolute model path for model-backed upscalers |
| `[].model_url` | `string \| null` | Currently always null |
| `[].scale` | `integer` | Currently `4` |
The only built-in entry is `None`; latent upscale modes are listed separately by `GET /sdapi/v1/latent-upscale-modes`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
`GET /sdapi/v1/latent-upscale-modes`
| Field | Type | Notes |
| --- | --- | --- |
| `[].name` | `string` | WebUI-compatible latent upscale mode name |
`GET /sdapi/v1/samplers` `GET /sdapi/v1/samplers`
| Field | Type | Notes | | Field | Type | Notes |
@ -388,6 +419,7 @@ Top-level fields:
| `samplers` | `array<string>` | Available sampling methods | | `samplers` | `array<string>` | Available sampling methods |
| `schedulers` | `array<string>` | Available schedulers | | `schedulers` | `array<string>` | Available schedulers |
| `loras` | `array<object>` | Available LoRA entries | | `loras` | `array<object>` | Available LoRA entries |
| `upscalers` | `array<object>` | Available highres upscalers (built-in and model-backed) |
| `limits` | `object` | Shared queue and size limits | | `limits` | `object` | Shared queue and size limits |
`model` `model`
@ -424,6 +456,14 @@ Shared nested fields:
| `loras[].name` | `string` | | `loras[].name` | `string` |
| `loras[].path` | `string` | | `loras[].path` | `string` |
`upscalers`
| Field | Type | Notes |
| --- | --- | --- |
| `upscalers[].name` | `string` | Built-in name or model stem; use this value in `hires.upscaler` |
Built-in entries include `None` and `Latent (nearest)`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
`limits` `limits`
| Field | Type | | Field | Type |
@ -482,6 +522,15 @@ Shared default fields used by both `img_gen` and `vid_gen`:
| `auto_resize_ref_image` | `boolean` | | `auto_resize_ref_image` | `boolean` |
| `increase_ref_index` | `boolean` | | `increase_ref_index` | `boolean` |
| `control_strength` | `number` | | `control_strength` | `number` |
| `hires` | `object` |
| `hires.enabled` | `boolean` |
| `hires.upscaler` | `string` |
| `hires.scale` | `number` |
| `hires.target_width` | `integer` |
| `hires.target_height` | `integer` |
| `hires.steps` | `integer` |
| `hires.denoising_strength` | `number` |
| `hires.upscale_tile_size` | `integer` |
`vid_gen`-specific default fields: `vid_gen`-specific default fields:
@ -514,6 +563,7 @@ Fields returned in `features_by_mode.img_gen`:
- `ref_images` - `ref_images`
- `lora` - `lora`
- `vae_tiling` - `vae_tiling`
- `hires`
- `cache` - `cache`
- `cancel_queued` - `cancel_queued`
- `cancel_generating` - `cancel_generating`
@ -625,6 +675,16 @@ Example:
}, },
"lora": [], "lora": [],
"hires": {
"enabled": false,
"upscaler": "Latent (nearest)",
"scale": 2.0,
"target_width": 0,
"target_height": 0,
"steps": 0,
"denoising_strength": 0.7,
"upscale_tile_size": 128
},
"vae_tiling_params": { "vae_tiling_params": {
"enabled": false, "enabled": false,
@ -729,12 +789,23 @@ Other native fields:
| Field | Type | | Field | Type |
| --- | --- | | --- | --- |
| `hires` | `object` |
| `hires.enabled` | `boolean` |
| `hires.upscaler` | `string` |
| `hires.scale` | `number` |
| `hires.target_width` | `integer` |
| `hires.target_height` | `integer` |
| `hires.steps` | `integer` |
| `hires.denoising_strength` | `number` |
| `hires.upscale_tile_size` | `integer` |
| `vae_tiling_params` | `object` | | `vae_tiling_params` | `object` |
| `cache_mode` | `string` | | `cache_mode` | `string` |
| `cache_option` | `string` | | `cache_option` | `string` |
| `scm_mask` | `string` | | `scm_mask` | `string` |
| `scm_policy_dynamic` | `boolean` | | `scm_policy_dynamic` | `boolean` |
For `hires.upscaler`, use `Latent (nearest)` for latent upscale or an `upscalers[].name` value from `GET /sdcpp/v1/capabilities`. Model-backed upscalers are resolved as `--hires-upscalers-dir / (name + ext)` and must live directly in that directory.
HTTP-only output fields: HTTP-only output fields:
| Field | Type | | Field | Type |

View File

@ -48,7 +48,9 @@ static void parse_args(int argc,
if (!svr_params.resolve_and_validate() || if (!svr_params.resolve_and_validate() ||
!ctx_params.resolve_and_validate(IMG_GEN) || !ctx_params.resolve_and_validate(IMG_GEN) ||
!default_gen_params.resolve_and_validate(IMG_GEN, ctx_params.lora_model_dir)) { !default_gen_params.resolve_and_validate(IMG_GEN,
ctx_params.lora_model_dir,
ctx_params.hires_upscalers_dir)) {
print_usage(argv[0], options_vec); print_usage(argv[0], options_vec);
exit(1); exit(1);
} }
@ -95,6 +97,8 @@ int main(int argc, const char** argv) {
std::vector<LoraEntry> lora_cache; std::vector<LoraEntry> lora_cache;
std::mutex lora_mutex; std::mutex lora_mutex;
std::vector<UpscalerEntry> upscaler_cache;
std::mutex upscaler_mutex;
AsyncJobManager async_job_manager; AsyncJobManager async_job_manager;
ServerRuntime runtime = { ServerRuntime runtime = {
sd_ctx.get(), sd_ctx.get(),
@ -104,6 +108,8 @@ int main(int argc, const char** argv) {
&default_gen_params, &default_gen_params,
&lora_cache, &lora_cache,
&lora_mutex, &lora_mutex,
&upscaler_cache,
&upscaler_mutex,
&async_job_manager, &async_job_manager,
}; };

View File

@ -70,7 +70,7 @@ static bool build_openai_generation_request(const httplib::Request& req,
} }
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs. // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) { if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid params"; error_message = "invalid params";
return false; return false;
} }
@ -212,7 +212,7 @@ static bool build_openai_edit_request(const httplib::Request& req,
} }
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs. // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) { if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid params"; error_message = "invalid params";
return false; return false;
} }

View File

@ -1,6 +1,7 @@
#include "routes.h" #include "routes.h"
#include <algorithm> #include <algorithm>
#include <cctype>
#include <cstring> #include <cstring>
#include <regex> #include <regex>
#include <string_view> #include <string_view>
@ -35,14 +36,20 @@ static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
return {}; return {};
} }
// Returns a copy of `value` in which every byte has been mapped through
// std::tolower (each char is widened via unsigned char first).
static std::string lower_ascii(std::string value) {
    for (char& ch : value) {
        const unsigned char byte = static_cast<unsigned char>(ch);
        ch = static_cast<char>(std::tolower(byte));
    }
    return value;
}
static enum sample_method_t get_sdapi_sample_method(std::string name) { static enum sample_method_t get_sdapi_sample_method(std::string name) {
enum sample_method_t result = str_to_sample_method(name.c_str()); enum sample_method_t result = str_to_sample_method(name.c_str());
if (result != SAMPLE_METHOD_COUNT) { if (result != SAMPLE_METHOD_COUNT) {
return result; return result;
} }
std::transform(name.begin(), name.end(), name.begin(), name = lower_ascii(name);
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
static const std::unordered_map<std::string_view, sample_method_t> hardcoded{ static const std::unordered_map<std::string_view, sample_method_t> hardcoded{
{"euler a", EULER_A_SAMPLE_METHOD}, {"euler a", EULER_A_SAMPLE_METHOD},
{"k_euler_a", EULER_A_SAMPLE_METHOD}, {"k_euler_a", EULER_A_SAMPLE_METHOD},
@ -114,6 +121,18 @@ static bool build_sdapi_img_gen_request(const json& j,
request.gen_params.width = j.value("width", -1); request.gen_params.width = j.value("width", -1);
request.gen_params.height = j.value("height", -1); request.gen_params.height = j.value("height", -1);
if (!img2img && j.value("enable_hr", false)) {
request.gen_params.hires_enabled = true;
request.gen_params.hires_scale = j.value("hr_scale", request.gen_params.hires_scale);
request.gen_params.hires_width = j.value("hr_resize_x", request.gen_params.hires_width);
request.gen_params.hires_height = j.value("hr_resize_y", request.gen_params.hires_height);
request.gen_params.hires_steps = j.value("hr_steps", request.gen_params.hires_steps);
request.gen_params.hires_denoising_strength =
j.value("denoising_strength", request.gen_params.hires_denoising_strength);
request.gen_params.hires_upscaler = j.value("hr_upscaler", request.gen_params.hires_upscaler);
}
std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt); std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt);
if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) { if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) {
error_message = "invalid sd_cpp_extra_args"; error_message = "invalid sd_cpp_extra_args";
@ -228,7 +247,7 @@ static bool build_sdapi_img_gen_request(const json& j,
} }
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs. // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) { if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid params"; error_message = "invalid params";
return false; return false;
} }
@ -347,6 +366,45 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
res.set_content(result.dump(), "application/json"); res.set_content(result.dump(), "application/json");
}); });
// GET /sdapi/v1/upscalers: responds with a JSON array of upscaler entries.
// The array starts with the built-in "None" entry, followed by one entry per
// item currently held in the runtime's upscaler cache.
svr.Get("/sdapi/v1/upscalers", [runtime](const httplib::Request&, httplib::Response& res) {
// Refresh the cache before listing it.
refresh_upscaler_cache(*runtime);
// Builds an entry that has no backing model: every model_* field is null
// and the scale is fixed at 4.
auto make_builtin = [](const char* name) {
json item;
item["name"] = name;
item["model_name"] = nullptr;
item["model_path"] = nullptr;
item["model_url"] = nullptr;
item["scale"] = 4;
return item;
};
json result = json::array();
result.push_back(make_builtin("None"));
{
// Hold the upscaler mutex while iterating the shared cache.
std::lock_guard<std::mutex> lock(*runtime->upscaler_mutex);
for (const auto& e : *runtime->upscaler_cache) {
json item;
item["name"] = e.name;
item["model_name"] = e.model_name;
item["model_path"] = e.fullpath;
// No download URL is reported for cached entries.
item["model_url"] = nullptr;
item["scale"] = e.scale;
result.push_back(item);
}
}
res.set_content(result.dump(), "application/json");
});
// GET /sdapi/v1/latent-upscale-modes: responds with a fixed JSON array that
// contains a single entry named "Latent (nearest)".
svr.Get("/sdapi/v1/latent-upscale-modes", [](const httplib::Request&, httplib::Response& res) {
json result = json::array({
{{"name", "Latent (nearest)"}},
});
res.set_content(result.dump(), "application/json");
});
svr.Get("/sdapi/v1/samplers", [runtime](const httplib::Request&, httplib::Response& res) { svr.Get("/sdapi/v1/samplers", [runtime](const httplib::Request&, httplib::Response& res) {
std::vector<std::string> sampler_names; std::vector<std::string> sampler_names;
sampler_names.push_back("default"); sampler_names.push_back("default");

View File

@ -114,6 +114,17 @@ static json make_img_gen_defaults_json(const SDGenerationParams& defaults, const
{"increase_ref_index", defaults.increase_ref_index}, {"increase_ref_index", defaults.increase_ref_index},
{"control_strength", defaults.control_strength}, {"control_strength", defaults.control_strength},
{"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)}, {"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)},
{"hires",
{
{"enabled", defaults.hires_enabled},
{"upscaler", defaults.hires_upscaler},
{"scale", defaults.hires_scale},
{"target_width", defaults.hires_width},
{"target_height", defaults.hires_height},
{"steps", defaults.hires_steps},
{"denoising_strength", defaults.hires_denoising_strength},
{"upscale_tile_size", defaults.hires_upscale_tile_size},
}},
{"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)}, {"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
{"cache_mode", defaults.cache_mode}, {"cache_mode", defaults.cache_mode},
{"cache_option", defaults.cache_option}, {"cache_option", defaults.cache_option},
@ -157,6 +168,7 @@ static json make_img_gen_features_json() {
{"ref_images", true}, {"ref_images", true},
{"lora", true}, {"lora", true},
{"vae_tiling", true}, {"vae_tiling", true},
{"hires", true},
{"cache", true}, {"cache", true},
{"cancel_queued", true}, {"cancel_queued", true},
{"cancel_generating", false}, {"cancel_generating", false},
@ -179,6 +191,7 @@ static json make_vid_gen_features_json() {
static json make_capabilities_json(ServerRuntime& runtime) { static json make_capabilities_json(ServerRuntime& runtime) {
refresh_lora_cache(runtime); refresh_lora_cache(runtime);
refresh_upscaler_cache(runtime);
AsyncJobManager& manager = *runtime.async_job_manager; AsyncJobManager& manager = *runtime.async_job_manager;
const auto& defaults = *runtime.default_gen_params; const auto& defaults = *runtime.default_gen_params;
@ -190,6 +203,7 @@ static json make_capabilities_json(ServerRuntime& runtime) {
json image_output_formats = supported_img_output_formats(); json image_output_formats = supported_img_output_formats();
json video_output_formats = supported_vid_output_formats(); json video_output_formats = supported_vid_output_formats();
json available_loras = json::array(); json available_loras = json::array();
json available_upscalers = json::array();
json supported_modes = json::array(); json supported_modes = json::array();
for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) { for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
@ -210,6 +224,21 @@ static json make_capabilities_json(ServerRuntime& runtime) {
} }
} }
available_upscalers.push_back({
{"name", "None"},
});
available_upscalers.push_back({
{"name", "Latent (nearest)"},
});
{
std::lock_guard<std::mutex> lock(*runtime.upscaler_mutex);
for (const auto& entry : *runtime.upscaler_cache) {
available_upscalers.push_back({
{"name", entry.name},
});
}
}
if (supports_img) { if (supports_img) {
supported_modes.push_back("img_gen"); supported_modes.push_back("img_gen");
} }
@ -284,6 +313,7 @@ static json make_capabilities_json(ServerRuntime& runtime) {
result["features"] = top_level_features; result["features"] = top_level_features;
result["features_by_mode"] = features_by_mode; result["features_by_mode"] = features_by_mode;
result["loras"] = available_loras; result["loras"] = available_loras;
result["upscalers"] = available_upscalers;
return result; return result;
} }
@ -307,7 +337,7 @@ static bool parse_img_gen_request(const json& body,
return false; return false;
} }
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs. // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) { if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid generation parameters"; error_message = "invalid generation parameters";
return false; return false;
} }
@ -334,7 +364,7 @@ static bool parse_vid_gen_request(const json& body,
return false; return false;
} }
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs. // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
if (!request.gen_params.resolve_and_validate(VID_GEN, "", true)) { if (!request.gen_params.resolve_and_validate(VID_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
error_message = "invalid generation parameters"; error_message = "invalid generation parameters";
return false; return false;
} }

View File

@ -1,6 +1,7 @@
#include "runtime.h" #include "runtime.h"
#include <algorithm> #include <algorithm>
#include <cctype>
#include <chrono> #include <chrono>
#include <cstdlib> #include <cstdlib>
#include <filesystem> #include <filesystem>
@ -13,6 +14,18 @@
namespace fs = std::filesystem; namespace fs = std::filesystem;
// Returns a copy of `value` in which every char has been passed through
// std::tolower. Each char is converted to unsigned char first so that
// std::tolower never receives a negative value.
static std::string lower_ascii(std::string value) {
    for (char& ch : value) {
        const unsigned char as_byte = static_cast<unsigned char>(ch);
        ch = static_cast<char>(std::tolower(as_byte));
    }
    return value;
}
// Reports whether the extension of `p`, lower-cased with lower_ascii, is one
// of the model file suffixes accepted by the directory scans in this file:
// .gguf, .pt, .pth or .safetensors.
static bool is_supported_model_ext(const fs::path& p) {
    const std::string suffix = lower_ascii(p.extension().string());
    for (const char* known : {".gguf", ".pt", ".pth", ".safetensors"}) {
        if (suffix == known) {
            return true;
        }
    }
    return false;
}
static const std::string k_base64_chars = static const std::string k_base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz"
@ -241,20 +254,12 @@ void refresh_lora_cache(ServerRuntime& rt) {
fs::path lora_dir = rt.ctx_params->lora_model_dir; fs::path lora_dir = rt.ctx_params->lora_model_dir;
if (fs::exists(lora_dir) && fs::is_directory(lora_dir)) { if (fs::exists(lora_dir) && fs::is_directory(lora_dir)) {
auto is_lora_ext = [](const fs::path& p) {
auto ext = p.extension().string();
std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) {
return static_cast<char>(std::tolower(c));
});
return ext == ".gguf" || ext == ".pt" || ext == ".pth" || ext == ".safetensors";
};
for (auto& entry : fs::recursive_directory_iterator(lora_dir)) { for (auto& entry : fs::recursive_directory_iterator(lora_dir)) {
if (!entry.is_regular_file()) { if (!entry.is_regular_file()) {
continue; continue;
} }
const fs::path& p = entry.path(); const fs::path& p = entry.path();
if (!is_lora_ext(p)) { if (!is_supported_model_ext(p)) {
continue; continue;
} }
@ -286,6 +291,40 @@ std::string get_lora_full_path(ServerRuntime& rt, const std::string& path) {
return it != rt.lora_cache->end() ? it->fullpath : ""; return it != rt.lora_cache->end() ? it->fullpath : "";
} }
// Rebuilds rt.upscaler_cache from the regular files directly inside
// rt.ctx_params->hires_upscalers_dir (sub-directories are not descended into).
// The list is assembled and sorted by name without holding the lock;
// rt.upscaler_mutex is taken only to swap the finished list into place.
// When the directory does not exist, or is not a directory, the cache ends up
// empty.
void refresh_upscaler_cache(ServerRuntime& rt) {
    std::vector<UpscalerEntry> found;

    const fs::path root = rt.ctx_params->hires_upscalers_dir;
    const bool scannable = fs::exists(root) && fs::is_directory(root);
    if (scannable) {
        for (auto& item : fs::directory_iterator(root)) {
            if (item.is_regular_file() && is_supported_model_ext(item.path())) {
                const fs::path& file = item.path();

                UpscalerEntry candidate;
                // Display name is the file name without its extension.
                candidate.name       = file.stem().u8string();
                candidate.path       = file.filename().u8string();
                candidate.fullpath   = fs::absolute(file).lexically_normal().u8string();
                candidate.model_name = "ESRGAN_4x";
                found.push_back(std::move(candidate));
            }
        }
    }

    const auto by_name = [](const UpscalerEntry& lhs, const UpscalerEntry& rhs) {
        return lhs.name < rhs.name;
    };
    std::sort(found.begin(), found.end(), by_name);

    std::lock_guard<std::mutex> guard(*rt.upscaler_mutex);
    *rt.upscaler_cache = std::move(found);
}
int64_t unix_timestamp_now() { int64_t unix_timestamp_now() {
return std::chrono::duration_cast<std::chrono::seconds>( return std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch()) std::chrono::system_clock::now().time_since_epoch())

View File

@ -37,6 +37,14 @@ struct LoraEntry {
std::string fullpath; std::string fullpath;
}; };
// One upscaler model file discovered by refresh_upscaler_cache().
struct UpscalerEntry {
    // File name without its extension; also the key the cache is sorted by.
    std::string name;
    // File name including its extension.
    std::string path;
    // Absolute, lexically normalized path of the file.
    std::string fullpath;
    // refresh_upscaler_cache() currently stores the fixed string "ESRGAN_4x" here.
    std::string model_name;
    // Defaults to 4; refresh_upscaler_cache() does not assign this field.
    int scale = 4;
};
struct ServerRuntime { struct ServerRuntime {
sd_ctx_t* sd_ctx; sd_ctx_t* sd_ctx;
std::mutex* sd_ctx_mutex; std::mutex* sd_ctx_mutex;
@ -45,6 +53,8 @@ struct ServerRuntime {
const SDGenerationParams* default_gen_params; const SDGenerationParams* default_gen_params;
std::vector<LoraEntry>* lora_cache; std::vector<LoraEntry>* lora_cache;
std::mutex* lora_mutex; std::mutex* lora_mutex;
std::vector<UpscalerEntry>* upscaler_cache;
std::mutex* upscaler_mutex;
AsyncJobManager* async_job_manager; AsyncJobManager* async_job_manager;
}; };
@ -86,4 +96,5 @@ bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode)
std::string unsupported_generation_mode_error(SDMode mode); std::string unsupported_generation_mode_error(SDMode mode);
void refresh_lora_cache(ServerRuntime& rt); void refresh_lora_cache(ServerRuntime& rt);
std::string get_lora_full_path(ServerRuntime& rt, const std::string& path); std::string get_lora_full_path(ServerRuntime& rt, const std::string& path);
void refresh_upscaler_cache(ServerRuntime& rt);
int64_t unix_timestamp_now(); int64_t unix_timestamp_now();

View File

@ -289,6 +289,25 @@ typedef struct {
const char* path; const char* path;
} sd_lora_t; } sd_lora_t;
// Selects how the hires pass enlarges the first-pass result before it is
// sampled again. The enumerator order matches the name table used by
// sd_hires_upscaler_name() / str_to_sd_hires_upscaler().
enum sd_hires_upscaler_t {
    // Named "None"; a request with this upscaler has hires turned off.
    SD_HIRES_UPSCALER_NONE,
    // Named "Latent (nearest)"; the latent itself is resized with nearest interpolation.
    SD_HIRES_UPSCALER_LATENT_NEAREST,
    // Named "Model"; the latent is decoded, run through an upscaler model loaded
    // from sd_hires_params_t::model_path, then encoded again.
    SD_HIRES_UPSCALER_MODEL,
    // Number of real upscalers; also what str_to_sd_hires_upscaler() returns
    // when no name matches.
    SD_HIRES_UPSCALER_COUNT,
};
// Settings for the optional hires pass of image generation.
// sd_hires_params_init() fills in the defaults quoted below.
typedef struct {
    // Master switch; default false.
    bool enabled;
    // Upscaling method; default SD_HIRES_UPSCALER_LATENT_NEAREST.
    enum sd_hires_upscaler_t upscaler;
    // Upscaler model file; must be non-empty when upscaler is
    // SD_HIRES_UPSCALER_MODEL, otherwise hires is disabled. Default nullptr.
    const char* model_path;
    // Multiplier applied to the request width/height when neither target
    // dimension is set; default 2.0.
    float scale;
    // Explicit target size in pixels; values <= 0 mean "not set". When only one
    // of the two is set the other is given the same value. Default 0.
    int target_width;
    int target_height;
    // Step count for the hires pass; 0 (the default) reuses the first pass's
    // step count.
    int steps;
    // Clamped to [0.0001, 1] when the request is resolved; default 0.7.
    float denoising_strength;
    // Tile size handed to the model upscaler; default 128.
    int upscale_tile_size;
} sd_hires_params_t;
typedef struct { typedef struct {
const sd_lora_t* loras; const sd_lora_t* loras;
uint32_t lora_count; uint32_t lora_count;
@ -312,6 +331,7 @@ typedef struct {
sd_pm_params_t pm_params; sd_pm_params_t pm_params;
sd_tiling_params_t vae_tiling_params; sd_tiling_params_t vae_tiling_params;
sd_cache_params_t cache; sd_cache_params_t cache;
sd_hires_params_t hires;
} sd_img_gen_params_t; } sd_img_gen_params_t;
typedef struct { typedef struct {
@ -365,8 +385,11 @@ SD_API const char* sd_preview_name(enum preview_t preview);
SD_API enum preview_t str_to_preview(const char* str); SD_API enum preview_t str_to_preview(const char* str);
SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode); SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode);
SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str); SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str);
SD_API const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler);
SD_API enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str);
SD_API void sd_cache_params_init(sd_cache_params_t* cache_params); SD_API void sd_cache_params_init(sd_cache_params_t* cache_params);
SD_API void sd_hires_params_init(sd_hires_params_t* hires_params);
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params); SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params); SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);

View File

@ -17,6 +17,7 @@
#include "pmid.hpp" #include "pmid.hpp"
#include "sample-cache.h" #include "sample-cache.h"
#include "tae.hpp" #include "tae.hpp"
#include "upscaler.h"
#include "vae.hpp" #include "vae.hpp"
#include "latent-preview.h" #include "latent-preview.h"
@ -2113,6 +2114,28 @@ enum lora_apply_mode_t str_to_lora_apply_mode(const char* str) {
return LORA_APPLY_MODE_COUNT; return LORA_APPLY_MODE_COUNT;
} }
// Display names of the hires upscalers, indexed by sd_hires_upscaler_t.
// There must be one entry for every enumerator below SD_HIRES_UPSCALER_COUNT,
// in the same order, because the lookup functions index this table directly.
const char* hires_upscaler_to_str[] = {
    "None",
    "Latent (nearest)",
    "Model",
};
// Returns the display name of `upscaler`, or NONE_STR when the value is not a
// valid enumerator.
//
// The value is range-checked as an int on both sides: callers can pass
// arbitrary integers through this C API, and where the enum's underlying type
// is signed a negative value would otherwise pass the upper-bound test and
// index before the start of the name table.
const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler) {
    const int index = static_cast<int>(upscaler);
    if (index >= 0 && index < SD_HIRES_UPSCALER_COUNT) {
        return hires_upscaler_to_str[index];
    }
    return NONE_STR;
}
// Maps a display name (exact, case-sensitive match) back to its enumerator.
// Returns SD_HIRES_UPSCALER_COUNT when `str` matches no known name.
//
// A null `str` is treated as "no match" rather than being handed to strcmp,
// whose behaviour is undefined for null pointers.
enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str) {
    if (str == nullptr) {
        return SD_HIRES_UPSCALER_COUNT;
    }
    for (int i = 0; i < SD_HIRES_UPSCALER_COUNT; i++) {
        if (strcmp(str, hires_upscaler_to_str[i]) == 0) {
            return static_cast<enum sd_hires_upscaler_t>(i);
        }
    }
    return SD_HIRES_UPSCALER_COUNT;
}
void sd_cache_params_init(sd_cache_params_t* cache_params) { void sd_cache_params_init(sd_cache_params_t* cache_params) {
*cache_params = {}; *cache_params = {};
cache_params->mode = SD_CACHE_DISABLED; cache_params->mode = SD_CACHE_DISABLED;
@ -2141,6 +2164,19 @@ void sd_cache_params_init(sd_cache_params_t* cache_params) {
cache_params->spectrum_stop_percent = 0.9f; cache_params->spectrum_stop_percent = 0.9f;
} }
// Overwrites *hires_params with the default hires settings: disabled,
// latent-nearest upscaler, no model path, 2x scale, no explicit target size,
// step count 0, denoising strength 0.7 and an upscale tile size of 128.
void sd_hires_params_init(sd_hires_params_t* hires_params) {
    sd_hires_params_t defaults = {};

    defaults.enabled    = false;
    defaults.upscaler   = SD_HIRES_UPSCALER_LATENT_NEAREST;
    defaults.model_path = nullptr;

    // Sizing: scale is used only while both target dimensions stay at 0.
    defaults.scale         = 2.0f;
    defaults.target_width  = 0;
    defaults.target_height = 0;

    // Second-pass sampling.
    defaults.steps              = 0;
    defaults.denoising_strength = 0.7f;
    defaults.upscale_tile_size  = 128;

    *hires_params = defaults;
}
void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) { void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
*sd_ctx_params = {}; *sd_ctx_params = {};
sd_ctx_params->vae_decode_only = true; sd_ctx_params->vae_decode_only = true;
@ -2310,6 +2346,7 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) {
sd_img_gen_params->pm_params = {nullptr, 0, nullptr, 20.f}; sd_img_gen_params->pm_params = {nullptr, 0, nullptr, 20.f};
sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f}; sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
sd_cache_params_init(&sd_img_gen_params->cache); sd_cache_params_init(&sd_img_gen_params->cache);
sd_hires_params_init(&sd_img_gen_params->hires);
} }
char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) { char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
@ -2336,7 +2373,8 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
"increase_ref_index: %s\n" "increase_ref_index: %s\n"
"control_strength: %.2f\n" "control_strength: %.2f\n"
"photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n" "photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
"VAE tiling: %s\n", "VAE tiling: %s\n"
"hires: {enabled=%s, upscaler=%s, model_path=%s, scale=%.2f, target=%dx%d, steps=%d, denoising_strength=%.2f}\n",
SAFE_STR(sd_img_gen_params->prompt), SAFE_STR(sd_img_gen_params->prompt),
SAFE_STR(sd_img_gen_params->negative_prompt), SAFE_STR(sd_img_gen_params->negative_prompt),
sd_img_gen_params->clip_skip, sd_img_gen_params->clip_skip,
@ -2353,7 +2391,15 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
sd_img_gen_params->pm_params.style_strength, sd_img_gen_params->pm_params.style_strength,
sd_img_gen_params->pm_params.id_images_count, sd_img_gen_params->pm_params.id_images_count,
SAFE_STR(sd_img_gen_params->pm_params.id_embed_path), SAFE_STR(sd_img_gen_params->pm_params.id_embed_path),
BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled)); BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled),
BOOL_STR(sd_img_gen_params->hires.enabled),
sd_hires_upscaler_name(sd_img_gen_params->hires.upscaler),
SAFE_STR(sd_img_gen_params->hires.model_path),
sd_img_gen_params->hires.scale,
sd_img_gen_params->hires.target_width,
sd_img_gen_params->hires.target_height,
sd_img_gen_params->hires.steps,
sd_img_gen_params->hires.denoising_strength);
const char* cache_mode_str = "disabled"; const char* cache_mode_str = "disabled";
if (sd_img_gen_params->cache.mode == SD_CACHE_EASYCACHE) { if (sd_img_gen_params->cache.mode == SD_CACHE_EASYCACHE) {
cache_mode_str = "easycache"; cache_mode_str = "easycache";
@ -2534,6 +2580,7 @@ struct GenerationRequest {
sd_guidance_params_t guidance = {}; sd_guidance_params_t guidance = {};
sd_guidance_params_t high_noise_guidance = {}; sd_guidance_params_t high_noise_guidance = {};
sd_pm_params_t pm_params = {}; sd_pm_params_t pm_params = {};
sd_hires_params_t hires = {};
int frames = -1; int frames = -1;
float vace_strength = 1.f; float vace_strength = 1.f;
@ -2555,6 +2602,7 @@ struct GenerationRequest {
auto_resize_ref_image = sd_img_gen_params->auto_resize_ref_image; auto_resize_ref_image = sd_img_gen_params->auto_resize_ref_image;
guidance = sd_img_gen_params->sample_params.guidance; guidance = sd_img_gen_params->sample_params.guidance;
pm_params = sd_img_gen_params->pm_params; pm_params = sd_img_gen_params->pm_params;
hires = sd_img_gen_params->hires;
cache_params = &sd_img_gen_params->cache; cache_params = &sd_img_gen_params->cache;
resolve(sd_ctx); resolve(sd_ctx);
} }
@ -2577,26 +2625,76 @@ struct GenerationRequest {
} }
void align_generation_request_size() { void align_generation_request_size() {
align_image_size(&width, &height, "generation request");
}
void align_image_size(int* target_width, int* target_height, const char* label) {
int spatial_multiple = vae_scale_factor * diffusion_model_down_factor; int spatial_multiple = vae_scale_factor * diffusion_model_down_factor;
int width_offset = align_up_offset(width, spatial_multiple); int width_offset = align_up_offset(*target_width, spatial_multiple);
int height_offset = align_up_offset(height, spatial_multiple); int height_offset = align_up_offset(*target_height, spatial_multiple);
if (width_offset <= 0 && height_offset <= 0) { if (width_offset <= 0 && height_offset <= 0) {
return; return;
} }
int original_width = width; int original_width = *target_width;
int original_height = height; int original_height = *target_height;
width += width_offset; *target_width += width_offset;
height += height_offset; *target_height += height_offset;
LOG_WARN("align up %dx%d to %dx%d (multiple=%d)", LOG_WARN("align %s up %dx%d to %dx%d (multiple=%d)",
label,
original_width, original_width,
original_height, original_height,
width, *target_width,
height, *target_height,
spatial_multiple); spatial_multiple);
} }
// Validates and normalizes the hires settings of this request in place.
//
// Does nothing when hires is not enabled. Otherwise hires is switched off
// (hires.enabled = false) when the upscaler is "None", is not a valid
// enumerator, is the model upscaler without a model path, or when no positive
// target size can be derived. On success denoising_strength and steps are
// clamped and target_width/target_height hold a positive size aligned by
// align_image_size().
void resolve_hires() {
    if (!hires.enabled) {
        return;
    }
    if (hires.upscaler == SD_HIRES_UPSCALER_NONE) {
        hires.enabled = false;
        return;
    }
    // A value is invalid when it lies on EITHER side of the enumerator range,
    // so the two bounds must be joined with ||. Compare as int so a negative
    // value is caught regardless of the enum's underlying type.
    const int upscaler_id = static_cast<int>(hires.upscaler);
    if (upscaler_id < SD_HIRES_UPSCALER_NONE || upscaler_id >= SD_HIRES_UPSCALER_COUNT) {
        LOG_WARN("hires upscaler '%d' is invalid, disabling hires", upscaler_id);
        hires.enabled = false;
        return;
    }
    if (hires.upscaler == SD_HIRES_UPSCALER_MODEL && strlen(SAFE_STR(hires.model_path)) == 0) {
        LOG_WARN("hires model upscaler requires a model path, disabling hires");
        hires.enabled = false;
        return;
    }
    // The scale only matters when no explicit target dimension was given.
    if (hires.scale <= 0.f && hires.target_width <= 0 && hires.target_height <= 0) {
        LOG_WARN("hires scale must be positive when no target size is set, disabling hires");
        hires.enabled = false;
        return;
    }

    hires.denoising_strength = std::clamp(hires.denoising_strength, 0.0001f, 1.f);
    hires.steps              = std::max(0, hires.steps);

    if (hires.target_width > 0 && hires.target_height > 0) {
        // Both dimensions given explicitly: use them as they are.
    } else if (hires.target_width > 0) {
        // NOTE(review): a single given dimension is mirrored onto the other,
        // producing a square target regardless of the request's aspect
        // ratio - confirm this is intended.
        hires.target_height = hires.target_width;
    } else if (hires.target_height > 0) {
        hires.target_width = hires.target_height;
    } else {
        hires.target_width  = static_cast<int>(std::round(width * hires.scale));
        hires.target_height = static_cast<int>(std::round(height * hires.scale));
    }

    // Rounding a very small scaled size can still yield 0.
    if (hires.target_width <= 0 || hires.target_height <= 0) {
        LOG_WARN("hires target size is not positive, disabling hires");
        hires.enabled = false;
        return;
    }

    align_image_size(&hires.target_width, &hires.target_height, "hires target");
}
static void resolve_guidance(sd_ctx_t* sd_ctx, static void resolve_guidance(sd_ctx_t* sd_ctx,
sd_guidance_params_t* guidance, sd_guidance_params_t* guidance,
bool* use_uncond, bool* use_uncond,
@ -2637,6 +2735,7 @@ struct GenerationRequest {
void resolve(sd_ctx_t* sd_ctx) { void resolve(sd_ctx_t* sd_ctx) {
align_generation_request_size(); align_generation_request_size();
resolve_hires();
seed = resolve_seed(seed); seed = resolve_seed(seed);
resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_cond); resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_cond);
@ -3149,6 +3248,67 @@ static sd_image_t* decode_image_outputs(sd_ctx_t* sd_ctx,
return result_images; return result_images;
} }
// Produces the enlarged latent that the hires sampling pass starts from.
//
// SD_HIRES_UPSCALER_LATENT_NEAREST: the latent is resized directly with
// nearest interpolation; the first two shape entries become the hires target
// width/height divided by request.vae_scale_factor.
// SD_HIRES_UPSCALER_MODEL: the latent is decoded, passed through `upscaler`,
// interpolated to the exact target size if the model output differs, and
// encoded again. This path needs a non-null `upscaler` and a context that was
// not created with vae_decode_only.
//
// Returns an empty tensor on any failure or for any other upscaler value.
static sd::Tensor<float> upscale_hires_latent(sd_ctx_t* sd_ctx,
                                              const sd::Tensor<float>& latent,
                                              const GenerationRequest& request,
                                              UpscalerGGML* upscaler) {
    const sd_hires_params_t& hires = request.hires;

    if (hires.upscaler == SD_HIRES_UPSCALER_LATENT_NEAREST) {
        std::vector<int64_t> resized_shape = latent.shape();
        if (resized_shape.size() < 2) {
            LOG_ERROR("latent has invalid shape for hires upscale");
            return {};
        }
        resized_shape[0] = hires.target_width / request.vae_scale_factor;
        resized_shape[1] = hires.target_height / request.vae_scale_factor;

        LOG_INFO("hires latent upscale %" PRId64 "x%" PRId64 " -> %" PRId64 "x%" PRId64,
                 latent.shape()[0],
                 latent.shape()[1],
                 resized_shape[0],
                 resized_shape[1]);

        return sd::ops::interpolate(latent, resized_shape, sd::ops::InterpolateMode::Nearest);
    }

    if (hires.upscaler != SD_HIRES_UPSCALER_MODEL) {
        LOG_ERROR("unsupported hires upscaler '%s'", sd_hires_upscaler_name(hires.upscaler));
        return {};
    }

    // Model path: check its preconditions before doing any work.
    if (upscaler == nullptr) {
        LOG_ERROR("hires model upscaler context is null");
        return {};
    }
    if (sd_ctx->sd->vae_decode_only) {
        LOG_ERROR("hires model upscaler requires VAE encoder weights; create the context with vae_decode_only=false");
        return {};
    }

    sd::Tensor<float> pixels = sd_ctx->sd->decode_first_stage(latent);
    if (pixels.empty()) {
        LOG_ERROR("decode_first_stage failed before hires model upscale");
        return {};
    }

    sd::Tensor<float> enlarged = upscaler->upscale_tensor(pixels);
    if (enlarged.empty()) {
        LOG_ERROR("hires model upscale failed");
        return {};
    }

    // The model's output size need not equal the requested target, so fit it
    // to the exact target size before encoding.
    const bool already_target_size = enlarged.shape()[0] == hires.target_width &&
                                     enlarged.shape()[1] == hires.target_height;
    if (!already_target_size) {
        enlarged = sd::ops::interpolate(enlarged,
                                        {hires.target_width,
                                         hires.target_height,
                                         enlarged.shape()[2],
                                         enlarged.shape()[3]});
    }

    sd::Tensor<float> encoded = sd_ctx->sd->encode_first_stage(enlarged);
    if (encoded.empty()) {
        LOG_ERROR("encode_first_stage failed after hires model upscale");
    }
    return encoded;
}
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params) { SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params) {
if (sd_ctx == nullptr || sd_img_gen_params == nullptr) { if (sd_ctx == nullptr || sd_img_gen_params == nullptr) {
return nullptr; return nullptr;
@ -3236,7 +3396,7 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
} }
return nullptr; return nullptr;
} }
if (sd_ctx->sd->free_params_immediately) { if (sd_ctx->sd->free_params_immediately && !request.hires.enabled) {
sd_ctx->sd->diffusion_model->free_params_buffer(); sd_ctx->sd->diffusion_model->free_params_buffer();
} }
int64_t denoise_end = ggml_time_ms(); int64_t denoise_end = ggml_time_ms();
@ -3244,6 +3404,131 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
final_latents.size(), final_latents.size(),
(denoise_end - denoise_start) * 1.0f / 1000); (denoise_end - denoise_start) * 1.0f / 1000);
if (request.hires.enabled && request.hires.target_width > 0) {
LOG_INFO("hires fix: upscaling to %dx%d", request.hires.target_width, request.hires.target_height);
std::unique_ptr<UpscalerGGML> hires_upscaler;
if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL) {
LOG_INFO("hires fix: loading model upscaler from '%s'", request.hires.model_path);
hires_upscaler = std::make_unique<UpscalerGGML>(sd_ctx->sd->n_threads,
false,
request.hires.upscale_tile_size);
if (!hires_upscaler->load_from_file(request.hires.model_path,
sd_ctx->sd->offload_params_to_cpu,
sd_ctx->sd->n_threads)) {
LOG_ERROR("load hires model upscaler failed");
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
return nullptr;
}
}
int hires_steps = request.hires.steps > 0 ? request.hires.steps : plan.sample_steps;
// sd-webui behavior: scale up total steps so trimming by denoising_strength yields exactly hires_steps effective steps,
// unlike img2img which trims from a fixed step count
hires_steps = static_cast<int>(hires_steps / request.hires.denoising_strength);
std::vector<float> hires_sigmas = sd_ctx->sd->denoiser->get_sigmas(
hires_steps,
sd_ctx->sd->get_image_seq_len(request.hires.target_height, request.hires.target_width),
sd_img_gen_params->sample_params.scheduler,
sd_ctx->sd->version);
size_t t_enc = static_cast<size_t>(hires_steps * request.hires.denoising_strength);
if (t_enc >= static_cast<size_t>(hires_steps)) {
t_enc = static_cast<size_t>(hires_steps) - 1;
}
std::vector<float> hires_sigma_sched(hires_sigmas.begin() + hires_steps - static_cast<int>(t_enc) - 1,
hires_sigmas.end());
LOG_INFO("hires fix: %d steps, denoising_strength=%.2f, sigma_sched_size=%zu",
hires_steps,
request.hires.denoising_strength,
hires_sigma_sched.size());
std::vector<sd::Tensor<float>> hires_final_latents;
int64_t hires_denoise_start = ggml_time_ms();
for (int b = 0; b < (int)final_latents.size(); b++) {
int64_t cur_seed = request.seed + b;
sd_ctx->sd->rng->manual_seed(cur_seed);
sd_ctx->sd->sampler_rng->manual_seed(cur_seed);
sd::Tensor<float> upscaled = upscale_hires_latent(sd_ctx,
final_latents[b],
request,
hires_upscaler.get());
if (upscaled.empty()) {
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
return nullptr;
}
sd::Tensor<float> noise = sd::randn_like<float>(upscaled, sd_ctx->sd->rng);
sd::Tensor<float> hires_denoise_mask;
if (!latents.denoise_mask.empty()) {
std::vector<int64_t> mask_shape = latents.denoise_mask.shape();
mask_shape[0] = upscaled.shape()[0];
mask_shape[1] = upscaled.shape()[1];
hires_denoise_mask = sd::ops::interpolate(latents.denoise_mask,
mask_shape,
sd::ops::InterpolateMode::NearestMax);
}
int64_t hires_sample_start = ggml_time_ms();
sd::Tensor<float> x_0 = sd_ctx->sd->sample(sd_ctx->sd->diffusion_model,
true,
upscaled,
std::move(noise),
embeds.cond,
embeds.uncond,
embeds.img_cond,
embeds.id_cond,
latents.control_image,
request.control_strength,
request.guidance,
plan.eta,
request.shifted_timestep,
plan.sample_method,
sd_ctx->sd->is_flow_denoiser(),
hires_sigma_sched,
plan.start_merge_step,
latents.ref_latents,
request.increase_ref_index,
hires_denoise_mask,
sd::Tensor<float>(),
1.f,
request.cache_params);
int64_t hires_sample_end = ggml_time_ms();
if (!x_0.empty()) {
LOG_INFO("hires sampling %d/%d completed, taking %.2fs",
b + 1,
(int)final_latents.size(),
(hires_sample_end - hires_sample_start) * 1.0f / 1000);
hires_final_latents.push_back(std::move(x_0));
continue;
}
LOG_ERROR("hires sampling for image %d/%d failed after %.2fs",
b + 1,
(int)final_latents.size(),
(hires_sample_end - hires_sample_start) * 1.0f / 1000);
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
return nullptr;
}
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->diffusion_model->free_params_buffer();
}
int64_t hires_denoise_end = ggml_time_ms();
LOG_INFO("hires fix completed, taking %.2fs", (hires_denoise_end - hires_denoise_start) * 1.0f / 1000);
final_latents = std::move(hires_final_latents);
}
auto result = decode_image_outputs(sd_ctx, request, final_latents); auto result = decode_image_outputs(sd_ctx, request, final_latents);
if (result == nullptr) { if (result == nullptr) {
return nullptr; return nullptr;

View File

@ -1,27 +1,18 @@
#include "esrgan.hpp" #include "upscaler.h"
#include "ggml_extend.hpp" #include "ggml_extend.hpp"
#include "model.h" #include "model.h"
#include "stable-diffusion.h" #include "stable-diffusion.h"
#include "util.h" #include "util.h"
struct UpscalerGGML { UpscalerGGML::UpscalerGGML(int n_threads,
ggml_backend_t backend = nullptr; // general backend bool direct,
ggml_type model_data_type = GGML_TYPE_F16; int tile_size)
std::shared_ptr<ESRGAN> esrgan_upscaler;
std::string esrgan_path;
int n_threads;
bool direct = false;
int tile_size = 128;
UpscalerGGML(int n_threads,
bool direct = false,
int tile_size = 128)
: n_threads(n_threads), : n_threads(n_threads),
direct(direct), direct(direct),
tile_size(tile_size) { tile_size(tile_size) {
} }
bool load_from_file(const std::string& esrgan_path, bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
bool offload_params_to_cpu, bool offload_params_to_cpu,
int n_threads) { int n_threads) {
ggml_log_set(ggml_log_callback_default, nullptr); ggml_log_set(ggml_log_callback_default, nullptr);
@ -63,9 +54,9 @@ struct UpscalerGGML {
return false; return false;
} }
return true; return true;
} }
sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor) { sd::Tensor<float> UpscalerGGML::upscale_tensor(const sd::Tensor<float>& input_tensor) {
sd::Tensor<float> upscaled; sd::Tensor<float> upscaled;
if (tile_size <= 0 || (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size)) { if (tile_size <= 0 || (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size)) {
upscaled = esrgan_upscaler->compute(n_threads, input_tensor); upscaled = esrgan_upscaler->compute(n_threads, input_tensor);
@ -96,9 +87,9 @@ struct UpscalerGGML {
return {}; return {};
} }
return upscaled; return upscaled;
} }
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor) { sd_image_t UpscalerGGML::upscale(sd_image_t input_image, uint32_t upscale_factor) {
// upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth // upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth
sd_image_t upscaled_image = {0, 0, 0, nullptr}; sd_image_t upscaled_image = {0, 0, 0, nullptr};
int output_width = (int)input_image.width * esrgan_upscaler->scale; int output_width = (int)input_image.width * esrgan_upscaler->scale;
@ -118,8 +109,7 @@ struct UpscalerGGML {
LOG_INFO("input_image_tensor upscaled, taking %.2fs", (t3 - t0) / 1000.0f); LOG_INFO("input_image_tensor upscaled, taking %.2fs", (t3 - t0) / 1000.0f);
upscaled_image = upscaled_data; upscaled_image = upscaled_data;
return upscaled_image; return upscaled_image;
} }
};
struct upscaler_ctx_t { struct upscaler_ctx_t {
UpscalerGGML* upscaler = nullptr; UpscalerGGML* upscaler = nullptr;

31
src/upscaler.h Normal file
View File

@ -0,0 +1,31 @@
#ifndef __SD_UPSCALER_H__
#define __SD_UPSCALER_H__
#include "esrgan.hpp"
#include "stable-diffusion.h"
#include "tensor.hpp"
#include <memory>
#include <string>
// Wraps an ESRGAN upscaler model: loads it from a file and applies it to
// tensors or images.
struct UpscalerGGML {
    ggml_backend_t backend = nullptr; // general backend
    ggml_type model_data_type = GGML_TYPE_F16;
    // The loaded model; upscale_tensor() and upscale() call through this pointer.
    std::shared_ptr<ESRGAN> esrgan_upscaler;
    // presumably the path of the loaded model file - TODO confirm in load_from_file()
    std::string esrgan_path;
    // Thread count handed to the model's compute call.
    int n_threads;
    // NOTE(review): meaning not visible in this header - see load_from_file().
    bool direct = false;
    // upscale_tensor() processes the input in a single pass when this is <= 0
    // or when the first two input dimensions both fit inside it.
    int tile_size = 128;
    // Stores the three arguments in the members of the same name.
    UpscalerGGML(int n_threads,
                 bool direct = false,
                 int tile_size = 128);
    // Loads the model; returns true on success and false on failure.
    bool load_from_file(const std::string& esrgan_path,
                        bool offload_params_to_cpu,
                        int n_threads);
    // Returns the upscaled tensor, or an empty tensor when upscaling fails.
    sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor);
    // Upscales an image by the model's own scale; upscale_factor is noted as
    // unused for RealESRGAN_x4plus_anime_6B.pth in the implementation.
    sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor);
};
#endif // __SD_UPSCALER_H__