mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-05-08 08:18:51 +00:00
feat: add sd-webui style Hires. fix support
This commit is contained in:
parent
44cca3d626
commit
f709e0e189
@ -278,7 +278,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
|
||||
bool valid = cli_params.resolve_and_validate();
|
||||
if (valid && cli_params.mode != METADATA) {
|
||||
valid = ctx_params.resolve_and_validate(cli_params.mode) &&
|
||||
gen_params.resolve_and_validate(cli_params.mode, ctx_params.lora_model_dir);
|
||||
gen_params.resolve_and_validate(cli_params.mode,
|
||||
ctx_params.lora_model_dir,
|
||||
ctx_params.hires_upscalers_dir);
|
||||
}
|
||||
|
||||
if (!valid) {
|
||||
@ -688,6 +690,10 @@ int main(int argc, const char* argv[]) {
|
||||
vae_decode_only = false;
|
||||
}
|
||||
|
||||
if (gen_params.hires_enabled && !gen_params.hires_upscaler_model_path.empty()) {
|
||||
vae_decode_only = false;
|
||||
}
|
||||
|
||||
sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview);
|
||||
|
||||
SDImageVec results;
|
||||
|
||||
@ -351,7 +351,10 @@ ArgOptions SDContextParams::get_options() {
|
||||
"--lora-model-dir",
|
||||
"lora model directory",
|
||||
&lora_model_dir},
|
||||
|
||||
{"",
|
||||
"--hires-upscalers-dir",
|
||||
"highres fix upscaler model directory",
|
||||
&hires_upscalers_dir},
|
||||
{"",
|
||||
"--tensor-type-rules",
|
||||
"weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")",
|
||||
@ -649,6 +652,7 @@ std::string SDContextParams::to_string() const {
|
||||
<< " wtype: " << sd_type_name(wtype) << ",\n"
|
||||
<< " tensor_type_rules: \"" << tensor_type_rules << "\",\n"
|
||||
<< " lora_model_dir: \"" << lora_model_dir << "\",\n"
|
||||
<< " hires_upscalers_dir: \"" << hires_upscalers_dir << "\",\n"
|
||||
<< " photo_maker_path: \"" << photo_maker_path << "\",\n"
|
||||
<< " rng_type: " << sd_rng_type_name(rng_type) << ",\n"
|
||||
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
|
||||
@ -777,6 +781,10 @@ ArgOptions SDGenerationParams::get_options() {
|
||||
"--pm-id-embed-path",
|
||||
"path to PHOTOMAKER v2 id embed",
|
||||
&pm_id_embed_path},
|
||||
{"",
|
||||
"--hires-upscaler",
|
||||
"highres fix upscaler, Latent (nearest) or a model name/path under --hires-upscalers-dir (default: Latent (nearest))",
|
||||
&hires_upscaler},
|
||||
};
|
||||
|
||||
options.int_options = {
|
||||
@ -826,6 +834,22 @@ ArgOptions SDGenerationParams::get_options() {
|
||||
"--upscale-tile-size",
|
||||
"tile size for ESRGAN upscaling (default: 128)",
|
||||
&upscale_tile_size},
|
||||
{"",
|
||||
"--hires-width",
|
||||
"highres fix target width, 0 to use --hires-scale (default: 0)",
|
||||
&hires_width},
|
||||
{"",
|
||||
"--hires-height",
|
||||
"highres fix target height, 0 to use --hires-scale (default: 0)",
|
||||
&hires_height},
|
||||
{"",
|
||||
"--hires-steps",
|
||||
"highres fix second pass sample steps, 0 to reuse --steps (default: 0)",
|
||||
&hires_steps},
|
||||
{"",
|
||||
"--hires-upscale-tile-size",
|
||||
"highres fix upscaler tile size, reserved for model-backed upscalers (default: 128)",
|
||||
&hires_upscale_tile_size},
|
||||
};
|
||||
|
||||
options.float_options = {
|
||||
@ -913,6 +937,14 @@ ArgOptions SDGenerationParams::get_options() {
|
||||
"--vae-tile-overlap",
|
||||
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
|
||||
&vae_tiling_params.target_overlap},
|
||||
{"",
|
||||
"--hires-scale",
|
||||
"highres fix scale when target size is not set (default: 2.0)",
|
||||
&hires_scale},
|
||||
{"",
|
||||
"--hires-denoising-strength",
|
||||
"highres fix second pass denoising strength (default: 0.7)",
|
||||
&hires_denoising_strength},
|
||||
};
|
||||
|
||||
options.bool_options = {
|
||||
@ -936,6 +968,11 @@ ArgOptions SDGenerationParams::get_options() {
|
||||
"process vae in tiles to reduce memory usage",
|
||||
true,
|
||||
&vae_tiling_params.enabled},
|
||||
{"",
|
||||
"--hires",
|
||||
"enable highres fix",
|
||||
true,
|
||||
&hires_enabled},
|
||||
};
|
||||
|
||||
auto on_seed_arg = [&](int argc, const char** argv, int index) {
|
||||
@ -1424,6 +1461,37 @@ static bool parse_lora_json_field(const json& parent,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool resolve_model_file_from_dir(const std::string& model_name,
|
||||
const std::string& model_dir,
|
||||
const std::vector<std::string>& valid_ext,
|
||||
const char* label,
|
||||
std::string& resolved_path) {
|
||||
if (model_dir.empty()) {
|
||||
LOG_ERROR("%s directory is empty", label);
|
||||
return false;
|
||||
}
|
||||
if (model_name.empty() ||
|
||||
model_name.find('/') != std::string::npos ||
|
||||
model_name.find('\\') != std::string::npos ||
|
||||
fs::path(model_name).has_root_path() ||
|
||||
fs::path(model_name).has_extension()) {
|
||||
LOG_ERROR("%s must be a model name without path or extension: %s", label, model_name.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
fs::path model_dir_path = model_dir;
|
||||
for (const auto& ext : valid_ext) {
|
||||
fs::path try_path = model_dir_path / (model_name + ext);
|
||||
if (fs::exists(try_path) && fs::is_regular_file(try_path)) {
|
||||
resolved_path = try_path.lexically_normal().string();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_ERROR("can not find %s %s in %s", label, model_name.c_str(), model_dir_path.lexically_normal().string().c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SDGenerationParams::from_json_str(
|
||||
const std::string& json_str,
|
||||
const std::function<std::string(const std::string&)>& lora_path_resolver) {
|
||||
@ -1487,6 +1555,34 @@ bool SDGenerationParams::from_json_str(
|
||||
load_if_exists("increase_ref_index", increase_ref_index);
|
||||
load_if_exists("embed_image_metadata", embed_image_metadata);
|
||||
|
||||
if (j.contains("hires") && j["hires"].is_object()) {
|
||||
const json& hires_json = j["hires"];
|
||||
if (hires_json.contains("enabled") && hires_json["enabled"].is_boolean()) {
|
||||
hires_enabled = hires_json["enabled"];
|
||||
}
|
||||
if (hires_json.contains("upscaler") && hires_json["upscaler"].is_string()) {
|
||||
hires_upscaler = hires_json["upscaler"];
|
||||
}
|
||||
if (hires_json.contains("scale") && hires_json["scale"].is_number()) {
|
||||
hires_scale = hires_json["scale"];
|
||||
}
|
||||
if (hires_json.contains("target_width") && hires_json["target_width"].is_number_integer()) {
|
||||
hires_width = hires_json["target_width"];
|
||||
}
|
||||
if (hires_json.contains("target_height") && hires_json["target_height"].is_number_integer()) {
|
||||
hires_height = hires_json["target_height"];
|
||||
}
|
||||
if (hires_json.contains("steps") && hires_json["steps"].is_number_integer()) {
|
||||
hires_steps = hires_json["steps"];
|
||||
}
|
||||
if (hires_json.contains("denoising_strength") && hires_json["denoising_strength"].is_number()) {
|
||||
hires_denoising_strength = hires_json["denoising_strength"];
|
||||
}
|
||||
if (hires_json.contains("upscale_tile_size") && hires_json["upscale_tile_size"].is_number_integer()) {
|
||||
hires_upscale_tile_size = hires_json["upscale_tile_size"];
|
||||
}
|
||||
}
|
||||
|
||||
auto parse_sample_params_json = [&](const json& sample_json,
|
||||
sd_sample_params_t& target_params,
|
||||
std::vector<int>& target_skip_layers,
|
||||
@ -1800,7 +1896,7 @@ bool SDGenerationParams::initialize_cache_params() {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict) {
|
||||
bool SDGenerationParams::resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict) {
|
||||
if (high_noise_sample_params.sample_steps <= 0) {
|
||||
high_noise_sample_params.sample_steps = -1;
|
||||
}
|
||||
@ -1819,6 +1915,27 @@ bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict)
|
||||
sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100);
|
||||
}
|
||||
|
||||
hires_upscaler_model_path.clear();
|
||||
if (hires_enabled) {
|
||||
if (hires_upscaler.empty()) {
|
||||
hires_upscaler = "Latent (nearest)";
|
||||
}
|
||||
resolved_hires_upscaler = str_to_sd_hires_upscaler(hires_upscaler.c_str());
|
||||
if (resolved_hires_upscaler == SD_HIRES_UPSCALER_NONE) {
|
||||
hires_enabled = false;
|
||||
} else if (resolved_hires_upscaler == SD_HIRES_UPSCALER_COUNT) {
|
||||
static const std::vector<std::string> valid_ext = {".gguf", ".safetensors", ".pt", ".pth"};
|
||||
if (!resolve_model_file_from_dir(hires_upscaler,
|
||||
hires_upscalers_dir,
|
||||
valid_ext,
|
||||
"hires upscaler",
|
||||
hires_upscaler_model_path)) {
|
||||
return false;
|
||||
}
|
||||
resolved_hires_upscaler = SD_HIRES_UPSCALER_MODEL;
|
||||
}
|
||||
}
|
||||
|
||||
prompt_with_lora = prompt;
|
||||
if (!lora_model_dir.empty()) {
|
||||
extract_and_remove_lora(lora_model_dir);
|
||||
@ -1883,6 +2000,29 @@ bool SDGenerationParams::validate(SDMode mode) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (hires_enabled) {
|
||||
if (hires_width < 0 || hires_height < 0) {
|
||||
LOG_ERROR("error: hires target width and height must be >= 0");
|
||||
return false;
|
||||
}
|
||||
if (hires_scale <= 0.f && hires_width <= 0 && hires_height <= 0) {
|
||||
LOG_ERROR("error: hires scale must be positive when target size is not set");
|
||||
return false;
|
||||
}
|
||||
if (hires_steps < 0) {
|
||||
LOG_ERROR("error: hires steps must be >= 0");
|
||||
return false;
|
||||
}
|
||||
if (hires_denoising_strength <= 0.f || hires_denoising_strength > 1.f) {
|
||||
LOG_ERROR("error: hires denoising strength must be in (0.0, 1.0]");
|
||||
return false;
|
||||
}
|
||||
if (hires_upscale_tile_size < 1) {
|
||||
LOG_ERROR("error: hires upscale tile size must be positive");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == UPSCALE) {
|
||||
if (init_image_path.length() == 0) {
|
||||
LOG_ERROR("error: upscale mode needs an init image (--init-img)\n");
|
||||
@ -1893,8 +2033,11 @@ bool SDGenerationParams::validate(SDMode mode) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SDGenerationParams::resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict) {
|
||||
if (!resolve(lora_model_dir, strict)) {
|
||||
bool SDGenerationParams::resolve_and_validate(SDMode mode,
|
||||
const std::string& lora_model_dir,
|
||||
const std::string& hires_upscalers_dir,
|
||||
bool strict) {
|
||||
if (!resolve(lora_model_dir, hires_upscalers_dir, strict)) {
|
||||
return false;
|
||||
}
|
||||
if (!validate(mode)) {
|
||||
@ -1965,6 +2108,16 @@ sd_img_gen_params_t SDGenerationParams::to_sd_img_gen_params_t() {
|
||||
params.pm_params = pm_params;
|
||||
params.vae_tiling_params = vae_tiling_params;
|
||||
params.cache = cache_params;
|
||||
|
||||
params.hires.enabled = hires_enabled;
|
||||
params.hires.upscaler = resolved_hires_upscaler;
|
||||
params.hires.model_path = hires_upscaler_model_path.empty() ? nullptr : hires_upscaler_model_path.c_str();
|
||||
params.hires.scale = hires_scale;
|
||||
params.hires.target_width = hires_width;
|
||||
params.hires.target_height = hires_height;
|
||||
params.hires.steps = hires_steps;
|
||||
params.hires.denoising_strength = hires_denoising_strength;
|
||||
params.hires.upscale_tile_size = hires_upscale_tile_size;
|
||||
return params;
|
||||
}
|
||||
|
||||
@ -2089,6 +2242,15 @@ std::string SDGenerationParams::to_string() const {
|
||||
<< " seed: " << seed << ",\n"
|
||||
<< " upscale_repeats: " << upscale_repeats << ",\n"
|
||||
<< " upscale_tile_size: " << upscale_tile_size << ",\n"
|
||||
<< " hires: { enabled: " << (hires_enabled ? "true" : "false")
|
||||
<< ", upscaler: \"" << hires_upscaler << "\""
|
||||
<< ", model_path: \"" << hires_upscaler_model_path << "\""
|
||||
<< ", scale: " << hires_scale
|
||||
<< ", target_width: " << hires_width
|
||||
<< ", target_height: " << hires_height
|
||||
<< ", steps: " << hires_steps
|
||||
<< ", denoising_strength: " << hires_denoising_strength
|
||||
<< ", upscale_tile_size: " << hires_upscale_tile_size << " },\n"
|
||||
<< " vae_tiling_params: { "
|
||||
<< vae_tiling_params.enabled << ", "
|
||||
<< vae_tiling_params.tile_size_x << ", "
|
||||
@ -2162,6 +2324,13 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
|
||||
if (gen_params.clip_skip != -1) {
|
||||
parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", ";
|
||||
}
|
||||
if (gen_params.hires_enabled) {
|
||||
parameter_string += "Hires upscale: " + gen_params.hires_upscaler + ", ";
|
||||
parameter_string += "Hires scale: " + std::to_string(gen_params.hires_scale) + ", ";
|
||||
parameter_string += "Hires resize: " + std::to_string(gen_params.hires_width) + "x" + std::to_string(gen_params.hires_height) + ", ";
|
||||
parameter_string += "Hires steps: " + std::to_string(gen_params.hires_steps) + ", ";
|
||||
parameter_string += "Denoising strength: " + std::to_string(gen_params.hires_denoising_strength) + ", ";
|
||||
}
|
||||
parameter_string += "Version: stable-diffusion.cpp";
|
||||
return parameter_string;
|
||||
}
|
||||
|
||||
@ -101,6 +101,7 @@ struct SDContextParams {
|
||||
sd_type_t wtype = SD_TYPE_COUNT;
|
||||
std::string tensor_type_rules;
|
||||
std::string lora_model_dir = ".";
|
||||
std::string hires_upscalers_dir;
|
||||
|
||||
std::map<std::string, std::string> embedding_map;
|
||||
std::vector<sd_embedding_t> embedding_vec;
|
||||
@ -190,12 +191,23 @@ struct SDGenerationParams {
|
||||
int upscale_repeats = 1;
|
||||
int upscale_tile_size = 128;
|
||||
|
||||
bool hires_enabled = false;
|
||||
std::string hires_upscaler = "Latent (nearest)";
|
||||
std::string hires_upscaler_model_path;
|
||||
float hires_scale = 2.f;
|
||||
int hires_width = 0;
|
||||
int hires_height = 0;
|
||||
int hires_steps = 0;
|
||||
float hires_denoising_strength = 0.7f;
|
||||
int hires_upscale_tile_size = 128;
|
||||
|
||||
std::map<std::string, float> lora_map;
|
||||
std::map<std::string, float> high_noise_lora_map;
|
||||
|
||||
// Derived and normalized fields.
|
||||
std::string prompt_with_lora; // for metadata record only
|
||||
std::vector<sd_lora_t> lora_vec;
|
||||
sd_hires_upscaler_t resolved_hires_upscaler;
|
||||
|
||||
// Owned execution payload.
|
||||
SDImageOwner init_image;
|
||||
@ -225,9 +237,12 @@ struct SDGenerationParams {
|
||||
void set_width_and_height_if_unset(int w, int h);
|
||||
int get_resolved_width() const;
|
||||
int get_resolved_height() const;
|
||||
bool resolve(const std::string& lora_model_dir, bool strict = false);
|
||||
bool resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict = false);
|
||||
bool validate(SDMode mode);
|
||||
bool resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict = false);
|
||||
bool resolve_and_validate(SDMode mode,
|
||||
const std::string& lora_model_dir,
|
||||
const std::string& hires_upscalers_dir,
|
||||
bool strict = false);
|
||||
sd_img_gen_params_t to_sd_img_gen_params_t();
|
||||
sd_vid_gen_params_t to_sd_vid_gen_params_t();
|
||||
std::string to_string() const;
|
||||
|
||||
@ -38,6 +38,8 @@ Current generation-related endpoints include:
|
||||
- `POST /sdapi/v1/txt2img`
|
||||
- `POST /sdapi/v1/img2img`
|
||||
- `GET /sdapi/v1/loras`
|
||||
- `GET /sdapi/v1/upscalers`
|
||||
- `GET /sdapi/v1/latent-upscale-modes`
|
||||
- `GET /sdapi/v1/samplers`
|
||||
- `GET /sdapi/v1/schedulers`
|
||||
- `GET /sdapi/v1/sd-models`
|
||||
@ -216,6 +218,13 @@ Currently supported request fields:
|
||||
| `scheduler` | `string` | Scheduler name |
|
||||
| `lora` | `array<object>` | Structured LoRA list |
|
||||
| `extra_images` | `array<string>` | Base64 or data URL images |
|
||||
| `enable_hr` | `boolean` | Enable highres fix for `txt2img` |
|
||||
| `hr_upscaler` | `string` | `Latent (nearest)` or an upscaler model name from `/sdapi/v1/upscalers` |
|
||||
| `hr_scale` | `number` | Highres scale when resize target is not set |
|
||||
| `hr_resize_x` | `integer` | Highres target width, `0` to use scale |
|
||||
| `hr_resize_y` | `integer` | Highres target height, `0` to use scale |
|
||||
| `hr_steps` | `integer` | Highres second-pass sample steps, `0` to reuse `steps` |
|
||||
| `denoising_strength` | `number` | Highres denoising strength for `txt2img` |
|
||||
|
||||
Native extension fields:
|
||||
|
||||
@ -241,6 +250,8 @@ Currently supported request fields:
|
||||
| `inpainting_mask_invert` | `integer` or `boolean` | Treated as invert flag |
|
||||
| `denoising_strength` | `number` | Clamped to `0.0..1.0` |
|
||||
|
||||
Highres fix fields are currently handled for `txt2img`; `img2img` uses `denoising_strength` as image-to-image strength.
|
||||
|
||||
Native extension fields:
|
||||
|
||||
- any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
|
||||
@ -258,6 +269,8 @@ Response fields:
|
||||
Currently exposed:
|
||||
|
||||
- `GET /sdapi/v1/loras`
|
||||
- `GET /sdapi/v1/upscalers`
|
||||
- `GET /sdapi/v1/latent-upscale-modes`
|
||||
- `GET /sdapi/v1/samplers`
|
||||
- `GET /sdapi/v1/schedulers`
|
||||
- `GET /sdapi/v1/sd-models`
|
||||
@ -272,6 +285,24 @@ Response fields:
|
||||
| `[].name` | `string` | Display name derived from file stem |
|
||||
| `[].path` | `string` | Relative path under the configured LoRA directory |
|
||||
|
||||
`GET /sdapi/v1/upscalers`
|
||||
|
||||
| Field | Type | Notes |
|
||||
| --- | --- | --- |
|
||||
| `[].name` | `string` | Built-in name or model stem |
|
||||
| `[].model_name` | `string \| null` | Model family label for model-backed upscalers |
|
||||
| `[].model_path` | `string \| null` | Absolute model path for model-backed upscalers |
|
||||
| `[].model_url` | `string \| null` | Currently always null |
|
||||
| `[].scale` | `integer` | Currently `4` |
|
||||
|
||||
The only built-in entry currently returned is `None`; latent modes such as `Latent (nearest)` are listed by `GET /sdapi/v1/latent-upscale-modes` instead. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
|
||||
|
||||
`GET /sdapi/v1/latent-upscale-modes`
|
||||
|
||||
| Field | Type | Notes |
|
||||
| --- | --- | --- |
|
||||
| `[].name` | `string` | WebUI-compatible latent upscale mode name |
|
||||
|
||||
`GET /sdapi/v1/samplers`
|
||||
|
||||
| Field | Type | Notes |
|
||||
@ -388,6 +419,7 @@ Top-level fields:
|
||||
| `samplers` | `array<string>` | Available sampling methods |
|
||||
| `schedulers` | `array<string>` | Available schedulers |
|
||||
| `loras` | `array<object>` | Available LoRA entries |
|
||||
| `upscalers` | `array<object>` | Available highres upscalers (built-in and model-backed) |
|
||||
| `limits` | `object` | Shared queue and size limits |
|
||||
|
||||
`model`
|
||||
@ -424,6 +456,14 @@ Shared nested fields:
|
||||
| `loras[].name` | `string` |
|
||||
| `loras[].path` | `string` |
|
||||
|
||||
`upscalers`
|
||||
|
||||
| Field | Type | Notes |
|
||||
| --- | --- | --- |
|
||||
| `upscalers[].name` | `string` | Built-in name or model stem; use this value in `hires.upscaler` |
|
||||
|
||||
Built-in entries include `None` and `Latent (nearest)`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
|
||||
|
||||
`limits`
|
||||
|
||||
| Field | Type |
|
||||
@ -482,6 +522,15 @@ Shared default fields used by both `img_gen` and `vid_gen`:
|
||||
| `auto_resize_ref_image` | `boolean` |
|
||||
| `increase_ref_index` | `boolean` |
|
||||
| `control_strength` | `number` |
|
||||
| `hires` | `object` |
|
||||
| `hires.enabled` | `boolean` |
|
||||
| `hires.upscaler` | `string` |
|
||||
| `hires.scale` | `number` |
|
||||
| `hires.target_width` | `integer` |
|
||||
| `hires.target_height` | `integer` |
|
||||
| `hires.steps` | `integer` |
|
||||
| `hires.denoising_strength` | `number` |
|
||||
| `hires.upscale_tile_size` | `integer` |
|
||||
|
||||
`vid_gen`-specific default fields:
|
||||
|
||||
@ -514,6 +563,7 @@ Fields returned in `features_by_mode.img_gen`:
|
||||
- `ref_images`
|
||||
- `lora`
|
||||
- `vae_tiling`
|
||||
- `hires`
|
||||
- `cache`
|
||||
- `cancel_queued`
|
||||
- `cancel_generating`
|
||||
@ -625,6 +675,16 @@ Example:
|
||||
},
|
||||
|
||||
"lora": [],
|
||||
"hires": {
|
||||
"enabled": false,
|
||||
"upscaler": "Latent (nearest)",
|
||||
"scale": 2.0,
|
||||
"target_width": 0,
|
||||
"target_height": 0,
|
||||
"steps": 0,
|
||||
"denoising_strength": 0.7,
|
||||
"upscale_tile_size": 128
|
||||
},
|
||||
|
||||
"vae_tiling_params": {
|
||||
"enabled": false,
|
||||
@ -729,12 +789,23 @@ Other native fields:
|
||||
|
||||
| Field | Type |
|
||||
| --- | --- |
|
||||
| `hires` | `object` |
|
||||
| `hires.enabled` | `boolean` |
|
||||
| `hires.upscaler` | `string` |
|
||||
| `hires.scale` | `number` |
|
||||
| `hires.target_width` | `integer` |
|
||||
| `hires.target_height` | `integer` |
|
||||
| `hires.steps` | `integer` |
|
||||
| `hires.denoising_strength` | `number` |
|
||||
| `hires.upscale_tile_size` | `integer` |
|
||||
| `vae_tiling_params` | `object` |
|
||||
| `cache_mode` | `string` |
|
||||
| `cache_option` | `string` |
|
||||
| `scm_mask` | `string` |
|
||||
| `scm_policy_dynamic` | `boolean` |
|
||||
|
||||
For `hires.upscaler`, use `Latent (nearest)` for latent upscale or an `upscalers[].name` value from `GET /sdcpp/v1/capabilities`. Model-backed upscalers are resolved as `--hires-upscalers-dir / (name + ext)` and must live directly in that directory.
|
||||
|
||||
HTTP-only output fields:
|
||||
|
||||
| Field | Type |
|
||||
|
||||
@ -48,7 +48,9 @@ static void parse_args(int argc,
|
||||
|
||||
if (!svr_params.resolve_and_validate() ||
|
||||
!ctx_params.resolve_and_validate(IMG_GEN) ||
|
||||
!default_gen_params.resolve_and_validate(IMG_GEN, ctx_params.lora_model_dir)) {
|
||||
!default_gen_params.resolve_and_validate(IMG_GEN,
|
||||
ctx_params.lora_model_dir,
|
||||
ctx_params.hires_upscalers_dir)) {
|
||||
print_usage(argv[0], options_vec);
|
||||
exit(1);
|
||||
}
|
||||
@ -95,6 +97,8 @@ int main(int argc, const char** argv) {
|
||||
|
||||
std::vector<LoraEntry> lora_cache;
|
||||
std::mutex lora_mutex;
|
||||
std::vector<UpscalerEntry> upscaler_cache;
|
||||
std::mutex upscaler_mutex;
|
||||
AsyncJobManager async_job_manager;
|
||||
ServerRuntime runtime = {
|
||||
sd_ctx.get(),
|
||||
@ -104,6 +108,8 @@ int main(int argc, const char** argv) {
|
||||
&default_gen_params,
|
||||
&lora_cache,
|
||||
&lora_mutex,
|
||||
&upscaler_cache,
|
||||
&upscaler_mutex,
|
||||
&async_job_manager,
|
||||
};
|
||||
|
||||
|
||||
@ -70,7 +70,7 @@ static bool build_openai_generation_request(const httplib::Request& req,
|
||||
}
|
||||
|
||||
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
|
||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
|
||||
error_message = "invalid params";
|
||||
return false;
|
||||
}
|
||||
@ -212,7 +212,7 @@ static bool build_openai_edit_request(const httplib::Request& req,
|
||||
}
|
||||
|
||||
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
|
||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
|
||||
error_message = "invalid params";
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
#include "routes.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
#include <regex>
|
||||
#include <string_view>
|
||||
@ -35,14 +36,20 @@ static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Returns a copy of `value` in which every character has been passed through
// std::tolower.
static std::string lower_ascii(std::string value) {
    for (char& ch : value) {
        // Route through unsigned char so std::tolower never receives a negative value.
        const unsigned char raw = static_cast<unsigned char>(ch);
        ch                      = static_cast<char>(std::tolower(raw));
    }
    return value;
}
|
||||
|
||||
static enum sample_method_t get_sdapi_sample_method(std::string name) {
|
||||
enum sample_method_t result = str_to_sample_method(name.c_str());
|
||||
if (result != SAMPLE_METHOD_COUNT) {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::transform(name.begin(), name.end(), name.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
name = lower_ascii(name);
|
||||
static const std::unordered_map<std::string_view, sample_method_t> hardcoded{
|
||||
{"euler a", EULER_A_SAMPLE_METHOD},
|
||||
{"k_euler_a", EULER_A_SAMPLE_METHOD},
|
||||
@ -114,6 +121,18 @@ static bool build_sdapi_img_gen_request(const json& j,
|
||||
request.gen_params.width = j.value("width", -1);
|
||||
request.gen_params.height = j.value("height", -1);
|
||||
|
||||
if (!img2img && j.value("enable_hr", false)) {
|
||||
request.gen_params.hires_enabled = true;
|
||||
request.gen_params.hires_scale = j.value("hr_scale", request.gen_params.hires_scale);
|
||||
request.gen_params.hires_width = j.value("hr_resize_x", request.gen_params.hires_width);
|
||||
request.gen_params.hires_height = j.value("hr_resize_y", request.gen_params.hires_height);
|
||||
request.gen_params.hires_steps = j.value("hr_steps", request.gen_params.hires_steps);
|
||||
request.gen_params.hires_denoising_strength =
|
||||
j.value("denoising_strength", request.gen_params.hires_denoising_strength);
|
||||
|
||||
request.gen_params.hires_upscaler = j.value("hr_upscaler", request.gen_params.hires_upscaler);
|
||||
}
|
||||
|
||||
std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt);
|
||||
if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) {
|
||||
error_message = "invalid sd_cpp_extra_args";
|
||||
@ -228,7 +247,7 @@ static bool build_sdapi_img_gen_request(const json& j,
|
||||
}
|
||||
|
||||
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
|
||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
|
||||
error_message = "invalid params";
|
||||
return false;
|
||||
}
|
||||
@ -347,6 +366,45 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
||||
res.set_content(result.dump(), "application/json");
|
||||
});
|
||||
|
||||
// GET /sdapi/v1/upscalers: responds with a JSON array holding the built-in
// "None" entry followed by one entry per cached model-backed upscaler.
svr.Get("/sdapi/v1/upscalers", [runtime](const httplib::Request&, httplib::Response& res) {
    // Refresh the upscaler cache before the listing is built.
    refresh_upscaler_cache(*runtime);

    json listing = json::array();

    // Built-in entry: there is no backing model, so every model_* field is null.
    json none_entry = {
        {"name", "None"},
        {"model_name", nullptr},
        {"model_path", nullptr},
        {"model_url", nullptr},
        {"scale", 4},
    };
    listing.push_back(none_entry);

    {
        // The cache is only read while its mutex is held.
        std::lock_guard<std::mutex> guard(*runtime->upscaler_mutex);
        for (const auto& upscaler : *runtime->upscaler_cache) {
            json model_entry = {
                {"name", upscaler.name},
                {"model_name", upscaler.model_name},
                {"model_path", upscaler.fullpath},
                {"model_url", nullptr},
                {"scale", upscaler.scale},
            };
            listing.push_back(model_entry);
        }
    }

    res.set_content(listing.dump(), "application/json");
});
|
||||
|
||||
// GET /sdapi/v1/latent-upscale-modes: responds with a JSON array whose single
// object names the "Latent (nearest)" mode.
svr.Get("/sdapi/v1/latent-upscale-modes", [](const httplib::Request&, httplib::Response& res) {
    json modes = json::array();
    modes.push_back({{"name", "Latent (nearest)"}});
    res.set_content(modes.dump(), "application/json");
});
|
||||
|
||||
svr.Get("/sdapi/v1/samplers", [runtime](const httplib::Request&, httplib::Response& res) {
|
||||
std::vector<std::string> sampler_names;
|
||||
sampler_names.push_back("default");
|
||||
|
||||
@ -114,6 +114,17 @@ static json make_img_gen_defaults_json(const SDGenerationParams& defaults, const
|
||||
{"increase_ref_index", defaults.increase_ref_index},
|
||||
{"control_strength", defaults.control_strength},
|
||||
{"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)},
|
||||
{"hires",
|
||||
{
|
||||
{"enabled", defaults.hires_enabled},
|
||||
{"upscaler", defaults.hires_upscaler},
|
||||
{"scale", defaults.hires_scale},
|
||||
{"target_width", defaults.hires_width},
|
||||
{"target_height", defaults.hires_height},
|
||||
{"steps", defaults.hires_steps},
|
||||
{"denoising_strength", defaults.hires_denoising_strength},
|
||||
{"upscale_tile_size", defaults.hires_upscale_tile_size},
|
||||
}},
|
||||
{"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
|
||||
{"cache_mode", defaults.cache_mode},
|
||||
{"cache_option", defaults.cache_option},
|
||||
@ -157,6 +168,7 @@ static json make_img_gen_features_json() {
|
||||
{"ref_images", true},
|
||||
{"lora", true},
|
||||
{"vae_tiling", true},
|
||||
{"hires", true},
|
||||
{"cache", true},
|
||||
{"cancel_queued", true},
|
||||
{"cancel_generating", false},
|
||||
@ -179,6 +191,7 @@ static json make_vid_gen_features_json() {
|
||||
|
||||
static json make_capabilities_json(ServerRuntime& runtime) {
|
||||
refresh_lora_cache(runtime);
|
||||
refresh_upscaler_cache(runtime);
|
||||
|
||||
AsyncJobManager& manager = *runtime.async_job_manager;
|
||||
const auto& defaults = *runtime.default_gen_params;
|
||||
@ -190,6 +203,7 @@ static json make_capabilities_json(ServerRuntime& runtime) {
|
||||
json image_output_formats = supported_img_output_formats();
|
||||
json video_output_formats = supported_vid_output_formats();
|
||||
json available_loras = json::array();
|
||||
json available_upscalers = json::array();
|
||||
json supported_modes = json::array();
|
||||
|
||||
for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
|
||||
@ -210,6 +224,21 @@ static json make_capabilities_json(ServerRuntime& runtime) {
|
||||
}
|
||||
}
|
||||
|
||||
available_upscalers.push_back({
|
||||
{"name", "None"},
|
||||
});
|
||||
available_upscalers.push_back({
|
||||
{"name", "Latent (nearest)"},
|
||||
});
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(*runtime.upscaler_mutex);
|
||||
for (const auto& entry : *runtime.upscaler_cache) {
|
||||
available_upscalers.push_back({
|
||||
{"name", entry.name},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (supports_img) {
|
||||
supported_modes.push_back("img_gen");
|
||||
}
|
||||
@ -284,6 +313,7 @@ static json make_capabilities_json(ServerRuntime& runtime) {
|
||||
result["features"] = top_level_features;
|
||||
result["features_by_mode"] = features_by_mode;
|
||||
result["loras"] = available_loras;
|
||||
result["upscalers"] = available_upscalers;
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -307,7 +337,7 @@ static bool parse_img_gen_request(const json& body,
|
||||
return false;
|
||||
}
|
||||
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
|
||||
if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
|
||||
error_message = "invalid generation parameters";
|
||||
return false;
|
||||
}
|
||||
@ -334,7 +364,7 @@ static bool parse_vid_gen_request(const json& body,
|
||||
return false;
|
||||
}
|
||||
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||
if (!request.gen_params.resolve_and_validate(VID_GEN, "", true)) {
|
||||
if (!request.gen_params.resolve_and_validate(VID_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
|
||||
error_message = "invalid generation parameters";
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
#include "runtime.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <filesystem>
|
||||
@ -13,6 +14,18 @@
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
// Returns a copy of `value` in which every character has been passed through
// std::tolower. Each char is converted to unsigned char first, because
// std::tolower has undefined behaviour for negative arguments other than EOF.
static std::string lower_ascii(std::string value) {
    for (char& ch : value) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return value;
}
|
||||
|
||||
// Reports whether the extension of `p`, lower-cased with lower_ascii, is one
// of the model file extensions accepted by the directory scans in this file.
static bool is_supported_model_ext(const fs::path& p) {
    const std::string extension = lower_ascii(p.extension().string());
    for (const char* accepted : {".gguf", ".pt", ".pth", ".safetensors"}) {
        if (extension == accepted) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
static const std::string k_base64_chars =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
@ -241,20 +254,12 @@ void refresh_lora_cache(ServerRuntime& rt) {
|
||||
|
||||
fs::path lora_dir = rt.ctx_params->lora_model_dir;
|
||||
if (fs::exists(lora_dir) && fs::is_directory(lora_dir)) {
|
||||
auto is_lora_ext = [](const fs::path& p) {
|
||||
auto ext = p.extension().string();
|
||||
std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) {
|
||||
return static_cast<char>(std::tolower(c));
|
||||
});
|
||||
return ext == ".gguf" || ext == ".pt" || ext == ".pth" || ext == ".safetensors";
|
||||
};
|
||||
|
||||
for (auto& entry : fs::recursive_directory_iterator(lora_dir)) {
|
||||
if (!entry.is_regular_file()) {
|
||||
continue;
|
||||
}
|
||||
const fs::path& p = entry.path();
|
||||
if (!is_lora_ext(p)) {
|
||||
if (!is_supported_model_ext(p)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -286,6 +291,40 @@ std::string get_lora_full_path(ServerRuntime& rt, const std::string& path) {
|
||||
return it != rt.lora_cache->end() ? it->fullpath : "";
|
||||
}
|
||||
|
||||
// Rebuilds the upscaler cache from the regular files found directly inside
// rt.ctx_params->hires_upscalers_dir (the scan is not recursive).
// The new list is assembled and sorted by name first; *rt.upscaler_mutex is
// taken only for the final assignment into *rt.upscaler_cache.
void refresh_upscaler_cache(ServerRuntime& rt) {
    std::vector<UpscalerEntry> scanned;

    const fs::path upscaler_dir = rt.ctx_params->hires_upscalers_dir;
    const bool dir_usable       = fs::exists(upscaler_dir) && fs::is_directory(upscaler_dir);
    if (dir_usable) {
        for (auto& dir_entry : fs::directory_iterator(upscaler_dir)) {
            if (!dir_entry.is_regular_file()) {
                continue;
            }
            const fs::path& file_path = dir_entry.path();
            if (!is_supported_model_ext(file_path)) {
                continue;
            }

            UpscalerEntry item;
            item.name     = file_path.stem().u8string();
            item.fullpath = fs::absolute(file_path).lexically_normal().u8string();
            // Every discovered file is reported with this fixed model name.
            item.model_name = "ESRGAN_4x";
            item.path       = file_path.filename().u8string();

            scanned.push_back(std::move(item));
        }
    }

    std::sort(scanned.begin(), scanned.end(), [](const UpscalerEntry& lhs, const UpscalerEntry& rhs) {
        return lhs.name < rhs.name;
    });

    std::lock_guard<std::mutex> guard(*rt.upscaler_mutex);
    *rt.upscaler_cache = std::move(scanned);
}
|
||||
|
||||
int64_t unix_timestamp_now() {
|
||||
return std::chrono::duration_cast<std::chrono::seconds>(
|
||||
std::chrono::system_clock::now().time_since_epoch())
|
||||
|
||||
@ -37,6 +37,14 @@ struct LoraEntry {
|
||||
std::string fullpath;
|
||||
};
|
||||
|
||||
// Describes one upscaler model file known to the server.
struct UpscalerEntry {
    std::string name;        // display name of the upscaler
    std::string path;        // file name of the model
    std::string fullpath;    // full path to the model file
    std::string model_name;  // model family label
    int scale{4};            // upscale factor; defaults to 4
};
|
||||
|
||||
struct ServerRuntime {
|
||||
sd_ctx_t* sd_ctx;
|
||||
std::mutex* sd_ctx_mutex;
|
||||
@ -45,6 +53,8 @@ struct ServerRuntime {
|
||||
const SDGenerationParams* default_gen_params;
|
||||
std::vector<LoraEntry>* lora_cache;
|
||||
std::mutex* lora_mutex;
|
||||
std::vector<UpscalerEntry>* upscaler_cache;
|
||||
std::mutex* upscaler_mutex;
|
||||
AsyncJobManager* async_job_manager;
|
||||
};
|
||||
|
||||
@ -86,4 +96,5 @@ bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode)
|
||||
std::string unsupported_generation_mode_error(SDMode mode);
|
||||
void refresh_lora_cache(ServerRuntime& rt);
|
||||
std::string get_lora_full_path(ServerRuntime& rt, const std::string& path);
|
||||
void refresh_upscaler_cache(ServerRuntime& rt);
|
||||
int64_t unix_timestamp_now();
|
||||
|
||||
@ -289,6 +289,25 @@ typedef struct {
|
||||
const char* path;
|
||||
} sd_lora_t;
|
||||
|
||||
// Selects how the first-pass result is enlarged before the hires pass.
// The enumerators are consecutive starting at 0, so SD_HIRES_UPSCALER_COUNT
// equals the number of real upscaler kinds and doubles as the "invalid" value.
enum sd_hires_upscaler_t {
    SD_HIRES_UPSCALER_NONE = 0,        // no upscaling
    SD_HIRES_UPSCALER_LATENT_NEAREST,  // nearest-neighbour resize of the latent
    SD_HIRES_UPSCALER_MODEL,           // upscaler model loaded from a file
    SD_HIRES_UPSCALER_COUNT,           // sentinel; keep last
};
|
||||
|
||||
typedef struct {
|
||||
bool enabled;
|
||||
enum sd_hires_upscaler_t upscaler;
|
||||
const char* model_path;
|
||||
float scale;
|
||||
int target_width;
|
||||
int target_height;
|
||||
int steps;
|
||||
float denoising_strength;
|
||||
int upscale_tile_size;
|
||||
} sd_hires_params_t;
|
||||
|
||||
typedef struct {
|
||||
const sd_lora_t* loras;
|
||||
uint32_t lora_count;
|
||||
@ -312,6 +331,7 @@ typedef struct {
|
||||
sd_pm_params_t pm_params;
|
||||
sd_tiling_params_t vae_tiling_params;
|
||||
sd_cache_params_t cache;
|
||||
sd_hires_params_t hires;
|
||||
} sd_img_gen_params_t;
|
||||
|
||||
typedef struct {
|
||||
@ -365,8 +385,11 @@ SD_API const char* sd_preview_name(enum preview_t preview);
|
||||
SD_API enum preview_t str_to_preview(const char* str);
|
||||
SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode);
|
||||
SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str);
|
||||
SD_API const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler);
|
||||
SD_API enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str);
|
||||
|
||||
SD_API void sd_cache_params_init(sd_cache_params_t* cache_params);
|
||||
SD_API void sd_hires_params_init(sd_hires_params_t* hires_params);
|
||||
|
||||
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
|
||||
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
#include "pmid.hpp"
|
||||
#include "sample-cache.h"
|
||||
#include "tae.hpp"
|
||||
#include "upscaler.h"
|
||||
#include "vae.hpp"
|
||||
|
||||
#include "latent-preview.h"
|
||||
@ -2113,6 +2114,28 @@ enum lora_apply_mode_t str_to_lora_apply_mode(const char* str) {
|
||||
return LORA_APPLY_MODE_COUNT;
|
||||
}
|
||||
|
||||
// Display names of the hires upscalers, indexed by sd_hires_upscaler_t value.
// The order must match the enum declaration.
const char* hires_upscaler_to_str[] = {
    "None",              // SD_HIRES_UPSCALER_NONE
    "Latent (nearest)",  // SD_HIRES_UPSCALER_LATENT_NEAREST
    "Model",             // SD_HIRES_UPSCALER_MODEL
};
|
||||
|
||||
const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler) {
|
||||
if (upscaler < SD_HIRES_UPSCALER_COUNT) {
|
||||
return hires_upscaler_to_str[upscaler];
|
||||
}
|
||||
return NONE_STR;
|
||||
}
|
||||
|
||||
enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str) {
|
||||
for (int i = 0; i < SD_HIRES_UPSCALER_COUNT; i++) {
|
||||
if (!strcmp(str, hires_upscaler_to_str[i])) {
|
||||
return (enum sd_hires_upscaler_t)i;
|
||||
}
|
||||
}
|
||||
return SD_HIRES_UPSCALER_COUNT;
|
||||
}
|
||||
|
||||
void sd_cache_params_init(sd_cache_params_t* cache_params) {
|
||||
*cache_params = {};
|
||||
cache_params->mode = SD_CACHE_DISABLED;
|
||||
@ -2141,6 +2164,19 @@ void sd_cache_params_init(sd_cache_params_t* cache_params) {
|
||||
cache_params->spectrum_stop_percent = 0.9f;
|
||||
}
|
||||
|
||||
// Fills *hires_params with the default hires settings: disabled, latent
// nearest upscaler, no model path, scale 2.0, no explicit target size,
// steps 0, denoising strength 0.7 and an upscale tile size of 128.
// `hires_params` must not be null.
void sd_hires_params_init(sd_hires_params_t* hires_params) {
    sd_hires_params_t defaults = {};

    defaults.enabled            = false;
    defaults.upscaler           = SD_HIRES_UPSCALER_LATENT_NEAREST;
    defaults.model_path         = nullptr;
    defaults.scale              = 2.0f;
    defaults.target_width       = 0;
    defaults.target_height      = 0;
    defaults.steps              = 0;
    defaults.denoising_strength = 0.7f;
    defaults.upscale_tile_size  = 128;

    *hires_params = defaults;
}
|
||||
|
||||
void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
|
||||
*sd_ctx_params = {};
|
||||
sd_ctx_params->vae_decode_only = true;
|
||||
@ -2310,6 +2346,7 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) {
|
||||
sd_img_gen_params->pm_params = {nullptr, 0, nullptr, 20.f};
|
||||
sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
|
||||
sd_cache_params_init(&sd_img_gen_params->cache);
|
||||
sd_hires_params_init(&sd_img_gen_params->hires);
|
||||
}
|
||||
|
||||
char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
||||
@ -2336,7 +2373,8 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
||||
"increase_ref_index: %s\n"
|
||||
"control_strength: %.2f\n"
|
||||
"photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
|
||||
"VAE tiling: %s\n",
|
||||
"VAE tiling: %s\n"
|
||||
"hires: {enabled=%s, upscaler=%s, model_path=%s, scale=%.2f, target=%dx%d, steps=%d, denoising_strength=%.2f}\n",
|
||||
SAFE_STR(sd_img_gen_params->prompt),
|
||||
SAFE_STR(sd_img_gen_params->negative_prompt),
|
||||
sd_img_gen_params->clip_skip,
|
||||
@ -2353,7 +2391,15 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
||||
sd_img_gen_params->pm_params.style_strength,
|
||||
sd_img_gen_params->pm_params.id_images_count,
|
||||
SAFE_STR(sd_img_gen_params->pm_params.id_embed_path),
|
||||
BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled));
|
||||
BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled),
|
||||
BOOL_STR(sd_img_gen_params->hires.enabled),
|
||||
sd_hires_upscaler_name(sd_img_gen_params->hires.upscaler),
|
||||
SAFE_STR(sd_img_gen_params->hires.model_path),
|
||||
sd_img_gen_params->hires.scale,
|
||||
sd_img_gen_params->hires.target_width,
|
||||
sd_img_gen_params->hires.target_height,
|
||||
sd_img_gen_params->hires.steps,
|
||||
sd_img_gen_params->hires.denoising_strength);
|
||||
const char* cache_mode_str = "disabled";
|
||||
if (sd_img_gen_params->cache.mode == SD_CACHE_EASYCACHE) {
|
||||
cache_mode_str = "easycache";
|
||||
@ -2534,6 +2580,7 @@ struct GenerationRequest {
|
||||
sd_guidance_params_t guidance = {};
|
||||
sd_guidance_params_t high_noise_guidance = {};
|
||||
sd_pm_params_t pm_params = {};
|
||||
sd_hires_params_t hires = {};
|
||||
int frames = -1;
|
||||
float vace_strength = 1.f;
|
||||
|
||||
@ -2555,6 +2602,7 @@ struct GenerationRequest {
|
||||
auto_resize_ref_image = sd_img_gen_params->auto_resize_ref_image;
|
||||
guidance = sd_img_gen_params->sample_params.guidance;
|
||||
pm_params = sd_img_gen_params->pm_params;
|
||||
hires = sd_img_gen_params->hires;
|
||||
cache_params = &sd_img_gen_params->cache;
|
||||
resolve(sd_ctx);
|
||||
}
|
||||
@ -2577,26 +2625,76 @@ struct GenerationRequest {
|
||||
}
|
||||
|
||||
// Aligns this request's own width/height in place by delegating to
// align_image_size, labelling any resulting log message "generation request".
void align_generation_request_size() {
    int* const request_width  = &width;
    int* const request_height = &height;
    align_image_size(request_width, request_height, "generation request");
}
|
||||
|
||||
void align_image_size(int* target_width, int* target_height, const char* label) {
|
||||
int spatial_multiple = vae_scale_factor * diffusion_model_down_factor;
|
||||
int width_offset = align_up_offset(width, spatial_multiple);
|
||||
int height_offset = align_up_offset(height, spatial_multiple);
|
||||
int width_offset = align_up_offset(*target_width, spatial_multiple);
|
||||
int height_offset = align_up_offset(*target_height, spatial_multiple);
|
||||
if (width_offset <= 0 && height_offset <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
int original_width = width;
|
||||
int original_height = height;
|
||||
int original_width = *target_width;
|
||||
int original_height = *target_height;
|
||||
|
||||
width += width_offset;
|
||||
height += height_offset;
|
||||
LOG_WARN("align up %dx%d to %dx%d (multiple=%d)",
|
||||
*target_width += width_offset;
|
||||
*target_height += height_offset;
|
||||
LOG_WARN("align %s up %dx%d to %dx%d (multiple=%d)",
|
||||
label,
|
||||
original_width,
|
||||
original_height,
|
||||
width,
|
||||
height,
|
||||
*target_width,
|
||||
*target_height,
|
||||
spatial_multiple);
|
||||
}
|
||||
|
||||
void resolve_hires() {
|
||||
if (!hires.enabled) {
|
||||
return;
|
||||
}
|
||||
if (hires.upscaler == SD_HIRES_UPSCALER_NONE) {
|
||||
hires.enabled = false;
|
||||
return;
|
||||
}
|
||||
if (hires.upscaler < SD_HIRES_UPSCALER_NONE && hires.upscaler >= SD_HIRES_UPSCALER_COUNT) {
|
||||
LOG_WARN("hires upscaler '%d' is invalid, disabling hires", hires.upscaler);
|
||||
hires.enabled = false;
|
||||
return;
|
||||
}
|
||||
if (hires.upscaler == SD_HIRES_UPSCALER_MODEL && strlen(SAFE_STR(hires.model_path)) == 0) {
|
||||
LOG_WARN("hires model upscaler requires a model path, disabling hires");
|
||||
hires.enabled = false;
|
||||
return;
|
||||
}
|
||||
if (hires.scale <= 0.f && hires.target_width <= 0 && hires.target_height <= 0) {
|
||||
LOG_WARN("hires scale must be positive when no target size is set, disabling hires");
|
||||
hires.enabled = false;
|
||||
return;
|
||||
}
|
||||
hires.denoising_strength = std::clamp(hires.denoising_strength, 0.0001f, 1.f);
|
||||
hires.steps = std::max(0, hires.steps);
|
||||
|
||||
if (hires.target_width > 0 && hires.target_height > 0) {
|
||||
// pass
|
||||
} else if (hires.target_width > 0) {
|
||||
hires.target_height = hires.target_width;
|
||||
} else if (hires.target_height > 0) {
|
||||
hires.target_width = hires.target_height;
|
||||
} else {
|
||||
hires.target_width = static_cast<int>(std::round(width * hires.scale));
|
||||
hires.target_height = static_cast<int>(std::round(height * hires.scale));
|
||||
}
|
||||
|
||||
if (hires.target_width <= 0 || hires.target_height <= 0) {
|
||||
LOG_WARN("hires target size is not positive, disabling hires");
|
||||
hires.enabled = false;
|
||||
return;
|
||||
}
|
||||
align_image_size(&hires.target_width, &hires.target_height, "hires target");
|
||||
}
|
||||
|
||||
static void resolve_guidance(sd_ctx_t* sd_ctx,
|
||||
sd_guidance_params_t* guidance,
|
||||
bool* use_uncond,
|
||||
@ -2637,6 +2735,7 @@ struct GenerationRequest {
|
||||
|
||||
void resolve(sd_ctx_t* sd_ctx) {
|
||||
align_generation_request_size();
|
||||
resolve_hires();
|
||||
seed = resolve_seed(seed);
|
||||
|
||||
resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_cond);
|
||||
@ -3149,6 +3248,67 @@ static sd_image_t* decode_image_outputs(sd_ctx_t* sd_ctx,
|
||||
return result_images;
|
||||
}
|
||||
|
||||
// Produces the enlarged latent that the hires pass starts from.
//  - SD_HIRES_UPSCALER_LATENT_NEAREST: resizes `latent` directly with nearest
//    interpolation to the target size divided by request.vae_scale_factor.
//  - SD_HIRES_UPSCALER_MODEL: decodes the latent, runs `upscaler` on the
//    decoded tensor, resizes the result to the exact target size if needed,
//    and encodes it back to a latent.
// Returns an empty tensor on any failure or for any other upscaler kind.
static sd::Tensor<float> upscale_hires_latent(sd_ctx_t* sd_ctx,
                                              const sd::Tensor<float>& latent,
                                              const GenerationRequest& request,
                                              UpscalerGGML* upscaler) {
    if (request.hires.upscaler == SD_HIRES_UPSCALER_LATENT_NEAREST) {
        std::vector<int64_t> resized_shape = latent.shape();
        if (resized_shape.size() < 2) {
            LOG_ERROR("latent has invalid shape for hires upscale");
            return {};
        }
        // Only the first two dimensions are changed; the rest are kept.
        resized_shape[0] = request.hires.target_width / request.vae_scale_factor;
        resized_shape[1] = request.hires.target_height / request.vae_scale_factor;

        LOG_INFO("hires latent upscale %" PRId64 "x%" PRId64 " -> %" PRId64 "x%" PRId64,
                 latent.shape()[0],
                 latent.shape()[1],
                 resized_shape[0],
                 resized_shape[1]);
        return sd::ops::interpolate(latent, resized_shape, sd::ops::InterpolateMode::Nearest);
    }

    if (request.hires.upscaler != SD_HIRES_UPSCALER_MODEL) {
        LOG_ERROR("unsupported hires upscaler '%s'", sd_hires_upscaler_name(request.hires.upscaler));
        return {};
    }

    // Model-based path from here on.
    if (upscaler == nullptr) {
        LOG_ERROR("hires model upscaler context is null");
        return {};
    }
    // The upscaled image has to be encoded again, which needs the VAE encoder.
    if (sd_ctx->sd->vae_decode_only) {
        LOG_ERROR("hires model upscaler requires VAE encoder weights; create the context with vae_decode_only=false");
        return {};
    }

    sd::Tensor<float> decoded_image = sd_ctx->sd->decode_first_stage(latent);
    if (decoded_image.empty()) {
        LOG_ERROR("decode_first_stage failed before hires model upscale");
        return {};
    }

    sd::Tensor<float> upscaled_image = upscaler->upscale_tensor(decoded_image);
    if (upscaled_image.empty()) {
        LOG_ERROR("hires model upscale failed");
        return {};
    }

    // Bring the upscaler output to the exact requested size when it differs.
    const bool size_matches = upscaled_image.shape()[0] == request.hires.target_width &&
                              upscaled_image.shape()[1] == request.hires.target_height;
    if (!size_matches) {
        upscaled_image = sd::ops::interpolate(upscaled_image,
                                              {request.hires.target_width,
                                               request.hires.target_height,
                                               upscaled_image.shape()[2],
                                               upscaled_image.shape()[3]});
    }

    sd::Tensor<float> encoded_latent = sd_ctx->sd->encode_first_stage(upscaled_image);
    if (encoded_latent.empty()) {
        LOG_ERROR("encode_first_stage failed after hires model upscale");
    }
    return encoded_latent;
}
|
||||
|
||||
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params) {
|
||||
if (sd_ctx == nullptr || sd_img_gen_params == nullptr) {
|
||||
return nullptr;
|
||||
@ -3236,7 +3396,7 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
if (sd_ctx->sd->free_params_immediately) {
|
||||
if (sd_ctx->sd->free_params_immediately && !request.hires.enabled) {
|
||||
sd_ctx->sd->diffusion_model->free_params_buffer();
|
||||
}
|
||||
int64_t denoise_end = ggml_time_ms();
|
||||
@ -3244,6 +3404,131 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
|
||||
final_latents.size(),
|
||||
(denoise_end - denoise_start) * 1.0f / 1000);
|
||||
|
||||
if (request.hires.enabled && request.hires.target_width > 0) {
|
||||
LOG_INFO("hires fix: upscaling to %dx%d", request.hires.target_width, request.hires.target_height);
|
||||
|
||||
std::unique_ptr<UpscalerGGML> hires_upscaler;
|
||||
if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL) {
|
||||
LOG_INFO("hires fix: loading model upscaler from '%s'", request.hires.model_path);
|
||||
hires_upscaler = std::make_unique<UpscalerGGML>(sd_ctx->sd->n_threads,
|
||||
false,
|
||||
request.hires.upscale_tile_size);
|
||||
if (!hires_upscaler->load_from_file(request.hires.model_path,
|
||||
sd_ctx->sd->offload_params_to_cpu,
|
||||
sd_ctx->sd->n_threads)) {
|
||||
LOG_ERROR("load hires model upscaler failed");
|
||||
if (sd_ctx->sd->free_params_immediately) {
|
||||
sd_ctx->sd->diffusion_model->free_params_buffer();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
int hires_steps = request.hires.steps > 0 ? request.hires.steps : plan.sample_steps;
|
||||
|
||||
// sd-webui behavior: scale up total steps so trimming by denoising_strength yields exactly hires_steps effective steps,
|
||||
// unlike img2img which trims from a fixed step count
|
||||
hires_steps = static_cast<int>(hires_steps / request.hires.denoising_strength);
|
||||
|
||||
std::vector<float> hires_sigmas = sd_ctx->sd->denoiser->get_sigmas(
|
||||
hires_steps,
|
||||
sd_ctx->sd->get_image_seq_len(request.hires.target_height, request.hires.target_width),
|
||||
sd_img_gen_params->sample_params.scheduler,
|
||||
sd_ctx->sd->version);
|
||||
|
||||
size_t t_enc = static_cast<size_t>(hires_steps * request.hires.denoising_strength);
|
||||
if (t_enc >= static_cast<size_t>(hires_steps)) {
|
||||
t_enc = static_cast<size_t>(hires_steps) - 1;
|
||||
}
|
||||
std::vector<float> hires_sigma_sched(hires_sigmas.begin() + hires_steps - static_cast<int>(t_enc) - 1,
|
||||
hires_sigmas.end());
|
||||
LOG_INFO("hires fix: %d steps, denoising_strength=%.2f, sigma_sched_size=%zu",
|
||||
hires_steps,
|
||||
request.hires.denoising_strength,
|
||||
hires_sigma_sched.size());
|
||||
|
||||
std::vector<sd::Tensor<float>> hires_final_latents;
|
||||
int64_t hires_denoise_start = ggml_time_ms();
|
||||
for (int b = 0; b < (int)final_latents.size(); b++) {
|
||||
int64_t cur_seed = request.seed + b;
|
||||
sd_ctx->sd->rng->manual_seed(cur_seed);
|
||||
sd_ctx->sd->sampler_rng->manual_seed(cur_seed);
|
||||
|
||||
sd::Tensor<float> upscaled = upscale_hires_latent(sd_ctx,
|
||||
final_latents[b],
|
||||
request,
|
||||
hires_upscaler.get());
|
||||
if (upscaled.empty()) {
|
||||
if (sd_ctx->sd->free_params_immediately) {
|
||||
sd_ctx->sd->diffusion_model->free_params_buffer();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
sd::Tensor<float> noise = sd::randn_like<float>(upscaled, sd_ctx->sd->rng);
|
||||
|
||||
sd::Tensor<float> hires_denoise_mask;
|
||||
if (!latents.denoise_mask.empty()) {
|
||||
std::vector<int64_t> mask_shape = latents.denoise_mask.shape();
|
||||
mask_shape[0] = upscaled.shape()[0];
|
||||
mask_shape[1] = upscaled.shape()[1];
|
||||
hires_denoise_mask = sd::ops::interpolate(latents.denoise_mask,
|
||||
mask_shape,
|
||||
sd::ops::InterpolateMode::NearestMax);
|
||||
}
|
||||
|
||||
int64_t hires_sample_start = ggml_time_ms();
|
||||
sd::Tensor<float> x_0 = sd_ctx->sd->sample(sd_ctx->sd->diffusion_model,
|
||||
true,
|
||||
upscaled,
|
||||
std::move(noise),
|
||||
embeds.cond,
|
||||
embeds.uncond,
|
||||
embeds.img_cond,
|
||||
embeds.id_cond,
|
||||
latents.control_image,
|
||||
request.control_strength,
|
||||
request.guidance,
|
||||
plan.eta,
|
||||
request.shifted_timestep,
|
||||
plan.sample_method,
|
||||
sd_ctx->sd->is_flow_denoiser(),
|
||||
hires_sigma_sched,
|
||||
plan.start_merge_step,
|
||||
latents.ref_latents,
|
||||
request.increase_ref_index,
|
||||
hires_denoise_mask,
|
||||
sd::Tensor<float>(),
|
||||
1.f,
|
||||
request.cache_params);
|
||||
int64_t hires_sample_end = ggml_time_ms();
|
||||
if (!x_0.empty()) {
|
||||
LOG_INFO("hires sampling %d/%d completed, taking %.2fs",
|
||||
b + 1,
|
||||
(int)final_latents.size(),
|
||||
(hires_sample_end - hires_sample_start) * 1.0f / 1000);
|
||||
hires_final_latents.push_back(std::move(x_0));
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG_ERROR("hires sampling for image %d/%d failed after %.2fs",
|
||||
b + 1,
|
||||
(int)final_latents.size(),
|
||||
(hires_sample_end - hires_sample_start) * 1.0f / 1000);
|
||||
if (sd_ctx->sd->free_params_immediately) {
|
||||
sd_ctx->sd->diffusion_model->free_params_buffer();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
if (sd_ctx->sd->free_params_immediately) {
|
||||
sd_ctx->sd->diffusion_model->free_params_buffer();
|
||||
}
|
||||
int64_t hires_denoise_end = ggml_time_ms();
|
||||
LOG_INFO("hires fix completed, taking %.2fs", (hires_denoise_end - hires_denoise_start) * 1.0f / 1000);
|
||||
|
||||
final_latents = std::move(hires_final_latents);
|
||||
}
|
||||
|
||||
auto result = decode_image_outputs(sd_ctx, request, final_latents);
|
||||
if (result == nullptr) {
|
||||
return nullptr;
|
||||
|
||||
@ -1,27 +1,18 @@
|
||||
#include "esrgan.hpp"
|
||||
#include "upscaler.h"
|
||||
#include "ggml_extend.hpp"
|
||||
#include "model.h"
|
||||
#include "stable-diffusion.h"
|
||||
#include "util.h"
|
||||
|
||||
struct UpscalerGGML {
|
||||
ggml_backend_t backend = nullptr; // general backend
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
std::shared_ptr<ESRGAN> esrgan_upscaler;
|
||||
std::string esrgan_path;
|
||||
int n_threads;
|
||||
bool direct = false;
|
||||
int tile_size = 128;
|
||||
|
||||
UpscalerGGML(int n_threads,
|
||||
bool direct = false,
|
||||
int tile_size = 128)
|
||||
UpscalerGGML::UpscalerGGML(int n_threads,
|
||||
bool direct,
|
||||
int tile_size)
|
||||
: n_threads(n_threads),
|
||||
direct(direct),
|
||||
tile_size(tile_size) {
|
||||
}
|
||||
|
||||
bool load_from_file(const std::string& esrgan_path,
|
||||
bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
|
||||
bool offload_params_to_cpu,
|
||||
int n_threads) {
|
||||
ggml_log_set(ggml_log_callback_default, nullptr);
|
||||
@ -65,7 +56,7 @@ struct UpscalerGGML {
|
||||
return true;
|
||||
}
|
||||
|
||||
sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor) {
|
||||
sd::Tensor<float> UpscalerGGML::upscale_tensor(const sd::Tensor<float>& input_tensor) {
|
||||
sd::Tensor<float> upscaled;
|
||||
if (tile_size <= 0 || (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size)) {
|
||||
upscaled = esrgan_upscaler->compute(n_threads, input_tensor);
|
||||
@ -98,7 +89,7 @@ struct UpscalerGGML {
|
||||
return upscaled;
|
||||
}
|
||||
|
||||
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor) {
|
||||
sd_image_t UpscalerGGML::upscale(sd_image_t input_image, uint32_t upscale_factor) {
|
||||
// upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth
|
||||
sd_image_t upscaled_image = {0, 0, 0, nullptr};
|
||||
int output_width = (int)input_image.width * esrgan_upscaler->scale;
|
||||
@ -119,7 +110,6 @@ struct UpscalerGGML {
|
||||
upscaled_image = upscaled_data;
|
||||
return upscaled_image;
|
||||
}
|
||||
};
|
||||
|
||||
struct upscaler_ctx_t {
|
||||
UpscalerGGML* upscaler = nullptr;
|
||||
|
||||
31
src/upscaler.h
Normal file
31
src/upscaler.h
Normal file
@ -0,0 +1,31 @@
|
||||
#ifndef __SD_UPSCALER_H__
|
||||
#define __SD_UPSCALER_H__
|
||||
|
||||
#include "esrgan.hpp"
|
||||
#include "stable-diffusion.h"
|
||||
#include "tensor.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
struct UpscalerGGML {
|
||||
ggml_backend_t backend = nullptr; // general backend
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
std::shared_ptr<ESRGAN> esrgan_upscaler;
|
||||
std::string esrgan_path;
|
||||
int n_threads;
|
||||
bool direct = false;
|
||||
int tile_size = 128;
|
||||
|
||||
UpscalerGGML(int n_threads,
|
||||
bool direct = false,
|
||||
int tile_size = 128);
|
||||
|
||||
bool load_from_file(const std::string& esrgan_path,
|
||||
bool offload_params_to_cpu,
|
||||
int n_threads);
|
||||
sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor);
|
||||
sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor);
|
||||
};
|
||||
|
||||
#endif // __SD_UPSCALER_H__
|
||||
Loading…
x
Reference in New Issue
Block a user