feat: add sd-webui style Hires. fix support

2026-05-08 08:18:51 +00:00 · 2026-04-22 23:02:21 +08:00 · 2026-04-22 23:02:21 +08:00 · f709e0e189
commit f709e0e189
parent 44cca3d626
14 changed files with 870 additions and 136 deletions
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@ -278,7 +278,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
    bool valid = cli_params.resolve_and_validate();
    if (valid && cli_params.mode != METADATA) {
        valid = ctx_params.resolve_and_validate(cli_params.mode) &&
-                gen_params.resolve_and_validate(cli_params.mode, ctx_params.lora_model_dir);
+                gen_params.resolve_and_validate(cli_params.mode,
                                                ctx_params.lora_model_dir,
                                                ctx_params.hires_upscalers_dir);
    }
    if (!valid) {
@ -688,6 +690,10 @@ int main(int argc, const char* argv[]) {
        vae_decode_only = false;
    }
    if (gen_params.hires_enabled && !gen_params.hires_upscaler_model_path.empty()) {
        vae_decode_only = false;
    }
    sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview);
    SDImageVec results;
--- a/examples/common/common.cpp
+++ b/examples/common/common.cpp
@ -351,7 +351,10 @@ ArgOptions SDContextParams::get_options() {
         "--lora-model-dir",
         "lora model directory",
         &lora_model_dir},
-
+        {"",
         "--hires-upscalers-dir",
         "highres fix upscaler model directory",
         &hires_upscalers_dir},
        {"",
         "--tensor-type-rules",
         "weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")",
@ -649,6 +652,7 @@ std::string SDContextParams::to_string() const {
        << "  wtype: " << sd_type_name(wtype) << ",\n"
        << "  tensor_type_rules: \"" << tensor_type_rules << "\",\n"
        << "  lora_model_dir: \"" << lora_model_dir << "\",\n"
        << "  hires_upscalers_dir: \"" << hires_upscalers_dir << "\",\n"
        << "  photo_maker_path: \"" << photo_maker_path << "\",\n"
        << "  rng_type: " << sd_rng_type_name(rng_type) << ",\n"
        << "  sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
@ -777,6 +781,10 @@ ArgOptions SDGenerationParams::get_options() {
         "--pm-id-embed-path",
         "path to PHOTOMAKER v2 id embed",
         &pm_id_embed_path},
        {"",
         "--hires-upscaler",
         "highres fix upscaler, Latent (nearest) or a model name/path under --hires-upscalers-dir (default: Latent (nearest))",
         &hires_upscaler},
    };
    options.int_options = {
@ -826,6 +834,22 @@ ArgOptions SDGenerationParams::get_options() {
         "--upscale-tile-size",
         "tile size for ESRGAN upscaling (default: 128)",
         &upscale_tile_size},
        {"",
         "--hires-width",
         "highres fix target width, 0 to use --hires-scale (default: 0)",
         &hires_width},
        {"",
         "--hires-height",
         "highres fix target height, 0 to use --hires-scale (default: 0)",
         &hires_height},
        {"",
         "--hires-steps",
         "highres fix second pass sample steps, 0 to reuse --steps (default: 0)",
         &hires_steps},
        {"",
         "--hires-upscale-tile-size",
         "highres fix upscaler tile size, reserved for model-backed upscalers (default: 128)",
         &hires_upscale_tile_size},
    };
    options.float_options = {
@ -913,6 +937,14 @@ ArgOptions SDGenerationParams::get_options() {
         "--vae-tile-overlap",
         "tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
         &vae_tiling_params.target_overlap},
        {"",
         "--hires-scale",
         "highres fix scale when target size is not set (default: 2.0)",
         &hires_scale},
        {"",
         "--hires-denoising-strength",
         "highres fix second pass denoising strength (default: 0.7)",
         &hires_denoising_strength},
    };
    options.bool_options = {
@ -936,6 +968,11 @@ ArgOptions SDGenerationParams::get_options() {
         "process vae in tiles to reduce memory usage",
         true,
         &vae_tiling_params.enabled},
        {"",
         "--hires",
         "enable highres fix",
         true,
         &hires_enabled},
    };
    auto on_seed_arg = [&](int argc, const char** argv, int index) {
@ -1424,6 +1461,37 @@ static bool parse_lora_json_field(const json& parent,
    return true;
 }
 static bool resolve_model_file_from_dir(const std::string& model_name,
                                        const std::string& model_dir,
                                        const std::vector<std::string>& valid_ext,
                                        const char* label,
                                        std::string& resolved_path) {
    if (model_dir.empty()) {
        LOG_ERROR("%s directory is empty", label);
        return false;
    }
    if (model_name.empty() ||
        model_name.find('/') != std::string::npos ||
        model_name.find('\\') != std::string::npos ||
        fs::path(model_name).has_root_path() ||
        fs::path(model_name).has_extension()) {
        LOG_ERROR("%s must be a model name without path or extension: %s", label, model_name.c_str());
        return false;
    }
    fs::path model_dir_path = model_dir;
    for (const auto& ext : valid_ext) {
        fs::path try_path = model_dir_path / (model_name + ext);
        if (fs::exists(try_path) && fs::is_regular_file(try_path)) {
            resolved_path = try_path.lexically_normal().string();
            return true;
        }
    }
    LOG_ERROR("can not find %s %s in %s", label, model_name.c_str(), model_dir_path.lexically_normal().string().c_str());
    return false;
 }
 bool SDGenerationParams::from_json_str(
    const std::string& json_str,
    const std::function<std::string(const std::string&)>& lora_path_resolver) {
@ -1487,6 +1555,34 @@ bool SDGenerationParams::from_json_str(
    load_if_exists("increase_ref_index", increase_ref_index);
    load_if_exists("embed_image_metadata", embed_image_metadata);
    if (j.contains("hires") && j["hires"].is_object()) {
        const json& hires_json = j["hires"];
        if (hires_json.contains("enabled") && hires_json["enabled"].is_boolean()) {
            hires_enabled = hires_json["enabled"];
        }
        if (hires_json.contains("upscaler") && hires_json["upscaler"].is_string()) {
            hires_upscaler = hires_json["upscaler"];
        }
        if (hires_json.contains("scale") && hires_json["scale"].is_number()) {
            hires_scale = hires_json["scale"];
        }
        if (hires_json.contains("target_width") && hires_json["target_width"].is_number_integer()) {
            hires_width = hires_json["target_width"];
        }
        if (hires_json.contains("target_height") && hires_json["target_height"].is_number_integer()) {
            hires_height = hires_json["target_height"];
        }
        if (hires_json.contains("steps") && hires_json["steps"].is_number_integer()) {
            hires_steps = hires_json["steps"];
        }
        if (hires_json.contains("denoising_strength") && hires_json["denoising_strength"].is_number()) {
            hires_denoising_strength = hires_json["denoising_strength"];
        }
        if (hires_json.contains("upscale_tile_size") && hires_json["upscale_tile_size"].is_number_integer()) {
            hires_upscale_tile_size = hires_json["upscale_tile_size"];
        }
    }
    auto parse_sample_params_json = [&](const json& sample_json,
                                        sd_sample_params_t& target_params,
                                        std::vector<int>& target_skip_layers,
@ -1800,7 +1896,7 @@ bool SDGenerationParams::initialize_cache_params() {
    return true;
 }
-bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict) {
+bool SDGenerationParams::resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict) {
    if (high_noise_sample_params.sample_steps <= 0) {
        high_noise_sample_params.sample_steps = -1;
    }
@ -1819,6 +1915,27 @@ bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict)
        sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100);
    }
    // Hires. fix: normalize the requested upscaler. The model path is always
    // reset first so a path from an earlier resolve() call cannot survive.
    // NOTE(review): resolved_hires_upscaler is only assigned inside the
    // hires_enabled branch below; when hires is disabled it keeps whatever
    // value it had before.
    hires_upscaler_model_path.clear();
    if (hires_enabled) {
        // An empty name falls back to the latent upscaler.
        if (hires_upscaler.empty()) {
            hires_upscaler = "Latent (nearest)";
        }
        resolved_hires_upscaler = str_to_sd_hires_upscaler(hires_upscaler.c_str());
        if (resolved_hires_upscaler == SD_HIRES_UPSCALER_NONE) {
            // Selecting the "none" upscaler simply switches hires off.
            hires_enabled = false;
        } else if (resolved_hires_upscaler == SD_HIRES_UPSCALER_COUNT) {
            // SD_HIRES_UPSCALER_COUNT is handled here as "not a built-in name"
            // (presumably what str_to_sd_hires_upscaler returns for unknown
            // strings - TODO confirm), so the name is looked up as a model
            // file under hires_upscalers_dir. A failed lookup aborts resolve().
            static const std::vector<std::string> valid_ext = {".gguf", ".safetensors", ".pt", ".pth"};
            if (!resolve_model_file_from_dir(hires_upscaler,
                                             hires_upscalers_dir,
                                             valid_ext,
                                             "hires upscaler",
                                             hires_upscaler_model_path)) {
                return false;
            }
            resolved_hires_upscaler = SD_HIRES_UPSCALER_MODEL;
        }
    }
    prompt_with_lora = prompt;
    if (!lora_model_dir.empty()) {
        extract_and_remove_lora(lora_model_dir);
@ -1883,6 +2000,29 @@ bool SDGenerationParams::validate(SDMode mode) {
        return false;
    }
    if (hires_enabled) {
        if (hires_width < 0 || hires_height < 0) {
            LOG_ERROR("error: hires target width and height must be >= 0");
            return false;
        }
        if (hires_scale <= 0.f && hires_width <= 0 && hires_height <= 0) {
            LOG_ERROR("error: hires scale must be positive when target size is not set");
            return false;
        }
        if (hires_steps < 0) {
            LOG_ERROR("error: hires steps must be >= 0");
            return false;
        }
        if (hires_denoising_strength <= 0.f || hires_denoising_strength > 1.f) {
            LOG_ERROR("error: hires denoising strength must be in (0.0, 1.0]");
            return false;
        }
        if (hires_upscale_tile_size < 1) {
            LOG_ERROR("error: hires upscale tile size must be positive");
            return false;
        }
    }
    if (mode == UPSCALE) {
        if (init_image_path.length() == 0) {
            LOG_ERROR("error: upscale mode needs an init image (--init-img)\n");
@ -1893,8 +2033,11 @@ bool SDGenerationParams::validate(SDMode mode) {
    return true;
 }
-bool SDGenerationParams::resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict) {
+bool SDGenerationParams::resolve_and_validate(SDMode mode,
-    if (!resolve(lora_model_dir, strict)) {
+                                              const std::string& lora_model_dir,
                                              const std::string& hires_upscalers_dir,
                                              bool strict) {
    if (!resolve(lora_model_dir, hires_upscalers_dir, strict)) {
        return false;
    }
    if (!validate(mode)) {
@ -1965,6 +2108,16 @@ sd_img_gen_params_t SDGenerationParams::to_sd_img_gen_params_t() {
    params.pm_params             = pm_params;
    params.vae_tiling_params     = vae_tiling_params;
    params.cache                 = cache_params;
    params.hires.enabled            = hires_enabled;
    params.hires.upscaler           = resolved_hires_upscaler;
    params.hires.model_path         = hires_upscaler_model_path.empty() ? nullptr : hires_upscaler_model_path.c_str();
    params.hires.scale              = hires_scale;
    params.hires.target_width       = hires_width;
    params.hires.target_height      = hires_height;
    params.hires.steps              = hires_steps;
    params.hires.denoising_strength = hires_denoising_strength;
    params.hires.upscale_tile_size  = hires_upscale_tile_size;
    return params;
 }
@ -2089,6 +2242,15 @@ std::string SDGenerationParams::to_string() const {
        << "  seed: " << seed << ",\n"
        << "  upscale_repeats: " << upscale_repeats << ",\n"
        << "  upscale_tile_size: " << upscale_tile_size << ",\n"
        << "  hires: { enabled: " << (hires_enabled ? "true" : "false")
        << ", upscaler: \"" << hires_upscaler << "\""
        << ", model_path: \"" << hires_upscaler_model_path << "\""
        << ", scale: " << hires_scale
        << ", target_width: " << hires_width
        << ", target_height: " << hires_height
        << ", steps: " << hires_steps
        << ", denoising_strength: " << hires_denoising_strength
        << ", upscale_tile_size: " << hires_upscale_tile_size << " },\n"
        << "  vae_tiling_params: { "
        << vae_tiling_params.enabled << ", "
        << vae_tiling_params.tile_size_x << ", "
@ -2162,6 +2324,13 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
    if (gen_params.clip_skip != -1) {
        parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", ";
    }
    if (gen_params.hires_enabled) {
        parameter_string += "Hires upscale: " + gen_params.hires_upscaler + ", ";
        parameter_string += "Hires scale: " + std::to_string(gen_params.hires_scale) + ", ";
        parameter_string += "Hires resize: " + std::to_string(gen_params.hires_width) + "x" + std::to_string(gen_params.hires_height) + ", ";
        parameter_string += "Hires steps: " + std::to_string(gen_params.hires_steps) + ", ";
        parameter_string += "Denoising strength: " + std::to_string(gen_params.hires_denoising_strength) + ", ";
    }
    parameter_string += "Version: stable-diffusion.cpp";
    return parameter_string;
 }
--- a/examples/common/common.h
+++ b/examples/common/common.h
@ -101,6 +101,7 @@ struct SDContextParams {
    sd_type_t wtype = SD_TYPE_COUNT;
    std::string tensor_type_rules;
    std::string lora_model_dir = ".";
    std::string hires_upscalers_dir;
    std::map<std::string, std::string> embedding_map;
    std::vector<sd_embedding_t> embedding_vec;
@ -190,12 +191,23 @@ struct SDGenerationParams {
    int upscale_repeats   = 1;
    int upscale_tile_size = 128;
    bool hires_enabled         = false;
    std::string hires_upscaler = "Latent (nearest)";
    std::string hires_upscaler_model_path;
    float hires_scale              = 2.f;
    int hires_width                = 0;
    int hires_height               = 0;
    int hires_steps                = 0;
    float hires_denoising_strength = 0.7f;
    int hires_upscale_tile_size    = 128;
    std::map<std::string, float> lora_map;
    std::map<std::string, float> high_noise_lora_map;
    // Derived and normalized fields.
    std::string prompt_with_lora;  // for metadata record only
    std::vector<sd_lora_t> lora_vec;
    // Set by resolve() only while hires is enabled, but copied unconditionally
    // into sd_img_gen_params_t; default it so that copy never reads an
    // indeterminate value.
    sd_hires_upscaler_t resolved_hires_upscaler = SD_HIRES_UPSCALER_NONE;
    // Owned execution payload.
    SDImageOwner init_image;
@ -225,9 +237,12 @@ struct SDGenerationParams {
    void set_width_and_height_if_unset(int w, int h);
    int get_resolved_width() const;
    int get_resolved_height() const;
-    bool resolve(const std::string& lora_model_dir, bool strict = false);
+    bool resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict = false);
    bool validate(SDMode mode);
-    bool resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict = false);
+    bool resolve_and_validate(SDMode mode,
                              const std::string& lora_model_dir,
                              const std::string& hires_upscalers_dir,
                              bool strict = false);
    sd_img_gen_params_t to_sd_img_gen_params_t();
    sd_vid_gen_params_t to_sd_vid_gen_params_t();
    std::string to_string() const;
--- a/examples/server/api.md
+++ b/examples/server/api.md
@ -38,6 +38,8 @@ Current generation-related endpoints include:
 - `POST /sdapi/v1/txt2img`
 - `POST /sdapi/v1/img2img`
 - `GET /sdapi/v1/loras`
 - `GET /sdapi/v1/upscalers`
 - `GET /sdapi/v1/latent-upscale-modes`
 - `GET /sdapi/v1/samplers`
 - `GET /sdapi/v1/schedulers`
 - `GET /sdapi/v1/sd-models`
@ -216,6 +218,13 @@ Currently supported request fields:
 | `scheduler` | `string` | Scheduler name |
 | `lora` | `array<object>` | Structured LoRA list |
 | `extra_images` | `array<string>` | Base64 or data URL images |
 | `enable_hr` | `boolean` | Enable highres fix for `txt2img` |
 | `hr_upscaler` | `string` | `Latent (nearest)` or an upscaler model name from `/sdapi/v1/upscalers` |
 | `hr_scale` | `number` | Highres scale when resize target is not set |
 | `hr_resize_x` | `integer` | Highres target width, `0` to use scale |
 | `hr_resize_y` | `integer` | Highres target height, `0` to use scale |
 | `hr_steps` | `integer` | Highres second-pass sample steps, `0` to reuse `steps` |
 | `denoising_strength` | `number` | Highres denoising strength for `txt2img` |
 Native extension fields:
@ -241,6 +250,8 @@ Currently supported request fields:
 | `inpainting_mask_invert` | `integer` or `boolean` | Treated as invert flag |
 | `denoising_strength` | `number` | Clamped to `0.0..1.0` |
 Highres fix fields are currently handled for `txt2img`; `img2img` uses `denoising_strength` as image-to-image strength.
 Native extension fields:
 - any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
@ -258,6 +269,8 @@ Response fields:
 Currently exposed:
 - `GET /sdapi/v1/loras`
 - `GET /sdapi/v1/upscalers`
 - `GET /sdapi/v1/latent-upscale-modes`
 - `GET /sdapi/v1/samplers`
 - `GET /sdapi/v1/schedulers`
 - `GET /sdapi/v1/sd-models`
@ -272,6 +285,24 @@ Response fields:
 | `[].name` | `string` | Display name derived from file stem |
 | `[].path` | `string` | Relative path under the configured LoRA directory |
 `GET /sdapi/v1/upscalers`
 | Field | Type | Notes |
 | --- | --- | --- |
 | `[].name` | `string` | Built-in name or model stem |
 | `[].model_name` | `string \| null` | Model family label for model-backed upscalers |
 | `[].model_path` | `string \| null` | Absolute model path for model-backed upscalers |
 | `[].model_url` | `string \| null` | Currently always null |
 | `[].scale` | `integer` | Currently `4` |
 The only built-in entry currently returned is `None`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
 `GET /sdapi/v1/latent-upscale-modes`
 | Field | Type | Notes |
 | --- | --- | --- |
 | `[].name` | `string` | WebUI-compatible latent upscale mode name |
 `GET /sdapi/v1/samplers`
 | Field | Type | Notes |
@ -388,6 +419,7 @@ Top-level fields:
 | `samplers` | `array<string>` | Available sampling methods |
 | `schedulers` | `array<string>` | Available schedulers |
 | `loras` | `array<object>` | Available LoRA entries |
 | `upscalers` | `array<object>` | Available highres upscalers (built-in entries plus model-backed upscalers) |
 | `limits` | `object` | Shared queue and size limits |
 `model`
@ -424,6 +456,14 @@ Shared nested fields:
 | `loras[].name` | `string` |
 | `loras[].path` | `string` |
 `upscalers`
 | Field | Type | Notes |
 | --- | --- | --- |
 | `upscalers[].name` | `string` | Built-in name or model stem; use this value in `hires.upscaler` |
 Built-in entries include `None` and `Latent (nearest)`. Model-backed entries are scanned from the top level of `--hires-upscalers-dir`; subdirectories are not scanned.
 `limits`
 | Field | Type |
@ -482,6 +522,15 @@ Shared default fields used by both `img_gen` and `vid_gen`:
 | `auto_resize_ref_image` | `boolean` |
 | `increase_ref_index` | `boolean` |
 | `control_strength` | `number` |
 | `hires` | `object` |
 | `hires.enabled` | `boolean` |
 | `hires.upscaler` | `string` |
 | `hires.scale` | `number` |
 | `hires.target_width` | `integer` |
 | `hires.target_height` | `integer` |
 | `hires.steps` | `integer` |
 | `hires.denoising_strength` | `number` |
 | `hires.upscale_tile_size` | `integer` |
 `vid_gen`-specific default fields:
@ -514,6 +563,7 @@ Fields returned in `features_by_mode.img_gen`:
 - `ref_images`
 - `lora`
 - `vae_tiling`
 - `hires`
 - `cache`
 - `cancel_queued`
 - `cancel_generating`
@ -625,6 +675,16 @@ Example:
  },
  "lora": [],
  "hires": {
    "enabled": false,
    "upscaler": "Latent (nearest)",
    "scale": 2.0,
    "target_width": 0,
    "target_height": 0,
    "steps": 0,
    "denoising_strength": 0.7,
    "upscale_tile_size": 128
  },
  "vae_tiling_params": {
    "enabled": false,
@ -729,12 +789,23 @@ Other native fields:
 | Field | Type |
 | --- | --- |
 | `hires` | `object` |
 | `hires.enabled` | `boolean` |
 | `hires.upscaler` | `string` |
 | `hires.scale` | `number` |
 | `hires.target_width` | `integer` |
 | `hires.target_height` | `integer` |
 | `hires.steps` | `integer` |
 | `hires.denoising_strength` | `number` |
 | `hires.upscale_tile_size` | `integer` |
 | `vae_tiling_params` | `object` |
 | `cache_mode` | `string` |
 | `cache_option` | `string` |
 | `scm_mask` | `string` |
 | `scm_policy_dynamic` | `boolean` |
 For `hires.upscaler`, use `Latent (nearest)` for latent upscale or an `upscalers[].name` value from `GET /sdcpp/v1/capabilities`. Model-backed upscalers are resolved as `--hires-upscalers-dir / (name + ext)` and must live directly in that directory.
 HTTP-only output fields:
 | Field | Type |
--- a/examples/server/main.cpp
+++ b/examples/server/main.cpp
@ -48,7 +48,9 @@ static void parse_args(int argc,
    if (!svr_params.resolve_and_validate() ||
        !ctx_params.resolve_and_validate(IMG_GEN) ||
-        !default_gen_params.resolve_and_validate(IMG_GEN, ctx_params.lora_model_dir)) {
+        !default_gen_params.resolve_and_validate(IMG_GEN,
                                                 ctx_params.lora_model_dir,
                                                 ctx_params.hires_upscalers_dir)) {
        print_usage(argv[0], options_vec);
        exit(1);
    }
@ -95,6 +97,8 @@ int main(int argc, const char** argv) {
    std::vector<LoraEntry> lora_cache;
    std::mutex lora_mutex;
    std::vector<UpscalerEntry> upscaler_cache;
    std::mutex upscaler_mutex;
    AsyncJobManager async_job_manager;
    ServerRuntime runtime = {
        sd_ctx.get(),
@ -104,6 +108,8 @@ int main(int argc, const char** argv) {
        &default_gen_params,
        &lora_cache,
        &lora_mutex,
        &upscaler_cache,
        &upscaler_mutex,
        &async_job_manager,
    };
--- a/examples/server/routes_openai.cpp
+++ b/examples/server/routes_openai.cpp
@ -70,7 +70,7 @@ static bool build_openai_generation_request(const httplib::Request& req,
    }
    // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
-    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
+    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
        error_message = "invalid params";
        return false;
    }
@ -212,7 +212,7 @@ static bool build_openai_edit_request(const httplib::Request& req,
    }
    // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
-    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
+    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
        error_message = "invalid params";
        return false;
    }
--- a/examples/server/routes_sdapi.cpp
+++ b/examples/server/routes_sdapi.cpp
@ -1,6 +1,7 @@
 #include "routes.h"
 #include <algorithm>
 #include <cctype>
 #include <cstring>
 #include <regex>
 #include <string_view>
@ -35,14 +36,20 @@ static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
    return {};
 }
 // Returns a copy of `value` in which every character has been passed through
 // std::tolower (each char is widened via unsigned char first).
 static std::string lower_ascii(std::string value) {
    for (char& ch : value) {
        const unsigned char raw = static_cast<unsigned char>(ch);
        ch                      = static_cast<char>(std::tolower(raw));
    }
    return value;
 }
 static enum sample_method_t get_sdapi_sample_method(std::string name) {
    enum sample_method_t result = str_to_sample_method(name.c_str());
    if (result != SAMPLE_METHOD_COUNT) {
        return result;
    }
-    std::transform(name.begin(), name.end(), name.begin(),
+    name = lower_ascii(name);
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    static const std::unordered_map<std::string_view, sample_method_t> hardcoded{
        {"euler a", EULER_A_SAMPLE_METHOD},
        {"k_euler_a", EULER_A_SAMPLE_METHOD},
@ -114,6 +121,18 @@ static bool build_sdapi_img_gen_request(const json& j,
    request.gen_params.width                          = j.value("width", -1);
    request.gen_params.height                         = j.value("height", -1);
    if (!img2img && j.value("enable_hr", false)) {
        request.gen_params.hires_enabled = true;
        request.gen_params.hires_scale   = j.value("hr_scale", request.gen_params.hires_scale);
        request.gen_params.hires_width   = j.value("hr_resize_x", request.gen_params.hires_width);
        request.gen_params.hires_height  = j.value("hr_resize_y", request.gen_params.hires_height);
        request.gen_params.hires_steps   = j.value("hr_steps", request.gen_params.hires_steps);
        request.gen_params.hires_denoising_strength =
            j.value("denoising_strength", request.gen_params.hires_denoising_strength);
        request.gen_params.hires_upscaler = j.value("hr_upscaler", request.gen_params.hires_upscaler);
    }
    std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt);
    if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) {
        error_message = "invalid sd_cpp_extra_args";
@ -228,7 +247,7 @@ static bool build_sdapi_img_gen_request(const json& j,
    }
    // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
-    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
+    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
        error_message = "invalid params";
        return false;
    }
@ -347,6 +366,45 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
        res.set_content(result.dump(), "application/json");
    });
    // GET /sdapi/v1/upscalers: returns a JSON array holding one built-in
    // "None" entry followed by one entry per cached model-backed upscaler.
    svr.Get("/sdapi/v1/upscalers", [runtime](const httplib::Request&, httplib::Response& res) {
        // Refresh the cache before answering so the listing reflects the
        // current contents of the upscaler directory.
        refresh_upscaler_cache(*runtime);
        // Built-in entries have no backing model, so the model_* fields are
        // null and the scale is fixed at 4.
        auto make_builtin = [](const char* name) {
            json item;
            item["name"]       = name;
            item["model_name"] = nullptr;
            item["model_path"] = nullptr;
            item["model_url"]  = nullptr;
            item["scale"]      = 4;
            return item;
        };
        json result = json::array();
        result.push_back(make_builtin("None"));
        {
            // Hold the mutex only while reading the shared cache.
            std::lock_guard<std::mutex> lock(*runtime->upscaler_mutex);
            for (const auto& e : *runtime->upscaler_cache) {
                json item;
                item["name"]       = e.name;
                item["model_name"] = e.model_name;
                item["model_path"] = e.fullpath;
                item["model_url"]  = nullptr;
                item["scale"]      = e.scale;
                result.push_back(item);
            }
        }
        res.set_content(result.dump(), "application/json");
    });
    // GET /sdapi/v1/latent-upscale-modes: returns a fixed JSON array whose
    // single entry is the "Latent (nearest)" mode.
    svr.Get("/sdapi/v1/latent-upscale-modes", [](const httplib::Request&, httplib::Response& res) {
        json result = json::array({
            {{"name", "Latent (nearest)"}},
        });
        res.set_content(result.dump(), "application/json");
    });
    svr.Get("/sdapi/v1/samplers", [runtime](const httplib::Request&, httplib::Response& res) {
        std::vector<std::string> sampler_names;
        sampler_names.push_back("default");
--- a/examples/server/routes_sdcpp.cpp
+++ b/examples/server/routes_sdcpp.cpp
@ -114,6 +114,17 @@ static json make_img_gen_defaults_json(const SDGenerationParams& defaults, const
        {"increase_ref_index", defaults.increase_ref_index},
        {"control_strength", defaults.control_strength},
        {"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)},
        {"hires",
         {
             {"enabled", defaults.hires_enabled},
             {"upscaler", defaults.hires_upscaler},
             {"scale", defaults.hires_scale},
             {"target_width", defaults.hires_width},
             {"target_height", defaults.hires_height},
             {"steps", defaults.hires_steps},
             {"denoising_strength", defaults.hires_denoising_strength},
             {"upscale_tile_size", defaults.hires_upscale_tile_size},
         }},
        {"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
        {"cache_mode", defaults.cache_mode},
        {"cache_option", defaults.cache_option},
@ -157,6 +168,7 @@ static json make_img_gen_features_json() {
        {"ref_images", true},
        {"lora", true},
        {"vae_tiling", true},
        {"hires", true},
        {"cache", true},
        {"cancel_queued", true},
        {"cancel_generating", false},
@ -179,6 +191,7 @@ static json make_vid_gen_features_json() {
 static json make_capabilities_json(ServerRuntime& runtime) {
    refresh_lora_cache(runtime);
    refresh_upscaler_cache(runtime);
    AsyncJobManager& manager  = *runtime.async_job_manager;
    const auto& defaults      = *runtime.default_gen_params;
@ -190,6 +203,7 @@ static json make_capabilities_json(ServerRuntime& runtime) {
    json image_output_formats = supported_img_output_formats();
    json video_output_formats = supported_vid_output_formats();
    json available_loras      = json::array();
    json available_upscalers  = json::array();
    json supported_modes      = json::array();
    for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
@ -210,6 +224,21 @@ static json make_capabilities_json(ServerRuntime& runtime) {
        }
    }
    available_upscalers.push_back({
        {"name", "None"},
    });
    available_upscalers.push_back({
        {"name", "Latent (nearest)"},
    });
    {
        std::lock_guard<std::mutex> lock(*runtime.upscaler_mutex);
        for (const auto& entry : *runtime.upscaler_cache) {
            available_upscalers.push_back({
                {"name", entry.name},
            });
        }
    }
    if (supports_img) {
        supported_modes.push_back("img_gen");
    }
@ -284,6 +313,7 @@ static json make_capabilities_json(ServerRuntime& runtime) {
    result["features"]               = top_level_features;
    result["features_by_mode"]       = features_by_mode;
    result["loras"]                  = available_loras;
    result["upscalers"]              = available_upscalers;
    return result;
 }
@ -307,7 +337,7 @@ static bool parse_img_gen_request(const json& body,
        return false;
    }
    // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
-    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
+    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
        error_message = "invalid generation parameters";
        return false;
    }
@ -334,7 +364,7 @@ static bool parse_vid_gen_request(const json& body,
        return false;
    }
    // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
-    if (!request.gen_params.resolve_and_validate(VID_GEN, "", true)) {
+    if (!request.gen_params.resolve_and_validate(VID_GEN, "", runtime.ctx_params->hires_upscalers_dir, true)) {
        error_message = "invalid generation parameters";
        return false;
    }
--- a/examples/server/runtime.cpp
+++ b/examples/server/runtime.cpp
@ -1,6 +1,7 @@
 #include "runtime.h"
 #include <algorithm>
 #include <cctype>
 #include <chrono>
 #include <cstdlib>
 #include <filesystem>
@ -13,6 +14,18 @@
 namespace fs = std::filesystem;
 // Returns a copy of `value` with every byte passed through std::tolower.
 // Each byte is converted to unsigned char before the call so that bytes
 // >= 0x80 are never handed to std::tolower as negative values.
 static std::string lower_ascii(std::string value) {
    for (char& ch : value) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return value;
 }
 // True when the extension of `p`, lower-cased with lower_ascii(), is one of
 // .gguf, .pt, .pth or .safetensors. Used by the LoRA and upscaler directory
 // scans to decide which files to list.
 static bool is_supported_model_ext(const fs::path& p) {
    static const char* const k_model_exts[] = {".gguf", ".pt", ".pth", ".safetensors"};
    const auto ext                          = lower_ascii(p.extension().string());
    for (const char* known_ext : k_model_exts) {
        if (ext == known_ext) {
            return true;
        }
    }
    return false;
 }
 static const std::string k_base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
@ -241,20 +254,12 @@ void refresh_lora_cache(ServerRuntime& rt) {
    fs::path lora_dir = rt.ctx_params->lora_model_dir;
    if (fs::exists(lora_dir) && fs::is_directory(lora_dir)) {
        auto is_lora_ext = [](const fs::path& p) {
            auto ext = p.extension().string();
            std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) {
                return static_cast<char>(std::tolower(c));
            });
            return ext == ".gguf" || ext == ".pt" || ext == ".pth" || ext == ".safetensors";
        };
        for (auto& entry : fs::recursive_directory_iterator(lora_dir)) {
            if (!entry.is_regular_file()) {
                continue;
            }
            const fs::path& p = entry.path();
-            if (!is_lora_ext(p)) {
+            if (!is_supported_model_ext(p)) {
                continue;
            }
@ -286,6 +291,40 @@ std::string get_lora_full_path(ServerRuntime& rt, const std::string& path) {
    return it != rt.lora_cache->end() ? it->fullpath : "";
 }
 // Rebuilds the upscaler cache from the files located directly inside
 // rt.ctx_params->hires_upscalers_dir (fs::directory_iterator is used, so
 // subdirectories are not descended into). Every regular file with a supported
 // model extension becomes one entry; entries are sorted by name and the
 // finished list replaces *rt.upscaler_cache while rt.upscaler_mutex is held.
 // If the path does not exist or is not a directory the cache becomes empty.
 void refresh_upscaler_cache(ServerRuntime& rt) {
    const fs::path scan_dir = rt.ctx_params->hires_upscalers_dir;
    std::vector<UpscalerEntry> found;

    if (fs::exists(scan_dir) && fs::is_directory(scan_dir)) {
        for (auto& dir_item : fs::directory_iterator(scan_dir)) {
            const fs::path& model_file = dir_item.path();
            if (!dir_item.is_regular_file() || !is_supported_model_ext(model_file)) {
                continue;
            }

            UpscalerEntry item;
            item.name       = model_file.stem().u8string();
            item.path       = model_file.filename().u8string();
            item.fullpath   = fs::absolute(model_file).lexically_normal().u8string();
            item.model_name = "ESRGAN_4x";
            found.push_back(std::move(item));
        }
    }

    std::sort(found.begin(), found.end(), [](const UpscalerEntry& lhs, const UpscalerEntry& rhs) {
        return lhs.name < rhs.name;
    });

    // Scanning and sorting happen outside the lock; only the swap-in is guarded.
    std::lock_guard<std::mutex> guard(*rt.upscaler_mutex);
    *rt.upscaler_cache = std::move(found);
 }
 int64_t unix_timestamp_now() {
    return std::chrono::duration_cast<std::chrono::seconds>(
               std::chrono::system_clock::now().time_since_epoch())
--- a/examples/server/runtime.h
+++ b/examples/server/runtime.h
@ -37,6 +37,14 @@ struct LoraEntry {
    std::string fullpath;
 };
 // One upscaler model file discovered by refresh_upscaler_cache().
 struct UpscalerEntry {
    // File stem (filename without its extension); the cache is sorted by this
    // field and it is the "name" reported in the capabilities JSON.
    std::string name;
    // Filename component only (no directory).
    std::string path;
    // Absolute, lexically normalized path to the file.
    std::string fullpath;
    // refresh_upscaler_cache() currently always stores "ESRGAN_4x" here.
    std::string model_name;
    // Defaults to 4; refresh_upscaler_cache() does not change it.
    int scale = 4;
 };
 struct ServerRuntime {
    sd_ctx_t* sd_ctx;
    std::mutex* sd_ctx_mutex;
@ -45,6 +53,8 @@ struct ServerRuntime {
    const SDGenerationParams* default_gen_params;
    std::vector<LoraEntry>* lora_cache;
    std::mutex* lora_mutex;
    std::vector<UpscalerEntry>* upscaler_cache;
    std::mutex* upscaler_mutex;
    AsyncJobManager* async_job_manager;
 };
@ -86,4 +96,5 @@ bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode)
 std::string unsupported_generation_mode_error(SDMode mode);
 void refresh_lora_cache(ServerRuntime& rt);
 std::string get_lora_full_path(ServerRuntime& rt, const std::string& path);
 void refresh_upscaler_cache(ServerRuntime& rt);
 int64_t unix_timestamp_now();
--- a/include/stable-diffusion.h
+++ b/include/stable-diffusion.h
@ -289,6 +289,25 @@ typedef struct {
    const char* path;
 } sd_lora_t;
 // Selects how the hires pass enlarges the first-pass latent before it is
 // sampled again.
 enum sd_hires_upscaler_t {
    // No upscaler: request resolution turns hires off when this is selected.
    SD_HIRES_UPSCALER_NONE,
    // Nearest-neighbour interpolation applied directly to the latent.
    SD_HIRES_UPSCALER_LATENT_NEAREST,
    // Decode the latent, run the upscaler model given by model_path, re-encode.
    SD_HIRES_UPSCALER_MODEL,
    // Number of real values; also what str_to_sd_hires_upscaler() returns when
    // no name matches.
    SD_HIRES_UPSCALER_COUNT,
 };
 // Hires-fix settings carried inside sd_img_gen_params_t.
 // Initialize with sd_hires_params_init() to get the defaults.
 typedef struct {
    // Master switch; when false the remaining fields are ignored.
    bool enabled;
    // Upscaling method; SD_HIRES_UPSCALER_NONE disables hires.
    enum sd_hires_upscaler_t upscaler;
    // Upscaler model file; must be non-empty when upscaler is
    // SD_HIRES_UPSCALER_MODEL, otherwise hires is disabled with a warning.
    const char* model_path;
    // Multiplier applied to the request width/height when neither target
    // dimension is set (default 2.0).
    float scale;
    // Explicit target size. If only one of the two is > 0, its value is copied
    // to the other; if neither is > 0 the size is derived from `scale`.
    int target_width;
    int target_height;
    // Hires sampling steps; values <= 0 fall back to the first-pass step count.
    int steps;
    // Clamped to [0.0001, 1] during request resolution (default 0.7).
    float denoising_strength;
    // Passed as tile_size to the UpscalerGGML constructor (default 128).
    int upscale_tile_size;
 } sd_hires_params_t;
 typedef struct {
    const sd_lora_t* loras;
    uint32_t lora_count;
@ -312,6 +331,7 @@ typedef struct {
    sd_pm_params_t pm_params;
    sd_tiling_params_t vae_tiling_params;
    sd_cache_params_t cache;
    sd_hires_params_t hires;
 } sd_img_gen_params_t;
 typedef struct {
@ -365,8 +385,11 @@ SD_API const char* sd_preview_name(enum preview_t preview);
 SD_API enum preview_t str_to_preview(const char* str);
 SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode);
 SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str);
 SD_API const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler);
 SD_API enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str);
 SD_API void sd_cache_params_init(sd_cache_params_t* cache_params);
 SD_API void sd_hires_params_init(sd_hires_params_t* hires_params);
 SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
 SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);
--- a/src/stable-diffusion.cpp
+++ b/src/stable-diffusion.cpp
@ -17,6 +17,7 @@
 #include "pmid.hpp"
 #include "sample-cache.h"
 #include "tae.hpp"
 #include "upscaler.h"
 #include "vae.hpp"
 #include "latent-preview.h"
@ -2113,6 +2114,28 @@ enum lora_apply_mode_t str_to_lora_apply_mode(const char* str) {
    return LORA_APPLY_MODE_COUNT;
 }
 // Display names indexed by sd_hires_upscaler_t, one entry for each value below
 // SD_HIRES_UPSCALER_COUNT. Read by sd_hires_upscaler_name() and
 // str_to_sd_hires_upscaler(); keep the order in sync with the enum.
 const char* hires_upscaler_to_str[] = {
    "None",
    "Latent (nearest)",
    "Model",
 };
 // Maps an upscaler enum value to its display name from hires_upscaler_to_str.
 // Values at or above SD_HIRES_UPSCALER_COUNT yield NONE_STR.
 const char* sd_hires_upscaler_name(enum sd_hires_upscaler_t upscaler) {
    return upscaler < SD_HIRES_UPSCALER_COUNT ? hires_upscaler_to_str[upscaler] : NONE_STR;
 }
 // Looks up `str` (exact, case-sensitive match) in hires_upscaler_to_str and
 // returns the matching enum value, or SD_HIRES_UPSCALER_COUNT when no entry
 // matches. `str` is passed straight to strcmp, so it must not be null.
 enum sd_hires_upscaler_t str_to_sd_hires_upscaler(const char* str) {
    int idx = 0;
    // Stop at the first matching name; if none matches, idx ends up equal to
    // SD_HIRES_UPSCALER_COUNT, which is exactly the "not found" result.
    while (idx < SD_HIRES_UPSCALER_COUNT && strcmp(str, hires_upscaler_to_str[idx]) != 0) {
        idx++;
    }
    return (enum sd_hires_upscaler_t)idx;
 }
 void sd_cache_params_init(sd_cache_params_t* cache_params) {
    *cache_params                             = {};
    cache_params->mode                        = SD_CACHE_DISABLED;
@ -2141,6 +2164,19 @@ void sd_cache_params_init(sd_cache_params_t* cache_params) {
    cache_params->spectrum_stop_percent       = 0.9f;
 }
 // Overwrites *hires_params with the defaults: hires disabled, latent
 // nearest-neighbour upscaler, no model path, scale 2.0, no explicit target
 // size, 0 steps, denoising strength 0.7 and an upscale tile size of 128.
 void sd_hires_params_init(sd_hires_params_t* hires_params) {
    sd_hires_params_t defaults  = {};
    defaults.enabled            = false;
    defaults.upscaler           = SD_HIRES_UPSCALER_LATENT_NEAREST;
    defaults.model_path         = nullptr;
    defaults.scale              = 2.0f;
    defaults.target_width       = 0;
    defaults.target_height      = 0;
    defaults.steps              = 0;
    defaults.denoising_strength = 0.7f;
    defaults.upscale_tile_size  = 128;

    *hires_params = defaults;
 }
 void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
    *sd_ctx_params                         = {};
    sd_ctx_params->vae_decode_only         = true;
@ -2310,6 +2346,7 @@ void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params) {
    sd_img_gen_params->pm_params         = {nullptr, 0, nullptr, 20.f};
    sd_img_gen_params->vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
    sd_cache_params_init(&sd_img_gen_params->cache);
    sd_hires_params_init(&sd_img_gen_params->hires);
 }
 char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
@ -2336,7 +2373,8 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
             "increase_ref_index: %s\n"
             "control_strength: %.2f\n"
             "photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
-             "VAE tiling: %s\n",
+             "VAE tiling: %s\n"
             "hires: {enabled=%s, upscaler=%s, model_path=%s, scale=%.2f, target=%dx%d, steps=%d, denoising_strength=%.2f}\n",
             SAFE_STR(sd_img_gen_params->prompt),
             SAFE_STR(sd_img_gen_params->negative_prompt),
             sd_img_gen_params->clip_skip,
@ -2353,7 +2391,15 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
             sd_img_gen_params->pm_params.style_strength,
             sd_img_gen_params->pm_params.id_images_count,
             SAFE_STR(sd_img_gen_params->pm_params.id_embed_path),
-             BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled));
+             BOOL_STR(sd_img_gen_params->vae_tiling_params.enabled),
             BOOL_STR(sd_img_gen_params->hires.enabled),
             sd_hires_upscaler_name(sd_img_gen_params->hires.upscaler),
             SAFE_STR(sd_img_gen_params->hires.model_path),
             sd_img_gen_params->hires.scale,
             sd_img_gen_params->hires.target_width,
             sd_img_gen_params->hires.target_height,
             sd_img_gen_params->hires.steps,
             sd_img_gen_params->hires.denoising_strength);
    const char* cache_mode_str = "disabled";
    if (sd_img_gen_params->cache.mode == SD_CACHE_EASYCACHE) {
        cache_mode_str = "easycache";
@ -2534,6 +2580,7 @@ struct GenerationRequest {
    sd_guidance_params_t guidance            = {};
    sd_guidance_params_t high_noise_guidance = {};
    sd_pm_params_t pm_params                 = {};
    sd_hires_params_t hires                  = {};
    int frames                               = -1;
    float vace_strength                      = 1.f;
@ -2555,6 +2602,7 @@ struct GenerationRequest {
        auto_resize_ref_image       = sd_img_gen_params->auto_resize_ref_image;
        guidance                    = sd_img_gen_params->sample_params.guidance;
        pm_params                   = sd_img_gen_params->pm_params;
        hires                       = sd_img_gen_params->hires;
        cache_params                = &sd_img_gen_params->cache;
        resolve(sd_ctx);
    }
@ -2577,26 +2625,76 @@ struct GenerationRequest {
    }
    // Aligns the request's own width/height in place by delegating to
    // align_image_size(); "generation request" is the label used in its log line.
    void align_generation_request_size() {
        align_image_size(&width, &height, "generation request");
    }
    void align_image_size(int* target_width, int* target_height, const char* label) {
        int spatial_multiple = vae_scale_factor * diffusion_model_down_factor;
-        int width_offset     = align_up_offset(width, spatial_multiple);
+        int width_offset     = align_up_offset(*target_width, spatial_multiple);
-        int height_offset    = align_up_offset(height, spatial_multiple);
+        int height_offset    = align_up_offset(*target_height, spatial_multiple);
        if (width_offset <= 0 && height_offset <= 0) {
            return;
        }
-        int original_width  = width;
+        int original_width  = *target_width;
-        int original_height = height;
+        int original_height = *target_height;
-        width += width_offset;
+        *target_width += width_offset;
-        height += height_offset;
+        *target_height += height_offset;
-        LOG_WARN("align up %dx%d to %dx%d (multiple=%d)",
+        LOG_WARN("align %s up %dx%d to %dx%d (multiple=%d)",
                 label,
                 original_width,
                 original_height,
-                 width,
+                 *target_width,
-                 height,
+                 *target_height,
                 spatial_multiple);
    }
    // Validates and normalizes the hires-fix settings held in `hires`.
    //
    // Hires is switched off (hires.enabled = false) when it cannot run:
    //   - the upscaler is SD_HIRES_UPSCALER_NONE or outside the enum range,
    //   - the model upscaler is selected without a model path,
    //   - neither a positive scale nor a positive target dimension is given,
    //   - the resolved target size is not positive.
    // Otherwise denoising_strength is clamped to [0.0001, 1], steps to >= 0,
    // the target size is filled in, and it is aligned via align_image_size().
    //
    // Reads `width`/`height`, so it must run after the request size is aligned.
    void resolve_hires() {
        if (!hires.enabled) {
            return;
        }
        if (hires.upscaler == SD_HIRES_UPSCALER_NONE) {
            hires.enabled = false;
            return;
        }
        // A value is out of range when it violates EITHER bound, so the two
        // comparisons are OR-ed. (With && the test could never be true and
        // values such as SD_HIRES_UPSCALER_COUNT slipped through.)
        if (hires.upscaler < SD_HIRES_UPSCALER_NONE || hires.upscaler >= SD_HIRES_UPSCALER_COUNT) {
            LOG_WARN("hires upscaler '%d' is invalid, disabling hires", hires.upscaler);
            hires.enabled = false;
            return;
        }
        if (hires.upscaler == SD_HIRES_UPSCALER_MODEL && strlen(SAFE_STR(hires.model_path)) == 0) {
            LOG_WARN("hires model upscaler requires a model path, disabling hires");
            hires.enabled = false;
            return;
        }
        if (hires.scale <= 0.f && hires.target_width <= 0 && hires.target_height <= 0) {
            LOG_WARN("hires scale must be positive when no target size is set, disabling hires");
            hires.enabled = false;
            return;
        }
        // The lower bound stays above zero because the hires step count is
        // later divided by denoising_strength.
        hires.denoising_strength = std::clamp(hires.denoising_strength, 0.0001f, 1.f);
        hires.steps              = std::max(0, hires.steps);
        if (hires.target_width > 0 && hires.target_height > 0) {
            // both dimensions given explicitly: use them as-is
        } else if (hires.target_width > 0) {
            // only the width was given: reuse it for the height
            hires.target_height = hires.target_width;
        } else if (hires.target_height > 0) {
            // only the height was given: reuse it for the width
            hires.target_width = hires.target_height;
        } else {
            // no explicit target: derive it from the request size and scale
            hires.target_width  = static_cast<int>(std::round(width * hires.scale));
            hires.target_height = static_cast<int>(std::round(height * hires.scale));
        }
        if (hires.target_width <= 0 || hires.target_height <= 0) {
            LOG_WARN("hires target size is not positive, disabling hires");
            hires.enabled = false;
            return;
        }
        align_image_size(&hires.target_width, &hires.target_height, "hires target");
    }
    static void resolve_guidance(sd_ctx_t* sd_ctx,
                                 sd_guidance_params_t* guidance,
                                 bool* use_uncond,
@ -2637,6 +2735,7 @@ struct GenerationRequest {
    void resolve(sd_ctx_t* sd_ctx) {
        align_generation_request_size();
        resolve_hires();
        seed = resolve_seed(seed);
        resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_cond);
@ -3149,6 +3248,67 @@ static sd_image_t* decode_image_outputs(sd_ctx_t* sd_ctx,
    return result_images;
 }
 // Produces the enlarged latent that seeds the hires sampling pass.
 //
 //  - SD_HIRES_UPSCALER_LATENT_NEAREST: nearest-neighbour interpolation of
 //    `latent`, with the first two dimensions set to the hires target size
 //    divided by request.vae_scale_factor.
 //  - SD_HIRES_UPSCALER_MODEL: decode `latent`, run `upscaler` on the decoded
 //    tensor, interpolate to the exact target size if the model output differs,
 //    then encode the result again. Needs a non-null `upscaler` and a context
 //    that was not created with vae_decode_only.
 //
 // On failure an error is logged and the returned tensor is one the caller
 // detects with empty().
 static sd::Tensor<float> upscale_hires_latent(sd_ctx_t* sd_ctx,
                                               const sd::Tensor<float>& latent,
                                               const GenerationRequest& request,
                                               UpscalerGGML* upscaler) {
    const sd_hires_params_t& hires = request.hires;

    if (hires.upscaler == SD_HIRES_UPSCALER_LATENT_NEAREST) {
        std::vector<int64_t> scaled_shape = latent.shape();
        if (scaled_shape.size() < 2) {
            LOG_ERROR("latent has invalid shape for hires upscale");
            return {};
        }
        scaled_shape[0] = hires.target_width / request.vae_scale_factor;
        scaled_shape[1] = hires.target_height / request.vae_scale_factor;
        LOG_INFO("hires latent upscale %" PRId64 "x%" PRId64 " -> %" PRId64 "x%" PRId64,
                 latent.shape()[0],
                 latent.shape()[1],
                 scaled_shape[0],
                 scaled_shape[1]);
        return sd::ops::interpolate(latent, scaled_shape, sd::ops::InterpolateMode::Nearest);
    }

    if (hires.upscaler != SD_HIRES_UPSCALER_MODEL) {
        LOG_ERROR("unsupported hires upscaler '%s'", sd_hires_upscaler_name(hires.upscaler));
        return {};
    }

    // Model upscaler path: latent -> image -> upscaled image -> latent.
    if (upscaler == nullptr) {
        LOG_ERROR("hires model upscaler context is null");
        return {};
    }
    if (sd_ctx->sd->vae_decode_only) {
        LOG_ERROR("hires model upscaler requires VAE encoder weights; create the context with vae_decode_only=false");
        return {};
    }

    sd::Tensor<float> decoded = sd_ctx->sd->decode_first_stage(latent);
    if (decoded.empty()) {
        LOG_ERROR("decode_first_stage failed before hires model upscale");
        return {};
    }

    sd::Tensor<float> enlarged = upscaler->upscale_tensor(decoded);
    if (enlarged.empty()) {
        LOG_ERROR("hires model upscale failed");
        return {};
    }

    // Resize to the requested target when the model output has another size.
    const bool matches_target = enlarged.shape()[0] == hires.target_width &&
                                enlarged.shape()[1] == hires.target_height;
    if (!matches_target) {
        enlarged = sd::ops::interpolate(enlarged,
                                        {hires.target_width,
                                         hires.target_height,
                                         enlarged.shape()[2],
                                         enlarged.shape()[3]});
    }

    sd::Tensor<float> encoded = sd_ctx->sd->encode_first_stage(enlarged);
    if (encoded.empty()) {
        LOG_ERROR("encode_first_stage failed after hires model upscale");
    }
    return encoded;
 }
 SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params) {
    if (sd_ctx == nullptr || sd_img_gen_params == nullptr) {
        return nullptr;
@ -3236,7 +3396,7 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
        }
        return nullptr;
    }
-    if (sd_ctx->sd->free_params_immediately) {
+    if (sd_ctx->sd->free_params_immediately && !request.hires.enabled) {
        sd_ctx->sd->diffusion_model->free_params_buffer();
    }
    int64_t denoise_end = ggml_time_ms();
@ -3244,6 +3404,131 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
             final_latents.size(),
             (denoise_end - denoise_start) * 1.0f / 1000);
    if (request.hires.enabled && request.hires.target_width > 0) {
        LOG_INFO("hires fix: upscaling to %dx%d", request.hires.target_width, request.hires.target_height);
        std::unique_ptr<UpscalerGGML> hires_upscaler;
        if (request.hires.upscaler == SD_HIRES_UPSCALER_MODEL) {
            LOG_INFO("hires fix: loading model upscaler from '%s'", request.hires.model_path);
            hires_upscaler = std::make_unique<UpscalerGGML>(sd_ctx->sd->n_threads,
                                                            false,
                                                            request.hires.upscale_tile_size);
            if (!hires_upscaler->load_from_file(request.hires.model_path,
                                                sd_ctx->sd->offload_params_to_cpu,
                                                sd_ctx->sd->n_threads)) {
                LOG_ERROR("load hires model upscaler failed");
                if (sd_ctx->sd->free_params_immediately) {
                    sd_ctx->sd->diffusion_model->free_params_buffer();
                }
                return nullptr;
            }
        }
        int hires_steps = request.hires.steps > 0 ? request.hires.steps : plan.sample_steps;
        // sd-webui behavior: scale up total steps so trimming by denoising_strength yields exactly hires_steps effective steps,
        // unlike img2img which trims from a fixed step count
        hires_steps = static_cast<int>(hires_steps / request.hires.denoising_strength);
        std::vector<float> hires_sigmas = sd_ctx->sd->denoiser->get_sigmas(
            hires_steps,
            sd_ctx->sd->get_image_seq_len(request.hires.target_height, request.hires.target_width),
            sd_img_gen_params->sample_params.scheduler,
            sd_ctx->sd->version);
        size_t t_enc = static_cast<size_t>(hires_steps * request.hires.denoising_strength);
        if (t_enc >= static_cast<size_t>(hires_steps)) {
            t_enc = static_cast<size_t>(hires_steps) - 1;
        }
        std::vector<float> hires_sigma_sched(hires_sigmas.begin() + hires_steps - static_cast<int>(t_enc) - 1,
                                             hires_sigmas.end());
        LOG_INFO("hires fix: %d steps, denoising_strength=%.2f, sigma_sched_size=%zu",
                 hires_steps,
                 request.hires.denoising_strength,
                 hires_sigma_sched.size());
        std::vector<sd::Tensor<float>> hires_final_latents;
        int64_t hires_denoise_start = ggml_time_ms();
        for (int b = 0; b < (int)final_latents.size(); b++) {
            int64_t cur_seed = request.seed + b;
            sd_ctx->sd->rng->manual_seed(cur_seed);
            sd_ctx->sd->sampler_rng->manual_seed(cur_seed);
            sd::Tensor<float> upscaled = upscale_hires_latent(sd_ctx,
                                                              final_latents[b],
                                                              request,
                                                              hires_upscaler.get());
            if (upscaled.empty()) {
                if (sd_ctx->sd->free_params_immediately) {
                    sd_ctx->sd->diffusion_model->free_params_buffer();
                }
                return nullptr;
            }
            sd::Tensor<float> noise = sd::randn_like<float>(upscaled, sd_ctx->sd->rng);
            sd::Tensor<float> hires_denoise_mask;
            if (!latents.denoise_mask.empty()) {
                std::vector<int64_t> mask_shape = latents.denoise_mask.shape();
                mask_shape[0]                   = upscaled.shape()[0];
                mask_shape[1]                   = upscaled.shape()[1];
                hires_denoise_mask              = sd::ops::interpolate(latents.denoise_mask,
                                                                       mask_shape,
                                                                       sd::ops::InterpolateMode::NearestMax);
            }
            int64_t hires_sample_start = ggml_time_ms();
            sd::Tensor<float> x_0      = sd_ctx->sd->sample(sd_ctx->sd->diffusion_model,
                                                            true,
                                                            upscaled,
                                                            std::move(noise),
                                                            embeds.cond,
                                                            embeds.uncond,
                                                            embeds.img_cond,
                                                            embeds.id_cond,
                                                            latents.control_image,
                                                            request.control_strength,
                                                            request.guidance,
                                                            plan.eta,
                                                            request.shifted_timestep,
                                                            plan.sample_method,
                                                            sd_ctx->sd->is_flow_denoiser(),
                                                            hires_sigma_sched,
                                                            plan.start_merge_step,
                                                            latents.ref_latents,
                                                            request.increase_ref_index,
                                                            hires_denoise_mask,
                                                            sd::Tensor<float>(),
                                                            1.f,
                                                            request.cache_params);
            int64_t hires_sample_end   = ggml_time_ms();
            if (!x_0.empty()) {
                LOG_INFO("hires sampling %d/%d completed, taking %.2fs",
                         b + 1,
                         (int)final_latents.size(),
                         (hires_sample_end - hires_sample_start) * 1.0f / 1000);
                hires_final_latents.push_back(std::move(x_0));
                continue;
            }
            LOG_ERROR("hires sampling for image %d/%d failed after %.2fs",
                      b + 1,
                      (int)final_latents.size(),
                      (hires_sample_end - hires_sample_start) * 1.0f / 1000);
            if (sd_ctx->sd->free_params_immediately) {
                sd_ctx->sd->diffusion_model->free_params_buffer();
            }
            return nullptr;
        }
        if (sd_ctx->sd->free_params_immediately) {
            sd_ctx->sd->diffusion_model->free_params_buffer();
        }
        int64_t hires_denoise_end = ggml_time_ms();
        LOG_INFO("hires fix completed, taking %.2fs", (hires_denoise_end - hires_denoise_start) * 1.0f / 1000);
        final_latents = std::move(hires_final_latents);
    }
    auto result = decode_image_outputs(sd_ctx, request, final_latents);
    if (result == nullptr) {
        return nullptr;
--- a/src/upscaler.cpp
+++ b/src/upscaler.cpp
@ -1,27 +1,18 @@
-#include "esrgan.hpp"
+#include "upscaler.h"
 #include "ggml_extend.hpp"
 #include "model.h"
 #include "stable-diffusion.h"
 #include "util.h"
-struct UpscalerGGML {
+UpscalerGGML::UpscalerGGML(int n_threads,
-    ggml_backend_t backend    = nullptr;  // general backend
+                           bool direct,
-    ggml_type model_data_type = GGML_TYPE_F16;
+                           int tile_size)
    std::shared_ptr<ESRGAN> esrgan_upscaler;
    std::string esrgan_path;
    int n_threads;
    bool direct   = false;
    int tile_size = 128;
    UpscalerGGML(int n_threads,
                 bool direct   = false,
                 int tile_size = 128)
    : n_threads(n_threads),
      direct(direct),
      tile_size(tile_size) {
-    }
+}
-    bool load_from_file(const std::string& esrgan_path,
+bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
                                  bool offload_params_to_cpu,
                                  int n_threads) {
    ggml_log_set(ggml_log_callback_default, nullptr);
@ -63,9 +54,9 @@ struct UpscalerGGML {
        return false;
    }
    return true;
-    }
+}
-    sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor) {
+sd::Tensor<float> UpscalerGGML::upscale_tensor(const sd::Tensor<float>& input_tensor) {
    sd::Tensor<float> upscaled;
    if (tile_size <= 0 || (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size)) {
        upscaled = esrgan_upscaler->compute(n_threads, input_tensor);
@ -96,9 +87,9 @@ struct UpscalerGGML {
        return {};
    }
    return upscaled;
-    }
+}
-    sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor) {
+sd_image_t UpscalerGGML::upscale(sd_image_t input_image, uint32_t upscale_factor) {
    // upscale_factor, unused for RealESRGAN_x4plus_anime_6B.pth
    sd_image_t upscaled_image = {0, 0, 0, nullptr};
    int output_width          = (int)input_image.width * esrgan_upscaler->scale;
@ -118,8 +109,7 @@ struct UpscalerGGML {
    LOG_INFO("input_image_tensor upscaled, taking %.2fs", (t3 - t0) / 1000.0f);
    upscaled_image = upscaled_data;
    return upscaled_image;
-    }
+}
 };
 struct upscaler_ctx_t {
    UpscalerGGML* upscaler = nullptr;
--- a/src/upscaler.h
+++ b/src/upscaler.h
@ -0,0 +1,31 @@
 #ifndef __SD_UPSCALER_H__
 #define __SD_UPSCALER_H__
 #include "esrgan.hpp"
 #include "stable-diffusion.h"
 #include "tensor.hpp"
 #include <memory>
 #include <string>
 // Wrapper around an ESRGAN model used for image/tensor upscaling.
 // Member functions are defined in upscaler.cpp.
 struct UpscalerGGML {
    ggml_backend_t backend    = nullptr;  // general backend
    ggml_type model_data_type = GGML_TYPE_F16;
    // The ESRGAN model; upscale_tensor() and upscale() call into it.
    std::shared_ptr<ESRGAN> esrgan_upscaler;
    std::string esrgan_path;
    // Thread count forwarded to esrgan_upscaler->compute().
    int n_threads;
    bool direct   = false;
    // upscale_tensor() runs the model on the whole input in one call when this
    // is <= 0 or when the input's first two dimensions both fit within it.
    int tile_size = 128;
    // Stores the three arguments in the members of the same name.
    UpscalerGGML(int n_threads,
                 bool direct   = false,
                 int tile_size = 128);
    // Returns true on success and false on failure.
    bool load_from_file(const std::string& esrgan_path,
                        bool offload_params_to_cpu,
                        int n_threads);
    // Returns the upscaled tensor, or a default-constructed tensor on failure.
    sd::Tensor<float> upscale_tensor(const sd::Tensor<float>& input_tensor);
    // Output width is input width times esrgan_upscaler->scale; the
    // implementation notes upscale_factor as unused for
    // RealESRGAN_x4plus_anime_6B.pth.
    sd_image_t upscale(sd_image_t input_image, uint32_t upscale_factor);
 };
 #endif  // __SD_UPSCALER_H__