Compare commits

..

No commits in common. "6b675a5ede9b0edf0a0f44191e8b79d7ef27615a" and "e8323cabb0e4511ba18a50b1cb34cf1f87fc71ef" have entirely different histories.

25 changed files with 1334 additions and 3304 deletions

View File

@ -21,7 +21,6 @@ on:
"**/*.c",
"**/*.cpp",
"**/*.cu",
"examples/server/frontend",
"examples/server/frontend/**",
]
pull_request:
@ -36,7 +35,6 @@ on:
"**/*.c",
"**/*.cpp",
"**/*.cu",
"examples/server/frontend",
"examples/server/frontend/**",
]

2
.gitmodules vendored
View File

@ -3,7 +3,7 @@
url = https://github.com/ggml-org/ggml.git
[submodule "examples/server/frontend"]
path = examples/server/frontend
url = https://github.com/leejet/sdcpp-webui.git
url = https://github.com/leejet/stable-ui.git
[submodule "thirdparty/libwebp"]
path = thirdparty/libwebp
url = https://github.com/webmproject/libwebp.git

View File

@ -15,9 +15,6 @@ API and command-line option may change frequently.***
## 🔥Important News
* **2026/04/11** 🚀 stable-diffusion.cpp now uses a brand-new embedded web UI.
👉 Details: [PR #1408](https://github.com/leejet/stable-diffusion.cpp/pull/1408)
* **2026/01/18** 🚀 stable-diffusion.cpp now supports **FLUX.2-klein**
👉 Details: [PR #1193](https://github.com/leejet/stable-diffusion.cpp/pull/1193)

View File

@ -192,22 +192,17 @@ struct SDCliParams {
return options;
};
bool resolve() {
bool process_and_check() {
if (mode != METADATA && output_path.length() == 0) {
LOG_ERROR("error: the following arguments are required: output_path");
return false;
}
if (mode == CONVERT) {
if (output_path == "output.png") {
output_path = "output.gguf";
}
}
return true;
}
bool validate() {
if (mode != METADATA) {
if (output_path.length() == 0) {
LOG_ERROR("error: the following arguments are required: output_path");
return false;
}
} else {
} else if (mode == METADATA) {
if (image_path.empty()) {
LOG_ERROR("error: metadata mode needs an image path (--image)");
return false;
@ -221,16 +216,6 @@ struct SDCliParams {
return true;
}
bool resolve_and_validate() {
if (!resolve()) {
return false;
}
if (!validate()) {
return false;
}
return true;
}
std::string to_string() const {
std::ostringstream oss;
oss << "SDCliParams {\n"
@ -275,10 +260,10 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
exit(cli_params.normal_exit ? 0 : 1);
}
bool valid = cli_params.resolve_and_validate();
bool valid = cli_params.process_and_check();
if (valid && cli_params.mode != METADATA) {
valid = ctx_params.resolve_and_validate(cli_params.mode) &&
gen_params.resolve_and_validate(cli_params.mode, ctx_params.lora_model_dir);
valid = ctx_params.process_and_check(cli_params.mode) &&
gen_params.process_and_check(cli_params.mode, ctx_params.lora_model_dir);
}
if (!valid) {
@ -293,7 +278,7 @@ void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
}
bool load_images_from_dir(const std::string dir,
std::vector<SDImageOwner>& images,
SDImageVec& images,
int expected_width = 0,
int expected_height = 0,
int max_image_num = 0,
@ -330,10 +315,10 @@ bool load_images_from_dir(const std::string dir,
return false;
}
images.emplace_back(sd_image_t{(uint32_t)width,
(uint32_t)height,
3,
image_buffer});
images.push_back({(uint32_t)width,
(uint32_t)height,
3,
image_buffer});
if (max_image_num > 0 && static_cast<int>(images.size()) >= max_image_num) {
break;
@ -573,6 +558,13 @@ int main(int argc, const char* argv[]) {
}
bool vae_decode_only = true;
SDImageOwner init_image({0, 0, 3, nullptr});
SDImageOwner end_image({0, 0, 3, nullptr});
SDImageOwner control_image({0, 0, 3, nullptr});
SDImageOwner mask_image({0, 0, 1, nullptr});
SDImageVec ref_images;
SDImageVec pmid_images;
SDImageVec control_frames;
auto load_image_and_update_size = [&](const std::string& path,
SDImageOwner& image,
@ -596,32 +588,31 @@ int main(int argc, const char* argv[]) {
if (gen_params.init_image_path.size() > 0) {
vae_decode_only = false;
if (!load_image_and_update_size(gen_params.init_image_path, gen_params.init_image)) {
if (!load_image_and_update_size(gen_params.init_image_path, init_image)) {
return 1;
}
}
if (gen_params.end_image_path.size() > 0) {
vae_decode_only = false;
if (!load_image_and_update_size(gen_params.end_image_path, gen_params.end_image)) {
if (!load_image_and_update_size(gen_params.end_image_path, end_image)) {
return 1;
}
}
if (gen_params.ref_image_paths.size() > 0) {
vae_decode_only = false;
gen_params.ref_images.clear();
for (auto& path : gen_params.ref_image_paths) {
SDImageOwner ref_image({0, 0, 3, nullptr});
if (!load_image_and_update_size(path, ref_image, false)) {
return 1;
}
gen_params.ref_images.push_back(std::move(ref_image));
ref_images.push_back(std::move(ref_image));
}
}
if (gen_params.mask_image_path.size() > 0) {
if (!load_sd_image_from_file(gen_params.mask_image.put(),
if (!load_sd_image_from_file(mask_image.put(),
gen_params.mask_image_path.c_str(),
gen_params.get_resolved_width(),
gen_params.get_resolved_height(),
@ -639,11 +630,11 @@ int main(int argc, const char* argv[]) {
generated_mask.width = gen_params.get_resolved_width();
generated_mask.height = gen_params.get_resolved_height();
memset(generated_mask.data, 255, gen_params.get_resolved_width() * gen_params.get_resolved_height());
gen_params.mask_image.reset(generated_mask);
mask_image.reset(generated_mask);
}
if (gen_params.control_image_path.size() > 0) {
if (!load_sd_image_from_file(gen_params.control_image.put(),
if (!load_sd_image_from_file(control_image.put(),
gen_params.control_image_path.c_str(),
gen_params.get_resolved_width(),
gen_params.get_resolved_height())) {
@ -651,7 +642,7 @@ int main(int argc, const char* argv[]) {
return 1;
}
if (cli_params.canny_preprocess) { // apply preprocessor
preprocess_canny(gen_params.control_image.get(),
preprocess_canny(control_image.get(),
0.08f,
0.08f,
0.8f,
@ -661,9 +652,8 @@ int main(int argc, const char* argv[]) {
}
if (!gen_params.control_video_path.empty()) {
gen_params.control_frames.clear();
if (!load_images_from_dir(gen_params.control_video_path,
gen_params.control_frames,
control_frames,
gen_params.get_resolved_width(),
gen_params.get_resolved_height(),
gen_params.video_frames,
@ -673,9 +663,8 @@ int main(int argc, const char* argv[]) {
}
if (!gen_params.pm_id_images_dir.empty()) {
gen_params.pm_id_images.clear();
if (!load_images_from_dir(gen_params.pm_id_images_dir,
gen_params.pm_id_images,
pmid_images,
0,
0,
0,
@ -695,7 +684,7 @@ int main(int argc, const char* argv[]) {
if (cli_params.mode == UPSCALE) {
num_results = 1;
results.push_back(gen_params.init_image.release());
results.push_back(init_image.release());
} else {
SDCtxPtr sd_ctx(new_sd_ctx(&sd_ctx_params));
@ -717,13 +706,63 @@ int main(int argc, const char* argv[]) {
}
if (cli_params.mode == IMG_GEN) {
sd_img_gen_params_t img_gen_params = gen_params.to_sd_img_gen_params_t();
sd_img_gen_params_t img_gen_params = {
gen_params.lora_vec.data(),
static_cast<uint32_t>(gen_params.lora_vec.size()),
gen_params.prompt.c_str(),
gen_params.negative_prompt.c_str(),
gen_params.clip_skip,
init_image.get(),
ref_images.data(),
(int)ref_images.size(),
gen_params.auto_resize_ref_image,
gen_params.increase_ref_index,
mask_image.get(),
gen_params.get_resolved_width(),
gen_params.get_resolved_height(),
gen_params.sample_params,
gen_params.strength,
gen_params.seed,
gen_params.batch_count,
control_image.get(),
gen_params.control_strength,
{
pmid_images.data(),
(int)pmid_images.size(),
gen_params.pm_id_embed_path.c_str(),
gen_params.pm_style_strength,
}, // pm_params
gen_params.vae_tiling_params,
gen_params.cache_params,
};
num_results = gen_params.batch_count;
results.adopt(generate_image(sd_ctx.get(), &img_gen_params), num_results);
} else if (cli_params.mode == VID_GEN) {
sd_vid_gen_params_t vid_gen_params = gen_params.to_sd_vid_gen_params_t();
sd_image_t* generated_video = generate_video(sd_ctx.get(), &vid_gen_params, &num_results);
sd_vid_gen_params_t vid_gen_params = {
gen_params.lora_vec.data(),
static_cast<uint32_t>(gen_params.lora_vec.size()),
gen_params.prompt.c_str(),
gen_params.negative_prompt.c_str(),
gen_params.clip_skip,
init_image.get(),
end_image.get(),
control_frames.data(),
(int)control_frames.size(),
gen_params.get_resolved_width(),
gen_params.get_resolved_height(),
gen_params.sample_params,
gen_params.high_noise_sample_params,
gen_params.moe_boundary,
gen_params.strength,
gen_params.seed,
gen_params.video_frames,
gen_params.vace_strength,
gen_params.vae_tiling_params,
gen_params.cache_params,
};
sd_image_t* generated_video = generate_video(sd_ctx.get(), &vid_gen_params, &num_results);
results.adopt(generated_video, num_results);
}

View File

@ -21,7 +21,6 @@
#endif // _WIN32
#include "log.h"
#include "media_io.h"
#include "resource_owners.hpp"
using json = nlohmann::json;
@ -579,17 +578,7 @@ void SDContextParams::build_embedding_map() {
}
}
bool SDContextParams::resolve(SDMode mode) {
if (n_threads <= 0) {
n_threads = sd_get_num_physical_cores();
}
build_embedding_map();
return true;
}
bool SDContextParams::validate(SDMode mode) {
bool SDContextParams::process_and_check(SDMode mode) {
if (mode != UPSCALE && mode != METADATA && model_path.length() == 0 && diffusion_model_path.length() == 0) {
LOG_ERROR("error: the following arguments are required: model_path/diffusion_model\n");
return false;
@ -602,16 +591,12 @@ bool SDContextParams::validate(SDMode mode) {
}
}
return true;
}
if (n_threads <= 0) {
n_threads = sd_get_num_physical_cores();
}
build_embedding_map();
bool SDContextParams::resolve_and_validate(SDMode mode) {
if (!resolve(mode)) {
return false;
}
if (!validate(mode)) {
return false;
}
return true;
}
@ -1243,190 +1228,7 @@ ArgOptions SDGenerationParams::get_options() {
return options;
}
static const std::string k_base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
static bool is_base64(unsigned char c) {
return std::isalnum(c) || c == '+' || c == '/';
}
static std::vector<uint8_t> decode_base64_bytes(const std::string& encoded_string) {
int in_len = static_cast<int>(encoded_string.size());
int i = 0;
int j = 0;
int in_ = 0;
uint8_t char_array_4[4];
uint8_t char_array_3[3];
std::vector<uint8_t> ret;
while (in_len-- && encoded_string[in_] != '=' && is_base64(encoded_string[in_])) {
char_array_4[i++] = encoded_string[in_];
in_++;
if (i == 4) {
for (i = 0; i < 4; i++) {
char_array_4[i] = static_cast<uint8_t>(k_base64_chars.find(char_array_4[i]));
}
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (i = 0; i < 3; i++) {
ret.push_back(char_array_3[i]);
}
i = 0;
}
}
if (i) {
for (j = i; j < 4; j++) {
char_array_4[j] = 0;
}
for (j = 0; j < 4; j++) {
char_array_4[j] = static_cast<uint8_t>(k_base64_chars.find(char_array_4[j]));
}
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (j = 0; j < i - 1; j++) {
ret.push_back(char_array_3[j]);
}
}
return ret;
}
bool decode_base64_image(const std::string& encoded_input,
int target_channels,
int expected_width,
int expected_height,
SDImageOwner& out_image) {
std::string encoded = encoded_input;
auto comma_pos = encoded.find(',');
if (comma_pos != std::string::npos) {
encoded = encoded.substr(comma_pos + 1);
}
std::vector<uint8_t> image_bytes = decode_base64_bytes(encoded);
if (image_bytes.empty()) {
return false;
}
int decoded_width = 0;
int decoded_height = 0;
uint8_t* raw_data = load_image_from_memory(reinterpret_cast<const char*>(image_bytes.data()),
static_cast<int>(image_bytes.size()),
decoded_width,
decoded_height,
expected_width,
expected_height,
target_channels);
if (raw_data == nullptr) {
return false;
}
out_image.reset({(uint32_t)decoded_width, (uint32_t)decoded_height, (uint32_t)target_channels, raw_data});
return true;
}
static bool parse_image_json_field(const json& parent,
const char* key,
int channels,
int expected_width,
int expected_height,
SDImageOwner& out_image) {
if (!parent.contains(key)) {
return true;
}
if (parent.at(key).is_null()) {
out_image.reset({0, 0, (uint32_t)channels, nullptr});
return true;
}
if (!parent.at(key).is_string()) {
return false;
}
return decode_base64_image(parent.at(key).get<std::string>(), channels, expected_width, expected_height, out_image);
}
static bool parse_image_array_json_field(const json& parent,
const char* key,
int channels,
int expected_width,
int expected_height,
std::vector<SDImageOwner>& out_images) {
if (!parent.contains(key)) {
return true;
}
if (parent.at(key).is_null()) {
out_images.clear();
return true;
}
if (!parent.at(key).is_array()) {
return false;
}
out_images.clear();
for (const auto& item : parent.at(key)) {
if (!item.is_string()) {
return false;
}
SDImageOwner image;
if (!decode_base64_image(item.get<std::string>(), channels, expected_width, expected_height, image)) {
return false;
}
out_images.push_back(std::move(image));
}
return true;
}
static bool parse_lora_json_field(const json& parent,
const std::function<std::string(const std::string&)>& lora_path_resolver,
std::map<std::string, float>& lora_map,
std::map<std::string, float>& high_noise_lora_map) {
if (!parent.contains("lora")) {
return true;
}
if (!parent.at("lora").is_array()) {
return false;
}
lora_map.clear();
high_noise_lora_map.clear();
for (const auto& item : parent.at("lora")) {
if (!item.is_object()) {
return false;
}
std::string path = item.value("path", "");
if (path.empty()) {
return false;
}
std::string resolved_path = lora_path_resolver ? lora_path_resolver(path) : path;
if (resolved_path.empty()) {
return false;
}
const float multiplier = item.value("multiplier", 1.0f);
const bool is_high_noise = item.value("is_high_noise", false);
if (is_high_noise) {
high_noise_lora_map[resolved_path] += multiplier;
} else {
lora_map[resolved_path] += multiplier;
}
}
return true;
}
bool SDGenerationParams::from_json_str(
const std::string& json_str,
const std::function<std::string(const std::string&)>& lora_path_resolver) {
bool SDGenerationParams::from_json_str(const std::string& json_str) {
json j;
try {
j = json::parse(json_str);
@ -1453,9 +1255,6 @@ bool SDGenerationParams::from_json_str(
} else if constexpr (std::is_same_v<T, std::vector<int>>) {
if (j[key].is_array())
out = j[key].get<std::vector<int>>();
} else if constexpr (std::is_same_v<T, std::vector<float>>) {
if (j[key].is_array())
out = j[key].get<std::vector<float>>();
} else if constexpr (std::is_same_v<T, std::vector<std::string>>) {
if (j[key].is_array())
out = j[key].get<std::vector<std::string>>();
@ -1480,6 +1279,7 @@ bool SDGenerationParams::from_json_str(
load_if_exists("strength", strength);
load_if_exists("control_strength", control_strength);
load_if_exists("pm_style_strength", pm_style_strength);
load_if_exists("moe_boundary", moe_boundary);
load_if_exists("vace_strength", vace_strength);
@ -1487,119 +1287,32 @@ bool SDGenerationParams::from_json_str(
load_if_exists("increase_ref_index", increase_ref_index);
load_if_exists("embed_image_metadata", embed_image_metadata);
auto parse_sample_params_json = [&](const json& sample_json,
sd_sample_params_t& target_params,
std::vector<int>& target_skip_layers,
std::vector<float>* target_custom_sigmas) {
if (sample_json.contains("sample_steps") && sample_json["sample_steps"].is_number_integer()) {
target_params.sample_steps = sample_json["sample_steps"];
}
if (sample_json.contains("eta") && sample_json["eta"].is_number()) {
target_params.eta = sample_json["eta"];
}
if (sample_json.contains("shifted_timestep") && sample_json["shifted_timestep"].is_number_integer()) {
target_params.shifted_timestep = sample_json["shifted_timestep"];
}
if (sample_json.contains("flow_shift") && sample_json["flow_shift"].is_number()) {
target_params.flow_shift = sample_json["flow_shift"];
}
if (target_custom_sigmas != nullptr &&
sample_json.contains("custom_sigmas") &&
sample_json["custom_sigmas"].is_array()) {
*target_custom_sigmas = sample_json["custom_sigmas"].get<std::vector<float>>();
}
if (sample_json.contains("sample_method") && sample_json["sample_method"].is_string()) {
enum sample_method_t tmp = str_to_sample_method(sample_json["sample_method"].get<std::string>().c_str());
load_if_exists("skip_layers", skip_layers);
load_if_exists("high_noise_skip_layers", high_noise_skip_layers);
load_if_exists("steps", sample_params.sample_steps);
load_if_exists("high_noise_steps", high_noise_sample_params.sample_steps);
load_if_exists("cfg_scale", sample_params.guidance.txt_cfg);
load_if_exists("img_cfg_scale", sample_params.guidance.img_cfg);
load_if_exists("guidance", sample_params.guidance.distilled_guidance);
load_if_exists("flow_shift", sample_params.flow_shift);
auto load_sampler_if_exists = [&](const char* key, enum sample_method_t& out) {
if (j.contains(key) && j[key].is_string()) {
enum sample_method_t tmp = str_to_sample_method(j[key].get<std::string>().c_str());
if (tmp != SAMPLE_METHOD_COUNT) {
target_params.sample_method = tmp;
}
}
if (sample_json.contains("scheduler") && sample_json["scheduler"].is_string()) {
enum scheduler_t tmp = str_to_scheduler(sample_json["scheduler"].get<std::string>().c_str());
if (tmp != SCHEDULER_COUNT) {
target_params.scheduler = tmp;
}
}
if (sample_json.contains("guidance") && sample_json["guidance"].is_object()) {
const json& guidance_json = sample_json["guidance"];
if (guidance_json.contains("txt_cfg") && guidance_json["txt_cfg"].is_number()) {
target_params.guidance.txt_cfg = guidance_json["txt_cfg"];
}
if (guidance_json.contains("img_cfg") && guidance_json["img_cfg"].is_number()) {
target_params.guidance.img_cfg = guidance_json["img_cfg"];
}
if (guidance_json.contains("distilled_guidance") && guidance_json["distilled_guidance"].is_number()) {
target_params.guidance.distilled_guidance = guidance_json["distilled_guidance"];
}
if (guidance_json.contains("slg") && guidance_json["slg"].is_object()) {
const json& slg_json = guidance_json["slg"];
if (slg_json.contains("layers") && slg_json["layers"].is_array()) {
target_skip_layers = slg_json["layers"].get<std::vector<int>>();
}
if (slg_json.contains("layer_start") && slg_json["layer_start"].is_number()) {
target_params.guidance.slg.layer_start = slg_json["layer_start"];
}
if (slg_json.contains("layer_end") && slg_json["layer_end"].is_number()) {
target_params.guidance.slg.layer_end = slg_json["layer_end"];
}
if (slg_json.contains("scale") && slg_json["scale"].is_number()) {
target_params.guidance.slg.scale = slg_json["scale"];
}
out = tmp;
}
}
};
load_sampler_if_exists("sample_method", sample_params.sample_method);
load_sampler_if_exists("high_noise_sample_method", high_noise_sample_params.sample_method);
if (j.contains("sample_params") && j["sample_params"].is_object()) {
parse_sample_params_json(j["sample_params"], sample_params, skip_layers, &custom_sigmas);
}
if (j.contains("high_noise_sample_params") && j["high_noise_sample_params"].is_object()) {
parse_sample_params_json(j["high_noise_sample_params"],
high_noise_sample_params,
high_noise_skip_layers,
nullptr);
}
if (j.contains("vae_tiling_params") && j["vae_tiling_params"].is_object()) {
const json& tiling_json = j["vae_tiling_params"];
if (tiling_json.contains("enabled") && tiling_json["enabled"].is_boolean()) {
vae_tiling_params.enabled = tiling_json["enabled"];
if (j.contains("scheduler") && j["scheduler"].is_string()) {
enum scheduler_t tmp = str_to_scheduler(j["scheduler"].get<std::string>().c_str());
if (tmp != SCHEDULER_COUNT) {
sample_params.scheduler = tmp;
}
if (tiling_json.contains("tile_size_x") && tiling_json["tile_size_x"].is_number_integer()) {
vae_tiling_params.tile_size_x = tiling_json["tile_size_x"];
}
if (tiling_json.contains("tile_size_y") && tiling_json["tile_size_y"].is_number_integer()) {
vae_tiling_params.tile_size_y = tiling_json["tile_size_y"];
}
if (tiling_json.contains("target_overlap") && tiling_json["target_overlap"].is_number()) {
vae_tiling_params.target_overlap = tiling_json["target_overlap"];
}
if (tiling_json.contains("rel_size_x") && tiling_json["rel_size_x"].is_number()) {
vae_tiling_params.rel_size_x = tiling_json["rel_size_x"];
}
if (tiling_json.contains("rel_size_y") && tiling_json["rel_size_y"].is_number()) {
vae_tiling_params.rel_size_y = tiling_json["rel_size_y"];
}
}
if (!parse_lora_json_field(j, lora_path_resolver, lora_map, high_noise_lora_map)) {
LOG_ERROR("invalid lora");
return false;
}
if (!parse_image_json_field(j, "init_image", 3, width, height, init_image)) {
LOG_ERROR("invalid init_image");
return false;
}
if (!parse_image_array_json_field(j, "ref_images", 3, width, height, ref_images)) {
LOG_ERROR("invalid ref_images");
return false;
}
if (!parse_image_json_field(j, "mask_image", 1, width, height, mask_image)) {
LOG_ERROR("invalid mask_image");
return false;
}
if (!parse_image_json_field(j, "control_image", 3, width, height, control_image)) {
LOG_ERROR("invalid control_image");
return false;
}
return true;
@ -1671,6 +1384,22 @@ void SDGenerationParams::extract_and_remove_lora(const std::string& lora_model_d
tmp = m.suffix().str();
}
for (const auto& kv : lora_map) {
sd_lora_t item;
item.is_high_noise = false;
item.path = kv.first.c_str();
item.multiplier = kv.second;
lora_vec.emplace_back(item);
}
for (const auto& kv : high_noise_lora_map) {
sd_lora_t item;
item.is_high_noise = true;
item.path = kv.first.c_str();
item.multiplier = kv.second;
lora_vec.emplace_back(item);
}
}
bool SDGenerationParams::width_and_height_are_set() const {
@ -1693,7 +1422,23 @@ int SDGenerationParams::get_resolved_height() const {
return (height > 0) ? height : 512;
}
bool SDGenerationParams::initialize_cache_params() {
bool SDGenerationParams::process_and_check(SDMode mode, const std::string& lora_model_dir) {
prompt_with_lora = prompt;
if (sample_params.sample_steps <= 0) {
LOG_ERROR("error: the sample_steps must be greater than 0\n");
return false;
}
if (high_noise_sample_params.sample_steps <= 0) {
high_noise_sample_params.sample_steps = -1;
}
if (strength < 0.f || strength > 1.f) {
LOG_ERROR("error: can only work with strength in [0.0, 1.0]\n");
return false;
}
sd_cache_params_init(&cache_params);
auto parse_named_params = [&](const std::string& opt_str) -> bool {
@ -1759,9 +1504,7 @@ bool SDGenerationParams::initialize_cache_params() {
};
if (!cache_mode.empty()) {
if (cache_mode == "disabled") {
cache_params.mode = SD_CACHE_DISABLED;
} else if (cache_mode == "easycache") {
if (cache_mode == "easycache") {
cache_params.mode = SD_CACHE_EASYCACHE;
} else if (cache_mode == "ucache") {
cache_params.mode = SD_CACHE_UCACHE;
@ -1773,73 +1516,14 @@ bool SDGenerationParams::initialize_cache_params() {
cache_params.mode = SD_CACHE_CACHE_DIT;
} else if (cache_mode == "spectrum") {
cache_params.mode = SD_CACHE_SPECTRUM;
} else {
LOG_ERROR("error: invalid cache mode '%s'", cache_mode.c_str());
return false;
}
}
if (!cache_option.empty() && !parse_named_params(cache_option)) {
return false;
}
if (!cache_option.empty()) {
if (!parse_named_params(cache_option)) {
return false;
}
}
if (cache_params.mode == SD_CACHE_DBCACHE ||
cache_params.mode == SD_CACHE_TAYLORSEER ||
cache_params.mode == SD_CACHE_CACHE_DIT) {
cache_params.scm_policy_dynamic = scm_policy_dynamic;
}
return true;
}
bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict) {
if (high_noise_sample_params.sample_steps <= 0) {
high_noise_sample_params.sample_steps = -1;
}
if (!initialize_cache_params()) {
return false;
}
if (seed < 0) {
srand((int)time(nullptr));
seed = rand();
}
if (strict) {
batch_count = std::clamp(batch_count, 1, 8);
sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100);
}
prompt_with_lora = prompt;
if (!lora_model_dir.empty()) {
extract_and_remove_lora(lora_model_dir);
}
return true;
}
bool SDGenerationParams::validate(SDMode mode) {
if (batch_count <= 0) {
LOG_ERROR("error: batch_count must be greater than 0");
return false;
}
if (sample_params.sample_steps <= 0) {
LOG_ERROR("error: the sample_steps must be greater than 0\n");
return false;
}
if (strength < 0.f || strength > 1.f) {
LOG_ERROR("error: can only work with strength in [0.0, 1.0]\n");
return false;
}
if (sample_params.guidance.txt_cfg < 0.f) {
LOG_ERROR("error: cfg_scale must be positive");
return false;
}
if (!cache_mode.empty()) {
if (cache_mode == "easycache" || cache_mode == "ucache") {
if (cache_params.reuse_threshold < 0.0f) {
LOG_ERROR("error: cache threshold must be non-negative");
@ -1854,6 +1538,22 @@ bool SDGenerationParams::validate(SDMode mode) {
}
}
if (cache_params.mode == SD_CACHE_DBCACHE ||
cache_params.mode == SD_CACHE_TAYLORSEER ||
cache_params.mode == SD_CACHE_CACHE_DIT) {
if (!scm_mask.empty()) {
cache_params.scm_mask = scm_mask.c_str();
}
cache_params.scm_policy_dynamic = scm_policy_dynamic;
}
sample_params.guidance.slg.layers = skip_layers.data();
sample_params.guidance.slg.layer_count = skip_layers.size();
sample_params.custom_sigmas = custom_sigmas.data();
sample_params.custom_sigmas_count = static_cast<int>(custom_sigmas.size());
high_noise_sample_params.guidance.slg.layers = high_noise_skip_layers.data();
high_noise_sample_params.guidance.slg.layer_count = high_noise_skip_layers.size();
if (mode == VID_GEN && video_frames <= 0) {
return false;
}
@ -1863,7 +1563,6 @@ bool SDGenerationParams::validate(SDMode mode) {
}
if (sample_params.shifted_timestep < 0 || sample_params.shifted_timestep > 1000) {
LOG_ERROR("error: shifted_timestep must be in range [0, 1000]");
return false;
}
@ -1882,134 +1581,16 @@ bool SDGenerationParams::validate(SDMode mode) {
}
}
if (seed < 0) {
srand((int)time(nullptr));
seed = rand();
}
extract_and_remove_lora(lora_model_dir);
return true;
}
bool SDGenerationParams::resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict) {
if (!resolve(lora_model_dir, strict)) {
return false;
}
if (!validate(mode)) {
return false;
}
return true;
}
sd_img_gen_params_t SDGenerationParams::to_sd_img_gen_params_t() {
sd_img_gen_params_t params;
sd_img_gen_params_init(&params);
lora_vec.clear();
lora_vec.reserve(lora_map.size() + high_noise_lora_map.size());
for (const auto& kv : lora_map) {
lora_vec.push_back({false, kv.second, kv.first.c_str()});
}
for (const auto& kv : high_noise_lora_map) {
lora_vec.push_back({true, kv.second, kv.first.c_str()});
}
ref_image_views.clear();
ref_image_views.reserve(ref_images.size());
for (auto& ref_image : ref_images) {
ref_image_views.push_back(ref_image.get());
}
pm_id_image_views.clear();
pm_id_image_views.reserve(pm_id_images.size());
for (auto& image : pm_id_images) {
pm_id_image_views.push_back(image.get());
}
sample_params.guidance.slg.layers = skip_layers.empty() ? nullptr : skip_layers.data();
sample_params.guidance.slg.layer_count = skip_layers.size();
high_noise_sample_params.guidance.slg.layers = high_noise_skip_layers.empty() ? nullptr : high_noise_skip_layers.data();
high_noise_sample_params.guidance.slg.layer_count = high_noise_skip_layers.size();
sample_params.custom_sigmas = custom_sigmas.empty() ? nullptr : custom_sigmas.data();
sample_params.custom_sigmas_count = static_cast<int>(custom_sigmas.size());
cache_params.scm_mask = scm_mask.empty() ? nullptr : scm_mask.c_str();
sd_pm_params_t pm_params = {
pm_id_image_views.empty() ? nullptr : pm_id_image_views.data(),
static_cast<int>(pm_id_image_views.size()),
pm_id_embed_path.empty() ? nullptr : pm_id_embed_path.c_str(),
pm_style_strength,
};
params.loras = lora_vec.empty() ? nullptr : lora_vec.data();
params.lora_count = static_cast<uint32_t>(lora_vec.size());
params.prompt = prompt.c_str();
params.negative_prompt = negative_prompt.c_str();
params.clip_skip = clip_skip;
params.init_image = init_image.get();
params.ref_images = ref_image_views.empty() ? nullptr : ref_image_views.data();
params.ref_images_count = static_cast<int>(ref_image_views.size());
params.auto_resize_ref_image = auto_resize_ref_image;
params.increase_ref_index = increase_ref_index;
params.mask_image = mask_image.get();
params.width = get_resolved_width();
params.height = get_resolved_height();
params.sample_params = sample_params;
params.strength = strength;
params.seed = seed;
params.batch_count = batch_count;
params.control_image = control_image.get();
params.control_strength = control_strength;
params.pm_params = pm_params;
params.vae_tiling_params = vae_tiling_params;
params.cache = cache_params;
return params;
}
sd_vid_gen_params_t SDGenerationParams::to_sd_vid_gen_params_t() {
sd_vid_gen_params_t params;
sd_vid_gen_params_init(&params);
lora_vec.clear();
lora_vec.reserve(lora_map.size() + high_noise_lora_map.size());
for (const auto& kv : lora_map) {
lora_vec.push_back({false, kv.second, kv.first.c_str()});
}
for (const auto& kv : high_noise_lora_map) {
lora_vec.push_back({true, kv.second, kv.first.c_str()});
}
control_frame_views.clear();
control_frame_views.reserve(control_frames.size());
for (auto& frame : control_frames) {
control_frame_views.push_back(frame.get());
}
sample_params.guidance.slg.layers = skip_layers.empty() ? nullptr : skip_layers.data();
sample_params.guidance.slg.layer_count = skip_layers.size();
high_noise_sample_params.guidance.slg.layers = high_noise_skip_layers.empty() ? nullptr : high_noise_skip_layers.data();
high_noise_sample_params.guidance.slg.layer_count = high_noise_skip_layers.size();
sample_params.custom_sigmas = custom_sigmas.empty() ? nullptr : custom_sigmas.data();
sample_params.custom_sigmas_count = static_cast<int>(custom_sigmas.size());
cache_params.scm_mask = scm_mask.empty() ? nullptr : scm_mask.c_str();
params.loras = lora_vec.empty() ? nullptr : lora_vec.data();
params.lora_count = static_cast<uint32_t>(lora_vec.size());
params.prompt = prompt.c_str();
params.negative_prompt = negative_prompt.c_str();
params.clip_skip = clip_skip;
params.init_image = init_image.get();
params.end_image = end_image.get();
params.control_frames = control_frame_views.empty() ? nullptr : control_frame_views.data();
params.control_frames_size = static_cast<int>(control_frame_views.size());
params.width = get_resolved_width();
params.height = get_resolved_height();
params.sample_params = sample_params;
params.high_noise_sample_params = high_noise_sample_params;
params.moe_boundary = moe_boundary;
params.strength = strength;
params.seed = seed;
params.video_frames = video_frames;
params.vace_strength = vace_strength;
params.vae_tiling_params = vae_tiling_params;
params.cache = cache_params;
return params;
}
std::string SDGenerationParams::to_string() const {
FreeUniquePtr<char> sample_params_str(sd_sample_params_to_str(&sample_params));
FreeUniquePtr<char> high_noise_sample_params_str(sd_sample_params_to_str(&high_noise_sample_params));

View File

@ -9,7 +9,6 @@
#include <vector>
#include "log.h"
#include "resource_owners.hpp"
#include "stable-diffusion.h"
#define SAFE_STR(s) ((s) ? (s) : "")
@ -75,11 +74,6 @@ struct ArgOptions {
};
bool parse_options(int argc, const char** argv, const std::vector<ArgOptions>& options_list);
bool decode_base64_image(const std::string& encoded_input,
int target_channels,
int expected_width,
int expected_height,
SDImageOwner& out_image);
struct SDContextParams {
int n_threads = -1;
@ -135,39 +129,34 @@ struct SDContextParams {
float flow_shift = INFINITY;
ArgOptions get_options();
void build_embedding_map();
bool resolve(SDMode mode);
bool validate(SDMode mode);
bool resolve_and_validate(SDMode mode);
bool process_and_check(SDMode mode);
std::string to_string() const;
sd_ctx_params_t to_sd_ctx_params_t(bool vae_decode_only, bool free_params_immediately, bool taesd_preview);
};
struct SDGenerationParams {
// User-facing input fields.
std::string prompt;
std::string prompt_with_lora; // for metadata record only
std::string negative_prompt;
int clip_skip = -1; // <= 0 represents unspecified
int width = -1;
int height = -1;
int batch_count = 1;
int64_t seed = 42;
float strength = 0.75f;
float control_strength = 0.9f;
bool auto_resize_ref_image = true;
bool increase_ref_index = false;
bool embed_image_metadata = true;
int clip_skip = -1; // <= 0 represents unspecified
int width = -1;
int height = -1;
int batch_count = 1;
std::string init_image_path;
std::string end_image_path;
std::string mask_image_path;
std::string control_image_path;
std::vector<std::string> ref_image_paths;
std::string control_video_path;
bool auto_resize_ref_image = true;
bool increase_ref_index = false;
bool embed_image_metadata = true;
std::vector<int> skip_layers = {7, 8, 9};
sd_sample_params_t sample_params;
sd_sample_params_t high_noise_sample_params;
std::vector<int> skip_layers = {7, 8, 9};
std::vector<int> high_noise_skip_layers = {7, 8, 9};
sd_sample_params_t high_noise_sample_params;
std::vector<float> custom_sigmas;
@ -177,12 +166,19 @@ struct SDGenerationParams {
bool scm_policy_dynamic = true;
sd_cache_params_t cache_params{};
float moe_boundary = 0.875f;
int video_frames = 1;
int fps = 16;
float vace_strength = 1.f;
float moe_boundary = 0.875f;
int video_frames = 1;
int fps = 16;
float vace_strength = 1.f;
float strength = 0.75f;
float control_strength = 0.9f;
int64_t seed = 42;
sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
// Photo Maker
std::string pm_id_images_dir;
std::string pm_id_embed_path;
float pm_style_strength = 20.f;
@ -192,44 +188,16 @@ struct SDGenerationParams {
std::map<std::string, float> lora_map;
std::map<std::string, float> high_noise_lora_map;
// Derived and normalized fields.
std::string prompt_with_lora; // for metadata record only
std::vector<sd_lora_t> lora_vec;
// Owned execution payload.
SDImageOwner init_image;
SDImageOwner end_image;
std::vector<SDImageOwner> ref_images;
SDImageOwner mask_image;
SDImageOwner control_image;
std::vector<SDImageOwner> pm_id_images;
std::vector<SDImageOwner> control_frames;
// Backing storage for sd_img_gen_params_t view fields.
std::vector<sd_image_t> ref_image_views;
std::vector<sd_image_t> pm_id_image_views;
std::vector<sd_image_t> control_frame_views;
SDGenerationParams();
SDGenerationParams(const SDGenerationParams& other) = default;
SDGenerationParams& operator=(const SDGenerationParams& other) = default;
SDGenerationParams(SDGenerationParams&& other) noexcept = default;
SDGenerationParams& operator=(SDGenerationParams&& other) noexcept = default;
ArgOptions get_options();
bool from_json_str(const std::string& json_str,
const std::function<std::string(const std::string&)>& lora_path_resolver = {});
bool initialize_cache_params();
bool from_json_str(const std::string& json_str);
void extract_and_remove_lora(const std::string& lora_model_dir);
bool width_and_height_are_set() const;
void set_width_and_height_if_unset(int w, int h);
int get_resolved_width() const;
int get_resolved_height() const;
bool resolve(const std::string& lora_model_dir, bool strict = false);
bool validate(SDMode mode);
bool resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict = false);
sd_img_gen_params_t to_sd_img_gen_params_t();
sd_vid_gen_params_t to_sd_vid_gen_params_t();
bool process_and_check(SDMode mode, const std::string& lora_model_dir);
std::string to_string() const;
};

View File

@ -569,7 +569,6 @@ void write_u32_le(FILE* f, uint32_t val) {
// Writes a 16-bit value to `f` in little-endian byte order.
// The previous implementation did `fwrite(&val, 2, 1, f)`, which emits host
// byte order and only matches the "_le" contract on little-endian hosts.
// Serializing the bytes explicitly keeps the on-disk format stable on any
// architecture (output is byte-identical on little-endian machines).
void write_u16_le(FILE* f, uint16_t val) {
    const uint8_t bytes[2] = {
        static_cast<uint8_t>(val & 0xFFu),
        static_cast<uint8_t>((val >> 8) & 0xFFu),
    };
    fwrite(bytes, 1, sizeof(bytes), f);
}
EncodedImageFormat encoded_image_format_from_path(const std::string& path) {
std::string ext = fs::path(path).extension().string();
std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
@ -683,9 +682,8 @@ bool load_sd_image_from_file(sd_image_t* image,
if (image->data == nullptr) {
return false;
}
image->width = width;
image->height = height;
image->channel = expected_channel;
image->width = width;
image->height = height;
return true;
}

View File

@ -3,7 +3,6 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <utility>
#include <vector>
@ -48,40 +47,14 @@ using SDCtxPtr = std::unique_ptr<sd_ctx_t, SDCtxDeleter>;
using UpscalerCtxPtr = std::unique_ptr<upscaler_ctx_t, UpscalerCtxDeleter>;
class SDImageOwner {
private:
// Deep-copies an sd_image_t, duplicating the pixel buffer with malloc so the
// copy can be freed independently of the source.
// A null source buffer yields a copy with the same dimensions and null data;
// a failed allocation yields an all-zero image.
static sd_image_t copy_image(const sd_image_t& image) {
    sd_image_t clone = {image.width, image.height, image.channel, nullptr};
    if (image.data == nullptr) {
        return clone;
    }
    // width is widened first so the product is computed in size_t.
    const size_t byte_count = static_cast<size_t>(image.width) * image.height * image.channel;
    uint8_t* buffer = static_cast<uint8_t*>(malloc(byte_count));
    if (buffer == nullptr) {
        return {0, 0, 0, nullptr};
    }
    std::memcpy(buffer, image.data, byte_count);
    clone.data = buffer;
    return clone;
}
sd_image_t image_ = {0, 0, 0, nullptr};
public:
SDImageOwner() = default;
explicit SDImageOwner(sd_image_t image)
: image_(image) {
}
SDImageOwner(const SDImageOwner& other)
: image_(copy_image(other.image_)) {
}
SDImageOwner& operator=(const SDImageOwner& other) {
if (this != &other) {
reset(copy_image(other.image_));
}
return *this;
}
SDImageOwner(const SDImageOwner&) = delete;
SDImageOwner& operator=(const SDImageOwner&) = delete;
SDImageOwner(SDImageOwner&& other) noexcept
: image_(other.release()) {
@ -104,9 +77,8 @@ public:
free(image_.data);
image_.data = nullptr;
}
image_.width = 0;
image_.height = 0;
image_.channel = 0;
image_.width = 0;
image_.height = 0;
return &image_;
}
@ -130,12 +102,12 @@ public:
}
image_ = image;
}
private:
sd_image_t image_ = {0, 0, 0, nullptr};
};
class SDImageVec {
private:
std::vector<sd_image_t> images_;
public:
SDImageVec() = default;
@ -192,10 +164,6 @@ public:
return images_.empty();
}
int count() const {
return static_cast<int>(images_.size());
}
explicit operator bool() const {
return !images_.empty();
}
@ -231,6 +199,9 @@ public:
}
images_.clear();
}
private:
std::vector<sd_image_t> images_;
};
#endif // __EXAMPLE_RESOURCE_OWNERS_H__

View File

@ -61,12 +61,6 @@ add_executable(${TARGET}
../common/log.cpp
../common/media_io.cpp
main.cpp
runtime.cpp
async_jobs.cpp
routes_index.cpp
routes_openai.cpp
routes_sdapi.cpp
routes_sdcpp.cpp
)
if(HAVE_FRONTEND_BUILD)

View File

@ -1,33 +1,3 @@
# Example
The following example starts `sd-server` with a standalone diffusion model, VAE, and LLM text encoder:
```
.\bin\Release\sd-server.exe --diffusion-model ..\models\diffusion_models\z_image_turbo_bf16.safetensors --vae ..\models\vae\ae.sft --llm ..\models\text_encoders\qwen_3_4b.safetensors --diffusion-fa --offload-to-cpu -v --cfg-scale 1.0
```
What this example does:
* `--diffusion-model` selects the standalone diffusion model
* `--vae` selects the VAE decoder
* `--llm` selects the text encoder / language model used by this pipeline
* `--diffusion-fa` enables flash attention in the diffusion model
* `--offload-to-cpu` reduces VRAM pressure by keeping weights in RAM when possible
* `-v` enables verbose logging
* `--cfg-scale 1.0` sets the default CFG scale for generation
After the server starts successfully:
* the web UI is available at `http://127.0.0.1:1234/`
* the native async API is available under `/sdcpp/v1/...`
* the compatibility APIs are available under `/v1/...` and `/sdapi/v1/...`
If you want to use a different host or port, pass:
```bash
--listen-ip <ip> --listen-port <port>
```
# Frontend
## Build with Frontend
@ -38,7 +8,7 @@ The server can optionally build the web frontend and embed it into the binary as
Install the following tools:
* **Node.js** ≥ 20
* **Node.js** ≥ 22.18
https://nodejs.org/
* **pnpm** ≥ 10
@ -84,7 +54,7 @@ and embed the generated frontend into the server binary.
## Frontend Repository
The web frontend is maintained in a **separate repository**, https://github.com/leejet/sdcpp-webui.
The web frontend is maintained in a **separate repository**, https://github.com/leejet/stable-ui.
If you want to modify the UI or frontend logic, please submit pull requests to the **frontend repository**.

View File

@ -1,794 +0,0 @@
# stable-diffusion.cpp Server APIs
This document describes the server-facing APIs exposed by `examples/server`.
The server currently exposes three API families:
- `OpenAI API` under `/v1/...`
- `Stable Diffusion WebUI API` under `/sdapi/v1/...`
- `sdcpp API` under `/sdcpp/v1/...`
The `sdcpp API` is the native API surface.
Its request schema is also the canonical schema for `sd_cpp_extra_args`.
Global LoRA rule:
- Server APIs do not parse LoRA tags embedded inside `prompt`.
- `<lora:...>` prompt syntax is intentionally unsupported in `OpenAI API`, `sdapi`, and `sdcpp API`.
- LoRA must be passed through structured API fields when the API supports it.
## Overview
### OpenAI API
Compatibility API shaped like OpenAI image endpoints.
Current generation-related endpoints include:
- `POST /v1/images/generations`
- `POST /v1/images/edits`
- `GET /v1/models`
### Stable Diffusion WebUI API
Compatibility API shaped like the AUTOMATIC1111 / WebUI endpoints.
Current generation-related endpoints include:
- `POST /sdapi/v1/txt2img`
- `POST /sdapi/v1/img2img`
- `GET /sdapi/v1/loras`
- `GET /sdapi/v1/samplers`
- `GET /sdapi/v1/schedulers`
- `GET /sdapi/v1/sd-models`
- `GET /sdapi/v1/options`
### sdcpp API
Native async API for `stable-diffusion.cpp`.
Current endpoints include:
- `GET /sdcpp/v1/capabilities`
- `POST /sdcpp/v1/img_gen`
- `GET /sdcpp/v1/jobs/{id}`
- `POST /sdcpp/v1/jobs/{id}/cancel`
- `POST /sdcpp/v1/vid_gen`
`POST /sdcpp/v1/vid_gen` is currently exposed but returns `501 Not Implemented`.
## `sd_cpp_extra_args`
`sd_cpp_extra_args` is an extension mechanism for the compatibility APIs.
Rules:
- Its JSON schema is the same schema used by the native `sdcpp API`.
- `OpenAI API` and `sdapi` can embed it inside `prompt`.
- `sdcpp API` does not need it, because the request body already uses the native schema directly.
Embedding format:
```text
normal prompt text <sd_cpp_extra_args>{"sample_params":{"sample_steps":28}}</sd_cpp_extra_args>
```
Behavior:
- The server extracts the JSON block.
- The JSON block is parsed using the same field rules as the `sdcpp API`.
- The block is removed from the final prompt before generation.
Intended use:
- extend `OpenAI API` requests with native `stable-diffusion.cpp` controls
- extend `sdapi` requests with native `stable-diffusion.cpp` controls
Not intended use:
- do not use `sd_cpp_extra_args` with `/sdcpp/v1/*`
## OpenAI API
### Purpose
This family exists for client compatibility.
Use it when you want OpenAI-style request and response shapes.
### Native Extension
`OpenAI API` supports `sd_cpp_extra_args` embedded inside `prompt`.
The embedded JSON follows the `sdcpp API` request schema.
### Supported Fields
#### `POST /v1/images/generations`
Currently supported top-level request fields:
| Field | Type | Notes |
| --- | --- | --- |
| `prompt` | `string` | Required |
| `n` | `integer` | Number of images |
| `size` | `string` | Format `WIDTHxHEIGHT` |
| `output_format` | `string` | `png`, `jpeg`, or `webp` |
| `output_compression` | `integer` | Range is clamped to `0..100` |
Native extension fields:
- any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
Response fields:
| Field | Type | Notes |
| --- | --- | --- |
| `created` | `integer` | Unix timestamp |
| `output_format` | `string` | Final encoded image format |
| `data` | `array<object>` | Generated image list |
| `data[].b64_json` | `string` | Base64-encoded image bytes |
#### `POST /v1/images/edits`
Currently supported multipart form fields:
| Field | Type | Notes |
| --- | --- | --- |
| `prompt` | `string` | Required |
| `image[]` | `file[]` | Preferred image upload field |
| `image` | `file` | Legacy single-image upload field |
| `mask` | `file` | Optional mask image |
| `n` | `integer` | Number of images |
| `size` | `string` | Format `WIDTHxHEIGHT` |
| `output_format` | `string` | `png` or `jpeg` |
| `output_compression` | `integer` | Range is clamped to `0..100` |
Native extension fields:
- any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
Response fields:
| Field | Type | Notes |
| --- | --- | --- |
| `created` | `integer` | Unix timestamp |
| `output_format` | `string` | Final encoded image format |
| `data` | `array<object>` | Generated image list |
| `data[].b64_json` | `string` | Base64-encoded image bytes |
#### `GET /v1/models`
Response fields:
| Field | Type | Notes |
| --- | --- | --- |
| `data` | `array<object>` | Available local models |
| `data[].id` | `string` | Currently fixed to `sd-cpp-local` |
| `data[].object` | `string` | Currently fixed to `model` |
| `data[].owned_by` | `string` | Currently fixed to `local` |
### Output Options
`OpenAI API` supports response serialization controls such as:
- `output_format`
- `output_compression`
### Notes
- `OpenAI API` is synchronous from the HTTP client's perspective.
- Native async job polling is not exposed through this family.
- Prompt-embedded `<lora:...>` tags are intentionally unsupported.
## Stable Diffusion WebUI API
### Purpose
This family exists for client compatibility with WebUI-style tools.
Use it when you want `txt2img` / `img2img`-style endpoints and response shapes.
### Native Extension
`sdapi` supports `sd_cpp_extra_args` embedded inside `prompt`.
The embedded JSON follows the `sdcpp API` request schema.
This allows `sdapi` clients to use native `stable-diffusion.cpp` controls without changing the outer request format.
### Supported Fields
#### `POST /sdapi/v1/txt2img`
Currently supported request fields:
| Field | Type | Notes |
| --- | --- | --- |
| `prompt` | `string` | Required |
| `negative_prompt` | `string` | Optional |
| `width` | `integer` | Positive image width |
| `height` | `integer` | Positive image height |
| `steps` | `integer` | Sampling steps |
| `cfg_scale` | `number` | Text CFG scale |
| `seed` | `integer` | `-1` means random |
| `batch_size` | `integer` | Number of images |
| `clip_skip` | `integer` | Optional |
| `sampler_name` | `string` | WebUI sampler name |
| `scheduler` | `string` | Scheduler name |
| `lora` | `array<object>` | Structured LoRA list |
| `extra_images` | `array<string>` | Base64 or data URL images |
Native extension fields:
- any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
Response fields:
| Field | Type | Notes |
| --- | --- | --- |
| `images` | `array<string>` | Base64-encoded PNG images |
| `parameters` | `object` | Echo of the parsed outer request body |
| `info` | `string` | Currently empty string |
#### `POST /sdapi/v1/img2img`
Currently supported request fields:
| Field | Type | Notes |
| --- | --- | --- |
| all currently supported `txt2img` fields | same as above | Reused |
| `init_images` | `array<string>` | Base64 or data URL images |
| `mask` | `string` | Base64 or data URL image |
| `inpainting_mask_invert` | `integer` or `boolean` | Treated as invert flag |
| `denoising_strength` | `number` | Clamped to `0.0..1.0` |
Native extension fields:
- any `sdcpp API` fields embedded through `sd_cpp_extra_args` inside `prompt`
Response fields:
| Field | Type | Notes |
| --- | --- | --- |
| `images` | `array<string>` | Base64-encoded PNG images |
| `parameters` | `object` | Echo of the parsed outer request body |
| `info` | `string` | Currently empty string |
#### Discovery / Compatibility Endpoints
Currently exposed:
- `GET /sdapi/v1/loras`
- `GET /sdapi/v1/samplers`
- `GET /sdapi/v1/schedulers`
- `GET /sdapi/v1/sd-models`
- `GET /sdapi/v1/options`
Response fields:
`GET /sdapi/v1/loras`
| Field | Type | Notes |
| --- | --- | --- |
| `[].name` | `string` | Display name derived from file stem |
| `[].path` | `string` | Relative path under the configured LoRA directory |
`GET /sdapi/v1/samplers`
| Field | Type | Notes |
| --- | --- | --- |
| `[].name` | `string` | Sampler name |
| `[].aliases` | `array<string>` | Currently contains the same single sampler name |
| `[].options` | `object` | Currently empty object |
`GET /sdapi/v1/schedulers`
| Field | Type | Notes |
| --- | --- | --- |
| `[].name` | `string` | Scheduler name |
| `[].label` | `string` | Same value as `name` |
`GET /sdapi/v1/sd-models`
| Field | Type | Notes |
| --- | --- | --- |
| `[].title` | `string` | Model stem |
| `[].model_name` | `string` | Same value as `title` |
| `[].filename` | `string` | Model filename |
| `[].hash` | `string` | Placeholder compatibility value |
| `[].sha256` | `string` | Placeholder compatibility value |
| `[].config` | `null` | Currently always null |
`GET /sdapi/v1/options`
| Field | Type | Notes |
| --- | --- | --- |
| `samples_format` | `string` | Currently fixed to `png` |
| `sd_model_checkpoint` | `string` | Model stem |
### Notes
- `sdapi` is synchronous from the HTTP client's perspective.
- Prompt-embedded `<lora:...>` tags are intentionally unsupported.
## sdcpp API
### Purpose
This is the native `stable-diffusion.cpp` API.
Use it when you want:
- async job submission
- explicit native parameter control
- frontend-oriented capability discovery
### Job Model
All async generation requests create a job.
Job states:
- `queued`
- `generating`
- `completed`
- `failed`
- `cancelled`
Common job shape:
```json
{
"id": "job_01HTXYZABC",
"kind": "img_gen",
"status": "queued",
"created": 1775401200,
"started": null,
"completed": null,
"queue_position": 2,
"result": null,
"error": null
}
```
Field types:
| Field | Type |
| --- | --- |
| `id` | `string` |
| `kind` | `string` |
| `status` | `string` |
| `created` | `integer` |
| `started` | `integer \| null` |
| `completed` | `integer \| null` |
| `queue_position` | `integer` |
| `result` | `object \| null` |
| `error` | `object \| null` |
### Endpoints
#### `GET /sdcpp/v1/capabilities`
Returns frontend-friendly capability metadata.
Typical contents:
| Field | Type |
| --- | --- |
| `model` | `object` |
| `defaults` | `object` |
| `loras` | `array<object>` |
| `samplers` | `array<string>` |
| `schedulers` | `array<string>` |
| `output_formats` | `array<string>` |
| `limits` | `object` |
| `features` | `object` |
Nested fields currently returned:
`model`
| Field | Type |
| --- | --- |
| `model.name` | `string` |
| `model.stem` | `string` |
| `model.path` | `string` |
`defaults`
| Field | Type |
| --- | --- |
| `defaults.prompt` | `string` |
| `defaults.negative_prompt` | `string` |
| `defaults.clip_skip` | `integer` |
| `defaults.width` | `integer` |
| `defaults.height` | `integer` |
| `defaults.strength` | `number` |
| `defaults.seed` | `integer` |
| `defaults.batch_count` | `integer` |
| `defaults.auto_resize_ref_image` | `boolean` |
| `defaults.increase_ref_index` | `boolean` |
| `defaults.control_strength` | `number` |
| `defaults.sample_params` | `object` |
| `defaults.sample_params.scheduler` | `string` |
| `defaults.sample_params.sample_method` | `string` |
| `defaults.sample_params.sample_steps` | `integer` |
| `defaults.sample_params.eta` | `number \| null` |
| `defaults.sample_params.shifted_timestep` | `integer` |
| `defaults.sample_params.flow_shift` | `number \| null` |
| `defaults.sample_params.guidance` | `object` |
| `defaults.sample_params.guidance.txt_cfg` | `number` |
| `defaults.sample_params.guidance.img_cfg` | `number \| null` |
| `defaults.sample_params.guidance.distilled_guidance` | `number` |
| `defaults.sample_params.guidance.slg` | `object` |
| `defaults.sample_params.guidance.slg.layers` | `array<integer>` |
| `defaults.sample_params.guidance.slg.layer_start` | `number` |
| `defaults.sample_params.guidance.slg.layer_end` | `number` |
| `defaults.sample_params.guidance.slg.scale` | `number` |
| `defaults.vae_tiling_params` | `object` |
| `defaults.vae_tiling_params.enabled` | `boolean` |
| `defaults.vae_tiling_params.tile_size_x` | `integer` |
| `defaults.vae_tiling_params.tile_size_y` | `integer` |
| `defaults.vae_tiling_params.target_overlap` | `number` |
| `defaults.vae_tiling_params.rel_size_x` | `number` |
| `defaults.vae_tiling_params.rel_size_y` | `number` |
| `defaults.cache_mode` | `string` |
| `defaults.cache_option` | `string` |
| `defaults.scm_mask` | `string` |
| `defaults.scm_policy_dynamic` | `boolean` |
| `defaults.output_format` | `string` |
| `defaults.output_compression` | `integer` |
`loras`
| Field | Type |
| --- | --- |
| `loras[].name` | `string` |
| `loras[].path` | `string` |
`limits`
| Field | Type |
| --- | --- |
| `limits.min_width` | `integer` |
| `limits.max_width` | `integer` |
| `limits.min_height` | `integer` |
| `limits.max_height` | `integer` |
| `limits.max_batch_count` | `integer` |
| `limits.max_queue_size` | `integer` |
`features`
| Field | Type |
| --- | --- |
| `features.init_image` | `boolean` |
| `features.mask_image` | `boolean` |
| `features.control_image` | `boolean` |
| `features.ref_images` | `boolean` |
| `features.lora` | `boolean` |
| `features.vae_tiling` | `boolean` |
| `features.cache` | `boolean` |
| `features.cancel_queued` | `boolean` |
| `features.cancel_generating` | `boolean` |
#### `POST /sdcpp/v1/img_gen`
Submits an async image generation job.
Successful submission returns `202 Accepted`.
Example response:
```json
{
"id": "job_01HTXYZABC",
"kind": "img_gen",
"status": "queued",
"created": 1775401200,
"poll_url": "/sdcpp/v1/jobs/job_01HTXYZABC"
}
```
Response fields:
| Field | Type |
| --- | --- |
| `id` | `string` |
| `kind` | `string` |
| `status` | `string` |
| `created` | `integer` |
| `poll_url` | `string` |
#### `GET /sdcpp/v1/jobs/{id}`
Returns current job status.
Typical status codes:
- `200 OK`
- `404 Not Found`
- `410 Gone`
#### `POST /sdcpp/v1/jobs/{id}/cancel`
Attempts to cancel an accepted job.
Typical status codes:
- `200 OK`
- `404 Not Found`
- `409 Conflict`
- `410 Gone`
### Canonical Request Schema
The `sdcpp API` request body is the canonical native schema.
Example:
```json
{
"prompt": "a cat sitting on a chair",
"negative_prompt": "",
"clip_skip": -1,
"width": 1024,
"height": 1024,
"strength": 0.75,
"seed": -1,
"batch_count": 1,
"auto_resize_ref_image": true,
"increase_ref_index": false,
"control_strength": 0.9,
"embed_image_metadata": true,
"init_image": null,
"ref_images": [],
"mask_image": null,
"control_image": null,
"sample_params": {
"scheduler": "discrete",
"sample_method": "euler_a",
"sample_steps": 28,
"eta": 1.0,
"shifted_timestep": 0,
"custom_sigmas": [],
"flow_shift": 0.0,
"guidance": {
"txt_cfg": 7.0,
"img_cfg": 7.0,
"distilled_guidance": 3.5,
"slg": {
"layers": [7, 8, 9],
"layer_start": 0.01,
"layer_end": 0.2,
"scale": 0.0
}
}
},
"lora": [],
"vae_tiling_params": {
"enabled": false,
"tile_size_x": 0,
"tile_size_y": 0,
"target_overlap": 0.5,
"rel_size_x": 0.0,
"rel_size_y": 0.0
},
"cache_mode": "disabled",
"cache_option": "",
"scm_mask": "",
"scm_policy_dynamic": true,
"output_format": "png",
"output_compression": 100
}
```
### LoRA Rules
- The server only accepts explicit LoRA entries from the `lora` field.
- Prompt-embedded `<lora:...>` tags are intentionally unsupported.
- Clients should resolve LoRA usage through the structured `lora` array.
### Image Encoding Rules
Any image field accepts:
- a raw base64 string, or
- a data URL such as `data:image/png;base64,...`
Channel expectations:
- `init_image`: 3 channels
- `ref_images[]`: 3 channels
- `control_image`: 3 channels
- `mask_image`: 1 channel
If omitted or null:
- single-image fields map to an empty `sd_image_t`
- array fields map to `nullptr + count = 0`
### Field Mapping Summary
Top-level scalar fields:
| Field | Type |
| --- | --- |
| `prompt` | `string` |
| `negative_prompt` | `string` |
| `clip_skip` | `integer` |
| `width` | `integer` |
| `height` | `integer` |
| `strength` | `number` |
| `seed` | `integer` |
| `batch_count` | `integer` |
| `auto_resize_ref_image` | `boolean` |
| `increase_ref_index` | `boolean` |
| `control_strength` | `number` |
| `embed_image_metadata` | `boolean` |
Image fields:
| Field | Type |
| --- | --- |
| `init_image` | `string \| null` |
| `ref_images` | `array<string>` |
| `mask_image` | `string \| null` |
| `control_image` | `string \| null` |
LoRA fields:
| Field | Type |
| --- | --- |
| `lora[].path` | `string` |
| `lora[].multiplier` | `number` |
| `lora[].is_high_noise` | `boolean` |
Sampling fields:
| Field | Type |
| --- | --- |
| `sample_params.scheduler` | `string` |
| `sample_params.sample_method` | `string` |
| `sample_params.sample_steps` | `integer` |
| `sample_params.eta` | `number` |
| `sample_params.shifted_timestep` | `integer` |
| `sample_params.custom_sigmas` | `array<number>` |
| `sample_params.flow_shift` | `number` |
| `sample_params.guidance.txt_cfg` | `number` |
| `sample_params.guidance.img_cfg` | `number` |
| `sample_params.guidance.distilled_guidance` | `number` |
| `sample_params.guidance.slg.layers` | `array<integer>` |
| `sample_params.guidance.slg.layer_start` | `number` |
| `sample_params.guidance.slg.layer_end` | `number` |
| `sample_params.guidance.slg.scale` | `number` |
Other native fields:
| Field | Type |
| --- | --- |
| `vae_tiling_params` | `object` |
| `cache_mode` | `string` |
| `cache_option` | `string` |
| `scm_mask` | `string` |
| `scm_policy_dynamic` | `boolean` |
HTTP-only output fields:
| Field | Type |
| --- | --- |
| `output_format` | `string` |
| `output_compression` | `integer` |
### Optional Field Semantics
Clients should preserve unset semantics for optional sampling fields.
If a user has not explicitly provided one of these fields, the client should omit it instead of injecting a guessed fallback:
- `sample_params.scheduler`
- `sample_params.sample_method`
- `sample_params.eta`
- `sample_params.flow_shift`
- `sample_params.guidance.img_cfg`
### Completion Result
Example completed job:
```json
{
"id": "job_01HTXYZABC",
"kind": "img_gen",
"status": "completed",
"created": 1775401200,
"started": 1775401203,
"completed": 1775401215,
"queue_position": 0,
"result": {
"output_format": "png",
"images": [
{
"index": 0,
"b64_json": "iVBORw0KGgoAAA..."
}
]
},
"error": null
}
```
### Failure Result
Example failed job:
```json
{
"id": "job_01HTXYZABC",
"kind": "img_gen",
"status": "failed",
"created": 1775401200,
"started": 1775401203,
"completed": 1775401204,
"queue_position": 0,
"result": null,
"error": {
"code": "generation_failed",
"message": "generate_image returned empty results"
}
}
```
### Cancelled Result
Example cancelled job:
```json
{
"id": "job_01HTXYZABC",
"kind": "img_gen",
"status": "cancelled",
"created": 1775401200,
"started": null,
"completed": 1775401202,
"queue_position": 0,
"result": null,
"error": {
"code": "cancelled",
"message": "job cancelled by client"
}
}
```
### Validation and Retention
Recommended behavior:
- malformed JSON returns `400`
- invalid image payloads return `400`
- invalid parameter structure returns `400`
- queue full returns `429` or `503`
- accepted runtime failures transition the job to `failed`
- unsupported in-progress cancellation may return `409`
Recommended retention controls:
- pending job limit
- completed job TTL
- failed job TTL
### Future `vid_gen`
Future `vid_gen` should reuse the same async job model:
- `POST /sdcpp/v1/vid_gen`
- `GET /sdcpp/v1/jobs/{id}`
- `POST /sdcpp/v1/jobs/{id}/cancel`
Its request body should mirror `sd_vid_gen_params_t` in the same way that `img_gen` mirrors `sd_img_gen_params_t`.

View File

@ -1,275 +0,0 @@
// Extracted from main.cpp during server refactor.
#include "async_jobs.h"
#include <iomanip>
#include <sstream>
#include "common/log.h"
#include "common/media_io.h"
#include "common/resource_owners.hpp"
// Maps an AsyncJobKind to its wire name for JSON responses.
// Any unrecognized value deliberately falls back to "img_gen", matching the
// original switch's default arm.
const char* async_job_kind_name(AsyncJobKind kind) {
    return (kind == AsyncJobKind::VidGen) ? "vid_gen" : "img_gen";
}
// Maps an AsyncJobStatus to its wire name for JSON responses.
// Unrecognized values fall back to "failed", matching the original switch's
// default arm.
const char* async_job_status_name(AsyncJobStatus status) {
    if (status == AsyncJobStatus::Queued) {
        return "queued";
    }
    if (status == AsyncJobStatus::Generating) {
        return "generating";
    }
    if (status == AsyncJobStatus::Completed) {
        return "completed";
    }
    if (status == AsyncJobStatus::Cancelled) {
        return "cancelled";
    }
    return "failed";  // AsyncJobStatus::Failed and any unexpected value
}
// Evicts finished jobs whose retention window has elapsed and trims stale
// tombstone entries from expired_jobs.
// NOTE(review): mutates manager state without locking — presumably every
// caller already holds manager.mutex; confirm at call sites.
void purge_expired_jobs(AsyncJobManager& manager) {
    const int64_t now = unix_timestamp_now();

    // Phase 1: discard tombstones whose retention deadline has passed.
    auto tomb_it = manager.expired_jobs.begin();
    while (tomb_it != manager.expired_jobs.end()) {
        if (tomb_it->second <= now) {
            tomb_it = manager.expired_jobs.erase(tomb_it);
        } else {
            ++tomb_it;
        }
    }

    // Phase 2: evict finished jobs past their TTL, recording a tombstone so
    // the job id can still be recognized for a while after eviction.
    auto job_it = manager.jobs.begin();
    while (job_it != manager.jobs.end()) {
        const auto& job = job_it->second;
        if (job->completed_at == 0) {
            // Still queued or generating — never evicted here.
            ++job_it;
            continue;
        }
        const int64_t ttl_seconds = (job->status == AsyncJobStatus::Completed)
                                        ? manager.completed_ttl_seconds
                                        : manager.failed_ttl_seconds;
        if (now - job->completed_at < ttl_seconds) {
            ++job_it;
            continue;
        }
        // Tombstone lives at least 60 seconds past eviction.
        manager.expired_jobs[job->id] = now + std::max<int64_t>(ttl_seconds, 60);
        job_it = manager.jobs.erase(job_it);
    }
}
// Counts jobs that still occupy queue capacity, i.e. those waiting in the
// queue or currently generating.
size_t count_pending_jobs(const AsyncJobManager& manager) {
    return static_cast<size_t>(
        std::count_if(manager.jobs.begin(), manager.jobs.end(), [](const auto& entry) {
            const auto status = entry.second->status;
            return status == AsyncJobStatus::Queued ||
                   status == AsyncJobStatus::Generating;
        }));
}
// Builds a unique job id of the form "job_<timestamp>_<counter>" and bumps
// the manager's counter.
// Note: std::hex is sticky on the stream, so both the timestamp and the
// zero-padded 8-character counter are rendered in hexadecimal.
std::string make_async_job_id(AsyncJobManager& manager) {
    std::ostringstream id_stream;
    id_stream << "job_" << std::hex << unix_timestamp_now() << '_';
    id_stream << std::setfill('0') << std::setw(8) << manager.next_id++;
    return id_stream.str();
}
// Removes a still-queued job from the dispatch queue and marks it cancelled.
// Returns false when the job id is no longer in the queue (it already started
// generating or finished), in which case the job is left untouched.
bool cancel_queued_job(AsyncJobManager& manager, AsyncGenerationJob& job) {
    const size_t size_before = manager.queue.size();
    // Erase every queue entry carrying this job id (remove-erase idiom).
    manager.queue.erase(std::remove(manager.queue.begin(), manager.queue.end(), job.id),
                        manager.queue.end());
    if (manager.queue.size() == size_before) {
        return false;  // nothing removed: job already left the queue
    }
    job.status = AsyncJobStatus::Cancelled;
    job.completed_at = unix_timestamp_now();
    job.result_images_b64.clear();
    job.error_code = "cancelled";
    job.error_message = "job cancelled by client";
    return true;
}
// Serializes one job into the JSON document returned by GET /sdcpp/v1/jobs/{id}.
// Shape: {id, kind, status, created, started, completed, queue_position,
// result, error}; started/completed are null until the matching timestamp is
// set, and exactly one of result/error is non-null once the job finishes.
// NOTE(review): reads manager.queue and job fields without taking a lock —
// presumably the caller holds manager.mutex; confirm at call sites.
json make_async_job_json(const AsyncJobManager& manager, const AsyncGenerationJob& job) {
    json result;
    result["id"] = job.id;
    result["kind"] = async_job_kind_name(job.kind);
    result["status"] = async_job_status_name(job.status);
    result["created"] = job.created_at;
    // A zero timestamp means "not reached yet" and is reported as JSON null.
    result["started"] = job.started_at == 0 ? json(nullptr) : json(job.started_at);
    result["completed"] = job.completed_at == 0 ? json(nullptr) : json(job.completed_at);
    result["queue_position"] = 0;
    if (job.status == AsyncJobStatus::Queued) {
        // Report the 1-based position of this job within the dispatch queue;
        // stays 0 if the id is unexpectedly absent from the queue.
        size_t position = 1;
        for (const auto& queued_id : manager.queue) {
            if (queued_id == job.id) {
                result["queue_position"] = position;
                break;
            }
            ++position;
        }
    }
    if (job.status == AsyncJobStatus::Completed) {
        // Completed jobs expose their base64-encoded outputs under "result".
        json images = json::array();
        for (size_t i = 0; i < job.result_images_b64.size(); ++i) {
            images.push_back({{"index", i}, {"b64_json", job.result_images_b64[i]}});
        }
        result["result"] = {
            {"output_format", job.img_gen.output_format},
            {"images", images},
        };
        result["error"] = nullptr;
    } else if (job.status == AsyncJobStatus::Failed ||
               job.status == AsyncJobStatus::Cancelled) {
        // Failed/cancelled jobs expose an error object; fall back to a
        // status-appropriate code when no explicit error code was recorded.
        result["result"] = nullptr;
        result["error"] = {
            {"code",
             job.error_code.empty()
                 ? (job.status == AsyncJobStatus::Cancelled ? "cancelled" : "generation_failed")
                 : job.error_code},
            {"message", job.error_message},
        };
    } else {
        // Queued/generating jobs have neither a result nor an error yet.
        result["result"] = nullptr;
        result["error"] = nullptr;
    }
    return result;
}
// Runs one image-generation job synchronously.
// On success, appends base64-encoded images to output_images and returns
// true; on failure, sets error_message and returns false. Per-image encode
// failures are skipped silently — the job only fails if nothing encodes.
bool execute_img_gen_job(ServerRuntime& runtime,
                         AsyncGenerationJob& job,
                         std::vector<std::string>& output_images,
                         std::string& error_message) {
    sd_img_gen_params_t params = job.img_gen.to_sd_img_gen_params_t();
    SDImageVec results;
    int num_results = 0;
    {
        // Only the generate_image call is serialized; encoding below runs
        // outside the lock so other requests are not blocked.
        std::lock_guard<std::mutex> lock(*runtime.sd_ctx_mutex);
        sd_image_t* raw_results = generate_image(runtime.sd_ctx, &params);
        num_results = params.batch_count;
        // SDImageVec takes ownership of the raw result array.
        results.adopt(raw_results, num_results);
    }
    if (results.empty() || num_results <= 0) {
        error_message = "generate_image returned no results";
        return false;
    }
    // Map the requested output format string to an encoder; PNG is the default
    // for anything unrecognized.
    EncodedImageFormat encoded_format = EncodedImageFormat::PNG;
    if (job.img_gen.output_format == "jpeg") {
        encoded_format = EncodedImageFormat::JPEG;
    } else if (job.img_gen.output_format == "webp") {
        encoded_format = EncodedImageFormat::WEBP;
    }
    for (int i = 0; i < num_results; ++i) {
        if (results[i].data == nullptr) {
            continue;
        }
        // Optional generation-parameter metadata embedded into the image;
        // the per-image seed offset (+i) matches batch generation order.
        const std::string metadata = job.img_gen.gen_params.embed_image_metadata
                                         ? get_image_params(*runtime.ctx_params,
                                                            job.img_gen.gen_params,
                                                            job.img_gen.gen_params.seed + i)
                                         : "";
        auto image_bytes = encode_image_to_vector(encoded_format,
                                                  results[i].data,
                                                  results[i].width,
                                                  results[i].height,
                                                  results[i].channel,
                                                  metadata,
                                                  job.img_gen.output_compression);
        if (image_bytes.empty()) {
            continue;  // encoding failed for this image; keep the rest
        }
        output_images.push_back(base64_encode(image_bytes));
    }
    if (output_images.empty()) {
        error_message = "generate_image returned empty encoded outputs";
        return false;
    }
    return true;
}
// Long-running worker loop: dequeues async generation jobs one at a time and
// executes them until `manager.stop` is set and the queue has drained.
// All AsyncJobManager state is read/written only while holding manager.mutex.
void async_job_worker(ServerRuntime& runtime) {
    AsyncJobManager& manager = *runtime.async_job_manager;
    while (true) {
        std::shared_ptr<AsyncGenerationJob> job;
        {
            // Wait for work (or shutdown) under the manager lock.
            std::unique_lock<std::mutex> lock(manager.mutex);
            manager.cv.wait(lock, [&]() { return manager.stop || !manager.queue.empty(); });
            if (manager.stop && manager.queue.empty()) {
                break;
            }
            purge_expired_jobs(manager);
            // Purging may have emptied the queue; go back to waiting.
            if (manager.queue.empty()) {
                continue;
            }
            const std::string job_id = manager.queue.front();
            manager.queue.pop_front();
            auto it = manager.jobs.find(job_id);
            // A queued id may no longer have a job record (e.g. it expired).
            if (it == manager.jobs.end()) {
                continue;
            }
            job = it->second;
            job->status = AsyncJobStatus::Generating;
            job->started_at = unix_timestamp_now();
        }
        // Run generation outside the manager lock so status queries stay
        // responsive; execute_img_gen_job locks the shared sd_ctx itself.
        std::vector<std::string> output_images;
        std::string error_message;
        bool ok = false;
        if (job->kind == AsyncJobKind::ImgGen) {
            ok = execute_img_gen_job(runtime, *job, output_images, error_message);
        } else {
            error_message = "unsupported job kind";
        }
        {
            std::lock_guard<std::mutex> lock(manager.mutex);
            auto it = manager.jobs.find(job->id);
            // If the job record disappeared while generating, drop the result.
            if (it == manager.jobs.end()) {
                continue;
            }
            job->completed_at = unix_timestamp_now();
            if (ok) {
                job->status = AsyncJobStatus::Completed;
                job->result_images_b64 = std::move(output_images);
                job->error_code.clear();
                job->error_message.clear();
            } else {
                job->status = AsyncJobStatus::Failed;
                job->error_code = "generation_failed";
                job->error_message = error_message.empty() ? "unknown generation error" : error_message;
                job->result_images_b64.clear();
            }
            purge_expired_jobs(manager);
        }
    }
}

View File

@ -1,66 +0,0 @@
#pragma once
#include <condition_variable>
#include <cstdint>
#include <deque>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>
#include "runtime.h"
// Type of work an async job performs.
enum class AsyncJobKind {
    ImgGen,  // image generation
    VidGen,  // video generation (not yet handled by async_job_worker)
};
// Lifecycle states of an async job.
enum class AsyncJobStatus {
    Queued,      // waiting in AsyncJobManager::queue
    Generating,  // picked up by the worker, generation in progress
    Completed,   // finished; result_images_b64 populated
    Failed,      // finished with an error; error_code/error_message set
    Cancelled,   // cancelled while still queued (see cancel_queued_job)
};
// Human-readable names for the enum values above.
const char* async_job_kind_name(AsyncJobKind kind);
const char* async_job_status_name(AsyncJobStatus status);
// One queued/running/finished generation request tracked by AsyncJobManager.
struct AsyncGenerationJob {
    std::string id;                                  // unique job id (see make_async_job_id)
    AsyncJobKind kind = AsyncJobKind::ImgGen;
    AsyncJobStatus status = AsyncJobStatus::Queued;
    int64_t created_at = unix_timestamp_now();       // unix timestamp; presumably seconds — confirm
    int64_t started_at = 0;                          // 0 until the worker picks the job up
    int64_t completed_at = 0;                        // 0 until the job finishes
    ImgGenJobRequest img_gen;                        // request payload (used for ImgGen jobs)
    std::vector<std::string> result_images_b64;      // base64-encoded outputs on success
    std::string error_code;                          // set on failure/cancellation
    std::string error_message;                       // human-readable failure detail
};
// Shared state for the async job queue; fields are guarded by `mutex` and
// the worker is woken via `cv` (see async_job_worker).
struct AsyncJobManager {
    std::mutex mutex;
    std::condition_variable cv;  // signalled when work arrives or stop is set
    std::unordered_map<std::string, std::shared_ptr<AsyncGenerationJob>> jobs;  // live jobs by id
    std::unordered_map<std::string, int64_t> expired_jobs;  // id -> timestamp; presumably purge bookkeeping — confirm in purge_expired_jobs
    std::deque<std::string> queue;  // ids of jobs awaiting the worker, FIFO
    uint64_t next_id = 0;           // counter backing make_async_job_id
    bool stop = false;              // shutdown flag for async_job_worker
    size_t max_pending_jobs = 64;   // cap on queued work
    int64_t completed_ttl_seconds = 600;  // how long completed jobs stay queryable
    int64_t failed_ttl_seconds = 600;     // how long failed jobs stay queryable
};
// Drops finished jobs past their TTL. Callers hold manager.mutex (as
// async_job_worker does).
void purge_expired_jobs(AsyncJobManager& manager);
// Counts jobs that have not finished yet (name-based; confirm implementation).
size_t count_pending_jobs(const AsyncJobManager& manager);
// Produces a fresh unique job id (backed by AsyncJobManager::next_id).
std::string make_async_job_id(AsyncJobManager& manager);
// Cancels a job that is still queued; generating jobs cannot be cancelled.
bool cancel_queued_job(AsyncJobManager& manager, AsyncGenerationJob& job);
// Serializes a job (status, queue position, results or error) to JSON.
json make_async_job_json(const AsyncJobManager& manager, const AsyncGenerationJob& job);
// Runs one image-generation job; see definition for details.
bool execute_img_gen_job(ServerRuntime& runtime,
                         AsyncGenerationJob& job,
                         std::vector<std::string>& output_images,
                         std::string& error_message);
// Worker thread entry point: consumes the queue until stop + drained.
void async_job_worker(ServerRuntime& runtime);

@ -1 +1 @@
Subproject commit 740475a7a6794dc07fb23e8ec5dc56e7e80aa8c1
Subproject commit 1a34176cd6d39ad3a226b2b69047e71f6797f6bc

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +0,0 @@
#pragma once
#include <string>
#include "httplib.h"
#include "runtime.h"
// Registers the "/" web UI endpoint (embedded page or --serve-html-path file).
void register_index_endpoints(httplib::Server& svr, const SDSvrParams& svr_params, const std::string& index_html);
// OpenAI-compatible endpoints (/v1/models, /v1/images/generations, /v1/images/edits).
void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt);
// AUTOMATIC1111-style endpoints (/sdapi/v1/...).
void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt);
// Native sd.cpp endpoints (/sdcpp/v1/...).
void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt);

View File

@ -1,22 +0,0 @@
#include "routes.h"
#include <fstream>
#include <iterator>
// Serves the web UI at "/": either a user-supplied HTML file (re-read on every
// request so edits take effect without a restart) or the embedded index page.
void register_index_endpoints(httplib::Server& svr, const SDSvrParams& svr_params, const std::string& index_html) {
    const std::string serve_html_path = svr_params.serve_html_path;
    svr.Get("/", [serve_html_path, index_html](const httplib::Request&, httplib::Response& res) {
        if (serve_html_path.empty()) {
            // No override configured: serve the compiled-in page.
            res.set_content(index_html, "text/html");
            return;
        }
        std::ifstream file(serve_html_path);
        if (!file) {
            res.status = 500;
            res.set_content("Error: Unable to read HTML file", "text/plain");
            return;
        }
        std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
        res.set_content(content, "text/html");
    });
}

View File

@ -1,376 +0,0 @@
#include "routes.h"
#include <algorithm>
#include <ctime>
#include <regex>
#include "common/common.h"
#include "common/media_io.h"
#include "common/resource_owners.hpp"
// Pulls the first <sd_cpp_extra_args>...</sd_cpp_extra_args> payload out of
// `text` and strips every such tag pair from it (regex_replace removes all
// occurrences; only the first payload is returned, as before).
// The regex is compiled once: std::regex construction is expensive and this
// helper runs on every request. NOTE(review): '.' does not match newlines in
// ECMAScript regexes, so payloads containing raw newlines are not extracted —
// confirm whether that is intended.
static std::string extract_and_remove_sd_cpp_extra_args(std::string& text) {
    static const std::regex re("<sd_cpp_extra_args>(.*?)</sd_cpp_extra_args>");
    std::smatch match;
    std::string extracted;
    if (std::regex_search(text, match, re)) {
        extracted = match[1].str();
        text = std::regex_replace(text, re, "");
    }
    return extracted;
}
// Builds an ImgGenJobRequest from an OpenAI-style /v1/images/generations JSON
// body. Returns false and fills `error_message` on validation failure.
// json::parse may throw; the HTTP handlers wrap calls in try/catch.
static bool build_openai_generation_request(const httplib::Request& req,
                                            ServerRuntime& runtime,
                                            ImgGenJobRequest& request,
                                            std::string& error_message) {
    if (req.body.empty()) {
        error_message = "empty body";
        return false;
    }
    json j = json::parse(req.body);
    std::string prompt = j.value("prompt", "");
    int n = std::max(1, j.value("n", 1));
    std::string size = j.value("size", "");
    std::string output_format = j.value("output_format", "png");
    int output_compression = j.value("output_compression", 100);
    // Fall back to the configured defaults, then 512, when no "size" is given.
    // Fixed: the height fallback previously tested default *width* > 0
    // (copy-paste typo), so a config with height set but width unset was ignored.
    int width = runtime.default_gen_params->width > 0 ? runtime.default_gen_params->width : 512;
    int height = runtime.default_gen_params->height > 0 ? runtime.default_gen_params->height : 512;
    if (!size.empty()) {
        // "size" is "WIDTHxHEIGHT"; malformed numbers keep the defaults.
        auto pos = size.find('x');
        if (pos != std::string::npos) {
            try {
                width = std::stoi(size.substr(0, pos));
                height = std::stoi(size.substr(pos + 1));
            } catch (...) {
            }
        }
    }
    if (prompt.empty()) {
        error_message = "prompt required";
        return false;
    }
    request.gen_params = *runtime.default_gen_params;
    if (!assign_output_options(request, output_format, output_compression, true, error_message)) {
        return false;
    }
    request.gen_params.prompt = prompt;
    request.gen_params.width = width;
    request.gen_params.height = height;
    request.gen_params.batch_count = n;
    // Extra sd.cpp-specific options may be embedded in the prompt as
    // <sd_cpp_extra_args>{...}</sd_cpp_extra_args>; strip and apply them.
    std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt);
    if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) {
        error_message = "invalid sd_cpp_extra_args";
        return false;
    }
    // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
        error_message = "invalid params";
        return false;
    }
    return true;
}
// Builds an ImgGenJobRequest from an OpenAI-style /v1/images/edits
// multipart/form-data request (prompt + image[] refs + optional mask).
// Returns false and fills `error_message` on validation failure.
static bool build_openai_edit_request(const httplib::Request& req,
                                      ServerRuntime& runtime,
                                      ImgGenJobRequest& request,
                                      std::string& error_message) {
    if (!req.is_multipart_form_data()) {
        error_message = "Content-Type must be multipart/form-data";
        return false;
    }
    std::string prompt = req.form.get_field("prompt");
    if (prompt.empty()) {
        error_message = "prompt required";
        return false;
    }
    // Accept both the array form "image[]" and the legacy single "image" field.
    size_t image_count = req.form.get_file_count("image[]");
    bool has_legacy_image = req.form.has_file("image");
    if (image_count == 0 && !has_legacy_image) {
        error_message = "at least one image[] required";
        return false;
    }
    std::vector<std::vector<uint8_t>> images_bytes;
    for (size_t i = 0; i < image_count; ++i) {
        auto file = req.form.get_file("image[]", i);
        images_bytes.emplace_back(file.content.begin(), file.content.end());
    }
    if (image_count == 0 && has_legacy_image) {
        auto file = req.form.get_file("image");
        images_bytes.emplace_back(file.content.begin(), file.content.end());
    }
    std::vector<uint8_t> mask_bytes;
    if (req.form.has_file("mask")) {
        auto file = req.form.get_file("mask");
        mask_bytes.assign(file.content.begin(), file.content.end());
    }
    // Malformed numeric fields silently keep their defaults, matching the
    // generations endpoint.
    int n = 1;
    if (req.form.has_field("n")) {
        try {
            n = std::stoi(req.form.get_field("n"));
        } catch (...) {
        }
    }
    std::string size = req.form.get_field("size");
    int width = -1;
    int height = -1;
    if (!size.empty()) {
        auto pos = size.find('x');
        if (pos != std::string::npos) {
            try {
                width = std::stoi(size.substr(0, pos));
                height = std::stoi(size.substr(pos + 1));
            } catch (...) {
            }
        }
    }
    std::string output_format = req.form.has_field("output_format")
                                    ? req.form.get_field("output_format")
                                    : "png";
    int output_compression = 100;
    try {
        output_compression = std::stoi(req.form.get_field("output_compression"));
    } catch (...) {
    }
    request.gen_params = *runtime.default_gen_params;
    if (!assign_output_options(request, output_format, output_compression, false, error_message)) {
        return false;
    }
    request.gen_params.prompt = prompt;
    request.gen_params.width = width;
    request.gen_params.height = height;
    request.gen_params.batch_count = n;
    // Decode every uploaded image as a reference image; undecodable uploads
    // are skipped (same policy as before).
    for (auto& bytes : images_bytes) {
        int img_w = 0;
        int img_h = 0;
        uint8_t* raw_pixels = load_image_from_memory(
            reinterpret_cast<const char*>(bytes.data()),
            static_cast<int>(bytes.size()),
            img_w, img_h,
            width, height, 3);
        if (raw_pixels == nullptr) {
            continue;
        }
        SDImageOwner image_owner({(uint32_t)img_w, (uint32_t)img_h, 3, raw_pixels});
        request.gen_params.set_width_and_height_if_unset(image_owner.get().width, image_owner.get().height);
        request.gen_params.ref_images.push_back(std::move(image_owner));
    }
    if (!request.gen_params.ref_images.empty()) {
        request.gen_params.init_image = request.gen_params.ref_images.front();
    }
    if (!mask_bytes.empty()) {
        int expected_width = 0;
        int expected_height = 0;
        if (request.gen_params.width_and_height_are_set()) {
            expected_width = request.gen_params.width;
            expected_height = request.gen_params.height;
        }
        int mask_w = 0;
        int mask_h = 0;
        uint8_t* mask_raw = load_image_from_memory(
            reinterpret_cast<const char*>(mask_bytes.data()),
            static_cast<int>(mask_bytes.size()),
            mask_w, mask_h,
            expected_width, expected_height, 1);
        if (mask_raw == nullptr) {
            // Fixed: a failed mask decode used to install a zero-sized image
            // with null data. Fall back to the same "no mask" placeholder used
            // when the client sends no mask at all.
            request.gen_params.mask_image.reset({
                (uint32_t)request.gen_params.get_resolved_width(),
                (uint32_t)request.gen_params.get_resolved_height(),
                1,
                nullptr,
            });
        } else {
            request.gen_params.mask_image.reset({(uint32_t)mask_w, (uint32_t)mask_h, 1, mask_raw});
            const sd_image_t& mask_image = request.gen_params.mask_image.get();
            request.gen_params.set_width_and_height_if_unset(mask_image.width, mask_image.height);
        }
    } else {
        // No mask supplied: a null-data mask at the resolved resolution.
        request.gen_params.mask_image.reset({
            (uint32_t)request.gen_params.get_resolved_width(),
            (uint32_t)request.gen_params.get_resolved_height(),
            1,
            nullptr,
        });
    }
    // Extra sd.cpp-specific options embedded in the prompt, as in the
    // generations endpoint.
    std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt);
    if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) {
        error_message = "invalid sd_cpp_extra_args";
        return false;
    }
    // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
        error_message = "invalid params";
        return false;
    }
    return true;
}
// Runs a blocking image generation for `request` while holding the shared
// sd_ctx mutex, adopting the returned images into `results`.
// Returns false (with `error_message` set) when generation produced nothing.
static bool execute_sync_img_gen_request(ServerRuntime& runtime,
                                         ImgGenJobRequest& request,
                                         SDImageVec& results,
                                         std::string& error_message) {
    sd_img_gen_params_t img_gen_params = request.to_sd_img_gen_params_t();
    {
        std::lock_guard<std::mutex> lock(*runtime.sd_ctx_mutex);
        sd_image_t* raw_results = generate_image(runtime.sd_ctx, &img_gen_params);
        results.adopt(raw_results, request.gen_params.batch_count);
    }
    if (!results.empty()) {
        return true;
    }
    error_message = "generate_image returned no results";
    return false;
}
// Registers OpenAI-compatible endpoints: /v1/models, /v1/images/generations
// and /v1/images/edits. Responses mirror the OpenAI images API shape
// ({"created", "data":[{"b64_json"}...]}).
void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
    ServerRuntime* runtime = &rt;
    // Minimal model listing so OpenAI clients can enumerate "a" model.
    svr.Get("/v1/models", [runtime](const httplib::Request&, httplib::Response& res) {
        json r;
        r["data"] = json::array();
        r["data"].push_back({{"id", "sd-cpp-local"}, {"object", "model"}, {"owned_by", "local"}});
        res.set_content(r.dump(), "application/json");
    });
    svr.Post("/v1/images/generations", [runtime](const httplib::Request& req, httplib::Response& res) {
        try {
            ImgGenJobRequest request;
            std::string error_message;
            if (!build_openai_generation_request(req, *runtime, request, error_message)) {
                res.status = 400;
                res.set_content(json({{"error", error_message}}).dump(), "application/json");
                return;
            }
            LOG_DEBUG("%s\n", request.gen_params.to_string().c_str());
            SDImageVec results;
            if (!execute_sync_img_gen_request(*runtime, request, results, error_message)) {
                res.status = 500;
                res.set_content(json({{"error", error_message}}).dump(), "application/json");
                return;
            }
            json out;
            out["created"] = static_cast<long long>(std::time(nullptr));
            out["data"] = json::array();
            out["output_format"] = request.output_format;
            for (int i = 0; i < request.gen_params.batch_count; ++i) {
                if (results[i].data == nullptr) {
                    continue;
                }
                // Metadata (when enabled) records the per-image seed.
                std::string params = request.gen_params.embed_image_metadata
                                         ? get_image_params(*runtime->ctx_params,
                                                            request.gen_params,
                                                            request.gen_params.seed + i)
                                         : "";
                auto image_bytes = encode_image_to_vector(request.output_format == "jpeg"
                                                              ? EncodedImageFormat::JPEG
                                                          : request.output_format == "webp"
                                                              ? EncodedImageFormat::WEBP
                                                              : EncodedImageFormat::PNG,
                                                          results[i].data,
                                                          results[i].width,
                                                          results[i].height,
                                                          results[i].channel,
                                                          params,
                                                          request.output_compression);
                if (image_bytes.empty()) {
                    LOG_ERROR("write image to mem failed");
                    continue;
                }
                json item;
                item["b64_json"] = base64_encode(image_bytes);
                out["data"].push_back(item);
            }
            res.set_content(out.dump(), "application/json");
            res.status = 200;
        } catch (const std::exception& e) {
            res.status = 500;
            json err;
            err["error"] = "server_error";
            err["message"] = e.what();
            res.set_content(err.dump(), "application/json");
        }
    });
    svr.Post("/v1/images/edits", [runtime](const httplib::Request& req, httplib::Response& res) {
        try {
            ImgGenJobRequest request;
            std::string error_message;
            if (!build_openai_edit_request(req, *runtime, request, error_message)) {
                res.status = 400;
                res.set_content(json({{"error", error_message}}).dump(), "application/json");
                return;
            }
            LOG_DEBUG("%s\n", request.gen_params.to_string().c_str());
            SDImageVec results;
            if (!execute_sync_img_gen_request(*runtime, request, results, error_message)) {
                res.status = 500;
                res.set_content(json({{"error", error_message}}).dump(), "application/json");
                return;
            }
            json out;
            out["created"] = static_cast<long long>(std::time(nullptr));
            out["data"] = json::array();
            out["output_format"] = request.output_format;
            for (int i = 0; i < request.gen_params.batch_count; ++i) {
                if (results[i].data == nullptr) {
                    continue;
                }
                std::string params = request.gen_params.embed_image_metadata
                                         ? get_image_params(*runtime->ctx_params,
                                                            request.gen_params,
                                                            request.gen_params.seed + i)
                                         : "";
                // NOTE(review): edits only offers JPEG/PNG (webp falls back to
                // PNG here) — presumably intentional, since the edit request is
                // built with a different assign_output_options flag; confirm.
                auto image_bytes = encode_image_to_vector(request.output_format == "jpeg" ? EncodedImageFormat::JPEG : EncodedImageFormat::PNG,
                                                          results[i].data,
                                                          results[i].width,
                                                          results[i].height,
                                                          results[i].channel,
                                                          params,
                                                          request.output_compression);
                // Fixed: failed encodes used to push an empty b64_json item;
                // skip them with an error log like the generations endpoint.
                if (image_bytes.empty()) {
                    LOG_ERROR("write image to mem failed");
                    continue;
                }
                json item;
                item["b64_json"] = base64_encode(image_bytes);
                out["data"].push_back(item);
            }
            res.set_content(out.dump(), "application/json");
            res.status = 200;
        } catch (const std::exception& e) {
            res.status = 500;
            json err;
            err["error"] = "server_error";
            err["message"] = e.what();
            res.set_content(err.dump(), "application/json");
        }
    });
}

View File

@ -1,399 +0,0 @@
#include "routes.h"
#include <algorithm>
#include <cstring>
#include <regex>
#include <string_view>
#include <unordered_map>
#include "common/common.h"
#include "common/media_io.h"
#include "common/resource_owners.hpp"
namespace fs = std::filesystem;
// Pulls the first <sd_cpp_extra_args>...</sd_cpp_extra_args> payload out of
// `text` and strips every such tag pair from it (regex_replace removes all
// occurrences; only the first payload is returned, as before).
// The regex is compiled once: std::regex construction is expensive and this
// helper runs on every request. NOTE(review): '.' does not match newlines in
// ECMAScript regexes, so payloads containing raw newlines are not extracted —
// confirm whether that is intended.
static std::string extract_and_remove_sd_cpp_extra_args(std::string& text) {
    static const std::regex re("<sd_cpp_extra_args>(.*?)</sd_cpp_extra_args>");
    std::smatch match;
    std::string extracted;
    if (std::regex_search(text, match, re)) {
        extracted = match[1].str();
        text = std::regex_replace(text, re, "");
    }
    return extracted;
}
// Picks the model path shown to API clients: the full-pipeline model path,
// else the standalone diffusion model path, else an empty path.
static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
    const auto& ctx = *runtime.ctx_params;
    if (ctx.model_path.empty()) {
        if (ctx.diffusion_model_path.empty()) {
            return {};
        }
        return fs::path(ctx.diffusion_model_path);
    }
    return fs::path(ctx.model_path);
}
static enum sample_method_t get_sdapi_sample_method(std::string name) {
enum sample_method_t result = str_to_sample_method(name.c_str());
if (result != SAMPLE_METHOD_COUNT) {
return result;
}
std::transform(name.begin(), name.end(), name.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
static const std::unordered_map<std::string_view, sample_method_t> hardcoded{
{"euler a", EULER_A_SAMPLE_METHOD},
{"k_euler_a", EULER_A_SAMPLE_METHOD},
{"euler", EULER_SAMPLE_METHOD},
{"k_euler", EULER_SAMPLE_METHOD},
{"heun", HEUN_SAMPLE_METHOD},
{"k_heun", HEUN_SAMPLE_METHOD},
{"dpm2", DPM2_SAMPLE_METHOD},
{"k_dpm_2", DPM2_SAMPLE_METHOD},
{"lcm", LCM_SAMPLE_METHOD},
{"ddim", DDIM_TRAILING_SAMPLE_METHOD},
{"dpm++ 2m", DPMPP2M_SAMPLE_METHOD},
{"k_dpmpp_2m", DPMPP2M_SAMPLE_METHOD},
{"res multistep", RES_MULTISTEP_SAMPLE_METHOD},
{"k_res_multistep", RES_MULTISTEP_SAMPLE_METHOD},
{"res 2s", RES_2S_SAMPLE_METHOD},
{"k_res_2s", RES_2S_SAMPLE_METHOD},
};
auto it = hardcoded.find(name);
return it != hardcoded.end() ? it->second : SAMPLE_METHOD_COUNT;
}
// Installs a width x height single-channel all-opaque (255) mask into
// `mask_owner`; on allocation failure the owner is reset to an empty image.
// malloc is kept — presumably the owner releases the buffer with free();
// confirm before changing the allocator.
static void assign_solid_mask(SDImageOwner& mask_owner, int width, int height) {
    const size_t pixel_count = static_cast<size_t>(width) * static_cast<size_t>(height);
    uint8_t* pixels = static_cast<uint8_t*>(malloc(pixel_count));
    if (pixels == nullptr) {
        mask_owner.reset({0, 0, 1, nullptr});
        return;
    }
    std::fill_n(pixels, pixel_count, static_cast<uint8_t>(255));
    mask_owner.reset({(uint32_t)width, (uint32_t)height, 1, pixels});
}
// Builds an ImgGenJobRequest from an AUTOMATIC1111-style /sdapi/v1
// txt2img/img2img JSON body, starting from the server-wide defaults.
// `img2img` enables init image / mask / denoising handling.
// Returns false and fills `error_message` on validation failure.
static bool build_sdapi_img_gen_request(const json& j,
                                        ServerRuntime& runtime,
                                        bool img2img,
                                        ImgGenJobRequest& request,
                                        std::string& error_message) {
    std::string prompt = j.value("prompt", "");
    std::string negative_prompt = j.value("negative_prompt", "");
    // width/height are read twice: with 512 defaults here for validation, and
    // with -1 defaults below so "unset" can still be detected later.
    int width = j.value("width", 512);
    int height = j.value("height", 512);
    int steps = j.value("steps", runtime.default_gen_params->sample_params.sample_steps);
    float cfg_scale = j.value("cfg_scale", runtime.default_gen_params->sample_params.guidance.txt_cfg);
    int64_t seed = j.value("seed", -1);
    int batch_size = j.value("batch_size", 1);
    int clip_skip = j.value("clip_skip", -1);
    std::string sampler_name = j.value("sampler_name", "");
    std::string scheduler_name = j.value("scheduler", "");
    if (width <= 0 || height <= 0) {
        error_message = "width and height must be positive";
        return false;
    }
    if (prompt.empty()) {
        error_message = "prompt required";
        return false;
    }
    request.gen_params = *runtime.default_gen_params;
    request.gen_params.prompt = prompt;
    request.gen_params.negative_prompt = negative_prompt;
    request.gen_params.seed = seed;
    request.gen_params.sample_params.sample_steps = steps;
    request.gen_params.batch_count = batch_size;
    request.gen_params.sample_params.guidance.txt_cfg = cfg_scale;
    request.gen_params.width = j.value("width", -1);
    request.gen_params.height = j.value("height", -1);
    // Extra sd.cpp-specific options embedded in the prompt as
    // <sd_cpp_extra_args>{...}</sd_cpp_extra_args>.
    std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(request.gen_params.prompt);
    if (!sd_cpp_extra_args_str.empty() && !request.gen_params.from_json_str(sd_cpp_extra_args_str)) {
        error_message = "invalid sd_cpp_extra_args";
        return false;
    }
    if (clip_skip > 0) {
        request.gen_params.clip_skip = clip_skip;
    }
    // Unknown sampler/scheduler names leave the defaults untouched.
    enum sample_method_t sample_method = get_sdapi_sample_method(sampler_name);
    if (sample_method != SAMPLE_METHOD_COUNT) {
        request.gen_params.sample_params.sample_method = sample_method;
    }
    enum scheduler_t scheduler = str_to_scheduler(scheduler_name.c_str());
    if (scheduler != SCHEDULER_COUNT) {
        request.gen_params.sample_params.scheduler = scheduler;
    }
    // Explicit LoRA list replaces the defaults; multipliers for duplicate
    // paths accumulate via +=.
    if (j.contains("lora") && j["lora"].is_array()) {
        request.gen_params.lora_map.clear();
        request.gen_params.high_noise_lora_map.clear();
        for (const auto& item : j["lora"]) {
            if (!item.is_object()) {
                continue;
            }
            std::string path = item.value("path", "");
            float multiplier = item.value("multiplier", 1.0f);
            bool is_high_noise = item.value("is_high_noise", false);
            if (path.empty()) {
                error_message = "lora.path required";
                return false;
            }
            std::string fullpath = get_lora_full_path(runtime, path);
            if (fullpath.empty()) {
                error_message = "invalid lora path: " + path;
                return false;
            }
            if (is_high_noise) {
                request.gen_params.high_noise_lora_map[fullpath] += multiplier;
            } else {
                request.gen_params.lora_map[fullpath] += multiplier;
            }
        }
    }
    if (img2img) {
        const int expected_width = request.gen_params.width_and_height_are_set() ? request.gen_params.width : 0;
        const int expected_height = request.gen_params.width_and_height_are_set() ? request.gen_params.height : 0;
        // Only the first init_images entry is used (A1111 sends an array).
        if (j.contains("init_images") && j["init_images"].is_array() && !j["init_images"].empty()) {
            if (decode_base64_image(j["init_images"][0].get<std::string>(),
                                    3,
                                    expected_width,
                                    expected_height,
                                    request.gen_params.init_image)) {
                const sd_image_t& image = request.gen_params.init_image.get();
                request.gen_params.set_width_and_height_if_unset(image.width, image.height);
            }
        }
        if (j.contains("mask") && j["mask"].is_string()) {
            if (decode_base64_image(j["mask"].get<std::string>(),
                                    1,
                                    expected_width,
                                    expected_height,
                                    request.gen_params.mask_image)) {
                const sd_image_t& image = request.gen_params.mask_image.get();
                request.gen_params.set_width_and_height_if_unset(image.width, image.height);
            }
            sd_image_t& mask_image = request.gen_params.mask_image.get();
            // A1111 semantics: inpainting_mask_invert != 0 flips the mask.
            bool inpainting_mask_invert = j.value("inpainting_mask_invert", 0) != 0;
            if (inpainting_mask_invert && mask_image.data != nullptr) {
                for (uint32_t i = 0; i < mask_image.width * mask_image.height; ++i) {
                    mask_image.data[i] = 255 - mask_image.data[i];
                }
            }
        } else {
            // No mask supplied: use an all-opaque mask at the resolved size.
            const int resolved_width = request.gen_params.get_resolved_width();
            const int resolved_height = request.gen_params.get_resolved_height();
            assign_solid_mask(request.gen_params.mask_image, resolved_width, resolved_height);
        }
        float denoising_strength = j.value("denoising_strength", -1.f);
        if (denoising_strength >= 0.f) {
            request.gen_params.strength = std::min(denoising_strength, 1.0f);
        }
    }
    // Optional additional reference images (base64-encoded).
    if (j.contains("extra_images") && j["extra_images"].is_array()) {
        for (const auto& extra_image : j["extra_images"]) {
            if (!extra_image.is_string()) {
                continue;
            }
            SDImageOwner image_owner;
            if (decode_base64_image(extra_image.get<std::string>(),
                                    3,
                                    request.gen_params.width_and_height_are_set() ? request.gen_params.width : 0,
                                    request.gen_params.width_and_height_are_set() ? request.gen_params.height : 0,
                                    image_owner)) {
                const sd_image_t& image = image_owner.get();
                request.gen_params.set_width_and_height_if_unset(image.width, image.height);
                request.gen_params.ref_images.push_back(std::move(image_owner));
            }
        }
    }
    // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
        error_message = "invalid params";
        return false;
    }
    return true;
}
// Registers AUTOMATIC1111-compatible endpoints under /sdapi/v1/:
// txt2img, img2img, loras, samplers, schedulers, sd-models, options.
void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
    ServerRuntime* runtime = &rt;
    // Shared handler for txt2img/img2img; `img2img` toggles init/mask handling.
    auto sdapi_any2img = [runtime](const httplib::Request& req, httplib::Response& res, bool img2img) {
        try {
            if (req.body.empty()) {
                res.status = 400;
                res.set_content(R"({"error":"empty body"})", "application/json");
                return;
            }
            json j = json::parse(req.body);
            ImgGenJobRequest request;
            std::string error_message;
            if (!build_sdapi_img_gen_request(j, *runtime, img2img, request, error_message)) {
                res.status = 400;
                res.set_content(json({{"error", error_message}}).dump(), "application/json");
                return;
            }
            LOG_DEBUG("%s\n", request.gen_params.to_string().c_str());
            sd_img_gen_params_t img_gen_params = request.to_sd_img_gen_params_t();
            SDImageVec results;
            int num_results = 0;
            {
                // Generation is serialized on the shared sd_ctx.
                std::lock_guard<std::mutex> lock(*runtime->sd_ctx_mutex);
                sd_image_t* raw_results = generate_image(runtime->sd_ctx, &img_gen_params);
                num_results = request.gen_params.batch_count;
                results.adopt(raw_results, num_results);
            }
            if (results.empty()) {
                res.status = 500;
                res.set_content(R"({"error":"generate_image returned no results"})", "application/json");
                return;
            }
            // Response shape mirrors A1111: images / parameters (request echo) / info.
            json out;
            out["images"] = json::array();
            out["parameters"] = j;
            out["info"] = "";
            for (int i = 0; i < num_results; ++i) {
                if (results[i].data == nullptr) {
                    continue;
                }
                // Metadata (when enabled) records the per-image seed.
                std::string params = request.gen_params.embed_image_metadata
                                         ? get_image_params(*runtime->ctx_params,
                                                            request.gen_params,
                                                            request.gen_params.seed + i)
                                         : "";
                auto image_bytes = encode_image_to_vector(EncodedImageFormat::PNG,
                                                          results[i].data,
                                                          results[i].width,
                                                          results[i].height,
                                                          results[i].channel,
                                                          params);
                if (image_bytes.empty()) {
                    LOG_ERROR("write image to mem failed");
                    continue;
                }
                out["images"].push_back(base64_encode(image_bytes));
            }
            res.set_content(out.dump(), "application/json");
            res.status = 200;
        } catch (const std::exception& e) {
            res.status = 500;
            json err;
            err["error"] = "server_error";
            err["message"] = e.what();
            res.set_content(err.dump(), "application/json");
        }
    };
    svr.Post("/sdapi/v1/txt2img", [sdapi_any2img](const httplib::Request& req, httplib::Response& res) {
        sdapi_any2img(req, res, false);
    });
    svr.Post("/sdapi/v1/img2img", [sdapi_any2img](const httplib::Request& req, httplib::Response& res) {
        sdapi_any2img(req, res, true);
    });
    // Lists LoRAs discovered on disk (cache refreshed on each call).
    svr.Get("/sdapi/v1/loras", [runtime](const httplib::Request&, httplib::Response& res) {
        refresh_lora_cache(*runtime);
        json result = json::array();
        {
            std::lock_guard<std::mutex> lock(*runtime->lora_mutex);
            for (const auto& e : *runtime->lora_cache) {
                json item;
                item["name"] = e.name;
                item["path"] = e.path;
                result.push_back(item);
            }
        }
        res.set_content(result.dump(), "application/json");
    });
    // Enumerates "default" plus every built-in sampler name.
    svr.Get("/sdapi/v1/samplers", [runtime](const httplib::Request&, httplib::Response& res) {
        std::vector<std::string> sampler_names;
        sampler_names.push_back("default");
        for (int i = 0; i < SAMPLE_METHOD_COUNT; i++) {
            sampler_names.push_back(sd_sample_method_name((sample_method_t)i));
        }
        json r = json::array();
        for (auto name : sampler_names) {
            json entry;
            entry["name"] = name;
            entry["aliases"] = json::array({name});
            entry["options"] = json::object();
            r.push_back(entry);
        }
        res.set_content(r.dump(), "application/json");
    });
    // Enumerates "default" plus every built-in scheduler name.
    svr.Get("/sdapi/v1/schedulers", [runtime](const httplib::Request&, httplib::Response& res) {
        std::vector<std::string> scheduler_names;
        scheduler_names.push_back("default");
        for (int i = 0; i < SCHEDULER_COUNT; i++) {
            scheduler_names.push_back(sd_scheduler_name((scheduler_t)i));
        }
        json r = json::array();
        for (auto name : scheduler_names) {
            json entry;
            entry["name"] = name;
            entry["label"] = name;
            r.push_back(entry);
        }
        res.set_content(r.dump(), "application/json");
    });
    // Reports the single loaded model; hash/sha256 are fixed placeholder
    // values — presumably just for A1111 client compatibility.
    svr.Get("/sdapi/v1/sd-models", [runtime](const httplib::Request&, httplib::Response& res) {
        fs::path model_path = resolve_display_model_path(*runtime);
        json entry;
        entry["title"] = model_path.stem();
        entry["model_name"] = model_path.stem();
        entry["filename"] = model_path.filename();
        entry["hash"] = "8888888888";
        entry["sha256"] = "8888888888888888888888888888888888888888888888888888888888888888";
        entry["config"] = nullptr;
        json r = json::array();
        r.push_back(entry);
        res.set_content(r.dump(), "application/json");
    });
    svr.Get("/sdapi/v1/options", [runtime](const httplib::Request&, httplib::Response& res) {
        fs::path model_path = resolve_display_model_path(*runtime);
        json r;
        r["samples_format"] = "png";
        r["sd_model_checkpoint"] = model_path.stem();
        res.set_content(r.dump(), "application/json");
    });
}

View File

@ -1,345 +0,0 @@
#include "routes.h"
#include <algorithm>
#include <cmath>
#include <filesystem>
#include "async_jobs.h"
#include "common/common.h"
namespace fs = std::filesystem;
static bool parse_cache_mode(const std::string& mode_str, sd_cache_mode_t& mode_out) {
if (mode_str == "disabled") {
mode_out = SD_CACHE_DISABLED;
return true;
}
if (mode_str == "easycache") {
mode_out = SD_CACHE_EASYCACHE;
return true;
}
if (mode_str == "ucache") {
mode_out = SD_CACHE_UCACHE;
return true;
}
if (mode_str == "dbcache") {
mode_out = SD_CACHE_DBCACHE;
return true;
}
if (mode_str == "taylorseer") {
mode_out = SD_CACHE_TAYLORSEER;
return true;
}
if (mode_str == "cache-dit") {
mode_out = SD_CACHE_CACHE_DIT;
return true;
}
if (mode_str == "spectrum") {
mode_out = SD_CACHE_SPECTRUM;
return true;
}
return false;
}
// Returns the value as a JSON number when it is finite; JSON null otherwise
// (NaN/inf cannot be represented in JSON).
static json finite_number_or_null(float value) {
    if (!std::isfinite(value)) {
        return json(nullptr);
    }
    return json(value);
}
// Name of the scheduler for the capabilities JSON; "default" for out-of-range.
static const char* capability_scheduler_name(enum scheduler_t scheduler) {
    if (scheduler < SCHEDULER_COUNT) {
        return sd_scheduler_name(scheduler);
    }
    return "default";
}
// Name of the sampler for the capabilities JSON; "default" for out-of-range.
static const char* capability_sample_method_name(enum sample_method_t sample_method) {
    if (sample_method < SAMPLE_METHOD_COUNT) {
        return sd_sample_method_name(sample_method);
    }
    return "default";
}
// Serializes VAE tiling parameters for the capabilities JSON.
static json make_vae_tiling_json(const sd_tiling_params_t& params) {
    json out;
    out["enabled"] = params.enabled;
    out["tile_size_x"] = params.tile_size_x;
    out["tile_size_y"] = params.tile_size_y;
    out["target_overlap"] = params.target_overlap;
    out["rel_size_x"] = params.rel_size_x;
    out["rel_size_y"] = params.rel_size_y;
    return out;
}
// Picks the model path shown to API clients: the full-pipeline model path,
// else the standalone diffusion model path, else an empty path.
static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
    const auto& ctx = *runtime.ctx_params;
    if (ctx.model_path.empty()) {
        if (ctx.diffusion_model_path.empty()) {
            return {};
        }
        return fs::path(ctx.diffusion_model_path);
    }
    return fs::path(ctx.model_path);
}
// Builds the /sdcpp/v1/capabilities response: loaded model info, server-wide
// default generation parameters, hard limits, supported samplers/schedulers/
// output formats, feature flags and the current LoRA inventory.
static json make_capabilities_json(ServerRuntime& runtime) {
    refresh_lora_cache(runtime);
    AsyncJobManager& manager = *runtime.async_job_manager;
    const auto& defaults = *runtime.default_gen_params;
    const auto& sample_params = defaults.sample_params;
    const auto& guidance = sample_params.guidance;
    const fs::path model_path = resolve_display_model_path(runtime);
    json samplers = json::array();
    json schedulers = json::array();
    json output_formats = json::array({"png", "jpeg"});
    json available_loras = json::array();
    for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
        samplers.push_back(sd_sample_method_name((sample_method_t)i));
    }
    for (int i = 0; i < SCHEDULER_COUNT; ++i) {
        schedulers.push_back(sd_scheduler_name((scheduler_t)i));
    }
// webp output is only available when built with libwebp support.
#ifdef SD_USE_WEBP
    output_formats.push_back("webp");
#endif
    {
        std::lock_guard<std::mutex> lock(*runtime.lora_mutex);
        for (const auto& entry : *runtime.lora_cache) {
            available_loras.push_back({
                {"name", entry.name},
                {"path", entry.path},
            });
        }
    }
    json result;
    result["model"] = {
        {"name", model_path.filename().u8string()},
        {"stem", model_path.stem().u8string()},
        {"path", model_path.u8string()},
    };
    // Defaults mirror the server's configured generation parameters; NaN/inf
    // values are reported as null via finite_number_or_null.
    result["defaults"] = {
        {"prompt", defaults.prompt},
        {"negative_prompt", defaults.negative_prompt},
        {"clip_skip", defaults.clip_skip},
        {"width", defaults.width > 0 ? defaults.width : 512},
        {"height", defaults.height > 0 ? defaults.height : 512},
        {"strength", defaults.strength},
        {"seed", defaults.seed},
        {"batch_count", defaults.batch_count},
        {"auto_resize_ref_image", defaults.auto_resize_ref_image},
        {"increase_ref_index", defaults.increase_ref_index},
        {"control_strength", defaults.control_strength},
        {"sample_params",
         {
             {"scheduler", capability_scheduler_name(sample_params.scheduler)},
             {"sample_method", capability_sample_method_name(sample_params.sample_method)},
             {"sample_steps", sample_params.sample_steps},
             {"eta", finite_number_or_null(sample_params.eta)},
             {"shifted_timestep", sample_params.shifted_timestep},
             {"flow_shift", finite_number_or_null(sample_params.flow_shift)},
             {"guidance",
              {
                  {"txt_cfg", guidance.txt_cfg},
                  {"img_cfg", finite_number_or_null(guidance.img_cfg)},
                  {"distilled_guidance", guidance.distilled_guidance},
                  {"slg",
                   {
                       {"layers", defaults.skip_layers},
                       {"layer_start", guidance.slg.layer_start},
                       {"layer_end", guidance.slg.layer_end},
                       {"scale", guidance.slg.scale},
                   }},
              }},
         }},
        {"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
        {"cache_mode", defaults.cache_mode},
        {"cache_option", defaults.cache_option},
        {"scm_mask", defaults.scm_mask},
        {"scm_policy_dynamic", defaults.scm_policy_dynamic},
        {"output_format", "png"},
        {"output_compression", 100},
    };
    // Hard-coded limits advertised to clients; max_queue_size tracks the
    // async manager's configuration.
    result["limits"] = {
        {"min_width", 64},
        {"max_width", 4096},
        {"min_height", 64},
        {"max_height", 4096},
        {"max_batch_count", 8},
        {"max_queue_size", manager.max_pending_jobs},
    };
    result["samplers"] = samplers;
    result["schedulers"] = schedulers;
    result["output_formats"] = output_formats;
    // Feature flags: queued jobs can be cancelled, in-flight ones cannot.
    result["features"] = {
        {"init_image", true},
        {"mask_image", true},
        {"control_image", true},
        {"ref_images", true},
        {"lora", true},
        {"vae_tiling", true},
        {"cache", true},
        {"cancel_queued", true},
        {"cancel_generating", false},
    };
    result["loras"] = available_loras;
    return result;
}
// Builds an ImgGenJobRequest from a JSON request body.
// Starts from the server-wide default generation parameters, overlays the
// body's fields, resolves LoRA names against the on-disk cache, and validates
// the result. On failure, fills error_message and returns false.
static bool parse_img_gen_request(const json& body,
                                  ServerRuntime& runtime,
                                  ImgGenJobRequest& request,
                                  std::string& error_message) {
    request.gen_params = *runtime.default_gen_params;
    // Re-scan the LoRA directory so newly added files are usable immediately.
    refresh_lora_cache(runtime);

    auto lora_resolver = [&](const std::string& path) {
        return get_lora_full_path(runtime, path);
    };
    if (!request.gen_params.from_json_str(body.dump(), lora_resolver)) {
        error_message = "invalid generation parameters";
        return false;
    }

    const std::string requested_format = body.value("output_format", "png");
    const int requested_compression    = body.value("output_compression", 100);
    // allow_webp = true: the async API supports webp output.
    if (!assign_output_options(request, requested_format, requested_compression, true, error_message)) {
        return false;
    }

    // Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
    if (!request.gen_params.resolve_and_validate(IMG_GEN, "", true)) {
        error_message = "invalid generation parameters";
        return false;
    }
    return true;
}
// Registers the /sdcpp/v1/* HTTP routes on the given httplib server.
// `rt` must outlive the server's request loop: the handlers capture a raw
// pointer to it.
void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
    ServerRuntime* runtime = &rt;
    // GET /sdcpp/v1/capabilities: describes supported features/limits as JSON.
    svr.Get("/sdcpp/v1/capabilities", [runtime](const httplib::Request&, httplib::Response& res) {
        res.status = 200;
        res.set_content(make_capabilities_json(*runtime).dump(), "application/json");
    });
    // POST /sdcpp/v1/img_gen: validate the request and enqueue an async job.
    // Responds 202 with a poll URL; generation happens later on a worker that
    // waits on manager.cv.
    svr.Post("/sdcpp/v1/img_gen", [runtime](const httplib::Request& req, httplib::Response& res) {
        try {
            if (req.body.empty()) {
                res.status = 400;
                res.set_content(R"({"error":"empty body"})", "application/json");
                return;
            }
            json body = json::parse(req.body);
            ImgGenJobRequest request;
            std::string error_message;
            if (!parse_img_gen_request(body, *runtime, request, error_message)) {
                res.status = 400;
                res.set_content(json({{"error", error_message}}).dump(), "application/json");
                return;
            }
            AsyncJobManager& manager = *runtime->async_job_manager;
            std::shared_ptr<AsyncGenerationJob> job = std::make_shared<AsyncGenerationJob>();
            job->kind = AsyncJobKind::ImgGen;
            job->status = AsyncJobStatus::Queued;
            job->created_at = unix_timestamp_now();
            job->img_gen = std::move(request);
            {
                // All queue mutation happens under the manager lock. Expired
                // jobs are purged first so they don't count against the limit.
                std::lock_guard<std::mutex> lock(manager.mutex);
                purge_expired_jobs(manager);
                if (count_pending_jobs(manager) >= manager.max_pending_jobs) {
                    res.status = 429;
                    res.set_content(R"({"error":"job queue is full"})", "application/json");
                    return;
                }
                job->id = make_async_job_id(manager);
                manager.jobs[job->id] = job;
                manager.queue.push_back(job->id);
            }
            // Wake a worker after releasing the lock.
            manager.cv.notify_one();
            json out;
            out["id"] = job->id;
            out["kind"] = async_job_kind_name(job->kind);
            out["status"] = async_job_status_name(job->status);
            out["created"] = job->created_at;
            out["poll_url"] = "/sdcpp/v1/jobs/" + job->id;
            res.status = 202;
            res.set_content(out.dump(), "application/json");
        } catch (const json::parse_error& e) {
            res.status = 400;
            res.set_content(json({{"error", "invalid json"}, {"message", e.what()}}).dump(), "application/json");
        } catch (const std::exception& e) {
            res.status = 500;
            res.set_content(json({{"error", "server_error"}, {"message", e.what()}}).dump(), "application/json");
        }
    });
    // POST /sdcpp/v1/vid_gen: reserved endpoint, always 501 for now.
    svr.Post("/sdcpp/v1/vid_gen", [](const httplib::Request&, httplib::Response& res) {
        res.status = 501;
        res.set_content(R"({"error":"vid_gen is reserved and not implemented yet"})", "application/json");
    });
    // GET /sdcpp/v1/jobs/{id}: poll a job's status/result.
    // 410 for ids that existed but have been purged, 404 for unknown ids.
    svr.Get(R"(/sdcpp/v1/jobs/([A-Za-z0-9_\-]+))", [runtime](const httplib::Request& req, httplib::Response& res) {
        AsyncJobManager& manager = *runtime->async_job_manager;
        std::lock_guard<std::mutex> lock(manager.mutex);
        purge_expired_jobs(manager);
        std::string job_id = req.matches[1];
        auto it = manager.jobs.find(job_id);
        if (it == manager.jobs.end()) {
            if (manager.expired_jobs.find(job_id) != manager.expired_jobs.end()) {
                res.status = 410;
                res.set_content(R"({"error":"job expired"})", "application/json");
            } else {
                res.status = 404;
                res.set_content(R"({"error":"job not found"})", "application/json");
            }
            return;
        }
        res.status = 200;
        res.set_content(make_async_job_json(manager, *it->second).dump(), "application/json");
    });
    // POST /sdcpp/v1/jobs/{id}/cancel: cancel a queued job.
    // Jobs already generating cannot be interrupted (409); finished/canceled
    // jobs are returned as-is (cancel is idempotent for terminal states).
    svr.Post(R"(/sdcpp/v1/jobs/([A-Za-z0-9_\-]+)/cancel)", [runtime](const httplib::Request& req, httplib::Response& res) {
        AsyncJobManager& manager = *runtime->async_job_manager;
        std::lock_guard<std::mutex> lock(manager.mutex);
        purge_expired_jobs(manager);
        std::string job_id = req.matches[1];
        auto it = manager.jobs.find(job_id);
        if (it == manager.jobs.end()) {
            if (manager.expired_jobs.find(job_id) != manager.expired_jobs.end()) {
                res.status = 410;
                res.set_content(R"({"error":"job expired"})", "application/json");
            } else {
                res.status = 404;
                res.set_content(R"({"error":"job not found"})", "application/json");
            }
            return;
        }
        auto& job = *it->second;
        if (job.status == AsyncJobStatus::Queued) {
            // The job may have been picked up between status check and cancel.
            if (!cancel_queued_job(manager, job)) {
                res.status = 409;
                res.set_content(R"({"error":"job queue state changed before cancellation"})", "application/json");
                return;
            }
            res.status = 200;
            res.set_content(make_async_job_json(manager, job).dump(), "application/json");
            return;
        }
        if (job.status == AsyncJobStatus::Generating) {
            res.status = 409;
            res.set_content(R"({"error":"job is currently generating and cannot be interrupted yet"})", "application/json");
            return;
        }
        res.status = 200;
        res.set_content(make_async_job_json(manager, job).dump(), "application/json");
    });
}

View File

@ -1,186 +0,0 @@
#include "runtime.h"
#include <algorithm>
#include <chrono>
#include <cstdlib>
#include <filesystem>
#include <mutex>
#include <regex>
#include <sstream>
#include <system_error>
#include "common/common.h"
#include "common/log.h"
namespace fs = std::filesystem;
// Alphabet for standard (RFC 4648) base64 encoding.
static const std::string k_base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";
// Encodes raw bytes as a standard base64 string, '='-padded to a multiple
// of four characters.
std::string base64_encode(const std::vector<uint8_t>& bytes) {
    std::string encoded;
    encoded.reserve(((bytes.size() + 2) / 3) * 4);
    uint32_t buffer = 0;       // bit accumulator, most recent byte in the low bits
    int bits_in_buffer = 0;
    for (uint8_t byte : bytes) {
        buffer = (buffer << 8) | byte;
        bits_in_buffer += 8;
        // Emit one output character per complete 6-bit group.
        while (bits_in_buffer >= 6) {
            bits_in_buffer -= 6;
            encoded.push_back(k_base64_chars[(buffer >> bits_in_buffer) & 0x3F]);
        }
    }
    if (bits_in_buffer > 0) {
        // Flush the leftover bits, left-aligned within a final 6-bit group.
        encoded.push_back(k_base64_chars[(buffer << (6 - bits_in_buffer)) & 0x3F]);
    }
    while (encoded.size() % 4 != 0) {
        encoded.push_back('=');
    }
    return encoded;
}
// Returns the requested output format name converted to lower case.
std::string normalize_output_format(std::string output_format) {
    for (char& ch : output_format) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return output_format;
}
// Normalizes and stores the output format/compression on the job request.
// Compression is clamped to [0, 100]. Accepted formats are "png" and "jpeg",
// plus "webp" when allow_webp is set; otherwise error_message is filled and
// false is returned.
bool assign_output_options(ImgGenJobRequest& request,
                           std::string output_format,
                           int output_compression,
                           bool allow_webp,
                           std::string& error_message) {
    request.output_format = normalize_output_format(std::move(output_format));
    request.output_compression = std::clamp(output_compression, 0, 100);

    bool format_ok = false;
    if (request.output_format == "png" || request.output_format == "jpeg") {
        format_ok = true;
    } else if (allow_webp && request.output_format == "webp") {
        format_ok = true;
    }
    if (format_ok) {
        return true;
    }

    if (allow_webp) {
        error_message = "invalid output_format, must be one of [png, jpeg, webp]";
    } else {
        error_message = "invalid output_format, must be one of [png, jpeg]";
    }
    return false;
}
// Builds the command-line option table for the server executable.
// Each entry binds a flag to one of this struct's members; the entry layout
// is positional and must match the ArgOptions row types declared elsewhere.
ArgOptions SDSvrParams::get_options() {
    ArgOptions options;
    // String-valued flags.
    options.string_options = {
        {"-l", "--listen-ip", "server listen ip (default: 127.0.0.1)", &listen_ip},
        {"", "--serve-html-path", "path to HTML file to serve at root (optional)", &serve_html_path},
    };
    // Integer-valued flags.
    options.int_options = {
        {"", "--listen-port", "server listen port (default: 1234)", &listen_port},
    };
    // Boolean toggles.
    options.bool_options = {
        {"-v", "--verbose", "print extra info", true, &verbose},
        {"", "--color", "colors the logging tags according to level", true, &color},
    };
    // --help marks normal_exit so main() can exit cleanly after printing help.
    // NOTE(review): the -1 return presumably tells the arg parser to stop
    // processing — confirm against the parser's manual-option contract.
    auto on_help_arg = [&](int, const char**, int) {
        normal_exit = true;
        return -1;
    };
    options.manual_options = {
        {"-h", "--help", "show this help message and exit", on_help_arg},
    };
    return options;
}
// Checks the server parameters for basic validity.
// Logs a description of the first problem found and returns false; returns
// true when all fields are acceptable.
bool SDSvrParams::validate() {
    if (listen_ip.empty()) {
        LOG_ERROR("error: the following arguments are required: listen_ip");
        return false;
    }
    const bool port_in_range = listen_port >= 0 && listen_port <= 65535;
    if (!port_in_range) {
        LOG_ERROR("error: listen_port should be in the range [0, 65535]");
        return false;
    }
    const bool html_path_given = !serve_html_path.empty();
    if (html_path_given && !fs::exists(serve_html_path)) {
        LOG_ERROR("error: serve_html_path file does not exist: %s", serve_html_path.c_str());
        return false;
    }
    return true;
}
// Resolution step plus validation. There is currently nothing to resolve for
// server parameters, so this simply forwards to validate().
bool SDSvrParams::resolve_and_validate() {
    return validate();
}
// Renders the server parameters as a human-readable, multi-line string for
// logging. String fields are quoted; numeric fields are printed bare.
// Fix: the quoting was inverted — the integer listen_port was wrapped in
// quotes while the string listen_ip was not.
std::string SDSvrParams::to_string() const {
    std::ostringstream oss;
    oss << "SDSvrParams {\n"
        << " listen_ip: \"" << listen_ip << "\",\n"
        << " listen_port: " << listen_port << ",\n"
        << " serve_html_path: \"" << serve_html_path << "\",\n"
        << "}";
    return oss.str();
}
// Rescans the configured LoRA model directory and atomically replaces the
// shared cache (rt.lora_cache, guarded by rt.lora_mutex) with a sorted
// listing of the weight files found.
// Fix: the throwing fs::recursive_directory_iterator / is_regular_file()
// overloads could raise std::filesystem::filesystem_error (e.g. on a
// permission-denied subdirectory or a file deleted mid-scan) and let the
// exception escape into the calling request handler. Use
// directory_options::skip_permission_denied and the error_code overloads so
// an unreadable or racing entry is skipped instead of aborting the refresh.
void refresh_lora_cache(ServerRuntime& rt) {
    std::vector<LoraEntry> new_cache;
    fs::path lora_dir = rt.ctx_params->lora_model_dir;

    // Case-insensitive check for supported LoRA weight-file extensions.
    auto is_lora_ext = [](const fs::path& p) {
        auto ext = p.extension().string();
        std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) {
            return static_cast<char>(std::tolower(c));
        });
        return ext == ".gguf" || ext == ".pt" || ext == ".pth" || ext == ".safetensors";
    };

    std::error_code ec;
    if (fs::exists(lora_dir, ec) && fs::is_directory(lora_dir, ec)) {
        fs::recursive_directory_iterator it(lora_dir, fs::directory_options::skip_permission_denied, ec);
        const fs::recursive_directory_iterator end;
        for (; !ec && it != end; it.increment(ec)) {
            const fs::directory_entry& entry = *it;
            std::error_code file_ec;
            if (!entry.is_regular_file(file_ec) || file_ec) {
                continue;
            }
            const fs::path& p = entry.path();
            if (!is_lora_ext(p)) {
                continue;
            }
            LoraEntry lora_entry;
            lora_entry.name     = p.stem().u8string();
            lora_entry.fullpath = p.u8string();
            // Cache key: path relative to the LoRA dir, '/'-separated on all
            // platforms so web clients see a consistent identifier.
            std::string rel = p.lexically_relative(lora_dir).u8string();
            std::replace(rel.begin(), rel.end(), '\\', '/');
            lora_entry.path = rel;
            new_cache.push_back(std::move(lora_entry));
        }
    }

    // Deterministic ordering for capability listings / UIs.
    std::sort(new_cache.begin(), new_cache.end(), [](const LoraEntry& a, const LoraEntry& b) {
        return a.path < b.path;
    });

    {
        std::lock_guard<std::mutex> lock(*rt.lora_mutex);
        *rt.lora_cache = std::move(new_cache);
    }
}
// Resolves a cache-relative LoRA path to the file's full path.
// Returns an empty string when the path is not present in the cache.
std::string get_lora_full_path(ServerRuntime& rt, const std::string& path) {
    std::lock_guard<std::mutex> lock(*rt.lora_mutex);
    for (const LoraEntry& entry : *rt.lora_cache) {
        if (entry.path == path) {
            return entry.fullpath;
        }
    }
    return "";
}
// Current wall-clock time as whole seconds since the Unix epoch.
int64_t unix_timestamp_now() {
    const auto now         = std::chrono::system_clock::now();
    const auto since_epoch = now.time_since_epoch();
    return std::chrono::duration_cast<std::chrono::seconds>(since_epoch).count();
}

View File

@ -1,70 +0,0 @@
#pragma once
#include <algorithm>
#include <cstdint>
#include <mutex>
#include <string>
#include <vector>
#include <json.hpp>
#include "common/common.h"
#include "common/resource_owners.hpp"
#include "stable-diffusion.h"
using json = nlohmann::json;
struct ArgOptions;
struct SDContextParams;
struct AsyncJobManager;
// Command-line / runtime configuration for the stable-diffusion server.
struct SDSvrParams {
    std::string listen_ip = "127.0.0.1";  // address the HTTP server binds to
    int listen_port = 1234;               // TCP port, validated to [0, 65535]
    std::string serve_html_path;          // optional HTML file served at "/"
    bool normal_exit = false;             // set by --help: exit cleanly after printing usage
    bool verbose = false;                 // print extra info
    bool color = false;                   // colorize logging tags by level
    ArgOptions get_options();             // CLI option table for the argument parser
    bool validate();                      // checks field values; logs and returns false on error
    bool resolve_and_validate();          // resolution step followed by validate()
    std::string to_string() const;        // human-readable dump for logging
};
// One LoRA weight file discovered under the configured LoRA directory.
struct LoraEntry {
    std::string name;      // file stem (no directory, no extension)
    std::string path;      // path relative to the LoRA dir, '/'-separated; used as the lookup key
    std::string fullpath;  // full path used to actually load the file
};
// Non-owning bundle of the long-lived server state shared by all HTTP
// handlers. The pointed-to objects are owned by the caller that wires this
// struct up and must outlive the server's request loop.
struct ServerRuntime {
    sd_ctx_t* sd_ctx;                             // generation context
    std::mutex* sd_ctx_mutex;                     // serializes access to sd_ctx
    const SDSvrParams* svr_params;                // server configuration
    const SDContextParams* ctx_params;            // model/context configuration
    const SDGenerationParams* default_gen_params; // defaults overlaid by each request
    std::vector<LoraEntry>* lora_cache;           // LoRA listing, guarded by lora_mutex
    std::mutex* lora_mutex;                       // serializes access to lora_cache
    AsyncJobManager* async_job_manager;           // queued/in-flight generation jobs
};
// Everything needed to execute one queued image-generation job.
struct ImgGenJobRequest {
    SDGenerationParams gen_params;      // fully resolved generation parameters
    std::string output_format = "png";  // lower-case: "png", "jpeg" or "webp"
    int output_compression = 100;       // clamped to [0, 100] by assign_output_options()
    // Converts to the C-API parameter struct consumed by the generator.
    sd_img_gen_params_t to_sd_img_gen_params_t() {
        return gen_params.to_sd_img_gen_params_t();
    }
};
// Standard (RFC 4648) base64 encoding of raw bytes, with '=' padding.
std::string base64_encode(const std::vector<uint8_t>& bytes);
// Returns the output format name converted to lower case.
std::string normalize_output_format(std::string output_format);
// Normalizes/validates format and compression onto the request; on failure,
// fills error_message and returns false.
bool assign_output_options(ImgGenJobRequest& request,
                           std::string output_format,
                           int output_compression,
                           bool allow_webp,
                           std::string& error_message);
// Rescans the LoRA directory and atomically replaces the shared cache.
void refresh_lora_cache(ServerRuntime& rt);
// Resolves a cache-relative LoRA path to its full path ("" when unknown).
std::string get_lora_full_path(ServerRuntime& rt, const std::string& path);
// Current wall-clock time as whole seconds since the Unix epoch.
int64_t unix_timestamp_now();

View File

@ -658,22 +658,32 @@ inline float time_snr_shift(float alpha, float t) {
}
struct DiscreteFlowDenoiser : public Denoiser {
float sigmas[TIMESTEPS];
float shift = 3.0f;
float sigma_data = 1.0f;
DiscreteFlowDenoiser(float shift = 3.0f) {
set_shift(shift);
}
void set_parameters() {
for (int i = 0; i < TIMESTEPS; i++) {
sigmas[i] = t_to_sigma(static_cast<float>(i));
}
}
void set_shift(float shift) {
this->shift = shift;
set_parameters();
}
float sigma_min() override {
return t_to_sigma(0);
return sigmas[0];
}
float sigma_max() override {
return t_to_sigma(TIMESTEPS - 1);
return sigmas[TIMESTEPS - 1];
}
float sigma_to_t(float sigma) override {

View File

@ -59,9 +59,6 @@ private:
if (!safe_read(fin, key_len))
return false;
if (key_len > 4096)
return false;
std::string key(key_len, '\0');
if (!safe_read(fin, (char*)key.data(), key_len))
return false;

View File

@ -315,9 +315,8 @@ bool is_safetensors_file(const std::string& file_path) {
if (!file) {
return false;
}
try {
nlohmann::json header_ = nlohmann::json::parse(header_buf.data());
} catch (const std::exception&) {
nlohmann::json header_ = nlohmann::json::parse(header_buf.data());
if (header_.is_discarded()) {
return false;
}
return true;
@ -512,14 +511,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
return false;
}
nlohmann::json header_;
try {
header_ = nlohmann::json::parse(header_buf.data());
} catch (const std::exception&) {
LOG_ERROR("parsing safetensors header failed", file_path.c_str());
file_paths_.pop_back();
return false;
}
nlohmann::json header_ = nlohmann::json::parse(header_buf.data());
for (auto& item : header_.items()) {
std::string name = item.key();
@ -583,29 +575,24 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
size_t tensor_data_size = end - begin;
bool tensor_size_ok;
if (dtype == "F8_E4M3") {
tensor_storage.is_f8_e4m3 = true;
// f8 -> f16
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2);
} else if (dtype == "F8_E5M2") {
tensor_storage.is_f8_e5m2 = true;
// f8 -> f16
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2);
} else if (dtype == "F64") {
tensor_storage.is_f64 = true;
// f64 -> f32
tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
GGML_ASSERT(tensor_storage.nbytes() * 2 == tensor_data_size);
} else if (dtype == "I64") {
tensor_storage.is_i64 = true;
// i64 -> i32
tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
GGML_ASSERT(tensor_storage.nbytes() * 2 == tensor_data_size);
} else {
tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size);
}
if (!tensor_size_ok) {
LOG_ERROR("size mismatch for tensor '%s' (%s)\n", name.c_str(), dtype.c_str());
return false;
GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size);
}
add_tensor_storage(tensor_storage);