feat(cli): add metadata inspection mode (#1381 )

fix: use resolved image size in embedded metadata (#1382 )
feat: show tensor loading progress in MB/s or GB/s (#1380 )
2026-05-13 10:48:52 +00:00 · 2026-04-01 00:52:03 +08:00 · 2026-03-31 23:55:49 +08:00 · 2026-03-31 23:06:44 +08:00 · 2026-03-31 23:06:27 +08:00 · 2026-03-31 22:10:34 +08:00
13 changed files with 1448 additions and 107 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -64,7 +64,7 @@ jobs:
      - name: Setup pnpm
        uses: pnpm/action-setup@v4
        with:
-          version: 9
+          version: 10.15.1

      - name: Dependencies
        id: depends
@ -127,7 +127,7 @@ jobs:
      - name: Setup pnpm
        uses: pnpm/action-setup@v4
        with:
-          version: 9
+          version: 10.15.1

      - name: Dependencies
        id: depends
@ -205,7 +205,7 @@ jobs:
      - name: Setup pnpm
        uses: pnpm/action-setup@v4
        with:
-          version: 9
+          version: 10.15.1

      - name: Get commit hash
        id: commit
@ -264,7 +264,7 @@ jobs:
      - name: Setup pnpm
        uses: pnpm/action-setup@v4
        with:
-          version: 9
+          version: 10.15.1

      - name: Dependencies
        id: depends
@ -345,7 +345,7 @@ jobs:
      - name: Setup pnpm
        uses: pnpm/action-setup@v4
        with:
-          version: 9
+          version: 10.15.1

      - name: Install cuda-toolkit
        id: cuda-toolkit
@ -460,7 +460,7 @@ jobs:
      - name: Setup pnpm
        uses: pnpm/action-setup@v4
        with:
-          version: 9
+          version: 10.15.1

      - name: Cache ROCm Installation
        id: cache-rocm
@ -573,7 +573,7 @@ jobs:
      - name: Setup pnpm
        uses: pnpm/action-setup@v4
        with:
-          version: 9
+          version: 10.15.1

      - name: Free disk space
        run: |
--- a/examples/cli/CMakeLists.txt
+++ b/examples/cli/CMakeLists.txt
@ -1,6 +1,9 @@
 set(TARGET sd-cli)

-add_executable(${TARGET} main.cpp)
+add_executable(${TARGET}
+    image_metadata.cpp
+    main.cpp
+)
 install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
--- a/examples/cli/README.md
+++ b/examples/cli/README.md
@ -10,13 +10,18 @@ CLI Options:
  --preview-interval <int>    interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
                              every step)
  --output-begin-idx <int>    starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
+  --image <string>            path to the image to inspect (for metadata mode)
+  --metadata-format <string>  metadata output format, one of [text, json] (default: text)
  --canny                     apply canny preprocessor (edge detection)
  --convert-name              convert tensor name (for convert mode)
  -v, --verbose               print extra info
  --color                     colors the logging tags according to level
  --taesd-preview-only        prevents usage of taesd for decoding the final image. (for use with --preview tae)
  --preview-noisy             enables previewing noisy inputs of the models rather than the denoised outputs
-  -M, --mode                  run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen
+  --metadata-raw              include raw hex previews for unparsed metadata payloads
+  --metadata-brief            truncate long metadata text values in text output
+  --metadata-all              include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments
+  -M, --mode                  run mode, one of [img_gen, vid_gen, upscale, convert, metadata], default: img_gen
  --preview                   preview method. must be one of the following [none, proj, tae, vae] (default is none)
  -h, --help                  show this help message and exit

@ -125,6 +130,7 @@ Generation Options:
  --vace-strength <float>                  wan vace strength
  --increase-ref-index                     automatically increase the indices of references images based on the order they are listed (starting with 1).
  --disable-auto-resize-ref-image          disable auto resize of ref images
+  --disable-image-metadata                 do not embed generation metadata on image files
  -s, --seed                               RNG seed (default: 42, use random seed for < 0)
  --sampling-method                        sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
                                           tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
@ -147,3 +153,12 @@ Generation Options:
  --scm-mask                               SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
  --scm-policy                             SCM policy: 'dynamic' (default) or 'static'
 ```
+
+Metadata mode inspects PNG/JPEG container metadata without loading any model:
+
+```bash
+./bin/sd-cli -M metadata --image ./output.png
+./bin/sd-cli -M metadata --image ./output.jpg --metadata-format json
+./bin/sd-cli -M metadata --image ./output.png --metadata-raw
+./bin/sd-cli -M metadata --image ./output.png --metadata-all
+```
--- a/examples/cli/image_metadata.cpp
+++ b/examples/cli/image_metadata.cpp
--- a/examples/cli/image_metadata.h
+++ b/examples/cli/image_metadata.h
@ -0,0 +1,21 @@
+#pragma once
+
+#include <iosfwd>
+#include <string>
+
+// Output format used when printing image metadata.
+enum class MetadataOutputFormat {
+    TEXT,  // chosen by the CLI for `--metadata-format text` (the CLI default)
+    JSON,  // chosen by the CLI for `--metadata-format json`
+};
+
+// Options passed to print_image_metadata(). The per-field notes repeat the CLI help text
+// of the flag that sets each field in sd-cli's metadata mode.
+struct MetadataReadOptions {
+    MetadataOutputFormat output_format = MetadataOutputFormat::TEXT;
+    bool include_raw                   = false;  // --metadata-raw: include raw hex previews for unparsed metadata payloads
+    bool brief                         = false;  // --metadata-brief: truncate long metadata text values in text output
+    bool include_structural            = false;  // --metadata-all: include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments
+};
+
+// Inspect the image at `image_path` and write its metadata report to `out`, honoring `options`.
+// Returns false on failure; sd-cli then logs `error` and exits with status 1.
+// NOTE(review): the implementation is not visible from this header - presumably `error`
+// receives a human-readable description on failure; confirm in image_metadata.cpp.
+bool print_image_metadata(const std::string& image_path,
+                          const MetadataReadOptions& options,
+                          std::ostream& out,
+                          std::string& error);
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@ -18,6 +18,7 @@
 #include "common/common.hpp"

 #include "avi_writer.h"
+#include "image_metadata.h"

 const char* previews_str[] = {
    "none",
@ -32,6 +33,8 @@ struct SDCliParams {
    SDMode mode             = IMG_GEN;
    std::string output_path = "output.png";
    int output_begin_idx    = -1;
+    std::string image_path;
+    std::string metadata_format = "text";

    bool verbose          = false;
    bool canny_preprocess = false;
@ -44,6 +47,9 @@ struct SDCliParams {
    bool taesd_preview       = false;
    bool preview_noisy       = false;
    bool color               = false;
+    bool metadata_raw        = false;
+    bool metadata_brief      = false;
+    bool metadata_all        = false;

    bool normal_exit = false;

@ -55,6 +61,14 @@ struct SDCliParams {
             "--output",
             "path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)",
             &output_path},
+            {"",
+             "--image",
+             "path to the image to inspect (for metadata mode)",
+             &image_path},
+            {"",
+             "--metadata-format",
+             "metadata output format, one of [text, json] (default: text)",
+             &metadata_format},
            {"",
             "--preview-path",
             "path to write preview image to (default: ./preview.png)",
@ -97,6 +111,18 @@ struct SDCliParams {
             "--preview-noisy",
             "enables previewing noisy inputs of the models rather than the denoised outputs",
             true, &preview_noisy},
+            {"",
+             "--metadata-raw",
+             "include raw hex previews for unparsed metadata payloads",
+             true, &metadata_raw},
+            {"",
+             "--metadata-brief",
+             "truncate long metadata text values in text output",
+             true, &metadata_brief},
+            {"",
+             "--metadata-all",
+             "include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments",
+             true, &metadata_all},

        };

@ -149,7 +175,7 @@ struct SDCliParams {
        options.manual_options = {
            {"-M",
             "--mode",
-             "run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen",
+             "run mode, one of [img_gen, vid_gen, upscale, convert, metadata], default: img_gen",
             on_mode_arg},
            {"",
             "--preview",
@ -165,7 +191,7 @@ struct SDCliParams {
    };

    bool process_and_check() {
-        if (output_path.length() == 0) {
+        if (mode != METADATA && output_path.length() == 0) {
            LOG_ERROR("error: the following arguments are required: output_path");
            return false;
        }
@ -174,6 +200,16 @@ struct SDCliParams {
            if (output_path == "output.png") {
                output_path = "output.gguf";
            }
+        } else if (mode == METADATA) {
+            if (image_path.empty()) {
+                LOG_ERROR("error: metadata mode needs an image path (--image)");
+                return false;
+            }
+            if (metadata_format != "text" && metadata_format != "json") {
+                LOG_ERROR("error: invalid metadata format %s, must be one of [text, json]",
+                          metadata_format.c_str());
+                return false;
+            }
        }
        return true;
    }
@ -183,6 +219,8 @@ struct SDCliParams {
        oss << "SDCliParams {\n"
            << "  mode: " << modes_str[mode] << ",\n"
            << "  output_path: \"" << output_path << "\",\n"
+            << "  image_path: \"" << image_path << "\",\n"
+            << "  metadata_format: \"" << metadata_format << "\",\n"
            << "  verbose: " << (verbose ? "true" : "false") << ",\n"
            << "  color: " << (color ? "true" : "false") << ",\n"
            << "  canny_preprocess: " << (canny_preprocess ? "true" : "false") << ",\n"
@ -192,7 +230,10 @@ struct SDCliParams {
            << "  preview_path: \"" << preview_path << "\",\n"
            << "  preview_fps: " << preview_fps << ",\n"
            << "  taesd_preview: " << (taesd_preview ? "true" : "false") << ",\n"
-            << "  preview_noisy: " << (preview_noisy ? "true" : "false") << "\n"
+            << "  preview_noisy: " << (preview_noisy ? "true" : "false") << ",\n"
+            << "  metadata_raw: " << (metadata_raw ? "true" : "false") << ",\n"
+            << "  metadata_brief: " << (metadata_brief ? "true" : "false") << ",\n"
+            << "  metadata_all: " << (metadata_all ? "true" : "false") << "\n"
            << "}";
        return oss.str();
    }
@ -217,71 +258,18 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
        exit(cli_params.normal_exit ? 0 : 1);
    }

-    if (!cli_params.process_and_check() ||
-        !ctx_params.process_and_check(cli_params.mode) ||
-        !gen_params.process_and_check(cli_params.mode, ctx_params.lora_model_dir)) {
+    bool valid = cli_params.process_and_check();
+    if (valid && cli_params.mode != METADATA) {
+        valid = ctx_params.process_and_check(cli_params.mode) &&
+                gen_params.process_and_check(cli_params.mode, ctx_params.lora_model_dir);
+    }
+
+    if (!valid) {
        print_usage(argc, argv, options_vec);
        exit(1);
    }
 }

-std::string get_image_params(const SDCliParams& cli_params, const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed) {
-    std::string parameter_string = gen_params.prompt_with_lora + "\n";
-    if (gen_params.negative_prompt.size() != 0) {
-        parameter_string += "Negative prompt: " + gen_params.negative_prompt + "\n";
-    }
-    parameter_string += "Steps: " + std::to_string(gen_params.sample_params.sample_steps) + ", ";
-    parameter_string += "CFG scale: " + std::to_string(gen_params.sample_params.guidance.txt_cfg) + ", ";
-    if (gen_params.sample_params.guidance.slg.scale != 0 && gen_params.skip_layers.size() != 0) {
-        parameter_string += "SLG scale: " + std::to_string(gen_params.sample_params.guidance.txt_cfg) + ", ";
-        parameter_string += "Skip layers: [";
-        for (const auto& layer : gen_params.skip_layers) {
-            parameter_string += std::to_string(layer) + ", ";
-        }
-        parameter_string += "], ";
-        parameter_string += "Skip layer start: " + std::to_string(gen_params.sample_params.guidance.slg.layer_start) + ", ";
-        parameter_string += "Skip layer end: " + std::to_string(gen_params.sample_params.guidance.slg.layer_end) + ", ";
-    }
-    parameter_string += "Guidance: " + std::to_string(gen_params.sample_params.guidance.distilled_guidance) + ", ";
-    parameter_string += "Eta: " + std::to_string(gen_params.sample_params.eta) + ", ";
-    parameter_string += "Seed: " + std::to_string(seed) + ", ";
-    parameter_string += "Size: " + std::to_string(gen_params.get_resolved_width()) + "x" + std::to_string(gen_params.get_resolved_height()) + ", ";
-    parameter_string += "Model: " + sd_basename(ctx_params.model_path) + ", ";
-    parameter_string += "RNG: " + std::string(sd_rng_type_name(ctx_params.rng_type)) + ", ";
-    if (ctx_params.sampler_rng_type != RNG_TYPE_COUNT) {
-        parameter_string += "Sampler RNG: " + std::string(sd_rng_type_name(ctx_params.sampler_rng_type)) + ", ";
-    }
-    parameter_string += "Sampler: " + std::string(sd_sample_method_name(gen_params.sample_params.sample_method));
-    if (!gen_params.custom_sigmas.empty()) {
-        parameter_string += ", Custom Sigmas: [";
-        for (size_t i = 0; i < gen_params.custom_sigmas.size(); ++i) {
-            std::ostringstream oss;
-            oss << std::fixed << std::setprecision(4) << gen_params.custom_sigmas[i];
-            parameter_string += oss.str() + (i == gen_params.custom_sigmas.size() - 1 ? "" : ", ");
-        }
-        parameter_string += "]";
-    } else if (gen_params.sample_params.scheduler != SCHEDULER_COUNT) {  // Only show schedule if not using custom sigmas
-        parameter_string += " " + std::string(sd_scheduler_name(gen_params.sample_params.scheduler));
-    }
-    parameter_string += ", ";
-    for (const auto& te : {ctx_params.clip_l_path, ctx_params.clip_g_path, ctx_params.t5xxl_path, ctx_params.llm_path, ctx_params.llm_vision_path}) {
-        if (!te.empty()) {
-            parameter_string += "TE: " + sd_basename(te) + ", ";
-        }
-    }
-    if (!ctx_params.diffusion_model_path.empty()) {
-        parameter_string += "Unet: " + sd_basename(ctx_params.diffusion_model_path) + ", ";
-    }
-    if (!ctx_params.vae_path.empty()) {
-        parameter_string += "VAE: " + sd_basename(ctx_params.vae_path) + ", ";
-    }
-    if (gen_params.clip_skip != -1) {
-        parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", ";
-    }
-    parameter_string += "Version: stable-diffusion.cpp";
-    return parameter_string;
-}
-
 void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
    SDCliParams* cli_params = (SDCliParams*)data;
    log_print(level, log, cli_params->verbose, cli_params->color);
@ -414,12 +402,14 @@ bool save_results(const SDCliParams& cli_params,
        if (!img.data)
            return false;

-        std::string params = get_image_params(cli_params, ctx_params, gen_params, gen_params.seed + idx);
+        std::string params = gen_params.embed_image_metadata
+                                 ? get_image_params(ctx_params, gen_params, gen_params.seed + idx)
+                                 : "";
        int ok             = 0;
        if (is_jpg) {
-            ok = stbi_write_jpg(path.string().c_str(), img.width, img.height, img.channel, img.data, 90, params.c_str());
+            ok = stbi_write_jpg(path.string().c_str(), img.width, img.height, img.channel, img.data, 90, params.size() > 0 ? params.c_str() : nullptr);
        } else {
-            ok = stbi_write_png(path.string().c_str(), img.width, img.height, img.channel, img.data, 0, params.c_str());
+            ok = stbi_write_png(path.string().c_str(), img.width, img.height, img.channel, img.data, 0, params.size() > 0 ? params.c_str() : nullptr);
        }
        LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure");
        return ok != 0;
@ -485,6 +475,27 @@ int main(int argc, const char* argv[]) {
    SDGenerationParams gen_params;

    parse_args(argc, argv, cli_params, ctx_params, gen_params);
+    sd_set_log_callback(sd_log_cb, (void*)&cli_params);
+    log_verbose = cli_params.verbose;
+    log_color   = cli_params.color;
+
+    if (cli_params.mode == METADATA) {
+        MetadataReadOptions options;
+        options.output_format      = cli_params.metadata_format == "json"
+                                         ? MetadataOutputFormat::JSON
+                                         : MetadataOutputFormat::TEXT;
+        options.include_raw        = cli_params.metadata_raw;
+        options.brief              = cli_params.metadata_brief;
+        options.include_structural = cli_params.metadata_all;
+
+        std::string error;
+        if (!print_image_metadata(cli_params.image_path, options, std::cout, error)) {
+            LOG_ERROR("%s", error.c_str());
+            return 1;
+        }
+        return 0;
+    }
+
    if (gen_params.video_frames > 4) {
        size_t last_dot_pos   = cli_params.preview_path.find_last_of(".");
        std::string base_path = cli_params.preview_path;
@ -502,9 +513,6 @@ int main(int argc, const char* argv[]) {
    if (cli_params.preview_method == PREVIEW_PROJ)
        cli_params.preview_fps /= 4;

-    sd_set_log_callback(sd_log_cb, (void*)&cli_params);
-    log_verbose = cli_params.verbose;
-    log_color   = cli_params.color;
    sd_set_preview_callback(step_callback,
                            cli_params.preview_method,
                            cli_params.preview_interval,
--- a/examples/common/common.hpp
+++ b/examples/common/common.hpp
@ -39,14 +39,16 @@ const char* modes_str[] = {
    "vid_gen",
    "convert",
    "upscale",
+    "metadata",
 };
-#define SD_ALL_MODES_STR "img_gen, vid_gen, convert, upscale"
+#define SD_ALL_MODES_STR "img_gen, vid_gen, convert, upscale, metadata"

 enum SDMode {
    IMG_GEN,
    VID_GEN,
    CONVERT,
    UPSCALE,
+    METADATA,
    MODE_COUNT
 };

@ -777,7 +779,7 @@ struct SDContextParams {
    }

    bool process_and_check(SDMode mode) {
-        if (mode != UPSCALE && model_path.length() == 0 && diffusion_model_path.length() == 0) {
+        if (mode != UPSCALE && mode != METADATA && model_path.length() == 0 && diffusion_model_path.length() == 0) {
            LOG_ERROR("error: the following arguments are required: model_path/diffusion_model\n");
            return false;
        }
@ -965,6 +967,7 @@ struct SDGenerationParams {
    std::string control_video_path;
    bool auto_resize_ref_image = true;
    bool increase_ref_index    = false;
+    bool embed_image_metadata  = true;

    std::vector<int> skip_layers = {7, 8, 9};
    sd_sample_params_t sample_params;
@ -1199,10 +1202,16 @@ struct SDGenerationParams {
             "disable auto resize of ref images",
             false,
             &auto_resize_ref_image},
+            {"",
+             "--disable-image-metadata",
+             "do not embed generation metadata on image files",
+             false,
+             &embed_image_metadata},
            {"",
             "--vae-tiling",
             "process vae in tiles to reduce memory usage",
-             true, &vae_tiling_params.enabled},
+             true,
+             &vae_tiling_params.enabled},
        };

        auto on_seed_arg = [&](int argc, const char** argv, int index) {
@ -1567,6 +1576,7 @@ struct SDGenerationParams {

        load_if_exists("auto_resize_ref_image", auto_resize_ref_image);
        load_if_exists("increase_ref_index", increase_ref_index);
+        load_if_exists("embed_image_metadata", embed_image_metadata);

        load_if_exists("skip_layers", skip_layers);
        load_if_exists("high_noise_skip_layers", high_noise_skip_layers);
@ -2094,3 +2104,65 @@ uint8_t* load_image_from_memory(const char* image_bytes,
                                int expected_channel = 3) {
    return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel);
 }
+
+/**
+ * Build the generation-parameter text that callers embed as metadata in output images.
+ *
+ * Layout of the returned string:
+ *   - line 1: the prompt (`prompt_with_lora` when non-empty, otherwise `prompt`)
+ *   - optional line: "Negative prompt: <negative_prompt>"
+ *   - final line: ", "-separated "Key: value" fields (steps, CFG scale, optional SLG
+ *     settings, guidance, eta, seed, size, model, RNG, sampler/scheduler, text encoders,
+ *     diffusion model, VAE, clip skip), terminated by "Version: stable-diffusion.cpp"
+ *
+ * @param ctx_params  context parameters; supplies model, text-encoder and VAE paths
+ *                    (each passed through sd_basename) and the RNG types
+ * @param gen_params  generation parameters; supplies prompts, sampling settings and the resolved size
+ * @param seed        value written to the "Seed" field (callers pass the per-image seed)
+ * @return the formatted parameter string
+ */
+std::string get_image_params(const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed) {
+    std::string parameter_string;
+    // Prefer the LoRA-annotated prompt; fall back to the plain prompt when it was never populated.
+    if (gen_params.prompt_with_lora.size() != 0) {
+        parameter_string += gen_params.prompt_with_lora + "\n";
+    } else {
+        parameter_string += gen_params.prompt + "\n";
+    }
+    if (gen_params.negative_prompt.size() != 0) {
+        parameter_string += "Negative prompt: " + gen_params.negative_prompt + "\n";
+    }
+    parameter_string += "Steps: " + std::to_string(gen_params.sample_params.sample_steps) + ", ";
+    parameter_string += "CFG scale: " + std::to_string(gen_params.sample_params.guidance.txt_cfg) + ", ";
+    // SLG fields are only emitted when skip-layer guidance is actually active.
+    if (gen_params.sample_params.guidance.slg.scale != 0 && gen_params.skip_layers.size() != 0) {
+        // Report the SLG scale itself (previously this field repeated the CFG scale).
+        parameter_string += "SLG scale: " + std::to_string(gen_params.sample_params.guidance.slg.scale) + ", ";
+        parameter_string += "Skip layers: [";
+        // Every entry, including the last, is followed by ", " - kept as-is to preserve the existing output format.
+        for (const auto& layer : gen_params.skip_layers) {
+            parameter_string += std::to_string(layer) + ", ";
+        }
+        parameter_string += "], ";
+        parameter_string += "Skip layer start: " + std::to_string(gen_params.sample_params.guidance.slg.layer_start) + ", ";
+        parameter_string += "Skip layer end: " + std::to_string(gen_params.sample_params.guidance.slg.layer_end) + ", ";
+    }
+    parameter_string += "Guidance: " + std::to_string(gen_params.sample_params.guidance.distilled_guidance) + ", ";
+    parameter_string += "Eta: " + std::to_string(gen_params.sample_params.eta) + ", ";
+    parameter_string += "Seed: " + std::to_string(seed) + ", ";
+    parameter_string += "Size: " + std::to_string(gen_params.get_resolved_width()) + "x" + std::to_string(gen_params.get_resolved_height()) + ", ";
+    parameter_string += "Model: " + sd_basename(ctx_params.model_path) + ", ";
+    parameter_string += "RNG: " + std::string(sd_rng_type_name(ctx_params.rng_type)) + ", ";
+    // RNG_TYPE_COUNT is treated as "no separate sampler RNG configured".
+    if (ctx_params.sampler_rng_type != RNG_TYPE_COUNT) {
+        parameter_string += "Sampler RNG: " + std::string(sd_rng_type_name(ctx_params.sampler_rng_type)) + ", ";
+    }
+    parameter_string += "Sampler: " + std::string(sd_sample_method_name(gen_params.sample_params.sample_method));
+    if (!gen_params.custom_sigmas.empty()) {
+        parameter_string += ", Custom Sigmas: [";
+        for (size_t i = 0; i < gen_params.custom_sigmas.size(); ++i) {
+            std::ostringstream oss;
+            oss << std::fixed << std::setprecision(4) << gen_params.custom_sigmas[i];
+            parameter_string += oss.str() + (i == gen_params.custom_sigmas.size() - 1 ? "" : ", ");
+        }
+        parameter_string += "]";
+    } else if (gen_params.sample_params.scheduler != SCHEDULER_COUNT) {  // Only show schedule if not using custom sigmas
+        parameter_string += " " + std::string(sd_scheduler_name(gen_params.sample_params.scheduler));
+    }
+    parameter_string += ", ";
+    // One "TE:" entry per configured text-encoder path, in this fixed order.
+    for (const auto& te : {ctx_params.clip_l_path, ctx_params.clip_g_path, ctx_params.t5xxl_path, ctx_params.llm_path, ctx_params.llm_vision_path}) {
+        if (!te.empty()) {
+            parameter_string += "TE: " + sd_basename(te) + ", ";
+        }
+    }
+    if (!ctx_params.diffusion_model_path.empty()) {
+        parameter_string += "Unet: " + sd_basename(ctx_params.diffusion_model_path) + ", ";
+    }
+    if (!ctx_params.vae_path.empty()) {
+        parameter_string += "VAE: " + sd_basename(ctx_params.vae_path) + ", ";
+    }
+    // clip_skip == -1 is treated as "not set" and omitted.
+    if (gen_params.clip_skip != -1) {
+        parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", ";
+    }
+    parameter_string += "Version: stable-diffusion.cpp";
+    return parameter_string;
+}
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@ -70,4 +70,10 @@ endif()

 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
+
+# httplib needs Winsock: MSVC links it automatically via a pragma in the header, other Windows toolchains must link ws2_32 explicitly
+if(WIN32 AND NOT MSVC)
+    target_link_libraries(${TARGET} PRIVATE ws2_32)
+endif()
+
 target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
--- a/examples/server/README.md
+++ b/examples/server/README.md
@ -205,6 +205,7 @@ Default Generation Options:
  --vace-strength <float>                  wan vace strength
  --increase-ref-index                     automatically increase the indices of references images based on the order they are listed (starting with 1).
  --disable-auto-resize-ref-image          disable auto resize of ref images
+  --disable-image-metadata                 do not embed generation metadata on image files
  -s, --seed                               RNG seed (default: 42, use random seed for < 0)
  --sampling-method                        sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
                                           tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
--- a/examples/server/main.cpp
+++ b/examples/server/main.cpp
@ -220,13 +220,24 @@ std::string extract_and_remove_sd_cpp_extra_args(std::string& text) {
 enum class ImageFormat { JPEG,
                         PNG };

+// Encode `data` as a PNG in memory, forwarding `parameters` to stbi_write_png_to_mem,
+// then pass the encoded bytes to `func` together with `context`.
+// Returns 1 on success and 0 when encoding produced no buffer.
+static int stbi_ext_write_png_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int stride_bytes, const char* parameters) {
+    int encoded_size;
+    const unsigned char* pixels = static_cast<const unsigned char*>(data);
+    unsigned char* encoded      = stbi_write_png_to_mem(pixels, stride_bytes, x, y, comp, &encoded_size, parameters);
+    if (!encoded) {
+        return 0;
+    }
+    func(context, encoded, encoded_size);
+    // The encoder allocated the buffer, so release it with the matching stb deallocator.
+    STBIW_FREE(encoded);
+    return 1;
+}
+
 std::vector<uint8_t> write_image_to_vector(
    ImageFormat format,
    const uint8_t* image,
    int width,
    int height,
    int channels,
-    int quality = 90) {
+    std::string params = "",
+    int quality        = 90) {
    std::vector<uint8_t> buffer;

    auto write_func = [&buffer](void* context, void* data, int size) {
@ -249,7 +260,7 @@ std::vector<uint8_t> write_image_to_vector(
            result = stbi_write_jpg_to_func(c_func, &ctx, width, height, channels, image, quality);
            break;
        case ImageFormat::PNG:
-            result = stbi_write_png_to_func(c_func, &ctx, width, height, channels, image, width * channels);
+            result = stbi_ext_write_png_to_func(c_func, &ctx, width, height, channels, image, width * channels, params.size() > 0 ? params.c_str() : nullptr);
            break;
        default:
            throw std::runtime_error("invalid image format");
@ -497,11 +508,15 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
                if (results[i].data == nullptr) {
                    continue;
                }
-                auto image_bytes = write_image_to_vector(output_format == "jpeg" ? ImageFormat::JPEG : ImageFormat::PNG,
+                std::string params = gen_params.embed_image_metadata
+                                         ? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i)
+                                         : "";
+                auto image_bytes   = write_image_to_vector(output_format == "jpeg" ? ImageFormat::JPEG : ImageFormat::PNG,
                                                         results[i].data,
                                                         results[i].width,
                                                         results[i].height,
                                                         results[i].channel,
+                                                         params,
                                                         output_compression);
                if (image_bytes.empty()) {
                    LOG_ERROR("write image to mem failed");
@ -747,11 +762,15 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
            for (int i = 0; i < num_results; i++) {
                if (results[i].data == nullptr)
                    continue;
-                auto image_bytes = write_image_to_vector(output_format == "jpeg" ? ImageFormat::JPEG : ImageFormat::PNG,
+                std::string params = gen_params.embed_image_metadata
+                                         ? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i)
+                                         : "";
+                auto image_bytes   = write_image_to_vector(output_format == "jpeg" ? ImageFormat::JPEG : ImageFormat::PNG,
                                                         results[i].data,
                                                         results[i].width,
                                                         results[i].height,
                                                         results[i].channel,
+                                                         params,
                                                         output_compression);
                std::string b64 = base64_encode(image_bytes);
                json item;
@ -1062,11 +1081,15 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
                    continue;
                }

-                auto image_bytes = write_image_to_vector(ImageFormat::PNG,
-                                                         results[i].data,
-                                                         results[i].width,
-                                                         results[i].height,
-                                                         results[i].channel);
+                std::string params = gen_params.embed_image_metadata
+                                         ? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i)
+                                         : "";
+                auto image_bytes   = write_image_to_vector(ImageFormat::PNG,
+                                                           results[i].data,
+                                                           results[i].width,
+                                                           results[i].height,
+                                                           results[i].channel,
+                                                           params);

                if (image_bytes.empty()) {
                    LOG_ERROR("write image to mem failed");
--- a/src/model.cpp
+++ b/src/model.cpp
@ -1311,6 +1311,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
    std::atomic<int64_t> memcpy_time_ms(0);
    std::atomic<int64_t> copy_to_backend_time_ms(0);
    std::atomic<int64_t> convert_time_ms(0);
+    std::atomic<uint64_t> bytes_processed(0);

    int num_threads_to_use = n_threads_p > 0 ? n_threads_p : sd_get_num_physical_cores();
    LOG_DEBUG("using %d threads for model loading", num_threads_to_use);
@ -1522,6 +1523,8 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
                        t1 = ggml_time_ms();
                        copy_to_backend_time_ms.fetch_add(t1 - t0);
                    }
+
+                    bytes_processed.fetch_add((uint64_t)nbytes_to_read);
                }
                if (zip != nullptr) {
                    zip_close(zip);
@@ -1534,8 +1537,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
            if (current_idx >= file_tensors.size() || failed) {
                break;
            }
-            size_t curr_num = total_tensors_processed + current_idx;
-            pretty_progress(static_cast<int>(curr_num), static_cast<int>(total_tensors_to_process), (ggml_time_ms() - t_start) / 1000.0f / (curr_num + 1e-6f));
+            size_t curr_num       = total_tensors_processed + current_idx;
+            float elapsed_seconds = (ggml_time_ms() - t_start) / 1000.0f;
+            pretty_bytes_progress(static_cast<int>(curr_num),
+                                  static_cast<int>(total_tensors_to_process),
+                                  bytes_processed.load(),
+                                  elapsed_seconds);
            std::this_thread::sleep_for(std::chrono::milliseconds(200));
        }

@@ -1548,7 +1555,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
            break;
        }
        total_tensors_processed += file_tensors.size();
-        pretty_progress(static_cast<int>(total_tensors_processed), static_cast<int>(total_tensors_to_process), (ggml_time_ms() - t_start) / 1000.0f / (total_tensors_processed + 1e-6f));
+        pretty_bytes_progress(static_cast<int>(total_tensors_processed),
+                              static_cast<int>(total_tensors_to_process),
+                              bytes_processed.load(),
+                              (ggml_time_ms() - t_start) / 1000.0f);
        if (total_tensors_processed < total_tensors_to_process) {
            printf("\n");
        }
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -337,17 +337,13 @@ std::vector<std::string> split_string(const std::string& str, char delimiter) {
    return result;
 }

-void pretty_progress(int step, int steps, float time) {
-    if (sd_progress_cb) {
-        sd_progress_cb(step, steps, time, sd_progress_cb_data);
-        return;
-    }
-    if (step == 0) {
-        return;
-    }
+static std::string build_progress_bar(int step, int steps) {
    std::string progress = "  |";
    int max_progress     = 50;
-    int32_t current      = (int32_t)(step * 1.f * max_progress / steps);
+    int32_t current      = 0;
+    if (steps > 0) {
+        current = (int32_t)(step * 1.f * max_progress / steps);
+    }
    for (int i = 0; i < 50; i++) {
        if (i > current) {
            progress += " ";
@@ -358,16 +354,57 @@ void pretty_progress(int step, int steps, float time) {
        }
    }
    progress += "|";
+    return progress;
+}

-    const char* lf   = (step == steps ? "\n" : "");
+static void print_progress_line(int step, int steps, const std::string& speed_text) {
+    if (step == 0) {
+        return;
+    }
+    std::string progress = build_progress_bar(step, steps);
+    const char* lf       = (step == steps ? "\n" : "");
+    printf("\r%s %i/%i - %s\033[K%s", progress.c_str(), step, steps, speed_text.c_str(), lf);
+    fflush(stdout);  // for linux
+}
+
+void pretty_progress(int step, int steps, float time) {
+    if (sd_progress_cb) {
+        sd_progress_cb(step, steps, time, sd_progress_cb_data);
+        return;
+    }
+    if (step == 0) {
+        return;
+    }
    const char* unit = "s/it";
    float speed      = time;
    if (speed < 1.0f && speed > 0.f) {
        speed = 1.0f / speed;
        unit  = "it/s";
    }
-    printf("\r%s %i/%i - %.2f%s\033[K%s", progress.c_str(), step, steps, speed, unit, lf);
-    fflush(stdout);  // for linux
+    print_progress_line(step, steps, sd_format("%.2f%s", speed, unit));
+}
+
+void pretty_bytes_progress(int step, int steps, uint64_t bytes_processed, float elapsed_seconds) {
+    if (sd_progress_cb) {
+        float time = elapsed_seconds / (step + 1e-6f);
+        sd_progress_cb(step, steps, time, sd_progress_cb_data);
+        return;
+    }
+    if (step == 0) {
+        return;
+    }
+
+    double bytes_per_second = 0.0;
+    if (elapsed_seconds > 0.0f) {
+        bytes_per_second = bytes_processed / (double)elapsed_seconds;
+    }
+
+    double speed_mb = bytes_per_second / (1024.0 * 1024.0);
+    if (speed_mb >= 1024.0) {
+        print_progress_line(step, steps, sd_format("%.2fGB/s", speed_mb / 1024.0));
+    } else {
+        print_progress_line(step, steps, sd_format("%.2fMB/s", speed_mb));
+    }
 }

 std::string ltrim(const std::string& s) {
--- a/src/util.h
+++ b/src/util.h
@@ -64,6 +64,7 @@ protected:
 std::string path_join(const std::string& p1, const std::string& p2);
 std::vector<std::string> split_string(const std::string& str, char delimiter);
 void pretty_progress(int step, int steps, float time);
+void pretty_bytes_progress(int step, int steps, uint64_t bytes_processed, float elapsed_seconds);

 void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);
Author	SHA1	Message	Date
leejet	09b12d5f6d	feat(cli): add metadata inspection mode (#1381)	2026-04-01 00:52:03 +08:00
leejet	6dfe945958	fix: use resolved image size in embedded metadata (#1382)	2026-03-31 23:55:49 +08:00
leejet	bf0216765a	feat: show tensor loading progress in MB/s or GB/s (#1380)	2026-03-31 23:06:44 +08:00
Wagner Bruna	4fe7a35939	feat(server): add generation metadata to png images (#1217)	2026-03-31 23:06:27 +08:00
Jan Ekström	4d5232083f	chore(server): link winsock2 for non-MSVC windows (#1378)	2026-03-31 22:10:34 +08:00