feat(cli): add metadata inspection mode (#1381)

This commit is contained in:
leejet 2026-04-01 00:52:03 +08:00 committed by GitHub
parent 6dfe945958
commit 09b12d5f6d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 1269 additions and 22 deletions

View File

@ -64,7 +64,7 @@ jobs:
- name: Setup pnpm - name: Setup pnpm
uses: pnpm/action-setup@v4 uses: pnpm/action-setup@v4
with: with:
version: 9 version: 10.15.1
- name: Dependencies - name: Dependencies
id: depends id: depends
@ -127,7 +127,7 @@ jobs:
- name: Setup pnpm - name: Setup pnpm
uses: pnpm/action-setup@v4 uses: pnpm/action-setup@v4
with: with:
version: 9 version: 10.15.1
- name: Dependencies - name: Dependencies
id: depends id: depends
@ -205,7 +205,7 @@ jobs:
- name: Setup pnpm - name: Setup pnpm
uses: pnpm/action-setup@v4 uses: pnpm/action-setup@v4
with: with:
version: 9 version: 10.15.1
- name: Get commit hash - name: Get commit hash
id: commit id: commit
@ -264,7 +264,7 @@ jobs:
- name: Setup pnpm - name: Setup pnpm
uses: pnpm/action-setup@v4 uses: pnpm/action-setup@v4
with: with:
version: 9 version: 10.15.1
- name: Dependencies - name: Dependencies
id: depends id: depends
@ -345,7 +345,7 @@ jobs:
- name: Setup pnpm - name: Setup pnpm
uses: pnpm/action-setup@v4 uses: pnpm/action-setup@v4
with: with:
version: 9 version: 10.15.1
- name: Install cuda-toolkit - name: Install cuda-toolkit
id: cuda-toolkit id: cuda-toolkit
@ -460,7 +460,7 @@ jobs:
- name: Setup pnpm - name: Setup pnpm
uses: pnpm/action-setup@v4 uses: pnpm/action-setup@v4
with: with:
version: 9 version: 10.15.1
- name: Cache ROCm Installation - name: Cache ROCm Installation
id: cache-rocm id: cache-rocm
@ -573,7 +573,7 @@ jobs:
- name: Setup pnpm - name: Setup pnpm
uses: pnpm/action-setup@v4 uses: pnpm/action-setup@v4
with: with:
version: 9 version: 10.15.1
- name: Free disk space - name: Free disk space
run: | run: |

View File

@ -1,6 +1,9 @@
set(TARGET sd-cli) set(TARGET sd-cli)
add_executable(${TARGET} main.cpp) add_executable(${TARGET}
image_metadata.cpp
main.cpp
)
install(TARGETS ${TARGET} RUNTIME) install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17) target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)

View File

@ -10,13 +10,18 @@ CLI Options:
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at --preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
every step) every step)
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise) --output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
--image <string> path to the image to inspect (for metadata mode)
--metadata-format <string> metadata output format, one of [text, json] (default: text)
--canny apply canny preprocessor (edge detection) --canny apply canny preprocessor (edge detection)
--convert-name convert tensor name (for convert mode) --convert-name convert tensor name (for convert mode)
-v, --verbose print extra info -v, --verbose print extra info
--color colors the logging tags according to level --color colors the logging tags according to level
--taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae) --taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae)
--preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs --preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen --metadata-raw include raw hex previews for unparsed metadata payloads
--metadata-brief truncate long metadata text values in text output
--metadata-all include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert, metadata], default: img_gen
--preview preview method. must be one of the following [none, proj, tae, vae] (default is none) --preview preview method. must be one of the following [none, proj, tae, vae] (default is none)
-h, --help show this help message and exit -h, --help show this help message and exit
@ -148,3 +153,12 @@ Generation Options:
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache --scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
--scm-policy SCM policy: 'dynamic' (default) or 'static' --scm-policy SCM policy: 'dynamic' (default) or 'static'
``` ```
Metadata mode inspects PNG/JPEG container metadata without loading any model:
```bash
./bin/sd-cli -M metadata --image ./output.png
./bin/sd-cli -M metadata --image ./output.jpg --metadata-format json
./bin/sd-cli -M metadata --image ./output.png --metadata-raw
./bin/sd-cli -M metadata --image ./output.png --metadata-all
```

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
#pragma once
#include <iosfwd>
#include <string>
// Selects how the inspected image metadata is rendered to the output stream.
enum class MetadataOutputFormat {
// Text rendering; this is the default format.
TEXT,
// JSON rendering; the CLI selects it when --metadata-format is "json".
JSON,
};
// Options passed to print_image_metadata() controlling what is reported and
// how it is formatted. Every field defaults to the least verbose behaviour.
struct MetadataReadOptions {
// Rendering format of the report (text unless overridden).
MetadataOutputFormat output_format = MetadataOutputFormat::TEXT;
// CLI flag --metadata-raw: include raw hex previews for unparsed metadata payloads.
bool include_raw = false;
// CLI flag --metadata-brief: truncate long metadata text values in text output.
bool brief = false;
// CLI flag --metadata-all: include structural/container entries such as
// IHDR, IDAT, and non-metadata JPEG segments.
bool include_structural = false;
};
// Inspects the image file at `image_path` and writes a metadata report to `out`.
//
// Parameters:
//   image_path - path of the image to inspect (the CLI passes the --image value).
//   options    - output format and verbosity switches, see MetadataReadOptions.
//   out        - stream that receives the report (the CLI passes std::cout).
//   error      - out-parameter; the CLI logs this string when the call fails.
//                NOTE(review): presumably only written on failure — confirm
//                against the implementation, which is not visible here.
//
// Returns false on failure (the CLI then logs `error` and exits with status 1),
// true otherwise.
bool print_image_metadata(const std::string& image_path,
const MetadataReadOptions& options,
std::ostream& out,
std::string& error);

View File

@ -18,6 +18,7 @@
#include "common/common.hpp" #include "common/common.hpp"
#include "avi_writer.h" #include "avi_writer.h"
#include "image_metadata.h"
const char* previews_str[] = { const char* previews_str[] = {
"none", "none",
@ -32,6 +33,8 @@ struct SDCliParams {
SDMode mode = IMG_GEN; SDMode mode = IMG_GEN;
std::string output_path = "output.png"; std::string output_path = "output.png";
int output_begin_idx = -1; int output_begin_idx = -1;
std::string image_path;
std::string metadata_format = "text";
bool verbose = false; bool verbose = false;
bool canny_preprocess = false; bool canny_preprocess = false;
@ -44,6 +47,9 @@ struct SDCliParams {
bool taesd_preview = false; bool taesd_preview = false;
bool preview_noisy = false; bool preview_noisy = false;
bool color = false; bool color = false;
bool metadata_raw = false;
bool metadata_brief = false;
bool metadata_all = false;
bool normal_exit = false; bool normal_exit = false;
@ -55,6 +61,14 @@ struct SDCliParams {
"--output", "--output",
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)", "path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)",
&output_path}, &output_path},
{"",
"--image",
"path to the image to inspect (for metadata mode)",
&image_path},
{"",
"--metadata-format",
"metadata output format, one of [text, json] (default: text)",
&metadata_format},
{"", {"",
"--preview-path", "--preview-path",
"path to write preview image to (default: ./preview.png)", "path to write preview image to (default: ./preview.png)",
@ -97,6 +111,18 @@ struct SDCliParams {
"--preview-noisy", "--preview-noisy",
"enables previewing noisy inputs of the models rather than the denoised outputs", "enables previewing noisy inputs of the models rather than the denoised outputs",
true, &preview_noisy}, true, &preview_noisy},
{"",
"--metadata-raw",
"include raw hex previews for unparsed metadata payloads",
true, &metadata_raw},
{"",
"--metadata-brief",
"truncate long metadata text values in text output",
true, &metadata_brief},
{"",
"--metadata-all",
"include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments",
true, &metadata_all},
}; };
@ -149,7 +175,7 @@ struct SDCliParams {
options.manual_options = { options.manual_options = {
{"-M", {"-M",
"--mode", "--mode",
"run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen", "run mode, one of [img_gen, vid_gen, upscale, convert, metadata], default: img_gen",
on_mode_arg}, on_mode_arg},
{"", {"",
"--preview", "--preview",
@ -165,7 +191,7 @@ struct SDCliParams {
}; };
bool process_and_check() { bool process_and_check() {
if (output_path.length() == 0) { if (mode != METADATA && output_path.length() == 0) {
LOG_ERROR("error: the following arguments are required: output_path"); LOG_ERROR("error: the following arguments are required: output_path");
return false; return false;
} }
@ -174,6 +200,16 @@ struct SDCliParams {
if (output_path == "output.png") { if (output_path == "output.png") {
output_path = "output.gguf"; output_path = "output.gguf";
} }
} else if (mode == METADATA) {
if (image_path.empty()) {
LOG_ERROR("error: metadata mode needs an image path (--image)");
return false;
}
if (metadata_format != "text" && metadata_format != "json") {
LOG_ERROR("error: invalid metadata format %s, must be one of [text, json]",
metadata_format.c_str());
return false;
}
} }
return true; return true;
} }
@ -183,6 +219,8 @@ struct SDCliParams {
oss << "SDCliParams {\n" oss << "SDCliParams {\n"
<< " mode: " << modes_str[mode] << ",\n" << " mode: " << modes_str[mode] << ",\n"
<< " output_path: \"" << output_path << "\",\n" << " output_path: \"" << output_path << "\",\n"
<< " image_path: \"" << image_path << "\",\n"
<< " metadata_format: \"" << metadata_format << "\",\n"
<< " verbose: " << (verbose ? "true" : "false") << ",\n" << " verbose: " << (verbose ? "true" : "false") << ",\n"
<< " color: " << (color ? "true" : "false") << ",\n" << " color: " << (color ? "true" : "false") << ",\n"
<< " canny_preprocess: " << (canny_preprocess ? "true" : "false") << ",\n" << " canny_preprocess: " << (canny_preprocess ? "true" : "false") << ",\n"
@ -192,7 +230,10 @@ struct SDCliParams {
<< " preview_path: \"" << preview_path << "\",\n" << " preview_path: \"" << preview_path << "\",\n"
<< " preview_fps: " << preview_fps << ",\n" << " preview_fps: " << preview_fps << ",\n"
<< " taesd_preview: " << (taesd_preview ? "true" : "false") << ",\n" << " taesd_preview: " << (taesd_preview ? "true" : "false") << ",\n"
<< " preview_noisy: " << (preview_noisy ? "true" : "false") << "\n" << " preview_noisy: " << (preview_noisy ? "true" : "false") << ",\n"
<< " metadata_raw: " << (metadata_raw ? "true" : "false") << ",\n"
<< " metadata_brief: " << (metadata_brief ? "true" : "false") << ",\n"
<< " metadata_all: " << (metadata_all ? "true" : "false") << "\n"
<< "}"; << "}";
return oss.str(); return oss.str();
} }
@ -217,9 +258,13 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
exit(cli_params.normal_exit ? 0 : 1); exit(cli_params.normal_exit ? 0 : 1);
} }
if (!cli_params.process_and_check() || bool valid = cli_params.process_and_check();
!ctx_params.process_and_check(cli_params.mode) || if (valid && cli_params.mode != METADATA) {
!gen_params.process_and_check(cli_params.mode, ctx_params.lora_model_dir)) { valid = ctx_params.process_and_check(cli_params.mode) &&
gen_params.process_and_check(cli_params.mode, ctx_params.lora_model_dir);
}
if (!valid) {
print_usage(argc, argv, options_vec); print_usage(argc, argv, options_vec);
exit(1); exit(1);
} }
@ -430,6 +475,27 @@ int main(int argc, const char* argv[]) {
SDGenerationParams gen_params; SDGenerationParams gen_params;
parse_args(argc, argv, cli_params, ctx_params, gen_params); parse_args(argc, argv, cli_params, ctx_params, gen_params);
sd_set_log_callback(sd_log_cb, (void*)&cli_params);
log_verbose = cli_params.verbose;
log_color = cli_params.color;
if (cli_params.mode == METADATA) {
MetadataReadOptions options;
options.output_format = cli_params.metadata_format == "json"
? MetadataOutputFormat::JSON
: MetadataOutputFormat::TEXT;
options.include_raw = cli_params.metadata_raw;
options.brief = cli_params.metadata_brief;
options.include_structural = cli_params.metadata_all;
std::string error;
if (!print_image_metadata(cli_params.image_path, options, std::cout, error)) {
LOG_ERROR("%s", error.c_str());
return 1;
}
return 0;
}
if (gen_params.video_frames > 4) { if (gen_params.video_frames > 4) {
size_t last_dot_pos = cli_params.preview_path.find_last_of("."); size_t last_dot_pos = cli_params.preview_path.find_last_of(".");
std::string base_path = cli_params.preview_path; std::string base_path = cli_params.preview_path;
@ -447,9 +513,6 @@ int main(int argc, const char* argv[]) {
if (cli_params.preview_method == PREVIEW_PROJ) if (cli_params.preview_method == PREVIEW_PROJ)
cli_params.preview_fps /= 4; cli_params.preview_fps /= 4;
sd_set_log_callback(sd_log_cb, (void*)&cli_params);
log_verbose = cli_params.verbose;
log_color = cli_params.color;
sd_set_preview_callback(step_callback, sd_set_preview_callback(step_callback,
cli_params.preview_method, cli_params.preview_method,
cli_params.preview_interval, cli_params.preview_interval,

View File

@ -39,14 +39,16 @@ const char* modes_str[] = {
"vid_gen", "vid_gen",
"convert", "convert",
"upscale", "upscale",
"metadata",
}; };
#define SD_ALL_MODES_STR "img_gen, vid_gen, convert, upscale" #define SD_ALL_MODES_STR "img_gen, vid_gen, convert, upscale, metadata"
enum SDMode { enum SDMode {
IMG_GEN, IMG_GEN,
VID_GEN, VID_GEN,
CONVERT, CONVERT,
UPSCALE, UPSCALE,
METADATA,
MODE_COUNT MODE_COUNT
}; };
@ -777,7 +779,7 @@ struct SDContextParams {
} }
bool process_and_check(SDMode mode) { bool process_and_check(SDMode mode) {
if (mode != UPSCALE && model_path.length() == 0 && diffusion_model_path.length() == 0) { if (mode != UPSCALE && mode != METADATA && model_path.length() == 0 && diffusion_model_path.length() == 0) {
LOG_ERROR("error: the following arguments are required: model_path/diffusion_model\n"); LOG_ERROR("error: the following arguments are required: model_path/diffusion_model\n");
return false; return false;
} }