mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-04-01 06:09:49 +00:00
feat(cli): add metadata inspection mode (#1381)
This commit is contained in:
parent
6dfe945958
commit
09b12d5f6d
14
.github/workflows/build.yml
vendored
14
.github/workflows/build.yml
vendored
@ -64,7 +64,7 @@ jobs:
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 9
|
||||
version: 10.15.1
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@ -127,7 +127,7 @@ jobs:
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 9
|
||||
version: 10.15.1
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@ -205,7 +205,7 @@ jobs:
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 9
|
||||
version: 10.15.1
|
||||
|
||||
- name: Get commit hash
|
||||
id: commit
|
||||
@ -264,7 +264,7 @@ jobs:
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 9
|
||||
version: 10.15.1
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@ -345,7 +345,7 @@ jobs:
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 9
|
||||
version: 10.15.1
|
||||
|
||||
- name: Install cuda-toolkit
|
||||
id: cuda-toolkit
|
||||
@ -460,7 +460,7 @@ jobs:
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 9
|
||||
version: 10.15.1
|
||||
|
||||
- name: Cache ROCm Installation
|
||||
id: cache-rocm
|
||||
@ -573,7 +573,7 @@ jobs:
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 9
|
||||
version: 10.15.1
|
||||
|
||||
- name: Free disk space
|
||||
run: |
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
set(TARGET sd-cli)
|
||||
|
||||
add_executable(${TARGET} main.cpp)
|
||||
add_executable(${TARGET}
|
||||
image_metadata.cpp
|
||||
main.cpp
|
||||
)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
|
||||
target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
|
||||
|
||||
@ -10,13 +10,18 @@ CLI Options:
|
||||
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
|
||||
every step)
|
||||
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
|
||||
--image <string> path to the image to inspect (for metadata mode)
|
||||
--metadata-format <string> metadata output format, one of [text, json] (default: text)
|
||||
--canny apply canny preprocessor (edge detection)
|
||||
--convert-name convert tensor name (for convert mode)
|
||||
-v, --verbose print extra info
|
||||
--color colors the logging tags according to level
|
||||
--taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae)
|
||||
--preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs
|
||||
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen
|
||||
--metadata-raw include raw hex previews for unparsed metadata payloads
|
||||
--metadata-brief truncate long metadata text values in text output
|
||||
--metadata-all include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments
|
||||
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert, metadata], default: img_gen
|
||||
--preview preview method. must be one of the following [none, proj, tae, vae] (default is none)
|
||||
-h, --help show this help message and exit
|
||||
|
||||
@ -148,3 +153,12 @@ Generation Options:
|
||||
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
||||
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
||||
```
|
||||
|
||||
Metadata mode inspects PNG/JPEG container metadata without loading any model:
|
||||
|
||||
```bash
|
||||
./bin/sd-cli -M metadata --image ./output.png
|
||||
./bin/sd-cli -M metadata --image ./output.jpg --metadata-format json
|
||||
./bin/sd-cli -M metadata --image ./output.png --metadata-raw
|
||||
./bin/sd-cli -M metadata --image ./output.png --metadata-all
|
||||
```
|
||||
|
||||
1144
examples/cli/image_metadata.cpp
Normal file
1144
examples/cli/image_metadata.cpp
Normal file
File diff suppressed because it is too large
Load Diff
21
examples/cli/image_metadata.h
Normal file
21
examples/cli/image_metadata.h
Normal file
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include <iosfwd>
|
||||
#include <string>
|
||||
|
||||
// Output format used when printing image metadata.
// Selected on the CLI through --metadata-format, which accepts "text" or "json".
enum class MetadataOutputFormat {
|
||||
TEXT,  // the CLI default ("text")
|
||||
JSON,  // chosen when --metadata-format is "json"
|
||||
};
|
||||
|
||||
// Options controlling what print_image_metadata() reports and how.
// Each field mirrors one sd-cli metadata flag; the defaults match the CLI defaults.
struct MetadataReadOptions {
|
||||
// Output format (--metadata-format); defaults to text.
MetadataOutputFormat output_format = MetadataOutputFormat::TEXT;
|
||||
// --metadata-raw: include raw hex previews for unparsed metadata payloads.
bool include_raw = false;
|
||||
// --metadata-brief: truncate long metadata text values in text output.
bool brief = false;
|
||||
// --metadata-all: include structural/container entries such as IHDR, IDAT,
// and non-metadata JPEG segments.
bool include_structural = false;
|
||||
};
|
||||
|
||||
// Inspects the metadata of the image file at `image_path` and writes the
// result to `out` (sd-cli passes std::cout).
//
// image_path: path to the image to inspect (the CLI documents PNG/JPEG).
// options:    output format and which entries to include.
// out:        destination stream for the metadata report.
// error:      out-parameter; sd-cli logs its contents when this function
//             returns false, so it presumably holds a human-readable failure
//             message -- confirm against image_metadata.cpp.
//
// Returns true on success. On false, the sd-cli caller logs `error` and
// exits with status 1.
bool print_image_metadata(const std::string& image_path,
|
||||
const MetadataReadOptions& options,
|
||||
std::ostream& out,
|
||||
std::string& error);
|
||||
@ -18,6 +18,7 @@
|
||||
#include "common/common.hpp"
|
||||
|
||||
#include "avi_writer.h"
|
||||
#include "image_metadata.h"
|
||||
|
||||
const char* previews_str[] = {
|
||||
"none",
|
||||
@ -32,6 +33,8 @@ struct SDCliParams {
|
||||
SDMode mode = IMG_GEN;
|
||||
std::string output_path = "output.png";
|
||||
int output_begin_idx = -1;
|
||||
std::string image_path;
|
||||
std::string metadata_format = "text";
|
||||
|
||||
bool verbose = false;
|
||||
bool canny_preprocess = false;
|
||||
@ -44,6 +47,9 @@ struct SDCliParams {
|
||||
bool taesd_preview = false;
|
||||
bool preview_noisy = false;
|
||||
bool color = false;
|
||||
bool metadata_raw = false;
|
||||
bool metadata_brief = false;
|
||||
bool metadata_all = false;
|
||||
|
||||
bool normal_exit = false;
|
||||
|
||||
@ -55,6 +61,14 @@ struct SDCliParams {
|
||||
"--output",
|
||||
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)",
|
||||
&output_path},
|
||||
{"",
|
||||
"--image",
|
||||
"path to the image to inspect (for metadata mode)",
|
||||
&image_path},
|
||||
{"",
|
||||
"--metadata-format",
|
||||
"metadata output format, one of [text, json] (default: text)",
|
||||
&metadata_format},
|
||||
{"",
|
||||
"--preview-path",
|
||||
"path to write preview image to (default: ./preview.png)",
|
||||
@ -97,6 +111,18 @@ struct SDCliParams {
|
||||
"--preview-noisy",
|
||||
"enables previewing noisy inputs of the models rather than the denoised outputs",
|
||||
true, &preview_noisy},
|
||||
{"",
|
||||
"--metadata-raw",
|
||||
"include raw hex previews for unparsed metadata payloads",
|
||||
true, &metadata_raw},
|
||||
{"",
|
||||
"--metadata-brief",
|
||||
"truncate long metadata text values in text output",
|
||||
true, &metadata_brief},
|
||||
{"",
|
||||
"--metadata-all",
|
||||
"include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments",
|
||||
true, &metadata_all},
|
||||
|
||||
};
|
||||
|
||||
@ -149,7 +175,7 @@ struct SDCliParams {
|
||||
options.manual_options = {
|
||||
{"-M",
|
||||
"--mode",
|
||||
"run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen",
|
||||
"run mode, one of [img_gen, vid_gen, upscale, convert, metadata], default: img_gen",
|
||||
on_mode_arg},
|
||||
{"",
|
||||
"--preview",
|
||||
@ -165,7 +191,7 @@ struct SDCliParams {
|
||||
};
|
||||
|
||||
bool process_and_check() {
|
||||
if (output_path.length() == 0) {
|
||||
if (mode != METADATA && output_path.length() == 0) {
|
||||
LOG_ERROR("error: the following arguments are required: output_path");
|
||||
return false;
|
||||
}
|
||||
@ -174,6 +200,16 @@ struct SDCliParams {
|
||||
if (output_path == "output.png") {
|
||||
output_path = "output.gguf";
|
||||
}
|
||||
} else if (mode == METADATA) {
|
||||
if (image_path.empty()) {
|
||||
LOG_ERROR("error: metadata mode needs an image path (--image)");
|
||||
return false;
|
||||
}
|
||||
if (metadata_format != "text" && metadata_format != "json") {
|
||||
LOG_ERROR("error: invalid metadata format %s, must be one of [text, json]",
|
||||
metadata_format.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -183,6 +219,8 @@ struct SDCliParams {
|
||||
oss << "SDCliParams {\n"
|
||||
<< " mode: " << modes_str[mode] << ",\n"
|
||||
<< " output_path: \"" << output_path << "\",\n"
|
||||
<< " image_path: \"" << image_path << "\",\n"
|
||||
<< " metadata_format: \"" << metadata_format << "\",\n"
|
||||
<< " verbose: " << (verbose ? "true" : "false") << ",\n"
|
||||
<< " color: " << (color ? "true" : "false") << ",\n"
|
||||
<< " canny_preprocess: " << (canny_preprocess ? "true" : "false") << ",\n"
|
||||
@ -192,7 +230,10 @@ struct SDCliParams {
|
||||
<< " preview_path: \"" << preview_path << "\",\n"
|
||||
<< " preview_fps: " << preview_fps << ",\n"
|
||||
<< " taesd_preview: " << (taesd_preview ? "true" : "false") << ",\n"
|
||||
<< " preview_noisy: " << (preview_noisy ? "true" : "false") << "\n"
|
||||
<< " preview_noisy: " << (preview_noisy ? "true" : "false") << ",\n"
|
||||
<< " metadata_raw: " << (metadata_raw ? "true" : "false") << ",\n"
|
||||
<< " metadata_brief: " << (metadata_brief ? "true" : "false") << ",\n"
|
||||
<< " metadata_all: " << (metadata_all ? "true" : "false") << "\n"
|
||||
<< "}";
|
||||
return oss.str();
|
||||
}
|
||||
@ -217,9 +258,13 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
|
||||
exit(cli_params.normal_exit ? 0 : 1);
|
||||
}
|
||||
|
||||
if (!cli_params.process_and_check() ||
|
||||
!ctx_params.process_and_check(cli_params.mode) ||
|
||||
!gen_params.process_and_check(cli_params.mode, ctx_params.lora_model_dir)) {
|
||||
bool valid = cli_params.process_and_check();
|
||||
if (valid && cli_params.mode != METADATA) {
|
||||
valid = ctx_params.process_and_check(cli_params.mode) &&
|
||||
gen_params.process_and_check(cli_params.mode, ctx_params.lora_model_dir);
|
||||
}
|
||||
|
||||
if (!valid) {
|
||||
print_usage(argc, argv, options_vec);
|
||||
exit(1);
|
||||
}
|
||||
@ -430,6 +475,27 @@ int main(int argc, const char* argv[]) {
|
||||
SDGenerationParams gen_params;
|
||||
|
||||
parse_args(argc, argv, cli_params, ctx_params, gen_params);
|
||||
sd_set_log_callback(sd_log_cb, (void*)&cli_params);
|
||||
log_verbose = cli_params.verbose;
|
||||
log_color = cli_params.color;
|
||||
|
||||
if (cli_params.mode == METADATA) {
|
||||
MetadataReadOptions options;
|
||||
options.output_format = cli_params.metadata_format == "json"
|
||||
? MetadataOutputFormat::JSON
|
||||
: MetadataOutputFormat::TEXT;
|
||||
options.include_raw = cli_params.metadata_raw;
|
||||
options.brief = cli_params.metadata_brief;
|
||||
options.include_structural = cli_params.metadata_all;
|
||||
|
||||
std::string error;
|
||||
if (!print_image_metadata(cli_params.image_path, options, std::cout, error)) {
|
||||
LOG_ERROR("%s", error.c_str());
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (gen_params.video_frames > 4) {
|
||||
size_t last_dot_pos = cli_params.preview_path.find_last_of(".");
|
||||
std::string base_path = cli_params.preview_path;
|
||||
@ -447,9 +513,6 @@ int main(int argc, const char* argv[]) {
|
||||
if (cli_params.preview_method == PREVIEW_PROJ)
|
||||
cli_params.preview_fps /= 4;
|
||||
|
||||
sd_set_log_callback(sd_log_cb, (void*)&cli_params);
|
||||
log_verbose = cli_params.verbose;
|
||||
log_color = cli_params.color;
|
||||
sd_set_preview_callback(step_callback,
|
||||
cli_params.preview_method,
|
||||
cli_params.preview_interval,
|
||||
|
||||
@ -39,14 +39,16 @@ const char* modes_str[] = {
|
||||
"vid_gen",
|
||||
"convert",
|
||||
"upscale",
|
||||
"metadata",
|
||||
};
|
||||
#define SD_ALL_MODES_STR "img_gen, vid_gen, convert, upscale"
|
||||
#define SD_ALL_MODES_STR "img_gen, vid_gen, convert, upscale, metadata"
|
||||
|
||||
// Run modes selectable with -M/--mode.
// The enumerator order must stay in sync with modes_str[], because the mode
// value is used directly as an index into that array (modes_str[mode]).
enum SDMode {
|
||||
IMG_GEN,
|
||||
VID_GEN,
|
||||
CONVERT,
|
||||
UPSCALE,
|
||||
// Metadata inspection mode; sd-cli skips the context/generation parameter
// checks for this mode.
METADATA,
|
||||
// Presumably a sentinel equal to the number of real modes; keep it last.
MODE_COUNT
|
||||
};
|
||||
|
||||
@ -777,7 +779,7 @@ struct SDContextParams {
|
||||
}
|
||||
|
||||
bool process_and_check(SDMode mode) {
|
||||
if (mode != UPSCALE && model_path.length() == 0 && diffusion_model_path.length() == 0) {
|
||||
if (mode != UPSCALE && mode != METADATA && model_path.length() == 0 && diffusion_model_path.length() == 0) {
|
||||
LOG_ERROR("error: the following arguments are required: model_path/diffusion_model\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user