mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-04-01 06:09:49 +00:00
feat(cli): add metadata inspection mode (#1381)
This commit is contained in:
parent
6dfe945958
commit
09b12d5f6d
14
.github/workflows/build.yml
vendored
14
.github/workflows/build.yml
vendored
@ -64,7 +64,7 @@ jobs:
|
|||||||
- name: Setup pnpm
|
- name: Setup pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4
|
||||||
with:
|
with:
|
||||||
version: 9
|
version: 10.15.1
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
id: depends
|
id: depends
|
||||||
@ -127,7 +127,7 @@ jobs:
|
|||||||
- name: Setup pnpm
|
- name: Setup pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4
|
||||||
with:
|
with:
|
||||||
version: 9
|
version: 10.15.1
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
id: depends
|
id: depends
|
||||||
@ -205,7 +205,7 @@ jobs:
|
|||||||
- name: Setup pnpm
|
- name: Setup pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4
|
||||||
with:
|
with:
|
||||||
version: 9
|
version: 10.15.1
|
||||||
|
|
||||||
- name: Get commit hash
|
- name: Get commit hash
|
||||||
id: commit
|
id: commit
|
||||||
@ -264,7 +264,7 @@ jobs:
|
|||||||
- name: Setup pnpm
|
- name: Setup pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4
|
||||||
with:
|
with:
|
||||||
version: 9
|
version: 10.15.1
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
id: depends
|
id: depends
|
||||||
@ -345,7 +345,7 @@ jobs:
|
|||||||
- name: Setup pnpm
|
- name: Setup pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4
|
||||||
with:
|
with:
|
||||||
version: 9
|
version: 10.15.1
|
||||||
|
|
||||||
- name: Install cuda-toolkit
|
- name: Install cuda-toolkit
|
||||||
id: cuda-toolkit
|
id: cuda-toolkit
|
||||||
@ -460,7 +460,7 @@ jobs:
|
|||||||
- name: Setup pnpm
|
- name: Setup pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4
|
||||||
with:
|
with:
|
||||||
version: 9
|
version: 10.15.1
|
||||||
|
|
||||||
- name: Cache ROCm Installation
|
- name: Cache ROCm Installation
|
||||||
id: cache-rocm
|
id: cache-rocm
|
||||||
@ -573,7 +573,7 @@ jobs:
|
|||||||
- name: Setup pnpm
|
- name: Setup pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4
|
||||||
with:
|
with:
|
||||||
version: 9
|
version: 10.15.1
|
||||||
|
|
||||||
- name: Free disk space
|
- name: Free disk space
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@ -1,6 +1,9 @@
|
|||||||
set(TARGET sd-cli)
|
set(TARGET sd-cli)
|
||||||
|
|
||||||
add_executable(${TARGET} main.cpp)
|
add_executable(${TARGET}
|
||||||
|
image_metadata.cpp
|
||||||
|
main.cpp
|
||||||
|
)
|
||||||
install(TARGETS ${TARGET} RUNTIME)
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
|
||||||
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
|
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
|
||||||
@ -10,13 +10,18 @@ CLI Options:
|
|||||||
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
|
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
|
||||||
every step)
|
every step)
|
||||||
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
|
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
|
||||||
|
--image <string> path to the image to inspect (for metadata mode)
|
||||||
|
--metadata-format <string> metadata output format, one of [text, json] (default: text)
|
||||||
--canny apply canny preprocessor (edge detection)
|
--canny apply canny preprocessor (edge detection)
|
||||||
--convert-name convert tensor name (for convert mode)
|
--convert-name convert tensor name (for convert mode)
|
||||||
-v, --verbose print extra info
|
-v, --verbose print extra info
|
||||||
--color colors the logging tags according to level
|
--color colors the logging tags according to level
|
||||||
--taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae)
|
--taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae)
|
||||||
--preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs
|
--preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs
|
||||||
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen
|
--metadata-raw include raw hex previews for unparsed metadata payloads
|
||||||
|
--metadata-brief truncate long metadata text values in text output
|
||||||
|
--metadata-all include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments
|
||||||
|
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert, metadata], default: img_gen
|
||||||
--preview preview method. must be one of the following [none, proj, tae, vae] (default is none)
|
--preview preview method. must be one of the following [none, proj, tae, vae] (default is none)
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
|
|
||||||
@ -148,3 +153,12 @@ Generation Options:
|
|||||||
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
||||||
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Metadata mode inspects PNG/JPEG container metadata without loading any model:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./bin/sd-cli -M metadata --image ./output.png
|
||||||
|
./bin/sd-cli -M metadata --image ./output.jpg --metadata-format json
|
||||||
|
./bin/sd-cli -M metadata --image ./output.png --metadata-raw
|
||||||
|
./bin/sd-cli -M metadata --image ./output.png --metadata-all
|
||||||
|
```
|
||||||
|
|||||||
1144
examples/cli/image_metadata.cpp
Normal file
1144
examples/cli/image_metadata.cpp
Normal file
File diff suppressed because it is too large
Load Diff
21
examples/cli/image_metadata.h
Normal file
21
examples/cli/image_metadata.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
enum class MetadataOutputFormat {
|
||||||
|
TEXT,
|
||||||
|
JSON,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MetadataReadOptions {
|
||||||
|
MetadataOutputFormat output_format = MetadataOutputFormat::TEXT;
|
||||||
|
bool include_raw = false;
|
||||||
|
bool brief = false;
|
||||||
|
bool include_structural = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
bool print_image_metadata(const std::string& image_path,
|
||||||
|
const MetadataReadOptions& options,
|
||||||
|
std::ostream& out,
|
||||||
|
std::string& error);
|
||||||
@ -18,6 +18,7 @@
|
|||||||
#include "common/common.hpp"
|
#include "common/common.hpp"
|
||||||
|
|
||||||
#include "avi_writer.h"
|
#include "avi_writer.h"
|
||||||
|
#include "image_metadata.h"
|
||||||
|
|
||||||
const char* previews_str[] = {
|
const char* previews_str[] = {
|
||||||
"none",
|
"none",
|
||||||
@ -32,6 +33,8 @@ struct SDCliParams {
|
|||||||
SDMode mode = IMG_GEN;
|
SDMode mode = IMG_GEN;
|
||||||
std::string output_path = "output.png";
|
std::string output_path = "output.png";
|
||||||
int output_begin_idx = -1;
|
int output_begin_idx = -1;
|
||||||
|
std::string image_path;
|
||||||
|
std::string metadata_format = "text";
|
||||||
|
|
||||||
bool verbose = false;
|
bool verbose = false;
|
||||||
bool canny_preprocess = false;
|
bool canny_preprocess = false;
|
||||||
@ -44,6 +47,9 @@ struct SDCliParams {
|
|||||||
bool taesd_preview = false;
|
bool taesd_preview = false;
|
||||||
bool preview_noisy = false;
|
bool preview_noisy = false;
|
||||||
bool color = false;
|
bool color = false;
|
||||||
|
bool metadata_raw = false;
|
||||||
|
bool metadata_brief = false;
|
||||||
|
bool metadata_all = false;
|
||||||
|
|
||||||
bool normal_exit = false;
|
bool normal_exit = false;
|
||||||
|
|
||||||
@ -55,6 +61,14 @@ struct SDCliParams {
|
|||||||
"--output",
|
"--output",
|
||||||
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)",
|
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)",
|
||||||
&output_path},
|
&output_path},
|
||||||
|
{"",
|
||||||
|
"--image",
|
||||||
|
"path to the image to inspect (for metadata mode)",
|
||||||
|
&image_path},
|
||||||
|
{"",
|
||||||
|
"--metadata-format",
|
||||||
|
"metadata output format, one of [text, json] (default: text)",
|
||||||
|
&metadata_format},
|
||||||
{"",
|
{"",
|
||||||
"--preview-path",
|
"--preview-path",
|
||||||
"path to write preview image to (default: ./preview.png)",
|
"path to write preview image to (default: ./preview.png)",
|
||||||
@ -97,6 +111,18 @@ struct SDCliParams {
|
|||||||
"--preview-noisy",
|
"--preview-noisy",
|
||||||
"enables previewing noisy inputs of the models rather than the denoised outputs",
|
"enables previewing noisy inputs of the models rather than the denoised outputs",
|
||||||
true, &preview_noisy},
|
true, &preview_noisy},
|
||||||
|
{"",
|
||||||
|
"--metadata-raw",
|
||||||
|
"include raw hex previews for unparsed metadata payloads",
|
||||||
|
true, &metadata_raw},
|
||||||
|
{"",
|
||||||
|
"--metadata-brief",
|
||||||
|
"truncate long metadata text values in text output",
|
||||||
|
true, &metadata_brief},
|
||||||
|
{"",
|
||||||
|
"--metadata-all",
|
||||||
|
"include structural/container entries such as IHDR, IDAT, and non-metadata JPEG segments",
|
||||||
|
true, &metadata_all},
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -149,7 +175,7 @@ struct SDCliParams {
|
|||||||
options.manual_options = {
|
options.manual_options = {
|
||||||
{"-M",
|
{"-M",
|
||||||
"--mode",
|
"--mode",
|
||||||
"run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen",
|
"run mode, one of [img_gen, vid_gen, upscale, convert, metadata], default: img_gen",
|
||||||
on_mode_arg},
|
on_mode_arg},
|
||||||
{"",
|
{"",
|
||||||
"--preview",
|
"--preview",
|
||||||
@ -165,7 +191,7 @@ struct SDCliParams {
|
|||||||
};
|
};
|
||||||
|
|
||||||
bool process_and_check() {
|
bool process_and_check() {
|
||||||
if (output_path.length() == 0) {
|
if (mode != METADATA && output_path.length() == 0) {
|
||||||
LOG_ERROR("error: the following arguments are required: output_path");
|
LOG_ERROR("error: the following arguments are required: output_path");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -174,6 +200,16 @@ struct SDCliParams {
|
|||||||
if (output_path == "output.png") {
|
if (output_path == "output.png") {
|
||||||
output_path = "output.gguf";
|
output_path = "output.gguf";
|
||||||
}
|
}
|
||||||
|
} else if (mode == METADATA) {
|
||||||
|
if (image_path.empty()) {
|
||||||
|
LOG_ERROR("error: metadata mode needs an image path (--image)");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (metadata_format != "text" && metadata_format != "json") {
|
||||||
|
LOG_ERROR("error: invalid metadata format %s, must be one of [text, json]",
|
||||||
|
metadata_format.c_str());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -183,6 +219,8 @@ struct SDCliParams {
|
|||||||
oss << "SDCliParams {\n"
|
oss << "SDCliParams {\n"
|
||||||
<< " mode: " << modes_str[mode] << ",\n"
|
<< " mode: " << modes_str[mode] << ",\n"
|
||||||
<< " output_path: \"" << output_path << "\",\n"
|
<< " output_path: \"" << output_path << "\",\n"
|
||||||
|
<< " image_path: \"" << image_path << "\",\n"
|
||||||
|
<< " metadata_format: \"" << metadata_format << "\",\n"
|
||||||
<< " verbose: " << (verbose ? "true" : "false") << ",\n"
|
<< " verbose: " << (verbose ? "true" : "false") << ",\n"
|
||||||
<< " color: " << (color ? "true" : "false") << ",\n"
|
<< " color: " << (color ? "true" : "false") << ",\n"
|
||||||
<< " canny_preprocess: " << (canny_preprocess ? "true" : "false") << ",\n"
|
<< " canny_preprocess: " << (canny_preprocess ? "true" : "false") << ",\n"
|
||||||
@ -192,7 +230,10 @@ struct SDCliParams {
|
|||||||
<< " preview_path: \"" << preview_path << "\",\n"
|
<< " preview_path: \"" << preview_path << "\",\n"
|
||||||
<< " preview_fps: " << preview_fps << ",\n"
|
<< " preview_fps: " << preview_fps << ",\n"
|
||||||
<< " taesd_preview: " << (taesd_preview ? "true" : "false") << ",\n"
|
<< " taesd_preview: " << (taesd_preview ? "true" : "false") << ",\n"
|
||||||
<< " preview_noisy: " << (preview_noisy ? "true" : "false") << "\n"
|
<< " preview_noisy: " << (preview_noisy ? "true" : "false") << ",\n"
|
||||||
|
<< " metadata_raw: " << (metadata_raw ? "true" : "false") << ",\n"
|
||||||
|
<< " metadata_brief: " << (metadata_brief ? "true" : "false") << ",\n"
|
||||||
|
<< " metadata_all: " << (metadata_all ? "true" : "false") << "\n"
|
||||||
<< "}";
|
<< "}";
|
||||||
return oss.str();
|
return oss.str();
|
||||||
}
|
}
|
||||||
@ -217,9 +258,13 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
|
|||||||
exit(cli_params.normal_exit ? 0 : 1);
|
exit(cli_params.normal_exit ? 0 : 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cli_params.process_and_check() ||
|
bool valid = cli_params.process_and_check();
|
||||||
!ctx_params.process_and_check(cli_params.mode) ||
|
if (valid && cli_params.mode != METADATA) {
|
||||||
!gen_params.process_and_check(cli_params.mode, ctx_params.lora_model_dir)) {
|
valid = ctx_params.process_and_check(cli_params.mode) &&
|
||||||
|
gen_params.process_and_check(cli_params.mode, ctx_params.lora_model_dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!valid) {
|
||||||
print_usage(argc, argv, options_vec);
|
print_usage(argc, argv, options_vec);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@ -430,6 +475,27 @@ int main(int argc, const char* argv[]) {
|
|||||||
SDGenerationParams gen_params;
|
SDGenerationParams gen_params;
|
||||||
|
|
||||||
parse_args(argc, argv, cli_params, ctx_params, gen_params);
|
parse_args(argc, argv, cli_params, ctx_params, gen_params);
|
||||||
|
sd_set_log_callback(sd_log_cb, (void*)&cli_params);
|
||||||
|
log_verbose = cli_params.verbose;
|
||||||
|
log_color = cli_params.color;
|
||||||
|
|
||||||
|
if (cli_params.mode == METADATA) {
|
||||||
|
MetadataReadOptions options;
|
||||||
|
options.output_format = cli_params.metadata_format == "json"
|
||||||
|
? MetadataOutputFormat::JSON
|
||||||
|
: MetadataOutputFormat::TEXT;
|
||||||
|
options.include_raw = cli_params.metadata_raw;
|
||||||
|
options.brief = cli_params.metadata_brief;
|
||||||
|
options.include_structural = cli_params.metadata_all;
|
||||||
|
|
||||||
|
std::string error;
|
||||||
|
if (!print_image_metadata(cli_params.image_path, options, std::cout, error)) {
|
||||||
|
LOG_ERROR("%s", error.c_str());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (gen_params.video_frames > 4) {
|
if (gen_params.video_frames > 4) {
|
||||||
size_t last_dot_pos = cli_params.preview_path.find_last_of(".");
|
size_t last_dot_pos = cli_params.preview_path.find_last_of(".");
|
||||||
std::string base_path = cli_params.preview_path;
|
std::string base_path = cli_params.preview_path;
|
||||||
@ -447,9 +513,6 @@ int main(int argc, const char* argv[]) {
|
|||||||
if (cli_params.preview_method == PREVIEW_PROJ)
|
if (cli_params.preview_method == PREVIEW_PROJ)
|
||||||
cli_params.preview_fps /= 4;
|
cli_params.preview_fps /= 4;
|
||||||
|
|
||||||
sd_set_log_callback(sd_log_cb, (void*)&cli_params);
|
|
||||||
log_verbose = cli_params.verbose;
|
|
||||||
log_color = cli_params.color;
|
|
||||||
sd_set_preview_callback(step_callback,
|
sd_set_preview_callback(step_callback,
|
||||||
cli_params.preview_method,
|
cli_params.preview_method,
|
||||||
cli_params.preview_interval,
|
cli_params.preview_interval,
|
||||||
|
|||||||
@ -39,14 +39,16 @@ const char* modes_str[] = {
|
|||||||
"vid_gen",
|
"vid_gen",
|
||||||
"convert",
|
"convert",
|
||||||
"upscale",
|
"upscale",
|
||||||
|
"metadata",
|
||||||
};
|
};
|
||||||
#define SD_ALL_MODES_STR "img_gen, vid_gen, convert, upscale"
|
#define SD_ALL_MODES_STR "img_gen, vid_gen, convert, upscale, metadata"
|
||||||
|
|
||||||
enum SDMode {
|
enum SDMode {
|
||||||
IMG_GEN,
|
IMG_GEN,
|
||||||
VID_GEN,
|
VID_GEN,
|
||||||
CONVERT,
|
CONVERT,
|
||||||
UPSCALE,
|
UPSCALE,
|
||||||
|
METADATA,
|
||||||
MODE_COUNT
|
MODE_COUNT
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -777,7 +779,7 @@ struct SDContextParams {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool process_and_check(SDMode mode) {
|
bool process_and_check(SDMode mode) {
|
||||||
if (mode != UPSCALE && model_path.length() == 0 && diffusion_model_path.length() == 0) {
|
if (mode != UPSCALE && mode != METADATA && model_path.length() == 0 && diffusion_model_path.length() == 0) {
|
||||||
LOG_ERROR("error: the following arguments are required: model_path/diffusion_model\n");
|
LOG_ERROR("error: the following arguments are required: model_path/diffusion_model\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user