mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-04-03 07:08:53 +00:00
feat: add webp support (#1384)
This commit is contained in:
parent
99c1de379b
commit
87ecb95cbc
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -4,3 +4,6 @@
|
||||
[submodule "examples/server/frontend"]
|
||||
path = examples/server/frontend
|
||||
url = https://github.com/leejet/stable-ui.git
|
||||
[submodule "thirdparty/libwebp"]
|
||||
path = thirdparty/libwebp
|
||||
url = https://github.com/webmproject/libwebp.git
|
||||
|
||||
@ -29,6 +29,7 @@ endif()
|
||||
# general
|
||||
#option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE})
|
||||
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
|
||||
option(SD_WEBP "sd: enable WebP image I/O support" ON)
|
||||
option(SD_CUDA "sd: cuda backend" OFF)
|
||||
option(SD_HIPBLAS "sd: rocm backend" OFF)
|
||||
option(SD_METAL "sd: metal backend" OFF)
|
||||
@ -77,6 +78,10 @@ if(SD_MUSA)
|
||||
add_definitions(-DSD_USE_CUDA)
|
||||
endif()
|
||||
|
||||
if(SD_WEBP)
|
||||
add_compile_definitions(SD_USE_WEBP)
|
||||
endif()
|
||||
|
||||
set(SD_LIB stable-diffusion)
|
||||
|
||||
file(GLOB SD_LIB_SOURCES
|
||||
|
||||
@ -16,6 +16,18 @@ git submodule init
|
||||
git submodule update
|
||||
```
|
||||
|
||||
## WebP Support in Examples
|
||||
|
||||
The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O. This is enabled by default.
|
||||
|
||||
If you do not want WebP support, you can disable it at configure time:
|
||||
|
||||
```shell
|
||||
mkdir build && cd build
|
||||
cmake .. -DSD_WEBP=OFF
|
||||
cmake --build . --config Release
|
||||
```
|
||||
|
||||
## Build (CPU only)
|
||||
|
||||
If you don't have a GPU or CUDA installed, you can build a CPU-only version.
|
||||
|
||||
@ -1,9 +1,14 @@
|
||||
set(TARGET sd-cli)
|
||||
|
||||
add_executable(${TARGET}
|
||||
../common/log.cpp
|
||||
../common/media_io.cpp
|
||||
image_metadata.cpp
|
||||
main.cpp
|
||||
)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
|
||||
if(SD_WEBP)
|
||||
target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
|
||||
endif()
|
||||
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
|
||||
|
||||
@ -5,8 +5,8 @@ usage: ./bin/sd-cli [options]
|
||||
|
||||
CLI Options:
|
||||
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default:
|
||||
./output.png) (eg. output_%03d.png)
|
||||
--preview-path <string> path to write preview image to (default: ./preview.png)
|
||||
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi and animated .webp
|
||||
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi and animated .webp
|
||||
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
|
||||
every step)
|
||||
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
|
||||
|
||||
@ -1,217 +0,0 @@
|
||||
#ifndef __AVI_WRITER_H__
|
||||
#define __AVI_WRITER_H__
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "stable-diffusion.h"
|
||||
|
||||
#ifndef INCLUDE_STB_IMAGE_WRITE_H
|
||||
#include "stb_image_write.h"
|
||||
#endif
|
||||
|
||||
// One record of the AVI 'idx1' index: where a frame chunk starts in the file
// and how many payload bytes it carries.
struct avi_index_entry {
    uint32_t offset;  // file position of the frame's chunk header
    uint32_t size;    // payload size in bytes (without padding)
};
|
||||
|
||||
/**
 * Write a 32-bit unsigned integer to `f` in little-endian byte order.
 *
 * The bytes are emitted one by one (least significant first) so the on-disk
 * layout does not depend on the byte order of the host CPU. The function is
 * `inline` because it is defined in a header and may therefore be compiled
 * into more than one translation unit.
 *
 * @param f   Destination stream, opened for binary writing.
 * @param val Value to serialize.
 */
inline void write_u32_le(FILE* f, uint32_t val) {
    const uint8_t bytes[4] = {
        static_cast<uint8_t>(val & 0xFFu),
        static_cast<uint8_t>((val >> 8) & 0xFFu),
        static_cast<uint8_t>((val >> 16) & 0xFFu),
        static_cast<uint8_t>((val >> 24) & 0xFFu),
    };
    fwrite(bytes, 1, sizeof(bytes), f);
}
|
||||
|
||||
/**
 * Write a 16-bit unsigned integer to `f` in little-endian byte order.
 *
 * The two bytes are emitted explicitly (low byte first) so the result is the
 * same on little- and big-endian hosts. `inline` because the definition lives
 * in a header.
 *
 * @param f   Destination stream, opened for binary writing.
 * @param val Value to serialize.
 */
inline void write_u16_le(FILE* f, uint16_t val) {
    const uint8_t bytes[2] = {
        static_cast<uint8_t>(val & 0xFFu),
        static_cast<uint8_t>((val >> 8) & 0xFFu),
    };
    fwrite(bytes, 1, sizeof(bytes), f);
}
|
||||
|
||||
/**
|
||||
* Create an MJPG AVI file from an array of sd_image_t images.
|
||||
* Images are encoded to JPEG using stb_image_write.
|
||||
*
|
||||
* @param filename Output AVI file name.
|
||||
* @param images Array of input images.
|
||||
* @param num_images Number of images in the array.
|
||||
* @param fps Frames per second for the video.
|
||||
* @param quality JPEG quality (0-100).
|
||||
* @return 0 on success, -1 on failure.
|
||||
*/
|
||||
int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality = 90) {
|
||||
if (num_images == 0) {
|
||||
fprintf(stderr, "Error: Image array is empty.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
FILE* f = fopen(filename, "wb");
|
||||
if (!f) {
|
||||
perror("Error opening file for writing");
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t width = images[0].width;
|
||||
uint32_t height = images[0].height;
|
||||
uint32_t channels = images[0].channel;
|
||||
if (channels != 3 && channels != 4) {
|
||||
fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
|
||||
fclose(f);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// --- RIFF AVI Header ---
|
||||
fwrite("RIFF", 4, 1, f);
|
||||
long riff_size_pos = ftell(f);
|
||||
write_u32_le(f, 0); // Placeholder for file size
|
||||
fwrite("AVI ", 4, 1, f);
|
||||
|
||||
// 'hdrl' LIST (header list)
|
||||
fwrite("LIST", 4, 1, f);
|
||||
write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
|
||||
fwrite("hdrl", 4, 1, f);
|
||||
|
||||
// 'avih' chunk (AVI main header)
|
||||
fwrite("avih", 4, 1, f);
|
||||
write_u32_le(f, 56);
|
||||
write_u32_le(f, 1000000 / fps); // Microseconds per frame
|
||||
write_u32_le(f, 0); // Max bytes per second
|
||||
write_u32_le(f, 0); // Padding granularity
|
||||
write_u32_le(f, 0x110); // Flags (HASINDEX | ISINTERLEAVED)
|
||||
write_u32_le(f, num_images); // Total frames
|
||||
write_u32_le(f, 0); // Initial frames
|
||||
write_u32_le(f, 1); // Number of streams
|
||||
write_u32_le(f, width * height * 3); // Suggested buffer size
|
||||
write_u32_le(f, width);
|
||||
write_u32_le(f, height);
|
||||
write_u32_le(f, 0); // Reserved
|
||||
write_u32_le(f, 0); // Reserved
|
||||
write_u32_le(f, 0); // Reserved
|
||||
write_u32_le(f, 0); // Reserved
|
||||
|
||||
// 'strl' LIST (stream list)
|
||||
fwrite("LIST", 4, 1, f);
|
||||
write_u32_le(f, 4 + 8 + 56 + 8 + 40);
|
||||
fwrite("strl", 4, 1, f);
|
||||
|
||||
// 'strh' chunk (stream header)
|
||||
fwrite("strh", 4, 1, f);
|
||||
write_u32_le(f, 56);
|
||||
fwrite("vids", 4, 1, f); // Stream type: video
|
||||
fwrite("MJPG", 4, 1, f); // Codec: Motion JPEG
|
||||
write_u32_le(f, 0); // Flags
|
||||
write_u16_le(f, 0); // Priority
|
||||
write_u16_le(f, 0); // Language
|
||||
write_u32_le(f, 0); // Initial frames
|
||||
write_u32_le(f, 1); // Scale
|
||||
write_u32_le(f, fps); // Rate
|
||||
write_u32_le(f, 0); // Start
|
||||
write_u32_le(f, num_images); // Length
|
||||
write_u32_le(f, width * height * 3); // Suggested buffer size
|
||||
write_u32_le(f, (uint32_t)-1); // Quality
|
||||
write_u32_le(f, 0); // Sample size
|
||||
write_u16_le(f, 0); // rcFrame.left
|
||||
write_u16_le(f, 0); // rcFrame.top
|
||||
write_u16_le(f, 0); // rcFrame.right
|
||||
write_u16_le(f, 0); // rcFrame.bottom
|
||||
|
||||
// 'strf' chunk (stream format: BITMAPINFOHEADER)
|
||||
fwrite("strf", 4, 1, f);
|
||||
write_u32_le(f, 40);
|
||||
write_u32_le(f, 40); // biSize
|
||||
write_u32_le(f, width);
|
||||
write_u32_le(f, height);
|
||||
write_u16_le(f, 1); // biPlanes
|
||||
write_u16_le(f, 24); // biBitCount
|
||||
fwrite("MJPG", 4, 1, f); // biCompression (FOURCC)
|
||||
write_u32_le(f, width * height * 3); // biSizeImage
|
||||
write_u32_le(f, 0); // XPelsPerMeter
|
||||
write_u32_le(f, 0); // YPelsPerMeter
|
||||
write_u32_le(f, 0); // Colors used
|
||||
write_u32_le(f, 0); // Colors important
|
||||
|
||||
// 'movi' LIST (video frames)
|
||||
// long movi_list_pos = ftell(f);
|
||||
fwrite("LIST", 4, 1, f);
|
||||
long movi_size_pos = ftell(f);
|
||||
write_u32_le(f, 0); // Placeholder for movi size
|
||||
fwrite("movi", 4, 1, f);
|
||||
|
||||
avi_index_entry* index = (avi_index_entry*)malloc(sizeof(avi_index_entry) * num_images);
|
||||
if (!index) {
|
||||
fclose(f);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Encode and write each frame as JPEG
|
||||
struct {
|
||||
uint8_t* buf;
|
||||
size_t size;
|
||||
} jpeg_data;
|
||||
|
||||
for (int i = 0; i < num_images; i++) {
|
||||
jpeg_data.buf = nullptr;
|
||||
jpeg_data.size = 0;
|
||||
|
||||
// Callback function to collect JPEG data into memory
|
||||
auto write_to_buf = [](void* context, void* data, int size) {
|
||||
auto jd = (decltype(jpeg_data)*)context;
|
||||
jd->buf = (uint8_t*)realloc(jd->buf, jd->size + size);
|
||||
memcpy(jd->buf + jd->size, data, size);
|
||||
jd->size += size;
|
||||
};
|
||||
|
||||
// Encode to JPEG in memory
|
||||
stbi_write_jpg_to_func(
|
||||
write_to_buf,
|
||||
&jpeg_data,
|
||||
images[i].width,
|
||||
images[i].height,
|
||||
channels,
|
||||
images[i].data,
|
||||
quality);
|
||||
|
||||
// Write '00dc' chunk (video frame)
|
||||
fwrite("00dc", 4, 1, f);
|
||||
write_u32_le(f, (uint32_t)jpeg_data.size);
|
||||
index[i].offset = ftell(f) - 8;
|
||||
index[i].size = (uint32_t)jpeg_data.size;
|
||||
fwrite(jpeg_data.buf, 1, jpeg_data.size, f);
|
||||
|
||||
// Align to even byte size
|
||||
if (jpeg_data.size % 2)
|
||||
fputc(0, f);
|
||||
|
||||
free(jpeg_data.buf);
|
||||
}
|
||||
|
||||
// Finalize 'movi' size
|
||||
long cur_pos = ftell(f);
|
||||
long movi_size = cur_pos - movi_size_pos - 4;
|
||||
fseek(f, movi_size_pos, SEEK_SET);
|
||||
write_u32_le(f, movi_size);
|
||||
fseek(f, cur_pos, SEEK_SET);
|
||||
|
||||
// Write 'idx1' index
|
||||
fwrite("idx1", 4, 1, f);
|
||||
write_u32_le(f, num_images * 16);
|
||||
for (int i = 0; i < num_images; i++) {
|
||||
fwrite("00dc", 4, 1, f);
|
||||
write_u32_le(f, 0x10);
|
||||
write_u32_le(f, index[i].offset);
|
||||
write_u32_le(f, index[i].size);
|
||||
}
|
||||
|
||||
// Finalize RIFF size
|
||||
cur_pos = ftell(f);
|
||||
long file_size = cur_pos - riff_size_pos - 4;
|
||||
fseek(f, riff_size_pos, SEEK_SET);
|
||||
write_u32_le(f, file_size);
|
||||
fseek(f, cur_pos, SEEK_SET);
|
||||
|
||||
fclose(f);
|
||||
free(index);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // __AVI_WRITER_H__
|
||||
@ -40,6 +40,13 @@ namespace {
|
||||
static_cast<uint32_t>(data[offset + 3]);
|
||||
}
|
||||
|
||||
// Decode the four bytes data[offset .. offset + 3] as an unsigned 32-bit
// little-endian integer (data[offset] is the least significant byte).
// No bounds check is done here; the indices must be valid for `data`.
uint32_t read_u32_le(const std::vector<uint8_t>& data, size_t offset) {
    uint32_t value = 0;
    // Fold from the most significant byte down to the least significant one.
    for (int i = 3; i >= 0; --i) {
        value = (value << 8) | static_cast<uint32_t>(data[offset + i]);
    }
    return value;
}
|
||||
|
||||
uint16_t read_u16_tiff(const std::vector<uint8_t>& data, size_t offset, bool little_endian) {
|
||||
if (little_endian) {
|
||||
return static_cast<uint16_t>(data[offset]) |
|
||||
@ -357,6 +364,11 @@ namespace {
|
||||
json& result,
|
||||
std::string& error);
|
||||
|
||||
bool parse_webp(const std::vector<uint8_t>& data,
|
||||
bool include_raw,
|
||||
json& result,
|
||||
std::string& error);
|
||||
|
||||
std::string abbreviate(const std::string& value, bool brief);
|
||||
|
||||
void print_json_value(std::ostream& out,
|
||||
@ -1008,6 +1020,83 @@ namespace {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool parse_webp(const std::vector<uint8_t>& data,
|
||||
bool include_raw,
|
||||
json& result,
|
||||
std::string& error) {
|
||||
if (data.size() < 12 ||
|
||||
memcmp(data.data(), "RIFF", 4) != 0 ||
|
||||
memcmp(data.data() + 8, "WEBP", 4) != 0) {
|
||||
error = "not a WebP file";
|
||||
return false;
|
||||
}
|
||||
|
||||
result["format"] = "WEBP";
|
||||
result["entries"] = json::array();
|
||||
|
||||
size_t offset = 12;
|
||||
while (offset + 8 <= data.size()) {
|
||||
const std::string raw_type =
|
||||
bytes_to_string(data.data() + offset, data.data() + offset + 4);
|
||||
const uint32_t length = read_u32_le(data, offset + 4);
|
||||
offset += 8;
|
||||
|
||||
if (offset + static_cast<size_t>(length) > data.size()) {
|
||||
error = "WebP chunk exceeds file size";
|
||||
return false;
|
||||
}
|
||||
|
||||
const uint8_t* payload = data.data() + offset;
|
||||
const std::string type =
|
||||
!raw_type.empty() && raw_type.back() == ' '
|
||||
? raw_type.substr(0, raw_type.size() - 1)
|
||||
: raw_type;
|
||||
|
||||
json entry;
|
||||
entry["entry_type"] = "chunk";
|
||||
entry["name"] = type;
|
||||
entry["length"] = length;
|
||||
entry["metadata_like"] =
|
||||
(raw_type == "ICCP" || raw_type == "EXIF" || raw_type == "XMP ");
|
||||
|
||||
if (raw_type == "VP8X" && length >= 10) {
|
||||
entry["data"] = json{
|
||||
{"icc_profile", (payload[0] & 0x20) != 0},
|
||||
{"alpha", (payload[0] & 0x10) != 0},
|
||||
{"exif", (payload[0] & 0x08) != 0},
|
||||
{"xmp", (payload[0] & 0x04) != 0},
|
||||
{"animation", (payload[0] & 0x02) != 0},
|
||||
{"canvas_width", 1 + static_cast<uint32_t>(payload[4]) + (static_cast<uint32_t>(payload[5]) << 8) + (static_cast<uint32_t>(payload[6]) << 16)},
|
||||
{"canvas_height", 1 + static_cast<uint32_t>(payload[7]) + (static_cast<uint32_t>(payload[8]) << 8) + (static_cast<uint32_t>(payload[9]) << 16)},
|
||||
};
|
||||
} else if (raw_type == "EXIF") {
|
||||
std::string exif_error;
|
||||
json meta = parse_exif_tiff(payload, length, include_raw, exif_error);
|
||||
if (!meta.empty()) {
|
||||
entry["data"] = std::move(meta);
|
||||
}
|
||||
if (!exif_error.empty()) {
|
||||
entry["error"] = exif_error;
|
||||
}
|
||||
} else if (raw_type == "XMP ") {
|
||||
entry["data"] = json{
|
||||
{"type", "XMP"},
|
||||
{"xml", trim_trailing_nuls(bytes_to_string(payload, payload + length))},
|
||||
};
|
||||
} else if (raw_type == "ICCP") {
|
||||
entry["data"] = json{{"profile_size", length}};
|
||||
append_raw_preview(entry["data"], payload, length, include_raw);
|
||||
} else {
|
||||
append_raw_preview(entry, payload, length, include_raw);
|
||||
}
|
||||
|
||||
result["entries"].push_back(entry);
|
||||
offset += static_cast<size_t>(length) + (length & 1u);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string abbreviate(const std::string& value, bool brief) {
|
||||
if (!brief || value.size() <= 240) {
|
||||
return value;
|
||||
@ -1116,8 +1205,12 @@ namespace {
|
||||
if (data.size() >= 2 && data[0] == 0xFF && data[1] == 0xD8) {
|
||||
return parse_jpeg(data, include_raw, report, error);
|
||||
}
|
||||
if (data.size() >= 12 && memcmp(data.data(), "RIFF", 4) == 0 &&
|
||||
memcmp(data.data() + 8, "WEBP", 4) == 0) {
|
||||
return parse_webp(data, include_raw, report, error);
|
||||
}
|
||||
|
||||
error = "unsupported image format; only PNG and JPEG are supported";
|
||||
error = "unsupported image format; only PNG, JPEG, and WebP are supported";
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -16,8 +16,7 @@
|
||||
#include "stable-diffusion.h"
|
||||
|
||||
#include "common/common.hpp"
|
||||
|
||||
#include "avi_writer.h"
|
||||
#include "common/media_io.h"
|
||||
#include "image_metadata.h"
|
||||
|
||||
const char* previews_str[] = {
|
||||
@ -303,7 +302,7 @@ bool load_images_from_dir(const std::string dir,
|
||||
std::string ext = entry.path().extension().string();
|
||||
std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
|
||||
|
||||
if (ext == ".jpg" || ext == ".jpeg" || ext == ".png" || ext == ".bmp") {
|
||||
if (ext == ".jpg" || ext == ".jpeg" || ext == ".png" || ext == ".bmp" || ext == ".webp") {
|
||||
LOG_DEBUG("load image %zu from '%s'", images.size(), path.c_str());
|
||||
int width = 0;
|
||||
int height = 0;
|
||||
@ -333,9 +332,17 @@ void step_callback(int step, int frame_count, sd_image_t* image, bool is_noisy,
|
||||
// is_noisy is set to true if the preview corresponds to noisy latents, false if it's denoised latents
|
||||
// unused in this app, it will either be always noisy or always denoised here
|
||||
if (frame_count == 1) {
|
||||
stbi_write_png(cli_params->preview_path.c_str(), image->width, image->height, image->channel, image->data, 0);
|
||||
if (!write_image_to_file(cli_params->preview_path,
|
||||
image->data,
|
||||
image->width,
|
||||
image->height,
|
||||
image->channel)) {
|
||||
LOG_ERROR("save preview image to '%s' failed", cli_params->preview_path.c_str());
|
||||
}
|
||||
} else {
|
||||
create_mjpg_avi_from_sd_images(cli_params->preview_path.c_str(), image, frame_count, cli_params->preview_fps);
|
||||
if (create_video_from_sd_images(cli_params->preview_path.c_str(), image, frame_count, cli_params->preview_fps) != 0) {
|
||||
LOG_ERROR("save preview video to '%s' failed", cli_params->preview_path.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -385,9 +392,11 @@ bool save_results(const SDCliParams& cli_params,
|
||||
|
||||
std::string ext_lower = ext.string();
|
||||
std::transform(ext_lower.begin(), ext_lower.end(), ext_lower.begin(), ::tolower);
|
||||
bool is_jpg = (ext_lower == ".jpg" || ext_lower == ".jpeg" || ext_lower == ".jpe");
|
||||
const EncodedImageFormat output_format = encoded_image_format_from_path(out_path.string());
|
||||
if (!ext.empty()) {
|
||||
if (is_jpg || ext_lower == ".png") {
|
||||
if (output_format == EncodedImageFormat::JPEG ||
|
||||
output_format == EncodedImageFormat::PNG ||
|
||||
output_format == EncodedImageFormat::WEBP) {
|
||||
base_path.replace_extension();
|
||||
}
|
||||
}
|
||||
@ -405,20 +414,15 @@ bool save_results(const SDCliParams& cli_params,
|
||||
std::string params = gen_params.embed_image_metadata
|
||||
? get_image_params(ctx_params, gen_params, gen_params.seed + idx)
|
||||
: "";
|
||||
int ok = 0;
|
||||
if (is_jpg) {
|
||||
ok = stbi_write_jpg(path.string().c_str(), img.width, img.height, img.channel, img.data, 90, params.size() > 0 ? params.c_str() : nullptr);
|
||||
} else {
|
||||
ok = stbi_write_png(path.string().c_str(), img.width, img.height, img.channel, img.data, 0, params.size() > 0 ? params.c_str() : nullptr);
|
||||
}
|
||||
const bool ok = write_image_to_file(path.string(), img.data, img.width, img.height, img.channel, params, 90);
|
||||
LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure");
|
||||
return ok != 0;
|
||||
return ok;
|
||||
};
|
||||
|
||||
int sucessful_reults = 0;
|
||||
|
||||
if (std::regex_search(cli_params.output_path, format_specifier_regex)) {
|
||||
if (!is_jpg && ext_lower != ".png")
|
||||
if (output_format == EncodedImageFormat::UNKNOWN)
|
||||
ext = ".png";
|
||||
fs::path pattern = base_path;
|
||||
pattern += ext;
|
||||
@ -434,20 +438,20 @@ bool save_results(const SDCliParams& cli_params,
|
||||
}
|
||||
|
||||
if (cli_params.mode == VID_GEN && num_results > 1) {
|
||||
if (ext_lower != ".avi")
|
||||
if (ext_lower != ".avi" && ext_lower != ".webp")
|
||||
ext = ".avi";
|
||||
fs::path video_path = base_path;
|
||||
video_path += ext;
|
||||
if (create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) {
|
||||
LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str());
|
||||
if (create_video_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) {
|
||||
LOG_INFO("save result video to '%s'", video_path.string().c_str());
|
||||
return true;
|
||||
} else {
|
||||
LOG_ERROR("Failed to save result MPG AVI video to '%s'", video_path.string().c_str());
|
||||
LOG_ERROR("Failed to save result video to '%s'", video_path.string().c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_jpg && ext_lower != ".png")
|
||||
if (output_format == EncodedImageFormat::UNKNOWN)
|
||||
ext = ".png";
|
||||
|
||||
for (int i = 0; i < num_results; ++i) {
|
||||
|
||||
@ -1,4 +1,6 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
@ -17,20 +19,9 @@ namespace fs = std::filesystem;
|
||||
#include <windows.h>
|
||||
#endif // _WIN32
|
||||
|
||||
#include "log.h"
|
||||
#include "stable-diffusion.h"
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#define STB_IMAGE_STATIC
|
||||
#include "stb_image.h"
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#define STB_IMAGE_WRITE_STATIC
|
||||
#include "stb_image_write.h"
|
||||
|
||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
||||
#define STB_IMAGE_RESIZE_STATIC
|
||||
#include "stb_image_resize.h"
|
||||
|
||||
#define SAFE_STR(s) ((s) ? (s) : "")
|
||||
#define BOOL_STR(b) ((b) ? "true" : "false")
|
||||
|
||||
@ -88,125 +79,6 @@ static std::string argv_to_utf8(int index, const char** argv) {
|
||||
|
||||
#endif
|
||||
|
||||
// Write the NUL-terminated UTF-8 string `utf8` to `stream`.
//
// On Windows, text for stdout/stderr is written through the Win32 handle:
// converted to UTF-16 and sent with WriteConsoleW when the handle is a
// console, or written as raw UTF-8 bytes with WriteFile otherwise. Those
// calls do not go through the C runtime's FILE buffer, so the stream is
// flushed first to keep the text behind anything already printed with
// fprintf. Any other stream, and every stream on non-Windows platforms, is
// written with fputs. A null `utf8` is ignored.
static void print_utf8(FILE* stream, const char* utf8) {
    if (!utf8)
        return;

#ifdef _WIN32
    // Only the standard streams have a matching std handle.
    if (stream != stdout && stream != stderr) {
        fputs(utf8, stream);
        return;
    }

    // Emit buffered output (e.g. the log tag) before bypassing the buffer.
    fflush(stream);

    HANDLE h = (stream == stderr)
                   ? GetStdHandle(STD_ERROR_HANDLE)
                   : GetStdHandle(STD_OUTPUT_HANDLE);

    DWORD mode;
    BOOL is_console = GetConsoleMode(h, &mode);

    if (is_console) {
        // First call only computes the required length (including the NUL).
        int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
        if (wlen <= 0)
            return;

        wchar_t* wbuf = (wchar_t*)malloc(wlen * sizeof(wchar_t));
        if (!wbuf)
            return;

        if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wbuf, wlen) > 0) {
            DWORD written;
            WriteConsoleW(h, wbuf, wlen - 1, &written, NULL);  // skip the NUL
        }

        free(wbuf);
    } else {
        DWORD written;
        WriteFile(h, utf8, (DWORD)strlen(utf8), &written, NULL);
    }
#else
    fputs(utf8, stream);
#endif
}
|
||||
|
||||
// Return the part of `path` after its last directory separator.
// Both '/' and '\' count as separators and may be mixed in one path; the
// right-most one of either kind wins. A path without a separator is returned
// unchanged, and a path ending in a separator yields an empty string.
static std::string sd_basename(const std::string& path) {
    const size_t pos = path.find_last_of("/\\");
    if (pos == std::string::npos) {
        return path;
    }
    return path.substr(pos + 1);
}
|
||||
|
||||
static void log_print(enum sd_log_level_t level, const char* log, bool verbose, bool color) {
|
||||
int tag_color;
|
||||
const char* level_str;
|
||||
FILE* out_stream = (level == SD_LOG_ERROR) ? stderr : stdout;
|
||||
|
||||
if (!log || (!verbose && level <= SD_LOG_DEBUG)) {
|
||||
return;
|
||||
}
|
||||
|
||||
switch (level) {
|
||||
case SD_LOG_DEBUG:
|
||||
tag_color = 37;
|
||||
level_str = "DEBUG";
|
||||
break;
|
||||
case SD_LOG_INFO:
|
||||
tag_color = 34;
|
||||
level_str = "INFO";
|
||||
break;
|
||||
case SD_LOG_WARN:
|
||||
tag_color = 35;
|
||||
level_str = "WARN";
|
||||
break;
|
||||
case SD_LOG_ERROR:
|
||||
tag_color = 31;
|
||||
level_str = "ERROR";
|
||||
break;
|
||||
default: /* Potential future-proofing */
|
||||
tag_color = 33;
|
||||
level_str = "?????";
|
||||
break;
|
||||
}
|
||||
|
||||
if (color) {
|
||||
fprintf(out_stream, "\033[%d;1m[%-5s]\033[0m ", tag_color, level_str);
|
||||
} else {
|
||||
fprintf(out_stream, "[%-5s] ", level_str);
|
||||
}
|
||||
print_utf8(out_stream, log);
|
||||
fflush(out_stream);
|
||||
}
|
||||
|
||||
#define LOG_BUFFER_SIZE 4096
|
||||
|
||||
static bool log_verbose = false;
|
||||
static bool log_color = false;
|
||||
|
||||
// Format a log message printf-style, prefix it with "<file basename>:<line> - "
// and hand it to log_print using the file-level log_verbose / log_color flags.
// A trailing newline is appended when the formatted message lacks one.
// The message is built in a function-local static buffer of
// LOG_BUFFER_SIZE + 1 bytes; longer messages are truncated.
static void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...) {
    va_list args;
    va_start(args, format);

    static char log_buffer[LOG_BUFFER_SIZE + 1];
    int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file).c_str(), line);

    if (written < 0) {
        // Prefix formatting failed: start from an empty string instead of
        // whatever the previous call left in the static buffer.
        log_buffer[0] = '\0';
    } else if (written < LOG_BUFFER_SIZE) {
        vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args);
    }
    size_t len = strlen(log_buffer);
    // Check len first: indexing len - 1 on an empty buffer would be out of bounds.
    if (len == 0 || log_buffer[len - 1] != '\n') {
        strncat(log_buffer, "\n", LOG_BUFFER_SIZE - len);
    }

    log_print(level, log_buffer, log_verbose, log_color);

    va_end(args);
}
|
||||
|
||||
#define LOG_DEBUG(format, ...) log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__)
|
||||
#define LOG_INFO(format, ...) log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__)
|
||||
#define LOG_WARN(format, ...) log_printf(SD_LOG_WARN, __FILE__, __LINE__, format, ##__VA_ARGS__)
|
||||
#define LOG_ERROR(format, ...) log_printf(SD_LOG_ERROR, __FILE__, __LINE__, format, ##__VA_ARGS__)
|
||||
|
||||
struct StringOption {
|
||||
std::string short_name;
|
||||
std::string long_name;
|
||||
@ -1967,144 +1839,6 @@ static std::string version_string() {
|
||||
return std::string("stable-diffusion.cpp version ") + sd_version() + ", commit " + sd_commit();
|
||||
}
|
||||
|
||||
uint8_t* load_image_common(bool from_memory,
|
||||
const char* image_path_or_bytes,
|
||||
int len,
|
||||
int& width,
|
||||
int& height,
|
||||
int expected_width = 0,
|
||||
int expected_height = 0,
|
||||
int expected_channel = 3) {
|
||||
int c = 0;
|
||||
const char* image_path;
|
||||
uint8_t* image_buffer = nullptr;
|
||||
if (from_memory) {
|
||||
image_path = "memory";
|
||||
image_buffer = (uint8_t*)stbi_load_from_memory((const stbi_uc*)image_path_or_bytes, len, &width, &height, &c, expected_channel);
|
||||
} else {
|
||||
image_path = image_path_or_bytes;
|
||||
image_buffer = (uint8_t*)stbi_load(image_path_or_bytes, &width, &height, &c, expected_channel);
|
||||
}
|
||||
if (image_buffer == nullptr) {
|
||||
LOG_ERROR("load image from '%s' failed", image_path);
|
||||
return nullptr;
|
||||
}
|
||||
if (c < expected_channel) {
|
||||
fprintf(stderr,
|
||||
"the number of channels for the input image must be >= %d,"
|
||||
"but got %d channels, image_path = %s",
|
||||
expected_channel,
|
||||
c,
|
||||
image_path);
|
||||
free(image_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
if (width <= 0) {
|
||||
LOG_ERROR("error: the width of image must be greater than 0, image_path = %s", image_path);
|
||||
free(image_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
if (height <= 0) {
|
||||
LOG_ERROR("error: the height of image must be greater than 0, image_path = %s", image_path);
|
||||
free(image_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Resize input image ...
|
||||
if ((expected_width > 0 && expected_height > 0) && (height != expected_height || width != expected_width)) {
|
||||
float dst_aspect = (float)expected_width / (float)expected_height;
|
||||
float src_aspect = (float)width / (float)height;
|
||||
|
||||
int crop_x = 0, crop_y = 0;
|
||||
int crop_w = width, crop_h = height;
|
||||
|
||||
if (src_aspect > dst_aspect) {
|
||||
crop_w = (int)(height * dst_aspect);
|
||||
crop_x = (width - crop_w) / 2;
|
||||
} else if (src_aspect < dst_aspect) {
|
||||
crop_h = (int)(width / dst_aspect);
|
||||
crop_y = (height - crop_h) / 2;
|
||||
}
|
||||
|
||||
if (crop_x != 0 || crop_y != 0) {
|
||||
LOG_INFO("crop input image from %dx%d to %dx%d, image_path = %s", width, height, crop_w, crop_h, image_path);
|
||||
uint8_t* cropped_image_buffer = (uint8_t*)malloc(crop_w * crop_h * expected_channel);
|
||||
if (cropped_image_buffer == nullptr) {
|
||||
LOG_ERROR("error: allocate memory for crop\n");
|
||||
free(image_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
for (int row = 0; row < crop_h; row++) {
|
||||
uint8_t* src = image_buffer + ((crop_y + row) * width + crop_x) * expected_channel;
|
||||
uint8_t* dst = cropped_image_buffer + (row * crop_w) * expected_channel;
|
||||
memcpy(dst, src, crop_w * expected_channel);
|
||||
}
|
||||
|
||||
width = crop_w;
|
||||
height = crop_h;
|
||||
free(image_buffer);
|
||||
image_buffer = cropped_image_buffer;
|
||||
}
|
||||
|
||||
LOG_INFO("resize input image from %dx%d to %dx%d", width, height, expected_width, expected_height);
|
||||
int resized_height = expected_height;
|
||||
int resized_width = expected_width;
|
||||
|
||||
uint8_t* resized_image_buffer = (uint8_t*)malloc(resized_height * resized_width * expected_channel);
|
||||
if (resized_image_buffer == nullptr) {
|
||||
LOG_ERROR("error: allocate memory for resize input image\n");
|
||||
free(image_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
stbir_resize(image_buffer, width, height, 0,
|
||||
resized_image_buffer, resized_width, resized_height, 0, STBIR_TYPE_UINT8,
|
||||
expected_channel, STBIR_ALPHA_CHANNEL_NONE, 0,
|
||||
STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
|
||||
STBIR_FILTER_BOX, STBIR_FILTER_BOX,
|
||||
STBIR_COLORSPACE_SRGB, nullptr);
|
||||
width = resized_width;
|
||||
height = resized_height;
|
||||
free(image_buffer);
|
||||
image_buffer = resized_image_buffer;
|
||||
}
|
||||
return image_buffer;
|
||||
}
|
||||
|
||||
uint8_t* load_image_from_file(const char* image_path,
|
||||
int& width,
|
||||
int& height,
|
||||
int expected_width = 0,
|
||||
int expected_height = 0,
|
||||
int expected_channel = 3) {
|
||||
return load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel);
|
||||
}
|
||||
|
||||
// Load the file at `image_path` into `*image`.
//
// On success the image's data, width, height and channel fields describe the
// decoded buffer and true is returned. The channel field is set to
// `expected_channel`, because that is the channel count requested from
// load_image_common. On failure `image->data` is nullptr and false is
// returned; a null `image` also returns false.
// NOTE(review): the data buffer comes from load_image_common, which returns
// malloc-family memory, so the owner is presumably expected to free() it.
bool load_sd_image_from_file(sd_image_t* image,
                             const char* image_path,
                             int expected_width   = 0,
                             int expected_height  = 0,
                             int expected_channel = 3) {
    if (image == nullptr) {
        return false;
    }
    int width  = 0;
    int height = 0;
    image->data = load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel);
    if (image->data == nullptr) {
        return false;
    }
    image->width   = width;
    image->height  = height;
    image->channel = expected_channel;  // keep the descriptor in sync with the buffer layout
    return true;
}
|
||||
|
||||
uint8_t* load_image_from_memory(const char* image_bytes,
|
||||
int len,
|
||||
int& width,
|
||||
int& height,
|
||||
int expected_width = 0,
|
||||
int expected_height = 0,
|
||||
int expected_channel = 3) {
|
||||
return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel);
|
||||
}
|
||||
|
||||
std::string get_image_params(const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed) {
|
||||
std::string parameter_string;
|
||||
if (gen_params.prompt_with_lora.size() != 0) {
|
||||
|
||||
118
examples/common/log.cpp
Normal file
118
examples/common/log.cpp
Normal file
@ -0,0 +1,118 @@
|
||||
#include "log.h"
|
||||
|
||||
bool log_verbose = false;
|
||||
bool log_color = false;
|
||||
|
||||
// Return the part of `path` after its last directory separator.
// Both '/' and '\' count as separators and may be mixed in one path; the
// right-most one of either kind wins. A path without a separator is returned
// unchanged, and a path ending in a separator yields an empty string.
std::string sd_basename(const std::string& path) {
    const size_t pos = path.find_last_of("/\\");
    if (pos == std::string::npos) {
        return path;
    }
    return path.substr(pos + 1);
}
|
||||
|
||||
// Write the NUL-terminated UTF-8 string `utf8` to `stream`.
//
// On Windows, text for stdout/stderr is written through the Win32 handle:
// converted to UTF-16 and sent with WriteConsoleW when the handle is a
// console, or written as raw UTF-8 bytes with WriteFile otherwise. Those
// calls do not go through the C runtime's FILE buffer, so the stream is
// flushed first to keep the text behind anything already printed with
// fprintf. Any other stream, and every stream on non-Windows platforms, is
// written with fputs. A null `utf8` is ignored.
void print_utf8(FILE* stream, const char* utf8) {
    if (!utf8) {
        return;
    }

#ifdef _WIN32
    // Only the standard streams have a matching std handle.
    if (stream != stdout && stream != stderr) {
        fputs(utf8, stream);
        return;
    }

    // Emit buffered output (e.g. the log tag) before bypassing the buffer.
    fflush(stream);

    HANDLE h = (stream == stderr)
                   ? GetStdHandle(STD_ERROR_HANDLE)
                   : GetStdHandle(STD_OUTPUT_HANDLE);

    DWORD mode;
    BOOL is_console = GetConsoleMode(h, &mode);

    if (is_console) {
        // First call only computes the required length (including the NUL).
        int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
        if (wlen <= 0) {
            return;
        }

        wchar_t* wbuf = (wchar_t*)malloc(wlen * sizeof(wchar_t));
        if (!wbuf) {
            return;
        }

        if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wbuf, wlen) > 0) {
            DWORD written;
            WriteConsoleW(h, wbuf, wlen - 1, &written, NULL);  // skip the NUL
        }

        free(wbuf);
    } else {
        DWORD written;
        WriteFile(h, utf8, (DWORD)strlen(utf8), &written, NULL);
    }
#else
    fputs(utf8, stream);
#endif
}
|
||||
|
||||
void log_print(enum sd_log_level_t level, const char* log, bool verbose, bool color) {
|
||||
int tag_color;
|
||||
const char* level_str;
|
||||
FILE* out_stream = (level == SD_LOG_ERROR) ? stderr : stdout;
|
||||
|
||||
if (!log || (!verbose && level <= SD_LOG_DEBUG)) {
|
||||
return;
|
||||
}
|
||||
|
||||
switch (level) {
|
||||
case SD_LOG_DEBUG:
|
||||
tag_color = 37;
|
||||
level_str = "DEBUG";
|
||||
break;
|
||||
case SD_LOG_INFO:
|
||||
tag_color = 34;
|
||||
level_str = "INFO";
|
||||
break;
|
||||
case SD_LOG_WARN:
|
||||
tag_color = 35;
|
||||
level_str = "WARN";
|
||||
break;
|
||||
case SD_LOG_ERROR:
|
||||
tag_color = 31;
|
||||
level_str = "ERROR";
|
||||
break;
|
||||
default:
|
||||
tag_color = 33;
|
||||
level_str = "?????";
|
||||
break;
|
||||
}
|
||||
|
||||
if (color) {
|
||||
fprintf(out_stream, "\033[%d;1m[%-5s]\033[0m ", tag_color, level_str);
|
||||
} else {
|
||||
fprintf(out_stream, "[%-5s] ", level_str);
|
||||
}
|
||||
print_utf8(out_stream, log);
|
||||
fflush(out_stream);
|
||||
}
|
||||
|
||||
// printf-style logging entry point behind the LOG_* macros.
//
// Builds "<basename(file)>:<line> - <formatted message>", truncates it to
// LOG_BUFFER_SIZE - 1 characters, makes sure it ends with a newline and hands
// it to log_print() together with the global log_verbose / log_color flags.
//
//   level  - severity forwarded to log_print()
//   file   - source file of the call site (typically __FILE__); may be null
//   line   - source line of the call site
//   format - printf format string followed by its arguments
void example_log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...) {
    constexpr size_t LOG_BUFFER_SIZE = 4096;

    // Per-call buffer: a shared static buffer would be overwritten by
    // concurrent or nested calls. One extra byte leaves room for the newline.
    char log_buffer[LOG_BUFFER_SIZE + 1];

    int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file ? file : "").c_str(), line);
    if (written < 0) {
        // Prefix could not be formatted: drop it but still log the message.
        written       = 0;
        log_buffer[0] = '\0';
    }

    if (written < static_cast<int>(LOG_BUFFER_SIZE)) {
        va_list args;
        va_start(args, format);
        const int body = vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args);
        va_end(args);
        if (body < 0) {
            // Keep whatever prefix we have; discard a half-written body.
            log_buffer[written] = '\0';
        }
    }

    // snprintf/vsnprintf leave at most LOG_BUFFER_SIZE - 1 characters, so
    // appending one newline plus the terminator always fits.
    const size_t len = strlen(log_buffer);
    if (len == 0 || log_buffer[len - 1] != '\n') {
        log_buffer[len]     = '\n';
        log_buffer[len + 1] = '\0';
    }

    log_print(level, log_buffer, log_verbose, log_color);
}
|
||||
32
examples/common/log.h
Normal file
32
examples/common/log.h
Normal file
@ -0,0 +1,32 @@
|
||||
#ifndef __EXAMPLE_LOG_H__
|
||||
#define __EXAMPLE_LOG_H__
|
||||
|
||||
#include <cstdarg>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif // _WIN32
|
||||
|
||||
#include "stable-diffusion.h"
|
||||
|
||||
extern bool log_verbose;
|
||||
extern bool log_color;
|
||||
|
||||
std::string sd_basename(const std::string& path);
|
||||
void print_utf8(FILE* stream, const char* utf8);
|
||||
void log_print(sd_log_level_t level, const char* log, bool verbose, bool color);
|
||||
void example_log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);
|
||||
|
||||
#define LOG_DEBUG(format, ...) example_log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__)
|
||||
#define LOG_INFO(format, ...) example_log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__)
|
||||
#define LOG_WARN(format, ...) example_log_printf(SD_LOG_WARN, __FILE__, __LINE__, format, ##__VA_ARGS__)
|
||||
#define LOG_ERROR(format, ...) example_log_printf(SD_LOG_ERROR, __FILE__, __LINE__, format, ##__VA_ARGS__)
|
||||
|
||||
#endif // __EXAMPLE_LOG_H__
|
||||
879
examples/common/media_io.cpp
Normal file
879
examples/common/media_io.cpp
Normal file
@ -0,0 +1,879 @@
|
||||
#include "log.h"
|
||||
#include "media_io.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#define STB_IMAGE_STATIC
|
||||
#include "stb_image.h"
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#define STB_IMAGE_WRITE_STATIC
|
||||
#include "stb_image_write.h"
|
||||
|
||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
||||
#define STB_IMAGE_RESIZE_STATIC
|
||||
#include "stb_image_resize.h"
|
||||
|
||||
#ifdef SD_USE_WEBP
|
||||
#include "webp/decode.h"
|
||||
#include "webp/encode.h"
|
||||
#include "webp/mux.h"
|
||||
#endif
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace {
|
||||
// Read the whole file at `path` into `data`.
// Returns false when the file cannot be opened, its size cannot be
// determined, or the read fails; an empty file yields true with an empty
// vector.
bool read_binary_file_bytes(const char* path, std::vector<uint8_t>& data) {
    std::ifstream input(std::filesystem::path(path), std::ios::binary);
    if (input.fail()) {
        return false;
    }

    // Measure the file by seeking to its end, then rewind for the read.
    input.seekg(0, std::ios::end);
    const std::streamoff byte_count = input.tellg();
    if (byte_count < 0) {
        return false;
    }
    input.seekg(0, std::ios::beg);

    data.resize(static_cast<size_t>(byte_count));
    if (data.empty()) {
        return true;
    }

    input.read(reinterpret_cast<char*>(data.data()), byte_count);
    return !input.fail();
}
|
||||
|
||||
// Create or truncate the file at `path` and write `data` to it.
// Returns true only when the file was opened and every byte was handed to
// the operating system without a stream error. Empty `data` produces an
// empty file and returns true.
bool write_binary_file_bytes(const std::string& path, const std::vector<uint8_t>& data) {
    std::ofstream fout(std::filesystem::path(path), std::ios::binary);
    if (!fout) {
        return false;
    }

    if (!data.empty()) {
        fout.write(reinterpret_cast<const char*>(data.data()), static_cast<std::streamsize>(data.size()));
    }

    // write() may only fill the stream buffer; failures such as a full disk
    // show up when the buffer is flushed. Close explicitly so they are
    // reported to the caller instead of being lost in the destructor.
    fout.close();
    return !fout.fail();
}
|
||||
|
||||
// Encode an image as PNG in memory via stbi_write_png_to_mem() and deliver
// the encoded bytes to `func` in a single call.
// `parameters` is passed through to stbi_write_png_to_mem() unchanged.
// Returns 1 on success, 0 when encoding produced no buffer.
int stbi_ext_write_png_to_func(stbi_write_func* func,
                               void* context,
                               int x,
                               int y,
                               int comp,
                               const void* data,
                               int stride_bytes,
                               const char* parameters) {
    int encoded_len        = 0;
    unsigned char* encoded = stbi_write_png_to_mem(static_cast<const unsigned char*>(data),
                                                   stride_bytes, x, y, comp, &encoded_len, parameters);
    const int ok = (encoded != nullptr) ? 1 : 0;
    if (ok) {
        func(context, encoded, encoded_len);
        STBIW_FREE(encoded);
    }
    return ok;
}
|
||||
|
||||
// True when the buffer starts with a RIFF container header whose form type
// is "WEBP": bytes 0-3 are "RIFF" and bytes 8-11 are "WEBP".
bool is_webp_signature(const uint8_t* data, size_t size) {
    if (size < 12) {
        return false;
    }
    if (memcmp(data, "RIFF", 4) != 0) {
        return false;
    }
    return memcmp(data + 8, "WEBP", 4) == 0;
}
|
||||
|
||||
// Return `value` with the five XML special characters replaced by their
// predefined entities so the text can be embedded in element content or in
// an attribute value:
//   &  ->  &amp;    <  ->  &lt;    >  ->  &gt;    "  ->  &quot;    '  ->  &apos;
// All other bytes are copied through unchanged.
std::string xml_escape(const std::string& value) {
    std::string escaped;
    escaped.reserve(value.size());

    for (char ch : value) {
        switch (ch) {
            case '&':
                escaped += "&amp;";
                break;
            case '<':
                escaped += "&lt;";
                break;
            case '>':
                escaped += "&gt;";
                break;
            case '"':
                escaped += "&quot;";
                break;
            case '\'':
                escaped += "&apos;";
                break;
            default:
                escaped += ch;
                break;
        }
    }

    return escaped;
}
|
||||
|
||||
#ifdef SD_USE_WEBP
|
||||
uint8_t* decode_webp_image_to_buffer(const uint8_t* data,
|
||||
size_t size,
|
||||
int& width,
|
||||
int& height,
|
||||
int expected_channel,
|
||||
int& source_channel_count) {
|
||||
WebPBitstreamFeatures features;
|
||||
if (WebPGetFeatures(data, size, &features) != VP8_STATUS_OK) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
width = features.width;
|
||||
height = features.height;
|
||||
source_channel_count = features.has_alpha ? 4 : 3;
|
||||
|
||||
const size_t pixel_count = static_cast<size_t>(width) * static_cast<size_t>(height);
|
||||
|
||||
if (expected_channel == 1) {
|
||||
int decoded_width = width;
|
||||
int decoded_height = height;
|
||||
uint8_t* decoded = features.has_alpha
|
||||
? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height)
|
||||
: WebPDecodeRGB(data, size, &decoded_width, &decoded_height);
|
||||
if (decoded == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
uint8_t* grayscale = (uint8_t*)malloc(pixel_count);
|
||||
if (grayscale == nullptr) {
|
||||
WebPFree(decoded);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const int decoded_channels = features.has_alpha ? 4 : 3;
|
||||
for (size_t i = 0; i < pixel_count; ++i) {
|
||||
const uint8_t* src = decoded + i * decoded_channels;
|
||||
grayscale[i] = static_cast<uint8_t>((77 * src[0] + 150 * src[1] + 29 * src[2] + 128) >> 8);
|
||||
}
|
||||
|
||||
WebPFree(decoded);
|
||||
return grayscale;
|
||||
}
|
||||
|
||||
if (expected_channel != 3 && expected_channel != 4) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int decoded_width = width;
|
||||
int decoded_height = height;
|
||||
uint8_t* decoded = (expected_channel == 4)
|
||||
? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height)
|
||||
: WebPDecodeRGB(data, size, &decoded_width, &decoded_height);
|
||||
if (decoded == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const size_t out_size = pixel_count * static_cast<size_t>(expected_channel);
|
||||
uint8_t* output = (uint8_t*)malloc(out_size);
|
||||
if (output == nullptr) {
|
||||
WebPFree(decoded);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
memcpy(output, decoded, out_size);
|
||||
WebPFree(decoded);
|
||||
return output;
|
||||
}
|
||||
|
||||
std::string build_webp_xmp_packet(const std::string& parameters) {
|
||||
if (parameters.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
const std::string escaped_parameters = xml_escape(parameters);
|
||||
return "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n"
|
||||
"<x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n"
|
||||
" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
|
||||
" <rdf:Description xmlns:sdcpp=\"https://github.com/leejet/stable-diffusion.cpp/ns/1.0/\">\n"
|
||||
" <sdcpp:parameters>" +
|
||||
escaped_parameters +
|
||||
"</sdcpp:parameters>\n"
|
||||
" </rdf:Description>\n"
|
||||
" </rdf:RDF>\n"
|
||||
"</x:xmpmeta>\n"
|
||||
"<?xpacket end=\"w\"?>";
|
||||
}
|
||||
|
||||
bool encode_webp_image_to_vector(const uint8_t* image,
|
||||
int width,
|
||||
int height,
|
||||
int channels,
|
||||
const std::string& parameters,
|
||||
int quality,
|
||||
std::vector<uint8_t>& out) {
|
||||
if (image == nullptr || width <= 0 || height <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> rgb_image;
|
||||
const uint8_t* input_image = image;
|
||||
int input_channels = channels;
|
||||
|
||||
if (channels == 1) {
|
||||
rgb_image.resize(static_cast<size_t>(width) * static_cast<size_t>(height) * 3);
|
||||
for (int i = 0; i < width * height; ++i) {
|
||||
rgb_image[i * 3 + 0] = image[i];
|
||||
rgb_image[i * 3 + 1] = image[i];
|
||||
rgb_image[i * 3 + 2] = image[i];
|
||||
}
|
||||
input_image = rgb_image.data();
|
||||
input_channels = 3;
|
||||
}
|
||||
|
||||
if (input_channels != 3 && input_channels != 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t* encoded = nullptr;
|
||||
size_t encoded_size = (input_channels == 4)
|
||||
? WebPEncodeRGBA(input_image, width, height, width * input_channels, static_cast<float>(quality), &encoded)
|
||||
: WebPEncodeRGB(input_image, width, height, width * input_channels, static_cast<float>(quality), &encoded);
|
||||
if (encoded == nullptr || encoded_size == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
out.assign(encoded, encoded + encoded_size);
|
||||
WebPFree(encoded);
|
||||
|
||||
if (parameters.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
WebPData image_data;
|
||||
WebPData assembled_data;
|
||||
WebPDataInit(&image_data);
|
||||
WebPDataInit(&assembled_data);
|
||||
|
||||
image_data.bytes = out.data();
|
||||
image_data.size = out.size();
|
||||
|
||||
WebPMux* mux = WebPMuxNew();
|
||||
if (mux == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::string xmp_packet = build_webp_xmp_packet(parameters);
|
||||
WebPData xmp_data;
|
||||
WebPDataInit(&xmp_data);
|
||||
xmp_data.bytes = reinterpret_cast<const uint8_t*>(xmp_packet.data());
|
||||
xmp_data.size = xmp_packet.size();
|
||||
|
||||
const bool ok = WebPMuxSetImage(mux, &image_data, 1) == WEBP_MUX_OK &&
|
||||
WebPMuxSetChunk(mux, "XMP ", &xmp_data, 1) == WEBP_MUX_OK &&
|
||||
WebPMuxAssemble(mux, &assembled_data) == WEBP_MUX_OK;
|
||||
|
||||
if (ok) {
|
||||
out.assign(assembled_data.bytes, assembled_data.bytes + assembled_data.size);
|
||||
}
|
||||
|
||||
WebPDataClear(&assembled_data);
|
||||
WebPMuxDelete(mux);
|
||||
return ok;
|
||||
}
|
||||
#endif
|
||||
|
||||
uint8_t* load_image_common(bool from_memory,
|
||||
const char* image_path_or_bytes,
|
||||
int len,
|
||||
int& width,
|
||||
int& height,
|
||||
int expected_width,
|
||||
int expected_height,
|
||||
int expected_channel) {
|
||||
const char* image_path;
|
||||
uint8_t* image_buffer = nullptr;
|
||||
int source_channel_count = 0;
|
||||
|
||||
#ifdef SD_USE_WEBP
|
||||
if (from_memory) {
|
||||
image_path = "memory";
|
||||
if (len > 0 && is_webp_signature(reinterpret_cast<const uint8_t*>(image_path_or_bytes), static_cast<size_t>(len))) {
|
||||
image_buffer = decode_webp_image_to_buffer(reinterpret_cast<const uint8_t*>(image_path_or_bytes),
|
||||
static_cast<size_t>(len),
|
||||
width,
|
||||
height,
|
||||
expected_channel,
|
||||
source_channel_count);
|
||||
}
|
||||
} else {
|
||||
image_path = image_path_or_bytes;
|
||||
if (encoded_image_format_from_path(image_path_or_bytes) == EncodedImageFormat::WEBP) {
|
||||
std::vector<uint8_t> file_bytes;
|
||||
if (!read_binary_file_bytes(image_path_or_bytes, file_bytes)) {
|
||||
LOG_ERROR("load image from '%s' failed", image_path_or_bytes);
|
||||
return nullptr;
|
||||
}
|
||||
if (!is_webp_signature(file_bytes.data(), file_bytes.size())) {
|
||||
LOG_ERROR("load image from '%s' failed", image_path_or_bytes);
|
||||
return nullptr;
|
||||
}
|
||||
image_buffer = decode_webp_image_to_buffer(file_bytes.data(),
|
||||
file_bytes.size(),
|
||||
width,
|
||||
height,
|
||||
expected_channel,
|
||||
source_channel_count);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (from_memory) {
|
||||
image_path = "memory";
|
||||
if (image_buffer == nullptr) {
|
||||
int c = 0;
|
||||
image_buffer = (uint8_t*)stbi_load_from_memory((const stbi_uc*)image_path_or_bytes, len, &width, &height, &c, expected_channel);
|
||||
source_channel_count = c;
|
||||
}
|
||||
} else {
|
||||
image_path = image_path_or_bytes;
|
||||
if (image_buffer == nullptr) {
|
||||
int c = 0;
|
||||
image_buffer = (uint8_t*)stbi_load(image_path_or_bytes, &width, &height, &c, expected_channel);
|
||||
source_channel_count = c;
|
||||
}
|
||||
}
|
||||
if (image_buffer == nullptr) {
|
||||
LOG_ERROR("load image from '%s' failed", image_path);
|
||||
return nullptr;
|
||||
}
|
||||
if (source_channel_count < expected_channel) {
|
||||
fprintf(stderr,
|
||||
"the number of channels for the input image must be >= %d,"
|
||||
"but got %d channels, image_path = %s",
|
||||
expected_channel,
|
||||
source_channel_count,
|
||||
image_path);
|
||||
free(image_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
if (width <= 0) {
|
||||
LOG_ERROR("error: the width of image must be greater than 0, image_path = %s", image_path);
|
||||
free(image_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
if (height <= 0) {
|
||||
LOG_ERROR("error: the height of image must be greater than 0, image_path = %s", image_path);
|
||||
free(image_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if ((expected_width > 0 && expected_height > 0) && (height != expected_height || width != expected_width)) {
|
||||
float dst_aspect = (float)expected_width / (float)expected_height;
|
||||
float src_aspect = (float)width / (float)height;
|
||||
|
||||
int crop_x = 0, crop_y = 0;
|
||||
int crop_w = width, crop_h = height;
|
||||
|
||||
if (src_aspect > dst_aspect) {
|
||||
crop_w = (int)(height * dst_aspect);
|
||||
crop_x = (width - crop_w) / 2;
|
||||
} else if (src_aspect < dst_aspect) {
|
||||
crop_h = (int)(width / dst_aspect);
|
||||
crop_y = (height - crop_h) / 2;
|
||||
}
|
||||
|
||||
if (crop_x != 0 || crop_y != 0) {
|
||||
LOG_INFO("crop input image from %dx%d to %dx%d, image_path = %s", width, height, crop_w, crop_h, image_path);
|
||||
uint8_t* cropped_image_buffer = (uint8_t*)malloc(crop_w * crop_h * expected_channel);
|
||||
if (cropped_image_buffer == nullptr) {
|
||||
LOG_ERROR("error: allocate memory for crop\n");
|
||||
free(image_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
for (int row = 0; row < crop_h; row++) {
|
||||
uint8_t* src = image_buffer + ((crop_y + row) * width + crop_x) * expected_channel;
|
||||
uint8_t* dst = cropped_image_buffer + (row * crop_w) * expected_channel;
|
||||
memcpy(dst, src, crop_w * expected_channel);
|
||||
}
|
||||
|
||||
width = crop_w;
|
||||
height = crop_h;
|
||||
free(image_buffer);
|
||||
image_buffer = cropped_image_buffer;
|
||||
}
|
||||
|
||||
LOG_INFO("resize input image from %dx%d to %dx%d", width, height, expected_width, expected_height);
|
||||
uint8_t* resized_image_buffer = (uint8_t*)malloc(expected_height * expected_width * expected_channel);
|
||||
if (resized_image_buffer == nullptr) {
|
||||
LOG_ERROR("error: allocate memory for resize input image\n");
|
||||
free(image_buffer);
|
||||
return nullptr;
|
||||
}
|
||||
stbir_resize(image_buffer, width, height, 0,
|
||||
resized_image_buffer, expected_width, expected_height, 0, STBIR_TYPE_UINT8,
|
||||
expected_channel, STBIR_ALPHA_CHANNEL_NONE, 0,
|
||||
STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
|
||||
STBIR_FILTER_BOX, STBIR_FILTER_BOX,
|
||||
STBIR_COLORSPACE_SRGB, nullptr);
|
||||
width = expected_width;
|
||||
height = expected_height;
|
||||
free(image_buffer);
|
||||
image_buffer = resized_image_buffer;
|
||||
}
|
||||
return image_buffer;
|
||||
}
|
||||
|
||||
// One recorded video chunk: where it was written and how many payload
// bytes it holds. Used to emit the "idx1" index of the AVI file.
struct avi_index_entry {
    uint32_t offset;
    uint32_t size;
};
|
||||
|
||||
// Write `val` to `f` as four bytes, least significant byte first.
// The bytes are laid out explicitly so the result is little-endian on every
// host, not just on little-endian machines.
void write_u32_le(FILE* f, uint32_t val) {
    const uint8_t bytes[4] = {
        static_cast<uint8_t>(val & 0xFFu),
        static_cast<uint8_t>((val >> 8) & 0xFFu),
        static_cast<uint8_t>((val >> 16) & 0xFFu),
        static_cast<uint8_t>((val >> 24) & 0xFFu),
    };
    fwrite(bytes, 1, sizeof(bytes), f);
}
|
||||
|
||||
// Write `val` to `f` as two bytes, least significant byte first.
// The bytes are laid out explicitly so the result is little-endian on every
// host, not just on little-endian machines.
void write_u16_le(FILE* f, uint16_t val) {
    const uint8_t bytes[2] = {
        static_cast<uint8_t>(val & 0xFFu),
        static_cast<uint8_t>((val >> 8) & 0xFFu),
    };
    fwrite(bytes, 1, sizeof(bytes), f);
}
|
||||
} // namespace
|
||||
|
||||
// Map a file path to an image format using only its extension
// (case-insensitive): .jpg/.jpeg/.jpe -> JPEG, .png -> PNG, .webp -> WEBP,
// anything else -> UNKNOWN. The file itself is never opened.
EncodedImageFormat encoded_image_format_from_path(const std::string& path) {
    std::string ext = fs::path(path).extension().string();
    // tolower() requires a value representable as unsigned char; feeding it a
    // negative plain char (non-ASCII byte) is undefined behavior.
    std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char ch) {
        return static_cast<char>(std::tolower(ch));
    });

    if (ext == ".jpg" || ext == ".jpeg" || ext == ".jpe") {
        return EncodedImageFormat::JPEG;
    }
    if (ext == ".png") {
        return EncodedImageFormat::PNG;
    }
    if (ext == ".webp") {
        return EncodedImageFormat::WEBP;
    }
    return EncodedImageFormat::UNKNOWN;
}
|
||||
|
||||
// Encode raw pixels into the requested container format and return the
// encoded bytes; an empty vector signals failure or an unsupported format.
//
// `parameters` is handed to the PNG and WebP encoders (null for PNG when
// empty); the JPEG path does not use it. `quality` is used by the JPEG and
// WebP encoders. WebP always fails when built without SD_USE_WEBP.
std::vector<uint8_t> encode_image_to_vector(EncodedImageFormat format,
                                            const uint8_t* image,
                                            int width,
                                            int height,
                                            int channels,
                                            const std::string& parameters,
                                            int quality) {
    std::vector<uint8_t> encoded;

    // The stb writers call back through a plain function pointer; the
    // destination vector travels in the opaque context argument.
    auto append_bytes = [](void* context, void* data, int size) {
        auto* sink           = static_cast<std::vector<uint8_t>*>(context);
        const uint8_t* first = static_cast<const uint8_t*>(data);
        sink->insert(sink->end(), first, first + size);
    };

    bool ok = false;
    if (format == EncodedImageFormat::JPEG) {
        ok = stbi_write_jpg_to_func(append_bytes, &encoded, width, height, channels, image, quality) != 0;
    } else if (format == EncodedImageFormat::PNG) {
        const char* text = parameters.empty() ? nullptr : parameters.c_str();
        ok               = stbi_ext_write_png_to_func(append_bytes, &encoded, width, height, channels, image, width * channels, text) != 0;
    } else if (format == EncodedImageFormat::WEBP) {
#ifdef SD_USE_WEBP
        ok = encode_webp_image_to_vector(image, width, height, channels, parameters, quality, encoded) &&
             !encoded.empty();
#endif
    }

    if (!ok) {
        encoded.clear();
    }
    return encoded;
}
|
||||
|
||||
bool write_image_to_file(const std::string& path,
|
||||
const uint8_t* image,
|
||||
int width,
|
||||
int height,
|
||||
int channels,
|
||||
const std::string& parameters,
|
||||
int quality) {
|
||||
const EncodedImageFormat format = encoded_image_format_from_path(path);
|
||||
|
||||
switch (format) {
|
||||
case EncodedImageFormat::JPEG:
|
||||
return stbi_write_jpg(path.c_str(), width, height, channels, image, quality, parameters.empty() ? nullptr : parameters.c_str()) != 0;
|
||||
case EncodedImageFormat::PNG:
|
||||
return stbi_write_png(path.c_str(), width, height, channels, image, 0, parameters.empty() ? nullptr : parameters.c_str()) != 0;
|
||||
case EncodedImageFormat::WEBP: {
|
||||
const std::vector<uint8_t> encoded = encode_image_to_vector(format, image, width, height, channels, parameters, quality);
|
||||
return !encoded.empty() && write_binary_file_bytes(path, encoded);
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t* load_image_from_file(const char* image_path,
|
||||
int& width,
|
||||
int& height,
|
||||
int expected_width,
|
||||
int expected_height,
|
||||
int expected_channel) {
|
||||
return load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel);
|
||||
}
|
||||
|
||||
// Load an image file into `image`.
//
// On success `image->data` owns the pixel buffer and `image->width`,
// `image->height` and `image->channel` describe it; the buffer always has
// `expected_channel` channels. On failure `image->data` is nullptr, the
// other fields are left untouched and false is returned.
bool load_sd_image_from_file(sd_image_t* image,
                             const char* image_path,
                             int expected_width,
                             int expected_height,
                             int expected_channel) {
    int width  = 0;
    int height = 0;
    image->data = load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel);
    if (image->data == nullptr) {
        return false;
    }
    image->width  = width;
    image->height = height;
    // Keep the descriptor in sync with the buffer layout that was requested.
    image->channel = expected_channel;
    return true;
}
|
||||
|
||||
uint8_t* load_image_from_memory(const char* image_bytes,
|
||||
int len,
|
||||
int& width,
|
||||
int& height,
|
||||
int expected_width,
|
||||
int expected_height,
|
||||
int expected_channel) {
|
||||
return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel);
|
||||
}
|
||||
|
||||
int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
||||
if (num_images == 0) {
|
||||
fprintf(stderr, "Error: Image array is empty.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
FILE* f = fopen(filename, "wb");
|
||||
if (!f) {
|
||||
perror("Error opening file for writing");
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t width = images[0].width;
|
||||
uint32_t height = images[0].height;
|
||||
uint32_t channels = images[0].channel;
|
||||
if (channels != 3 && channels != 4) {
|
||||
fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
|
||||
fclose(f);
|
||||
return -1;
|
||||
}
|
||||
|
||||
fwrite("RIFF", 4, 1, f);
|
||||
long riff_size_pos = ftell(f);
|
||||
write_u32_le(f, 0);
|
||||
fwrite("AVI ", 4, 1, f);
|
||||
|
||||
fwrite("LIST", 4, 1, f);
|
||||
write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
|
||||
fwrite("hdrl", 4, 1, f);
|
||||
|
||||
fwrite("avih", 4, 1, f);
|
||||
write_u32_le(f, 56);
|
||||
write_u32_le(f, 1000000 / fps);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, 0x110);
|
||||
write_u32_le(f, num_images);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, 1);
|
||||
write_u32_le(f, width * height * 3);
|
||||
write_u32_le(f, width);
|
||||
write_u32_le(f, height);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, 0);
|
||||
|
||||
fwrite("LIST", 4, 1, f);
|
||||
write_u32_le(f, 4 + 8 + 56 + 8 + 40);
|
||||
fwrite("strl", 4, 1, f);
|
||||
|
||||
fwrite("strh", 4, 1, f);
|
||||
write_u32_le(f, 56);
|
||||
fwrite("vids", 4, 1, f);
|
||||
fwrite("MJPG", 4, 1, f);
|
||||
write_u32_le(f, 0);
|
||||
write_u16_le(f, 0);
|
||||
write_u16_le(f, 0);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, 1);
|
||||
write_u32_le(f, fps);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, num_images);
|
||||
write_u32_le(f, width * height * 3);
|
||||
write_u32_le(f, (uint32_t)-1);
|
||||
write_u32_le(f, 0);
|
||||
write_u16_le(f, 0);
|
||||
write_u16_le(f, 0);
|
||||
write_u16_le(f, 0);
|
||||
write_u16_le(f, 0);
|
||||
|
||||
fwrite("strf", 4, 1, f);
|
||||
write_u32_le(f, 40);
|
||||
write_u32_le(f, 40);
|
||||
write_u32_le(f, width);
|
||||
write_u32_le(f, height);
|
||||
write_u16_le(f, 1);
|
||||
write_u16_le(f, 24);
|
||||
fwrite("MJPG", 4, 1, f);
|
||||
write_u32_le(f, width * height * 3);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, 0);
|
||||
write_u32_le(f, 0);
|
||||
|
||||
fwrite("LIST", 4, 1, f);
|
||||
long movi_size_pos = ftell(f);
|
||||
write_u32_le(f, 0);
|
||||
fwrite("movi", 4, 1, f);
|
||||
|
||||
avi_index_entry* index = (avi_index_entry*)malloc(sizeof(avi_index_entry) * num_images);
|
||||
if (!index) {
|
||||
fclose(f);
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct {
|
||||
uint8_t* buf;
|
||||
size_t size;
|
||||
} jpeg_data;
|
||||
|
||||
for (int i = 0; i < num_images; i++) {
|
||||
jpeg_data.buf = nullptr;
|
||||
jpeg_data.size = 0;
|
||||
|
||||
auto write_to_buf = [](void* context, void* data, int size) {
|
||||
auto jd = (decltype(jpeg_data)*)context;
|
||||
jd->buf = (uint8_t*)realloc(jd->buf, jd->size + size);
|
||||
memcpy(jd->buf + jd->size, data, size);
|
||||
jd->size += size;
|
||||
};
|
||||
|
||||
stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, quality);
|
||||
|
||||
fwrite("00dc", 4, 1, f);
|
||||
write_u32_le(f, (uint32_t)jpeg_data.size);
|
||||
index[i].offset = ftell(f) - 8;
|
||||
index[i].size = (uint32_t)jpeg_data.size;
|
||||
fwrite(jpeg_data.buf, 1, jpeg_data.size, f);
|
||||
|
||||
if (jpeg_data.size % 2) {
|
||||
fputc(0, f);
|
||||
}
|
||||
|
||||
free(jpeg_data.buf);
|
||||
}
|
||||
|
||||
long cur_pos = ftell(f);
|
||||
long movi_size = cur_pos - movi_size_pos - 4;
|
||||
fseek(f, movi_size_pos, SEEK_SET);
|
||||
write_u32_le(f, movi_size);
|
||||
fseek(f, cur_pos, SEEK_SET);
|
||||
|
||||
fwrite("idx1", 4, 1, f);
|
||||
write_u32_le(f, num_images * 16);
|
||||
for (int i = 0; i < num_images; i++) {
|
||||
fwrite("00dc", 4, 1, f);
|
||||
write_u32_le(f, 0x10);
|
||||
write_u32_le(f, index[i].offset);
|
||||
write_u32_le(f, index[i].size);
|
||||
}
|
||||
|
||||
cur_pos = ftell(f);
|
||||
long file_size = cur_pos - riff_size_pos - 4;
|
||||
fseek(f, riff_size_pos, SEEK_SET);
|
||||
write_u32_le(f, file_size);
|
||||
fseek(f, cur_pos, SEEK_SET);
|
||||
|
||||
fclose(f);
|
||||
free(index);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef SD_USE_WEBP
|
||||
int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
||||
if (num_images == 0) {
|
||||
fprintf(stderr, "Error: Image array is empty.\n");
|
||||
return -1;
|
||||
}
|
||||
if (fps <= 0) {
|
||||
fprintf(stderr, "Error: FPS must be positive.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
const int width = static_cast<int>(images[0].width);
|
||||
const int height = static_cast<int>(images[0].height);
|
||||
const int channels = static_cast<int>(images[0].channel);
|
||||
if (channels != 1 && channels != 3 && channels != 4) {
|
||||
fprintf(stderr, "Error: Unsupported channel count: %d\n", channels);
|
||||
return -1;
|
||||
}
|
||||
|
||||
WebPAnimEncoderOptions anim_options;
|
||||
WebPConfig config;
|
||||
if (!WebPAnimEncoderOptionsInit(&anim_options) || !WebPConfigInit(&config)) {
|
||||
fprintf(stderr, "Error: Failed to initialize WebP animation encoder.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
config.quality = static_cast<float>(quality);
|
||||
config.method = 4;
|
||||
config.thread_level = 1;
|
||||
if (channels == 4) {
|
||||
config.exact = 1;
|
||||
}
|
||||
if (!WebPValidateConfig(&config)) {
|
||||
fprintf(stderr, "Error: Invalid WebP encoder configuration.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
WebPAnimEncoder* enc = WebPAnimEncoderNew(width, height, &anim_options);
|
||||
if (enc == nullptr) {
|
||||
fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
const int frame_duration_ms = std::max(1, static_cast<int>(std::lround(1000.0 / static_cast<double>(fps))));
|
||||
int timestamp_ms = 0;
|
||||
int ret = -1;
|
||||
|
||||
for (int i = 0; i < num_images; ++i) {
|
||||
const sd_image_t& image = images[i];
|
||||
if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height) {
|
||||
fprintf(stderr, "Error: Frame dimensions do not match.\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
WebPPicture picture;
|
||||
if (!WebPPictureInit(&picture)) {
|
||||
fprintf(stderr, "Error: Failed to initialize WebPPicture.\n");
|
||||
goto cleanup;
|
||||
}
|
||||
picture.use_argb = 1;
|
||||
picture.width = width;
|
||||
picture.height = height;
|
||||
|
||||
bool picture_ok = false;
|
||||
std::vector<uint8_t> rgb_buffer;
|
||||
if (image.channel == 1) {
|
||||
rgb_buffer.resize(static_cast<size_t>(width) * static_cast<size_t>(height) * 3);
|
||||
for (int p = 0; p < width * height; ++p) {
|
||||
rgb_buffer[p * 3 + 0] = image.data[p];
|
||||
rgb_buffer[p * 3 + 1] = image.data[p];
|
||||
rgb_buffer[p * 3 + 2] = image.data[p];
|
||||
}
|
||||
picture_ok = WebPPictureImportRGB(&picture, rgb_buffer.data(), width * 3) != 0;
|
||||
} else if (image.channel == 4) {
|
||||
picture_ok = WebPPictureImportRGBA(&picture, image.data, width * 4) != 0;
|
||||
} else {
|
||||
picture_ok = WebPPictureImportRGB(&picture, image.data, width * 3) != 0;
|
||||
}
|
||||
|
||||
if (!picture_ok) {
|
||||
fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n");
|
||||
WebPPictureFree(&picture);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (!WebPAnimEncoderAdd(enc, &picture, timestamp_ms, &config)) {
|
||||
fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc));
|
||||
WebPPictureFree(&picture);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
WebPPictureFree(&picture);
|
||||
timestamp_ms += frame_duration_ms;
|
||||
}
|
||||
|
||||
if (!WebPAnimEncoderAdd(enc, nullptr, timestamp_ms, nullptr)) {
|
||||
fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
{
|
||||
WebPData webp_data;
|
||||
WebPDataInit(&webp_data);
|
||||
if (!WebPAnimEncoderAssemble(enc, &webp_data)) {
|
||||
fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc));
|
||||
WebPDataClear(&webp_data);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
FILE* f = fopen(filename, "wb");
|
||||
if (!f) {
|
||||
perror("Error opening file for writing");
|
||||
WebPDataClear(&webp_data);
|
||||
goto cleanup;
|
||||
}
|
||||
if (webp_data.size > 0 && fwrite(webp_data.bytes, 1, webp_data.size, f) != webp_data.size) {
|
||||
fprintf(stderr, "Error: Failed to write animated WebP file.\n");
|
||||
fclose(f);
|
||||
WebPDataClear(&webp_data);
|
||||
goto cleanup;
|
||||
}
|
||||
fclose(f);
|
||||
WebPDataClear(&webp_data);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
cleanup:
|
||||
WebPAnimEncoderDelete(enc);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
||||
std::string path = filename ? filename : "";
|
||||
auto pos = path.find_last_of('.');
|
||||
std::string ext = pos == std::string::npos ? "" : path.substr(pos);
|
||||
for (char& ch : ext) {
|
||||
ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
|
||||
}
|
||||
|
||||
#ifdef SD_USE_WEBP
|
||||
if (ext == ".webp") {
|
||||
return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality);
|
||||
}
|
||||
#endif
|
||||
|
||||
return create_mjpg_avi_from_sd_images(filename, images, num_images, fps, quality);
|
||||
}
|
||||
76
examples/common/media_io.h
Normal file
76
examples/common/media_io.h
Normal file
@ -0,0 +1,76 @@
|
||||
#ifndef __MEDIA_IO_H__
|
||||
#define __MEDIA_IO_H__
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "stable-diffusion.h"
|
||||
|
||||
// Encoded image container formats distinguished by the media I/O helpers.
enum class EncodedImageFormat {
    JPEG,
    PNG,
    WEBP,
    // NOTE(review): presumably what encoded_image_format_from_path() yields for
    // an unrecognized path — confirm against the implementation.
    UNKNOWN,
};
|
||||
|
||||
EncodedImageFormat encoded_image_format_from_path(const std::string& path);
|
||||
|
||||
std::vector<uint8_t> encode_image_to_vector(EncodedImageFormat format,
|
||||
const uint8_t* image,
|
||||
int width,
|
||||
int height,
|
||||
int channels,
|
||||
const std::string& parameters = "",
|
||||
int quality = 90);
|
||||
|
||||
bool write_image_to_file(const std::string& path,
|
||||
const uint8_t* image,
|
||||
int width,
|
||||
int height,
|
||||
int channels,
|
||||
const std::string& parameters = "",
|
||||
int quality = 90);
|
||||
|
||||
uint8_t* load_image_from_file(const char* image_path,
|
||||
int& width,
|
||||
int& height,
|
||||
int expected_width = 0,
|
||||
int expected_height = 0,
|
||||
int expected_channel = 3);
|
||||
|
||||
bool load_sd_image_from_file(sd_image_t* image,
|
||||
const char* image_path,
|
||||
int expected_width = 0,
|
||||
int expected_height = 0,
|
||||
int expected_channel = 3);
|
||||
|
||||
uint8_t* load_image_from_memory(const char* image_bytes,
|
||||
int len,
|
||||
int& width,
|
||||
int& height,
|
||||
int expected_width = 0,
|
||||
int expected_height = 0,
|
||||
int expected_channel = 3);
|
||||
|
||||
int create_mjpg_avi_from_sd_images(const char* filename,
|
||||
sd_image_t* images,
|
||||
int num_images,
|
||||
int fps,
|
||||
int quality = 90);
|
||||
|
||||
#ifdef SD_USE_WEBP
|
||||
int create_animated_webp_from_sd_images(const char* filename,
|
||||
sd_image_t* images,
|
||||
int num_images,
|
||||
int fps,
|
||||
int quality = 90);
|
||||
#endif
|
||||
|
||||
int create_video_from_sd_images(const char* filename,
|
||||
sd_image_t* images,
|
||||
int num_images,
|
||||
int fps,
|
||||
int quality = 90);
|
||||
|
||||
#endif // __MEDIA_IO_H__
|
||||
@ -56,7 +56,11 @@ else()
|
||||
message(STATUS "Frontend disabled or directory not found: ${FRONTEND_DIR}")
|
||||
endif()
|
||||
|
||||
add_executable(${TARGET} main.cpp)
|
||||
add_executable(${TARGET}
|
||||
../common/log.cpp
|
||||
../common/media_io.cpp
|
||||
main.cpp
|
||||
)
|
||||
|
||||
if(HAVE_FRONTEND_BUILD)
|
||||
add_dependencies(${TARGET} ${TARGET}_frontend)
|
||||
@ -70,6 +74,9 @@ endif()
|
||||
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
|
||||
if(SD_WEBP)
|
||||
target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
|
||||
endif()
|
||||
|
||||
# due to httplib; it contains a pragma for MSVC, but other things need explicit flags
|
||||
if(WIN32 AND NOT MSVC)
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
#include "stable-diffusion.h"
|
||||
|
||||
#include "common/common.hpp"
|
||||
#include "common/media_io.h"
|
||||
|
||||
#ifdef HAVE_INDEX_HTML
|
||||
#include "frontend/dist/gen_index_html.h"
|
||||
@ -217,62 +218,6 @@ std::string extract_and_remove_sd_cpp_extra_args(std::string& text) {
|
||||
return extracted;
|
||||
}
|
||||
|
||||
// Output encodings accepted by write_image_to_vector().
enum class ImageFormat {
    JPEG,
    PNG,
};
|
||||
|
||||
// Encodes a PNG entirely in memory with stbi_write_png_to_mem() and delivers the
// finished buffer to `func` in a single call, forwarding `parameters` to the encoder.
// Returns 1 on success, or 0 when the encoder yields no buffer (in which case
// `func` is never invoked).
static int stbi_ext_write_png_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int stride_bytes, const char* parameters) {
    int encoded_size;
    unsigned char* encoded = stbi_write_png_to_mem(static_cast<const unsigned char*>(data), stride_bytes, x, y, comp, &encoded_size, parameters);
    if (encoded != nullptr) {
        func(context, encoded, encoded_size);
        // The encoder allocated the buffer, so release it with stb's matching macro.
        STBIW_FREE(encoded);
        return 1;
    }
    return 0;
}
|
||||
|
||||
// Encodes a raw pixel buffer as JPEG or PNG and returns the encoded bytes.
//
//   format   - target encoding.
//   image    - pixel data, `width * channels` bytes per row.
//   params   - optional text forwarded to the PNG encoder; ignored for JPEG.
//   quality  - JPEG quality passed to stbi_write_jpg_to_func(); ignored for PNG.
//
// Throws std::runtime_error if `format` is not a known value or if the encoder
// reports failure.
std::vector<uint8_t> write_image_to_vector(
    ImageFormat format,
    const uint8_t* image,
    int width,
    int height,
    int channels,
    std::string params = "",
    int quality        = 90) {
    std::vector<uint8_t> buffer;

    // stb write callback: `context` is the destination vector, and each call
    // appends one chunk of encoded output. Being capture-less, the lambda
    // converts to the plain function pointer the stb API expects.
    auto append_chunk = [](void* context, void* data, int size) {
        auto* out          = static_cast<std::vector<uint8_t>*>(context);
        const uint8_t* src = static_cast<const uint8_t*>(data);
        out->insert(out->end(), src, src + size);
    };

    int result = 0;
    switch (format) {
        case ImageFormat::JPEG:
            result = stbi_write_jpg_to_func(append_chunk, &buffer, width, height, channels, image, quality);
            break;
        case ImageFormat::PNG:
            // An empty parameter string is passed as nullptr.
            result = stbi_ext_write_png_to_func(append_chunk, &buffer, width, height, channels, image, width * channels, params.empty() ? nullptr : params.c_str());
            break;
        default:
            throw std::runtime_error("invalid image format");
    }

    if (!result) {
        throw std::runtime_error("write image to mem failed");
    }

    return buffer;
}
|
||||
|
||||
void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
|
||||
SDSvrParams* svr_params = (SDSvrParams*)data;
|
||||
log_print(level, log, svr_params->verbose, svr_params->color);
|
||||
@ -345,7 +290,7 @@ void free_results(sd_image_t* result_images, int num_results) {
|
||||
if (result_images) {
|
||||
for (int i = 0; i < num_results; ++i) {
|
||||
if (result_images[i].data) {
|
||||
stbi_image_free(result_images[i].data);
|
||||
free(result_images[i].data);
|
||||
result_images[i].data = nullptr;
|
||||
}
|
||||
}
|
||||
@ -416,9 +361,9 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
||||
|
||||
std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(prompt);
|
||||
|
||||
if (output_format != "png" && output_format != "jpeg") {
|
||||
if (output_format != "png" && output_format != "jpeg" && output_format != "webp") {
|
||||
res.status = 400;
|
||||
res.set_content(R"({"error":"invalid output_format, must be one of [png, jpeg]"})", "application/json");
|
||||
res.set_content(R"({"error":"invalid output_format, must be one of [png, jpeg, webp]"})", "application/json");
|
||||
return;
|
||||
}
|
||||
if (n <= 0)
|
||||
@ -511,7 +456,11 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
||||
std::string params = gen_params.embed_image_metadata
|
||||
? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i)
|
||||
: "";
|
||||
auto image_bytes = write_image_to_vector(output_format == "jpeg" ? ImageFormat::JPEG : ImageFormat::PNG,
|
||||
auto image_bytes = encode_image_to_vector(output_format == "jpeg"
|
||||
? EncodedImageFormat::JPEG
|
||||
: output_format == "webp"
|
||||
? EncodedImageFormat::WEBP
|
||||
: EncodedImageFormat::PNG,
|
||||
results[i].data,
|
||||
results[i].width,
|
||||
results[i].height,
|
||||
@ -765,7 +714,11 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
||||
std::string params = gen_params.embed_image_metadata
|
||||
? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i)
|
||||
: "";
|
||||
auto image_bytes = write_image_to_vector(output_format == "jpeg" ? ImageFormat::JPEG : ImageFormat::PNG,
|
||||
auto image_bytes = encode_image_to_vector(output_format == "jpeg"
|
||||
? EncodedImageFormat::JPEG
|
||||
: output_format == "webp"
|
||||
? EncodedImageFormat::WEBP
|
||||
: EncodedImageFormat::PNG,
|
||||
results[i].data,
|
||||
results[i].width,
|
||||
results[i].height,
|
||||
@ -783,13 +736,13 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
||||
res.status = 200;
|
||||
|
||||
if (init_image.data) {
|
||||
stbi_image_free(init_image.data);
|
||||
free(init_image.data);
|
||||
}
|
||||
if (mask_image.data) {
|
||||
stbi_image_free(mask_image.data);
|
||||
free(mask_image.data);
|
||||
}
|
||||
for (auto ref_image : ref_images) {
|
||||
stbi_image_free(ref_image.data);
|
||||
free(ref_image.data);
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
res.status = 500;
|
||||
@ -1084,7 +1037,7 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
||||
std::string params = gen_params.embed_image_metadata
|
||||
? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i)
|
||||
: "";
|
||||
auto image_bytes = write_image_to_vector(ImageFormat::PNG,
|
||||
auto image_bytes = encode_image_to_vector(EncodedImageFormat::PNG,
|
||||
results[i].data,
|
||||
results[i].width,
|
||||
results[i].height,
|
||||
@ -1105,13 +1058,13 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
||||
res.status = 200;
|
||||
|
||||
if (init_image.data) {
|
||||
stbi_image_free(init_image.data);
|
||||
free(init_image.data);
|
||||
}
|
||||
if (mask_image.data && mask_data.empty()) {
|
||||
stbi_image_free(mask_image.data);
|
||||
free(mask_image.data);
|
||||
}
|
||||
for (auto ref_image : ref_images) {
|
||||
stbi_image_free(ref_image.data);
|
||||
free(ref_image.data);
|
||||
}
|
||||
|
||||
} catch (const std::exception& e) {
|
||||
|
||||
17
thirdparty/CMakeLists.txt
vendored
17
thirdparty/CMakeLists.txt
vendored
@ -1,3 +1,20 @@
|
||||
# zip/miniz sources, compiled as an object library; this directory is exported
# as a public include path.
set(Z_TARGET zip)
add_library(${Z_TARGET} OBJECT zip.c zip.h miniz.h)
target_include_directories(${Z_TARGET} PUBLIC .)

if(SD_WEBP)
    # Only the libwebp libraries are wanted: switch off its tools and extras.
    # NOTE(review): these are plain variables; whether they override libwebp's
    # own option() defaults depends on policy CMP0077 inside the subproject —
    # verify with the CMake versions you support.
    set(WEBP_BUILD_ANIM_UTILS OFF)
    set(WEBP_BUILD_CWEBP OFF)
    set(WEBP_BUILD_DWEBP OFF)
    set(WEBP_BUILD_EXTRAS OFF)
    set(WEBP_BUILD_FUZZTEST OFF)
    set(WEBP_BUILD_GIF2WEBP OFF)
    set(WEBP_BUILD_IMG2WEBP OFF)
    set(WEBP_BUILD_VWEBP OFF)
    set(WEBP_BUILD_WEBPINFO OFF)
    set(WEBP_BUILD_WEBPMUX OFF)
    set(WEBP_BUILD_WEBP_JS OFF)

    # Keep the mux library enabled.
    set(WEBP_BUILD_LIBWEBPMUX ON)

    # EXCLUDE_FROM_ALL: libwebp targets are built only when something depends on them.
    add_subdirectory(libwebp EXCLUDE_FROM_ALL)
endif()
|
||||
|
||||
1
thirdparty/libwebp
vendored
Submodule
1
thirdparty/libwebp
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 0c9546f7efc61eac7f79ae115c3f99c91c21c443
|
||||
Loading…
x
Reference in New Issue
Block a user