diff --git a/.gitmodules b/.gitmodules index 5d66c879..91cde1f2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "examples/server/frontend"] path = examples/server/frontend url = https://github.com/leejet/stable-ui.git +[submodule "thirdparty/libwebp"] + path = thirdparty/libwebp + url = https://github.com/webmproject/libwebp.git diff --git a/CMakeLists.txt b/CMakeLists.txt index bad1ba4c..9098f827 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,6 +29,7 @@ endif() # general #option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE}) option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE}) +option(SD_WEBP "sd: enable WebP image I/O support" ON) option(SD_CUDA "sd: cuda backend" OFF) option(SD_HIPBLAS "sd: rocm backend" OFF) option(SD_METAL "sd: metal backend" OFF) @@ -77,6 +78,10 @@ if(SD_MUSA) add_definitions(-DSD_USE_CUDA) endif() +if(SD_WEBP) + add_compile_definitions(SD_USE_WEBP) +endif() + set(SD_LIB stable-diffusion) file(GLOB SD_LIB_SOURCES diff --git a/docs/build.md b/docs/build.md index 1ba582d9..eabb51ac 100644 --- a/docs/build.md +++ b/docs/build.md @@ -16,6 +16,18 @@ git submodule init git submodule update ``` +## WebP Support in Examples + +The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O. This is enabled by default. + +If you do not want WebP support, you can disable it at configure time: + +```shell +mkdir build && cd build +cmake .. -DSD_WEBP=OFF +cmake --build . --config Release +``` + ## Build (CPU only) If you don't have a GPU or CUDA installed, you can build a CPU-only version. 
diff --git a/examples/cli/CMakeLists.txt b/examples/cli/CMakeLists.txt index 1727268b..e4acaac8 100644 --- a/examples/cli/CMakeLists.txt +++ b/examples/cli/CMakeLists.txt @@ -1,9 +1,14 @@ set(TARGET sd-cli) add_executable(${TARGET} + ../common/log.cpp + ../common/media_io.cpp image_metadata.cpp main.cpp ) install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT}) +if(SD_WEBP) + target_link_libraries(${TARGET} PRIVATE webp libwebpmux) +endif() target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17) diff --git a/examples/cli/README.md b/examples/cli/README.md index 7bb037a9..25fcce69 100644 --- a/examples/cli/README.md +++ b/examples/cli/README.md @@ -5,8 +5,8 @@ usage: ./bin/sd-cli [options] CLI Options: -o, --output path to write result image to. you can use printf-style %d format specifiers for image sequences (default: - ./output.png) (eg. output_%03d.png) - --preview-path path to write preview image to (default: ./preview.png) + ./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi and animated .webp + --preview-path path to write preview image to (default: ./preview.png). 
Multi-frame previews support .avi and animated .webp --preview-interval interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at every step) --output-begin-idx starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise) diff --git a/examples/cli/avi_writer.h b/examples/cli/avi_writer.h deleted file mode 100644 index 53b4749c..00000000 --- a/examples/cli/avi_writer.h +++ /dev/null @@ -1,217 +0,0 @@ -#ifndef __AVI_WRITER_H__ -#define __AVI_WRITER_H__ - -#include -#include -#include -#include - -#include "stable-diffusion.h" - -#ifndef INCLUDE_STB_IMAGE_WRITE_H -#include "stb_image_write.h" -#endif - -typedef struct { - uint32_t offset; - uint32_t size; -} avi_index_entry; - -// Write 32-bit little-endian integer -void write_u32_le(FILE* f, uint32_t val) { - fwrite(&val, 4, 1, f); -} - -// Write 16-bit little-endian integer -void write_u16_le(FILE* f, uint16_t val) { - fwrite(&val, 2, 1, f); -} - -/** - * Create an MJPG AVI file from an array of sd_image_t images. - * Images are encoded to JPEG using stb_image_write. - * - * @param filename Output AVI file name. - * @param images Array of input images. - * @param num_images Number of images in the array. - * @param fps Frames per second for the video. - * @param quality JPEG quality (0-100). - * @return 0 on success, -1 on failure. 
- */ -int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality = 90) { - if (num_images == 0) { - fprintf(stderr, "Error: Image array is empty.\n"); - return -1; - } - - FILE* f = fopen(filename, "wb"); - if (!f) { - perror("Error opening file for writing"); - return -1; - } - - uint32_t width = images[0].width; - uint32_t height = images[0].height; - uint32_t channels = images[0].channel; - if (channels != 3 && channels != 4) { - fprintf(stderr, "Error: Unsupported channel count: %u\n", channels); - fclose(f); - return -1; - } - - // --- RIFF AVI Header --- - fwrite("RIFF", 4, 1, f); - long riff_size_pos = ftell(f); - write_u32_le(f, 0); // Placeholder for file size - fwrite("AVI ", 4, 1, f); - - // 'hdrl' LIST (header list) - fwrite("LIST", 4, 1, f); - write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); - fwrite("hdrl", 4, 1, f); - - // 'avih' chunk (AVI main header) - fwrite("avih", 4, 1, f); - write_u32_le(f, 56); - write_u32_le(f, 1000000 / fps); // Microseconds per frame - write_u32_le(f, 0); // Max bytes per second - write_u32_le(f, 0); // Padding granularity - write_u32_le(f, 0x110); // Flags (HASINDEX | ISINTERLEAVED) - write_u32_le(f, num_images); // Total frames - write_u32_le(f, 0); // Initial frames - write_u32_le(f, 1); // Number of streams - write_u32_le(f, width * height * 3); // Suggested buffer size - write_u32_le(f, width); - write_u32_le(f, height); - write_u32_le(f, 0); // Reserved - write_u32_le(f, 0); // Reserved - write_u32_le(f, 0); // Reserved - write_u32_le(f, 0); // Reserved - - // 'strl' LIST (stream list) - fwrite("LIST", 4, 1, f); - write_u32_le(f, 4 + 8 + 56 + 8 + 40); - fwrite("strl", 4, 1, f); - - // 'strh' chunk (stream header) - fwrite("strh", 4, 1, f); - write_u32_le(f, 56); - fwrite("vids", 4, 1, f); // Stream type: video - fwrite("MJPG", 4, 1, f); // Codec: Motion JPEG - write_u32_le(f, 0); // Flags - write_u16_le(f, 0); // Priority - write_u16_le(f, 0); // Language - 
write_u32_le(f, 0); // Initial frames - write_u32_le(f, 1); // Scale - write_u32_le(f, fps); // Rate - write_u32_le(f, 0); // Start - write_u32_le(f, num_images); // Length - write_u32_le(f, width * height * 3); // Suggested buffer size - write_u32_le(f, (uint32_t)-1); // Quality - write_u32_le(f, 0); // Sample size - write_u16_le(f, 0); // rcFrame.left - write_u16_le(f, 0); // rcFrame.top - write_u16_le(f, 0); // rcFrame.right - write_u16_le(f, 0); // rcFrame.bottom - - // 'strf' chunk (stream format: BITMAPINFOHEADER) - fwrite("strf", 4, 1, f); - write_u32_le(f, 40); - write_u32_le(f, 40); // biSize - write_u32_le(f, width); - write_u32_le(f, height); - write_u16_le(f, 1); // biPlanes - write_u16_le(f, 24); // biBitCount - fwrite("MJPG", 4, 1, f); // biCompression (FOURCC) - write_u32_le(f, width * height * 3); // biSizeImage - write_u32_le(f, 0); // XPelsPerMeter - write_u32_le(f, 0); // YPelsPerMeter - write_u32_le(f, 0); // Colors used - write_u32_le(f, 0); // Colors important - - // 'movi' LIST (video frames) - // long movi_list_pos = ftell(f); - fwrite("LIST", 4, 1, f); - long movi_size_pos = ftell(f); - write_u32_le(f, 0); // Placeholder for movi size - fwrite("movi", 4, 1, f); - - avi_index_entry* index = (avi_index_entry*)malloc(sizeof(avi_index_entry) * num_images); - if (!index) { - fclose(f); - return -1; - } - - // Encode and write each frame as JPEG - struct { - uint8_t* buf; - size_t size; - } jpeg_data; - - for (int i = 0; i < num_images; i++) { - jpeg_data.buf = nullptr; - jpeg_data.size = 0; - - // Callback function to collect JPEG data into memory - auto write_to_buf = [](void* context, void* data, int size) { - auto jd = (decltype(jpeg_data)*)context; - jd->buf = (uint8_t*)realloc(jd->buf, jd->size + size); - memcpy(jd->buf + jd->size, data, size); - jd->size += size; - }; - - // Encode to JPEG in memory - stbi_write_jpg_to_func( - write_to_buf, - &jpeg_data, - images[i].width, - images[i].height, - channels, - images[i].data, - quality); - - 
// Write '00dc' chunk (video frame) - fwrite("00dc", 4, 1, f); - write_u32_le(f, (uint32_t)jpeg_data.size); - index[i].offset = ftell(f) - 8; - index[i].size = (uint32_t)jpeg_data.size; - fwrite(jpeg_data.buf, 1, jpeg_data.size, f); - - // Align to even byte size - if (jpeg_data.size % 2) - fputc(0, f); - - free(jpeg_data.buf); - } - - // Finalize 'movi' size - long cur_pos = ftell(f); - long movi_size = cur_pos - movi_size_pos - 4; - fseek(f, movi_size_pos, SEEK_SET); - write_u32_le(f, movi_size); - fseek(f, cur_pos, SEEK_SET); - - // Write 'idx1' index - fwrite("idx1", 4, 1, f); - write_u32_le(f, num_images * 16); - for (int i = 0; i < num_images; i++) { - fwrite("00dc", 4, 1, f); - write_u32_le(f, 0x10); - write_u32_le(f, index[i].offset); - write_u32_le(f, index[i].size); - } - - // Finalize RIFF size - cur_pos = ftell(f); - long file_size = cur_pos - riff_size_pos - 4; - fseek(f, riff_size_pos, SEEK_SET); - write_u32_le(f, file_size); - fseek(f, cur_pos, SEEK_SET); - - fclose(f); - free(index); - - return 0; -} - -#endif // __AVI_WRITER_H__ \ No newline at end of file diff --git a/examples/cli/image_metadata.cpp b/examples/cli/image_metadata.cpp index 015054fa..8dd339c8 100644 --- a/examples/cli/image_metadata.cpp +++ b/examples/cli/image_metadata.cpp @@ -40,6 +40,13 @@ namespace { static_cast(data[offset + 3]); } + uint32_t read_u32_le(const std::vector& data, size_t offset) { + return static_cast(data[offset]) | + (static_cast(data[offset + 1]) << 8) | + (static_cast(data[offset + 2]) << 16) | + (static_cast(data[offset + 3]) << 24); + } + uint16_t read_u16_tiff(const std::vector& data, size_t offset, bool little_endian) { if (little_endian) { return static_cast(data[offset]) | @@ -357,6 +364,11 @@ namespace { json& result, std::string& error); + bool parse_webp(const std::vector& data, + bool include_raw, + json& result, + std::string& error); + std::string abbreviate(const std::string& value, bool brief); void print_json_value(std::ostream& out, @@ -1008,6 
+1020,83 @@ namespace { return true; } + bool parse_webp(const std::vector& data, + bool include_raw, + json& result, + std::string& error) { + if (data.size() < 12 || + memcmp(data.data(), "RIFF", 4) != 0 || + memcmp(data.data() + 8, "WEBP", 4) != 0) { + error = "not a WebP file"; + return false; + } + + result["format"] = "WEBP"; + result["entries"] = json::array(); + + size_t offset = 12; + while (offset + 8 <= data.size()) { + const std::string raw_type = + bytes_to_string(data.data() + offset, data.data() + offset + 4); + const uint32_t length = read_u32_le(data, offset + 4); + offset += 8; + + if (offset + static_cast(length) > data.size()) { + error = "WebP chunk exceeds file size"; + return false; + } + + const uint8_t* payload = data.data() + offset; + const std::string type = + !raw_type.empty() && raw_type.back() == ' ' + ? raw_type.substr(0, raw_type.size() - 1) + : raw_type; + + json entry; + entry["entry_type"] = "chunk"; + entry["name"] = type; + entry["length"] = length; + entry["metadata_like"] = + (raw_type == "ICCP" || raw_type == "EXIF" || raw_type == "XMP "); + + if (raw_type == "VP8X" && length >= 10) { + entry["data"] = json{ + {"icc_profile", (payload[0] & 0x20) != 0}, + {"alpha", (payload[0] & 0x10) != 0}, + {"exif", (payload[0] & 0x08) != 0}, + {"xmp", (payload[0] & 0x04) != 0}, + {"animation", (payload[0] & 0x02) != 0}, + {"canvas_width", 1 + static_cast(payload[4]) + (static_cast(payload[5]) << 8) + (static_cast(payload[6]) << 16)}, + {"canvas_height", 1 + static_cast(payload[7]) + (static_cast(payload[8]) << 8) + (static_cast(payload[9]) << 16)}, + }; + } else if (raw_type == "EXIF") { + std::string exif_error; + json meta = parse_exif_tiff(payload, length, include_raw, exif_error); + if (!meta.empty()) { + entry["data"] = std::move(meta); + } + if (!exif_error.empty()) { + entry["error"] = exif_error; + } + } else if (raw_type == "XMP ") { + entry["data"] = json{ + {"type", "XMP"}, + {"xml", trim_trailing_nuls(bytes_to_string(payload, 
payload + length))}, + }; + } else if (raw_type == "ICCP") { + entry["data"] = json{{"profile_size", length}}; + append_raw_preview(entry["data"], payload, length, include_raw); + } else { + append_raw_preview(entry, payload, length, include_raw); + } + + result["entries"].push_back(entry); + offset += static_cast(length) + (length & 1u); + } + + return true; + } + std::string abbreviate(const std::string& value, bool brief) { if (!brief || value.size() <= 240) { return value; @@ -1116,8 +1205,12 @@ namespace { if (data.size() >= 2 && data[0] == 0xFF && data[1] == 0xD8) { return parse_jpeg(data, include_raw, report, error); } + if (data.size() >= 12 && memcmp(data.data(), "RIFF", 4) == 0 && + memcmp(data.data() + 8, "WEBP", 4) == 0) { + return parse_webp(data, include_raw, report, error); + } - error = "unsupported image format; only PNG and JPEG are supported"; + error = "unsupported image format; only PNG, JPEG, and WebP are supported"; return false; } diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 55538768..b4a3c343 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -16,8 +16,7 @@ #include "stable-diffusion.h" #include "common/common.hpp" - -#include "avi_writer.h" +#include "common/media_io.h" #include "image_metadata.h" const char* previews_str[] = { @@ -303,7 +302,7 @@ bool load_images_from_dir(const std::string dir, std::string ext = entry.path().extension().string(); std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); - if (ext == ".jpg" || ext == ".jpeg" || ext == ".png" || ext == ".bmp") { + if (ext == ".jpg" || ext == ".jpeg" || ext == ".png" || ext == ".bmp" || ext == ".webp") { LOG_DEBUG("load image %zu from '%s'", images.size(), path.c_str()); int width = 0; int height = 0; @@ -333,9 +332,17 @@ void step_callback(int step, int frame_count, sd_image_t* image, bool is_noisy, // is_noisy is set to true if the preview corresponds to noisy latents, false if it's denoised latents // unused in this app, it will 
either be always noisy or always denoised here if (frame_count == 1) { - stbi_write_png(cli_params->preview_path.c_str(), image->width, image->height, image->channel, image->data, 0); + if (!write_image_to_file(cli_params->preview_path, + image->data, + image->width, + image->height, + image->channel)) { + LOG_ERROR("save preview image to '%s' failed", cli_params->preview_path.c_str()); + } } else { - create_mjpg_avi_from_sd_images(cli_params->preview_path.c_str(), image, frame_count, cli_params->preview_fps); + if (create_video_from_sd_images(cli_params->preview_path.c_str(), image, frame_count, cli_params->preview_fps) != 0) { + LOG_ERROR("save preview video to '%s' failed", cli_params->preview_path.c_str()); + } } } @@ -385,9 +392,11 @@ bool save_results(const SDCliParams& cli_params, std::string ext_lower = ext.string(); std::transform(ext_lower.begin(), ext_lower.end(), ext_lower.begin(), ::tolower); - bool is_jpg = (ext_lower == ".jpg" || ext_lower == ".jpeg" || ext_lower == ".jpe"); + const EncodedImageFormat output_format = encoded_image_format_from_path(out_path.string()); if (!ext.empty()) { - if (is_jpg || ext_lower == ".png") { + if (output_format == EncodedImageFormat::JPEG || + output_format == EncodedImageFormat::PNG || + output_format == EncodedImageFormat::WEBP) { base_path.replace_extension(); } } @@ -405,20 +414,15 @@ bool save_results(const SDCliParams& cli_params, std::string params = gen_params.embed_image_metadata ? get_image_params(ctx_params, gen_params, gen_params.seed + idx) : ""; - int ok = 0; - if (is_jpg) { - ok = stbi_write_jpg(path.string().c_str(), img.width, img.height, img.channel, img.data, 90, params.size() > 0 ? params.c_str() : nullptr); - } else { - ok = stbi_write_png(path.string().c_str(), img.width, img.height, img.channel, img.data, 0, params.size() > 0 ? 
params.c_str() : nullptr); - } + const bool ok = write_image_to_file(path.string(), img.data, img.width, img.height, img.channel, params, 90); LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure"); - return ok != 0; + return ok; }; int sucessful_reults = 0; if (std::regex_search(cli_params.output_path, format_specifier_regex)) { - if (!is_jpg && ext_lower != ".png") + if (output_format == EncodedImageFormat::UNKNOWN) ext = ".png"; fs::path pattern = base_path; pattern += ext; @@ -434,20 +438,20 @@ bool save_results(const SDCliParams& cli_params, } if (cli_params.mode == VID_GEN && num_results > 1) { - if (ext_lower != ".avi") + if (ext_lower != ".avi" && ext_lower != ".webp") ext = ".avi"; fs::path video_path = base_path; video_path += ext; - if (create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) { - LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str()); + if (create_video_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) { + LOG_INFO("save result video to '%s'", video_path.string().c_str()); return true; } else { - LOG_ERROR("Failed to save result MPG AVI video to '%s'", video_path.string().c_str()); + LOG_ERROR("Failed to save result video to '%s'", video_path.string().c_str()); return false; } } - if (!is_jpg && ext_lower != ".png") + if (output_format == EncodedImageFormat::UNKNOWN) ext = ".png"; for (int i = 0; i < num_results; ++i) { diff --git a/examples/common/common.hpp b/examples/common/common.hpp index b170df30..7beef9d5 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -1,4 +1,6 @@ +#include +#include #include #include #include @@ -17,20 +19,9 @@ namespace fs = std::filesystem; #include #endif // _WIN32 +#include "log.h" #include "stable-diffusion.h" -#define STB_IMAGE_IMPLEMENTATION -#define STB_IMAGE_STATIC -#include "stb_image.h" - -#define 
STB_IMAGE_WRITE_IMPLEMENTATION -#define STB_IMAGE_WRITE_STATIC -#include "stb_image_write.h" - -#define STB_IMAGE_RESIZE_IMPLEMENTATION -#define STB_IMAGE_RESIZE_STATIC -#include "stb_image_resize.h" - #define SAFE_STR(s) ((s) ? (s) : "") #define BOOL_STR(b) ((b) ? "true" : "false") @@ -88,125 +79,6 @@ static std::string argv_to_utf8(int index, const char** argv) { #endif -static void print_utf8(FILE* stream, const char* utf8) { - if (!utf8) - return; - -#ifdef _WIN32 - HANDLE h = (stream == stderr) - ? GetStdHandle(STD_ERROR_HANDLE) - : GetStdHandle(STD_OUTPUT_HANDLE); - - DWORD mode; - BOOL is_console = GetConsoleMode(h, &mode); - - if (is_console) { - int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0); - if (wlen <= 0) - return; - - wchar_t* wbuf = (wchar_t*)malloc(wlen * sizeof(wchar_t)); - if (!wbuf) - return; - - MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wbuf, wlen); - - DWORD written; - WriteConsoleW(h, wbuf, wlen - 1, &written, NULL); - - free(wbuf); - } else { - DWORD written; - WriteFile(h, utf8, (DWORD)strlen(utf8), &written, NULL); - } -#else - fputs(utf8, stream); -#endif -} - -static std::string sd_basename(const std::string& path) { - size_t pos = path.find_last_of('/'); - if (pos != std::string::npos) { - return path.substr(pos + 1); - } - pos = path.find_last_of('\\'); - if (pos != std::string::npos) { - return path.substr(pos + 1); - } - return path; -} - -static void log_print(enum sd_log_level_t level, const char* log, bool verbose, bool color) { - int tag_color; - const char* level_str; - FILE* out_stream = (level == SD_LOG_ERROR) ? 
stderr : stdout; - - if (!log || (!verbose && level <= SD_LOG_DEBUG)) { - return; - } - - switch (level) { - case SD_LOG_DEBUG: - tag_color = 37; - level_str = "DEBUG"; - break; - case SD_LOG_INFO: - tag_color = 34; - level_str = "INFO"; - break; - case SD_LOG_WARN: - tag_color = 35; - level_str = "WARN"; - break; - case SD_LOG_ERROR: - tag_color = 31; - level_str = "ERROR"; - break; - default: /* Potential future-proofing */ - tag_color = 33; - level_str = "?????"; - break; - } - - if (color) { - fprintf(out_stream, "\033[%d;1m[%-5s]\033[0m ", tag_color, level_str); - } else { - fprintf(out_stream, "[%-5s] ", level_str); - } - print_utf8(out_stream, log); - fflush(out_stream); -} - -#define LOG_BUFFER_SIZE 4096 - -static bool log_verbose = false; -static bool log_color = false; - -static void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...) { - va_list args; - va_start(args, format); - - static char log_buffer[LOG_BUFFER_SIZE + 1]; - int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file).c_str(), line); - - if (written >= 0 && written < LOG_BUFFER_SIZE) { - vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args); - } - size_t len = strlen(log_buffer); - if (log_buffer[len - 1] != '\n') { - strncat(log_buffer, "\n", LOG_BUFFER_SIZE - len); - } - - log_print(level, log_buffer, log_verbose, log_color); - - va_end(args); -} - -#define LOG_DEBUG(format, ...) log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__) -#define LOG_INFO(format, ...) log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__) -#define LOG_WARN(format, ...) log_printf(SD_LOG_WARN, __FILE__, __LINE__, format, ##__VA_ARGS__) -#define LOG_ERROR(format, ...) 
log_printf(SD_LOG_ERROR, __FILE__, __LINE__, format, ##__VA_ARGS__) - struct StringOption { std::string short_name; std::string long_name; @@ -1967,144 +1839,6 @@ static std::string version_string() { return std::string("stable-diffusion.cpp version ") + sd_version() + ", commit " + sd_commit(); } -uint8_t* load_image_common(bool from_memory, - const char* image_path_or_bytes, - int len, - int& width, - int& height, - int expected_width = 0, - int expected_height = 0, - int expected_channel = 3) { - int c = 0; - const char* image_path; - uint8_t* image_buffer = nullptr; - if (from_memory) { - image_path = "memory"; - image_buffer = (uint8_t*)stbi_load_from_memory((const stbi_uc*)image_path_or_bytes, len, &width, &height, &c, expected_channel); - } else { - image_path = image_path_or_bytes; - image_buffer = (uint8_t*)stbi_load(image_path_or_bytes, &width, &height, &c, expected_channel); - } - if (image_buffer == nullptr) { - LOG_ERROR("load image from '%s' failed", image_path); - return nullptr; - } - if (c < expected_channel) { - fprintf(stderr, - "the number of channels for the input image must be >= %d," - "but got %d channels, image_path = %s", - expected_channel, - c, - image_path); - free(image_buffer); - return nullptr; - } - if (width <= 0) { - LOG_ERROR("error: the width of image must be greater than 0, image_path = %s", image_path); - free(image_buffer); - return nullptr; - } - if (height <= 0) { - LOG_ERROR("error: the height of image must be greater than 0, image_path = %s", image_path); - free(image_buffer); - return nullptr; - } - - // Resize input image ... 
- if ((expected_width > 0 && expected_height > 0) && (height != expected_height || width != expected_width)) { - float dst_aspect = (float)expected_width / (float)expected_height; - float src_aspect = (float)width / (float)height; - - int crop_x = 0, crop_y = 0; - int crop_w = width, crop_h = height; - - if (src_aspect > dst_aspect) { - crop_w = (int)(height * dst_aspect); - crop_x = (width - crop_w) / 2; - } else if (src_aspect < dst_aspect) { - crop_h = (int)(width / dst_aspect); - crop_y = (height - crop_h) / 2; - } - - if (crop_x != 0 || crop_y != 0) { - LOG_INFO("crop input image from %dx%d to %dx%d, image_path = %s", width, height, crop_w, crop_h, image_path); - uint8_t* cropped_image_buffer = (uint8_t*)malloc(crop_w * crop_h * expected_channel); - if (cropped_image_buffer == nullptr) { - LOG_ERROR("error: allocate memory for crop\n"); - free(image_buffer); - return nullptr; - } - for (int row = 0; row < crop_h; row++) { - uint8_t* src = image_buffer + ((crop_y + row) * width + crop_x) * expected_channel; - uint8_t* dst = cropped_image_buffer + (row * crop_w) * expected_channel; - memcpy(dst, src, crop_w * expected_channel); - } - - width = crop_w; - height = crop_h; - free(image_buffer); - image_buffer = cropped_image_buffer; - } - - LOG_INFO("resize input image from %dx%d to %dx%d", width, height, expected_width, expected_height); - int resized_height = expected_height; - int resized_width = expected_width; - - uint8_t* resized_image_buffer = (uint8_t*)malloc(resized_height * resized_width * expected_channel); - if (resized_image_buffer == nullptr) { - LOG_ERROR("error: allocate memory for resize input image\n"); - free(image_buffer); - return nullptr; - } - stbir_resize(image_buffer, width, height, 0, - resized_image_buffer, resized_width, resized_height, 0, STBIR_TYPE_UINT8, - expected_channel, STBIR_ALPHA_CHANNEL_NONE, 0, - STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, - STBIR_FILTER_BOX, STBIR_FILTER_BOX, - STBIR_COLORSPACE_SRGB, nullptr); - width = 
resized_width; - height = resized_height; - free(image_buffer); - image_buffer = resized_image_buffer; - } - return image_buffer; -} - -uint8_t* load_image_from_file(const char* image_path, - int& width, - int& height, - int expected_width = 0, - int expected_height = 0, - int expected_channel = 3) { - return load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel); -} - -bool load_sd_image_from_file(sd_image_t* image, - const char* image_path, - int expected_width = 0, - int expected_height = 0, - int expected_channel = 3) { - int width; - int height; - image->data = load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel); - if (image->data == nullptr) { - return false; - } - image->width = width; - image->height = height; - return true; -} - -uint8_t* load_image_from_memory(const char* image_bytes, - int len, - int& width, - int& height, - int expected_width = 0, - int expected_height = 0, - int expected_channel = 3) { - return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel); -} - std::string get_image_params(const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed) { std::string parameter_string; if (gen_params.prompt_with_lora.size() != 0) { diff --git a/examples/common/log.cpp b/examples/common/log.cpp new file mode 100644 index 00000000..44fcd1e4 --- /dev/null +++ b/examples/common/log.cpp @@ -0,0 +1,118 @@ +#include "log.h" + +bool log_verbose = false; +bool log_color = false; + +std::string sd_basename(const std::string& path) { + size_t pos = path.find_last_of('/'); + if (pos != std::string::npos) { + return path.substr(pos + 1); + } + pos = path.find_last_of('\\'); + if (pos != std::string::npos) { + return path.substr(pos + 1); + } + return path; +} + +void print_utf8(FILE* stream, const char* utf8) { + if (!utf8) { + return; + } + +#ifdef _WIN32 + HANDLE h = (stream == 
stderr) + ? GetStdHandle(STD_ERROR_HANDLE) + : GetStdHandle(STD_OUTPUT_HANDLE); + + DWORD mode; + BOOL is_console = GetConsoleMode(h, &mode); + + if (is_console) { + int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0); + if (wlen <= 0) { + return; + } + + wchar_t* wbuf = (wchar_t*)malloc(wlen * sizeof(wchar_t)); + if (!wbuf) { + return; + } + + MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wbuf, wlen); + + DWORD written; + WriteConsoleW(h, wbuf, wlen - 1, &written, NULL); + + free(wbuf); + } else { + DWORD written; + WriteFile(h, utf8, (DWORD)strlen(utf8), &written, NULL); + } +#else + fputs(utf8, stream); +#endif +} + +void log_print(enum sd_log_level_t level, const char* log, bool verbose, bool color) { + int tag_color; + const char* level_str; + FILE* out_stream = (level == SD_LOG_ERROR) ? stderr : stdout; + + if (!log || (!verbose && level <= SD_LOG_DEBUG)) { + return; + } + + switch (level) { + case SD_LOG_DEBUG: + tag_color = 37; + level_str = "DEBUG"; + break; + case SD_LOG_INFO: + tag_color = 34; + level_str = "INFO"; + break; + case SD_LOG_WARN: + tag_color = 35; + level_str = "WARN"; + break; + case SD_LOG_ERROR: + tag_color = 31; + level_str = "ERROR"; + break; + default: + tag_color = 33; + level_str = "?????"; + break; + } + + if (color) { + fprintf(out_stream, "\033[%d;1m[%-5s]\033[0m ", tag_color, level_str); + } else { + fprintf(out_stream, "[%-5s] ", level_str); + } + print_utf8(out_stream, log); + fflush(out_stream); +} + +void example_log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...) 
{ + constexpr size_t LOG_BUFFER_SIZE = 4096; + + va_list args; + va_start(args, format); + + static char log_buffer[LOG_BUFFER_SIZE + 1]; + int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file).c_str(), line); + + if (written >= 0 && written < static_cast(LOG_BUFFER_SIZE)) { + vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args); + } + size_t len = strlen(log_buffer); + if (len == 0 || log_buffer[len - 1] != '\n') { + strncat(log_buffer, "\n", LOG_BUFFER_SIZE - len); + } + + log_print(level, log_buffer, log_verbose, log_color); + + va_end(args); +} diff --git a/examples/common/log.h b/examples/common/log.h new file mode 100644 index 00000000..f28b4b4e --- /dev/null +++ b/examples/common/log.h @@ -0,0 +1,32 @@ +#ifndef __EXAMPLE_LOG_H__ +#define __EXAMPLE_LOG_H__ + +#include +#include +#include +#include +#include + +#if defined(_WIN32) +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#endif // _WIN32 + +#include "stable-diffusion.h" + +extern bool log_verbose; +extern bool log_color; + +std::string sd_basename(const std::string& path); +void print_utf8(FILE* stream, const char* utf8); +void log_print(sd_log_level_t level, const char* log, bool verbose, bool color); +void example_log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...); + +#define LOG_DEBUG(format, ...) example_log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__) +#define LOG_INFO(format, ...) example_log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__) +#define LOG_WARN(format, ...) example_log_printf(SD_LOG_WARN, __FILE__, __LINE__, format, ##__VA_ARGS__) +#define LOG_ERROR(format, ...) 
example_log_printf(SD_LOG_ERROR, __FILE__, __LINE__, format, ##__VA_ARGS__) + +#endif // __EXAMPLE_LOG_H__ diff --git a/examples/common/media_io.cpp b/examples/common/media_io.cpp new file mode 100644 index 00000000..a38513b9 --- /dev/null +++ b/examples/common/media_io.cpp @@ -0,0 +1,879 @@ +#include "log.h" +#include "media_io.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define STB_IMAGE_IMPLEMENTATION +#define STB_IMAGE_STATIC +#include "stb_image.h" + +#define STB_IMAGE_WRITE_IMPLEMENTATION +#define STB_IMAGE_WRITE_STATIC +#include "stb_image_write.h" + +#define STB_IMAGE_RESIZE_IMPLEMENTATION +#define STB_IMAGE_RESIZE_STATIC +#include "stb_image_resize.h" + +#ifdef SD_USE_WEBP +#include "webp/decode.h" +#include "webp/encode.h" +#include "webp/mux.h" +#endif + +namespace fs = std::filesystem; + +namespace { +bool read_binary_file_bytes(const char* path, std::vector& data) { + std::ifstream fin(fs::path(path), std::ios::binary); + if (!fin) { + return false; + } + + fin.seekg(0, std::ios::end); + std::streampos size = fin.tellg(); + if (size < 0) { + return false; + } + fin.seekg(0, std::ios::beg); + + data.resize(static_cast(size)); + if (!data.empty()) { + fin.read(reinterpret_cast(data.data()), size); + if (!fin) { + return false; + } + } + return true; +} + +bool write_binary_file_bytes(const std::string& path, const std::vector& data) { + std::ofstream fout(fs::path(path), std::ios::binary); + if (!fout) { + return false; + } + + if (!data.empty()) { + fout.write(reinterpret_cast(data.data()), static_cast(data.size())); + if (!fout) { + return false; + } + } + return true; +} + +int stbi_ext_write_png_to_func(stbi_write_func* func, + void* context, + int x, + int y, + int comp, + const void* data, + int stride_bytes, + const char* parameters) { + int len = 0; + unsigned char* png = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &len, parameters); + if (png == 
// Returns true when `data` begins with a RIFF container header whose form
// type is "WEBP": bytes 0-3 are "RIFF" and bytes 8-11 are "WEBP" (bytes 4-7,
// which sit between the two tags, are not inspected). Buffers shorter than
// 12 bytes, or a null `data`, are never a match.
bool is_webp_signature(const uint8_t* data, size_t size) {
    if (data == nullptr || size < 12) {
        return false;
    }
    return memcmp(data, "RIFF", 4) == 0 &&
           memcmp(data + 8, "WEBP", 4) == 0;
}

// Escapes the five characters that are special in XML text and attribute
// values (& < > " ') by replacing them with their predefined entities, so an
// arbitrary string can be embedded in an XML document. All other bytes are
// copied through unchanged.
std::string xml_escape(const std::string& value) {
    std::string escaped;
    // Lower bound only: every escaped character grows the output.
    escaped.reserve(value.size());

    for (const char ch : value) {
        switch (ch) {
            case '&':
                escaped += "&amp;";
                break;
            case '<':
                escaped += "&lt;";
                break;
            case '>':
                escaped += "&gt;";
                break;
            case '"':
                escaped += "&quot;";
                break;
            case '\'':
                escaped += "&apos;";
                break;
            default:
                escaped += ch;
                break;
        }
    }

    return escaped;
}
4 : 3; + for (size_t i = 0; i < pixel_count; ++i) { + const uint8_t* src = decoded + i * decoded_channels; + grayscale[i] = static_cast((77 * src[0] + 150 * src[1] + 29 * src[2] + 128) >> 8); + } + + WebPFree(decoded); + return grayscale; + } + + if (expected_channel != 3 && expected_channel != 4) { + return nullptr; + } + + int decoded_width = width; + int decoded_height = height; + uint8_t* decoded = (expected_channel == 4) + ? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height) + : WebPDecodeRGB(data, size, &decoded_width, &decoded_height); + if (decoded == nullptr) { + return nullptr; + } + + const size_t out_size = pixel_count * static_cast(expected_channel); + uint8_t* output = (uint8_t*)malloc(out_size); + if (output == nullptr) { + WebPFree(decoded); + return nullptr; + } + + memcpy(output, decoded, out_size); + WebPFree(decoded); + return output; +} + +std::string build_webp_xmp_packet(const std::string& parameters) { + if (parameters.empty()) { + return ""; + } + + const std::string escaped_parameters = xml_escape(parameters); + return "\n" + "\n" + " \n" + " \n" + " " + + escaped_parameters + + "\n" + " \n" + " \n" + "\n" + ""; +} + +bool encode_webp_image_to_vector(const uint8_t* image, + int width, + int height, + int channels, + const std::string& parameters, + int quality, + std::vector& out) { + if (image == nullptr || width <= 0 || height <= 0) { + return false; + } + + std::vector rgb_image; + const uint8_t* input_image = image; + int input_channels = channels; + + if (channels == 1) { + rgb_image.resize(static_cast(width) * static_cast(height) * 3); + for (int i = 0; i < width * height; ++i) { + rgb_image[i * 3 + 0] = image[i]; + rgb_image[i * 3 + 1] = image[i]; + rgb_image[i * 3 + 2] = image[i]; + } + input_image = rgb_image.data(); + input_channels = 3; + } + + if (input_channels != 3 && input_channels != 4) { + return false; + } + + uint8_t* encoded = nullptr; + size_t encoded_size = (input_channels == 4) + ? 
WebPEncodeRGBA(input_image, width, height, width * input_channels, static_cast(quality), &encoded) + : WebPEncodeRGB(input_image, width, height, width * input_channels, static_cast(quality), &encoded); + if (encoded == nullptr || encoded_size == 0) { + return false; + } + + out.assign(encoded, encoded + encoded_size); + WebPFree(encoded); + + if (parameters.empty()) { + return true; + } + + WebPData image_data; + WebPData assembled_data; + WebPDataInit(&image_data); + WebPDataInit(&assembled_data); + + image_data.bytes = out.data(); + image_data.size = out.size(); + + WebPMux* mux = WebPMuxNew(); + if (mux == nullptr) { + return false; + } + + const std::string xmp_packet = build_webp_xmp_packet(parameters); + WebPData xmp_data; + WebPDataInit(&xmp_data); + xmp_data.bytes = reinterpret_cast(xmp_packet.data()); + xmp_data.size = xmp_packet.size(); + + const bool ok = WebPMuxSetImage(mux, &image_data, 1) == WEBP_MUX_OK && + WebPMuxSetChunk(mux, "XMP ", &xmp_data, 1) == WEBP_MUX_OK && + WebPMuxAssemble(mux, &assembled_data) == WEBP_MUX_OK; + + if (ok) { + out.assign(assembled_data.bytes, assembled_data.bytes + assembled_data.size); + } + + WebPDataClear(&assembled_data); + WebPMuxDelete(mux); + return ok; +} +#endif + +uint8_t* load_image_common(bool from_memory, + const char* image_path_or_bytes, + int len, + int& width, + int& height, + int expected_width, + int expected_height, + int expected_channel) { + const char* image_path; + uint8_t* image_buffer = nullptr; + int source_channel_count = 0; + +#ifdef SD_USE_WEBP + if (from_memory) { + image_path = "memory"; + if (len > 0 && is_webp_signature(reinterpret_cast(image_path_or_bytes), static_cast(len))) { + image_buffer = decode_webp_image_to_buffer(reinterpret_cast(image_path_or_bytes), + static_cast(len), + width, + height, + expected_channel, + source_channel_count); + } + } else { + image_path = image_path_or_bytes; + if (encoded_image_format_from_path(image_path_or_bytes) == EncodedImageFormat::WEBP) { + 
std::vector file_bytes; + if (!read_binary_file_bytes(image_path_or_bytes, file_bytes)) { + LOG_ERROR("load image from '%s' failed", image_path_or_bytes); + return nullptr; + } + if (!is_webp_signature(file_bytes.data(), file_bytes.size())) { + LOG_ERROR("load image from '%s' failed", image_path_or_bytes); + return nullptr; + } + image_buffer = decode_webp_image_to_buffer(file_bytes.data(), + file_bytes.size(), + width, + height, + expected_channel, + source_channel_count); + } + } +#endif + + if (from_memory) { + image_path = "memory"; + if (image_buffer == nullptr) { + int c = 0; + image_buffer = (uint8_t*)stbi_load_from_memory((const stbi_uc*)image_path_or_bytes, len, &width, &height, &c, expected_channel); + source_channel_count = c; + } + } else { + image_path = image_path_or_bytes; + if (image_buffer == nullptr) { + int c = 0; + image_buffer = (uint8_t*)stbi_load(image_path_or_bytes, &width, &height, &c, expected_channel); + source_channel_count = c; + } + } + if (image_buffer == nullptr) { + LOG_ERROR("load image from '%s' failed", image_path); + return nullptr; + } + if (source_channel_count < expected_channel) { + fprintf(stderr, + "the number of channels for the input image must be >= %d," + "but got %d channels, image_path = %s", + expected_channel, + source_channel_count, + image_path); + free(image_buffer); + return nullptr; + } + if (width <= 0) { + LOG_ERROR("error: the width of image must be greater than 0, image_path = %s", image_path); + free(image_buffer); + return nullptr; + } + if (height <= 0) { + LOG_ERROR("error: the height of image must be greater than 0, image_path = %s", image_path); + free(image_buffer); + return nullptr; + } + + if ((expected_width > 0 && expected_height > 0) && (height != expected_height || width != expected_width)) { + float dst_aspect = (float)expected_width / (float)expected_height; + float src_aspect = (float)width / (float)height; + + int crop_x = 0, crop_y = 0; + int crop_w = width, crop_h = height; + + if 
(src_aspect > dst_aspect) { + crop_w = (int)(height * dst_aspect); + crop_x = (width - crop_w) / 2; + } else if (src_aspect < dst_aspect) { + crop_h = (int)(width / dst_aspect); + crop_y = (height - crop_h) / 2; + } + + if (crop_x != 0 || crop_y != 0) { + LOG_INFO("crop input image from %dx%d to %dx%d, image_path = %s", width, height, crop_w, crop_h, image_path); + uint8_t* cropped_image_buffer = (uint8_t*)malloc(crop_w * crop_h * expected_channel); + if (cropped_image_buffer == nullptr) { + LOG_ERROR("error: allocate memory for crop\n"); + free(image_buffer); + return nullptr; + } + for (int row = 0; row < crop_h; row++) { + uint8_t* src = image_buffer + ((crop_y + row) * width + crop_x) * expected_channel; + uint8_t* dst = cropped_image_buffer + (row * crop_w) * expected_channel; + memcpy(dst, src, crop_w * expected_channel); + } + + width = crop_w; + height = crop_h; + free(image_buffer); + image_buffer = cropped_image_buffer; + } + + LOG_INFO("resize input image from %dx%d to %dx%d", width, height, expected_width, expected_height); + uint8_t* resized_image_buffer = (uint8_t*)malloc(expected_height * expected_width * expected_channel); + if (resized_image_buffer == nullptr) { + LOG_ERROR("error: allocate memory for resize input image\n"); + free(image_buffer); + return nullptr; + } + stbir_resize(image_buffer, width, height, 0, + resized_image_buffer, expected_width, expected_height, 0, STBIR_TYPE_UINT8, + expected_channel, STBIR_ALPHA_CHANNEL_NONE, 0, + STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, + STBIR_FILTER_BOX, STBIR_FILTER_BOX, + STBIR_COLORSPACE_SRGB, nullptr); + width = expected_width; + height = expected_height; + free(image_buffer); + image_buffer = resized_image_buffer; + } + return image_buffer; +} + +typedef struct { + uint32_t offset; + uint32_t size; +} avi_index_entry; + +void write_u32_le(FILE* f, uint32_t val) { + fwrite(&val, 4, 1, f); +} + +void write_u16_le(FILE* f, uint16_t val) { + fwrite(&val, 2, 1, f); +} +} // namespace + +EncodedImageFormat 
encoded_image_format_from_path(const std::string& path) { + std::string ext = fs::path(path).extension().string(); + std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); + + if (ext == ".jpg" || ext == ".jpeg" || ext == ".jpe") { + return EncodedImageFormat::JPEG; + } + if (ext == ".png") { + return EncodedImageFormat::PNG; + } + if (ext == ".webp") { + return EncodedImageFormat::WEBP; + } + return EncodedImageFormat::UNKNOWN; +} + +std::vector encode_image_to_vector(EncodedImageFormat format, + const uint8_t* image, + int width, + int height, + int channels, + const std::string& parameters, + int quality) { + std::vector buffer; + + auto write_func = [&buffer](void* context, void* data, int size) { + (void)context; + uint8_t* src = reinterpret_cast(data); + buffer.insert(buffer.end(), src, src + size); + }; + + struct ContextWrapper { + decltype(write_func)& func; + } ctx{write_func}; + + auto c_func = [](void* context, void* data, int size) { + auto* wrapper = reinterpret_cast(context); + wrapper->func(context, data, size); + }; + + int result = 0; + switch (format) { + case EncodedImageFormat::JPEG: + result = stbi_write_jpg_to_func(c_func, &ctx, width, height, channels, image, quality); + break; + case EncodedImageFormat::PNG: + result = stbi_ext_write_png_to_func(c_func, &ctx, width, height, channels, image, width * channels, parameters.empty() ? nullptr : parameters.c_str()); + break; + case EncodedImageFormat::WEBP: +#ifdef SD_USE_WEBP + if (!encode_webp_image_to_vector(image, width, height, channels, parameters, quality, buffer)) { + buffer.clear(); + } + result = buffer.empty() ? 
0 : 1; + break; +#else + result = 0; + break; +#endif + default: + result = 0; + break; + } + + if (!result) { + buffer.clear(); + } + return buffer; +} + +bool write_image_to_file(const std::string& path, + const uint8_t* image, + int width, + int height, + int channels, + const std::string& parameters, + int quality) { + const EncodedImageFormat format = encoded_image_format_from_path(path); + + switch (format) { + case EncodedImageFormat::JPEG: + return stbi_write_jpg(path.c_str(), width, height, channels, image, quality, parameters.empty() ? nullptr : parameters.c_str()) != 0; + case EncodedImageFormat::PNG: + return stbi_write_png(path.c_str(), width, height, channels, image, 0, parameters.empty() ? nullptr : parameters.c_str()) != 0; + case EncodedImageFormat::WEBP: { + const std::vector encoded = encode_image_to_vector(format, image, width, height, channels, parameters, quality); + return !encoded.empty() && write_binary_file_bytes(path, encoded); + } + default: + return false; + } +} + +uint8_t* load_image_from_file(const char* image_path, + int& width, + int& height, + int expected_width, + int expected_height, + int expected_channel) { + return load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel); +} + +bool load_sd_image_from_file(sd_image_t* image, + const char* image_path, + int expected_width, + int expected_height, + int expected_channel) { + int width; + int height; + image->data = load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel); + if (image->data == nullptr) { + return false; + } + image->width = width; + image->height = height; + return true; +} + +uint8_t* load_image_from_memory(const char* image_bytes, + int len, + int& width, + int& height, + int expected_width, + int expected_height, + int expected_channel) { + return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel); +} + +int 
create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { + if (num_images == 0) { + fprintf(stderr, "Error: Image array is empty.\n"); + return -1; + } + + FILE* f = fopen(filename, "wb"); + if (!f) { + perror("Error opening file for writing"); + return -1; + } + + uint32_t width = images[0].width; + uint32_t height = images[0].height; + uint32_t channels = images[0].channel; + if (channels != 3 && channels != 4) { + fprintf(stderr, "Error: Unsupported channel count: %u\n", channels); + fclose(f); + return -1; + } + + fwrite("RIFF", 4, 1, f); + long riff_size_pos = ftell(f); + write_u32_le(f, 0); + fwrite("AVI ", 4, 1, f); + + fwrite("LIST", 4, 1, f); + write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40); + fwrite("hdrl", 4, 1, f); + + fwrite("avih", 4, 1, f); + write_u32_le(f, 56); + write_u32_le(f, 1000000 / fps); + write_u32_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 0x110); + write_u32_le(f, num_images); + write_u32_le(f, 0); + write_u32_le(f, 1); + write_u32_le(f, width * height * 3); + write_u32_le(f, width); + write_u32_le(f, height); + write_u32_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 0); + + fwrite("LIST", 4, 1, f); + write_u32_le(f, 4 + 8 + 56 + 8 + 40); + fwrite("strl", 4, 1, f); + + fwrite("strh", 4, 1, f); + write_u32_le(f, 56); + fwrite("vids", 4, 1, f); + fwrite("MJPG", 4, 1, f); + write_u32_le(f, 0); + write_u16_le(f, 0); + write_u16_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 1); + write_u32_le(f, fps); + write_u32_le(f, 0); + write_u32_le(f, num_images); + write_u32_le(f, width * height * 3); + write_u32_le(f, (uint32_t)-1); + write_u32_le(f, 0); + write_u16_le(f, 0); + write_u16_le(f, 0); + write_u16_le(f, 0); + write_u16_le(f, 0); + + fwrite("strf", 4, 1, f); + write_u32_le(f, 40); + write_u32_le(f, 40); + write_u32_le(f, width); + write_u32_le(f, height); + write_u16_le(f, 1); + write_u16_le(f, 24); + fwrite("MJPG", 4, 1, f); + write_u32_le(f, 
width * height * 3); + write_u32_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 0); + write_u32_le(f, 0); + + fwrite("LIST", 4, 1, f); + long movi_size_pos = ftell(f); + write_u32_le(f, 0); + fwrite("movi", 4, 1, f); + + avi_index_entry* index = (avi_index_entry*)malloc(sizeof(avi_index_entry) * num_images); + if (!index) { + fclose(f); + return -1; + } + + struct { + uint8_t* buf; + size_t size; + } jpeg_data; + + for (int i = 0; i < num_images; i++) { + jpeg_data.buf = nullptr; + jpeg_data.size = 0; + + auto write_to_buf = [](void* context, void* data, int size) { + auto jd = (decltype(jpeg_data)*)context; + jd->buf = (uint8_t*)realloc(jd->buf, jd->size + size); + memcpy(jd->buf + jd->size, data, size); + jd->size += size; + }; + + stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, quality); + + fwrite("00dc", 4, 1, f); + write_u32_le(f, (uint32_t)jpeg_data.size); + index[i].offset = ftell(f) - 8; + index[i].size = (uint32_t)jpeg_data.size; + fwrite(jpeg_data.buf, 1, jpeg_data.size, f); + + if (jpeg_data.size % 2) { + fputc(0, f); + } + + free(jpeg_data.buf); + } + + long cur_pos = ftell(f); + long movi_size = cur_pos - movi_size_pos - 4; + fseek(f, movi_size_pos, SEEK_SET); + write_u32_le(f, movi_size); + fseek(f, cur_pos, SEEK_SET); + + fwrite("idx1", 4, 1, f); + write_u32_le(f, num_images * 16); + for (int i = 0; i < num_images; i++) { + fwrite("00dc", 4, 1, f); + write_u32_le(f, 0x10); + write_u32_le(f, index[i].offset); + write_u32_le(f, index[i].size); + } + + cur_pos = ftell(f); + long file_size = cur_pos - riff_size_pos - 4; + fseek(f, riff_size_pos, SEEK_SET); + write_u32_le(f, file_size); + fseek(f, cur_pos, SEEK_SET); + + fclose(f); + free(index); + + return 0; +} + +#ifdef SD_USE_WEBP +int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { + if (num_images == 0) { + fprintf(stderr, "Error: Image array is empty.\n"); + return 
-1; + } + if (fps <= 0) { + fprintf(stderr, "Error: FPS must be positive.\n"); + return -1; + } + + const int width = static_cast(images[0].width); + const int height = static_cast(images[0].height); + const int channels = static_cast(images[0].channel); + if (channels != 1 && channels != 3 && channels != 4) { + fprintf(stderr, "Error: Unsupported channel count: %d\n", channels); + return -1; + } + + WebPAnimEncoderOptions anim_options; + WebPConfig config; + if (!WebPAnimEncoderOptionsInit(&anim_options) || !WebPConfigInit(&config)) { + fprintf(stderr, "Error: Failed to initialize WebP animation encoder.\n"); + return -1; + } + + config.quality = static_cast(quality); + config.method = 4; + config.thread_level = 1; + if (channels == 4) { + config.exact = 1; + } + if (!WebPValidateConfig(&config)) { + fprintf(stderr, "Error: Invalid WebP encoder configuration.\n"); + return -1; + } + + WebPAnimEncoder* enc = WebPAnimEncoderNew(width, height, &anim_options); + if (enc == nullptr) { + fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n"); + return -1; + } + + const int frame_duration_ms = std::max(1, static_cast(std::lround(1000.0 / static_cast(fps)))); + int timestamp_ms = 0; + int ret = -1; + + for (int i = 0; i < num_images; ++i) { + const sd_image_t& image = images[i]; + if (static_cast(image.width) != width || static_cast(image.height) != height) { + fprintf(stderr, "Error: Frame dimensions do not match.\n"); + goto cleanup; + } + + WebPPicture picture; + if (!WebPPictureInit(&picture)) { + fprintf(stderr, "Error: Failed to initialize WebPPicture.\n"); + goto cleanup; + } + picture.use_argb = 1; + picture.width = width; + picture.height = height; + + bool picture_ok = false; + std::vector rgb_buffer; + if (image.channel == 1) { + rgb_buffer.resize(static_cast(width) * static_cast(height) * 3); + for (int p = 0; p < width * height; ++p) { + rgb_buffer[p * 3 + 0] = image.data[p]; + rgb_buffer[p * 3 + 1] = image.data[p]; + rgb_buffer[p * 3 + 2] = 
image.data[p]; + } + picture_ok = WebPPictureImportRGB(&picture, rgb_buffer.data(), width * 3) != 0; + } else if (image.channel == 4) { + picture_ok = WebPPictureImportRGBA(&picture, image.data, width * 4) != 0; + } else { + picture_ok = WebPPictureImportRGB(&picture, image.data, width * 3) != 0; + } + + if (!picture_ok) { + fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n"); + WebPPictureFree(&picture); + goto cleanup; + } + + if (!WebPAnimEncoderAdd(enc, &picture, timestamp_ms, &config)) { + fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc)); + WebPPictureFree(&picture); + goto cleanup; + } + + WebPPictureFree(&picture); + timestamp_ms += frame_duration_ms; + } + + if (!WebPAnimEncoderAdd(enc, nullptr, timestamp_ms, nullptr)) { + fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc)); + goto cleanup; + } + + { + WebPData webp_data; + WebPDataInit(&webp_data); + if (!WebPAnimEncoderAssemble(enc, &webp_data)) { + fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc)); + WebPDataClear(&webp_data); + goto cleanup; + } + + FILE* f = fopen(filename, "wb"); + if (!f) { + perror("Error opening file for writing"); + WebPDataClear(&webp_data); + goto cleanup; + } + if (webp_data.size > 0 && fwrite(webp_data.bytes, 1, webp_data.size, f) != webp_data.size) { + fprintf(stderr, "Error: Failed to write animated WebP file.\n"); + fclose(f); + WebPDataClear(&webp_data); + goto cleanup; + } + fclose(f); + WebPDataClear(&webp_data); + } + + ret = 0; + +cleanup: + WebPAnimEncoderDelete(enc); + return ret; +} +#endif + +int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { + std::string path = filename ? filename : ""; + auto pos = path.find_last_of('.'); + std::string ext = pos == std::string::npos ? 
"" : path.substr(pos); + for (char& ch : ext) { + ch = static_cast(tolower(static_cast(ch))); + } + +#ifdef SD_USE_WEBP + if (ext == ".webp") { + return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality); + } +#endif + + return create_mjpg_avi_from_sd_images(filename, images, num_images, fps, quality); +} diff --git a/examples/common/media_io.h b/examples/common/media_io.h new file mode 100644 index 00000000..cb830290 --- /dev/null +++ b/examples/common/media_io.h @@ -0,0 +1,76 @@ +#ifndef __MEDIA_IO_H__ +#define __MEDIA_IO_H__ + +#include +#include +#include + +#include "stable-diffusion.h" + +enum class EncodedImageFormat { + JPEG, + PNG, + WEBP, + UNKNOWN, +}; + +EncodedImageFormat encoded_image_format_from_path(const std::string& path); + +std::vector encode_image_to_vector(EncodedImageFormat format, + const uint8_t* image, + int width, + int height, + int channels, + const std::string& parameters = "", + int quality = 90); + +bool write_image_to_file(const std::string& path, + const uint8_t* image, + int width, + int height, + int channels, + const std::string& parameters = "", + int quality = 90); + +uint8_t* load_image_from_file(const char* image_path, + int& width, + int& height, + int expected_width = 0, + int expected_height = 0, + int expected_channel = 3); + +bool load_sd_image_from_file(sd_image_t* image, + const char* image_path, + int expected_width = 0, + int expected_height = 0, + int expected_channel = 3); + +uint8_t* load_image_from_memory(const char* image_bytes, + int len, + int& width, + int& height, + int expected_width = 0, + int expected_height = 0, + int expected_channel = 3); + +int create_mjpg_avi_from_sd_images(const char* filename, + sd_image_t* images, + int num_images, + int fps, + int quality = 90); + +#ifdef SD_USE_WEBP +int create_animated_webp_from_sd_images(const char* filename, + sd_image_t* images, + int num_images, + int fps, + int quality = 90); +#endif + +int create_video_from_sd_images(const 
char* filename, + sd_image_t* images, + int num_images, + int fps, + int quality = 90); + +#endif // __MEDIA_IO_H__ diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index f2568d72..bf2b252b 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -56,7 +56,11 @@ else() message(STATUS "Frontend disabled or directory not found: ${FRONTEND_DIR}") endif() -add_executable(${TARGET} main.cpp) +add_executable(${TARGET} + ../common/log.cpp + ../common/media_io.cpp + main.cpp +) if(HAVE_FRONTEND_BUILD) add_dependencies(${TARGET} ${TARGET}_frontend) @@ -70,10 +74,13 @@ endif() install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT}) +if(SD_WEBP) + target_link_libraries(${TARGET} PRIVATE webp libwebpmux) +endif() # due to httplib; it contains a pragma for MSVC, but other things need explicit flags if(WIN32 AND NOT MSVC) target_link_libraries(${TARGET} PRIVATE ws2_32) endif() -target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17) \ No newline at end of file +target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17) diff --git a/examples/server/main.cpp b/examples/server/main.cpp index 6a503697..8d4e644b 100644 --- a/examples/server/main.cpp +++ b/examples/server/main.cpp @@ -12,6 +12,7 @@ #include "stable-diffusion.h" #include "common/common.hpp" +#include "common/media_io.h" #ifdef HAVE_INDEX_HTML #include "frontend/dist/gen_index_html.h" @@ -217,62 +218,6 @@ std::string extract_and_remove_sd_cpp_extra_args(std::string& text) { return extracted; } -enum class ImageFormat { JPEG, - PNG }; - -static int stbi_ext_write_png_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int stride_bytes, const char* parameters) { - int len; - unsigned char* png = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &len, parameters); - if (png == NULL) - return 0; - func(context, png, len); - 
STBIW_FREE(png); - return 1; -} - -std::vector write_image_to_vector( - ImageFormat format, - const uint8_t* image, - int width, - int height, - int channels, - std::string params = "", - int quality = 90) { - std::vector buffer; - - auto write_func = [&buffer](void* context, void* data, int size) { - uint8_t* src = reinterpret_cast(data); - buffer.insert(buffer.end(), src, src + size); - }; - - struct ContextWrapper { - decltype(write_func)& func; - } ctx{write_func}; - - auto c_func = [](void* context, void* data, int size) { - auto* wrapper = reinterpret_cast(context); - wrapper->func(context, data, size); - }; - - int result = 0; - switch (format) { - case ImageFormat::JPEG: - result = stbi_write_jpg_to_func(c_func, &ctx, width, height, channels, image, quality); - break; - case ImageFormat::PNG: - result = stbi_ext_write_png_to_func(c_func, &ctx, width, height, channels, image, width * channels, params.size() > 0 ? params.c_str() : nullptr); - break; - default: - throw std::runtime_error("invalid image format"); - } - - if (!result) { - throw std::runtime_error("write imgage to mem failed"); - } - - return buffer; -} - void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) { SDSvrParams* svr_params = (SDSvrParams*)data; log_print(level, log, svr_params->verbose, svr_params->color); @@ -345,7 +290,7 @@ void free_results(sd_image_t* result_images, int num_results) { if (result_images) { for (int i = 0; i < num_results; ++i) { if (result_images[i].data) { - stbi_image_free(result_images[i].data); + free(result_images[i].data); result_images[i].data = nullptr; } } @@ -416,9 +361,9 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(prompt); - if (output_format != "png" && output_format != "jpeg") { + if (output_format != "png" && output_format != "jpeg" && output_format != "webp") { res.status = 400; - res.set_content(R"({"error":"invalid 
output_format, must be one of [png, jpeg]"})", "application/json"); + res.set_content(R"({"error":"invalid output_format, must be one of [png, jpeg, webp]"})", "application/json"); return; } if (n <= 0) @@ -511,13 +456,17 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { std::string params = gen_params.embed_image_metadata ? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i) : ""; - auto image_bytes = write_image_to_vector(output_format == "jpeg" ? ImageFormat::JPEG : ImageFormat::PNG, - results[i].data, - results[i].width, - results[i].height, - results[i].channel, - params, - output_compression); + auto image_bytes = encode_image_to_vector(output_format == "jpeg" + ? EncodedImageFormat::JPEG + : output_format == "webp" + ? EncodedImageFormat::WEBP + : EncodedImageFormat::PNG, + results[i].data, + results[i].width, + results[i].height, + results[i].channel, + params, + output_compression); if (image_bytes.empty()) { LOG_ERROR("write image to mem failed"); continue; @@ -765,13 +714,17 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { std::string params = gen_params.embed_image_metadata ? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i) : ""; - auto image_bytes = write_image_to_vector(output_format == "jpeg" ? ImageFormat::JPEG : ImageFormat::PNG, - results[i].data, - results[i].width, - results[i].height, - results[i].channel, - params, - output_compression); + auto image_bytes = encode_image_to_vector(output_format == "jpeg" + ? EncodedImageFormat::JPEG + : output_format == "webp" + ? 
EncodedImageFormat::WEBP + : EncodedImageFormat::PNG, + results[i].data, + results[i].width, + results[i].height, + results[i].channel, + params, + output_compression); std::string b64 = base64_encode(image_bytes); json item; item["b64_json"] = b64; @@ -783,13 +736,13 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) { res.status = 200; if (init_image.data) { - stbi_image_free(init_image.data); + free(init_image.data); } if (mask_image.data) { - stbi_image_free(mask_image.data); + free(mask_image.data); } for (auto ref_image : ref_images) { - stbi_image_free(ref_image.data); + free(ref_image.data); } } catch (const std::exception& e) { res.status = 500; @@ -1084,12 +1037,12 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { std::string params = gen_params.embed_image_metadata ? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i) : ""; - auto image_bytes = write_image_to_vector(ImageFormat::PNG, - results[i].data, - results[i].width, - results[i].height, - results[i].channel, - params); + auto image_bytes = encode_image_to_vector(EncodedImageFormat::PNG, + results[i].data, + results[i].width, + results[i].height, + results[i].channel, + params); if (image_bytes.empty()) { LOG_ERROR("write image to mem failed"); @@ -1105,13 +1058,13 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) { res.status = 200; if (init_image.data) { - stbi_image_free(init_image.data); + free(init_image.data); } if (mask_image.data && mask_data.empty()) { - stbi_image_free(mask_image.data); + free(mask_image.data); } for (auto ref_image : ref_images) { - stbi_image_free(ref_image.data); + free(ref_image.data); } } catch (const std::exception& e) { diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index 77274c33..a1717850 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -1,3 +1,20 @@ set(Z_TARGET zip) add_library(${Z_TARGET} OBJECT zip.c zip.h miniz.h) 
-target_include_directories(${Z_TARGET} PUBLIC .) \ No newline at end of file +target_include_directories(${Z_TARGET} PUBLIC .) + +if(SD_WEBP) + set(WEBP_BUILD_ANIM_UTILS OFF) + set(WEBP_BUILD_CWEBP OFF) + set(WEBP_BUILD_DWEBP OFF) + set(WEBP_BUILD_GIF2WEBP OFF) + set(WEBP_BUILD_IMG2WEBP OFF) + set(WEBP_BUILD_VWEBP OFF) + set(WEBP_BUILD_WEBPINFO OFF) + set(WEBP_BUILD_WEBPMUX OFF) + set(WEBP_BUILD_EXTRAS OFF) + set(WEBP_BUILD_WEBP_JS OFF) + set(WEBP_BUILD_FUZZTEST OFF) + set(WEBP_BUILD_LIBWEBPMUX ON) + + add_subdirectory(libwebp EXCLUDE_FROM_ALL) +endif() diff --git a/thirdparty/libwebp b/thirdparty/libwebp new file mode 160000 index 00000000..0c9546f7 --- /dev/null +++ b/thirdparty/libwebp @@ -0,0 +1 @@ +Subproject commit 0c9546f7efc61eac7f79ae115c3f99c91c21c443