mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-05-09 00:38:55 +00:00
Compare commits
No commits in common. "7397ddaa86f4e8837d5261724678cde0f36d4d89" and "687a81f251c8317988cc0655576622d6b4a14bac" have entirely different histories.
7397ddaa86
...
687a81f251
1
.github/workflows/build.yml
vendored
1
.github/workflows/build.yml
vendored
@ -239,7 +239,6 @@ jobs:
|
|||||||
id: build-push
|
id: build-push
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
with:
|
with:
|
||||||
context: .
|
|
||||||
platforms: linux/amd64
|
platforms: linux/amd64
|
||||||
push: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
push: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
file: Dockerfile.${{ matrix.variant }}
|
file: Dockerfile.${{ matrix.variant }}
|
||||||
|
|||||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -7,6 +7,3 @@
|
|||||||
[submodule "thirdparty/libwebp"]
|
[submodule "thirdparty/libwebp"]
|
||||||
path = thirdparty/libwebp
|
path = thirdparty/libwebp
|
||||||
url = https://github.com/webmproject/libwebp.git
|
url = https://github.com/webmproject/libwebp.git
|
||||||
[submodule "thirdparty/libwebm"]
|
|
||||||
path = thirdparty/libwebm
|
|
||||||
url = https://github.com/webmproject/libwebm.git
|
|
||||||
|
|||||||
@ -32,16 +32,6 @@ else()
|
|||||||
set(SD_WEBP_DEFAULT ${SD_USE_SYSTEM_WEBP})
|
set(SD_WEBP_DEFAULT ${SD_USE_SYSTEM_WEBP})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(SD_SUBMODULE_WEBM FALSE)
|
|
||||||
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libwebm/CMakeLists.txt")
|
|
||||||
set(SD_SUBMODULE_WEBM TRUE)
|
|
||||||
endif()
|
|
||||||
if(SD_SUBMODULE_WEBM)
|
|
||||||
set(SD_WEBM_DEFAULT ON)
|
|
||||||
else()
|
|
||||||
set(SD_WEBM_DEFAULT ${SD_USE_SYSTEM_WEBM})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Option list
|
# Option list
|
||||||
#
|
#
|
||||||
@ -51,8 +41,6 @@ endif()
|
|||||||
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
|
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
|
||||||
option(SD_WEBP "sd: enable WebP image I/O support" ${SD_WEBP_DEFAULT})
|
option(SD_WEBP "sd: enable WebP image I/O support" ${SD_WEBP_DEFAULT})
|
||||||
option(SD_USE_SYSTEM_WEBP "sd: link against system libwebp" OFF)
|
option(SD_USE_SYSTEM_WEBP "sd: link against system libwebp" OFF)
|
||||||
option(SD_WEBM "sd: enable WebM video output support" ${SD_WEBM_DEFAULT})
|
|
||||||
option(SD_USE_SYSTEM_WEBM "sd: link against system libwebm" OFF)
|
|
||||||
option(SD_CUDA "sd: cuda backend" OFF)
|
option(SD_CUDA "sd: cuda backend" OFF)
|
||||||
option(SD_HIPBLAS "sd: rocm backend" OFF)
|
option(SD_HIPBLAS "sd: rocm backend" OFF)
|
||||||
option(SD_METAL "sd: metal backend" OFF)
|
option(SD_METAL "sd: metal backend" OFF)
|
||||||
@ -123,31 +111,7 @@ if(SD_WEBP)
|
|||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
add_compile_definitions(SD_USE_WEBP)
|
||||||
|
|
||||||
if(SD_WEBM)
|
|
||||||
if(NOT SD_WEBP)
|
|
||||||
message(FATAL_ERROR "SD_WEBM requires SD_WEBP because WebM output reuses libwebp VP8 encoding.")
|
|
||||||
endif()
|
|
||||||
if(NOT SD_SUBMODULE_WEBM AND NOT SD_USE_SYSTEM_WEBM)
|
|
||||||
message(FATAL_ERROR "WebM support enabled but no source found.
|
|
||||||
Either initialize the submodule:\n git submodule update --init thirdparty/libwebm\n\n"
|
|
||||||
"Or link against system library:\n cmake (...) -DSD_USE_SYSTEM_WEBM=ON")
|
|
||||||
endif()
|
|
||||||
if(SD_USE_SYSTEM_WEBM)
|
|
||||||
find_path(WEBM_INCLUDE_DIR
|
|
||||||
NAMES mkvmuxer/mkvmuxer.h mkvparser/mkvparser.h common/webmids.h
|
|
||||||
PATH_SUFFIXES webm
|
|
||||||
REQUIRED)
|
|
||||||
find_library(WEBM_LIBRARY
|
|
||||||
NAMES webm libwebm
|
|
||||||
REQUIRED)
|
|
||||||
|
|
||||||
add_library(webm UNKNOWN IMPORTED)
|
|
||||||
set_target_properties(webm PROPERTIES
|
|
||||||
IMPORTED_LOCATION "${WEBM_LIBRARY}"
|
|
||||||
INTERFACE_INCLUDE_DIRECTORIES "${WEBM_INCLUDE_DIR}")
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(SD_LIB stable-diffusion)
|
set(SD_LIB stable-diffusion)
|
||||||
|
|||||||
@ -16,23 +16,15 @@ git submodule init
|
|||||||
git submodule update
|
git submodule update
|
||||||
```
|
```
|
||||||
|
|
||||||
## WebP and WebM Support in Examples
|
## WebP Support in Examples
|
||||||
|
|
||||||
The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O, and `examples/cli` can also use `libwebm` for `.webm` video output. Both are enabled by default. WebM output currently reuses `libwebp` to encode each frame as VP8 before muxing with `libwebm`.
|
The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O. This is enabled by default.
|
||||||
|
|
||||||
If you do not want WebP/WebM support, you can disable them at configure time:
|
If you do not want WebP support, you can disable it at configure time:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
mkdir build && cd build
|
mkdir build && cd build
|
||||||
cmake .. -DSD_WEBP=OFF -DSD_WEBM=OFF
|
cmake .. -DSD_WEBP=OFF
|
||||||
cmake --build . --config Release
|
|
||||||
```
|
|
||||||
|
|
||||||
If the submodules are not available, you can also link against system packages instead:
|
|
||||||
|
|
||||||
```shell
|
|
||||||
mkdir build && cd build
|
|
||||||
cmake .. -DSD_USE_SYSTEM_WEBP=ON -DSD_USE_SYSTEM_WEBM=ON
|
|
||||||
cmake --build . --config Release
|
cmake --build . --config Release
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@ -9,11 +9,6 @@ add_executable(${TARGET}
|
|||||||
install(TARGETS ${TARGET} RUNTIME)
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
|
||||||
if(SD_WEBP)
|
if(SD_WEBP)
|
||||||
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBP)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
|
target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
|
||||||
endif()
|
endif()
|
||||||
if(SD_WEBM)
|
|
||||||
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBM)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE webm)
|
|
||||||
endif()
|
|
||||||
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
|
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
|
||||||
|
|||||||
@ -5,8 +5,8 @@ usage: ./bin/sd-cli [options]
|
|||||||
|
|
||||||
CLI Options:
|
CLI Options:
|
||||||
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default:
|
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default:
|
||||||
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi, .webm, and animated .webp
|
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi and animated .webp
|
||||||
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp
|
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi and animated .webp
|
||||||
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
|
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
|
||||||
every step)
|
every step)
|
||||||
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
|
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
|
||||||
|
|||||||
@ -58,7 +58,7 @@ struct SDCliParams {
|
|||||||
options.string_options = {
|
options.string_options = {
|
||||||
{"-o",
|
{"-o",
|
||||||
"--output",
|
"--output",
|
||||||
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png). Single-file video outputs support .avi, .webm, and animated .webp",
|
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)",
|
||||||
&output_path},
|
&output_path},
|
||||||
{"",
|
{"",
|
||||||
"--image",
|
"--image",
|
||||||
@ -70,7 +70,7 @@ struct SDCliParams {
|
|||||||
&metadata_format},
|
&metadata_format},
|
||||||
{"",
|
{"",
|
||||||
"--preview-path",
|
"--preview-path",
|
||||||
"path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp",
|
"path to write preview image to (default: ./preview.png)",
|
||||||
&preview_path},
|
&preview_path},
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -396,9 +396,7 @@ bool save_results(const SDCliParams& cli_params,
|
|||||||
if (!ext.empty()) {
|
if (!ext.empty()) {
|
||||||
if (output_format == EncodedImageFormat::JPEG ||
|
if (output_format == EncodedImageFormat::JPEG ||
|
||||||
output_format == EncodedImageFormat::PNG ||
|
output_format == EncodedImageFormat::PNG ||
|
||||||
output_format == EncodedImageFormat::WEBP ||
|
output_format == EncodedImageFormat::WEBP) {
|
||||||
ext_lower == ".avi" ||
|
|
||||||
ext_lower == ".webm") {
|
|
||||||
base_path.replace_extension();
|
base_path.replace_extension();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -440,7 +438,7 @@ bool save_results(const SDCliParams& cli_params,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (cli_params.mode == VID_GEN && num_results > 1) {
|
if (cli_params.mode == VID_GEN && num_results > 1) {
|
||||||
if (ext_lower != ".avi" && ext_lower != ".webp" && ext_lower != ".webm")
|
if (ext_lower != ".avi" && ext_lower != ".webp")
|
||||||
ext = ".avi";
|
ext = ".avi";
|
||||||
fs::path video_path = base_path;
|
fs::path video_path = base_path;
|
||||||
video_path += ext;
|
video_path += ext;
|
||||||
|
|||||||
@ -30,11 +30,6 @@
|
|||||||
#include "webp/mux.h"
|
#include "webp/mux.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SD_USE_WEBM
|
|
||||||
#include "mkvmuxer/mkvmuxer.h"
|
|
||||||
#include "mkvmuxer/mkvwriter.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
@ -76,13 +71,6 @@ bool write_binary_file_bytes(const std::string& path, const std::vector<uint8_t>
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t read_u32_le_bytes(const uint8_t* data) {
|
|
||||||
return static_cast<uint32_t>(data[0]) |
|
|
||||||
(static_cast<uint32_t>(data[1]) << 8) |
|
|
||||||
(static_cast<uint32_t>(data[2]) << 16) |
|
|
||||||
(static_cast<uint32_t>(data[3]) << 24);
|
|
||||||
}
|
|
||||||
|
|
||||||
int stbi_ext_write_png_to_func(stbi_write_func* func,
|
int stbi_ext_write_png_to_func(stbi_write_func* func,
|
||||||
void* context,
|
void* context,
|
||||||
int x,
|
int x,
|
||||||
@ -301,76 +289,6 @@ bool encode_webp_image_to_vector(const uint8_t* image,
|
|||||||
WebPMuxDelete(mux);
|
WebPMuxDelete(mux);
|
||||||
return ok;
|
return ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SD_USE_WEBM
|
|
||||||
bool extract_vp8_frame_from_webp(const std::vector<uint8_t>& webp_data, std::vector<uint8_t>& vp8_frame) {
|
|
||||||
if (!is_webp_signature(webp_data.data(), webp_data.size())) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t offset = 12;
|
|
||||||
while (offset + 8 <= webp_data.size()) {
|
|
||||||
const uint8_t* chunk = webp_data.data() + offset;
|
|
||||||
const uint32_t chunk_len = read_u32_le_bytes(chunk + 4);
|
|
||||||
const size_t chunk_start = offset + 8;
|
|
||||||
const size_t padded_len = static_cast<size_t>(chunk_len) + (chunk_len & 1u);
|
|
||||||
|
|
||||||
if (chunk_start + chunk_len > webp_data.size()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (memcmp(chunk, "VP8 ", 4) == 0) {
|
|
||||||
vp8_frame.assign(webp_data.data() + chunk_start,
|
|
||||||
webp_data.data() + chunk_start + chunk_len);
|
|
||||||
return !vp8_frame.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
offset = chunk_start + padded_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool encode_sd_image_to_vp8_frame(const sd_image_t& image, int quality, std::vector<uint8_t>& vp8_frame) {
|
|
||||||
if (image.data == nullptr || image.width == 0 || image.height == 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int width = static_cast<int>(image.width);
|
|
||||||
const int height = static_cast<int>(image.height);
|
|
||||||
const int input_channel = static_cast<int>(image.channel);
|
|
||||||
if (input_channel != 1 && input_channel != 3 && input_channel != 4) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<uint8_t> rgb_buffer;
|
|
||||||
const uint8_t* rgb_data = image.data;
|
|
||||||
if (input_channel == 1) {
|
|
||||||
rgb_buffer.resize(static_cast<size_t>(width) * static_cast<size_t>(height) * 3);
|
|
||||||
for (int i = 0; i < width * height; ++i) {
|
|
||||||
rgb_buffer[i * 3 + 0] = image.data[i];
|
|
||||||
rgb_buffer[i * 3 + 1] = image.data[i];
|
|
||||||
rgb_buffer[i * 3 + 2] = image.data[i];
|
|
||||||
}
|
|
||||||
rgb_data = rgb_buffer.data();
|
|
||||||
} else if (input_channel == 4) {
|
|
||||||
rgb_buffer.resize(static_cast<size_t>(width) * static_cast<size_t>(height) * 3);
|
|
||||||
for (int i = 0; i < width * height; ++i) {
|
|
||||||
rgb_buffer[i * 3 + 0] = image.data[i * 4 + 0];
|
|
||||||
rgb_buffer[i * 3 + 1] = image.data[i * 4 + 1];
|
|
||||||
rgb_buffer[i * 3 + 2] = image.data[i * 4 + 2];
|
|
||||||
}
|
|
||||||
rgb_data = rgb_buffer.data();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<uint8_t> encoded_webp;
|
|
||||||
if (!encode_webp_image_to_vector(rgb_data, width, height, 3, "", quality, encoded_webp)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return extract_vp8_frame_from_webp(encoded_webp, vp8_frame);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
uint8_t* load_image_common(bool from_memory,
|
uint8_t* load_image_common(bool from_memory,
|
||||||
@ -943,99 +861,6 @@ cleanup:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SD_USE_WEBM
|
|
||||||
int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
|
||||||
if (num_images == 0) {
|
|
||||||
fprintf(stderr, "Error: Image array is empty.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (fps <= 0) {
|
|
||||||
fprintf(stderr, "Error: FPS must be positive.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int width = static_cast<int>(images[0].width);
|
|
||||||
const int height = static_cast<int>(images[0].height);
|
|
||||||
if (width <= 0 || height <= 0) {
|
|
||||||
fprintf(stderr, "Error: Invalid frame dimensions.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
mkvmuxer::MkvWriter writer;
|
|
||||||
if (!writer.Open(filename)) {
|
|
||||||
fprintf(stderr, "Error: Could not open WebM file for writing.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int ret = [&]() -> int {
|
|
||||||
mkvmuxer::Segment segment;
|
|
||||||
if (!segment.Init(&writer)) {
|
|
||||||
fprintf(stderr, "Error: Failed to initialize WebM muxer.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
segment.set_mode(mkvmuxer::Segment::kFile);
|
|
||||||
segment.OutputCues(true);
|
|
||||||
|
|
||||||
const uint64_t track_number = segment.AddVideoTrack(width, height, 0);
|
|
||||||
if (track_number == 0) {
|
|
||||||
fprintf(stderr, "Error: Failed to add VP8 video track.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (!segment.CuesTrack(track_number)) {
|
|
||||||
fprintf(stderr, "Error: Failed to set WebM cues track.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
mkvmuxer::VideoTrack* video_track = static_cast<mkvmuxer::VideoTrack*>(segment.GetTrackByNumber(track_number));
|
|
||||||
if (video_track != nullptr) {
|
|
||||||
video_track->set_display_width(static_cast<uint64_t>(width));
|
|
||||||
video_track->set_display_height(static_cast<uint64_t>(height));
|
|
||||||
video_track->set_frame_rate(static_cast<double>(fps));
|
|
||||||
}
|
|
||||||
segment.GetSegmentInfo()->set_writing_app("stable-diffusion.cpp");
|
|
||||||
segment.GetSegmentInfo()->set_muxing_app("stable-diffusion.cpp");
|
|
||||||
|
|
||||||
const uint64_t frame_duration_ns = std::max<uint64_t>(
|
|
||||||
1, static_cast<uint64_t>(std::llround(1000000000.0 / static_cast<double>(fps))));
|
|
||||||
uint64_t timestamp_ns = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < num_images; ++i) {
|
|
||||||
const sd_image_t& image = images[i];
|
|
||||||
if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height) {
|
|
||||||
fprintf(stderr, "Error: Frame dimensions do not match.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<uint8_t> vp8_frame;
|
|
||||||
if (!encode_sd_image_to_vp8_frame(image, quality, vp8_frame)) {
|
|
||||||
fprintf(stderr, "Error: Failed to encode frame %d as VP8.\n", i);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!segment.AddFrame(vp8_frame.data(),
|
|
||||||
static_cast<uint64_t>(vp8_frame.size()),
|
|
||||||
track_number,
|
|
||||||
timestamp_ns,
|
|
||||||
true)) {
|
|
||||||
fprintf(stderr, "Error: Failed to mux frame %d into WebM.\n", i);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
timestamp_ns += frame_duration_ns;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!segment.Finalize()) {
|
|
||||||
fprintf(stderr, "Error: Failed to finalize WebM output.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}();
|
|
||||||
writer.Close();
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
||||||
std::string path = filename ? filename : "";
|
std::string path = filename ? filename : "";
|
||||||
auto pos = path.find_last_of('.');
|
auto pos = path.find_last_of('.');
|
||||||
@ -1044,12 +869,6 @@ int create_video_from_sd_images(const char* filename, sd_image_t* images, int nu
|
|||||||
ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
|
ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SD_USE_WEBM
|
|
||||||
if (ext == ".webm") {
|
|
||||||
return create_webm_from_sd_images(filename, images, num_images, fps, quality);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SD_USE_WEBP
|
#ifdef SD_USE_WEBP
|
||||||
if (ext == ".webp") {
|
if (ext == ".webp") {
|
||||||
return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality);
|
return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality);
|
||||||
|
|||||||
@ -67,14 +67,6 @@ int create_animated_webp_from_sd_images(const char* filename,
|
|||||||
int quality = 90);
|
int quality = 90);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SD_USE_WEBM
|
|
||||||
int create_webm_from_sd_images(const char* filename,
|
|
||||||
sd_image_t* images,
|
|
||||||
int num_images,
|
|
||||||
int fps,
|
|
||||||
int quality = 90);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int create_video_from_sd_images(const char* filename,
|
int create_video_from_sd_images(const char* filename,
|
||||||
sd_image_t* images,
|
sd_image_t* images,
|
||||||
int num_images,
|
int num_images,
|
||||||
|
|||||||
@ -75,13 +75,8 @@ endif()
|
|||||||
install(TARGETS ${TARGET} RUNTIME)
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
|
||||||
if(SD_WEBP)
|
if(SD_WEBP)
|
||||||
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBP)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
|
target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
|
||||||
endif()
|
endif()
|
||||||
if(SD_WEBM)
|
|
||||||
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBM)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE webm)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# due to httplib; it contains a pragma for MSVC, but other things need explicit flags
|
# due to httplib; it contains a pragma for MSVC, but other things need explicit flags
|
||||||
if(WIN32 AND NOT MSVC)
|
if(WIN32 AND NOT MSVC)
|
||||||
|
|||||||
@ -2846,8 +2846,7 @@ static std::optional<ImageGenerationLatents> prepare_image_generation_latents(sd
|
|||||||
{request->width / request->vae_scale_factor,
|
{request->width / request->vae_scale_factor,
|
||||||
request->height / request->vae_scale_factor,
|
request->height / request->vae_scale_factor,
|
||||||
1,
|
1,
|
||||||
1},
|
1});
|
||||||
sd::ops::InterpolateMode::NearestMax);
|
|
||||||
|
|
||||||
sd::Tensor<float> init_latent;
|
sd::Tensor<float> init_latent;
|
||||||
sd::Tensor<float> control_latent;
|
sd::Tensor<float> control_latent;
|
||||||
@ -2992,12 +2991,8 @@ static std::optional<ImageGenerationLatents> prepare_image_generation_latents(sd
|
|||||||
latents.ref_latents = std::move(ref_latents);
|
latents.ref_latents = std::move(ref_latents);
|
||||||
|
|
||||||
if (sd_version_is_inpaint(sd_ctx->sd->version)) {
|
if (sd_version_is_inpaint(sd_ctx->sd->version)) {
|
||||||
latent_mask = sd::ops::max_pool_2d(latent_mask,
|
latents.denoise_mask = std::move(latent_mask);
|
||||||
{3, 3},
|
|
||||||
{1, 1},
|
|
||||||
{1, 1});
|
|
||||||
}
|
}
|
||||||
latents.denoise_mask = std::move(latent_mask);
|
|
||||||
|
|
||||||
return latents;
|
return latents;
|
||||||
}
|
}
|
||||||
|
|||||||
197
src/tensor.hpp
197
src/tensor.hpp
@ -815,9 +815,6 @@ namespace sd {
|
|||||||
namespace ops {
|
namespace ops {
|
||||||
enum class InterpolateMode {
|
enum class InterpolateMode {
|
||||||
Nearest,
|
Nearest,
|
||||||
NearestMax,
|
|
||||||
NearestMin,
|
|
||||||
NearestAvg,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
inline int64_t normalize_slice_bound(int64_t index, int64_t dim_size) {
|
inline int64_t normalize_slice_bound(int64_t index, int64_t dim_size) {
|
||||||
@ -1015,16 +1012,12 @@ namespace sd {
|
|||||||
std::vector<int64_t> output_shape,
|
std::vector<int64_t> output_shape,
|
||||||
InterpolateMode mode = InterpolateMode::Nearest,
|
InterpolateMode mode = InterpolateMode::Nearest,
|
||||||
bool align_corners = false) {
|
bool align_corners = false) {
|
||||||
const bool is_nearest_like_mode = (mode == InterpolateMode::Nearest ||
|
if (mode != InterpolateMode::Nearest) {
|
||||||
mode == InterpolateMode::NearestMax ||
|
tensor_throw_invalid_argument("Only nearest interpolate mode is implemented, got mode=" +
|
||||||
mode == InterpolateMode::NearestMin ||
|
|
||||||
mode == InterpolateMode::NearestAvg);
|
|
||||||
if (!is_nearest_like_mode) {
|
|
||||||
tensor_throw_invalid_argument("Only nearest-like interpolate modes are implemented, got mode=" +
|
|
||||||
std::to_string(static_cast<int>(mode)));
|
std::to_string(static_cast<int>(mode)));
|
||||||
}
|
}
|
||||||
if (align_corners) {
|
if (align_corners) {
|
||||||
tensor_throw_invalid_argument("align_corners is not supported for nearest-like interpolate: input_shape=" +
|
tensor_throw_invalid_argument("align_corners is not supported for nearest interpolate: input_shape=" +
|
||||||
tensor_shape_to_string(input.shape()) + ", output_shape=" +
|
tensor_shape_to_string(input.shape()) + ", output_shape=" +
|
||||||
tensor_shape_to_string(output_shape));
|
tensor_shape_to_string(output_shape));
|
||||||
}
|
}
|
||||||
@ -1051,102 +1044,14 @@ namespace sd {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has_downsampling = false;
|
|
||||||
for (int64_t i = 0; i < input.dim(); ++i) {
|
|
||||||
if (input.shape()[i] > output_shape[i]) {
|
|
||||||
has_downsampling = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Tensor<T> output(std::move(output_shape));
|
Tensor<T> output(std::move(output_shape));
|
||||||
if (mode == InterpolateMode::Nearest || !has_downsampling) {
|
for (int64_t flat = 0; flat < output.numel(); ++flat) {
|
||||||
for (int64_t flat = 0; flat < output.numel(); ++flat) {
|
std::vector<int64_t> output_coord = tensor_unravel_index(flat, output.shape());
|
||||||
std::vector<int64_t> output_coord = tensor_unravel_index(flat, output.shape());
|
std::vector<int64_t> input_coord(static_cast<size_t>(input.dim()), 0);
|
||||||
std::vector<int64_t> input_coord(static_cast<size_t>(input.dim()), 0);
|
for (size_t i = 0; i < static_cast<size_t>(input.dim()); ++i) {
|
||||||
for (size_t i = 0; i < static_cast<size_t>(input.dim()); ++i) {
|
input_coord[i] = output_coord[i] * input.shape()[i] / output.shape()[i];
|
||||||
input_coord[i] = output_coord[i] * input.shape()[i] / output.shape()[i];
|
|
||||||
}
|
|
||||||
output[flat] = input.index(input_coord);
|
|
||||||
}
|
}
|
||||||
|
output[flat] = input.index(input_coord);
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto init_reduction = [&]() -> T {
|
|
||||||
switch (mode) {
|
|
||||||
case InterpolateMode::NearestMax:
|
|
||||||
return std::numeric_limits<T>::lowest();
|
|
||||||
case InterpolateMode::NearestMin:
|
|
||||||
return std::numeric_limits<T>::max();
|
|
||||||
case InterpolateMode::NearestAvg:
|
|
||||||
return T(0);
|
|
||||||
case InterpolateMode::Nearest:
|
|
||||||
return T(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
tensor_throw_invalid_argument("Unsupported interpolate mode: mode=" +
|
|
||||||
std::to_string(static_cast<int>(mode)));
|
|
||||||
};
|
|
||||||
|
|
||||||
auto reduce_value = [&](T& acc, const T& sample) {
|
|
||||||
switch (mode) {
|
|
||||||
case InterpolateMode::NearestMax:
|
|
||||||
acc = std::max(acc, sample);
|
|
||||||
break;
|
|
||||||
case InterpolateMode::NearestMin:
|
|
||||||
acc = std::min(acc, sample);
|
|
||||||
break;
|
|
||||||
case InterpolateMode::NearestAvg:
|
|
||||||
acc += sample;
|
|
||||||
break;
|
|
||||||
case InterpolateMode::Nearest:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Reduction modes only differ from nearest mode when downsampling.
|
|
||||||
for (int64_t flat_out = 0; flat_out < output.numel(); ++flat_out) {
|
|
||||||
std::vector<int64_t> output_coord = tensor_unravel_index(flat_out, output.shape());
|
|
||||||
|
|
||||||
std::vector<int64_t> input_start(output.dim(), 0);
|
|
||||||
std::vector<int64_t> input_end(output.dim(), 0);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < static_cast<size_t>(output.dim()); ++i) {
|
|
||||||
const int64_t input_dim = input.shape()[i];
|
|
||||||
const int64_t output_dim = output.shape()[i];
|
|
||||||
|
|
||||||
input_start[i] = std::max(int64_t(0), static_cast<int64_t>(output_coord[i] * input_dim / output_dim));
|
|
||||||
input_end[i] = std::min(input_dim, ((output_coord[i] + 1) * input_dim + output_dim - 1) / output_dim);
|
|
||||||
}
|
|
||||||
|
|
||||||
T value = init_reduction();
|
|
||||||
bool done_window = false;
|
|
||||||
std::vector<int64_t> current_in_coord = input_start;
|
|
||||||
|
|
||||||
while (!done_window) {
|
|
||||||
reduce_value(value, input.index(current_in_coord));
|
|
||||||
|
|
||||||
for (int d = static_cast<int>(output.dim()) - 1; d >= 0; --d) {
|
|
||||||
if (++current_in_coord[d] < input_end[d]) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
current_in_coord[d] = input_start[d];
|
|
||||||
if (d == 0) {
|
|
||||||
done_window = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mode == InterpolateMode::NearestAvg) {
|
|
||||||
int64_t window_size = 1;
|
|
||||||
for (size_t i = 0; i < static_cast<size_t>(output.dim()); ++i) {
|
|
||||||
window_size *= (input_end[i] - input_start[i]);
|
|
||||||
}
|
|
||||||
value /= static_cast<T>(window_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
output[flat_out] = value;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
@ -1158,16 +1063,12 @@ namespace sd {
|
|||||||
const std::optional<std::vector<double>>& scale_factor,
|
const std::optional<std::vector<double>>& scale_factor,
|
||||||
InterpolateMode mode = InterpolateMode::Nearest,
|
InterpolateMode mode = InterpolateMode::Nearest,
|
||||||
bool align_corners = false) {
|
bool align_corners = false) {
|
||||||
const bool is_nearest_like_mode = (mode == InterpolateMode::Nearest ||
|
if (mode != InterpolateMode::Nearest) {
|
||||||
mode == InterpolateMode::NearestMax ||
|
tensor_throw_invalid_argument("Only nearest interpolate mode is implemented, got mode=" +
|
||||||
mode == InterpolateMode::NearestMin ||
|
|
||||||
mode == InterpolateMode::NearestAvg);
|
|
||||||
if (!is_nearest_like_mode) {
|
|
||||||
tensor_throw_invalid_argument("Only nearest-like interpolate modes are implemented, got mode=" +
|
|
||||||
std::to_string(static_cast<int>(mode)));
|
std::to_string(static_cast<int>(mode)));
|
||||||
}
|
}
|
||||||
if (align_corners) {
|
if (align_corners) {
|
||||||
tensor_throw_invalid_argument("align_corners is not supported for nearest-like interpolate: input_shape=" +
|
tensor_throw_invalid_argument("align_corners is not supported for nearest interpolate: input_shape=" +
|
||||||
tensor_shape_to_string(input.shape()));
|
tensor_shape_to_string(input.shape()));
|
||||||
}
|
}
|
||||||
if (size.has_value() == scale_factor.has_value()) {
|
if (size.has_value() == scale_factor.has_value()) {
|
||||||
@ -1227,80 +1128,6 @@ namespace sd {
|
|||||||
align_corners);
|
align_corners);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline Tensor<T> max_pool_2d(const Tensor<T>& input,
|
|
||||||
std::vector<int64_t> kernel_size,
|
|
||||||
std::vector<int64_t> stride,
|
|
||||||
std::vector<int64_t> padding) {
|
|
||||||
if (input.dim() < 2) {
|
|
||||||
tensor_throw_invalid_argument("Tensor max_pool_2d requires input_dim >= 2: input_dim=" +
|
|
||||||
std::to_string(input.dim()) + ", input_shape=" +
|
|
||||||
tensor_shape_to_string(input.shape()));
|
|
||||||
}
|
|
||||||
if (kernel_size.size() != 2 || stride.size() != 2 || padding.size() != 2) {
|
|
||||||
tensor_throw_invalid_argument("Tensor max_pool_2d requires kernel_size, stride, and padding to have length 2");
|
|
||||||
}
|
|
||||||
for (size_t i = 0; i < 2; ++i) {
|
|
||||||
if (kernel_size[i] <= 0) {
|
|
||||||
tensor_throw_invalid_argument("Tensor max_pool_2d kernel_size must be positive: kernel_size=" +
|
|
||||||
tensor_shape_to_string(kernel_size));
|
|
||||||
}
|
|
||||||
if (stride[i] <= 0) {
|
|
||||||
tensor_throw_invalid_argument("Tensor max_pool_2d stride must be positive: stride=" +
|
|
||||||
tensor_shape_to_string(stride));
|
|
||||||
}
|
|
||||||
if (padding[i] < 0) {
|
|
||||||
tensor_throw_invalid_argument("Tensor max_pool_2d padding must be non-negative: padding=" +
|
|
||||||
tensor_shape_to_string(padding));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const int64_t in_height = input.shape()[0];
|
|
||||||
const int64_t in_width = input.shape()[1];
|
|
||||||
|
|
||||||
const int64_t out_height = (in_height + 2 * padding[0] - kernel_size[0]) / stride[0] + 1;
|
|
||||||
const int64_t out_width = (in_width + 2 * padding[1] - kernel_size[1]) / stride[1] + 1;
|
|
||||||
|
|
||||||
if (out_height <= 0 || out_width <= 0) {
|
|
||||||
tensor_throw_invalid_argument("max_pool_2d results in invalid output dimensions: " +
|
|
||||||
std::to_string(out_height) + "x" + std::to_string(out_width));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<int64_t> output_shape = input.shape();
|
|
||||||
output_shape[0] = out_height;
|
|
||||||
output_shape[1] = out_width;
|
|
||||||
|
|
||||||
Tensor<T> output(std::move(output_shape));
|
|
||||||
|
|
||||||
for (int64_t flat_out = 0; flat_out < output.numel(); ++flat_out) {
|
|
||||||
std::vector<int64_t> output_coord = tensor_unravel_index(flat_out, output.shape());
|
|
||||||
std::vector<int64_t> input_coord = output_coord;
|
|
||||||
|
|
||||||
const int64_t oh = output_coord[0];
|
|
||||||
const int64_t ow = output_coord[1];
|
|
||||||
|
|
||||||
T max_val = std::numeric_limits<T>::lowest();
|
|
||||||
bool has_valid_input = false;
|
|
||||||
|
|
||||||
for (int64_t kh = 0; kh < kernel_size[0]; ++kh) {
|
|
||||||
for (int64_t kw = 0; kw < kernel_size[1]; ++kw) {
|
|
||||||
const int64_t ih = oh * stride[0] + kh - padding[0];
|
|
||||||
const int64_t iw = ow * stride[1] + kw - padding[1];
|
|
||||||
|
|
||||||
if (ih >= 0 && ih < in_height && iw >= 0 && iw < in_width) {
|
|
||||||
input_coord[0] = ih;
|
|
||||||
input_coord[1] = iw;
|
|
||||||
max_val = std::max(max_val, input.index(input_coord));
|
|
||||||
has_valid_input = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
output[flat_out] = has_valid_input ? max_val : T(0);
|
|
||||||
}
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline Tensor<T> concat(const Tensor<T>& lhs, const Tensor<T>& rhs, size_t dim) {
|
inline Tensor<T> concat(const Tensor<T>& lhs, const Tensor<T>& rhs, size_t dim) {
|
||||||
if (lhs.dim() != rhs.dim()) {
|
if (lhs.dim() != rhs.dim()) {
|
||||||
|
|||||||
25
thirdparty/CMakeLists.txt
vendored
25
thirdparty/CMakeLists.txt
vendored
@ -18,28 +18,3 @@ if(SD_WEBP AND NOT SD_USE_SYSTEM_WEBP)
|
|||||||
|
|
||||||
add_subdirectory(libwebp EXCLUDE_FROM_ALL)
|
add_subdirectory(libwebp EXCLUDE_FROM_ALL)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(SD_WEBM AND NOT SD_USE_SYSTEM_WEBM)
|
|
||||||
if(MSVC)
|
|
||||||
set(MSVC_RUNTIME dll)
|
|
||||||
endif()
|
|
||||||
set(ENABLE_WEBMTS OFF)
|
|
||||||
set(ENABLE_WEBMINFO OFF)
|
|
||||||
set(ENABLE_TESTS OFF)
|
|
||||||
set(ENABLE_WEBM_PARSER OFF)
|
|
||||||
set(ENABLE_SAMPLE_PROGRAMS OFF)
|
|
||||||
|
|
||||||
set(SD_LIBWEBM_PARENT_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
|
||||||
|
|
||||||
add_subdirectory(libwebm EXCLUDE_FROM_ALL)
|
|
||||||
|
|
||||||
# libwebm mutates the global CMAKE_CXX_FLAGS for non-MSVC compilers to force
|
|
||||||
# C++11. Restore the parent flags so the main project keeps its own C++17
|
|
||||||
# requirements, then pin the libwebm targets to C++17 explicitly.
|
|
||||||
set(CMAKE_CXX_FLAGS "${SD_LIBWEBM_PARENT_CXX_FLAGS}" CACHE STRING "" FORCE)
|
|
||||||
target_compile_features(mkvmuxer PRIVATE cxx_std_17)
|
|
||||||
target_compile_features(mkvparser PRIVATE cxx_std_17)
|
|
||||||
target_compile_features(webm PRIVATE cxx_std_17)
|
|
||||||
|
|
||||||
target_include_directories(webm INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/libwebm)
|
|
||||||
endif()
|
|
||||||
|
|||||||
1
thirdparty/libwebm
vendored
1
thirdparty/libwebm
vendored
@ -1 +0,0 @@
|
|||||||
Subproject commit 5bf12267eea773a32fcf4949de52b0add158a8d5
|
|
||||||
Loading…
x
Reference in New Issue
Block a user