mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-05-08 08:18:51 +00:00
Compare commits
6 Commits
a564fdf642
...
7d33d4b2dd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7d33d4b2dd | ||
|
|
3c99f700de | ||
|
|
4d626d24b2 | ||
|
|
f3f69e2fbe | ||
|
|
6a9cb31150 | ||
|
|
2bcff67480 |
1
.github/workflows/build.yml
vendored
1
.github/workflows/build.yml
vendored
@ -176,6 +176,7 @@ jobs:
|
|||||||
|
|
||||||
build-and-push-docker-images:
|
build-and-push-docker-images:
|
||||||
name: Build and push container images
|
name: Build and push container images
|
||||||
|
if: ${{ github.event_name != 'pull_request' }}
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
|
|||||||
@ -11,6 +11,10 @@ endif()
|
|||||||
if (MSVC)
|
if (MSVC)
|
||||||
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
||||||
add_compile_definitions(_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING)
|
add_compile_definitions(_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING)
|
||||||
|
add_compile_options(
|
||||||
|
$<$<COMPILE_LANGUAGE:C>:/MP>
|
||||||
|
$<$<COMPILE_LANGUAGE:CXX>:/MP>
|
||||||
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
|
|||||||
@ -1589,10 +1589,18 @@ bool SDGenerationParams::from_json_str(
|
|||||||
LOG_ERROR("invalid init_image");
|
LOG_ERROR("invalid init_image");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (!parse_image_json_field(j, "end_image", 3, width, height, end_image)) {
|
||||||
|
LOG_ERROR("invalid end_image");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (!parse_image_array_json_field(j, "ref_images", 3, width, height, ref_images)) {
|
if (!parse_image_array_json_field(j, "ref_images", 3, width, height, ref_images)) {
|
||||||
LOG_ERROR("invalid ref_images");
|
LOG_ERROR("invalid ref_images");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (!parse_image_array_json_field(j, "control_frames", 3, width, height, control_frames)) {
|
||||||
|
LOG_ERROR("invalid control_frames");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (!parse_image_json_field(j, "mask_image", 1, width, height, mask_image)) {
|
if (!parse_image_json_field(j, "mask_image", 1, width, height, mask_image)) {
|
||||||
LOG_ERROR("invalid mask_image");
|
LOG_ERROR("invalid mask_image");
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@ -95,6 +95,57 @@ using WebPMuxPtr = std::unique_ptr<WebPMux, WebPMuxDeleter>;
|
|||||||
using WebPAnimEncoderPtr = std::unique_ptr<WebPAnimEncoder, WebPAnimEncoderDeleter>;
|
using WebPAnimEncoderPtr = std::unique_ptr<WebPAnimEncoder, WebPAnimEncoderDeleter>;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef SD_USE_WEBM
|
||||||
|
class MemoryMkvWriter : public mkvmuxer::IMkvWriter {
|
||||||
|
public:
|
||||||
|
mkvmuxer::int32 Write(const void* buf, mkvmuxer::uint32 len) override {
|
||||||
|
if (buf == nullptr && len > 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
const size_t end_pos = position_ + static_cast<size_t>(len);
|
||||||
|
if (end_pos > data_.size()) {
|
||||||
|
data_.resize(end_pos);
|
||||||
|
}
|
||||||
|
if (len > 0) {
|
||||||
|
memcpy(data_.data() + position_, buf, len);
|
||||||
|
}
|
||||||
|
position_ = end_pos;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
mkvmuxer::int64 Position() const override {
|
||||||
|
return static_cast<mkvmuxer::int64>(position_);
|
||||||
|
}
|
||||||
|
|
||||||
|
mkvmuxer::int32 Position(mkvmuxer::int64 position) override {
|
||||||
|
if (position < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
const size_t target = static_cast<size_t>(position);
|
||||||
|
if (target > data_.size()) {
|
||||||
|
data_.resize(target);
|
||||||
|
}
|
||||||
|
position_ = target;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Seekable() const override {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ElementStartNotify(mkvmuxer::uint64, mkvmuxer::int64) override {
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<uint8_t>& data() const {
|
||||||
|
return data_;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<uint8_t> data_;
|
||||||
|
size_t position_ = 0;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
bool read_binary_file_bytes(const char* path, std::vector<uint8_t>& data) {
|
bool read_binary_file_bytes(const char* path, std::vector<uint8_t>& data) {
|
||||||
std::ifstream fin(fs::path(path), std::ios::binary);
|
std::ifstream fin(fs::path(path), std::ios::binary);
|
||||||
if (!fin) {
|
if (!fin) {
|
||||||
@ -570,6 +621,32 @@ void write_u16_le(FILE* f, uint16_t val) {
|
|||||||
fwrite(&val, 2, 1, f);
|
fwrite(&val, 2, 1, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends `val` to `data` as four bytes, least-significant byte first.
void write_u32_le(std::vector<uint8_t>& data, uint32_t val) {
    for (int shift = 0; shift < 32; shift += 8) {
        data.push_back(static_cast<uint8_t>((val >> shift) & 0xFF));
    }
}
|
||||||
|
|
||||||
|
// Appends `val` to `data` as two bytes, low byte first.
void write_u16_le(std::vector<uint8_t>& data, uint16_t val) {
    const uint8_t low_byte  = static_cast<uint8_t>(val & 0xFF);
    const uint8_t high_byte = static_cast<uint8_t>((val >> 8) & 0xFF);
    data.push_back(low_byte);
    data.push_back(high_byte);
}
|
||||||
|
|
||||||
|
// Overwrites the four bytes of `data` starting at `offset` with `val` in
// little-endian order. The call is a no-op when the four-byte window does not
// lie entirely inside `data`.
void patch_u32_le(std::vector<uint8_t>& data, size_t offset, uint32_t val) {
    // Compare against size() - 4 rather than computing offset + 4: the sum
    // wraps around for offsets near the maximum size_t value and would let an
    // out-of-range write through.
    if (data.size() < 4 || offset > data.size() - 4) {
        return;
    }
    for (size_t i = 0; i < 4; ++i) {
        data[offset + i] = static_cast<uint8_t>((val >> (8 * i)) & 0xFF);
    }
}
|
||||||
|
|
||||||
|
// Appends the first four characters of `fourcc` to `data`.
// `fourcc` must point to at least four readable characters.
void write_fourcc(std::vector<uint8_t>& data, const char* fourcc) {
    for (int i = 0; i < 4; ++i) {
        data.push_back(static_cast<uint8_t>(fourcc[i]));
    }
}
|
||||||
|
|
||||||
EncodedImageFormat encoded_image_format_from_path(const std::string& path) {
|
EncodedImageFormat encoded_image_format_from_path(const std::string& path) {
|
||||||
std::string ext = fs::path(path).extension().string();
|
std::string ext = fs::path(path).extension().string();
|
||||||
std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
|
std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
|
||||||
@ -699,95 +776,96 @@ uint8_t* load_image_from_memory(const char* image_bytes,
|
|||||||
return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel);
|
return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
std::vector<uint8_t> create_mjpg_avi_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) {
|
||||||
if (num_images == 0) {
|
if (num_images == 0) {
|
||||||
fprintf(stderr, "Error: Image array is empty.\n");
|
fprintf(stderr, "Error: Image array is empty.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
FilePtr file(fopen(filename, "wb"));
|
|
||||||
if (!file) {
|
|
||||||
perror("Error opening file for writing");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
FILE* f = file.get();
|
|
||||||
|
|
||||||
uint32_t width = images[0].width;
|
uint32_t width = images[0].width;
|
||||||
uint32_t height = images[0].height;
|
uint32_t height = images[0].height;
|
||||||
uint32_t channels = images[0].channel;
|
uint32_t channels = images[0].channel;
|
||||||
if (channels != 3 && channels != 4) {
|
if (channels != 3 && channels != 4) {
|
||||||
fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
|
fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
fwrite("RIFF", 4, 1, f);
|
// stb_image_write changes JPEG sampling behavior above quality 90.
|
||||||
long riff_size_pos = ftell(f);
|
// MJPG AVI playback is more compatible when we keep the encoder on the
|
||||||
write_u32_le(f, 0);
|
// <= 90 path.
|
||||||
fwrite("AVI ", 4, 1, f);
|
const int mjpg_quality = std::clamp(quality, 1, 90);
|
||||||
|
|
||||||
fwrite("LIST", 4, 1, f);
|
std::vector<uint8_t> avi_data;
|
||||||
write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
|
avi_data.reserve(static_cast<size_t>(num_images) * 1024);
|
||||||
fwrite("hdrl", 4, 1, f);
|
|
||||||
|
|
||||||
fwrite("avih", 4, 1, f);
|
write_fourcc(avi_data, "RIFF");
|
||||||
write_u32_le(f, 56);
|
const size_t riff_size_pos = avi_data.size();
|
||||||
write_u32_le(f, 1000000 / fps);
|
write_u32_le(avi_data, 0);
|
||||||
write_u32_le(f, 0);
|
write_fourcc(avi_data, "AVI ");
|
||||||
write_u32_le(f, 0);
|
|
||||||
write_u32_le(f, 0x110);
|
|
||||||
write_u32_le(f, num_images);
|
|
||||||
write_u32_le(f, 0);
|
|
||||||
write_u32_le(f, 1);
|
|
||||||
write_u32_le(f, width * height * 3);
|
|
||||||
write_u32_le(f, width);
|
|
||||||
write_u32_le(f, height);
|
|
||||||
write_u32_le(f, 0);
|
|
||||||
write_u32_le(f, 0);
|
|
||||||
write_u32_le(f, 0);
|
|
||||||
write_u32_le(f, 0);
|
|
||||||
|
|
||||||
fwrite("LIST", 4, 1, f);
|
write_fourcc(avi_data, "LIST");
|
||||||
write_u32_le(f, 4 + 8 + 56 + 8 + 40);
|
write_u32_le(avi_data, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
|
||||||
fwrite("strl", 4, 1, f);
|
write_fourcc(avi_data, "hdrl");
|
||||||
|
|
||||||
fwrite("strh", 4, 1, f);
|
write_fourcc(avi_data, "avih");
|
||||||
write_u32_le(f, 56);
|
write_u32_le(avi_data, 56);
|
||||||
fwrite("vids", 4, 1, f);
|
write_u32_le(avi_data, 1000000 / fps);
|
||||||
fwrite("MJPG", 4, 1, f);
|
write_u32_le(avi_data, 0);
|
||||||
write_u32_le(f, 0);
|
write_u32_le(avi_data, 0);
|
||||||
write_u16_le(f, 0);
|
write_u32_le(avi_data, 0x110);
|
||||||
write_u16_le(f, 0);
|
write_u32_le(avi_data, num_images);
|
||||||
write_u32_le(f, 0);
|
write_u32_le(avi_data, 0);
|
||||||
write_u32_le(f, 1);
|
write_u32_le(avi_data, 1);
|
||||||
write_u32_le(f, fps);
|
write_u32_le(avi_data, width * height * 3);
|
||||||
write_u32_le(f, 0);
|
write_u32_le(avi_data, width);
|
||||||
write_u32_le(f, num_images);
|
write_u32_le(avi_data, height);
|
||||||
write_u32_le(f, width * height * 3);
|
write_u32_le(avi_data, 0);
|
||||||
write_u32_le(f, (uint32_t)-1);
|
write_u32_le(avi_data, 0);
|
||||||
write_u32_le(f, 0);
|
write_u32_le(avi_data, 0);
|
||||||
write_u16_le(f, 0);
|
write_u32_le(avi_data, 0);
|
||||||
write_u16_le(f, 0);
|
|
||||||
write_u16_le(f, 0);
|
|
||||||
write_u16_le(f, 0);
|
|
||||||
|
|
||||||
fwrite("strf", 4, 1, f);
|
write_fourcc(avi_data, "LIST");
|
||||||
write_u32_le(f, 40);
|
write_u32_le(avi_data, 4 + 8 + 56 + 8 + 40);
|
||||||
write_u32_le(f, 40);
|
write_fourcc(avi_data, "strl");
|
||||||
write_u32_le(f, width);
|
|
||||||
write_u32_le(f, height);
|
|
||||||
write_u16_le(f, 1);
|
|
||||||
write_u16_le(f, 24);
|
|
||||||
fwrite("MJPG", 4, 1, f);
|
|
||||||
write_u32_le(f, width * height * 3);
|
|
||||||
write_u32_le(f, 0);
|
|
||||||
write_u32_le(f, 0);
|
|
||||||
write_u32_le(f, 0);
|
|
||||||
write_u32_le(f, 0);
|
|
||||||
|
|
||||||
fwrite("LIST", 4, 1, f);
|
write_fourcc(avi_data, "strh");
|
||||||
long movi_size_pos = ftell(f);
|
write_u32_le(avi_data, 56);
|
||||||
write_u32_le(f, 0);
|
write_fourcc(avi_data, "vids");
|
||||||
fwrite("movi", 4, 1, f);
|
write_fourcc(avi_data, "MJPG");
|
||||||
|
write_u32_le(avi_data, 0);
|
||||||
|
write_u16_le(avi_data, 0);
|
||||||
|
write_u16_le(avi_data, 0);
|
||||||
|
write_u32_le(avi_data, 0);
|
||||||
|
write_u32_le(avi_data, 1);
|
||||||
|
write_u32_le(avi_data, fps);
|
||||||
|
write_u32_le(avi_data, 0);
|
||||||
|
write_u32_le(avi_data, num_images);
|
||||||
|
write_u32_le(avi_data, width * height * 3);
|
||||||
|
write_u32_le(avi_data, static_cast<uint32_t>(-1));
|
||||||
|
write_u32_le(avi_data, 0);
|
||||||
|
write_u16_le(avi_data, 0);
|
||||||
|
write_u16_le(avi_data, 0);
|
||||||
|
write_u16_le(avi_data, 0);
|
||||||
|
write_u16_le(avi_data, 0);
|
||||||
|
|
||||||
|
write_fourcc(avi_data, "strf");
|
||||||
|
write_u32_le(avi_data, 40);
|
||||||
|
write_u32_le(avi_data, 40);
|
||||||
|
write_u32_le(avi_data, width);
|
||||||
|
write_u32_le(avi_data, height);
|
||||||
|
write_u16_le(avi_data, 1);
|
||||||
|
write_u16_le(avi_data, 24);
|
||||||
|
write_fourcc(avi_data, "MJPG");
|
||||||
|
write_u32_le(avi_data, width * height * 3);
|
||||||
|
write_u32_le(avi_data, 0);
|
||||||
|
write_u32_le(avi_data, 0);
|
||||||
|
write_u32_le(avi_data, 0);
|
||||||
|
write_u32_le(avi_data, 0);
|
||||||
|
|
||||||
|
write_fourcc(avi_data, "LIST");
|
||||||
|
const size_t movi_size_pos = avi_data.size();
|
||||||
|
write_u32_le(avi_data, 0);
|
||||||
|
write_fourcc(avi_data, "movi");
|
||||||
|
|
||||||
std::vector<avi_index_entry> index(static_cast<size_t>(num_images));
|
std::vector<avi_index_entry> index(static_cast<size_t>(num_images));
|
||||||
std::vector<uint8_t> jpeg_data;
|
std::vector<uint8_t> jpeg_data;
|
||||||
@ -801,55 +879,61 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int
|
|||||||
buffer->insert(buffer->end(), src, src + size);
|
buffer->insert(buffer->end(), src, src + size);
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, quality)) {
|
if (!stbi_write_jpg_to_func(write_to_buf, &jpeg_data, images[i].width, images[i].height, channels, images[i].data, mjpg_quality)) {
|
||||||
fprintf(stderr, "Error: Failed to encode JPEG frame.\n");
|
fprintf(stderr, "Error: Failed to encode JPEG frame.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
fwrite("00dc", 4, 1, f);
|
index[i].offset = static_cast<uint32_t>(avi_data.size());
|
||||||
write_u32_le(f, (uint32_t)jpeg_data.size());
|
write_fourcc(avi_data, "00dc");
|
||||||
index[i].offset = ftell(f) - 8;
|
write_u32_le(avi_data, static_cast<uint32_t>(jpeg_data.size()));
|
||||||
index[i].size = (uint32_t)jpeg_data.size();
|
index[i].size = (uint32_t)jpeg_data.size();
|
||||||
fwrite(jpeg_data.data(), 1, jpeg_data.size(), f);
|
avi_data.insert(avi_data.end(), jpeg_data.begin(), jpeg_data.end());
|
||||||
|
|
||||||
if (jpeg_data.size() % 2) {
|
if (jpeg_data.size() % 2) {
|
||||||
fputc(0, f);
|
avi_data.push_back(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
long cur_pos = ftell(f);
|
const size_t movi_size = avi_data.size() - movi_size_pos - 4;
|
||||||
long movi_size = cur_pos - movi_size_pos - 4;
|
patch_u32_le(avi_data, movi_size_pos, static_cast<uint32_t>(movi_size));
|
||||||
fseek(f, movi_size_pos, SEEK_SET);
|
|
||||||
write_u32_le(f, movi_size);
|
|
||||||
fseek(f, cur_pos, SEEK_SET);
|
|
||||||
|
|
||||||
fwrite("idx1", 4, 1, f);
|
write_fourcc(avi_data, "idx1");
|
||||||
write_u32_le(f, num_images * 16);
|
write_u32_le(avi_data, num_images * 16);
|
||||||
for (int i = 0; i < num_images; i++) {
|
for (int i = 0; i < num_images; i++) {
|
||||||
fwrite("00dc", 4, 1, f);
|
write_fourcc(avi_data, "00dc");
|
||||||
write_u32_le(f, 0x10);
|
write_u32_le(avi_data, 0x10);
|
||||||
write_u32_le(f, index[i].offset);
|
write_u32_le(avi_data, index[i].offset);
|
||||||
write_u32_le(f, index[i].size);
|
write_u32_le(avi_data, index[i].size);
|
||||||
}
|
}
|
||||||
|
|
||||||
cur_pos = ftell(f);
|
const size_t file_size = avi_data.size() - riff_size_pos - 4;
|
||||||
long file_size = cur_pos - riff_size_pos - 4;
|
patch_u32_le(avi_data, riff_size_pos, static_cast<uint32_t>(file_size));
|
||||||
fseek(f, riff_size_pos, SEEK_SET);
|
|
||||||
write_u32_le(f, file_size);
|
|
||||||
fseek(f, cur_pos, SEEK_SET);
|
|
||||||
|
|
||||||
|
return avi_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
||||||
|
std::vector<uint8_t> avi_data = create_mjpg_avi_from_sd_images_to_vector(images, num_images, fps, quality);
|
||||||
|
if (avi_data.empty()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (!write_binary_file_bytes(filename, avi_data)) {
|
||||||
|
perror("Error opening file for writing");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SD_USE_WEBP
|
#ifdef SD_USE_WEBP
|
||||||
int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
std::vector<uint8_t> create_animated_webp_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) {
|
||||||
if (num_images == 0) {
|
if (num_images == 0) {
|
||||||
fprintf(stderr, "Error: Image array is empty.\n");
|
fprintf(stderr, "Error: Image array is empty.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
if (fps <= 0) {
|
if (fps <= 0) {
|
||||||
fprintf(stderr, "Error: FPS must be positive.\n");
|
fprintf(stderr, "Error: FPS must be positive.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
const int width = static_cast<int>(images[0].width);
|
const int width = static_cast<int>(images[0].width);
|
||||||
@ -857,14 +941,14 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
|
|||||||
const int channels = static_cast<int>(images[0].channel);
|
const int channels = static_cast<int>(images[0].channel);
|
||||||
if (channels != 1 && channels != 3 && channels != 4) {
|
if (channels != 1 && channels != 3 && channels != 4) {
|
||||||
fprintf(stderr, "Error: Unsupported channel count: %d\n", channels);
|
fprintf(stderr, "Error: Unsupported channel count: %d\n", channels);
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
WebPAnimEncoderOptions anim_options;
|
WebPAnimEncoderOptions anim_options;
|
||||||
WebPConfig config;
|
WebPConfig config;
|
||||||
if (!WebPAnimEncoderOptionsInit(&anim_options) || !WebPConfigInit(&config)) {
|
if (!WebPAnimEncoderOptionsInit(&anim_options) || !WebPConfigInit(&config)) {
|
||||||
fprintf(stderr, "Error: Failed to initialize WebP animation encoder.\n");
|
fprintf(stderr, "Error: Failed to initialize WebP animation encoder.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
config.quality = static_cast<float>(quality);
|
config.quality = static_cast<float>(quality);
|
||||||
@ -875,13 +959,13 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
|
|||||||
}
|
}
|
||||||
if (!WebPValidateConfig(&config)) {
|
if (!WebPValidateConfig(&config)) {
|
||||||
fprintf(stderr, "Error: Invalid WebP encoder configuration.\n");
|
fprintf(stderr, "Error: Invalid WebP encoder configuration.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
WebPAnimEncoderPtr enc(WebPAnimEncoderNew(width, height, &anim_options));
|
WebPAnimEncoderPtr enc(WebPAnimEncoderNew(width, height, &anim_options));
|
||||||
if (enc == nullptr) {
|
if (enc == nullptr) {
|
||||||
fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n");
|
fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
const int frame_duration_ms = std::max(1, static_cast<int>(std::lround(1000.0 / static_cast<double>(fps))));
|
const int frame_duration_ms = std::max(1, static_cast<int>(std::lround(1000.0 / static_cast<double>(fps))));
|
||||||
@ -891,13 +975,13 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
|
|||||||
const sd_image_t& image = images[i];
|
const sd_image_t& image = images[i];
|
||||||
if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height) {
|
if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height) {
|
||||||
fprintf(stderr, "Error: Frame dimensions do not match.\n");
|
fprintf(stderr, "Error: Frame dimensions do not match.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
WebPPictureGuard picture;
|
WebPPictureGuard picture;
|
||||||
if (!picture.initialized) {
|
if (!picture.initialized) {
|
||||||
fprintf(stderr, "Error: Failed to initialize WebPPicture.\n");
|
fprintf(stderr, "Error: Failed to initialize WebPPicture.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
picture.picture.use_argb = 1;
|
picture.picture.use_argb = 1;
|
||||||
picture.picture.width = width;
|
picture.picture.width = width;
|
||||||
@ -921,12 +1005,12 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
|
|||||||
|
|
||||||
if (!picture_ok) {
|
if (!picture_ok) {
|
||||||
fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n");
|
fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!WebPAnimEncoderAdd(enc.get(), &picture.picture, timestamp_ms, &config)) {
|
if (!WebPAnimEncoderAdd(enc.get(), &picture.picture, timestamp_ms, &config)) {
|
||||||
fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc.get()));
|
fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc.get()));
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
timestamp_ms += frame_duration_ms;
|
timestamp_ms += frame_duration_ms;
|
||||||
@ -934,52 +1018,50 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
|
|||||||
|
|
||||||
if (!WebPAnimEncoderAdd(enc.get(), nullptr, timestamp_ms, nullptr)) {
|
if (!WebPAnimEncoderAdd(enc.get(), nullptr, timestamp_ms, nullptr)) {
|
||||||
fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc.get()));
|
fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc.get()));
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
WebPDataGuard webp_data;
|
WebPDataGuard webp_data;
|
||||||
if (!WebPAnimEncoderAssemble(enc.get(), &webp_data.data)) {
|
if (!WebPAnimEncoderAssemble(enc.get(), &webp_data.data)) {
|
||||||
fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc.get()));
|
fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc.get()));
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
FilePtr f(fopen(filename, "wb"));
|
return std::vector<uint8_t>(webp_data.data.bytes, webp_data.data.bytes + webp_data.data.size);
|
||||||
if (!f) {
|
}
|
||||||
|
|
||||||
|
int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
||||||
|
std::vector<uint8_t> webp_data = create_animated_webp_from_sd_images_to_vector(images, num_images, fps, quality);
|
||||||
|
if (webp_data.empty()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (!write_binary_file_bytes(filename, webp_data)) {
|
||||||
perror("Error opening file for writing");
|
perror("Error opening file for writing");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (webp_data.data.size > 0 && fwrite(webp_data.data.bytes, 1, webp_data.data.size, f.get()) != webp_data.data.size) {
|
|
||||||
fprintf(stderr, "Error: Failed to write animated WebP file.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SD_USE_WEBM
|
#ifdef SD_USE_WEBM
|
||||||
int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
std::vector<uint8_t> create_webm_from_sd_images_to_vector(sd_image_t* images, int num_images, int fps, int quality) {
|
||||||
if (num_images == 0) {
|
if (num_images == 0) {
|
||||||
fprintf(stderr, "Error: Image array is empty.\n");
|
fprintf(stderr, "Error: Image array is empty.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
if (fps <= 0) {
|
if (fps <= 0) {
|
||||||
fprintf(stderr, "Error: FPS must be positive.\n");
|
fprintf(stderr, "Error: FPS must be positive.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
const int width = static_cast<int>(images[0].width);
|
const int width = static_cast<int>(images[0].width);
|
||||||
const int height = static_cast<int>(images[0].height);
|
const int height = static_cast<int>(images[0].height);
|
||||||
if (width <= 0 || height <= 0) {
|
if (width <= 0 || height <= 0) {
|
||||||
fprintf(stderr, "Error: Invalid frame dimensions.\n");
|
fprintf(stderr, "Error: Invalid frame dimensions.\n");
|
||||||
return -1;
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
mkvmuxer::MkvWriter writer;
|
MemoryMkvWriter writer;
|
||||||
if (!writer.Open(filename)) {
|
|
||||||
fprintf(stderr, "Error: Could not open WebM file for writing.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int ret = [&]() -> int {
|
const int ret = [&]() -> int {
|
||||||
mkvmuxer::Segment segment;
|
mkvmuxer::Segment segment;
|
||||||
@ -1045,30 +1127,63 @@ int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num
|
|||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}();
|
}();
|
||||||
writer.Close();
|
if (ret != 0) {
|
||||||
return ret;
|
return {};
|
||||||
|
}
|
||||||
|
return writer.data();
|
||||||
|
}
|
||||||
|
|
||||||
|
int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
||||||
|
std::vector<uint8_t> webm_data = create_webm_from_sd_images_to_vector(images, num_images, fps, quality);
|
||||||
|
if (webm_data.empty()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (!write_binary_file_bytes(filename, webm_data)) {
|
||||||
|
perror("Error opening file for writing");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
std::vector<uint8_t> create_video_from_sd_images_to_vector(const std::string& output_format,
|
||||||
|
sd_image_t* images,
|
||||||
|
int num_images,
|
||||||
|
int fps,
|
||||||
|
int quality) {
|
||||||
|
std::string format = output_format;
|
||||||
|
std::transform(format.begin(), format.end(), format.begin(),
|
||||||
|
[](unsigned char c) { return static_cast<char>(tolower(c)); });
|
||||||
|
if (!format.empty() && format[0] == '.') {
|
||||||
|
format.erase(format.begin());
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SD_USE_WEBM
|
||||||
|
if (format == "webm") {
|
||||||
|
return create_webm_from_sd_images_to_vector(images, num_images, fps, quality);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SD_USE_WEBP
|
||||||
|
if (format == "webp") {
|
||||||
|
return create_animated_webp_from_sd_images_to_vector(images, num_images, fps, quality);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return create_mjpg_avi_from_sd_images_to_vector(images, num_images, fps, quality);
|
||||||
|
}
|
||||||
|
|
||||||
int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
|
||||||
std::string path = filename ? filename : "";
|
std::string path = filename ? filename : "";
|
||||||
auto pos = path.find_last_of('.');
|
auto pos = path.find_last_of('.');
|
||||||
std::string ext = pos == std::string::npos ? "" : path.substr(pos);
|
std::string ext = pos == std::string::npos ? "" : path.substr(pos);
|
||||||
for (char& ch : ext) {
|
std::vector<uint8_t> video_data = create_video_from_sd_images_to_vector(ext, images, num_images, fps, quality);
|
||||||
ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
|
if (video_data.empty()) {
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
if (!write_binary_file_bytes(filename, video_data)) {
|
||||||
#ifdef SD_USE_WEBM
|
perror("Error opening file for writing");
|
||||||
if (ext == ".webm") {
|
return -1;
|
||||||
return create_webm_from_sd_images(filename, images, num_images, fps, quality);
|
|
||||||
}
|
}
|
||||||
#endif
|
return 0;
|
||||||
|
|
||||||
#ifdef SD_USE_WEBP
|
|
||||||
if (ext == ".webp") {
|
|
||||||
return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return create_mjpg_avi_from_sd_images(filename, images, num_images, fps, quality);
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -58,6 +58,10 @@ int create_mjpg_avi_from_sd_images(const char* filename,
|
|||||||
int num_images,
|
int num_images,
|
||||||
int fps,
|
int fps,
|
||||||
int quality = 90);
|
int quality = 90);
|
||||||
|
std::vector<uint8_t> create_mjpg_avi_from_sd_images_to_vector(sd_image_t* images,
|
||||||
|
int num_images,
|
||||||
|
int fps,
|
||||||
|
int quality = 90);
|
||||||
|
|
||||||
#ifdef SD_USE_WEBP
|
#ifdef SD_USE_WEBP
|
||||||
int create_animated_webp_from_sd_images(const char* filename,
|
int create_animated_webp_from_sd_images(const char* filename,
|
||||||
@ -65,6 +69,10 @@ int create_animated_webp_from_sd_images(const char* filename,
|
|||||||
int num_images,
|
int num_images,
|
||||||
int fps,
|
int fps,
|
||||||
int quality = 90);
|
int quality = 90);
|
||||||
|
std::vector<uint8_t> create_animated_webp_from_sd_images_to_vector(sd_image_t* images,
|
||||||
|
int num_images,
|
||||||
|
int fps,
|
||||||
|
int quality = 90);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SD_USE_WEBM
|
#ifdef SD_USE_WEBM
|
||||||
@ -73,6 +81,10 @@ int create_webm_from_sd_images(const char* filename,
|
|||||||
int num_images,
|
int num_images,
|
||||||
int fps,
|
int fps,
|
||||||
int quality = 90);
|
int quality = 90);
|
||||||
|
std::vector<uint8_t> create_webm_from_sd_images_to_vector(sd_image_t* images,
|
||||||
|
int num_images,
|
||||||
|
int fps,
|
||||||
|
int quality = 90);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int create_video_from_sd_images(const char* filename,
|
int create_video_from_sd_images(const char* filename,
|
||||||
@ -80,5 +92,10 @@ int create_video_from_sd_images(const char* filename,
|
|||||||
int num_images,
|
int num_images,
|
||||||
int fps,
|
int fps,
|
||||||
int quality = 90);
|
int quality = 90);
|
||||||
|
std::vector<uint8_t> create_video_from_sd_images_to_vector(const std::string& output_format,
|
||||||
|
sd_image_t* images,
|
||||||
|
int num_images,
|
||||||
|
int fps,
|
||||||
|
int quality = 90);
|
||||||
|
|
||||||
#endif // __MEDIA_IO_H__
|
#endif // __MEDIA_IO_H__
|
||||||
|
|||||||
@ -9,7 +9,7 @@ The server currently exposes three API families:
|
|||||||
- `sdcpp API` under `/sdcpp/v1/...`
|
- `sdcpp API` under `/sdcpp/v1/...`
|
||||||
|
|
||||||
The `sdcpp API` is the native API surface.
|
The `sdcpp API` is the native API surface.
|
||||||
Its request schema is also the canonical schema for `sd_cpp_extra_args`.
|
Its request schema is the same schema used by `sd_cpp_extra_args`.
|
||||||
|
|
||||||
Global LoRA rule:
|
Global LoRA rule:
|
||||||
|
|
||||||
@ -55,8 +55,6 @@ Current endpoints include:
|
|||||||
- `POST /sdcpp/v1/jobs/{id}/cancel`
|
- `POST /sdcpp/v1/jobs/{id}/cancel`
|
||||||
- `POST /sdcpp/v1/vid_gen`
|
- `POST /sdcpp/v1/vid_gen`
|
||||||
|
|
||||||
`POST /sdcpp/v1/vid_gen` is currently exposed but returns `501 Not Implemented`.
|
|
||||||
|
|
||||||
## `sd_cpp_extra_args`
|
## `sd_cpp_extra_args`
|
||||||
|
|
||||||
`sd_cpp_extra_args` is an extension mechanism for the compatibility APIs.
|
`sd_cpp_extra_args` is an extension mechanism for the compatibility APIs.
|
||||||
@ -79,12 +77,12 @@ Behavior:
|
|||||||
- The JSON block is parsed using the same field rules as the `sdcpp API`.
|
- The JSON block is parsed using the same field rules as the `sdcpp API`.
|
||||||
- The block is removed from the final prompt before generation.
|
- The block is removed from the final prompt before generation.
|
||||||
|
|
||||||
Intended use:
|
Supported use:
|
||||||
|
|
||||||
- extend `OpenAI API` requests with native `stable-diffusion.cpp` controls
|
- extend `OpenAI API` requests with native `stable-diffusion.cpp` controls
|
||||||
- extend `sdapi` requests with native `stable-diffusion.cpp` controls
|
- extend `sdapi` requests with native `stable-diffusion.cpp` controls
|
||||||
|
|
||||||
Not intended use:
|
Unsupported use:
|
||||||
|
|
||||||
- do not use `sd_cpp_extra_args` with `/sdcpp/v1/*`
|
- do not use `sd_cpp_extra_args` with `/sdcpp/v1/*`
|
||||||
|
|
||||||
@ -372,20 +370,25 @@ Field types:
|
|||||||
|
|
||||||
Returns frontend-friendly capability metadata.
|
Returns frontend-friendly capability metadata.
|
||||||
|
|
||||||
Typical contents:
|
The mode-aware fields are the primary interface. The top-level compatibility fields are deprecated mirrors kept for older clients.
|
||||||
|
|
||||||
| Field | Type |
|
Top-level fields:
|
||||||
| --- | --- |
|
|
||||||
| `model` | `object` |
|
|
||||||
| `defaults` | `object` |
|
|
||||||
| `loras` | `array<object>` |
|
|
||||||
| `samplers` | `array<string>` |
|
|
||||||
| `schedulers` | `array<string>` |
|
|
||||||
| `output_formats` | `array<string>` |
|
|
||||||
| `limits` | `object` |
|
|
||||||
| `features` | `object` |
|
|
||||||
|
|
||||||
Nested fields currently returned:
|
| Field | Type | Notes |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `model` | `object` | Loaded model metadata |
|
||||||
|
| `current_mode` | `string` | The native generation mode mirrored by top-level compatibility fields |
|
||||||
|
| `supported_modes` | `array<string>` | Supported native modes such as `img_gen` or `vid_gen` |
|
||||||
|
| `defaults` | `object` | Deprecated compatibility mirror of `defaults_by_mode[current_mode]` |
|
||||||
|
| `output_formats` | `array<string>` | Deprecated compatibility mirror of `output_formats_by_mode[current_mode]` |
|
||||||
|
| `features` | `object` | Deprecated compatibility mirror of `features_by_mode[current_mode]` |
|
||||||
|
| `defaults_by_mode` | `object` | Explicit defaults for each supported mode |
|
||||||
|
| `output_formats_by_mode` | `object` | Explicit output formats for each supported mode |
|
||||||
|
| `features_by_mode` | `object` | Explicit feature flags for each supported mode |
|
||||||
|
| `samplers` | `array<string>` | Available sampling methods |
|
||||||
|
| `schedulers` | `array<string>` | Available schedulers |
|
||||||
|
| `loras` | `array<object>` | Available LoRA entries |
|
||||||
|
| `limits` | `object` | Shared queue and size limits |
|
||||||
|
|
||||||
`model`
|
`model`
|
||||||
|
|
||||||
@ -395,50 +398,24 @@ Nested fields currently returned:
|
|||||||
| `model.stem` | `string` |
|
| `model.stem` | `string` |
|
||||||
| `model.path` | `string` |
|
| `model.path` | `string` |
|
||||||
|
|
||||||
`defaults`
|
Compatibility rules:
|
||||||
|
|
||||||
|
- `defaults`, `output_formats`, and `features` are deprecated compatibility mirrors
|
||||||
|
- those three top-level fields always mirror `current_mode`
|
||||||
|
- `supported_modes`, `defaults_by_mode`, `output_formats_by_mode`, and `features_by_mode` are the mode-aware fields
|
||||||
|
|
||||||
|
Mode-aware objects:
|
||||||
|
|
||||||
| Field | Type |
|
| Field | Type |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
| `defaults.prompt` | `string` |
|
| `defaults_by_mode.img_gen` | `object` |
|
||||||
| `defaults.negative_prompt` | `string` |
|
| `defaults_by_mode.vid_gen` | `object` |
|
||||||
| `defaults.clip_skip` | `integer` |
|
| `output_formats_by_mode.img_gen` | `array<string>` |
|
||||||
| `defaults.width` | `integer` |
|
| `output_formats_by_mode.vid_gen` | `array<string>` |
|
||||||
| `defaults.height` | `integer` |
|
| `features_by_mode.img_gen` | `object` |
|
||||||
| `defaults.strength` | `number` |
|
| `features_by_mode.vid_gen` | `object` |
|
||||||
| `defaults.seed` | `integer` |
|
|
||||||
| `defaults.batch_count` | `integer` |
|
Shared nested fields:
|
||||||
| `defaults.auto_resize_ref_image` | `boolean` |
|
|
||||||
| `defaults.increase_ref_index` | `boolean` |
|
|
||||||
| `defaults.control_strength` | `number` |
|
|
||||||
| `defaults.sample_params` | `object` |
|
|
||||||
| `defaults.sample_params.scheduler` | `string` |
|
|
||||||
| `defaults.sample_params.sample_method` | `string` |
|
|
||||||
| `defaults.sample_params.sample_steps` | `integer` |
|
|
||||||
| `defaults.sample_params.eta` | `number \| null` |
|
|
||||||
| `defaults.sample_params.shifted_timestep` | `integer` |
|
|
||||||
| `defaults.sample_params.flow_shift` | `number \| null` |
|
|
||||||
| `defaults.sample_params.guidance` | `object` |
|
|
||||||
| `defaults.sample_params.guidance.txt_cfg` | `number` |
|
|
||||||
| `defaults.sample_params.guidance.img_cfg` | `number \| null` |
|
|
||||||
| `defaults.sample_params.guidance.distilled_guidance` | `number` |
|
|
||||||
| `defaults.sample_params.guidance.slg` | `object` |
|
|
||||||
| `defaults.sample_params.guidance.slg.layers` | `array<integer>` |
|
|
||||||
| `defaults.sample_params.guidance.slg.layer_start` | `number` |
|
|
||||||
| `defaults.sample_params.guidance.slg.layer_end` | `number` |
|
|
||||||
| `defaults.sample_params.guidance.slg.scale` | `number` |
|
|
||||||
| `defaults.vae_tiling_params` | `object` |
|
|
||||||
| `defaults.vae_tiling_params.enabled` | `boolean` |
|
|
||||||
| `defaults.vae_tiling_params.tile_size_x` | `integer` |
|
|
||||||
| `defaults.vae_tiling_params.tile_size_y` | `integer` |
|
|
||||||
| `defaults.vae_tiling_params.target_overlap` | `number` |
|
|
||||||
| `defaults.vae_tiling_params.rel_size_x` | `number` |
|
|
||||||
| `defaults.vae_tiling_params.rel_size_y` | `number` |
|
|
||||||
| `defaults.cache_mode` | `string` |
|
|
||||||
| `defaults.cache_option` | `string` |
|
|
||||||
| `defaults.scm_mask` | `string` |
|
|
||||||
| `defaults.scm_policy_dynamic` | `boolean` |
|
|
||||||
| `defaults.output_format` | `string` |
|
|
||||||
| `defaults.output_compression` | `integer` |
|
|
||||||
|
|
||||||
`loras`
|
`loras`
|
||||||
|
|
||||||
@ -458,19 +435,100 @@ Nested fields currently returned:
|
|||||||
| `limits.max_batch_count` | `integer` |
|
| `limits.max_batch_count` | `integer` |
|
||||||
| `limits.max_queue_size` | `integer` |
|
| `limits.max_queue_size` | `integer` |
|
||||||
|
|
||||||
`features`
|
Shared default fields used by both `img_gen` and `vid_gen`:
|
||||||
|
|
||||||
| Field | Type |
|
| Field | Type |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
| `features.init_image` | `boolean` |
|
| `prompt` | `string` |
|
||||||
| `features.mask_image` | `boolean` |
|
| `negative_prompt` | `string` |
|
||||||
| `features.control_image` | `boolean` |
|
| `clip_skip` | `integer` |
|
||||||
| `features.ref_images` | `boolean` |
|
| `width` | `integer` |
|
||||||
| `features.lora` | `boolean` |
|
| `height` | `integer` |
|
||||||
| `features.vae_tiling` | `boolean` |
|
| `strength` | `number` |
|
||||||
| `features.cache` | `boolean` |
|
| `seed` | `integer` |
|
||||||
| `features.cancel_queued` | `boolean` |
|
| `sample_params` | `object` |
|
||||||
| `features.cancel_generating` | `boolean` |
|
| `sample_params.scheduler` | `string` |
|
||||||
|
| `sample_params.sample_method` | `string` |
|
||||||
|
| `sample_params.sample_steps` | `integer` |
|
||||||
|
| `sample_params.eta` | `number \| null` |
|
||||||
|
| `sample_params.shifted_timestep` | `integer` |
|
||||||
|
| `sample_params.flow_shift` | `number \| null` |
|
||||||
|
| `sample_params.guidance.txt_cfg` | `number` |
|
||||||
|
| `sample_params.guidance.img_cfg` | `number \| null` |
|
||||||
|
| `sample_params.guidance.distilled_guidance` | `number` |
|
||||||
|
| `sample_params.guidance.slg.layers` | `array<integer>` |
|
||||||
|
| `sample_params.guidance.slg.layer_start` | `number` |
|
||||||
|
| `sample_params.guidance.slg.layer_end` | `number` |
|
||||||
|
| `sample_params.guidance.slg.scale` | `number` |
|
||||||
|
| `vae_tiling_params` | `object` |
|
||||||
|
| `vae_tiling_params.enabled` | `boolean` |
|
||||||
|
| `vae_tiling_params.tile_size_x` | `integer` |
|
||||||
|
| `vae_tiling_params.tile_size_y` | `integer` |
|
||||||
|
| `vae_tiling_params.target_overlap` | `number` |
|
||||||
|
| `vae_tiling_params.rel_size_x` | `number` |
|
||||||
|
| `vae_tiling_params.rel_size_y` | `number` |
|
||||||
|
| `cache_mode` | `string` |
|
||||||
|
| `cache_option` | `string` |
|
||||||
|
| `scm_mask` | `string` |
|
||||||
|
| `scm_policy_dynamic` | `boolean` |
|
||||||
|
| `output_format` | `string` |
|
||||||
|
| `output_compression` | `integer` |
|
||||||
|
|
||||||
|
`img_gen`-specific default fields:
|
||||||
|
|
||||||
|
| Field | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| `batch_count` | `integer` |
|
||||||
|
| `auto_resize_ref_image` | `boolean` |
|
||||||
|
| `increase_ref_index` | `boolean` |
|
||||||
|
| `control_strength` | `number` |
|
||||||
|
|
||||||
|
`vid_gen`-specific default fields:
|
||||||
|
|
||||||
|
| Field | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| `video_frames` | `integer` |
|
||||||
|
| `fps` | `integer` |
|
||||||
|
| `moe_boundary` | `number` |
|
||||||
|
| `vace_strength` | `number` |
|
||||||
|
| `high_noise_sample_params` | `object` |
|
||||||
|
| `high_noise_sample_params.scheduler` | `string` |
|
||||||
|
| `high_noise_sample_params.sample_method` | `string` |
|
||||||
|
| `high_noise_sample_params.sample_steps` | `integer` |
|
||||||
|
| `high_noise_sample_params.eta` | `number \| null` |
|
||||||
|
| `high_noise_sample_params.shifted_timestep` | `integer` |
|
||||||
|
| `high_noise_sample_params.flow_shift` | `number \| null` |
|
||||||
|
| `high_noise_sample_params.guidance.txt_cfg` | `number` |
|
||||||
|
| `high_noise_sample_params.guidance.img_cfg` | `number \| null` |
|
||||||
|
| `high_noise_sample_params.guidance.distilled_guidance` | `number` |
|
||||||
|
| `high_noise_sample_params.guidance.slg.layers` | `array<integer>` |
|
||||||
|
| `high_noise_sample_params.guidance.slg.layer_start` | `number` |
|
||||||
|
| `high_noise_sample_params.guidance.slg.layer_end` | `number` |
|
||||||
|
| `high_noise_sample_params.guidance.slg.scale` | `number` |
|
||||||
|
|
||||||
|
Fields returned in `features_by_mode.img_gen`:
|
||||||
|
|
||||||
|
- `init_image`
|
||||||
|
- `mask_image`
|
||||||
|
- `control_image`
|
||||||
|
- `ref_images`
|
||||||
|
- `lora`
|
||||||
|
- `vae_tiling`
|
||||||
|
- `cache`
|
||||||
|
- `cancel_queued`
|
||||||
|
- `cancel_generating`
|
||||||
|
|
||||||
|
Fields returned in `features_by_mode.vid_gen`:
|
||||||
|
|
||||||
|
- `init_image`
|
||||||
|
- `end_image`
|
||||||
|
- `control_frames`
|
||||||
|
- `high_noise_sample_params`
|
||||||
|
- `lora`
|
||||||
|
- `vae_tiling`
|
||||||
|
- `cache`
|
||||||
|
- `cancel_queued`
|
||||||
|
- `cancel_generating`
|
||||||
|
|
||||||
#### `POST /sdcpp/v1/img_gen`
|
#### `POST /sdcpp/v1/img_gen`
|
||||||
|
|
||||||
@ -521,9 +579,7 @@ Typical status codes:
|
|||||||
- `409 Conflict`
|
- `409 Conflict`
|
||||||
- `410 Gone`
|
- `410 Gone`
|
||||||
|
|
||||||
### Canonical Request Schema
|
### Request Body
|
||||||
|
|
||||||
The `sdcpp API` request body is the canonical native schema.
|
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
@ -612,7 +668,7 @@ Channel expectations:
|
|||||||
If omitted or null:
|
If omitted or null:
|
||||||
|
|
||||||
- single-image fields map to an empty `sd_image_t`
|
- single-image fields map to an empty `sd_image_t`
|
||||||
- array fields map to `nullptr + count = 0`
|
- array fields map to an empty C-style array, represented as `pointer = nullptr` and `count = 0`
|
||||||
|
|
||||||
### Field Mapping Summary
|
### Field Mapping Summary
|
||||||
|
|
||||||
@ -686,11 +742,11 @@ HTTP-only output fields:
|
|||||||
| `output_format` | `string` |
|
| `output_format` | `string` |
|
||||||
| `output_compression` | `integer` |
|
| `output_compression` | `integer` |
|
||||||
|
|
||||||
### Optional Field Semantics
|
### Optional Field Handling
|
||||||
|
|
||||||
Clients should preserve unset semantics for optional sampling fields.
|
Optional sampling fields may be omitted.
|
||||||
|
|
||||||
If a user has not explicitly provided one of these fields, the client should omit it instead of injecting a guessed fallback:
|
When omitted, backend defaults apply to these fields:
|
||||||
|
|
||||||
- `sample_params.scheduler`
|
- `sample_params.scheduler`
|
||||||
- `sample_params.sample_method`
|
- `sample_params.sample_method`
|
||||||
@ -766,29 +822,394 @@ Example cancelled job:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### Validation and Retention
|
### Submission Errors
|
||||||
|
|
||||||
Recommended behavior:
|
`POST /sdcpp/v1/img_gen` may return:
|
||||||
|
|
||||||
- malformed JSON returns `400`
|
- `202 Accepted` when the job is created
|
||||||
- invalid image payloads return `400`
|
- `400 Bad Request` for an empty body, unsupported model mode, invalid JSON, or invalid generation parameters
|
||||||
- invalid parameter structure returns `400`
|
- `429 Too Many Requests` when the job queue is full
|
||||||
- queue full returns `429` or `503`
|
- `500 Internal Server Error` for unexpected server exceptions during submission
|
||||||
- accepted runtime failures transition the job to `failed`
|
|
||||||
- unsupported in-progress cancellation may return `409`
|
|
||||||
|
|
||||||
Recommended retention controls:
|
### `vid_gen`
|
||||||
|
|
||||||
- pending job limit
|
The following section documents the native async contract for video generation.
|
||||||
- completed job TTL
|
|
||||||
- failed job TTL
|
|
||||||
|
|
||||||
### Future `vid_gen`
|
#### `POST /sdcpp/v1/vid_gen`
|
||||||
|
|
||||||
Future `vid_gen` should reuse the same async job model:
|
Submits an async video generation job.
|
||||||
|
|
||||||
- `POST /sdcpp/v1/vid_gen`
|
Successful submission returns `202 Accepted`.
|
||||||
- `GET /sdcpp/v1/jobs/{id}`
|
|
||||||
- `POST /sdcpp/v1/jobs/{id}/cancel`
|
|
||||||
|
|
||||||
Its request body should mirror `sd_vid_gen_params_t` in the same way that `img_gen` mirrors `sd_img_gen_params_t`.
|
Example response:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "job_01HTXYZVID",
|
||||||
|
"kind": "vid_gen",
|
||||||
|
"status": "queued",
|
||||||
|
"created": 1775401200,
|
||||||
|
"poll_url": "/sdcpp/v1/jobs/job_01HTXYZVID"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Response fields:
|
||||||
|
|
||||||
|
| Field | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| `id` | `string` |
|
||||||
|
| `kind` | `string` |
|
||||||
|
| `status` | `string` |
|
||||||
|
| `created` | `integer` |
|
||||||
|
| `poll_url` | `string` |
|
||||||
|
|
||||||
|
### Request Body
|
||||||
|
|
||||||
|
Differences from the `img_gen` request body:
|
||||||
|
|
||||||
|
- `vid_gen` is a single video sequence job, so `batch_count` is not part of the request schema
|
||||||
|
- `ref_images`, `mask_image`, `control_image`, `control_strength`, and `embed_image_metadata` are not part of the request schema
|
||||||
|
- `vid_gen` adds `end_image`, `control_frames`, `high_noise_sample_params`, `video_frames`, `fps`, `moe_boundary`, and `vace_strength`
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"prompt": "a cat walking through a rainy alley",
|
||||||
|
"negative_prompt": "",
|
||||||
|
"clip_skip": -1,
|
||||||
|
"width": 832,
|
||||||
|
"height": 480,
|
||||||
|
"strength": 0.75,
|
||||||
|
"seed": -1,
|
||||||
|
"video_frames": 33,
|
||||||
|
"fps": 16,
|
||||||
|
"moe_boundary": 0.875,
|
||||||
|
"vace_strength": 1.0,
|
||||||
|
|
||||||
|
"init_image": null,
|
||||||
|
"end_image": null,
|
||||||
|
"control_frames": [],
|
||||||
|
|
||||||
|
"sample_params": {
|
||||||
|
"scheduler": "discrete",
|
||||||
|
"sample_method": "euler",
|
||||||
|
"sample_steps": 28,
|
||||||
|
"eta": 1.0,
|
||||||
|
"shifted_timestep": 0,
|
||||||
|
"custom_sigmas": [],
|
||||||
|
"flow_shift": 0.0,
|
||||||
|
"guidance": {
|
||||||
|
"txt_cfg": 7.0,
|
||||||
|
"img_cfg": 7.0,
|
||||||
|
"distilled_guidance": 3.5,
|
||||||
|
"slg": {
|
||||||
|
"layers": [7, 8, 9],
|
||||||
|
"layer_start": 0.01,
|
||||||
|
"layer_end": 0.2,
|
||||||
|
"scale": 0.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
"high_noise_sample_params": {
|
||||||
|
"scheduler": "discrete",
|
||||||
|
"sample_method": "euler",
|
||||||
|
"sample_steps": -1,
|
||||||
|
"eta": 1.0,
|
||||||
|
"shifted_timestep": 0,
|
||||||
|
"flow_shift": 0.0,
|
||||||
|
"guidance": {
|
||||||
|
"txt_cfg": 7.0,
|
||||||
|
"img_cfg": 7.0,
|
||||||
|
"distilled_guidance": 3.5,
|
||||||
|
"slg": {
|
||||||
|
"layers": [7, 8, 9],
|
||||||
|
"layer_start": 0.01,
|
||||||
|
"layer_end": 0.2,
|
||||||
|
"scale": 0.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
"lora": [],
|
||||||
|
|
||||||
|
"vae_tiling_params": {
|
||||||
|
"enabled": false,
|
||||||
|
"tile_size_x": 0,
|
||||||
|
"tile_size_y": 0,
|
||||||
|
"target_overlap": 0.5,
|
||||||
|
"rel_size_x": 0.0,
|
||||||
|
"rel_size_y": 0.0
|
||||||
|
},
|
||||||
|
|
||||||
|
"cache_mode": "disabled",
|
||||||
|
"cache_option": "",
|
||||||
|
"scm_mask": "",
|
||||||
|
"scm_policy_dynamic": true,
|
||||||
|
|
||||||
|
"output_format": "webm",
|
||||||
|
"output_compression": 100
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### LoRA Rules
|
||||||
|
|
||||||
|
- The server only accepts explicit LoRA entries from the `lora` field.
|
||||||
|
- Prompt-embedded `<lora:...>` tags are intentionally unsupported.
|
||||||
|
- `lora[].is_high_noise` controls whether a LoRA applies only to the high-noise stage.
|
||||||
|
|
||||||
|
### Image and Frame Encoding Rules
|
||||||
|
|
||||||
|
Any image field accepts:
|
||||||
|
|
||||||
|
- a raw base64 string, or
|
||||||
|
- a data URL such as `data:image/png;base64,...`
|
||||||
|
|
||||||
|
Channel expectations:
|
||||||
|
|
||||||
|
- `init_image`: 3 channels
|
||||||
|
- `end_image`: 3 channels
|
||||||
|
- `control_frames[]`: 3 channels
|
||||||
|
|
||||||
|
Frame ordering rules:
|
||||||
|
|
||||||
|
- `control_frames[]` order is the conditioning frame order
|
||||||
|
- `control_frames[]` is preserved in request order
|
||||||
|
|
||||||
|
If omitted or null:
|
||||||
|
|
||||||
|
- single-image fields map to an empty `sd_image_t`
|
||||||
|
- array fields map to an empty C-style array, represented as `pointer = nullptr` and `count = 0`
|
||||||
|
|
||||||
|
### Field Mapping Summary
|
||||||
|
|
||||||
|
Top-level scalar fields:
|
||||||
|
|
||||||
|
| Field | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| `prompt` | `string` |
|
||||||
|
| `negative_prompt` | `string` |
|
||||||
|
| `clip_skip` | `integer` |
|
||||||
|
| `width` | `integer` |
|
||||||
|
| `height` | `integer` |
|
||||||
|
| `strength` | `number` |
|
||||||
|
| `seed` | `integer` |
|
||||||
|
| `video_frames` | `integer` |
|
||||||
|
| `fps` | `integer` |
|
||||||
|
| `moe_boundary` | `number` |
|
||||||
|
| `vace_strength` | `number` |
|
||||||
|
|
||||||
|
Image and frame fields:
|
||||||
|
|
||||||
|
| Field | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| `init_image` | `string \| null` |
|
||||||
|
| `end_image` | `string \| null` |
|
||||||
|
| `control_frames` | `array<string>` |
|
||||||
|
|
||||||
|
LoRA fields:
|
||||||
|
|
||||||
|
| Field | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| `lora[].path` | `string` |
|
||||||
|
| `lora[].multiplier` | `number` |
|
||||||
|
| `lora[].is_high_noise` | `boolean` |
|
||||||
|
|
||||||
|
Sampling fields:
|
||||||
|
|
||||||
|
| Field | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| `sample_params.scheduler` | `string` |
|
||||||
|
| `sample_params.sample_method` | `string` |
|
||||||
|
| `sample_params.sample_steps` | `integer` |
|
||||||
|
| `sample_params.eta` | `number` |
|
||||||
|
| `sample_params.shifted_timestep` | `integer` |
|
||||||
|
| `sample_params.custom_sigmas` | `array<number>` |
|
||||||
|
| `sample_params.flow_shift` | `number` |
|
||||||
|
| `sample_params.guidance.txt_cfg` | `number` |
|
||||||
|
| `sample_params.guidance.img_cfg` | `number` |
|
||||||
|
| `sample_params.guidance.distilled_guidance` | `number` |
|
||||||
|
| `sample_params.guidance.slg.layers` | `array<integer>` |
|
||||||
|
| `sample_params.guidance.slg.layer_start` | `number` |
|
||||||
|
| `sample_params.guidance.slg.layer_end` | `number` |
|
||||||
|
| `sample_params.guidance.slg.scale` | `number` |
|
||||||
|
|
||||||
|
High-noise sampling fields:
|
||||||
|
|
||||||
|
| Field | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| `high_noise_sample_params.scheduler` | `string` |
|
||||||
|
| `high_noise_sample_params.sample_method` | `string` |
|
||||||
|
| `high_noise_sample_params.sample_steps` | `integer` |
|
||||||
|
| `high_noise_sample_params.eta` | `number` |
|
||||||
|
| `high_noise_sample_params.shifted_timestep` | `integer` |
|
||||||
|
| `high_noise_sample_params.flow_shift` | `number` |
|
||||||
|
| `high_noise_sample_params.guidance.txt_cfg` | `number` |
|
||||||
|
| `high_noise_sample_params.guidance.img_cfg` | `number` |
|
||||||
|
| `high_noise_sample_params.guidance.distilled_guidance` | `number` |
|
||||||
|
| `high_noise_sample_params.guidance.slg.layers` | `array<integer>` |
|
||||||
|
| `high_noise_sample_params.guidance.slg.layer_start` | `number` |
|
||||||
|
| `high_noise_sample_params.guidance.slg.layer_end` | `number` |
|
||||||
|
| `high_noise_sample_params.guidance.slg.scale` | `number` |
|
||||||
|
|
||||||
|
Other native fields:
|
||||||
|
|
||||||
|
| Field | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| `vae_tiling_params` | `object` |
|
||||||
|
| `cache_mode` | `string` |
|
||||||
|
| `cache_option` | `string` |
|
||||||
|
| `scm_mask` | `string` |
|
||||||
|
| `scm_policy_dynamic` | `boolean` |
|
||||||
|
|
||||||
|
HTTP-only output fields:
|
||||||
|
|
||||||
|
| Field | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| `output_format` | `string` |
|
||||||
|
| `output_compression` | `integer` |
|
||||||
|
|
||||||
|
For `vid_gen`, `output_format` and `output_compression` control container encoding.
|
||||||
|
`fps` is the playback frame rate passed to the container encoder, and it is echoed in the completed job result.
|
||||||
|
|
||||||
|
Allowed `output_format` values:
|
||||||
|
|
||||||
|
- `webm`
|
||||||
|
- `webp`
|
||||||
|
- `avi`
|
||||||
|
|
||||||
|
Output format behavior:
|
||||||
|
|
||||||
|
- `output_format` defaults to `webm`
|
||||||
|
- `webp` means animated WebP
|
||||||
|
- `avi` means MJPG AVI
|
||||||
|
- `webm` requires the server to be built with WebM support; otherwise the request returns `400`
|
||||||
|
|
||||||
|
### Result Payload
|
||||||
|
|
||||||
|
Completed jobs return one encoded container payload, not a list of per-frame images.
|
||||||
|
|
||||||
|
Result fields:
|
||||||
|
|
||||||
|
- `result.b64_json` contains the whole encoded container file as base64
|
||||||
|
- `result.mime_type` identifies the media type
|
||||||
|
- `result.output_format` echoes the selected container format
|
||||||
|
- `result.fps` echoes the effective playback FPS
|
||||||
|
- `result.frame_count` reports the actual decoded frame count used to build the container
|
||||||
|
|
||||||
|
Expected MIME types:
|
||||||
|
|
||||||
|
| `output_format` | `mime_type` |
|
||||||
|
| --- | --- |
|
||||||
|
| `webm` | `video/webm` |
|
||||||
|
| `webp` | `image/webp` |
|
||||||
|
| `avi` | `video/x-msvideo` |
|
||||||
|
|
||||||
|
### Optional Field Handling
|
||||||
|
|
||||||
|
Optional sampling fields may be omitted.
|
||||||
|
|
||||||
|
When omitted, backend defaults apply to these fields:
|
||||||
|
|
||||||
|
- `sample_params.scheduler`
|
||||||
|
- `sample_params.sample_method`
|
||||||
|
- `sample_params.eta`
|
||||||
|
- `sample_params.flow_shift`
|
||||||
|
- `sample_params.guidance.img_cfg`
|
||||||
|
- `high_noise_sample_params.scheduler`
|
||||||
|
- `high_noise_sample_params.sample_method`
|
||||||
|
- `high_noise_sample_params.eta`
|
||||||
|
- `high_noise_sample_params.flow_shift`
|
||||||
|
- `high_noise_sample_params.guidance.img_cfg`
|
||||||
|
|
||||||
|
`high_noise_sample_params` may also be omitted entirely.
|
||||||
|
|
||||||
|
### Frame Count Semantics
|
||||||
|
|
||||||
|
`video_frames` is the requested target length, but the current core video path internally normalizes the effective frame count to the largest `4n + 1` value that does not exceed the requested count.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
- `video_frames = 33` stays `33`
|
||||||
|
- `video_frames = 34` becomes `33`
|
||||||
|
- `video_frames = 32` becomes `29`
|
||||||
|
|
||||||
|
The completed job payload includes the actual decoded `frame_count`.
|
||||||
|
|
||||||
|
### Completion Result
|
||||||
|
|
||||||
|
Example completed job:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "job_01HTXYZVID",
|
||||||
|
"kind": "vid_gen",
|
||||||
|
"status": "completed",
|
||||||
|
"created": 1775401200,
|
||||||
|
"started": 1775401203,
|
||||||
|
"completed": 1775401215,
|
||||||
|
"queue_position": 0,
|
||||||
|
"result": {
|
||||||
|
"output_format": "webm",
|
||||||
|
"mime_type": "video/webm",
|
||||||
|
"fps": 16,
|
||||||
|
"frame_count": 33,
|
||||||
|
"b64_json": "GkXfo59ChoEBQveBAULygQRC84EIQo..."
|
||||||
|
},
|
||||||
|
"error": null
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
`result.b64_json` carries the complete encoded `.webm`, animated `.webp`, or `.avi` container file as a base64 string; no per-frame images are returned.
|
||||||
|
|
||||||
|
### Failure Result
|
||||||
|
|
||||||
|
Example failed job:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "job_01HTXYZVID",
|
||||||
|
"kind": "vid_gen",
|
||||||
|
"status": "failed",
|
||||||
|
"created": 1775401200,
|
||||||
|
"started": 1775401203,
|
||||||
|
"completed": 1775401204,
|
||||||
|
"queue_position": 0,
|
||||||
|
"result": null,
|
||||||
|
"error": {
|
||||||
|
"code": "generation_failed",
|
||||||
|
"message": "generate_video returned no results"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Cancelled Result
|
||||||
|
|
||||||
|
Example cancelled job:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "job_01HTXYZVID",
|
||||||
|
"kind": "vid_gen",
|
||||||
|
"status": "cancelled",
|
||||||
|
"created": 1775401200,
|
||||||
|
"started": null,
|
||||||
|
"completed": 1775401202,
|
||||||
|
"queue_position": 0,
|
||||||
|
"result": null,
|
||||||
|
"error": {
|
||||||
|
"code": "cancelled",
|
||||||
|
"message": "job cancelled by client"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Submission Errors
|
||||||
|
|
||||||
|
`POST /sdcpp/v1/vid_gen` may return:
|
||||||
|
|
||||||
|
- `202 Accepted` when the job is created
|
||||||
|
- `400 Bad Request` for an empty body, unsupported model mode, invalid JSON, invalid generation parameters, or an unsupported output format
|
||||||
|
- `429 Too Many Requests` when the job queue is full
|
||||||
|
- `500 Internal Server Error` for unexpected server exceptions during submission
|
||||||
|
|||||||
@ -95,6 +95,10 @@ bool cancel_queued_job(AsyncJobManager& manager, AsyncGenerationJob& job) {
|
|||||||
job.status = AsyncJobStatus::Cancelled;
|
job.status = AsyncJobStatus::Cancelled;
|
||||||
job.completed_at = unix_timestamp_now();
|
job.completed_at = unix_timestamp_now();
|
||||||
job.result_images_b64.clear();
|
job.result_images_b64.clear();
|
||||||
|
job.result_media_b64.clear();
|
||||||
|
job.result_media_mime_type.clear();
|
||||||
|
job.result_frame_count = 0;
|
||||||
|
job.result_fps = 0;
|
||||||
job.error_code = "cancelled";
|
job.error_code = "cancelled";
|
||||||
job.error_message = "job cancelled by client";
|
job.error_message = "job cancelled by client";
|
||||||
return true;
|
return true;
|
||||||
@ -122,6 +126,15 @@ json make_async_job_json(const AsyncJobManager& manager, const AsyncGenerationJo
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (job.status == AsyncJobStatus::Completed) {
|
if (job.status == AsyncJobStatus::Completed) {
|
||||||
|
if (job.kind == AsyncJobKind::VidGen) {
|
||||||
|
result["result"] = {
|
||||||
|
{"output_format", job.vid_gen.output_format},
|
||||||
|
{"mime_type", job.result_media_mime_type},
|
||||||
|
{"fps", job.result_fps},
|
||||||
|
{"frame_count", job.result_frame_count},
|
||||||
|
{"b64_json", job.result_media_b64},
|
||||||
|
};
|
||||||
|
} else {
|
||||||
json images = json::array();
|
json images = json::array();
|
||||||
for (size_t i = 0; i < job.result_images_b64.size(); ++i) {
|
for (size_t i = 0; i < job.result_images_b64.size(); ++i) {
|
||||||
images.push_back({{"index", i}, {"b64_json", job.result_images_b64[i]}});
|
images.push_back({{"index", i}, {"b64_json", job.result_images_b64[i]}});
|
||||||
@ -130,6 +143,7 @@ json make_async_job_json(const AsyncJobManager& manager, const AsyncGenerationJo
|
|||||||
{"output_format", job.img_gen.output_format},
|
{"output_format", job.img_gen.output_format},
|
||||||
{"images", images},
|
{"images", images},
|
||||||
};
|
};
|
||||||
|
}
|
||||||
result["error"] = nullptr;
|
result["error"] = nullptr;
|
||||||
} else if (job.status == AsyncJobStatus::Failed ||
|
} else if (job.status == AsyncJobStatus::Failed ||
|
||||||
job.status == AsyncJobStatus::Cancelled) {
|
job.status == AsyncJobStatus::Cancelled) {
|
||||||
@ -156,16 +170,15 @@ bool execute_img_gen_job(ServerRuntime& runtime,
|
|||||||
sd_img_gen_params_t params = job.img_gen.to_sd_img_gen_params_t();
|
sd_img_gen_params_t params = job.img_gen.to_sd_img_gen_params_t();
|
||||||
|
|
||||||
SDImageVec results;
|
SDImageVec results;
|
||||||
int num_results = 0;
|
|
||||||
|
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(*runtime.sd_ctx_mutex);
|
std::lock_guard<std::mutex> lock(*runtime.sd_ctx_mutex);
|
||||||
sd_image_t* raw_results = generate_image(runtime.sd_ctx, ¶ms);
|
sd_image_t* raw_results = generate_image(runtime.sd_ctx, ¶ms);
|
||||||
num_results = params.batch_count;
|
results.adopt(raw_results, params.batch_count);
|
||||||
results.adopt(raw_results, num_results);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (results.empty() || num_results <= 0) {
|
const int num_results = results.count();
|
||||||
|
if (num_results <= 0) {
|
||||||
error_message = "generate_image returned no results";
|
error_message = "generate_image returned no results";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -208,6 +221,47 @@ bool execute_img_gen_job(ServerRuntime& runtime,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs one async video-generation job and encodes the resulting frames into a
// single container.
//
// On success returns true and fills:
//   output_media_b64       - base64 of the encoded container bytes
//   output_media_mime_type - MIME type derived from the job's output_format
//   output_frame_count     - number of frames actually returned by generate_video
//   output_fps             - the fps value taken from the job's generation params
// On failure returns false and sets error_message; the output parameters are
// left untouched.
bool execute_vid_gen_job(ServerRuntime& runtime,
                         AsyncGenerationJob& job,
                         std::string& output_media_b64,
                         std::string& output_media_mime_type,
                         int& output_frame_count,
                         int& output_fps,
                         std::string& error_message) {
    sd_vid_gen_params_t params = job.vid_gen.to_sd_vid_gen_params_t();

    SDImageVec results;
    int num_results = 0;

    {
        // The context mutex is held only for the generation call; encoding
        // below runs without it.
        std::lock_guard<std::mutex> lock(*runtime.sd_ctx_mutex);
        sd_image_t* raw_results = generate_video(runtime.sd_ctx, &params, &num_results);
        results.adopt(raw_results, num_results);
    }

    // Use the count reported by the vector after adoption rather than the raw
    // out-parameter, matching how execute_img_gen_job checks its results.
    num_results = results.count();
    if (num_results <= 0) {
        error_message = "generate_video returned no results";
        return false;
    }

    std::vector<uint8_t> video_bytes = create_video_from_sd_images_to_vector(job.vid_gen.output_format,
                                                                             results.data(),
                                                                             num_results,
                                                                             job.vid_gen.gen_params.fps,
                                                                             job.vid_gen.output_compression);
    // An empty byte vector is treated as an encoder failure.
    if (video_bytes.empty()) {
        error_message = "failed to encode generated video container";
        return false;
    }

    output_media_b64       = base64_encode(video_bytes);
    output_media_mime_type = video_mime_type(job.vid_gen.output_format);
    output_frame_count     = num_results;
    output_fps             = job.vid_gen.gen_params.fps;
    return true;
}
|
||||||
|
|
||||||
void async_job_worker(ServerRuntime& runtime) {
|
void async_job_worker(ServerRuntime& runtime) {
|
||||||
AsyncJobManager& manager = *runtime.async_job_manager;
|
AsyncJobManager& manager = *runtime.async_job_manager;
|
||||||
|
|
||||||
@ -240,11 +294,23 @@ void async_job_worker(ServerRuntime& runtime) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> output_images;
|
std::vector<std::string> output_images;
|
||||||
|
std::string output_media_b64;
|
||||||
|
std::string output_media_mime_type;
|
||||||
|
int output_frame_count = 0;
|
||||||
|
int output_fps = 0;
|
||||||
std::string error_message;
|
std::string error_message;
|
||||||
bool ok = false;
|
bool ok = false;
|
||||||
|
|
||||||
if (job->kind == AsyncJobKind::ImgGen) {
|
if (job->kind == AsyncJobKind::ImgGen) {
|
||||||
ok = execute_img_gen_job(runtime, *job, output_images, error_message);
|
ok = execute_img_gen_job(runtime, *job, output_images, error_message);
|
||||||
|
} else if (job->kind == AsyncJobKind::VidGen) {
|
||||||
|
ok = execute_vid_gen_job(runtime,
|
||||||
|
*job,
|
||||||
|
output_media_b64,
|
||||||
|
output_media_mime_type,
|
||||||
|
output_frame_count,
|
||||||
|
output_fps,
|
||||||
|
error_message);
|
||||||
} else {
|
} else {
|
||||||
error_message = "unsupported job kind";
|
error_message = "unsupported job kind";
|
||||||
}
|
}
|
||||||
@ -260,6 +326,10 @@ void async_job_worker(ServerRuntime& runtime) {
|
|||||||
if (ok) {
|
if (ok) {
|
||||||
job->status = AsyncJobStatus::Completed;
|
job->status = AsyncJobStatus::Completed;
|
||||||
job->result_images_b64 = std::move(output_images);
|
job->result_images_b64 = std::move(output_images);
|
||||||
|
job->result_media_b64 = std::move(output_media_b64);
|
||||||
|
job->result_media_mime_type = std::move(output_media_mime_type);
|
||||||
|
job->result_frame_count = output_frame_count;
|
||||||
|
job->result_fps = output_fps;
|
||||||
job->error_code.clear();
|
job->error_code.clear();
|
||||||
job->error_message.clear();
|
job->error_message.clear();
|
||||||
} else {
|
} else {
|
||||||
@ -267,6 +337,10 @@ void async_job_worker(ServerRuntime& runtime) {
|
|||||||
job->error_code = "generation_failed";
|
job->error_code = "generation_failed";
|
||||||
job->error_message = error_message.empty() ? "unknown generation error" : error_message;
|
job->error_message = error_message.empty() ? "unknown generation error" : error_message;
|
||||||
job->result_images_b64.clear();
|
job->result_images_b64.clear();
|
||||||
|
job->result_media_b64.clear();
|
||||||
|
job->result_media_mime_type.clear();
|
||||||
|
job->result_frame_count = 0;
|
||||||
|
job->result_fps = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
purge_expired_jobs(manager);
|
purge_expired_jobs(manager);
|
||||||
|
|||||||
@ -36,7 +36,12 @@ struct AsyncGenerationJob {
|
|||||||
int64_t started_at = 0;
|
int64_t started_at = 0;
|
||||||
int64_t completed_at = 0;
|
int64_t completed_at = 0;
|
||||||
ImgGenJobRequest img_gen;
|
ImgGenJobRequest img_gen;
|
||||||
|
VidGenJobRequest vid_gen;
|
||||||
std::vector<std::string> result_images_b64;
|
std::vector<std::string> result_images_b64;
|
||||||
|
std::string result_media_b64;
|
||||||
|
std::string result_media_mime_type;
|
||||||
|
int result_frame_count = 0;
|
||||||
|
int result_fps = 0;
|
||||||
std::string error_code;
|
std::string error_code;
|
||||||
std::string error_message;
|
std::string error_message;
|
||||||
};
|
};
|
||||||
@ -63,4 +68,11 @@ bool execute_img_gen_job(ServerRuntime& runtime,
|
|||||||
AsyncGenerationJob& job,
|
AsyncGenerationJob& job,
|
||||||
std::vector<std::string>& output_images,
|
std::vector<std::string>& output_images,
|
||||||
std::string& error_message);
|
std::string& error_message);
|
||||||
|
bool execute_vid_gen_job(ServerRuntime& runtime,
|
||||||
|
AsyncGenerationJob& job,
|
||||||
|
std::string& output_media_b64,
|
||||||
|
std::string& output_media_mime_type,
|
||||||
|
int& output_frame_count,
|
||||||
|
int& output_fps,
|
||||||
|
std::string& error_message);
|
||||||
void async_job_worker(ServerRuntime& runtime);
|
void async_job_worker(ServerRuntime& runtime);
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
Subproject commit 740475a7a6794dc07fb23e8ec5dc56e7e80aa8c1
|
Subproject commit 797ccf80825cc035508ba9b599b2a21953e7f835
|
||||||
@ -253,6 +253,12 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
|||||||
|
|
||||||
svr.Post("/v1/images/generations", [runtime](const httplib::Request& req, httplib::Response& res) {
|
svr.Post("/v1/images/generations", [runtime](const httplib::Request& req, httplib::Response& res) {
|
||||||
try {
|
try {
|
||||||
|
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
|
||||||
|
res.status = 400;
|
||||||
|
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
ImgGenJobRequest request;
|
ImgGenJobRequest request;
|
||||||
std::string error_message;
|
std::string error_message;
|
||||||
if (!build_openai_generation_request(req, *runtime, request, error_message)) {
|
if (!build_openai_generation_request(req, *runtime, request, error_message)) {
|
||||||
@ -319,6 +325,12 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
|||||||
|
|
||||||
svr.Post("/v1/images/edits", [runtime](const httplib::Request& req, httplib::Response& res) {
|
svr.Post("/v1/images/edits", [runtime](const httplib::Request& req, httplib::Response& res) {
|
||||||
try {
|
try {
|
||||||
|
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
|
||||||
|
res.status = 400;
|
||||||
|
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
ImgGenJobRequest request;
|
ImgGenJobRequest request;
|
||||||
std::string error_message;
|
std::string error_message;
|
||||||
if (!build_openai_edit_request(req, *runtime, request, error_message)) {
|
if (!build_openai_edit_request(req, *runtime, request, error_message)) {
|
||||||
|
|||||||
@ -246,6 +246,11 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
|||||||
res.set_content(R"({"error":"empty body"})", "application/json");
|
res.set_content(R"({"error":"empty body"})", "application/json");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
|
||||||
|
res.status = 400;
|
||||||
|
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
json j = json::parse(req.body);
|
json j = json::parse(req.body);
|
||||||
ImgGenJobRequest request;
|
ImgGenJobRequest request;
|
||||||
|
|||||||
@ -75,61 +75,9 @@ static fs::path resolve_display_model_path(const ServerRuntime& runtime) {
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
static json make_capabilities_json(ServerRuntime& runtime) {
|
static json make_sample_params_json(const sd_sample_params_t& sample_params, const std::vector<int>& skip_layers) {
|
||||||
refresh_lora_cache(runtime);
|
|
||||||
|
|
||||||
AsyncJobManager& manager = *runtime.async_job_manager;
|
|
||||||
const auto& defaults = *runtime.default_gen_params;
|
|
||||||
const auto& sample_params = defaults.sample_params;
|
|
||||||
const auto& guidance = sample_params.guidance;
|
const auto& guidance = sample_params.guidance;
|
||||||
const fs::path model_path = resolve_display_model_path(runtime);
|
return {
|
||||||
json samplers = json::array();
|
|
||||||
json schedulers = json::array();
|
|
||||||
json output_formats = json::array({"png", "jpeg"});
|
|
||||||
json available_loras = json::array();
|
|
||||||
|
|
||||||
for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
|
|
||||||
samplers.push_back(sd_sample_method_name((sample_method_t)i));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < SCHEDULER_COUNT; ++i) {
|
|
||||||
schedulers.push_back(sd_scheduler_name((scheduler_t)i));
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef SD_USE_WEBP
|
|
||||||
output_formats.push_back("webp");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock(*runtime.lora_mutex);
|
|
||||||
for (const auto& entry : *runtime.lora_cache) {
|
|
||||||
available_loras.push_back({
|
|
||||||
{"name", entry.name},
|
|
||||||
{"path", entry.path},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
json result;
|
|
||||||
result["model"] = {
|
|
||||||
{"name", model_path.filename().u8string()},
|
|
||||||
{"stem", model_path.stem().u8string()},
|
|
||||||
{"path", model_path.u8string()},
|
|
||||||
};
|
|
||||||
result["defaults"] = {
|
|
||||||
{"prompt", defaults.prompt},
|
|
||||||
{"negative_prompt", defaults.negative_prompt},
|
|
||||||
{"clip_skip", defaults.clip_skip},
|
|
||||||
{"width", defaults.width > 0 ? defaults.width : 512},
|
|
||||||
{"height", defaults.height > 0 ? defaults.height : 512},
|
|
||||||
{"strength", defaults.strength},
|
|
||||||
{"seed", defaults.seed},
|
|
||||||
{"batch_count", defaults.batch_count},
|
|
||||||
{"auto_resize_ref_image", defaults.auto_resize_ref_image},
|
|
||||||
{"increase_ref_index", defaults.increase_ref_index},
|
|
||||||
{"control_strength", defaults.control_strength},
|
|
||||||
{"sample_params",
|
|
||||||
{
|
|
||||||
{"scheduler", capability_scheduler_name(sample_params.scheduler)},
|
{"scheduler", capability_scheduler_name(sample_params.scheduler)},
|
||||||
{"sample_method", capability_sample_method_name(sample_params.sample_method)},
|
{"sample_method", capability_sample_method_name(sample_params.sample_method)},
|
||||||
{"sample_steps", sample_params.sample_steps},
|
{"sample_steps", sample_params.sample_steps},
|
||||||
@ -143,33 +91,66 @@ static json make_capabilities_json(ServerRuntime& runtime) {
|
|||||||
{"distilled_guidance", guidance.distilled_guidance},
|
{"distilled_guidance", guidance.distilled_guidance},
|
||||||
{"slg",
|
{"slg",
|
||||||
{
|
{
|
||||||
{"layers", defaults.skip_layers},
|
{"layers", skip_layers},
|
||||||
{"layer_start", guidance.slg.layer_start},
|
{"layer_start", guidance.slg.layer_start},
|
||||||
{"layer_end", guidance.slg.layer_end},
|
{"layer_end", guidance.slg.layer_end},
|
||||||
{"scale", guidance.slg.scale},
|
{"scale", guidance.slg.scale},
|
||||||
}},
|
}},
|
||||||
}},
|
}},
|
||||||
}},
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static json make_img_gen_defaults_json(const SDGenerationParams& defaults, const std::string& output_format) {
|
||||||
|
return {
|
||||||
|
{"prompt", defaults.prompt},
|
||||||
|
{"negative_prompt", defaults.negative_prompt},
|
||||||
|
{"clip_skip", defaults.clip_skip},
|
||||||
|
{"width", defaults.width > 0 ? defaults.width : 512},
|
||||||
|
{"height", defaults.height > 0 ? defaults.height : 512},
|
||||||
|
{"strength", defaults.strength},
|
||||||
|
{"seed", defaults.seed},
|
||||||
|
{"batch_count", defaults.batch_count},
|
||||||
|
{"auto_resize_ref_image", defaults.auto_resize_ref_image},
|
||||||
|
{"increase_ref_index", defaults.increase_ref_index},
|
||||||
|
{"control_strength", defaults.control_strength},
|
||||||
|
{"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)},
|
||||||
{"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
|
{"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
|
||||||
{"cache_mode", defaults.cache_mode},
|
{"cache_mode", defaults.cache_mode},
|
||||||
{"cache_option", defaults.cache_option},
|
{"cache_option", defaults.cache_option},
|
||||||
{"scm_mask", defaults.scm_mask},
|
{"scm_mask", defaults.scm_mask},
|
||||||
{"scm_policy_dynamic", defaults.scm_policy_dynamic},
|
{"scm_policy_dynamic", defaults.scm_policy_dynamic},
|
||||||
{"output_format", "png"},
|
{"output_format", output_format},
|
||||||
{"output_compression", 100},
|
{"output_compression", 100},
|
||||||
};
|
};
|
||||||
result["limits"] = {
|
}
|
||||||
{"min_width", 64},
|
|
||||||
{"max_width", 4096},
|
static json make_vid_gen_defaults_json(const SDGenerationParams& defaults, const std::string& output_format) {
|
||||||
{"min_height", 64},
|
return {
|
||||||
{"max_height", 4096},
|
{"prompt", defaults.prompt},
|
||||||
{"max_batch_count", 8},
|
{"negative_prompt", defaults.negative_prompt},
|
||||||
{"max_queue_size", manager.max_pending_jobs},
|
{"clip_skip", defaults.clip_skip},
|
||||||
|
{"width", defaults.width > 0 ? defaults.width : 512},
|
||||||
|
{"height", defaults.height > 0 ? defaults.height : 512},
|
||||||
|
{"strength", defaults.strength},
|
||||||
|
{"seed", defaults.seed},
|
||||||
|
{"video_frames", defaults.video_frames},
|
||||||
|
{"fps", defaults.fps},
|
||||||
|
{"moe_boundary", defaults.moe_boundary},
|
||||||
|
{"vace_strength", defaults.vace_strength},
|
||||||
|
{"sample_params", make_sample_params_json(defaults.sample_params, defaults.skip_layers)},
|
||||||
|
{"high_noise_sample_params", make_sample_params_json(defaults.high_noise_sample_params, defaults.high_noise_skip_layers)},
|
||||||
|
{"vae_tiling_params", make_vae_tiling_json(defaults.vae_tiling_params)},
|
||||||
|
{"cache_mode", defaults.cache_mode},
|
||||||
|
{"cache_option", defaults.cache_option},
|
||||||
|
{"scm_mask", defaults.scm_mask},
|
||||||
|
{"scm_policy_dynamic", defaults.scm_policy_dynamic},
|
||||||
|
{"output_format", output_format},
|
||||||
|
{"output_compression", 100},
|
||||||
};
|
};
|
||||||
result["samplers"] = samplers;
|
}
|
||||||
result["schedulers"] = schedulers;
|
|
||||||
result["output_formats"] = output_formats;
|
static json make_img_gen_features_json() {
|
||||||
result["features"] = {
|
return {
|
||||||
{"init_image", true},
|
{"init_image", true},
|
||||||
{"mask_image", true},
|
{"mask_image", true},
|
||||||
{"control_image", true},
|
{"control_image", true},
|
||||||
@ -180,6 +161,128 @@ static json make_capabilities_json(ServerRuntime& runtime) {
|
|||||||
{"cancel_queued", true},
|
{"cancel_queued", true},
|
||||||
{"cancel_generating", false},
|
{"cancel_generating", false},
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the feature-flag object reported for the "vid_gen" mode in the
// capabilities response. Every flag is a fixed literal; only
// "cancel_generating" is reported as false.
static json make_vid_gen_features_json() {
    return {
        {"init_image", true},
        {"end_image", true},
        {"control_frames", true},
        {"high_noise_sample_params", true},
        {"lora", true},
        {"vae_tiling", true},
        {"cache", true},
        {"cancel_queued", true},
        {"cancel_generating", false},
    };
}
|
||||||
|
|
||||||
|
static json make_capabilities_json(ServerRuntime& runtime) {
|
||||||
|
refresh_lora_cache(runtime);
|
||||||
|
|
||||||
|
AsyncJobManager& manager = *runtime.async_job_manager;
|
||||||
|
const auto& defaults = *runtime.default_gen_params;
|
||||||
|
const fs::path model_path = resolve_display_model_path(runtime);
|
||||||
|
const bool supports_img = runtime_supports_generation_mode(runtime, IMG_GEN);
|
||||||
|
const bool supports_vid = runtime_supports_generation_mode(runtime, VID_GEN);
|
||||||
|
json samplers = json::array();
|
||||||
|
json schedulers = json::array();
|
||||||
|
json image_output_formats = supported_img_output_formats();
|
||||||
|
json video_output_formats = supported_vid_output_formats();
|
||||||
|
json available_loras = json::array();
|
||||||
|
json supported_modes = json::array();
|
||||||
|
|
||||||
|
for (int i = 0; i < SAMPLE_METHOD_COUNT; ++i) {
|
||||||
|
samplers.push_back(sd_sample_method_name((sample_method_t)i));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < SCHEDULER_COUNT; ++i) {
|
||||||
|
schedulers.push_back(sd_scheduler_name((scheduler_t)i));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(*runtime.lora_mutex);
|
||||||
|
for (const auto& entry : *runtime.lora_cache) {
|
||||||
|
available_loras.push_back({
|
||||||
|
{"name", entry.name},
|
||||||
|
{"path", entry.path},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (supports_img) {
|
||||||
|
supported_modes.push_back("img_gen");
|
||||||
|
}
|
||||||
|
if (supports_vid) {
|
||||||
|
supported_modes.push_back("vid_gen");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string default_img_output_format = "png";
|
||||||
|
std::string default_vid_output_format = "avi";
|
||||||
|
if (!image_output_formats.empty()) {
|
||||||
|
default_img_output_format = image_output_formats[0].get<std::string>();
|
||||||
|
}
|
||||||
|
if (!video_output_formats.empty()) {
|
||||||
|
default_vid_output_format = video_output_formats[0].get<std::string>();
|
||||||
|
}
|
||||||
|
|
||||||
|
json defaults_by_mode = json::object();
|
||||||
|
json output_formats_by_mode = json::object();
|
||||||
|
json features_by_mode = json::object();
|
||||||
|
if (supports_img) {
|
||||||
|
defaults_by_mode["img_gen"] = make_img_gen_defaults_json(defaults, default_img_output_format);
|
||||||
|
output_formats_by_mode["img_gen"] = image_output_formats;
|
||||||
|
features_by_mode["img_gen"] = make_img_gen_features_json();
|
||||||
|
}
|
||||||
|
if (supports_vid) {
|
||||||
|
defaults_by_mode["vid_gen"] = make_vid_gen_defaults_json(defaults, default_vid_output_format);
|
||||||
|
output_formats_by_mode["vid_gen"] = video_output_formats;
|
||||||
|
features_by_mode["vid_gen"] = make_vid_gen_features_json();
|
||||||
|
}
|
||||||
|
|
||||||
|
json top_level_defaults = json::object();
|
||||||
|
json top_level_output_formats = json::array();
|
||||||
|
json top_level_features = {
|
||||||
|
{"cancel_queued", true},
|
||||||
|
{"cancel_generating", false},
|
||||||
|
};
|
||||||
|
std::string current_mode = "";
|
||||||
|
if (supports_img) {
|
||||||
|
current_mode = "img_gen";
|
||||||
|
top_level_defaults = defaults_by_mode["img_gen"];
|
||||||
|
top_level_output_formats = output_formats_by_mode["img_gen"];
|
||||||
|
top_level_features = features_by_mode["img_gen"];
|
||||||
|
} else if (supports_vid) {
|
||||||
|
current_mode = "vid_gen";
|
||||||
|
top_level_defaults = defaults_by_mode["vid_gen"];
|
||||||
|
top_level_output_formats = output_formats_by_mode["vid_gen"];
|
||||||
|
top_level_features = features_by_mode["vid_gen"];
|
||||||
|
}
|
||||||
|
|
||||||
|
json result;
|
||||||
|
result["model"] = {
|
||||||
|
{"name", model_path.filename().u8string()},
|
||||||
|
{"stem", model_path.stem().u8string()},
|
||||||
|
{"path", model_path.u8string()},
|
||||||
|
};
|
||||||
|
result["current_mode"] = current_mode;
|
||||||
|
result["supported_modes"] = supported_modes;
|
||||||
|
result["defaults"] = top_level_defaults;
|
||||||
|
result["defaults_by_mode"] = defaults_by_mode;
|
||||||
|
result["limits"] = {
|
||||||
|
{"min_width", 64},
|
||||||
|
{"max_width", 4096},
|
||||||
|
{"min_height", 64},
|
||||||
|
{"max_height", 4096},
|
||||||
|
{"max_batch_count", 8},
|
||||||
|
{"max_queue_size", manager.max_pending_jobs},
|
||||||
|
};
|
||||||
|
result["samplers"] = samplers;
|
||||||
|
result["schedulers"] = schedulers;
|
||||||
|
result["output_formats"] = top_level_output_formats;
|
||||||
|
result["output_formats_by_mode"] = output_formats_by_mode;
|
||||||
|
result["features"] = top_level_features;
|
||||||
|
result["features_by_mode"] = features_by_mode;
|
||||||
result["loras"] = available_loras;
|
result["loras"] = available_loras;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -211,6 +314,33 @@ static bool parse_img_gen_request(const json& body,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool parse_vid_gen_request(const json& body,
|
||||||
|
ServerRuntime& runtime,
|
||||||
|
VidGenJobRequest& request,
|
||||||
|
std::string& error_message) {
|
||||||
|
request.gen_params = *runtime.default_gen_params;
|
||||||
|
|
||||||
|
refresh_lora_cache(runtime);
|
||||||
|
if (!request.gen_params.from_json_str(body.dump(), [&](const std::string& path) {
|
||||||
|
return get_lora_full_path(runtime, path);
|
||||||
|
})) {
|
||||||
|
error_message = "invalid generation parameters";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string output_format = body.value("output_format", "webm");
|
||||||
|
int output_compression = body.value("output_compression", 100);
|
||||||
|
if (!assign_output_options(request, output_format, output_compression, error_message)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Intentionally disable prompt-embedded LoRA tag parsing for server APIs.
|
||||||
|
if (!request.gen_params.resolve_and_validate(VID_GEN, "", true)) {
|
||||||
|
error_message = "invalid generation parameters";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
||||||
ServerRuntime* runtime = &rt;
|
ServerRuntime* runtime = &rt;
|
||||||
|
|
||||||
@ -226,6 +356,11 @@ void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
|||||||
res.set_content(R"({"error":"empty body"})", "application/json");
|
res.set_content(R"({"error":"empty body"})", "application/json");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!runtime_supports_generation_mode(*runtime, IMG_GEN)) {
|
||||||
|
res.status = 400;
|
||||||
|
res.set_content(json({{"error", unsupported_generation_mode_error(IMG_GEN)}}).dump(), "application/json");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
json body = json::parse(req.body);
|
json body = json::parse(req.body);
|
||||||
ImgGenJobRequest request;
|
ImgGenJobRequest request;
|
||||||
@ -276,9 +411,66 @@ void register_sdcpp_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
svr.Post("/sdcpp/v1/vid_gen", [](const httplib::Request&, httplib::Response& res) {
|
svr.Post("/sdcpp/v1/vid_gen", [runtime](const httplib::Request& req, httplib::Response& res) {
|
||||||
res.status = 501;
|
try {
|
||||||
res.set_content(R"({"error":"vid_gen is reserved and not implemented yet"})", "application/json");
|
if (req.body.empty()) {
|
||||||
|
res.status = 400;
|
||||||
|
res.set_content(R"({"error":"empty body"})", "application/json");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!runtime_supports_generation_mode(*runtime, VID_GEN)) {
|
||||||
|
res.status = 400;
|
||||||
|
res.set_content(json({{"error", unsupported_generation_mode_error(VID_GEN)}}).dump(), "application/json");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
json body = json::parse(req.body);
|
||||||
|
VidGenJobRequest request;
|
||||||
|
std::string error_message;
|
||||||
|
if (!parse_vid_gen_request(body, *runtime, request, error_message)) {
|
||||||
|
res.status = 400;
|
||||||
|
res.set_content(json({{"error", error_message}}).dump(), "application/json");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
AsyncJobManager& manager = *runtime->async_job_manager;
|
||||||
|
std::shared_ptr<AsyncGenerationJob> job = std::make_shared<AsyncGenerationJob>();
|
||||||
|
job->kind = AsyncJobKind::VidGen;
|
||||||
|
job->status = AsyncJobStatus::Queued;
|
||||||
|
job->created_at = unix_timestamp_now();
|
||||||
|
job->vid_gen = std::move(request);
|
||||||
|
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(manager.mutex);
|
||||||
|
purge_expired_jobs(manager);
|
||||||
|
if (count_pending_jobs(manager) >= manager.max_pending_jobs) {
|
||||||
|
res.status = 429;
|
||||||
|
res.set_content(R"({"error":"job queue is full"})", "application/json");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
job->id = make_async_job_id(manager);
|
||||||
|
manager.jobs[job->id] = job;
|
||||||
|
manager.queue.push_back(job->id);
|
||||||
|
}
|
||||||
|
|
||||||
|
manager.cv.notify_one();
|
||||||
|
|
||||||
|
json out;
|
||||||
|
out["id"] = job->id;
|
||||||
|
out["kind"] = async_job_kind_name(job->kind);
|
||||||
|
out["status"] = async_job_status_name(job->status);
|
||||||
|
out["created"] = job->created_at;
|
||||||
|
out["poll_url"] = "/sdcpp/v1/jobs/" + job->id;
|
||||||
|
|
||||||
|
res.status = 202;
|
||||||
|
res.set_content(out.dump(), "application/json");
|
||||||
|
} catch (const json::parse_error& e) {
|
||||||
|
res.status = 400;
|
||||||
|
res.set_content(json({{"error", "invalid json"}, {"message", e.what()}}).dump(), "application/json");
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
res.status = 500;
|
||||||
|
res.set_content(json({{"error", "server_error"}, {"message", e.what()}}).dump(), "application/json");
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
svr.Get(R"(/sdcpp/v1/jobs/([A-Za-z0-9_\-]+))", [runtime](const httplib::Request& req, httplib::Response& res) {
|
svr.Get(R"(/sdcpp/v1/jobs/([A-Za-z0-9_\-]+))", [runtime](const httplib::Request& req, httplib::Response& res) {
|
||||||
|
|||||||
@ -45,6 +45,44 @@ std::string normalize_output_format(std::string output_format) {
|
|||||||
return output_format;
|
return output_format;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lists the image output formats this build accepts, in preference order.
// "png" and "jpeg" are always present; "webp" is appended only when the
// build defines SD_USE_WEBP and the caller passes allow_webp == true.
std::vector<std::string> supported_img_output_formats(bool allow_webp) {
    std::vector<std::string> formats = {"png", "jpeg"};
#ifdef SD_USE_WEBP
    if (allow_webp) {
        formats.push_back("webp");
    }
#else
    (void)allow_webp;  // parameter is unused without WebP support
#endif
    return formats;
}
|
||||||
|
|
||||||
|
// Lists the video output formats this build accepts, in preference order.
// "webm" and "webp" are included only when SD_USE_WEBM / SD_USE_WEBP are
// defined; "avi" is always appended last, so the list is never empty.
std::vector<std::string> supported_vid_output_formats() {
    std::vector<std::string> formats;
#ifdef SD_USE_WEBM
    formats.push_back("webm");
#endif
#ifdef SD_USE_WEBP
    formats.push_back("webp");
#endif
    formats.push_back("avi");
    return formats;
}
|
||||||
|
|
||||||
|
static std::string valid_vid_output_formats_message() {
|
||||||
|
const std::vector<std::string> formats = supported_vid_output_formats();
|
||||||
|
|
||||||
|
std::string message = "invalid output_format, must be one of [";
|
||||||
|
for (size_t i = 0; i < formats.size(); ++i) {
|
||||||
|
if (i > 0) {
|
||||||
|
message += ", ";
|
||||||
|
}
|
||||||
|
message += formats[i];
|
||||||
|
}
|
||||||
|
message += "]";
|
||||||
|
return message;
|
||||||
|
}
|
||||||
|
|
||||||
bool assign_output_options(ImgGenJobRequest& request,
|
bool assign_output_options(ImgGenJobRequest& request,
|
||||||
std::string output_format,
|
std::string output_format,
|
||||||
int output_compression,
|
int output_compression,
|
||||||
@ -53,19 +91,88 @@ bool assign_output_options(ImgGenJobRequest& request,
|
|||||||
request.output_format = normalize_output_format(std::move(output_format));
|
request.output_format = normalize_output_format(std::move(output_format));
|
||||||
request.output_compression = std::clamp(output_compression, 0, 100);
|
request.output_compression = std::clamp(output_compression, 0, 100);
|
||||||
|
|
||||||
const bool valid_format = request.output_format == "png" ||
|
const std::vector<std::string> valid_formats = supported_img_output_formats(allow_webp);
|
||||||
request.output_format == "jpeg" ||
|
const bool valid_format = std::find(valid_formats.begin(),
|
||||||
(allow_webp && request.output_format == "webp");
|
valid_formats.end(),
|
||||||
|
request.output_format) != valid_formats.end();
|
||||||
if (!valid_format) {
|
if (!valid_format) {
|
||||||
error_message = allow_webp
|
error_message = "invalid output_format, must be one of [";
|
||||||
? "invalid output_format, must be one of [png, jpeg, webp]"
|
for (size_t i = 0; i < valid_formats.size(); ++i) {
|
||||||
: "invalid output_format, must be one of [png, jpeg]";
|
if (i > 0) {
|
||||||
|
error_message += ", ";
|
||||||
|
}
|
||||||
|
error_message += valid_formats[i];
|
||||||
|
}
|
||||||
|
error_message += "]";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stores the normalized output format and the compression level (clamped to
// [0, 100]) on a video request, then checks the format against the formats
// this build supports.
//
// Returns true when the format is supported. Otherwise sets `error_message`
// to the list of valid formats and returns false; `request` keeps the
// assigned values in both cases.
bool assign_output_options(VidGenJobRequest& request,
                           std::string output_format,
                           int output_compression,
                           std::string& error_message) {
    request.output_format      = normalize_output_format(std::move(output_format));
    request.output_compression = std::clamp(output_compression, 0, 100);

    // Validate against the same list the error message and the capabilities
    // response are built from, so the accepted set cannot drift from what is
    // advertised.
    const std::vector<std::string> valid_formats = supported_vid_output_formats();
    const bool valid_format = std::find(valid_formats.begin(),
                                        valid_formats.end(),
                                        request.output_format) != valid_formats.end();
    if (!valid_format) {
        error_message = valid_vid_output_formats_message();
        return false;
    }
    return true;
}
|
||||||
|
|
||||||
|
// Maps a video output format name to the MIME type reported with the result.
// "webm" -> "video/webm", "webp" -> "image/webp"; any other value falls back
// to the AVI type "video/x-msvideo".
std::string video_mime_type(const std::string& output_format) {
    if (output_format == "webm") {
        return "video/webm";
    }
    if (output_format == "webp") {
        return "image/webp";
    }
    return "video/x-msvideo";
}
|
||||||
|
|
||||||
|
bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode) {
|
||||||
|
if (mode == VID_GEN) {
|
||||||
|
return sd_ctx_supports_video_generation(runtime.sd_ctx);
|
||||||
|
}
|
||||||
|
if (mode == IMG_GEN) {
|
||||||
|
return sd_ctx_supports_image_generation(runtime.sd_ctx);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the error text sent to clients when the loaded model cannot serve
// `mode`. VID_GEN and IMG_GEN get a mode-specific message; every other mode
// gets a generic one.
std::string unsupported_generation_mode_error(SDMode mode) {
    if (mode == VID_GEN) {
        return "loaded model does not support vid_gen";
    }
    if (mode == IMG_GEN) {
        return "loaded model does not support img_gen";
    }
    return "loaded model does not support requested mode";
}
|
||||||
|
|
||||||
ArgOptions SDSvrParams::get_options() {
|
ArgOptions SDSvrParams::get_options() {
|
||||||
ArgOptions options;
|
ArgOptions options;
|
||||||
|
|
||||||
|
|||||||
@ -58,13 +58,32 @@ struct ImgGenJobRequest {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct VidGenJobRequest {
|
||||||
|
SDGenerationParams gen_params;
|
||||||
|
std::string output_format = "webm";
|
||||||
|
int output_compression = 100;
|
||||||
|
|
||||||
|
sd_vid_gen_params_t to_sd_vid_gen_params_t() {
|
||||||
|
return gen_params.to_sd_vid_gen_params_t();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
std::string base64_encode(const std::vector<uint8_t>& bytes);
|
std::string base64_encode(const std::vector<uint8_t>& bytes);
|
||||||
std::string normalize_output_format(std::string output_format);
|
std::string normalize_output_format(std::string output_format);
|
||||||
|
std::vector<std::string> supported_img_output_formats(bool allow_webp = true);
|
||||||
|
std::vector<std::string> supported_vid_output_formats();
|
||||||
bool assign_output_options(ImgGenJobRequest& request,
|
bool assign_output_options(ImgGenJobRequest& request,
|
||||||
std::string output_format,
|
std::string output_format,
|
||||||
int output_compression,
|
int output_compression,
|
||||||
bool allow_webp,
|
bool allow_webp,
|
||||||
std::string& error_message);
|
std::string& error_message);
|
||||||
|
bool assign_output_options(VidGenJobRequest& request,
|
||||||
|
std::string output_format,
|
||||||
|
int output_compression,
|
||||||
|
std::string& error_message);
|
||||||
|
std::string video_mime_type(const std::string& output_format);
|
||||||
|
bool runtime_supports_generation_mode(const ServerRuntime& runtime, SDMode mode);
|
||||||
|
std::string unsupported_generation_mode_error(SDMode mode);
|
||||||
void refresh_lora_cache(ServerRuntime& rt);
|
void refresh_lora_cache(ServerRuntime& rt);
|
||||||
std::string get_lora_full_path(ServerRuntime& rt, const std::string& path);
|
std::string get_lora_full_path(ServerRuntime& rt, const std::string& path);
|
||||||
int64_t unix_timestamp_now();
|
int64_t unix_timestamp_now();
|
||||||
|
|||||||
@ -348,6 +348,8 @@ SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
|
|||||||
SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy, void* data);
|
SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy, void* data);
|
||||||
SD_API int32_t sd_get_num_physical_cores();
|
SD_API int32_t sd_get_num_physical_cores();
|
||||||
SD_API const char* sd_get_system_info();
|
SD_API const char* sd_get_system_info();
|
||||||
|
SD_API bool sd_ctx_supports_image_generation(const sd_ctx_t* sd_ctx);
|
||||||
|
SD_API bool sd_ctx_supports_video_generation(const sd_ctx_t* sd_ctx);
|
||||||
|
|
||||||
SD_API const char* sd_type_name(enum sd_type_t type);
|
SD_API const char* sd_type_name(enum sd_type_t type);
|
||||||
SD_API enum sd_type_t str_to_sd_type(const char* str);
|
SD_API enum sd_type_t str_to_sd_type(const char* str);
|
||||||
|
|||||||
104
src/denoiser.hpp
104
src/denoiser.hpp
@ -953,8 +953,9 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
|
|||||||
float t_next = t_fn(sigma_down);
|
float t_next = t_fn(sigma_down);
|
||||||
float h = t_next - t;
|
float h = t_next - t;
|
||||||
float s = t + 0.5f * h;
|
float s = t + 0.5f * h;
|
||||||
sd::Tensor<float> x2 = (sigma_fn(s) / sigma_fn(t)) * x - (exp(-h * 0.5f) - 1) * denoised;
|
float sigma_s = sigma_fn(s);
|
||||||
auto denoised2_opt = model(x2, sigmas[i + 1], i + 1);
|
sd::Tensor<float> x2 = (sigma_s / sigma_fn(t)) * x - (exp(-h * 0.5f) - 1) * denoised;
|
||||||
|
auto denoised2_opt = model(x2, sigma_s, i + 1);
|
||||||
if (denoised2_opt.empty()) {
|
if (denoised2_opt.empty()) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
@ -969,6 +970,102 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DPM++ 2S ancestral sampling loop for flow denoisers.
//
// For every consecutive pair (sigmas[i], sigmas[i + 1]) the sampler:
//   1. evaluates the model at the current sample and sigma;
//   2. if the target sigma is exactly 0, takes the denoised prediction as the
//      new sample (the Euler update x + ((x - denoised) / sigma) * (0 - sigma)
//      collapses to `denoised`);
//   3. otherwise obtains (sigma_down, sigma_up, alpha_scale) from
//      get_ancestral_step_flow(), evaluates the model a second time at a
//      midpoint sigma, steps down to sigma_down using that second prediction,
//      and finally re-injects noise scaled by sigma_up.
//
// Returns the final sample, or an empty tensor as soon as a model evaluation
// returns an empty result.
static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
                                                        sd::Tensor<float> x,
                                                        const std::vector<float>& sigmas,
                                                        std::shared_ptr<RNG> rng,
                                                        float eta = 1.0f) {
    const int n_steps = static_cast<int>(sigmas.size()) - 1;

    for (int i = 0; i < n_steps; ++i) {
        const float sigma_cur  = sigmas[i];
        const float sigma_next = sigmas[i + 1];
        const int step_no      = i + 1;

        // When sigma is (numerically) 1 the midpoint coincides with the current
        // point, so the second model evaluation would repeat the first one and
        // can be skipped (see the derivation below).
        const bool skip_midpoint = (1.0 - sigma_cur) < 1e-6;

        // The step index is passed negated when a second evaluation follows in
        // this iteration.
        // NOTE(review): presumably the sign tells the callback whether this is the
        // last evaluation of the step — confirm against denoise_cb_t.
        auto first_eval = model(x, sigma_cur, skip_midpoint ? step_no : -step_no);
        if (first_eval.empty()) {
            return {};
        }
        sd::Tensor<float> denoised = std::move(first_eval);

        if (sigma_next == 0.0f) {
            // Final step, no noise: the Euler update reduces to the prediction.
            x = denoised;
            continue;
        }

        auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step_flow(sigma_cur, sigma_next, eta);

        sd::Tensor<float> D_i;
        if (!skip_midpoint) {
            // Reference formulation, with lambda(s) = log((1 - s) / s) and its
            // inverse sigma(l) = 1 / (exp(l) + 1):
            //     t_i    = lambda(sigma)
            //     t_down = lambda(sigma_down)
            //     s      = t_i + r * (t_down - t_i),  r = 0.5
            //     sigma_s = sigma(s)
            // With r fixed at 0.5 the singularity at sigma -> 1 is avoided by
            // folding the two logs together:
            //     s      = 0.5 * (lambda(sigma) + lambda(sigma_down))
            //            = log(sqrt(((1 - sigma) / sigma) * ((1 - sigma_down) / sigma_down)))
            //     exp(s) = sqrt(((1 - sigma) / sigma) * ((1 - sigma_down) / sigma_down))
            // For sigma == 1 this gives exp(s) = 0, sigma_s = 1 and u = x, which is
            // why the skip_midpoint branch can reuse `denoised` directly.
            const float exp_s     = std::sqrt(((1 - sigma_cur) / sigma_cur) * ((1 - sigma_down) / sigma_down));
            const float sigma_mid = 1.0f / (exp_s + 1.0f);

            const float mid_ratio = sigma_mid / sigma_cur;
            sd::Tensor<float> u   = (x * mid_ratio) + (denoised * (1.0f - mid_ratio));

            auto second_eval = model(u, sigma_mid, step_no);
            if (second_eval.empty()) {
                return {};
            }
            D_i = std::move(second_eval);
        } else {
            D_i = denoised;
        }

        // Step from sigma down to sigma_down using the (midpoint) prediction.
        const float down_ratio = sigma_down / sigma_cur;
        x                      = (x * down_ratio) + (D_i * (1.0f - down_ratio));

        // Ancestral noise injection.
        if (sigma_next > 0.0f && eta > 0.0f) {
            x = alpha_scale * x + sd::Tensor<float>::randn_like(x, rng) * sigma_up;
        }
    }

    return x;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static sd::Tensor<float> sample_dpmpp_2m(denoise_cb_t model,
|
static sd::Tensor<float> sample_dpmpp_2m(denoise_cb_t model,
|
||||||
sd::Tensor<float> x,
|
sd::Tensor<float> x,
|
||||||
const std::vector<float>& sigmas) {
|
const std::vector<float>& sigmas) {
|
||||||
@ -1565,6 +1662,9 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
|
|||||||
case DPM2_SAMPLE_METHOD:
|
case DPM2_SAMPLE_METHOD:
|
||||||
return sample_dpm2(model, std::move(x), sigmas);
|
return sample_dpm2(model, std::move(x), sigmas);
|
||||||
case DPMPP2S_A_SAMPLE_METHOD:
|
case DPMPP2S_A_SAMPLE_METHOD:
|
||||||
|
if (is_flow_denoiser)
|
||||||
|
return sample_dpmpp_2s_ancestral_flow(model, std::move(x), sigmas, rng, eta);
|
||||||
|
else
|
||||||
return sample_dpmpp_2s_ancestral(model, std::move(x), sigmas, rng, eta);
|
return sample_dpmpp_2s_ancestral(model, std::move(x), sigmas, rng, eta);
|
||||||
case DPMPP2M_SAMPLE_METHOD:
|
case DPMPP2M_SAMPLE_METHOD:
|
||||||
return sample_dpmpp_2m(model, std::move(x), sigmas);
|
return sample_dpmpp_2m(model, std::move(x), sigmas);
|
||||||
|
|||||||
@ -943,6 +943,8 @@ public:
|
|||||||
pred_type = FLOW_PRED;
|
pred_type = FLOW_PRED;
|
||||||
if (sd_version_is_wan(version)) {
|
if (sd_version_is_wan(version)) {
|
||||||
default_flow_shift = 5.f;
|
default_flow_shift = 5.f;
|
||||||
|
} else if (sd_version_is_ernie_image(version)) {
|
||||||
|
default_flow_shift = 4.f;
|
||||||
} else {
|
} else {
|
||||||
default_flow_shift = 3.f;
|
default_flow_shift = 3.f;
|
||||||
}
|
}
|
||||||
@ -2388,6 +2390,14 @@ struct sd_ctx_t {
|
|||||||
StableDiffusionGGML* sd = nullptr;
|
StableDiffusionGGML* sd = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Returns true when `version` is VERSION_SVD or is reported as a Wan version
// by sd_version_is_wan().
static bool sd_version_supports_video_generation(SDVersion version) {
    if (version == VERSION_SVD) {
        return true;
    }
    return sd_version_is_wan(version);
}
|
||||||
|
|
||||||
|
// Image generation is treated as the exact complement of video generation:
// a version supports one or the other, never both.
static bool sd_version_supports_image_generation(SDVersion version) {
    const bool is_video_version = sd_version_supports_video_generation(version);
    return !is_video_version;
}
|
||||||
|
|
||||||
sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params) {
|
sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params) {
|
||||||
sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t));
|
sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t));
|
||||||
if (sd_ctx == nullptr) {
|
if (sd_ctx == nullptr) {
|
||||||
@ -2417,6 +2427,20 @@ void free_sd_ctx(sd_ctx_t* sd_ctx) {
|
|||||||
free(sd_ctx);
|
free(sd_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether the model held by `sd_ctx` supports image generation.
// Returns false when `sd_ctx` is null or holds no model (`sd_ctx->sd` is null).
SD_API bool sd_ctx_supports_image_generation(const sd_ctx_t* sd_ctx) {
    const bool has_model = (sd_ctx != nullptr) && (sd_ctx->sd != nullptr);
    return has_model ? sd_version_supports_image_generation(sd_ctx->sd->version) : false;
}
|
||||||
|
|
||||||
|
// Reports whether the model held by `sd_ctx` supports video generation.
// Returns false when `sd_ctx` is null or holds no model (`sd_ctx->sd` is null).
SD_API bool sd_ctx_supports_video_generation(const sd_ctx_t* sd_ctx) {
    const bool has_model = (sd_ctx != nullptr) && (sd_ctx->sd != nullptr);
    return has_model ? sd_version_supports_video_generation(sd_ctx->sd->version) : false;
}
|
||||||
|
|
||||||
enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx) {
|
enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx) {
|
||||||
if (sd_ctx != nullptr && sd_ctx->sd != nullptr) {
|
if (sd_ctx != nullptr && sd_ctx->sd != nullptr) {
|
||||||
if (sd_version_is_dit(sd_ctx->sd->version)) {
|
if (sd_version_is_dit(sd_ctx->sd->version)) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user