leejet 67dda3f897
feat: add ltx2.3 support (#1463)
* add GemmaTokenizer

* add basic ltx2.3 support

* change vocab file encoding

* fix ci

* fix ubuntu build

* add temporal tiling support

* add ltx audio support

* update ggml submodule url

* fix generate_video

* add i2v support

* minify bundled Gemma tokenizer vocab sources

* pass video fps into temporal rope embeddings

* fix av_ca_timestep_scale_multiplier

* add LTX2Scheduler support

* update docs

* fix ci
2026-05-17 16:46:20 +08:00

114 lines
5.2 KiB
C++

#ifndef __MEDIA_IO_H__
#define __MEDIA_IO_H__
#include <cstdint>
#include <string>
#include <vector>
#include "stable-diffusion.h"
// Identifies the encoded (on-disk / in-memory) image container used by the
// image encoding helpers declared in this header. UNKNOWN is the enumerator
// for a format that could not be classified.
// The numeric values are spelled out explicitly; they match the implicit
// numbering the enumerators would receive in declaration order.
enum class EncodedImageFormat {
    JPEG    = 0,
    PNG     = 1,
    WEBP    = 2,
    UNKNOWN = 3,
};
// Returns the EncodedImageFormat associated with `path`.
// The function is only declared here; the mapping rule lives in the
// implementation file.
// NOTE(review): presumably the format is derived from the file extension and
// UNKNOWN is returned when nothing matches — confirm against the definition.
EncodedImageFormat encoded_image_format_from_path(const std::string& path);
// Encodes a pixel buffer into the requested `format` and returns the result
// as a byte vector (declaration only; defined elsewhere).
//
// Parameters:
//   format     - target container, one of EncodedImageFormat.
//   image      - pointer to the source pixel bytes (read-only).
//   width      - image width.
//   height     - image height.
//   channels   - number of channels per pixel.
//   parameters - optional string, empty by default.
//   quality    - encoder quality, 90 by default.
//
// NOTE(review): assumes `image` holds width * height * channels tightly packed
// bytes — confirm against the definition.
// NOTE(review): `parameters` is presumably metadata text embedded in the
// output, and `quality` presumably only matters for lossy formats — confirm.
// NOTE(review): how failure is signalled (e.g. an empty vector) is not visible
// from this header — confirm.
std::vector<uint8_t> encode_image_to_vector(EncodedImageFormat format,
const uint8_t* image,
int width,
int height,
int channels,
const std::string& parameters = "",
int quality = 90);
// Writes a pixel buffer to the file at `path` (declaration only; defined
// elsewhere).
//
// Parameters:
//   path       - destination file path.
//   image      - pointer to the source pixel bytes (read-only).
//   width      - image width.
//   height     - image height.
//   channels   - number of channels per pixel.
//   parameters - optional string, empty by default.
//   quality    - encoder quality, 90 by default.
//
// Returns a bool status.
// NOTE(review): presumably true means success and the output format is chosen
// from `path` (see encoded_image_format_from_path) — confirm against the
// definition.
bool write_image_to_file(const std::string& path,
const uint8_t* image,
int width,
int height,
int channels,
const std::string& parameters = "",
int quality = 90);
// Loads an image from `image_path` and returns a pointer to its pixel bytes
// (declaration only; defined elsewhere).
//
// Parameters:
//   image_path       - path of the image file to read.
//   width, height    - passed by non-const reference; presumably written with
//                      the dimensions of the returned image — confirm.
//   expected_width   - defaults to 0.
//   expected_height  - defaults to 0.
//   expected_channel - defaults to 3.
//
// NOTE(review): presumably an expected size of 0 means "no constraint" and a
// non-zero value triggers resizing/cropping — confirm against the definition.
// NOTE(review): the signature does not express who owns the returned buffer
// or how it must be released, nor whether nullptr signals failure — confirm
// before use.
uint8_t* load_image_from_file(const char* image_path,
int& width,
int& height,
int expected_width = 0,
int expected_height = 0,
int expected_channel = 3);
// Loads an image from `image_path` into the caller-supplied sd_image_t
// (declaration only; defined elsewhere).
//
// Parameters:
//   image            - destination sd_image_t, passed by pointer.
//   image_path       - path of the image file to read.
//   expected_width   - defaults to 0.
//   expected_height  - defaults to 0.
//   expected_channel - defaults to 3.
//
// Returns a bool status.
// NOTE(review): presumably true means `*image` was filled; ownership of any
// pixel buffer stored in `*image` is not visible here — confirm against the
// definition.
bool load_sd_image_from_file(sd_image_t* image,
const char* image_path,
int expected_width = 0,
int expected_height = 0,
int expected_channel = 3);
// In-memory counterpart of load_image_from_file: takes the image data as a
// byte buffer instead of a path (declaration only; defined elsewhere).
//
// Parameters:
//   image_bytes      - pointer to the source bytes.
//   len              - length of `image_bytes` (an int, so the buffer size is
//                      limited to the range of int).
//   width, height    - passed by non-const reference; presumably written with
//                      the dimensions of the returned image — confirm.
//   expected_width   - defaults to 0.
//   expected_height  - defaults to 0.
//   expected_channel - defaults to 3.
//
// NOTE(review): `image_bytes` is presumably an encoded image (e.g. PNG/JPEG
// file contents), not raw pixels — confirm against the definition.
// NOTE(review): ownership of the returned buffer and the failure value are
// not expressed in the signature — confirm before use.
uint8_t* load_image_from_memory(const char* image_bytes,
int len,
int& width,
int& height,
int expected_width = 0,
int expected_height = 0,
int expected_channel = 3);
// Builds an MJPG AVI from a sequence of sd_image_t frames and writes it to
// `filename` (declaration only; defined elsewhere).
//
// Parameters:
//   filename   - output file path.
//   images     - pointer to the first frame.
//   num_images - number of frames reachable through `images`.
//   fps        - frame rate.
//   quality    - encoder quality, 90 by default.
//   audio      - optional audio, nullptr by default.
//
// Returns an int status.
// NOTE(review): the return convention (presumably 0 on success) is not visible
// from this header — confirm against the definition.
int create_mjpg_avi_from_sd_images(const char* filename,
sd_image_t* images,
int num_images,
int fps,
int quality = 90,
const sd_audio_t* audio = nullptr);
// Same inputs as create_mjpg_avi_from_sd_images minus the filename; the
// result is returned as a byte vector instead of being written to a file
// (declaration only; defined elsewhere).
//
// Parameters:
//   images     - pointer to the first frame.
//   num_images - number of frames reachable through `images`.
//   fps        - frame rate.
//   quality    - encoder quality, 90 by default.
//   audio      - optional audio, nullptr by default.
//
// NOTE(review): presumably an empty vector signals failure — confirm against
// the definition.
std::vector<uint8_t> create_mjpg_avi_from_sd_images_to_vector(sd_image_t* images,
int num_images,
int fps,
int quality = 90,
const sd_audio_t* audio = nullptr);
#ifdef SD_USE_WEBP
// Builds an animated WebP from a sequence of sd_image_t frames and writes it
// to `filename`. Only declared when SD_USE_WEBP is defined. Unlike the AVI and
// WebM writers in this header, this signature takes no audio parameter.
//
// Parameters:
//   filename   - output file path.
//   images     - pointer to the first frame.
//   num_images - number of frames reachable through `images`.
//   fps        - frame rate.
//   quality    - encoder quality, 90 by default.
//
// Returns an int status.
// NOTE(review): the return convention (presumably 0 on success) is not visible
// from this header — confirm against the definition.
int create_animated_webp_from_sd_images(const char* filename,
sd_image_t* images,
int num_images,
int fps,
int quality = 90);
// Same inputs as create_animated_webp_from_sd_images minus the filename; the
// result is returned as a byte vector. Only declared when SD_USE_WEBP is
// defined.
//
// Parameters:
//   images     - pointer to the first frame.
//   num_images - number of frames reachable through `images`.
//   fps        - frame rate.
//   quality    - encoder quality, 90 by default.
//
// NOTE(review): presumably an empty vector signals failure — confirm against
// the definition.
std::vector<uint8_t> create_animated_webp_from_sd_images_to_vector(sd_image_t* images,
int num_images,
int fps,
int quality = 90);
#endif
#ifdef SD_USE_WEBM
// Builds a WebM video from a sequence of sd_image_t frames and writes it to
// `filename`. Only declared when SD_USE_WEBM is defined.
//
// Parameters:
//   filename   - output file path.
//   images     - pointer to the first frame.
//   num_images - number of frames reachable through `images`.
//   fps        - frame rate.
//   quality    - encoder quality, 90 by default.
//   audio      - optional audio, nullptr by default.
//
// Returns an int status.
// NOTE(review): the return convention (presumably 0 on success) is not visible
// from this header — confirm against the definition.
int create_webm_from_sd_images(const char* filename,
sd_image_t* images,
int num_images,
int fps,
int quality = 90,
const sd_audio_t* audio = nullptr);
// Same inputs as create_webm_from_sd_images minus the filename; the result is
// returned as a byte vector. Only declared when SD_USE_WEBM is defined.
//
// Parameters:
//   images     - pointer to the first frame.
//   num_images - number of frames reachable through `images`.
//   fps        - frame rate.
//   quality    - encoder quality, 90 by default.
//   audio      - optional audio, nullptr by default.
//
// NOTE(review): presumably an empty vector signals failure — confirm against
// the definition.
std::vector<uint8_t> create_webm_from_sd_images_to_vector(sd_image_t* images,
int num_images,
int fps,
int quality = 90,
const sd_audio_t* audio = nullptr);
#endif
// Format-agnostic video writer: always declared, regardless of SD_USE_WEBP /
// SD_USE_WEBM (declaration only; defined elsewhere).
//
// Parameters:
//   filename   - output file path.
//   images     - pointer to the first frame.
//   num_images - number of frames reachable through `images`.
//   fps        - frame rate.
//   quality    - encoder quality, 90 by default.
//   audio      - optional audio, nullptr by default.
//
// Returns an int status.
// NOTE(review): presumably picks the container from the extension of
// `filename` and forwards to one of the format-specific writers declared in
// this header — confirm against the definition, including the fallback when
// the matching format is not compiled in.
// NOTE(review): the return convention (presumably 0 on success) is not visible
// from this header — confirm.
int create_video_from_sd_images(const char* filename,
sd_image_t* images,
int num_images,
int fps,
int quality = 90,
const sd_audio_t* audio = nullptr);
// Format-agnostic, in-memory video encoder: the container is named by
// `output_format` rather than by a file path, and the result is returned as a
// byte vector (declaration only; defined elsewhere).
//
// Parameters:
//   output_format - string naming the desired container.
//   images        - pointer to the first frame.
//   num_images    - number of frames reachable through `images`.
//   fps           - frame rate.
//   quality       - encoder quality, 90 by default.
//   audio         - optional audio, nullptr by default.
//
// NOTE(review): the accepted `output_format` spellings and the behavior for an
// unsupported value (presumably an empty vector) are not visible from this
// header — confirm against the definition.
std::vector<uint8_t> create_video_from_sd_images_to_vector(const std::string& output_format,
sd_image_t* images,
int num_images,
int fps,
int quality = 90,
const sd_audio_t* audio = nullptr);
// Writes floating-point audio samples to a WAV file at `path` (declaration
// only; defined elsewhere).
//
// Parameters:
//   path                - destination file path.
//   interleaved_samples - pointer to the float samples (read-only); per the
//                         parameter name, channels are interleaved.
//   sample_count        - number of samples.
//   channels            - channel count.
//   sample_rate         - sampling rate.
//
// Returns a bool status.
// NOTE(review): whether `sample_count` counts per-channel frames or the total
// number of interleaved float values is not stated here — confirm against the
// definition before sizing buffers.
// NOTE(review): presumably true means success; the sample format written to
// the file (float vs. PCM) is not visible from this header — confirm.
bool write_wav_to_file(const std::string& path,
const float* interleaved_samples,
uint64_t sample_count,
uint32_t channels,
uint32_t sample_rate);
#endif // __MEDIA_IO_H__