mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-13 05:48:56 +00:00
add vace v2v support
This commit is contained in:
parent
e751ae6d6f
commit
f68ce0582a
@ -35,6 +35,8 @@
|
|||||||
#define SAFE_STR(s) ((s) ? (s) : "")
|
#define SAFE_STR(s) ((s) ? (s) : "")
|
||||||
#define BOOL_STR(b) ((b) ? "true" : "false")
|
#define BOOL_STR(b) ((b) ? "true" : "false")
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
const char* modes_str[] = {
|
const char* modes_str[] = {
|
||||||
"img_gen",
|
"img_gen",
|
||||||
"vid_gen",
|
"vid_gen",
|
||||||
@ -75,6 +77,7 @@ struct SDParams {
|
|||||||
std::string mask_image_path;
|
std::string mask_image_path;
|
||||||
std::string control_image_path;
|
std::string control_image_path;
|
||||||
std::vector<std::string> ref_image_paths;
|
std::vector<std::string> ref_image_paths;
|
||||||
|
std::string control_video_path;
|
||||||
bool increase_ref_index = false;
|
bool increase_ref_index = false;
|
||||||
|
|
||||||
std::string prompt;
|
std::string prompt;
|
||||||
@ -158,6 +161,7 @@ void print_params(SDParams params) {
|
|||||||
for (auto& path : params.ref_image_paths) {
|
for (auto& path : params.ref_image_paths) {
|
||||||
printf(" %s\n", path.c_str());
|
printf(" %s\n", path.c_str());
|
||||||
};
|
};
|
||||||
|
printf(" control_video_path: %s\n", params.control_video_path.c_str());
|
||||||
printf(" increase_ref_index: %s\n", params.increase_ref_index ? "true" : "false");
|
printf(" increase_ref_index: %s\n", params.increase_ref_index ? "true" : "false");
|
||||||
printf(" offload_params_to_cpu: %s\n", params.offload_params_to_cpu ? "true" : "false");
|
printf(" offload_params_to_cpu: %s\n", params.offload_params_to_cpu ? "true" : "false");
|
||||||
printf(" clip_on_cpu: %s\n", params.clip_on_cpu ? "true" : "false");
|
printf(" clip_on_cpu: %s\n", params.clip_on_cpu ? "true" : "false");
|
||||||
@ -178,7 +182,7 @@ void print_params(SDParams params) {
|
|||||||
printf(" flow_shift: %.2f\n", params.flow_shift);
|
printf(" flow_shift: %.2f\n", params.flow_shift);
|
||||||
printf(" strength(img2img): %.2f\n", params.strength);
|
printf(" strength(img2img): %.2f\n", params.strength);
|
||||||
printf(" rng: %s\n", sd_rng_type_name(params.rng_type));
|
printf(" rng: %s\n", sd_rng_type_name(params.rng_type));
|
||||||
printf(" seed: %ld\n", params.seed);
|
// %zd is the conversion for a signed size_t-sized argument, which is not what the seed is
// declared as on every ABI. Cast to long long and print with %lld so the specifier and the
// argument always agree.
printf(" seed: %lld\n", (long long)params.seed);
|
||||||
printf(" batch_count: %d\n", params.batch_count);
|
printf(" batch_count: %d\n", params.batch_count);
|
||||||
printf(" vae_tiling: %s\n", params.vae_tiling ? "true" : "false");
|
printf(" vae_tiling: %s\n", params.vae_tiling ? "true" : "false");
|
||||||
printf(" upscale_repeats: %d\n", params.upscale_repeats);
|
printf(" upscale_repeats: %d\n", params.upscale_repeats);
|
||||||
@ -226,6 +230,9 @@ void print_usage(int argc, const char* argv[]) {
|
|||||||
printf(" -i, --end-img [IMAGE] path to the end image, required by flf2v\n");
|
printf(" -i, --end-img [IMAGE] path to the end image, required by flf2v\n");
|
||||||
printf(" --control-image [IMAGE] path to image condition, control net\n");
|
printf(" --control-image [IMAGE] path to image condition, control net\n");
|
||||||
printf(" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n");
|
printf(" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n");
|
||||||
|
// Help text for --control-video. Every line is terminated with '\n'; without it the
// continuation line below would be printed on the same line as the option description.
printf(" --control-video [PATH] path to control video frames, It must be a directory path.\n");
printf(" The video frames inside should be stored as images in lexicographical (character) order\n");
printf(" For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, … etc.\n");
|
||||||
printf(" --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).\n");
|
printf(" --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).\n");
|
||||||
printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
|
printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
|
||||||
printf(" -p, --prompt [PROMPT] the prompt to render\n");
|
printf(" -p, --prompt [PROMPT] the prompt to render\n");
|
||||||
@ -484,6 +491,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||||||
{"", "--input-id-images-dir", "", ¶ms.input_id_images_path},
|
{"", "--input-id-images-dir", "", ¶ms.input_id_images_path},
|
||||||
{"", "--mask", "", ¶ms.mask_image_path},
|
{"", "--mask", "", ¶ms.mask_image_path},
|
||||||
{"", "--control-image", "", ¶ms.control_image_path},
|
{"", "--control-image", "", ¶ms.control_image_path},
|
||||||
|
{"", "--control-video", "", ¶ms.control_video_path},
|
||||||
{"-o", "--output", "", ¶ms.output_path},
|
{"-o", "--output", "", ¶ms.output_path},
|
||||||
{"-p", "--prompt", "", ¶ms.prompt},
|
{"-p", "--prompt", "", ¶ms.prompt},
|
||||||
{"-n", "--negative-prompt", "", ¶ms.negative_prompt},
|
{"-n", "--negative-prompt", "", ¶ms.negative_prompt},
|
||||||
@ -1062,6 +1070,7 @@ int main(int argc, const char* argv[]) {
|
|||||||
sd_image_t control_image = {(uint32_t)params.width, (uint32_t)params.height, 3, NULL};
|
sd_image_t control_image = {(uint32_t)params.width, (uint32_t)params.height, 3, NULL};
|
||||||
sd_image_t mask_image = {(uint32_t)params.width, (uint32_t)params.height, 1, NULL};
|
sd_image_t mask_image = {(uint32_t)params.width, (uint32_t)params.height, 1, NULL};
|
||||||
std::vector<sd_image_t> ref_images;
|
std::vector<sd_image_t> ref_images;
|
||||||
|
std::vector<sd_image_t> control_frames;
|
||||||
|
|
||||||
auto release_all_resources = [&]() {
|
auto release_all_resources = [&]() {
|
||||||
free(init_image.data);
|
free(init_image.data);
|
||||||
@ -1073,6 +1082,11 @@ int main(int argc, const char* argv[]) {
|
|||||||
ref_image.data = NULL;
|
ref_image.data = NULL;
|
||||||
}
|
}
|
||||||
ref_images.clear();
|
ref_images.clear();
|
||||||
|
for (auto frame : control_frames) {
|
||||||
|
free(frame.data);
|
||||||
|
frame.data = NULL;
|
||||||
|
}
|
||||||
|
control_frames.clear();
|
||||||
};
|
};
|
||||||
|
|
||||||
if (params.init_image_path.size() > 0) {
|
if (params.init_image_path.size() > 0) {
|
||||||
@ -1131,14 +1145,12 @@ int main(int argc, const char* argv[]) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (params.canny_preprocess) { // apply preprocessor
|
if (params.canny_preprocess) { // apply preprocessor
|
||||||
control_image.data = preprocess_canny(control_image.data,
|
preprocess_canny(control_image,
|
||||||
control_image.width,
|
0.08f,
|
||||||
control_image.height,
|
0.08f,
|
||||||
0.08f,
|
0.8f,
|
||||||
0.08f,
|
1.0f,
|
||||||
0.8f,
|
false);
|
||||||
1.0f,
|
|
||||||
false);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1160,6 +1172,48 @@ int main(int argc, const char* argv[]) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Load the control video: every supported image file inside the given directory becomes
// one control frame, resized by load_image() to params.width x params.height.
if (!params.control_video_path.empty()) {
    const std::string dir = params.control_video_path;

    if (!fs::exists(dir) || !fs::is_directory(dir)) {
        fprintf(stderr, "'%s' is not a valid directory\n", dir.c_str());
        release_all_resources();
        return 1;
    }

    // fs::directory_iterator visits entries in an unspecified order, but the frames must be
    // consumed in lexicographical file-name order (as the --control-video help text states).
    // Collect the candidate paths first, then sort them before loading anything.
    std::vector<std::string> frame_paths;
    for (const auto& entry : fs::directory_iterator(dir)) {
        if (!entry.is_regular_file()) {
            continue;
        }

        std::string ext = entry.path().extension().string();
        // Go through unsigned char: calling tolower with a negative char value is undefined.
        std::transform(ext.begin(), ext.end(), ext.begin(),
                       [](unsigned char c) { return static_cast<char>(::tolower(c)); });

        if (ext == ".jpg" || ext == ".jpeg" || ext == ".png" || ext == ".bmp") {
            frame_paths.push_back(entry.path().string());
        }
    }
    // All paths share the same directory prefix, so this orders them by file name.
    std::sort(frame_paths.begin(), frame_paths.end());

    for (const std::string& path : frame_paths) {
        if (params.verbose) {
            printf("load control frame %zu from '%s'\n", control_frames.size(), path.c_str());
        }

        int width  = 0;
        int height = 0;
        uint8_t* image_buffer = load_image(path.c_str(), width, height, params.width, params.height);
        if (image_buffer == NULL) {
            fprintf(stderr, "load image from '%s' failed\n", path.c_str());
            release_all_resources();
            return 1;
        }

        // The vector takes over the buffer; release_all_resources() frees it later.
        control_frames.push_back({(uint32_t)params.width,
                                  (uint32_t)params.height,
                                  3,
                                  image_buffer});

        // Stop as soon as there are as many control frames as requested video frames.
        if (control_frames.size() >= params.video_frames) {
            break;
        }
    }
}
|
||||||
|
|
||||||
if (params.mode == VID_GEN) {
|
if (params.mode == VID_GEN) {
|
||||||
vae_decode_only = false;
|
vae_decode_only = false;
|
||||||
}
|
}
|
||||||
@ -1239,6 +1293,8 @@ int main(int argc, const char* argv[]) {
|
|||||||
params.clip_skip,
|
params.clip_skip,
|
||||||
init_image,
|
init_image,
|
||||||
end_image,
|
end_image,
|
||||||
|
control_frames.data(),
|
||||||
|
(int)control_frames.size(),
|
||||||
params.width,
|
params.width,
|
||||||
params.height,
|
params.height,
|
||||||
params.sample_params,
|
params.sample_params,
|
||||||
@ -1290,7 +1346,6 @@ int main(int argc, const char* argv[]) {
|
|||||||
|
|
||||||
// create directory if not exists
|
// create directory if not exists
|
||||||
{
|
{
|
||||||
namespace fs = std::filesystem;
|
|
||||||
const fs::path out_path = params.output_path;
|
const fs::path out_path = params.output_path;
|
||||||
if (const fs::path out_dir = out_path.parent_path(); !out_dir.empty()) {
|
if (const fs::path out_dir = out_path.parent_path(); !out_dir.empty()) {
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
|
|||||||
@ -173,6 +173,14 @@ __STATIC_INLINE__ ggml_fp16_t ggml_tensor_get_f16(const ggml_tensor* tensor, int
|
|||||||
return *(ggml_fp16_t*)((char*)(tensor->data) + i * tensor->nb[3] + j * tensor->nb[2] + k * tensor->nb[1] + l * tensor->nb[0]);
|
return *(ggml_fp16_t*)((char*)(tensor->data) + i * tensor->nb[3] + j * tensor->nb[2] + k * tensor->nb[1] + l * tensor->nb[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns one sample of `image` as a float.
//
// The sample is addressed as row `ih`, column `iw`, channel `ic`, with the channels of a
// pixel stored next to each other and rows stored one after another
// (offset = ih * width * channel + iw * channel + ic).
// When `scale` is true the raw value is divided by 255; otherwise it is returned unchanged.
// No bounds checking is performed on the indices.
__STATIC_INLINE__ float sd_image_get_f32(sd_image_t image, int iw, int ih, int ic, bool scale = true) {
    const auto offset = ih * image.width * image.channel + iw * image.channel + ic;
    const float raw   = image.data[offset];
    return scale ? raw / 255.f : raw;
}
|
||||||
|
|
||||||
static struct ggml_tensor* get_tensor_from_graph(struct ggml_cgraph* gf, const char* name) {
|
static struct ggml_tensor* get_tensor_from_graph(struct ggml_cgraph* gf, const char* name) {
|
||||||
struct ggml_tensor* res = NULL;
|
struct ggml_tensor* res = NULL;
|
||||||
for (int i = 0; i < ggml_graph_n_nodes(gf); i++) {
|
for (int i = 0; i < ggml_graph_n_nodes(gf); i++) {
|
||||||
@ -255,13 +263,12 @@ __STATIC_INLINE__ void ggml_tensor_iter(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
__STATIC_INLINE__ void ggml_tensor_diff(
|
__STATIC_INLINE__ void ggml_tensor_diff(
|
||||||
ggml_tensor* a,
|
ggml_tensor* a,
|
||||||
ggml_tensor* b,
|
ggml_tensor* b,
|
||||||
float gap = 0.1f) {
|
float gap = 0.1f) {
|
||||||
GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
|
GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
|
||||||
ggml_tensor_iter(a, [&] (ggml_tensor* a, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
|
ggml_tensor_iter(a, [&](ggml_tensor* a, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
|
||||||
float a_value = ggml_tensor_get_f32(a, i0, i1, i2, i3);
|
float a_value = ggml_tensor_get_f32(a, i0, i1, i2, i3);
|
||||||
float b_value = ggml_tensor_get_f32(b, i0, i1, i2, i3);
|
float b_value = ggml_tensor_get_f32(b, i0, i1, i2, i3);
|
||||||
if (abs(a_value - b_value) > gap) {
|
if (abs(a_value - b_value) > gap) {
|
||||||
@ -401,42 +408,18 @@ __STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input, int idx
|
|||||||
return image_data;
|
return image_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
__STATIC_INLINE__ void sd_image_to_tensor(const uint8_t* image_data,
|
__STATIC_INLINE__ void sd_image_to_tensor(sd_image_t image,
|
||||||
struct ggml_tensor* output,
|
ggml_tensor* tensor,
|
||||||
bool scale = true) {
|
bool scale = true) {
|
||||||
int64_t width = output->ne[0];
|
GGML_ASSERT(image.width == tensor->ne[0]);
|
||||||
int64_t height = output->ne[1];
|
GGML_ASSERT(image.height == tensor->ne[1]);
|
||||||
int64_t channels = output->ne[2];
|
GGML_ASSERT(image.channel == tensor->ne[2]);
|
||||||
GGML_ASSERT(channels == 3 && output->type == GGML_TYPE_F32);
|
GGML_ASSERT(1 == tensor->ne[3]);
|
||||||
for (int iy = 0; iy < height; iy++) {
|
GGML_ASSERT(tensor->type == GGML_TYPE_F32);
|
||||||
for (int ix = 0; ix < width; ix++) {
|
ggml_tensor_iter(tensor, [&](ggml_tensor* tensor, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
|
||||||
for (int k = 0; k < channels; k++) {
|
float value = sd_image_get_f32(image, i0, i1, i2, scale);
|
||||||
float value = *(image_data + iy * width * channels + ix * channels + k);
|
ggml_tensor_set_f32(tensor, value, i0, i1, i2, i3);
|
||||||
if (scale) {
|
});
|
||||||
value /= 255.f;
|
|
||||||
}
|
|
||||||
ggml_tensor_set_f32(output, value, ix, iy, k);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__STATIC_INLINE__ void sd_mask_to_tensor(const uint8_t* image_data,
|
|
||||||
struct ggml_tensor* output,
|
|
||||||
bool scale = true) {
|
|
||||||
int64_t width = output->ne[0];
|
|
||||||
int64_t height = output->ne[1];
|
|
||||||
int64_t channels = output->ne[2];
|
|
||||||
GGML_ASSERT(channels == 1 && output->type == GGML_TYPE_F32);
|
|
||||||
for (int iy = 0; iy < height; iy++) {
|
|
||||||
for (int ix = 0; ix < width; ix++) {
|
|
||||||
float value = *(image_data + iy * width * channels + ix);
|
|
||||||
if (scale) {
|
|
||||||
value /= 255.f;
|
|
||||||
}
|
|
||||||
ggml_tensor_set_f32(output, value, ix, iy);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__STATIC_INLINE__ void sd_apply_mask(struct ggml_tensor* image_data,
|
__STATIC_INLINE__ void sd_apply_mask(struct ggml_tensor* image_data,
|
||||||
|
|||||||
@ -162,7 +162,7 @@ void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float lo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
|
bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
|
||||||
struct ggml_init_params params;
|
struct ggml_init_params params;
|
||||||
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10
|
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10
|
||||||
params.mem_buffer = NULL;
|
params.mem_buffer = NULL;
|
||||||
@ -171,7 +171,7 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_thresh
|
|||||||
|
|
||||||
if (!work_ctx) {
|
if (!work_ctx) {
|
||||||
LOG_ERROR("ggml_init() failed");
|
LOG_ERROR("ggml_init() failed");
|
||||||
return NULL;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
float kX[9] = {
|
float kX[9] = {
|
||||||
@ -192,8 +192,8 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_thresh
|
|||||||
struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
|
struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
|
||||||
memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky));
|
memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky));
|
||||||
gaussian_kernel(gkernel);
|
gaussian_kernel(gkernel);
|
||||||
struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, img.width, img.height, 3, 1);
|
||||||
struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 1, 1);
|
struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, img.width, img.height, 1, 1);
|
||||||
struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray);
|
struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray);
|
||||||
struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray);
|
struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray);
|
||||||
struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray);
|
struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray);
|
||||||
@ -209,8 +209,8 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_thresh
|
|||||||
non_max_supression(image_gray, G, tetha);
|
non_max_supression(image_gray, G, tetha);
|
||||||
threshold_hystersis(image_gray, high_threshold, low_threshold, weak, strong);
|
threshold_hystersis(image_gray, high_threshold, low_threshold, weak, strong);
|
||||||
// to RGB channels
|
// to RGB channels
|
||||||
for (int iy = 0; iy < height; iy++) {
|
for (int iy = 0; iy < img.height; iy++) {
|
||||||
for (int ix = 0; ix < width; ix++) {
|
for (int ix = 0; ix < img.width; ix++) {
|
||||||
float gray = ggml_tensor_get_f32(image_gray, ix, iy);
|
float gray = ggml_tensor_get_f32(image_gray, ix, iy);
|
||||||
gray = inverse ? 1.0f - gray : gray;
|
gray = inverse ? 1.0f - gray : gray;
|
||||||
ggml_tensor_set_f32(image, gray, ix, iy);
|
ggml_tensor_set_f32(image, gray, ix, iy);
|
||||||
@ -218,10 +218,11 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_thresh
|
|||||||
ggml_tensor_set_f32(image, gray, ix, iy, 2);
|
ggml_tensor_set_f32(image, gray, ix, iy, 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(img);
|
|
||||||
uint8_t* output = sd_tensor_to_image(image);
|
uint8_t* output = sd_tensor_to_image(image);
|
||||||
|
free(img.data);
|
||||||
|
img.data = output;
|
||||||
ggml_free(work_ctx);
|
ggml_free(work_ctx);
|
||||||
return output;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // __PREPROCESSING_HPP__
|
#endif // __PREPROCESSING_HPP__
|
||||||
@ -952,7 +952,7 @@ public:
|
|||||||
free(resized_image.data);
|
free(resized_image.data);
|
||||||
resized_image.data = NULL;
|
resized_image.data = NULL;
|
||||||
} else {
|
} else {
|
||||||
sd_image_to_tensor(init_image.data, init_img);
|
sd_image_to_tensor(init_image, init_img);
|
||||||
}
|
}
|
||||||
if (augmentation_level > 0.f) {
|
if (augmentation_level > 0.f) {
|
||||||
struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, init_img);
|
struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, init_img);
|
||||||
@ -1947,7 +1947,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||||||
struct ggml_tensor* image_hint = NULL;
|
struct ggml_tensor* image_hint = NULL;
|
||||||
if (control_image.data != NULL) {
|
if (control_image.data != NULL) {
|
||||||
image_hint = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
image_hint = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
||||||
sd_image_to_tensor(control_image.data, image_hint);
|
sd_image_to_tensor(control_image, image_hint);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sample
|
// Sample
|
||||||
@ -2208,8 +2208,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||||||
ggml_tensor* init_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
ggml_tensor* init_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
||||||
ggml_tensor* mask_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 1, 1);
|
ggml_tensor* mask_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 1, 1);
|
||||||
|
|
||||||
sd_mask_to_tensor(sd_img_gen_params->mask_image.data, mask_img);
|
sd_image_to_tensor(sd_img_gen_params->mask_image, mask_img);
|
||||||
sd_image_to_tensor(sd_img_gen_params->init_image.data, init_img);
|
sd_image_to_tensor(sd_img_gen_params->init_image, init_img);
|
||||||
|
|
||||||
if (sd_version_is_inpaint(sd_ctx->sd->version)) {
|
if (sd_version_is_inpaint(sd_ctx->sd->version)) {
|
||||||
int64_t mask_channels = 1;
|
int64_t mask_channels = 1;
|
||||||
@ -2300,7 +2300,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||||||
sd_img_gen_params->ref_images[i].height,
|
sd_img_gen_params->ref_images[i].height,
|
||||||
3,
|
3,
|
||||||
1);
|
1);
|
||||||
sd_image_to_tensor(sd_img_gen_params->ref_images[i].data, img);
|
sd_image_to_tensor(sd_img_gen_params->ref_images[i], img);
|
||||||
|
|
||||||
ggml_tensor* latent = NULL;
|
ggml_tensor* latent = NULL;
|
||||||
if (sd_ctx->sd->use_tiny_autoencoder) {
|
if (sd_ctx->sd->use_tiny_autoencoder) {
|
||||||
@ -2401,7 +2401,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_init_params params;
|
struct ggml_init_params params;
|
||||||
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1G
|
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1G
|
||||||
params.mem_buffer = NULL;
|
params.mem_buffer = NULL;
|
||||||
params.no_alloc = false;
|
params.no_alloc = false;
|
||||||
// LOG_DEBUG("mem_size %u ", params.mem_size);
|
// LOG_DEBUG("mem_size %u ", params.mem_size);
|
||||||
@ -2500,7 +2500,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||||||
|
|
||||||
int64_t t1 = ggml_time_ms();
|
int64_t t1 = ggml_time_ms();
|
||||||
ggml_tensor* init_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
ggml_tensor* init_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
||||||
sd_image_to_tensor(sd_vid_gen_params->init_image.data, init_img);
|
sd_image_to_tensor(sd_vid_gen_params->init_image, init_img);
|
||||||
init_img = ggml_reshape_4d(work_ctx, init_img, width, height, 1, 3);
|
init_img = ggml_reshape_4d(work_ctx, init_img, width, height, 1, 3);
|
||||||
|
|
||||||
auto init_image_latent = sd_ctx->sd->encode_first_stage(work_ctx, init_img); // [b*c, 1, h/16, w/16]
|
auto init_image_latent = sd_ctx->sd->encode_first_stage(work_ctx, init_img); // [b*c, 1, h/16, w/16]
|
||||||
@ -2530,7 +2530,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||||||
ggml_tensor* ref_image_latent = NULL;
|
ggml_tensor* ref_image_latent = NULL;
|
||||||
if (sd_vid_gen_params->init_image.data) {
|
if (sd_vid_gen_params->init_image.data) {
|
||||||
ggml_tensor* ref_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
ggml_tensor* ref_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
|
||||||
sd_image_to_tensor(sd_vid_gen_params->init_image.data, ref_img);
|
sd_image_to_tensor(sd_vid_gen_params->init_image, ref_img);
|
||||||
ref_img = ggml_reshape_4d(work_ctx, ref_img, width, height, 1, 3);
|
ref_img = ggml_reshape_4d(work_ctx, ref_img, width, height, 1, 3);
|
||||||
|
|
||||||
ref_image_latent = sd_ctx->sd->encode_first_stage(work_ctx, ref_img); // [b*c, 1, h/16, w/16]
|
ref_image_latent = sd_ctx->sd->encode_first_stage(work_ctx, ref_img); // [b*c, 1, h/16, w/16]
|
||||||
@ -2541,7 +2541,13 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||||||
}
|
}
|
||||||
|
|
||||||
ggml_tensor* control_video = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, frames, 3);
|
ggml_tensor* control_video = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, frames, 3);
|
||||||
ggml_set_f32(control_video, 0.5f);
|
ggml_tensor_iter(control_video, [&](ggml_tensor* control_video, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
|
||||||
|
float value = 0.5f;
|
||||||
|
if (i2 < sd_vid_gen_params->control_frames_size) {
|
||||||
|
value = sd_image_get_f32(sd_vid_gen_params->control_frames[i2], i0, i1, i3);
|
||||||
|
}
|
||||||
|
ggml_tensor_set_f32(control_video, value, i0, i1, i2, i3);
|
||||||
|
});
|
||||||
ggml_tensor* mask = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, frames, 1);
|
ggml_tensor* mask = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, frames, 1);
|
||||||
ggml_set_f32(mask, 1.0f);
|
ggml_set_f32(mask, 1.0f);
|
||||||
ggml_tensor* inactive = ggml_dup_tensor(work_ctx, control_video);
|
ggml_tensor* inactive = ggml_dup_tensor(work_ctx, control_video);
|
||||||
|
|||||||
@ -203,6 +203,8 @@ typedef struct {
|
|||||||
int clip_skip;
|
int clip_skip;
|
||||||
sd_image_t init_image;
|
sd_image_t init_image;
|
||||||
sd_image_t end_image;
|
sd_image_t end_image;
|
||||||
|
sd_image_t* control_frames;
|
||||||
|
int control_frames_size;
|
||||||
int width;
|
int width;
|
||||||
int height;
|
int height;
|
||||||
sd_sample_params_t sample_params;
|
sd_sample_params_t sample_params;
|
||||||
@ -267,14 +269,12 @@ SD_API bool convert(const char* input_path,
|
|||||||
enum sd_type_t output_type,
|
enum sd_type_t output_type,
|
||||||
const char* tensor_type_rules);
|
const char* tensor_type_rules);
|
||||||
|
|
||||||
SD_API uint8_t* preprocess_canny(uint8_t* img,
|
SD_API bool preprocess_canny(sd_image_t image,
|
||||||
int width,
|
float high_threshold,
|
||||||
int height,
|
float low_threshold,
|
||||||
float high_threshold,
|
float weak,
|
||||||
float low_threshold,
|
float strong,
|
||||||
float weak,
|
bool inverse);
|
||||||
float strong,
|
|
||||||
bool inverse);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|||||||
@ -82,7 +82,7 @@ struct UpscalerGGML {
|
|||||||
}
|
}
|
||||||
LOG_DEBUG("upscale work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
|
LOG_DEBUG("upscale work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
|
||||||
ggml_tensor* input_image_tensor = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, input_image.width, input_image.height, 3, 1);
|
ggml_tensor* input_image_tensor = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, input_image.width, input_image.height, 3, 1);
|
||||||
sd_image_to_tensor(input_image.data, input_image_tensor);
|
sd_image_to_tensor(input_image, input_image_tensor);
|
||||||
|
|
||||||
ggml_tensor* upscaled = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, output_width, output_height, 3, 1);
|
ggml_tensor* upscaled = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, output_width, output_height, 3, 1);
|
||||||
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user