mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-12 21:38:58 +00:00
feat: add auto-resize support for reference images (was Qwen-Image-Edit only) (#898)
This commit is contained in:
parent
db6f4791b4
commit
90ef5f8246
@ -35,8 +35,9 @@ arguments:
|
|||||||
-r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times)
|
-r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times)
|
||||||
--control-video [PATH] path to control video frames, It must be a directory path.
|
--control-video [PATH] path to control video frames, It must be a directory path.
|
||||||
The video frames inside should be stored as images in lexicographical (character) order
|
The video frames inside should be stored as images in lexicographical (character) order
|
||||||
For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, … etc.
|
For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, ... etc.
|
||||||
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
|
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
|
||||||
|
--disable-auto-resize-ref-image disable auto resize of ref images
|
||||||
-o, --output OUTPUT path to write result image to (default: ./output.png)
|
-o, --output OUTPUT path to write result image to (default: ./output.png)
|
||||||
-p, --prompt [PROMPT] the prompt to render
|
-p, --prompt [PROMPT] the prompt to render
|
||||||
-n, --negative-prompt PROMPT the negative prompt (default: "")
|
-n, --negative-prompt PROMPT the negative prompt (default: "")
|
||||||
|
|||||||
@ -80,7 +80,8 @@ struct SDParams {
|
|||||||
std::string control_image_path;
|
std::string control_image_path;
|
||||||
std::vector<std::string> ref_image_paths;
|
std::vector<std::string> ref_image_paths;
|
||||||
std::string control_video_path;
|
std::string control_video_path;
|
||||||
bool increase_ref_index = false;
|
bool auto_resize_ref_image = true;
|
||||||
|
bool increase_ref_index = false;
|
||||||
|
|
||||||
std::string prompt;
|
std::string prompt;
|
||||||
std::string negative_prompt;
|
std::string negative_prompt;
|
||||||
@ -175,6 +176,7 @@ void print_params(SDParams params) {
|
|||||||
printf(" %s\n", path.c_str());
|
printf(" %s\n", path.c_str());
|
||||||
};
|
};
|
||||||
printf(" control_video_path: %s\n", params.control_video_path.c_str());
|
printf(" control_video_path: %s\n", params.control_video_path.c_str());
|
||||||
|
printf(" auto_resize_ref_image: %s\n", params.auto_resize_ref_image ? "true" : "false");
|
||||||
printf(" increase_ref_index: %s\n", params.increase_ref_index ? "true" : "false");
|
printf(" increase_ref_index: %s\n", params.increase_ref_index ? "true" : "false");
|
||||||
printf(" offload_params_to_cpu: %s\n", params.offload_params_to_cpu ? "true" : "false");
|
printf(" offload_params_to_cpu: %s\n", params.offload_params_to_cpu ? "true" : "false");
|
||||||
printf(" clip_on_cpu: %s\n", params.clip_on_cpu ? "true" : "false");
|
printf(" clip_on_cpu: %s\n", params.clip_on_cpu ? "true" : "false");
|
||||||
@ -244,9 +246,10 @@ void print_usage(int argc, const char* argv[]) {
|
|||||||
printf(" -i, --end-img [IMAGE] path to the end image, required by flf2v\n");
|
printf(" -i, --end-img [IMAGE] path to the end image, required by flf2v\n");
|
||||||
printf(" --control-image [IMAGE] path to image condition, control net\n");
|
printf(" --control-image [IMAGE] path to image condition, control net\n");
|
||||||
printf(" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n");
|
printf(" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n");
|
||||||
|
printf(" --disable-auto-resize-ref-image disable auto resize of ref images\n");
|
||||||
printf(" --control-video [PATH] path to control video frames, It must be a directory path.\n");
|
printf(" --control-video [PATH] path to control video frames, It must be a directory path.\n");
|
||||||
printf(" The video frames inside should be stored as images in lexicographical (character) order\n");
|
printf(" The video frames inside should be stored as images in lexicographical (character) order\n");
|
||||||
printf(" For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, … etc.\n");
|
printf(" For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, ... etc.\n");
|
||||||
printf(" --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).\n");
|
printf(" --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).\n");
|
||||||
printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
|
printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
|
||||||
printf(" -p, --prompt [PROMPT] the prompt to render\n");
|
printf(" -p, --prompt [PROMPT] the prompt to render\n");
|
||||||
@ -579,6 +582,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||||||
{"", "--chroma-disable-dit-mask", "", false, &params.chroma_use_dit_mask},
|
{"", "--chroma-disable-dit-mask", "", false, &params.chroma_use_dit_mask},
|
||||||
{"", "--chroma-enable-t5-mask", "", true, &params.chroma_use_t5_mask},
|
{"", "--chroma-enable-t5-mask", "", true, &params.chroma_use_t5_mask},
|
||||||
{"", "--increase-ref-index", "", true, &params.increase_ref_index},
|
{"", "--increase-ref-index", "", true, &params.increase_ref_index},
|
||||||
|
{"", "--disable-auto-resize-ref-image", "", false, &params.auto_resize_ref_image},
|
||||||
};
|
};
|
||||||
|
|
||||||
auto on_mode_arg = [&](int argc, const char** argv, int index) {
|
auto on_mode_arg = [&](int argc, const char** argv, int index) {
|
||||||
@ -1428,6 +1432,7 @@ int main(int argc, const char* argv[]) {
|
|||||||
init_image,
|
init_image,
|
||||||
ref_images.data(),
|
ref_images.data(),
|
||||||
(int)ref_images.size(),
|
(int)ref_images.size(),
|
||||||
|
params.auto_resize_ref_image,
|
||||||
params.increase_ref_index,
|
params.increase_ref_index,
|
||||||
mask_image,
|
mask_image,
|
||||||
params.width,
|
params.width,
|
||||||
|
|||||||
@ -1970,6 +1970,7 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
|||||||
"seed: %" PRId64
|
"seed: %" PRId64
|
||||||
"batch_count: %d\n"
|
"batch_count: %d\n"
|
||||||
"ref_images_count: %d\n"
|
"ref_images_count: %d\n"
|
||||||
|
"auto_resize_ref_image: %s\n"
|
||||||
"increase_ref_index: %s\n"
|
"increase_ref_index: %s\n"
|
||||||
"control_strength: %.2f\n"
|
"control_strength: %.2f\n"
|
||||||
"photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
|
"photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
|
||||||
@ -1984,6 +1985,7 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
|||||||
sd_img_gen_params->seed,
|
sd_img_gen_params->seed,
|
||||||
sd_img_gen_params->batch_count,
|
sd_img_gen_params->batch_count,
|
||||||
sd_img_gen_params->ref_images_count,
|
sd_img_gen_params->ref_images_count,
|
||||||
|
BOOL_STR(sd_img_gen_params->auto_resize_ref_image),
|
||||||
BOOL_STR(sd_img_gen_params->increase_ref_index),
|
BOOL_STR(sd_img_gen_params->increase_ref_index),
|
||||||
sd_img_gen_params->control_strength,
|
sd_img_gen_params->control_strength,
|
||||||
sd_img_gen_params->pm_params.style_strength,
|
sd_img_gen_params->pm_params.style_strength,
|
||||||
@ -2624,14 +2626,20 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||||||
std::vector<ggml_tensor*> ref_latents;
|
std::vector<ggml_tensor*> ref_latents;
|
||||||
for (int i = 0; i < ref_images.size(); i++) {
|
for (int i = 0; i < ref_images.size(); i++) {
|
||||||
ggml_tensor* img;
|
ggml_tensor* img;
|
||||||
if (sd_version_is_qwen_image(sd_ctx->sd->version)) {
|
if (sd_img_gen_params->auto_resize_ref_image) {
|
||||||
|
LOG_DEBUG("auto resize ref images");
|
||||||
sd_image_f32_t ref_image = sd_image_t_to_sd_image_f32_t(*ref_images[i]);
|
sd_image_f32_t ref_image = sd_image_t_to_sd_image_f32_t(*ref_images[i]);
|
||||||
int VAE_IMAGE_SIZE = std::min(1024 * 1024, width * height);
|
int VAE_IMAGE_SIZE = std::min(1024 * 1024, width * height);
|
||||||
double vae_width = sqrt(VAE_IMAGE_SIZE * ref_image.width / ref_image.height);
|
double vae_width = sqrt(VAE_IMAGE_SIZE * ref_image.width / ref_image.height);
|
||||||
double vae_height = vae_width * ref_image.height / ref_image.width;
|
double vae_height = vae_width * ref_image.height / ref_image.width;
|
||||||
|
|
||||||
vae_height = round(vae_height / 32) * 32;
|
int factor = 16;
|
||||||
vae_width = round(vae_width / 32) * 32;
|
if (sd_version_is_qwen_image(sd_ctx->sd->version)) {
|
||||||
|
factor = 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
vae_height = round(vae_height / factor) * factor;
|
||||||
|
vae_width = round(vae_width / factor) * factor;
|
||||||
|
|
||||||
sd_image_f32_t resized_image = resize_sd_image_f32_t(ref_image, static_cast<int>(vae_width), static_cast<int>(vae_height));
|
sd_image_f32_t resized_image = resize_sd_image_f32_t(ref_image, static_cast<int>(vae_width), static_cast<int>(vae_height));
|
||||||
free(ref_image.data);
|
free(ref_image.data);
|
||||||
|
|||||||
@ -216,6 +216,7 @@ typedef struct {
|
|||||||
sd_image_t init_image;
|
sd_image_t init_image;
|
||||||
sd_image_t* ref_images;
|
sd_image_t* ref_images;
|
||||||
int ref_images_count;
|
int ref_images_count;
|
||||||
|
bool auto_resize_ref_image;
|
||||||
bool increase_ref_index;
|
bool increase_ref_index;
|
||||||
sd_image_t mask_image;
|
sd_image_t mask_image;
|
||||||
int width;
|
int width;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user