mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-13 05:48:56 +00:00
feat: support incrementing ref image index (omni-kontext) (#755)
* kontext: support ref images indices * lora: support x_embedder * update help message * Support for negative indices * support for OmniControl (offsets at index 0) * c++11 compat * add --increase-ref-index option * simplify the logic and fix some issues * update README.md * remove unused variable --------- Co-authored-by: leejet <leejet714@gmail.com>
This commit is contained in:
parent
f8fe4e7db9
commit
c587a43c99
@ -319,6 +319,7 @@ arguments:
|
|||||||
-i, --end-img [IMAGE] path to the end image, required by flf2v
|
-i, --end-img [IMAGE] path to the end image, required by flf2v
|
||||||
--control-image [IMAGE] path to image condition, control net
|
--control-image [IMAGE] path to image condition, control net
|
||||||
-r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times)
|
-r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times)
|
||||||
|
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
|
||||||
-o, --output OUTPUT path to write result image to (default: ./output.png)
|
-o, --output OUTPUT path to write result image to (default: ./output.png)
|
||||||
-p, --prompt [PROMPT] the prompt to render
|
-p, --prompt [PROMPT] the prompt to render
|
||||||
-n, --negative-prompt PROMPT the negative prompt (default: "")
|
-n, --negative-prompt PROMPT the negative prompt (default: "")
|
||||||
|
|||||||
@ -16,6 +16,7 @@ struct DiffusionModel {
|
|||||||
struct ggml_tensor* y,
|
struct ggml_tensor* y,
|
||||||
struct ggml_tensor* guidance,
|
struct ggml_tensor* guidance,
|
||||||
std::vector<ggml_tensor*> ref_latents = {},
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
|
bool increase_ref_index = false,
|
||||||
int num_video_frames = -1,
|
int num_video_frames = -1,
|
||||||
std::vector<struct ggml_tensor*> controls = {},
|
std::vector<struct ggml_tensor*> controls = {},
|
||||||
float control_strength = 0.f,
|
float control_strength = 0.f,
|
||||||
@ -77,6 +78,7 @@ struct UNetModel : public DiffusionModel {
|
|||||||
struct ggml_tensor* y,
|
struct ggml_tensor* y,
|
||||||
struct ggml_tensor* guidance,
|
struct ggml_tensor* guidance,
|
||||||
std::vector<ggml_tensor*> ref_latents = {},
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
|
bool increase_ref_index = false,
|
||||||
int num_video_frames = -1,
|
int num_video_frames = -1,
|
||||||
std::vector<struct ggml_tensor*> controls = {},
|
std::vector<struct ggml_tensor*> controls = {},
|
||||||
float control_strength = 0.f,
|
float control_strength = 0.f,
|
||||||
@ -133,6 +135,7 @@ struct MMDiTModel : public DiffusionModel {
|
|||||||
struct ggml_tensor* y,
|
struct ggml_tensor* y,
|
||||||
struct ggml_tensor* guidance,
|
struct ggml_tensor* guidance,
|
||||||
std::vector<ggml_tensor*> ref_latents = {},
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
|
bool increase_ref_index = false,
|
||||||
int num_video_frames = -1,
|
int num_video_frames = -1,
|
||||||
std::vector<struct ggml_tensor*> controls = {},
|
std::vector<struct ggml_tensor*> controls = {},
|
||||||
float control_strength = 0.f,
|
float control_strength = 0.f,
|
||||||
@ -191,13 +194,14 @@ struct FluxModel : public DiffusionModel {
|
|||||||
struct ggml_tensor* y,
|
struct ggml_tensor* y,
|
||||||
struct ggml_tensor* guidance,
|
struct ggml_tensor* guidance,
|
||||||
std::vector<ggml_tensor*> ref_latents = {},
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
|
bool increase_ref_index = false,
|
||||||
int num_video_frames = -1,
|
int num_video_frames = -1,
|
||||||
std::vector<struct ggml_tensor*> controls = {},
|
std::vector<struct ggml_tensor*> controls = {},
|
||||||
float control_strength = 0.f,
|
float control_strength = 0.f,
|
||||||
struct ggml_tensor** output = NULL,
|
struct ggml_tensor** output = NULL,
|
||||||
struct ggml_context* output_ctx = NULL,
|
struct ggml_context* output_ctx = NULL,
|
||||||
std::vector<int> skip_layers = std::vector<int>()) {
|
std::vector<int> skip_layers = std::vector<int>()) {
|
||||||
return flux.compute(n_threads, x, timesteps, context, c_concat, y, guidance, ref_latents, output, output_ctx, skip_layers);
|
return flux.compute(n_threads, x, timesteps, context, c_concat, y, guidance, ref_latents, increase_ref_index, output, output_ctx, skip_layers);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -250,6 +254,7 @@ struct WanModel : public DiffusionModel {
|
|||||||
struct ggml_tensor* y,
|
struct ggml_tensor* y,
|
||||||
struct ggml_tensor* guidance,
|
struct ggml_tensor* guidance,
|
||||||
std::vector<ggml_tensor*> ref_latents = {},
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
|
bool increase_ref_index = false,
|
||||||
int num_video_frames = -1,
|
int num_video_frames = -1,
|
||||||
std::vector<struct ggml_tensor*> controls = {},
|
std::vector<struct ggml_tensor*> controls = {},
|
||||||
float control_strength = 0.f,
|
float control_strength = 0.f,
|
||||||
|
|||||||
@ -74,6 +74,7 @@ struct SDParams {
|
|||||||
std::string mask_image_path;
|
std::string mask_image_path;
|
||||||
std::string control_image_path;
|
std::string control_image_path;
|
||||||
std::vector<std::string> ref_image_paths;
|
std::vector<std::string> ref_image_paths;
|
||||||
|
bool increase_ref_index = false;
|
||||||
|
|
||||||
std::string prompt;
|
std::string prompt;
|
||||||
std::string negative_prompt;
|
std::string negative_prompt;
|
||||||
@ -156,6 +157,7 @@ void print_params(SDParams params) {
|
|||||||
for (auto& path : params.ref_image_paths) {
|
for (auto& path : params.ref_image_paths) {
|
||||||
printf(" %s\n", path.c_str());
|
printf(" %s\n", path.c_str());
|
||||||
};
|
};
|
||||||
|
printf(" increase_ref_index: %s\n", params.increase_ref_index ? "true" : "false");
|
||||||
printf(" offload_params_to_cpu: %s\n", params.offload_params_to_cpu ? "true" : "false");
|
printf(" offload_params_to_cpu: %s\n", params.offload_params_to_cpu ? "true" : "false");
|
||||||
printf(" clip_on_cpu: %s\n", params.clip_on_cpu ? "true" : "false");
|
printf(" clip_on_cpu: %s\n", params.clip_on_cpu ? "true" : "false");
|
||||||
printf(" control_net_cpu: %s\n", params.control_net_cpu ? "true" : "false");
|
printf(" control_net_cpu: %s\n", params.control_net_cpu ? "true" : "false");
|
||||||
@ -222,6 +224,7 @@ void print_usage(int argc, const char* argv[]) {
|
|||||||
printf(" -i, --end-img [IMAGE] path to the end image, required by flf2v\n");
|
printf(" -i, --end-img [IMAGE] path to the end image, required by flf2v\n");
|
||||||
printf(" --control-image [IMAGE] path to image condition, control net\n");
|
printf(" --control-image [IMAGE] path to image condition, control net\n");
|
||||||
printf(" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n");
|
printf(" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n");
|
||||||
|
printf(" --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).\n");
|
||||||
printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
|
printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
|
||||||
printf(" -p, --prompt [PROMPT] the prompt to render\n");
|
printf(" -p, --prompt [PROMPT] the prompt to render\n");
|
||||||
printf(" -n, --negative-prompt PROMPT the negative prompt (default: \"\")\n");
|
printf(" -n, --negative-prompt PROMPT the negative prompt (default: \"\")\n");
|
||||||
@ -536,6 +539,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||||||
{"", "--color", "", true, &params.color},
|
{"", "--color", "", true, &params.color},
|
||||||
{"", "--chroma-disable-dit-mask", "", false, &params.chroma_use_dit_mask},
|
{"", "--chroma-disable-dit-mask", "", false, &params.chroma_use_dit_mask},
|
||||||
{"", "--chroma-enable-t5-mask", "", true, &params.chroma_use_t5_mask},
|
{"", "--chroma-enable-t5-mask", "", true, &params.chroma_use_t5_mask},
|
||||||
|
{"", "--increase-ref-index", "", true, &params.increase_ref_index},
|
||||||
};
|
};
|
||||||
|
|
||||||
auto on_mode_arg = [&](int argc, const char** argv, int index) {
|
auto on_mode_arg = [&](int argc, const char** argv, int index) {
|
||||||
@ -1207,6 +1211,7 @@ int main(int argc, const char* argv[]) {
|
|||||||
init_image,
|
init_image,
|
||||||
ref_images.data(),
|
ref_images.data(),
|
||||||
(int)ref_images.size(),
|
(int)ref_images.size(),
|
||||||
|
params.increase_ref_index,
|
||||||
mask_image,
|
mask_image,
|
||||||
params.width,
|
params.width,
|
||||||
params.height,
|
params.height,
|
||||||
|
|||||||
7
flux.hpp
7
flux.hpp
@ -960,6 +960,7 @@ namespace Flux {
|
|||||||
struct ggml_tensor* y,
|
struct ggml_tensor* y,
|
||||||
struct ggml_tensor* guidance,
|
struct ggml_tensor* guidance,
|
||||||
std::vector<ggml_tensor*> ref_latents = {},
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
|
bool increase_ref_index = false,
|
||||||
std::vector<int> skip_layers = {}) {
|
std::vector<int> skip_layers = {}) {
|
||||||
GGML_ASSERT(x->ne[3] == 1);
|
GGML_ASSERT(x->ne[3] == 1);
|
||||||
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);
|
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);
|
||||||
@ -999,6 +1000,7 @@ namespace Flux {
|
|||||||
x->ne[3],
|
x->ne[3],
|
||||||
context->ne[1],
|
context->ne[1],
|
||||||
ref_latents,
|
ref_latents,
|
||||||
|
increase_ref_index,
|
||||||
flux_params.theta,
|
flux_params.theta,
|
||||||
flux_params.axes_dim);
|
flux_params.axes_dim);
|
||||||
int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2;
|
int pos_len = pe_vec.size() / flux_params.axes_dim_sum / 2;
|
||||||
@ -1035,6 +1037,7 @@ namespace Flux {
|
|||||||
struct ggml_tensor* y,
|
struct ggml_tensor* y,
|
||||||
struct ggml_tensor* guidance,
|
struct ggml_tensor* guidance,
|
||||||
std::vector<ggml_tensor*> ref_latents = {},
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
|
bool increase_ref_index = false,
|
||||||
struct ggml_tensor** output = NULL,
|
struct ggml_tensor** output = NULL,
|
||||||
struct ggml_context* output_ctx = NULL,
|
struct ggml_context* output_ctx = NULL,
|
||||||
std::vector<int> skip_layers = std::vector<int>()) {
|
std::vector<int> skip_layers = std::vector<int>()) {
|
||||||
@ -1044,7 +1047,7 @@ namespace Flux {
|
|||||||
// y: [N, adm_in_channels] or [1, adm_in_channels]
|
// y: [N, adm_in_channels] or [1, adm_in_channels]
|
||||||
// guidance: [N, ]
|
// guidance: [N, ]
|
||||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
auto get_graph = [&]() -> struct ggml_cgraph* {
|
||||||
return build_graph(x, timesteps, context, c_concat, y, guidance, ref_latents, skip_layers);
|
return build_graph(x, timesteps, context, c_concat, y, guidance, ref_latents, increase_ref_index, skip_layers);
|
||||||
};
|
};
|
||||||
|
|
||||||
GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
||||||
@ -1084,7 +1087,7 @@ namespace Flux {
|
|||||||
struct ggml_tensor* out = NULL;
|
struct ggml_tensor* out = NULL;
|
||||||
|
|
||||||
int t0 = ggml_time_ms();
|
int t0 = ggml_time_ms();
|
||||||
compute(8, x, timesteps, context, NULL, y, guidance, {}, &out, work_ctx);
|
compute(8, x, timesteps, context, NULL, y, guidance, {}, false, &out, work_ctx);
|
||||||
int t1 = ggml_time_ms();
|
int t1 = ggml_time_ms();
|
||||||
|
|
||||||
print_ggml_tensor(out);
|
print_ggml_tensor(out);
|
||||||
|
|||||||
1
lora.hpp
1
lora.hpp
@ -58,6 +58,7 @@ struct LoraModel : public GGMLRunner {
|
|||||||
{"x_block.attn.proj", "attn.to_out.0"},
|
{"x_block.attn.proj", "attn.to_out.0"},
|
||||||
{"x_block.attn2.proj", "attn2.to_out.0"},
|
{"x_block.attn2.proj", "attn2.to_out.0"},
|
||||||
// flux
|
// flux
|
||||||
|
{"img_in", "x_embedder"},
|
||||||
// singlestream
|
// singlestream
|
||||||
{"linear2", "proj_out"},
|
{"linear2", "proj_out"},
|
||||||
{"modulation.lin", "norm.linear"},
|
{"modulation.lin", "norm.linear"},
|
||||||
|
|||||||
23
rope.hpp
23
rope.hpp
@ -156,25 +156,33 @@ struct Rope {
|
|||||||
int patch_size,
|
int patch_size,
|
||||||
int bs,
|
int bs,
|
||||||
int context_len,
|
int context_len,
|
||||||
std::vector<ggml_tensor*> ref_latents) {
|
std::vector<ggml_tensor*> ref_latents,
|
||||||
|
bool increase_ref_index) {
|
||||||
auto txt_ids = gen_txt_ids(bs, context_len);
|
auto txt_ids = gen_txt_ids(bs, context_len);
|
||||||
auto img_ids = gen_img_ids(h, w, patch_size, bs);
|
auto img_ids = gen_img_ids(h, w, patch_size, bs);
|
||||||
|
|
||||||
auto ids = concat_ids(txt_ids, img_ids, bs);
|
auto ids = concat_ids(txt_ids, img_ids, bs);
|
||||||
uint64_t curr_h_offset = 0;
|
uint64_t curr_h_offset = 0;
|
||||||
uint64_t curr_w_offset = 0;
|
uint64_t curr_w_offset = 0;
|
||||||
|
int index = 1;
|
||||||
for (ggml_tensor* ref : ref_latents) {
|
for (ggml_tensor* ref : ref_latents) {
|
||||||
uint64_t h_offset = 0;
|
uint64_t h_offset = 0;
|
||||||
uint64_t w_offset = 0;
|
uint64_t w_offset = 0;
|
||||||
if (ref->ne[1] + curr_h_offset > ref->ne[0] + curr_w_offset) {
|
if (!increase_ref_index) {
|
||||||
w_offset = curr_w_offset;
|
if (ref->ne[1] + curr_h_offset > ref->ne[0] + curr_w_offset) {
|
||||||
} else {
|
w_offset = curr_w_offset;
|
||||||
h_offset = curr_h_offset;
|
} else {
|
||||||
|
h_offset = curr_h_offset;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, 1, h_offset, w_offset);
|
auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, index, h_offset, w_offset);
|
||||||
ids = concat_ids(ids, ref_ids, bs);
|
ids = concat_ids(ids, ref_ids, bs);
|
||||||
|
|
||||||
|
if (increase_ref_index) {
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
|
||||||
curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset);
|
curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset);
|
||||||
curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset);
|
curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset);
|
||||||
}
|
}
|
||||||
@ -188,9 +196,10 @@ struct Rope {
|
|||||||
int bs,
|
int bs,
|
||||||
int context_len,
|
int context_len,
|
||||||
std::vector<ggml_tensor*> ref_latents,
|
std::vector<ggml_tensor*> ref_latents,
|
||||||
|
bool increase_ref_index,
|
||||||
int theta,
|
int theta,
|
||||||
const std::vector<int>& axes_dim) {
|
const std::vector<int>& axes_dim) {
|
||||||
std::vector<std::vector<float>> ids = gen_flux_ids(h, w, patch_size, bs, context_len, ref_latents);
|
std::vector<std::vector<float>> ids = gen_flux_ids(h, w, patch_size, bs, context_len, ref_latents, increase_ref_index);
|
||||||
return embed_nd(ids, bs, theta, axes_dim);
|
return embed_nd(ids, bs, theta, axes_dim);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -775,7 +775,7 @@ public:
|
|||||||
|
|
||||||
int64_t t0 = ggml_time_ms();
|
int64_t t0 = ggml_time_ms();
|
||||||
struct ggml_tensor* out = ggml_dup_tensor(work_ctx, x_t);
|
struct ggml_tensor* out = ggml_dup_tensor(work_ctx, x_t);
|
||||||
diffusion_model->compute(n_threads, x_t, timesteps, c, concat, NULL, NULL, {}, -1, {}, 0.f, &out);
|
diffusion_model->compute(n_threads, x_t, timesteps, c, concat, NULL, NULL, {}, false, -1, {}, 0.f, &out);
|
||||||
diffusion_model->free_compute_buffer();
|
diffusion_model->free_compute_buffer();
|
||||||
|
|
||||||
double result = 0.f;
|
double result = 0.f;
|
||||||
@ -1032,6 +1032,7 @@ public:
|
|||||||
int start_merge_step,
|
int start_merge_step,
|
||||||
SDCondition id_cond,
|
SDCondition id_cond,
|
||||||
std::vector<ggml_tensor*> ref_latents = {},
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
|
bool increase_ref_index = false,
|
||||||
ggml_tensor* denoise_mask = nullptr) {
|
ggml_tensor* denoise_mask = nullptr) {
|
||||||
std::vector<int> skip_layers(guidance.slg.layers, guidance.slg.layers + guidance.slg.layer_count);
|
std::vector<int> skip_layers(guidance.slg.layers, guidance.slg.layers + guidance.slg.layer_count);
|
||||||
|
|
||||||
@ -1126,6 +1127,7 @@ public:
|
|||||||
cond.c_vector,
|
cond.c_vector,
|
||||||
guidance_tensor,
|
guidance_tensor,
|
||||||
ref_latents,
|
ref_latents,
|
||||||
|
increase_ref_index,
|
||||||
-1,
|
-1,
|
||||||
controls,
|
controls,
|
||||||
control_strength,
|
control_strength,
|
||||||
@ -1139,6 +1141,7 @@ public:
|
|||||||
id_cond.c_vector,
|
id_cond.c_vector,
|
||||||
guidance_tensor,
|
guidance_tensor,
|
||||||
ref_latents,
|
ref_latents,
|
||||||
|
increase_ref_index,
|
||||||
-1,
|
-1,
|
||||||
controls,
|
controls,
|
||||||
control_strength,
|
control_strength,
|
||||||
@ -1160,6 +1163,7 @@ public:
|
|||||||
uncond.c_vector,
|
uncond.c_vector,
|
||||||
guidance_tensor,
|
guidance_tensor,
|
||||||
ref_latents,
|
ref_latents,
|
||||||
|
increase_ref_index,
|
||||||
-1,
|
-1,
|
||||||
controls,
|
controls,
|
||||||
control_strength,
|
control_strength,
|
||||||
@ -1177,6 +1181,7 @@ public:
|
|||||||
img_cond.c_vector,
|
img_cond.c_vector,
|
||||||
guidance_tensor,
|
guidance_tensor,
|
||||||
ref_latents,
|
ref_latents,
|
||||||
|
increase_ref_index,
|
||||||
-1,
|
-1,
|
||||||
controls,
|
controls,
|
||||||
control_strength,
|
control_strength,
|
||||||
@ -1198,6 +1203,7 @@ public:
|
|||||||
cond.c_vector,
|
cond.c_vector,
|
||||||
guidance_tensor,
|
guidance_tensor,
|
||||||
ref_latents,
|
ref_latents,
|
||||||
|
increase_ref_index,
|
||||||
-1,
|
-1,
|
||||||
controls,
|
controls,
|
||||||
control_strength,
|
control_strength,
|
||||||
@ -1710,6 +1716,7 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
|||||||
"\n"
|
"\n"
|
||||||
"batch_count: %d\n"
|
"batch_count: %d\n"
|
||||||
"ref_images_count: %d\n"
|
"ref_images_count: %d\n"
|
||||||
|
"increase_ref_index: %s\n"
|
||||||
"control_strength: %.2f\n"
|
"control_strength: %.2f\n"
|
||||||
"style_strength: %.2f\n"
|
"style_strength: %.2f\n"
|
||||||
"normalize_input: %s\n"
|
"normalize_input: %s\n"
|
||||||
@ -1724,6 +1731,7 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
|
|||||||
sd_img_gen_params->seed,
|
sd_img_gen_params->seed,
|
||||||
sd_img_gen_params->batch_count,
|
sd_img_gen_params->batch_count,
|
||||||
sd_img_gen_params->ref_images_count,
|
sd_img_gen_params->ref_images_count,
|
||||||
|
BOOL_STR(sd_img_gen_params->increase_ref_index),
|
||||||
sd_img_gen_params->control_strength,
|
sd_img_gen_params->control_strength,
|
||||||
sd_img_gen_params->style_strength,
|
sd_img_gen_params->style_strength,
|
||||||
BOOL_STR(sd_img_gen_params->normalize_input),
|
BOOL_STR(sd_img_gen_params->normalize_input),
|
||||||
@ -1797,6 +1805,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||||||
bool normalize_input,
|
bool normalize_input,
|
||||||
std::string input_id_images_path,
|
std::string input_id_images_path,
|
||||||
std::vector<ggml_tensor*> ref_latents,
|
std::vector<ggml_tensor*> ref_latents,
|
||||||
|
bool increase_ref_index,
|
||||||
ggml_tensor* concat_latent = NULL,
|
ggml_tensor* concat_latent = NULL,
|
||||||
ggml_tensor* denoise_mask = NULL) {
|
ggml_tensor* denoise_mask = NULL) {
|
||||||
if (seed < 0) {
|
if (seed < 0) {
|
||||||
@ -2054,6 +2063,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||||||
start_merge_step,
|
start_merge_step,
|
||||||
id_cond,
|
id_cond,
|
||||||
ref_latents,
|
ref_latents,
|
||||||
|
increase_ref_index,
|
||||||
denoise_mask);
|
denoise_mask);
|
||||||
// print_ggml_tensor(x_0);
|
// print_ggml_tensor(x_0);
|
||||||
int64_t sampling_end = ggml_time_ms();
|
int64_t sampling_end = ggml_time_ms();
|
||||||
@ -2304,7 +2314,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||||||
LOG_INFO("EDIT mode");
|
LOG_INFO("EDIT mode");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<struct ggml_tensor*> ref_latents;
|
std::vector<ggml_tensor*> ref_latents;
|
||||||
for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) {
|
for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) {
|
||||||
ggml_tensor* img = ggml_new_tensor_4d(work_ctx,
|
ggml_tensor* img = ggml_new_tensor_4d(work_ctx,
|
||||||
GGML_TYPE_F32,
|
GGML_TYPE_F32,
|
||||||
@ -2359,6 +2369,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||||||
sd_img_gen_params->normalize_input,
|
sd_img_gen_params->normalize_input,
|
||||||
sd_img_gen_params->input_id_images_path,
|
sd_img_gen_params->input_id_images_path,
|
||||||
ref_latents,
|
ref_latents,
|
||||||
|
sd_img_gen_params->increase_ref_index,
|
||||||
concat_latent,
|
concat_latent,
|
||||||
denoise_mask);
|
denoise_mask);
|
||||||
|
|
||||||
|
|||||||
@ -182,6 +182,7 @@ typedef struct {
|
|||||||
sd_image_t init_image;
|
sd_image_t init_image;
|
||||||
sd_image_t* ref_images;
|
sd_image_t* ref_images;
|
||||||
int ref_images_count;
|
int ref_images_count;
|
||||||
|
bool increase_ref_index;
|
||||||
sd_image_t mask_image;
|
sd_image_t mask_image;
|
||||||
int width;
|
int width;
|
||||||
int height;
|
int height;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user