Mirror of https://github.com/leejet/stable-diffusion.cpp.git (synced 2025-12-13 05:48:56 +00:00)
optimize clip_preprocess and fix get_first_stage_encoding
This commit is contained in:
Parent: 4e48e6b82b
Commit: 95cae28465
@ -256,7 +256,7 @@ namespace Qwen {
|
||||
auto txt_gate1 = txt_mod_param_vec[2];
|
||||
|
||||
auto [img_attn_output, txt_attn_output] = attn->forward(ctx, backend, img_modulated, txt_modulated, pe);
|
||||
|
||||
|
||||
img = ggml_add(ctx, img, ggml_mul(ctx, img_attn_output, img_gate1));
|
||||
txt = ggml_add(ctx, txt, ggml_mul(ctx, txt_attn_output, txt_gate1));
|
||||
|
||||
@ -564,7 +564,7 @@ namespace Qwen {
|
||||
timesteps,
|
||||
context,
|
||||
pe,
|
||||
ref_latents);
|
||||
ref_latents);
|
||||
|
||||
ggml_build_forward_expand(gf, out);
|
||||
|
||||
|
||||
@ -952,7 +952,7 @@ public:
|
||||
ggml_set_f32(output, 0.f);
|
||||
} else {
|
||||
sd_image_f32_t image = sd_image_t_to_sd_image_f32_t(init_image);
|
||||
sd_image_f32_t resized_image = clip_preprocess(image, clip_vision->vision_model.image_size);
|
||||
sd_image_f32_t resized_image = clip_preprocess(image, clip_vision->vision_model.image_size, clip_vision->vision_model.image_size);
|
||||
free(image.data);
|
||||
image.data = NULL;
|
||||
|
||||
@ -2029,7 +2029,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
||||
std::vector<sd_image_f32_t> processed_id_images;
|
||||
for (int i = 0; i < pm_params.id_images_count; i++) {
|
||||
sd_image_f32_t id_image = sd_image_t_to_sd_image_f32_t(pm_params.id_images[i]);
|
||||
sd_image_f32_t processed_id_image = clip_preprocess(id_image, clip_image_size);
|
||||
sd_image_f32_t processed_id_image = clip_preprocess(id_image, clip_image_size, clip_image_size);
|
||||
free(id_image.data);
|
||||
id_image.data = NULL;
|
||||
processed_id_images.push_back(processed_id_image);
|
||||
|
||||
util.cpp (48 changes)
@ -84,6 +84,7 @@ int round_up_to(int value, int base) {
|
||||
}
|
||||
|
||||
#ifdef _WIN32 // code for windows
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
|
||||
bool file_exists(const std::string& filename) {
|
||||
@ -427,18 +428,21 @@ float means[3] = {0.48145466, 0.4578275, 0.40821073};
|
||||
float stds[3] = {0.26862954, 0.26130258, 0.27577711};
|
||||
|
||||
// Function to clip and preprocess sd_image_f32_t
|
||||
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size) {
|
||||
float scale = (float)size / fmin(image.width, image.height);
|
||||
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int target_width, int target_height) {
|
||||
float width_scale = (float)target_width / image.width;
|
||||
float height_scale = (float)target_height / image.height;
|
||||
|
||||
float scale = std::fmax(width_scale, height_scale);
|
||||
|
||||
// Interpolation
|
||||
int new_width = (int)(scale * image.width);
|
||||
int new_height = (int)(scale * image.height);
|
||||
float* resized_data = (float*)malloc(new_width * new_height * image.channel * sizeof(float));
|
||||
int resized_width = (int)(scale * image.width);
|
||||
int resized_height = (int)(scale * image.height);
|
||||
float* resized_data = (float*)malloc(resized_width * resized_height * image.channel * sizeof(float));
|
||||
|
||||
for (int y = 0; y < new_height; y++) {
|
||||
for (int x = 0; x < new_width; x++) {
|
||||
float original_x = (float)x * image.width / new_width;
|
||||
float original_y = (float)y * image.height / new_height;
|
||||
for (int y = 0; y < resized_height; y++) {
|
||||
for (int x = 0; x < resized_width; x++) {
|
||||
float original_x = (float)x * image.width / resized_width;
|
||||
float original_y = (float)y * image.height / resized_height;
|
||||
|
||||
int x1 = (int)original_x;
|
||||
int y1 = (int)original_y;
|
||||
@ -456,26 +460,26 @@ sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size) {
|
||||
|
||||
float value = interpolate(v1, v2, v3, v4, x_ratio, y_ratio);
|
||||
|
||||
*(resized_data + y * new_width * image.channel + x * image.channel + k) = value;
|
||||
*(resized_data + y * resized_width * image.channel + x * image.channel + k) = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clip and preprocess
|
||||
int h = (new_height - size) / 2;
|
||||
int w = (new_width - size) / 2;
|
||||
int h_offset = std::max((int)(resized_height - target_height) / 2, 0);
|
||||
int w_offset = std::max((int)(resized_width - target_width) / 2, 0);
|
||||
|
||||
sd_image_f32_t result;
|
||||
result.width = size;
|
||||
result.height = size;
|
||||
result.width = target_width;
|
||||
result.height = target_height;
|
||||
result.channel = image.channel;
|
||||
result.data = (float*)malloc(size * size * image.channel * sizeof(float));
|
||||
result.data = (float*)malloc(target_height * target_width * image.channel * sizeof(float));
|
||||
|
||||
for (int k = 0; k < image.channel; k++) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (int j = 0; j < size; j++) {
|
||||
*(result.data + i * size * image.channel + j * image.channel + k) =
|
||||
fmin(fmax(*(resized_data + (i + h) * new_width * image.channel + (j + w) * image.channel + k), 0.0f), 255.0f) / 255.0f;
|
||||
for (int i = 0; i < result.height; i++) {
|
||||
for (int j = 0; j < result.width; j++) {
|
||||
*(result.data + i * result.width * image.channel + j * image.channel + k) =
|
||||
fmin(fmax(*(resized_data + (i + h_offset) * resized_width * image.channel + (j + w_offset) * image.channel + k), 0.0f), 255.0f) / 255.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -485,10 +489,10 @@ sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size) {
|
||||
|
||||
// Normalize
|
||||
for (int k = 0; k < image.channel; k++) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (int j = 0; j < size; j++) {
|
||||
for (int i = 0; i < result.height; i++) {
|
||||
for (int j = 0; j < result.width; j++) {
|
||||
// *(result.data + i * size * image.channel + j * image.channel + k) = 0.5f;
|
||||
int offset = i * size * image.channel + j * image.channel + k;
|
||||
int offset = i * result.width * image.channel + j * image.channel + k;
|
||||
float value = *(result.data + offset);
|
||||
value = (value - means[k]) / stds[k];
|
||||
// value = 0.5f;
|
||||
|
||||
util.h (2 changes)
@ -42,7 +42,7 @@ sd_image_f32_t sd_image_t_to_sd_image_f32_t(sd_image_t image);
|
||||
|
||||
sd_image_f32_t resize_sd_image_f32_t(sd_image_f32_t image, int target_width, int target_height);
|
||||
|
||||
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size);
|
||||
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int target_width, int target_height);
|
||||
|
||||
std::string path_join(const std::string& p1, const std::string& p2);
|
||||
std::vector<std::string> split_string(const std::string& str, char delimiter);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user