Compare commits

...

2 Commits

Author SHA1 Message Date
leejet
4570715727 fix: use ggml_nn_attention in vae 2024-11-24 18:21:31 +08:00
stduhpf
53b415f787
fix: remove default variables in c headers (#478) 2024-11-24 18:10:25 +08:00
5 changed files with 61 additions and 27 deletions

View File

@ -926,7 +926,8 @@ int main(int argc, const char* argv[]) {
params.style_ratio, params.style_ratio,
params.normalize_input, params.normalize_input,
params.input_id_images_path.c_str(), params.input_id_images_path.c_str(),
params.skip_layers, params.skip_layers.data(),
params.skip_layers.size(),
params.slg_scale, params.slg_scale,
params.skip_layer_start, params.skip_layer_start,
params.skip_layer_end); params.skip_layer_end);
@ -991,7 +992,12 @@ int main(int argc, const char* argv[]) {
params.control_strength, params.control_strength,
params.style_ratio, params.style_ratio,
params.normalize_input, params.normalize_input,
params.input_id_images_path.c_str()); params.input_id_images_path.c_str(),
params.skip_layers.data(),
params.skip_layers.size(),
params.slg_scale,
params.skip_layer_start,
params.skip_layer_end);
} }
} }

View File

@ -661,6 +661,30 @@ __STATIC_INLINE__ std::vector<struct ggml_tensor*> split_qkv(struct ggml_context
return {q, k, v}; return {q, k, v};
} }
// Attention over tensors that have already been split per head:
// softmax(q * k^T / sqrt(d_head)) * v, optionally masked.
//
// q: [N * n_head, n_token, d_head]
// k: [N * n_head, n_k, d_head]
// v: [N * n_head, d_head, n_k]
// mask: when true, the scores are masked before the softmax
//       (ggml_diag_mask_inf with n_past = 0 on the generic path, or the
//       `masked` flag of ggml_flash_attn on the flash-attention path).
// return: [N * n_head, n_token, d_head]
__STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx,
                                                        struct ggml_tensor* q,
                                                        struct ggml_tensor* k,
                                                        struct ggml_tensor* v,
                                                        bool mask = false) {
#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
    // Forward the caller's mask request; hard-coding `false` here made the
    // flash-attention build silently ignore `mask`, unlike the generic path.
    struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, mask);  // [N * n_head, n_token, d_head]
#else
    // The innermost dimension of q is the per-head feature size.
    float d_head = static_cast<float>(q->ne[0]);

    struct ggml_tensor* kq = ggml_mul_mat(ctx, k, q);  // [N * n_head, n_token, n_k]
    // Scale the scores by 1 / sqrt(d_head) before the softmax.
    kq = ggml_scale_inplace(ctx, kq, 1.0f / sqrt(d_head));
    if (mask) {
        kq = ggml_diag_mask_inf_inplace(ctx, kq, 0);
    }
    kq = ggml_soft_max_inplace(ctx, kq);

    struct ggml_tensor* kqv = ggml_mul_mat(ctx, v, kq);  // [N * n_head, n_token, d_head]
#endif
    return kqv;
}
// q: [N, L_q, C] or [N*n_head, L_q, d_head] // q: [N, L_q, C] or [N*n_head, L_q, d_head]
// k: [N, L_k, C] or [N*n_head, L_k, d_head] // k: [N, L_k, C] or [N*n_head, L_k, d_head]
// v: [N, L_k, C] or [N, L_k, n_head, d_head] // v: [N, L_k, C] or [N, L_k, n_head, d_head]

View File

@ -780,7 +780,7 @@ public:
int start_merge_step, int start_merge_step,
SDCondition id_cond, SDCondition id_cond,
std::vector<int> skip_layers = {}, std::vector<int> skip_layers = {},
float slg_scale = 2.5, float slg_scale = 0,
float skip_layer_start = 0.01, float skip_layer_start = 0.01,
float skip_layer_end = 0.2) { float skip_layer_end = 0.2) {
size_t steps = sigmas.size() - 1; size_t steps = sigmas.size() - 1;
@ -1162,7 +1162,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
bool normalize_input, bool normalize_input,
std::string input_id_images_path, std::string input_id_images_path,
std::vector<int> skip_layers = {}, std::vector<int> skip_layers = {},
float slg_scale = 2.5, float slg_scale = 0,
float skip_layer_start = 0.01, float skip_layer_start = 0.01,
float skip_layer_end = 0.2) { float skip_layer_end = 0.2) {
if (seed < 0) { if (seed < 0) {
@ -1453,10 +1453,12 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
float style_ratio, float style_ratio,
bool normalize_input, bool normalize_input,
const char* input_id_images_path_c_str, const char* input_id_images_path_c_str,
std::vector<int> skip_layers, int* skip_layers = NULL,
float slg_scale, size_t skip_layers_count = 0,
float skip_layer_start, float slg_scale = 0,
float skip_layer_end) { float skip_layer_start = 0.01,
float skip_layer_end = 0.2) {
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
LOG_DEBUG("txt2img %dx%d", width, height); LOG_DEBUG("txt2img %dx%d", width, height);
if (sd_ctx == NULL) { if (sd_ctx == NULL) {
return NULL; return NULL;
@ -1525,7 +1527,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
style_ratio, style_ratio,
normalize_input, normalize_input,
input_id_images_path_c_str, input_id_images_path_c_str,
skip_layers, skip_layers_vec,
slg_scale, slg_scale,
skip_layer_start, skip_layer_start,
skip_layer_end); skip_layer_end);
@ -1556,10 +1558,12 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
float style_ratio, float style_ratio,
bool normalize_input, bool normalize_input,
const char* input_id_images_path_c_str, const char* input_id_images_path_c_str,
std::vector<int> skip_layers, int* skip_layers = NULL,
float slg_scale, size_t skip_layers_count = 0,
float skip_layer_start, float slg_scale = 0,
float skip_layer_end) { float skip_layer_start = 0.01,
float skip_layer_end = 0.2) {
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
LOG_DEBUG("img2img %dx%d", width, height); LOG_DEBUG("img2img %dx%d", width, height);
if (sd_ctx == NULL) { if (sd_ctx == NULL) {
return NULL; return NULL;
@ -1634,7 +1638,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
style_ratio, style_ratio,
normalize_input, normalize_input,
input_id_images_path_c_str, input_id_images_path_c_str,
skip_layers, skip_layers_vec,
slg_scale, slg_scale,
skip_layer_start, skip_layer_start,
skip_layer_end); skip_layer_end);

View File

@ -166,10 +166,11 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
float style_strength, float style_strength,
bool normalize_input, bool normalize_input,
const char* input_id_images_path, const char* input_id_images_path,
std::vector<int> skip_layers = {}, int* skip_layers,
float slg_scale = 2.5, size_t skip_layers_count,
float skip_layer_start = 0.01, float slg_scale,
float skip_layer_end = 0.2); float skip_layer_start,
float skip_layer_end);
SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx, SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
sd_image_t init_image, sd_image_t init_image,
@ -190,10 +191,11 @@ SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
float style_strength, float style_strength,
bool normalize_input, bool normalize_input,
const char* input_id_images_path, const char* input_id_images_path,
std::vector<int> skip_layers = {}, int* skip_layers,
float slg_scale = 2.5, size_t skip_layers_count,
float skip_layer_start = 0.01, float slg_scale,
float skip_layer_end = 0.2); float skip_layer_start,
float skip_layer_end);
SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
sd_image_t init_image, sd_image_t init_image,

View File

@ -99,12 +99,10 @@ public:
k = ggml_cont(ctx, ggml_permute(ctx, k, 1, 2, 0, 3)); // [N, h, w, in_channels] k = ggml_cont(ctx, ggml_permute(ctx, k, 1, 2, 0, 3)); // [N, h, w, in_channels]
k = ggml_reshape_3d(ctx, k, c, h * w, n); // [N, h * w, in_channels] k = ggml_reshape_3d(ctx, k, c, h * w, n); // [N, h * w, in_channels]
auto v = v_proj->forward(ctx, h_); // [N, in_channels, h, w] auto v = v_proj->forward(ctx, h_); // [N, in_channels, h, w]
v = ggml_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3)); // [N, h, w, in_channels] v = ggml_reshape_3d(ctx, v, h * w, c, n); // [N, in_channels, h * w]
v = ggml_reshape_3d(ctx, v, c, h * w, n); // [N, h * w, in_channels]
// h_ = ggml_nn_attention(ctx, q, k, v, false); // [N, h * w, in_channels] h_ = ggml_nn_attention(ctx, q, k, v, false); // [N, h * w, in_channels]
h_ = ggml_nn_attention_ext(ctx, q, k, v, 1, nullptr, false, true, false);
h_ = ggml_cont(ctx, ggml_permute(ctx, h_, 1, 0, 2, 3)); // [N, in_channels, h * w] h_ = ggml_cont(ctx, ggml_permute(ctx, h_, 1, 0, 2, 3)); // [N, in_channels, h * w]
h_ = ggml_reshape_4d(ctx, h_, w, h, c, n); // [N, in_channels, h, w] h_ = ggml_reshape_4d(ctx, h_, w, h, c, n); // [N, in_channels, h, w]