fix: use ggml_nn_attention in vae

fix: remove default variables in c headers (#478 )
2026-06-25 07:36:38 +00:00 · 2024-11-24 18:21:31 +08:00 · 2024-11-24 18:10:25 +08:00
5 changed files with 61 additions and 27 deletions
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@ -926,7 +926,8 @@ int main(int argc, const char* argv[]) {
                          params.style_ratio,
                          params.normalize_input,
                          params.input_id_images_path.c_str(),
-                          params.skip_layers,
+                          params.skip_layers.data(),
+                          params.skip_layers.size(),
                          params.slg_scale,
                          params.skip_layer_start,
                          params.skip_layer_end);
@ -991,7 +992,12 @@ int main(int argc, const char* argv[]) {
                              params.control_strength,
                              params.style_ratio,
                              params.normalize_input,
-                              params.input_id_images_path.c_str());
+                              params.input_id_images_path.c_str(),
+                              params.skip_layers.data(),
+                              params.skip_layers.size(),
+                              params.slg_scale,
+                              params.skip_layer_start,
+                              params.skip_layer_end);
        }
    }

--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@ -661,6 +661,30 @@ __STATIC_INLINE__ std::vector<struct ggml_tensor*> split_qkv(struct ggml_context
    return {q, k, v};
 }

+// Scaled dot-product attention: softmax(q.k / sqrt(d_head)) applied to v. q: [N * n_head, n_token, d_head]
+// k: [N * n_head, n_k, d_head]
+// v: [N * n_head, d_head, n_k] (note: d_head/n_k order is swapped relative to k)
+// return: [N * n_head, n_token, d_head]; `mask` is only consulted in the non-flash (#else) branch below
+__STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx,
+                                                        struct ggml_tensor* q,
+                                                        struct ggml_tensor* k,
+                                                        struct ggml_tensor* v,
+                                                        bool mask = false) {
+#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
+    struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false);  // [N * n_head, n_token, d_head]; last argument is hard-coded false, `mask` is not forwarded
+#else  // fallback path: build attention from mul_mat / scale / soft_max nodes
+    float d_head = (float)q->ne[0];  // q->ne[0] is taken as the per-head size used for the score scale
+    struct ggml_tensor* kq = ggml_mul_mat(ctx, k, q);  // [N * n_head, n_token, n_k]
+    kq                     = ggml_scale_inplace(ctx, kq, 1.0f / sqrt(d_head));  // scale scores by 1/sqrt(d_head)
+    if (mask) {
+        kq = ggml_diag_mask_inf_inplace(ctx, kq, 0);  // third argument fixed at 0 (presumably n_past in ggml's API - confirm against ggml.h)
+    }
+    kq = ggml_soft_max_inplace(ctx, kq);  // turn scaled scores into attention weights
+    struct ggml_tensor* kqv = ggml_mul_mat(ctx, v, kq);  // [N * n_head, n_token, d_head]
+#endif
+    return kqv;
+}
+
 // q: [N, L_q, C] or [N*n_head, L_q, d_head]
 // k: [N, L_k, C] or [N*n_head, L_k, d_head]
 // v: [N, L_k, C] or [N, L_k, n_head, d_head]
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@ -780,7 +780,7 @@ public:
                        int start_merge_step,
                        SDCondition id_cond,
                        std::vector<int> skip_layers = {},
-                        float slg_scale              = 2.5,
+                        float slg_scale              = 0,
                        float skip_layer_start       = 0.01,
                        float skip_layer_end         = 0.2) {
        size_t steps = sigmas.size() - 1;
@ -1162,7 +1162,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
                           bool normalize_input,
                           std::string input_id_images_path,
                           std::vector<int> skip_layers = {},
-                           float slg_scale              = 2.5,
+                           float slg_scale              = 0,
                           float skip_layer_start       = 0.01,
                           float skip_layer_end         = 0.2) {
    if (seed < 0) {
@ -1453,10 +1453,12 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
                    float style_ratio,
                    bool normalize_input,
                    const char* input_id_images_path_c_str,
-                    std::vector<int> skip_layers,
-                    float slg_scale,
-                    float skip_layer_start,
-                    float skip_layer_end) {
+                    int* skip_layers         = NULL,
+                    size_t skip_layers_count = 0,
+                    float slg_scale          = 0,
+                    float skip_layer_start   = 0.01,
+                    float skip_layer_end     = 0.2) {
+    std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
    LOG_DEBUG("txt2img %dx%d", width, height);
    if (sd_ctx == NULL) {
        return NULL;
@ -1525,7 +1527,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
                                               style_ratio,
                                               normalize_input,
                                               input_id_images_path_c_str,
-                                               skip_layers,
+                                               skip_layers_vec,
                                               slg_scale,
                                               skip_layer_start,
                                               skip_layer_end);
@ -1556,10 +1558,12 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
                    float style_ratio,
                    bool normalize_input,
                    const char* input_id_images_path_c_str,
-                    std::vector<int> skip_layers,
-                    float slg_scale,
-                    float skip_layer_start,
-                    float skip_layer_end) {
+                    int* skip_layers         = NULL,
+                    size_t skip_layers_count = 0,
+                    float slg_scale          = 0,
+                    float skip_layer_start   = 0.01,
+                    float skip_layer_end     = 0.2) {
+    std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
    LOG_DEBUG("img2img %dx%d", width, height);
    if (sd_ctx == NULL) {
        return NULL;
@ -1634,7 +1638,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
                                               style_ratio,
                                               normalize_input,
                                               input_id_images_path_c_str,
-                                               skip_layers,
+                                               skip_layers_vec,
                                               slg_scale,
                                               skip_layer_start,
                                               skip_layer_end);
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@ -166,10 +166,11 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
                           float style_strength,
                           bool normalize_input,
                           const char* input_id_images_path,
-                           std::vector<int> skip_layers = {},
-                           float slg_scale              = 2.5,
-                           float skip_layer_start       = 0.01,
-                           float skip_layer_end         = 0.2);
+                           int* skip_layers,
+                           size_t skip_layers_count,
+                           float slg_scale,
+                           float skip_layer_start,
+                           float skip_layer_end);

 SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
                           sd_image_t init_image,
@ -190,10 +191,11 @@ SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
                           float style_strength,
                           bool normalize_input,
                           const char* input_id_images_path,
-                           std::vector<int> skip_layers = {},
-                           float slg_scale              = 2.5,
-                           float skip_layer_start       = 0.01,
-                           float skip_layer_end         = 0.2);
+                           int* skip_layers,
+                           size_t skip_layers_count,
+                           float slg_scale,
+                           float skip_layer_start,
+                           float skip_layer_end);

 SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
                           sd_image_t init_image,
--- a/vae.hpp
+++ b/vae.hpp
@ -99,12 +99,10 @@ public:
        k      = ggml_cont(ctx, ggml_permute(ctx, k, 1, 2, 0, 3));  // [N, h, w, in_channels]
        k      = ggml_reshape_3d(ctx, k, c, h * w, n);              // [N, h * w, in_channels]

-        auto v = v_proj->forward(ctx, h_);                          // [N, in_channels, h, w]
-        v      = ggml_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3));  // [N, h, w, in_channels]
-        v      = ggml_reshape_3d(ctx, v, c, h * w, n);              // [N, h * w, in_channels]
+        auto v = v_proj->forward(ctx, h_);              // [N, in_channels, h, w]
+        v      = ggml_reshape_3d(ctx, v, h * w, c, n);  // [N, in_channels, h * w]

-        // h_ = ggml_nn_attention(ctx, q, k, v, false);  // [N, h * w, in_channels]
-        h_ = ggml_nn_attention_ext(ctx, q, k, v, 1, nullptr, false, true, false);
+        h_ = ggml_nn_attention(ctx, q, k, v, false);  // [N, h * w, in_channels]

        h_ = ggml_cont(ctx, ggml_permute(ctx, h_, 1, 0, 2, 3));  // [N, in_channels, h * w]
        h_ = ggml_reshape_4d(ctx, h_, w, h, c, n);               // [N, in_channels, h, w]
Author	SHA1	Message	Date
leejet	4570715727	fix: use ggml_nn_attention in vae	2024-11-24 18:21:31 +08:00
stduhpf	53b415f787	fix: remove default variables in c headers (#478 )	2024-11-24 18:10:25 +08:00