diff --git a/ggml b/ggml
index 2d3876d..f5425c0 160000
--- a/ggml
+++ b/ggml
@@ -1 +1 @@
-Subproject commit 2d3876d554551d35c06dccc5852be50d5fd2a275
+Subproject commit f5425c0ee5e582a7d64411f06139870bff3e52e0
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
index 26dff49..28fd018 100644
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -1270,6 +1270,9 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_attention_ext(struct ggml_context
         }
 
         if (mask_in != nullptr) {
+            // ggml commit 4767bda removed the need to pad the mask, and with it GGML_KQ_MASK_PAD;
+            // keep the padding code only when that macro is defined, so older ggml versions still build
+#ifdef GGML_KQ_MASK_PAD
             int mask_pad = 0;
             if (mask_in->ne[1] % GGML_KQ_MASK_PAD != 0) {
                 mask_pad = GGML_PAD(L_q, GGML_KQ_MASK_PAD) - mask_in->ne[1];
@@ -1277,6 +1280,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_attention_ext(struct ggml_context
             if (mask_pad > 0) {
                 mask_in = ggml_pad(ctx, mask_in, 0, mask_pad, 0, 0);
             }
+#endif
             mask_in = ggml_cast(ctx, mask_in, GGML_TYPE_F16);
         }