diff --git a/ggml b/ggml index 2d3876d..f5425c0 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit 2d3876d554551d35c06dccc5852be50d5fd2a275 +Subproject commit f5425c0ee5e582a7d64411f06139870bff3e52e0 diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 26dff49..28fd018 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -1270,6 +1270,9 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_attention_ext(struct ggml_context } if (mask_in != nullptr) { + // the need for padding got removed in ggml 4767bda + // ensure we can still use the old version for now +#ifdef GGML_KQ_MASK_PAD int mask_pad = 0; if (mask_in->ne[1] % GGML_KQ_MASK_PAD != 0) { mask_pad = GGML_PAD(L_q, GGML_KQ_MASK_PAD) - mask_in->ne[1]; @@ -1277,6 +1280,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_attention_ext(struct ggml_context if (mask_pad > 0) { mask_in = ggml_pad(ctx, mask_in, 0, mask_pad, 0, 0); } +#endif mask_in = ggml_cast(ctx, mask_in, GGML_TYPE_F16); }