@@ -557,8 +557,6 @@ extern "C" {
557557 GGML_GLU_OP_REGLU ,
558558 GGML_GLU_OP_GEGLU ,
559559 GGML_GLU_OP_SWIGLU ,
560- GGML_GLU_OP_GEGLU_ERF ,
561- GGML_GLU_OP_GEGLU_QUICK ,
562560
563561 GGML_GLU_OP_COUNT ,
564562 };
@@ -648,9 +646,6 @@ extern "C" {
648646
649647 // misc
650648
651- GGML_API const char * ggml_version (void );
652- GGML_API const char * ggml_commit (void );
653-
654649 GGML_API void ggml_time_init (void ); // call this once at the beginning of the program
655650 GGML_API int64_t ggml_time_ms (void );
656651 GGML_API int64_t ggml_time_us (void );
@@ -1149,22 +1144,6 @@ extern "C" {
11491144 struct ggml_context * ctx ,
11501145 struct ggml_tensor * a );
11511146
1152- GGML_API struct ggml_tensor * ggml_geglu_erf (
1153- struct ggml_context * ctx ,
1154- struct ggml_tensor * a );
1155-
1156- GGML_API struct ggml_tensor * ggml_geglu_erf_swapped (
1157- struct ggml_context * ctx ,
1158- struct ggml_tensor * a );
1159-
1160- GGML_API struct ggml_tensor * ggml_geglu_quick (
1161- struct ggml_context * ctx ,
1162- struct ggml_tensor * a );
1163-
1164- GGML_API struct ggml_tensor * ggml_geglu_quick_swapped (
1165- struct ggml_context * ctx ,
1166- struct ggml_tensor * a );
1167-
11681147 // A: n columns, r rows,
11691148 // B: n columns, r rows,
11701149 GGML_API struct ggml_tensor * ggml_glu_split (
@@ -1188,16 +1167,6 @@ extern "C" {
11881167 struct ggml_tensor * a ,
11891168 struct ggml_tensor * b );
11901169
1191- GGML_API struct ggml_tensor * ggml_geglu_erf_split (
1192- struct ggml_context * ctx ,
1193- struct ggml_tensor * a ,
1194- struct ggml_tensor * b );
1195-
1196- GGML_API struct ggml_tensor * ggml_geglu_quick_split (
1197- struct ggml_context * ctx ,
1198- struct ggml_tensor * a ,
1199- struct ggml_tensor * b );
1200-
12011170 // normalize along rows
12021171 GGML_API struct ggml_tensor * ggml_norm (
12031172 struct ggml_context * ctx ,
@@ -2011,16 +1980,15 @@ extern "C" {
20111980
20121981#define GGML_KQ_MASK_PAD 64
20131982
2014- // q: [n_embd_k, n_batch, n_head, ne3 ]
2015- // k: [n_embd_k, n_kv, n_head_kv, ne3 ]
2016- // v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
2017- // mask: [n_kv, n_batch_pad, ne32, ne33 ] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
2018- // res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
1983+ // q: [n_embd_k, n_batch, n_head, ne3]
1984+ // k: [n_embd_k, n_kv, n_head_kv, ne3]
1985+ // v: [n_embd_v, n_kv, n_head_kv, ne3] !! not transposed !!
1986+ // mask: [n_kv, n_batch_pad, ne32, 1 ] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
1987+ // res: [n_embd_v, n_head, n_batch, ne3] !! permuted !!
20191988 //
20201989 // broadcast:
20211990 // n_head % n_head_kv == 0
2022- // n_head % ne32 == 0
2023- // ne3 % ne33 == 0
1991+ // ne3 % ne32 == 0
20241992 //
20251993 GGML_API struct ggml_tensor * ggml_flash_attn_ext (
20261994 struct ggml_context * ctx ,
@@ -2060,8 +2028,7 @@ extern "C" {
20602028 struct ggml_tensor * dt ,
20612029 struct ggml_tensor * A ,
20622030 struct ggml_tensor * B ,
2063- struct ggml_tensor * C ,
2064- struct ggml_tensor * ids );
2031+ struct ggml_tensor * C );
20652032
20662033 // partition into non-overlapping windows with padding if needed
20672034 // example:
0 commit comments