Skip to content

Commit e1241d9

Browse files
committed
metal : switch to execution barriers + fix one of the barriers
1 parent 109e7aa commit e1241d9

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

ggml-metal.metal

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -385,8 +385,11 @@ kernel void kernel_soft_max(
385385
pdst[i00] = exp_psrc0;
386386
}
387387

388+
// This barrier fixes a failing test
389+
// ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
390+
threadgroup_barrier(mem_flags::mem_none);
391+
388392
float sum = simd_sum(lsum);
389-
threadgroup_barrier(mem_flags::mem_threadgroup);
390393

391394
if (ntg > N_SIMDWIDTH) {
392395
if (sgitg == 0) {
@@ -470,9 +473,13 @@ kernel void kernel_soft_max_4(
470473
}
471474

472475
const float lsum = lsum4[0] + lsum4[1] + lsum4[2] + lsum4[3];
473-
threadgroup_barrier(mem_flags::mem_threadgroup);
476+
477+
// This barrier fixes a failing test
478+
// ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
479+
threadgroup_barrier(mem_flags::mem_none);
474480

475481
float sum = simd_sum(lsum);
482+
476483
if (ntg > N_SIMDWIDTH) {
477484
if (sgitg == 0) {
478485
buf[tiisg] = 0.0f;

0 commit comments

Comments
 (0)