Skip to content

Commit 68e86a6

Browse files
committed
Add a bunch of local_barrier() calls.
They are probably overkill, but they seem to fix the problems with the test cases, at least so far.
1 parent 58c9359 commit 68e86a6

File tree

2 files changed

+49
-46
lines changed

2 files changed

+49
-46
lines changed

src/gpuarray_reduction.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2600,6 +2600,7 @@ static void reduxGenSrcAppendDecode (GpuReduction* gr){
26002600
" TK1* restrict const SHMEMK1 = (TK1*)(SHMEM + SHMEMK1Off);\n");
26012601
}
26022602
srcbAppends(&gr->srcGen,
2603+
" local_barrier();\n"
26032604
" INITREDUXSTATE(SHMEMK0[LID_0], SHMEMK1[LID_0]);\n"
26042605
" if(D<LDIM_0 && LID_0+LDIM_0<H){\n"
26052606
" INITREDUXSTATE(SHMEMK0[LID_0+LDIM_0], SHMEMK1[LID_0+LDIM_0]);\n"
@@ -2736,6 +2737,7 @@ static void reduxGenSrcAppendIncrement (GpuReduction* gr,
27362737
static void reduxGenSrcAppendDstWrite (GpuReduction* gr,
27372738
uint32_t selector,
27382739
int initial){
2740+
srcbAppends(&gr->srcGen, " local_barrier();\n");
27392741
if (initial){
27402742
srcbAppends(&gr->srcGen, " if(LID_0 < D){\n"
27412743
" SETREDUXSTATE(W0R[GID_0*D + LID_0],\n"
@@ -2762,6 +2764,7 @@ static void reduxGenSrcAppendDstWrite (GpuReduction* gr,
27622764
" }\n");
27632765
}
27642766
}
2767+
srcbAppends(&gr->srcGen, " local_barrier();\n");
27652768
}
27662769
static void reduxGenSrcAppendPhase1 (GpuReduction* gr){
27672770
/**

0 commit comments

Comments
 (0)