@@ -37,6 +37,8 @@ int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
/// Forward declaration of the shuffle-down helper. The target-specific
/// definitions live in the per-architecture implementation sections further
/// down in this file.
/// NOTE(review): presumably returns \p Var as held by the lane \p LaneDelta
/// positions above the caller within a group of \p Width lanes - confirm
/// against the definitions.
3737int32_t shuffleDown (uint64_t Mask, int32_t Var, uint32_t LaneDelta,
3838 int32_t Width);
3939
/// Forward declaration of the ballot helper; each target section below
/// provides its own definition.
///
/// \param Mask 64-bit lane mask. The AMDGCN definition ANDs it with the ballot
///             result; the NVPTX definition passes only its low 32 bits to
///             the vote intrinsic.
/// \param Pred Predicate value forwarded to the target's ballot builtin.
/// \returns The ballot result widened to 64 bits.
40+ uint64_t ballotSync (uint64_t Mask, int32_t Pred);
41+
/// AMDGCN Implementation
///
///{
@@ -57,6 +59,12 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
5759 return __builtin_amdgcn_ds_bpermute (Index << 2 , Var);
5860}
5961
62+ uint64_t ballotSync (uint64_t Mask, int32_t Pred) {
63+ return Mask &
64+ (__AMDGCN_WAVEFRONT_SIZE == 32 ? __builtin_amdgcn_ballot_w32 (Pred)
65+ : __builtin_amdgcn_ballot_w64 (Pred));
66+ }
67+
6068bool isSharedMemPtr (const void *Ptr) {
6169 return __builtin_amdgcn_is_shared (
6270 (const __attribute__ ((address_space (0 ))) void *)Ptr);
@@ -80,6 +88,10 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
8088 return __nvvm_shfl_sync_down_i32 (Mask, Var, Delta, T);
8189}
8290
91+ uint64_t ballotSync (uint64_t Mask, int32_t Pred) {
92+ return __nvvm_vote_ballot_sync (static_cast <uint32_t >(Mask), Pred);
93+ }
94+
8395bool isSharedMemPtr (const void *Ptr) { return __nvvm_isspacep_shared (Ptr); }
8496
8597#pragma omp end declare variant
0 commit comments