Skip to content

Commit 1a90670

Browse files
committed
[OpenMP][OMPX] Add ballot_sync
1 parent 4c62bca commit 1a90670

File tree

5 files changed

+69
-0
lines changed

5 files changed

+69
-0
lines changed

offload/DeviceRTL/include/Utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
2525

2626
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
2727

28+
uint32_t ballotSync(uint32_t Mask, int32_t Pred);
29+
2830
/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
2931
uint64_t pack(uint32_t LowBits, uint32_t HighBits);
3032

offload/DeviceRTL/src/Mapping.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,4 +364,8 @@ _TGT_KERNEL_LANGUAGE(block_id, getBlockIdInKernel)
364364
_TGT_KERNEL_LANGUAGE(block_dim, getNumberOfThreadsInBlock)
365365
_TGT_KERNEL_LANGUAGE(grid_dim, getNumberOfBlocksInKernel)
366366

367+
extern "C" uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
368+
return utils::ballotSync(mask, pred);
369+
}
370+
367371
#pragma omp end declare target

offload/DeviceRTL/src/Utils.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
3737
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
3838
int32_t Width);
3939

40+
uint64_t ballotSync(uint64_t Mask, int32_t Pred);
41+
4042
/// AMDGCN Implementation
4143
///
4244
///{
@@ -57,6 +59,12 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
5759
return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
5860
}
5961

62+
uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
63+
return Mask &
64+
(__AMDGCN_WAVEFRONT_SIZE == 32 ? __builtin_amdgcn_ballot_w32(Pred)
65+
: __builtin_amdgcn_ballot_w64(Pred));
66+
}
67+
6068
bool isSharedMemPtr(const void *Ptr) {
6169
return __builtin_amdgcn_is_shared(
6270
(const __attribute__((address_space(0))) void *)Ptr);
@@ -80,6 +88,10 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
8088
return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
8189
}
8290

91+
uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
92+
return __nvvm_vote_ballot_sync(static_cast<uint32_t>(Mask), Pred);
93+
}
94+
8395
bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }
8496

8597
#pragma omp end declare variant
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: %libomptarget-compilexx-run-and-check-generic
2+
//
3+
// UNSUPPORTED: x86_64-pc-linux-gnu
4+
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
5+
// UNSUPPORTED: aarch64-unknown-linux-gnu
6+
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
7+
// UNSUPPORTED: s390x-ibm-linux-gnu
8+
// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
9+
10+
#include <assert.h>
11+
#include <ompx.h>
12+
#include <stdint.h>
13+
#include <stdio.h>
14+
#include <stdlib.h>
15+
16+
int main(int argc, char *argv[]) {
17+
const int num_blocks = 1;
18+
const int block_size = 64;
19+
const int N = num_blocks * block_size;
20+
uint64_t *data = (int *)malloc(N * sizeof(uint64_t));
21+
22+
for (int i = 0; i < N; ++i)
23+
data[i] = i & 0x1;
24+
25+
#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) map(tofrom: data[0:N])
26+
{
27+
int tid = ompx_thread_id_x();
28+
uint64_t mask = ompx_ballot_sync(~0U, data[tid]);
29+
data[tid] += mask;
30+
}
31+
32+
for (int i = 0; i < N; ++i)
33+
assert(data[i] == ((i & 0x1) + 0xaaaaaaaa));
34+
35+
// CHECK: PASS
36+
printf("PASS\n");
37+
38+
return 0;
39+
}

openmp/runtime/src/include/ompx.h.var

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
#define INLINE [[clang::always_inline]]
1313

14+
typedef unsigned long uint64_t;
15+
1416
#ifdef __cplusplus
1517
extern "C" {
1618
#endif
@@ -83,6 +85,10 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
8385
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
8486
///}
8587

88+
static INLINE uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
89+
__builtin_trap();
90+
}
91+
8692
#pragma omp end declare variant
8793

8894
/// ompx_{sync_block}_{,divergent}
@@ -111,6 +117,8 @@ _TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
111117
#undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C
112118
///}
113119

120+
uint64_t ompx_ballot_sync(uint64_t mask, int pred);
121+
114122
#ifdef __cplusplus
115123
}
116124
#endif
@@ -162,6 +170,10 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
162170
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
163171
///}
164172

173+
static INLINE uint64_t ballot_sync(uint64_t mask, int pred) {
174+
return ompx_ballot_sync(mask, pred);
175+
}
176+
165177
} // namespace ompx
166178
#endif
167179

0 commit comments

Comments
 (0)