Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit ee03160

Browse files
[libomptarget] Refactor syncthreads macro to inline function
Summary: [libomptarget] Refactor syncthreads macro to inline function.
See also abandoned D66846, split into this diff and others. Rev 2 of D66855.
Reviewers: jdoerfert, ABataev, grokos, ronlieb, gregrodgers
Subscribers: openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D66861
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@370210 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 7ed5372 commit ee03160

File tree

5 files changed

+16
-19
lines changed

5 files changed

+16
-19
lines changed

libomptarget/deviceRTLs/nvptx/src/data_sharing.cu

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -553,8 +553,7 @@ EXTERN void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
553553
if (GetThreadIdInBlock() == 0) {
554554
*frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size);
555555
}
556-
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
557-
__SYNCTHREADS();
556+
__kmpc_impl_syncthreads();
558557
return;
559558
}
560559
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
@@ -568,8 +567,7 @@ EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
568567
if (is_shared)
569568
return;
570569
if (isSPMDExecutionMode) {
571-
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
572-
__SYNCTHREADS();
570+
__kmpc_impl_syncthreads();
573571
if (GetThreadIdInBlock() == 0) {
574572
omptarget_nvptx_simpleMemoryManager.Release();
575573
}

libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "omptarget-nvptx.h"
14+
#include "target_impl.h"
1415

1516
////////////////////////////////////////////////////////////////////////////////
1617
// global data tables
@@ -106,7 +107,7 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
106107
}
107108
if (!RequiresOMPRuntime) {
108109
// Runtime is not required - exit.
109-
__SYNCTHREADS();
110+
__kmpc_impl_syncthreads();
110111
return;
111112
}
112113

@@ -125,8 +126,7 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
125126
// init team context
126127
currTeamDescr.InitTeamDescr();
127128
}
128-
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
129-
__SYNCTHREADS();
129+
__kmpc_impl_syncthreads();
130130

131131
omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
132132
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
@@ -168,8 +168,7 @@ EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
168168
if (!RequiresOMPRuntime)
169169
return;
170170

171-
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
172-
__SYNCTHREADS();
171+
__kmpc_impl_syncthreads();
173172
int threadId = GetThreadIdInBlock();
174173
if (threadId == 0) {
175174
// Enqueue omp state object for use by another team.

libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,6 @@
5656
#define __ACTIVEMASK() __ballot(1)
5757
#endif // CUDA_VERSION
5858

59-
#define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
60-
// Use original __syncthreads if compiled by nvcc or clang >= 9.0.
61-
#if !defined(__clang__) || __clang_major__ >= 9
62-
#define __SYNCTHREADS() __syncthreads()
63-
#else
64-
#define __SYNCTHREADS() __SYNCTHREADS_N(0)
65-
#endif
66-
6759
// arguments needed for L0 parallelism only.
6860
class omptarget_nvptx_SharedArgs {
6961
public:

libomptarget/deviceRTLs/nvptx/src/sync.cu

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
7575
// parallel region and that all worker threads participate.
7676
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
7777
PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
78-
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
79-
__SYNCTHREADS();
78+
__kmpc_impl_syncthreads();
8079
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
8180
}
8281

libomptarget/deviceRTLs/nvptx/src/target_impl.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,15 @@ INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
6363
#endif // CUDA_VERSION
6464
}
6565

66+
INLINE void __kmpc_impl_syncthreads() {
67+
// Use original __syncthreads if compiled by nvcc or clang >= 9.0; older clang falls back to the inline-asm "bar.sync" on barrier operand 0 below.
68+
#if !defined(__clang__) || __clang_major__ >= 9
69+
__syncthreads();
70+
#else
71+
asm volatile("bar.sync %0;" : : "r"(0) : "memory");
72+
#endif // !defined(__clang__) || __clang_major__ >= 9
73+
}
74+
6675
INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
6776
#if CUDA_VERSION >= 9000
6877
__syncwarp(Mask);

0 commit comments

Comments
 (0)