Skip to content

Commit dda743a

Browse files
authored
[LIBCLC][CUDA] Apply always_inline to all atomics (#5710)
Fixes: #5429. Interestingly enough, the performance penalty here comes not from performing the call itself, but from clang being unable to optimise away the cases that the atomics define but that are not needed at the call site.
1 parent bd15de9 commit dda743a

File tree

8 files changed

+31
-18
lines changed

8 files changed

+31
-18
lines changed

libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,12 @@ Memory order is stored in the lowest 5 bits */
7676

7777
#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
7878
OP, OP_MANGLED) \
79+
__attribute__((always_inline)) \
7980
__CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
8081
OP_MANGLED, __global, AS1, _global_) \
81-
__CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
82-
OP_MANGLED, __local, AS3, _shared_)
82+
__attribute__((always_inline)) \
83+
__CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
84+
OP, OP_MANGLED, __local, AS3, _shared_)
8385

8486
__CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange)
8587
__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas, CompareExchange)

libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,11 @@ Memory order is stored in the lowest 5 bits */
7878

7979
#define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
8080
NAME_MANGLED) \
81+
__attribute__((always_inline)) \
8182
__CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
8283
NAME_MANGLED, __global, AS1, _global_) \
83-
__CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
84-
NAME_MANGLED, __local, AS3, _shared_)
84+
__attribute__((always_inline)) \
85+
__CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
86+
NAME_MANGLED, __local, AS3, _shared_)
8587

8688
#endif

libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@
2727
}
2828

2929
#define __CLC_NVVM_ATOMIC_INCDEC(TYPE, TYPE_MANGLED, OP_MANGLED, VAL) \
30+
__attribute__((always_inline)) \
3031
__CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __global, \
31-
AS1) \
32+
AS1) __attribute__((always_inline)) \
3233
__CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __local, \
3334
AS3)
3435

libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,11 @@ Memory order is stored in the lowest 5 bits */
6464
}
6565

6666
#define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \
67-
__CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
68-
__global, AS1, _global_) \
69-
__CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
70-
__local, AS3, _shared_)
67+
__attribute__((always_inline)) __CLC_NVVM_ATOMIC_LOAD_IMPL( \
68+
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __global, AS1, _global_) \
69+
__attribute__((always_inline)) \
70+
__CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \
71+
TYPE_MANGLED_NV, __local, AS3, _shared_)
7172

7273
__CLC_NVVM_ATOMIC_LOAD(int, i, int, i)
7374
__CLC_NVVM_ATOMIC_LOAD(uint, j, int, i)

libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,12 @@ __CLC_NVVM_ATOMIC(unsigned long, m, unsigned long, ul, max,
7070

7171
#define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
7272
OP_MANGLED) \
73+
__attribute__((always_inline)) \
7374
__CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
7475
OP_MANGLED, __global, AS1) \
75-
__CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
76-
OP_MANGLED, __local, AS3)
76+
__attribute__((always_inline)) \
77+
__CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \
78+
TYPE_INT_MANGLED, OP_MANGLED, __local, AS3)
7779

7880
__CLC_NVVM_ATOMIC_MAX(float, f, int, i, FMaxEXT)
7981
__CLC_NVVM_ATOMIC_MAX(double, d, long, l, FMaxEXT)

libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,12 @@ __CLC_NVVM_ATOMIC(ulong, m, ulong, ul, min, _Z18__spirv_AtomicUMin)
6868

6969
#define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
7070
OP_MANGLED) \
71+
__attribute__((always_inline)) \
7172
__CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
7273
OP_MANGLED, __global, AS1) \
73-
__CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
74-
OP_MANGLED, __local, AS3)
74+
__attribute__((always_inline)) \
75+
__CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \
76+
TYPE_INT_MANGLED, OP_MANGLED, __local, AS3)
7577

7678
__CLC_NVVM_ATOMIC_MIN(float, f, int, i, FMinEXT)
7779
__CLC_NVVM_ATOMIC_MIN(double, d, long, l, FMinEXT)

libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,11 @@ Memory order is stored in the lowest 5 bits */
6565
}
6666

6767
#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \
68-
__CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
69-
__global, AS1, _global_) \
70-
__CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \
71-
__local, AS3, _shared_)
68+
__attribute__((always_inline)) __CLC_NVVM_ATOMIC_STORE_IMPL( \
69+
TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __global, AS1, _global_) \
70+
__attribute__((always_inline)) \
71+
__CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \
72+
TYPE_MANGLED_NV, __local, AS3, _shared_)
7273

7374
__CLC_NVVM_ATOMIC_STORE(int, i, int, i)
7475
__CLC_NVVM_ATOMIC_STORE(uint, j, int, i)

libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@
2424
}
2525

2626
#define __CLC_NVVM_ATOMIC_SUB(TYPE, TYPE_MANGLED, OP_MANGLED) \
27+
__attribute__((always_inline)) \
2728
__CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __global, AS1) \
28-
__CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, AS3)
29+
__attribute__((always_inline)) \
30+
__CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, AS3)
2931

3032
__CLC_NVVM_ATOMIC_SUB(int, i, ISub)
3133
__CLC_NVVM_ATOMIC_SUB(unsigned int, j, ISub)

0 commit comments

Comments
 (0)