Skip to content

Commit 4cb4fec

Browse files
authored
[SYCL] Change linkage to linkonce_odr unless SYCL_EXTERNAL. (#10317)
In getLLVMLinkageForDeclarator, CUDA device functions are given internal linkage to allow improved IPO. The same optimizations would also be useful for SYCL. However, as multiple translation units are a possibility here, it cannot be done exactly the same way. There are a number of extra considerations that need to be made. - If a device function or object is annotated with the SYCL_EXTERNAL macro, it may be called from a different module and its handling should not be touched. - If a device function or object is not annotated with the SYCL_EXTERNAL macro, it may not be called from a different translation unit unless that other translation unit provides its own definition of it. As such, if other optimizations remove all references in the current module, the function or object itself can be removed, and other passes should be aware that it is legal to remove the function or object. - If two kernels defined in different translation units call into the same non-kernel inline or template device function, these different copies of the device function would previously be merged together, and should continue to be. The linkage that satisfies these criteria is linkonce_odr, so this commit marks functions and objects so, unless the sycl_kernel or sycl_device attributes are present which indicate that it must remain externally accessible.
1 parent deb020b commit 4cb4fec

15 files changed

+118
-95
lines changed

clang/include/clang/Sema/Sema.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14187,7 +14187,7 @@ class Sema final {
1418714187
SourceLocation RParenLoc);
1418814188

1418914189
template <typename AttrTy>
14190-
bool isTypeDecoratedWithDeclAttribute(QualType Ty) {
14190+
static bool isTypeDecoratedWithDeclAttribute(QualType Ty) {
1419114191
const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl();
1419214192
if (!RecTy)
1419314193
return false;

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include "clang/CodeGen/BackendUtil.h"
4949
#include "clang/CodeGen/ConstantInitBuilder.h"
5050
#include "clang/Frontend/FrontendDiagnostic.h"
51+
#include "clang/Sema/Sema.h"
5152
#include "llvm/ADT/STLExtras.h"
5253
#include "llvm/ADT/StringExtras.h"
5354
#include "llvm/ADT/StringSwitch.h"
@@ -6062,6 +6063,19 @@ CodeGenModule::getLLVMLinkageForDeclarator(const DeclaratorDecl *D,
60626063
if (Linkage == GVA_AvailableExternally)
60636064
return llvm::GlobalValue::AvailableExternallyLinkage;
60646065

6066+
// SYCL: Device code is not generally limited to one translation unit, but
6067+
// anything accessed from another translation unit is required to be annotated
6068+
// with the SYCL_EXTERNAL macro. For any function or variable that does not
6069+
// have this, linkonce_odr suffices. If -fno-sycl-rdc is passed, we know there
6070+
// is only one translation unit and can so mark them internal.
6071+
if (getLangOpts().SYCLIsDevice && !D->hasAttr<SYCLKernelAttr>() &&
6072+
!D->hasAttr<SYCLDeviceAttr>() &&
6073+
!Sema::isTypeDecoratedWithDeclAttribute<SYCLDeviceGlobalAttr>(
6074+
D->getType()))
6075+
return getLangOpts().GPURelocatableDeviceCode
6076+
? llvm::Function::LinkOnceODRLinkage
6077+
: llvm::Function::InternalLinkage;
6078+
60656079
// Note that Apple's kernel linker doesn't support symbol
60666080
// coalescing, so we need to avoid linkonce and weak linkages there.
60676081
// Normally, this means we just map to internal, but for explicit

clang/test/CodeGenSYCL/Inputs/sycl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
extern "C" int printf(const char* fmt, ...);
77

88
#ifdef __SYCL_DEVICE_ONLY__
9-
__attribute__((convergent)) extern SYCL_EXTERNAL void
9+
__attribute__((convergent)) extern __attribute__((sycl_device)) void
1010
__spirv_ControlBarrier(int, int, int) noexcept;
1111
#endif
1212

clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ int main() {
175175
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() #0{{.*}} !kernel_arg_buffer_location ![[NUM]]
176176
// CHECK-NOT: !scheduler_target_fmax_mhz
177177
// CHECK-SAME: {
178-
// CHECK: define dso_local spir_func void @_Z3foov()
178+
// CHECK: define {{.*}}spir_func void @_Z3foov()
179179
h.single_task<class kernel_name4>(
180180
[]() { foo(); });
181181

@@ -195,7 +195,7 @@ int main() {
195195
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name8() #0{{.*}} !kernel_arg_buffer_location ![[NUM]]
196196
// CHECK-NOT: !num_simd_work_items
197197
// CHECK-SAME: {
198-
// CHECK: define dso_local spir_func void @_Z4foo1v()
198+
// CHECK: define {{.*}}spir_func void @_Z4foo1v()
199199
h.single_task<class kernel_name8>(
200200
[]() { foo1(); });
201201

@@ -215,7 +215,7 @@ int main() {
215215
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name12() #0{{.*}} !kernel_arg_buffer_location ![[NUM]]
216216
// CHECK-NOT: !no_global_work_offset
217217
// CHECK-SAME: {
218-
// CHECK: define dso_local spir_func void @_Z4foo2v()
218+
// CHECK: define {{.*}}spir_func void @_Z4foo2v()
219219
h.single_task<class kernel_name12>(
220220
[]() { foo2(); });
221221

@@ -235,7 +235,7 @@ int main() {
235235
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name16() #0{{.*}} !kernel_arg_buffer_location ![[NUM]]
236236
// CHECK-NOT: !max_global_work_dim
237237
// CHECK-SAME: {
238-
// CHECK: define dso_local spir_func void @_Z4foo3v()
238+
// CHECK: define {{.*}}spir_func void @_Z4foo3v()
239239
h.single_task<class kernel_name16>(
240240
[]() { foo3(); });
241241

@@ -255,7 +255,7 @@ int main() {
255255
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name20() #0{{.*}} !kernel_arg_buffer_location ![[NUM]]
256256
// CHECK-NOT: !reqd_sub_group_size
257257
// CHECK-SAME: {
258-
// CHECK: define dso_local spir_func void @_Z4foo4v()
258+
// CHECK: define {{.*}}spir_func void @_Z4foo4v()
259259
Functor4 f4;
260260
h.single_task<class kernel_name20>(f4);
261261

@@ -275,7 +275,7 @@ int main() {
275275
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name24() #0{{.*}} !kernel_arg_buffer_location ![[NUM]]
276276
// CHECK-NOT: !reqd_work_group_size
277277
// CHECK-SAME: {
278-
// CHECK: define dso_local spir_func void @_Z4foo5v()
278+
// CHECK: define {{.*}}spir_func void @_Z4foo5v()
279279
Functor6 f6;
280280
h.single_task<class kernel_name24>(f6);
281281

@@ -295,7 +295,7 @@ int main() {
295295
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name28() #0{{.*}} !kernel_arg_buffer_location ![[NUM]]
296296
// CHECK-NOT: !max_work_group_size
297297
// CHECK-SAME: {
298-
// CHECK: define dso_local spir_func void @_Z4foo6v()
298+
// CHECK: define {{.*}}spir_func void @_Z4foo6v()
299299
Functor8 f8;
300300
h.single_task<class kernel_name28>(f8);
301301

@@ -320,7 +320,7 @@ int main() {
320320
// CHECK: define {{.*}}spir_func void @{{.*}}Functor10{{.*}}(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
321321
// CHECK-NOT: noalias
322322
// CHECK-SAME: {
323-
// CHECK: define dso_local spir_func void @_Z4foo8v()
323+
// CHECK: define {{.*}}spir_func void @_Z4foo8v()
324324
Functor10 f10;
325325
h.single_task<class kernel_name32>(f10);
326326

@@ -350,7 +350,7 @@ int main() {
350350
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name38()
351351
// CHECK-NOT: !work_group_size_hint
352352
// CHECK-SAME: {
353-
// CHECK: define dso_local spir_func void @_Z5foo11v()
353+
// CHECK: define {{.*}}spir_func void @_Z5foo11v()
354354
h.single_task<class kernel_name38>(
355355
[]() { foo11(); });
356356

clang/test/CodeGenSYCL/device_global.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-unique-prefix=THE_PREFIX -std=c++17 -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-unique-prefix=THE_PREFIX -std=c++17 -fgpu-rdc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-RDC
2+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-unique-prefix=THE_PREFIX -std=c++17 -fno-gpu-rdc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORDC
23
#include "sycl.hpp"
34

45
// Test cases below show that 'sycl-unique-id' LLVM IR attribute is attached to the
@@ -10,14 +11,16 @@ using namespace sycl;
1011
queue q;
1112

1213
device_global<int> A;
14+
#ifdef SYCL_EXTERNAL
1315
SYCL_EXTERNAL device_global<int> AExt;
16+
#endif
1417
static device_global<int> B;
1518

1619
struct Foo {
1720
static device_global<int> C;
1821
};
1922
device_global<int> Foo::C;
20-
// CHECK: @AExt = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[AEXT_ATTRS:[0-9]+]]
23+
// CHECK-RDC: @AExt = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[AEXT_ATTRS:[0-9]+]]
2124
// CHECK: @A = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[A_ATTRS:[0-9]+]]
2225
// CHECK: @_ZL1B = internal addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[B_ATTRS:[0-9]+]]
2326
// CHECK: @_ZN3Foo1CE = addrspace(1) global %"class.sycl::_V1::ext::oneapi::device_global" zeroinitializer, align 8 #[[C_ATTRS:[0-9]+]]
@@ -44,7 +47,8 @@ class [[__sycl_detail__::global_variable_allowed]] only_global_var_allowed {
4447
// check that we don't generate `sycl-unique-id` IR attribute if class does not use
4548
// [[__sycl_detail__::device_global]]
4649
only_global_var_allowed<int> no_device_global;
47-
// CHECK: @no_device_global = addrspace(1) global %class.only_global_var_allowed zeroinitializer, align 8{{$}}
50+
// CHECK-RDC: @no_device_global = linkonce_odr addrspace(1) global %class.only_global_var_allowed zeroinitializer, align 8{{$}}
51+
// CHECK-NORDC: @no_device_global = internal addrspace(1) global %class.only_global_var_allowed zeroinitializer, align 8{{$}}
4852

4953
inline namespace Bar {
5054
device_global<float> InlineNS;
@@ -97,7 +101,7 @@ void bar() {
97101
// CHECK-SAME: @_ZL1B
98102
// CHECK-SAME: @_ZN12_GLOBAL__N_19same_nameE
99103

100-
// CHECK: attributes #[[AEXT_ATTRS]] = { "sycl-unique-id"="_Z4AExt" }
104+
// CHECK-RDC: attributes #[[AEXT_ATTRS]] = { "sycl-unique-id"="_Z4AExt" }
101105
// CHECK: attributes #[[A_ATTRS]] = { "sycl-unique-id"="_Z1A" }
102106
// CHECK: attributes #[[B_ATTRS]] = { "sycl-unique-id"="THE_PREFIX____ZL1B" }
103107
// CHECK: attributes #[[C_ATTRS]] = { "sycl-unique-id"="_ZN3Foo1CE" }

clang/test/CodeGenSYCL/device_has.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,33 +8,33 @@ queue q;
88

99
// CHECK-DAG: define dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] !srcloc ![[SRCLOC1:[0-9]+]]
1010

11-
// CHECK-DAG: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]] {
11+
// CHECK-DAG: define {{.*}}spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]] {
1212
[[sycl::device_has(sycl::aspect::cpu)]] void func1() {}
1313

14-
// CHECK-DAG: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]] {
14+
// CHECK-DAG: define {{.*}}spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]] {
1515
[[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {}
1616

17-
// CHECK-DAG: define dso_local spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] {
17+
// CHECK-DAG: define {{.*}}spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] {
1818
[[sycl::device_has()]] void func3() {}
1919

20-
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]] {
20+
// CHECK-DAG: define {{.*}}spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]] {
2121
template <sycl::aspect Aspect>
2222
[[sycl::device_has(Aspect)]] void func4() {}
2323

24-
// CHECK-DAG: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]] {
24+
// CHECK-DAG: define {{.*}}spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]] {
2525
[[sycl::device_has(sycl::aspect::cpu)]] void func5();
2626
void func5() {}
2727

2828
constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; }
29-
// CHECK-DAG: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]] {
29+
// CHECK-DAG: define {{.*}}spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]] {
3030
[[sycl::device_has(getAspect())]] void func6() {}
3131

32-
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS1]]
33-
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS5:[0-9]+]]
32+
// CHECK-DAG: define {{.*}}spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS1]]
33+
// CHECK-DAG: define {{.*}}spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS5:[0-9]+]]
3434
template <sycl::aspect... Asp>
3535
[[sycl::device_has(Asp...)]] void func7() {}
3636

37-
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func8{{.*}} !sycl_declared_aspects ![[ASPECTS5]]
37+
// CHECK-DAG: define {{.*}}spir_func void @{{.*}}func8{{.*}} !sycl_declared_aspects ![[ASPECTS5]]
3838
template <sycl::aspect Asp, sycl::aspect... AspPack>
3939
[[sycl::device_has(Asp, AspPack...)]] void func8() {}
4040

clang/test/CodeGenSYCL/function-attrs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
int foo();
55

6-
// CHECK: define dso_local spir_func void @_Z3barv() [[BAR:#[0-9]+]]
6+
// CHECK: define {{.*}}spir_func void @_Z3barv() [[BAR:#[0-9]+]]
77
// CHECK: attributes [[BAR]] =
88
// CHECK-SAME: convergent
99
// CHECK-SAME: nounwind

clang/test/CodeGenSYCL/functionptr-addrspace.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
77
kernelFunc();
88
}
99

10-
// CHECK: define dso_local spir_func{{.*}}invoke_function{{.*}}(ptr noundef %fptr, ptr addrspace(4) noundef %ptr)
10+
// CHECK: define {{.*}}spir_func{{.*}}invoke_function{{.*}}(ptr noundef %fptr, ptr addrspace(4) noundef %ptr)
1111
void invoke_function(int (*fptr)(), int *ptr) {}
1212

1313
int f() { return 0; }

clang/test/CodeGenSYCL/loop_unroll.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: %clang_cc1 -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
22

33
void enable() {
4-
// CHECK-LABEL: define dso_local spir_func void @_Z6enablev()
4+
// CHECK-LABEL: define {{.*}}spir_func void @_Z6enablev()
55
int i = 1000;
66
// CHECK: br i1 %{{.*}}, label %do.body, label %do.end, !llvm.loop ![[ENABLE:[0-9]+]]
77
[[clang::loop_unroll]]
@@ -10,7 +10,7 @@ void enable() {
1010

1111
template <int A>
1212
void count() {
13-
// CHECK-LABEL: define linkonce_odr spir_func void @_Z5countILi4EEvv()
13+
// CHECK-LABEL: define {{.*}}spir_func void @_Z5countILi4EEvv()
1414
// CHECK: br label %for.cond, !llvm.loop ![[COUNT:[0-9]+]]
1515
[[clang::loop_unroll(8)]]
1616
for (int i = 0; i < 1000; ++i);
@@ -21,7 +21,7 @@ void count() {
2121

2222
template <int A>
2323
void disable() {
24-
// CHECK-LABEL: define linkonce_odr spir_func void @_Z7disableILi1EEvv()
24+
// CHECK-LABEL: define {{.*}}spir_func void @_Z7disableILi1EEvv()
2525
int i = 1000, j = 100;
2626
// CHECK: br label %while.cond, !llvm.loop ![[DISABLE:[0-9]+]]
2727
[[clang::loop_unroll(1)]]

clang/test/CodeGenSYCL/nontrivial_device_copyable.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ int main() {
2929

3030
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name(ptr noundef byval(%struct.NontriviallyCopyable)
3131
// CHECK-NOT: define {{.*}}spir_func void @{{.*}}device_func{{.*}}({{.*}}byval(%struct.NontriviallyCopyable)
32-
// CHECK: define dso_local spir_func void @_Z11device_func20NontriviallyCopyable(ptr noundef %X)
32+
// CHECK: define {{.*}}spir_func void @_Z11device_func20NontriviallyCopyable(ptr noundef %X)
3333
// CHECK: %X.indirect_addr = alloca ptr addrspace(4)
3434
// CHECK: %X.indirect_addr.ascast = addrspacecast ptr %X.indirect_addr to ptr addrspace(4)
3535
// CHECK: %X.ascast = addrspacecast ptr %X to ptr addrspace(4)

0 commit comments

Comments
 (0)