Skip to content

Commit 1dce70f

Browse files
authored
[SYCL] Enable proper behavior of optional kernel features with SYCL_EXTERNAL (#9611)
Currently, the code generated from a translation unit with a declaration of a `SYCL_EXTERNAL` function with a `[[sycl::device_has(...)]]` attribute, but with no definition of that function, is a LLVM module with a declaration of the function but with no `sycl_declared_aspects` metadata. Because of this, `SYCLPropagateAspectsPass` does not propagate any used aspect information to functions that (transitively) call a `SYCL_EXTERNAL` function. This causes `sycl-post-link` to fail to split kernels that call `SYCL_EXTERNAL` functions with different required aspects. With this PR, the `sycl_declared_aspects` metadata is now attached to a `SYCL_EXTERNAL` function even if there is no definition (in the same translation unit). Additionally, `SYCLPropagateAspectsPass` now collects aspects information for function declarations.
1 parent fdd609a commit 1dce70f

File tree

6 files changed

+119
-38
lines changed

6 files changed

+119
-38
lines changed

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1121,16 +1121,6 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
11211121
Fn->setMetadata("loop_fuse",
11221122
llvm::MDNode::get(getLLVMContext(), AttrMDArgs));
11231123
}
1124-
if (const auto *A = D->getAttr<SYCLDeviceHasAttr>()) {
1125-
SmallVector<llvm::Metadata *, 4> AspectsMD;
1126-
for (auto *Aspect : A->aspects()) {
1127-
llvm::APSInt AspectInt = Aspect->EvaluateKnownConstInt(getContext());
1128-
AspectsMD.push_back(llvm::ConstantAsMetadata::get(
1129-
Builder.getInt32(AspectInt.getZExtValue())));
1130-
}
1131-
Fn->setMetadata("sycl_declared_aspects",
1132-
llvm::MDNode::get(getLLVMContext(), AspectsMD));
1133-
}
11341124
if (const auto *A = D->getAttr<SYCLUsesAspectsAttr>()) {
11351125
SmallVector<llvm::Metadata *, 4> AspectsMD;
11361126
for (auto *Aspect : A->aspects()) {

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4565,8 +4565,20 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
45654565
}
45664566

45674567
assert(F->getName() == MangledName && "name was uniqued!");
4568-
if (D)
4568+
if (D) {
45694569
SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk);
4570+
if (const auto *A = D->getAttr<SYCLDeviceHasAttr>()) {
4571+
SmallVector<llvm::Metadata *, 4> AspectsMD;
4572+
for (auto *Aspect : A->aspects()) {
4573+
llvm::APSInt AspectInt = Aspect->EvaluateKnownConstInt(getContext());
4574+
auto *T = llvm::Type::getInt32Ty(getLLVMContext());
4575+
auto *C = llvm::Constant::getIntegerValue(T, AspectInt);
4576+
AspectsMD.push_back(llvm::ConstantAsMetadata::get(C));
4577+
}
4578+
F->setMetadata("sycl_declared_aspects",
4579+
llvm::MDNode::get(getLLVMContext(), AspectsMD));
4580+
}
4581+
}
45704582
if (ExtraAttrs.hasFnAttrs()) {
45714583
llvm::AttrBuilder B(F->getContext(), ExtraAttrs.getFnAttrs());
45724584
F->addFnAttrs(B);

clang/test/CodeGenSYCL/device_has.cpp

Lines changed: 36 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,46 @@
66
using namespace sycl;
77
queue q;
88

9-
// CHECK: define dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] !srcloc ![[SRCLOC1:[0-9]+]]
9+
// CHECK-DAG: define dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] !srcloc ![[SRCLOC1:[0-9]+]]
1010

11-
// CHECK: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]] {
11+
// CHECK-DAG: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]] {
1212
[[sycl::device_has(sycl::aspect::cpu)]] void func1() {}
1313

14-
// CHECK: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]] {
14+
// CHECK-DAG: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]] {
1515
[[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {}
1616

17-
// CHECK: define dso_local spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] {
17+
// CHECK-DAG: define dso_local spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] {
1818
[[sycl::device_has()]] void func3() {}
1919

20-
// CHECK: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]] {
20+
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]] {
2121
template <sycl::aspect Aspect>
2222
[[sycl::device_has(Aspect)]] void func4() {}
2323

24-
// CHECK: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]] {
24+
// CHECK-DAG: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]] {
2525
[[sycl::device_has(sycl::aspect::cpu)]] void func5();
2626
void func5() {}
2727

2828
constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; }
29-
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]] {
29+
// CHECK-DAG: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]] {
3030
[[sycl::device_has(getAspect())]] void func6() {}
3131

32-
// CHECK: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS1]]
33-
// CHECK: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS5:[0-9]+]]
32+
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS1]]
33+
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS5:[0-9]+]]
3434
template <sycl::aspect... Asp>
3535
[[sycl::device_has(Asp...)]] void func7() {}
3636

37-
// CHECK: define linkonce_odr spir_func void @{{.*}}func8{{.*}} !sycl_declared_aspects ![[ASPECTS5]]
37+
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func8{{.*}} !sycl_declared_aspects ![[ASPECTS5]]
3838
template <sycl::aspect Asp, sycl::aspect... AspPack>
3939
[[sycl::device_has(Asp, AspPack...)]] void func8() {}
4040

41+
// CHECK-DAG: declare !sycl_declared_aspects ![[ASPECTS6:[0-9]+]] spir_func void @{{.*}}func9{{.*}}
42+
[[sycl::device_has(sycl::aspect::fp16)]]
43+
SYCL_EXTERNAL void func9();
44+
45+
// CHECK-DAG: define dso_local spir_func void @{{.*}}func10{{.*}} !sycl_declared_aspects ![[ASPECTS6]]
46+
[[sycl::device_has(sycl::aspect::fp16)]]
47+
SYCL_EXTERNAL void func10() {}
48+
4149
class KernelFunctor {
4250
public:
4351
[[sycl::device_has(sycl::aspect::cpu)]] void operator()() const {
@@ -50,29 +58,32 @@ class KernelFunctor {
5058
func7<sycl::aspect::cpu>();
5159
func7<sycl::aspect::cpu, sycl::aspect::host>();
5260
func8<sycl::aspect::cpu, sycl::aspect::host>();
61+
func9();
62+
func10();
5363
}
5464
};
5565

5666
void foo() {
5767
q.submit([&](handler &h) {
5868
KernelFunctor f1;
5969
h.single_task<class kernel_name_1>(f1);
60-
// CHECK: define dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] !srcloc ![[SRCLOC8:[0-9]+]]
70+
// CHECK-DAG: define dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] !srcloc ![[SRCLOC8:[0-9]+]]
6171
h.single_task<class kernel_name_2>([]() [[sycl::device_has(sycl::aspect::gpu)]] {});
6272
});
6373
}
6474

65-
// CHECK: [[ASPECTS1]] = !{i32 1}
66-
// CHECK: [[SRCLOC1]] = !{i32 {{[0-9]+}}}
67-
// CHECK: [[EMPTYASPECTS]] = !{}
68-
// CHECK: [[SRCLOC2]] = !{i32 {{[0-9]+}}}
69-
// CHECK: [[ASPECTS2]] = !{i32 5, i32 2}
70-
// CHECK: [[SRCLOC3]] = !{i32 {{[0-9]+}}}
71-
// CHECK: [[SRCLOC4]] = !{i32 {{[0-9]+}}}
72-
// CHECK: [[ASPECTS3]] = !{i32 0}
73-
// CHECK: [[SRCLOC5]] = !{i32 {{[0-9]+}}}
74-
// CHECK: [[SRCLOC6]] = !{i32 {{[0-9]+}}}
75-
// CHECK: [[SRCLOC7]] = !{i32 {{[0-9]+}}}
76-
// CHECK: [[ASPECTS5]] = !{i32 1, i32 0}
77-
// CHECK: [[ASPECTS4]] = !{i32 2}
78-
// CHECK: [[SRCLOC8]] = !{i32 {{[0-9]+}}}
75+
// CHECK-DAG: [[ASPECTS1]] = !{i32 1}
76+
// CHECK-DAG: [[SRCLOC1]] = !{i32 {{[0-9]+}}}
77+
// CHECK-DAG: [[EMPTYASPECTS]] = !{}
78+
// CHECK-DAG: [[SRCLOC2]] = !{i32 {{[0-9]+}}}
79+
// CHECK-DAG: [[ASPECTS2]] = !{i32 5, i32 2}
80+
// CHECK-DAG: [[SRCLOC3]] = !{i32 {{[0-9]+}}}
81+
// CHECK-DAG: [[SRCLOC4]] = !{i32 {{[0-9]+}}}
82+
// CHECK-DAG: [[ASPECTS3]] = !{i32 0}
83+
// CHECK-DAG: [[SRCLOC5]] = !{i32 {{[0-9]+}}}
84+
// CHECK-DAG: [[SRCLOC6]] = !{i32 {{[0-9]+}}}
85+
// CHECK-DAG: [[SRCLOC7]] = !{i32 {{[0-9]+}}}
86+
// CHECK-DAG: [[ASPECTS5]] = !{i32 1, i32 0}
87+
// CHECK-DAG: [[ASPECTS6]] = !{i32 5}
88+
// CHECK-DAG: [[ASPECTS4]] = !{i32 2}
89+
// CHECK-DAG: [[SRCLOC8]] = !{i32 {{[0-9]+}}}

llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -550,8 +550,6 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
550550
CallGraphTy CG;
551551

552552
for (Function &F : M.functions()) {
553-
if (F.isDeclaration())
554-
continue;
555553
processFunction(F, FunctionToUsedAspects, FunctionToDeclaredAspects,
556554
TypesWithAspects, CG);
557555
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
; RUN: opt -passes=sycl-propagate-aspects-usage %s -S | FileCheck %s
2+
3+
target triple = "spir64-unknown-unknown"
4+
5+
; CHECK: void @kernel() !sycl_used_aspects ![[#ASPECT:]]
6+
define weak_odr dso_local spir_kernel void @kernel() {
7+
entry:
8+
call spir_func void @_Z3foov()
9+
ret void
10+
}
11+
12+
; CHECK: !sycl_declared_aspects ![[#ASPECT]] !sycl_used_aspects ![[#ASPECT]] {{.*}} @_Z3foov()
13+
declare !sycl_declared_aspects !1 dso_local spir_func void @_Z3foov()
14+
15+
!sycl_aspects = !{!0}
16+
17+
!0 = !{!"fp64", i32 6}
18+
!1 = !{i32 2}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// RUN: %{build} -DSOURCE1 -c -o %t1.o
2+
// RUN: %{build} -DSOURCE2 -c -o %t2.o
3+
// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t1.o %t2.o -o %t.exe
4+
// RUN: %{run} %t.exe
5+
6+
#ifdef SOURCE1
7+
#include <iostream>
8+
#include <sycl/sycl.hpp>
9+
10+
using accT = sycl::accessor<int, 1>;
11+
constexpr int value = 42;
12+
13+
template <sycl::aspect aspect>
14+
[[sycl::device_has(aspect)]] SYCL_EXTERNAL void func(const accT &acc);
15+
16+
int main() {
17+
sycl::queue q;
18+
int data = 0;
19+
sycl::buffer<int> buf{&data, {1}};
20+
if (q.get_device().has(sycl::aspect::cpu)) {
21+
q.submit([&](sycl::handler &cgh) {
22+
accT acc{buf, cgh};
23+
cgh.single_task<class Foo>([=] { func<sycl::aspect::cpu>(acc); });
24+
}).wait_and_throw();
25+
} else if (q.get_device().has(sycl::aspect::gpu)) {
26+
q.submit([&](sycl::handler &cgh) {
27+
accT acc{buf, cgh};
28+
cgh.single_task<class Bar>([=] { func<sycl::aspect::gpu>(acc); });
29+
}).wait_and_throw();
30+
}
31+
std::cout << "OK" << std::endl;
32+
}
33+
34+
#endif // SOURCE1
35+
36+
#ifdef SOURCE2
37+
#include <sycl/sycl.hpp>
38+
39+
constexpr int value = 42;
40+
41+
using accT = sycl::accessor<int, 1>;
42+
43+
template <sycl::aspect aspect>
44+
[[sycl::device_has(aspect)]] SYCL_EXTERNAL void func(const accT &acc);
45+
template <> SYCL_EXTERNAL void func<sycl::aspect::cpu>(const accT &acc) {
46+
acc[0] = value;
47+
}
48+
template <> SYCL_EXTERNAL void func<sycl::aspect::gpu>(const accT &acc) {
49+
acc[0] = value;
50+
}
51+
52+
#endif // SOURCE2

0 commit comments

Comments
 (0)