Skip to content

[SYCL] Enable proper behavior of optional kernel features with SYCL_EXTERNAL #9611

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1121,16 +1121,6 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
Fn->setMetadata("loop_fuse",
llvm::MDNode::get(getLLVMContext(), AttrMDArgs));
}
if (const auto *A = D->getAttr<SYCLDeviceHasAttr>()) {
SmallVector<llvm::Metadata *, 4> AspectsMD;
for (auto *Aspect : A->aspects()) {
llvm::APSInt AspectInt = Aspect->EvaluateKnownConstInt(getContext());
AspectsMD.push_back(llvm::ConstantAsMetadata::get(
Builder.getInt32(AspectInt.getZExtValue())));
}
Fn->setMetadata("sycl_declared_aspects",
llvm::MDNode::get(getLLVMContext(), AspectsMD));
}
if (const auto *A = D->getAttr<SYCLUsesAspectsAttr>()) {
SmallVector<llvm::Metadata *, 4> AspectsMD;
for (auto *Aspect : A->aspects()) {
Expand Down
14 changes: 13 additions & 1 deletion clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4546,8 +4546,20 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
}

assert(F->getName() == MangledName && "name was uniqued!");
if (D)
if (D) {
SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk);
if (const auto *A = D->getAttr<SYCLDeviceHasAttr>()) {
SmallVector<llvm::Metadata *, 4> AspectsMD;
for (auto *Aspect : A->aspects()) {
llvm::APSInt AspectInt = Aspect->EvaluateKnownConstInt(getContext());
auto *T = llvm::Type::getInt32Ty(getLLVMContext());
auto *C = llvm::Constant::getIntegerValue(T, AspectInt);
AspectsMD.push_back(llvm::ConstantAsMetadata::get(C));
}
F->setMetadata("sycl_declared_aspects",
llvm::MDNode::get(getLLVMContext(), AspectsMD));
}
}
if (ExtraAttrs.hasFnAttrs()) {
llvm::AttrBuilder B(F->getContext(), ExtraAttrs.getFnAttrs());
F->addFnAttrs(B);
Expand Down
61 changes: 36 additions & 25 deletions clang/test/CodeGenSYCL/device_has.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,46 @@
using namespace sycl;
queue q;

// CHECK: define dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] !srcloc ![[SRCLOC1:[0-9]+]]
// CHECK-DAG: define dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] !srcloc ![[SRCLOC1:[0-9]+]]

// CHECK: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]] {
// CHECK-DAG: define dso_local spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC2:[0-9]+]] {
[[sycl::device_has(sycl::aspect::cpu)]] void func1() {}

// CHECK: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]] {
// CHECK-DAG: define dso_local spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] !srcloc ![[SRCLOC3:[0-9]+]] {
[[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {}

// CHECK: define dso_local spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] {
// CHECK-DAG: define dso_local spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] !srcloc ![[SRCLOC4:[0-9]+]] {
[[sycl::device_has()]] void func3() {}

// CHECK: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]] {
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] !srcloc ![[SRCLOC5:[0-9]+]] {
template <sycl::aspect Aspect>
[[sycl::device_has(Aspect)]] void func4() {}

// CHECK: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]] {
// CHECK-DAG: define dso_local spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC6:[0-9]+]] {
[[sycl::device_has(sycl::aspect::cpu)]] void func5();
void func5() {}

constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; }
// CHECK: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]] {
// CHECK-DAG: define dso_local spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] !srcloc ![[SRCLOC7:[0-9]+]] {
[[sycl::device_has(getAspect())]] void func6() {}

// CHECK: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS1]]
// CHECK: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS5:[0-9]+]]
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS1]]
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func7{{.*}} !sycl_declared_aspects ![[ASPECTS5:[0-9]+]]
template <sycl::aspect... Asp>
[[sycl::device_has(Asp...)]] void func7() {}

// CHECK: define linkonce_odr spir_func void @{{.*}}func8{{.*}} !sycl_declared_aspects ![[ASPECTS5]]
// CHECK-DAG: define linkonce_odr spir_func void @{{.*}}func8{{.*}} !sycl_declared_aspects ![[ASPECTS5]]
template <sycl::aspect Asp, sycl::aspect... AspPack>
[[sycl::device_has(Asp, AspPack...)]] void func8() {}

// CHECK-DAG: declare !sycl_declared_aspects ![[ASPECTS6:[0-9]+]] spir_func void @{{.*}}func9{{.*}}
[[sycl::device_has(sycl::aspect::fp16)]]
SYCL_EXTERNAL void func9();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also test defined SYCL_EXTERNAL function, please?


// CHECK-DAG: define dso_local spir_func void @{{.*}}func10{{.*}} !sycl_declared_aspects ![[ASPECTS6]]
[[sycl::device_has(sycl::aspect::fp16)]]
SYCL_EXTERNAL void func10() {}

class KernelFunctor {
public:
[[sycl::device_has(sycl::aspect::cpu)]] void operator()() const {
Expand All @@ -50,29 +58,32 @@ class KernelFunctor {
func7<sycl::aspect::cpu>();
func7<sycl::aspect::cpu, sycl::aspect::host>();
func8<sycl::aspect::cpu, sycl::aspect::host>();
func9();
func10();
}
};

void foo() {
q.submit([&](handler &h) {
KernelFunctor f1;
h.single_task<class kernel_name_1>(f1);
// CHECK: define dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] !srcloc ![[SRCLOC8:[0-9]+]]
// CHECK-DAG: define dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] !srcloc ![[SRCLOC8:[0-9]+]]
h.single_task<class kernel_name_2>([]() [[sycl::device_has(sycl::aspect::gpu)]] {});
});
}

// CHECK: [[ASPECTS1]] = !{i32 1}
// CHECK: [[SRCLOC1]] = !{i32 {{[0-9]+}}}
// CHECK: [[EMPTYASPECTS]] = !{}
// CHECK: [[SRCLOC2]] = !{i32 {{[0-9]+}}}
// CHECK: [[ASPECTS2]] = !{i32 5, i32 2}
// CHECK: [[SRCLOC3]] = !{i32 {{[0-9]+}}}
// CHECK: [[SRCLOC4]] = !{i32 {{[0-9]+}}}
// CHECK: [[ASPECTS3]] = !{i32 0}
// CHECK: [[SRCLOC5]] = !{i32 {{[0-9]+}}}
// CHECK: [[SRCLOC6]] = !{i32 {{[0-9]+}}}
// CHECK: [[SRCLOC7]] = !{i32 {{[0-9]+}}}
// CHECK: [[ASPECTS5]] = !{i32 1, i32 0}
// CHECK: [[ASPECTS4]] = !{i32 2}
// CHECK: [[SRCLOC8]] = !{i32 {{[0-9]+}}}
// CHECK-DAG: [[ASPECTS1]] = !{i32 1}
// CHECK-DAG: [[SRCLOC1]] = !{i32 {{[0-9]+}}}
// CHECK-DAG: [[EMPTYASPECTS]] = !{}
// CHECK-DAG: [[SRCLOC2]] = !{i32 {{[0-9]+}}}
// CHECK-DAG: [[ASPECTS2]] = !{i32 5, i32 2}
// CHECK-DAG: [[SRCLOC3]] = !{i32 {{[0-9]+}}}
// CHECK-DAG: [[SRCLOC4]] = !{i32 {{[0-9]+}}}
// CHECK-DAG: [[ASPECTS3]] = !{i32 0}
// CHECK-DAG: [[SRCLOC5]] = !{i32 {{[0-9]+}}}
// CHECK-DAG: [[SRCLOC6]] = !{i32 {{[0-9]+}}}
// CHECK-DAG: [[SRCLOC7]] = !{i32 {{[0-9]+}}}
// CHECK-DAG: [[ASPECTS5]] = !{i32 1, i32 0}
// CHECK-DAG: [[ASPECTS6]] = !{i32 5}
// CHECK-DAG: [[ASPECTS4]] = !{i32 2}
// CHECK-DAG: [[SRCLOC8]] = !{i32 {{[0-9]+}}}
2 changes: 0 additions & 2 deletions llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,8 +550,6 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
CallGraphTy CG;

for (Function &F : M.functions()) {
if (F.isDeclaration())
continue;
processFunction(F, FunctionToUsedAspects, FunctionToDeclaredAspects,
TypesWithAspects, CG);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
; RUN: opt -passes=sycl-propagate-aspects-usage %s -S | FileCheck %s

target triple = "spir64-unknown-unknown"

; CHECK: void @kernel() !sycl_used_aspects ![[#ASPECT:]]
define weak_odr dso_local spir_kernel void @kernel() {
entry:
call spir_func void @_Z3foov()
ret void
}

; CHECK: !sycl_declared_aspects ![[#ASPECT]] !sycl_used_aspects ![[#ASPECT]] {{.*}} @_Z3foov()
declare !sycl_declared_aspects !1 dso_local spir_func void @_Z3foov()

!sycl_aspects = !{!0}

!0 = !{!"fp64", i32 6}
!1 = !{i32 2}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// RUN: %{build} -DSOURCE1 -c -o %t1.o
// RUN: %{build} -DSOURCE2 -c -o %t2.o
// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t1.o %t2.o -o %t.exe
// RUN: %{run} %t.exe

#ifdef SOURCE1
#include <iostream>
#include <sycl/sycl.hpp>

using accT = sycl::accessor<int, 1>;
constexpr int value = 42;

template <sycl::aspect aspect>
[[sycl::device_has(aspect)]] SYCL_EXTERNAL void func(const accT &acc);

int main() {
sycl::queue q;
int data = 0;
sycl::buffer<int> buf{&data, {1}};
if (q.get_device().has(sycl::aspect::cpu)) {
q.submit([&](sycl::handler &cgh) {
accT acc{buf, cgh};
cgh.single_task<class Foo>([=] { func<sycl::aspect::cpu>(acc); });
}).wait_and_throw();
} else if (q.get_device().has(sycl::aspect::gpu)) {
q.submit([&](sycl::handler &cgh) {
accT acc{buf, cgh};
cgh.single_task<class Bar>([=] { func<sycl::aspect::gpu>(acc); });
}).wait_and_throw();
}
std::cout << "OK" << std::endl;
}

#endif // SOURCE1

#ifdef SOURCE2
#include <sycl/sycl.hpp>

constexpr int value = 42;

using accT = sycl::accessor<int, 1>;

template <sycl::aspect aspect>
[[sycl::device_has(aspect)]] SYCL_EXTERNAL void func(const accT &acc);
template <> SYCL_EXTERNAL void func<sycl::aspect::cpu>(const accT &acc) {
acc[0] = value;
}
template <> SYCL_EXTERNAL void func<sycl::aspect::gpu>(const accT &acc) {
acc[0] = value;
}

#endif // SOURCE2