Skip to content

Commit 414c1e5

Browse files
authored
[SYCL] Mark kernel on host for enabling optimizations. (#2186)
To enable better optimizations on the host, kernel implementation routines may be marked with the attribute [[clang::sycl_kernel]]. On the host, the call operator of the function object passed to a SYCL kernel is also marked alwaysinline so that these optimizations are more effective. Signed-off-by: Premanand M Rao <premanand.m.rao@intel.com>
1 parent 0c38b35 commit 414c1e5

File tree

7 files changed: +114 -38 lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1131,7 +1131,7 @@ def SYCLDevice : InheritableAttr {
11311131
def SYCLKernel : InheritableAttr {
11321132
let Spellings = [Clang<"sycl_kernel">];
11331133
let Subjects = SubjectList<[FunctionTmpl]>;
1134-
let LangOpts = [SYCLIsDevice];
1134+
let LangOpts = [SYCLIsHost, SYCLIsDevice];
11351135
let Documentation = [SYCLKernelDocs];
11361136
}
11371137

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10919,8 +10919,8 @@ def warn_sycl_kernel_invalid_template_param_type : Warning<
1091910919
"template parameter of a function template with the 'sycl_kernel' attribute"
1092010920
" cannot be a non-type template parameter">, InGroup<IgnoredAttributes>;
1092110921
def warn_sycl_kernel_num_of_function_params : Warning<
10922-
"function template with 'sycl_kernel' attribute must have a single parameter">,
10923-
InGroup<IgnoredAttributes>;
10922+
"function template with 'sycl_kernel' attribute must have at least one"
10923+
" parameter">, InGroup<IgnoredAttributes>;
1092410924
def warn_sycl_kernel_return_type : Warning<
1092510925
"function template with 'sycl_kernel' attribute must have a 'void' return type">,
1092610926
InGroup<IgnoredAttributes>;

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
889889
if (D && D->hasAttr<CFICanonicalJumpTableAttr>())
890890
Fn->addFnAttr("cfi-canonical-jump-table");
891891

892+
if (getLangOpts().SYCLIsHost && D && D->hasAttr<SYCLKernelAttr>())
893+
Fn->addFnAttr("sycl_kernel");
894+
892895
if (getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) {
893896
// Add metadata for a kernel function.
894897
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7188,15 +7188,16 @@ static void handleSYCLKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
71887188
const FunctionTemplateDecl *FT = FD->getDescribedFunctionTemplate();
71897189
assert(FT && "Function template is expected");
71907190

7191-
// Function template must have at least two template parameters.
7191+
// Function template must have at least two template parameters so it
7192+
// can be used in OpenCL kernel generation.
71927193
const TemplateParameterList *TL = FT->getTemplateParameters();
7193-
if (TL->size() < 2) {
7194+
if (S.LangOpts.SYCLIsDevice && TL->size() < 2) {
71947195
S.Diag(FT->getLocation(), diag::warn_sycl_kernel_num_of_template_params);
71957196
return;
71967197
}
71977198

7198-
// Template parameters must be typenames.
7199-
for (unsigned I = 0; I < 2; ++I) {
7199+
// The first two template parameters must be typenames.
7200+
for (unsigned I = 0; I < 2 && I < TL->size(); ++I) {
72007201
const NamedDecl *TParam = TL->getParam(I);
72017202
if (isa<NonTypeTemplateParmDecl>(TParam)) {
72027203
S.Diag(FT->getLocation(),
@@ -7205,8 +7206,8 @@ static void handleSYCLKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
72057206
}
72067207
}
72077208

7208-
// Function must have at least one argument.
7209-
if (getFunctionOrMethodNumParams(D) != 1) {
7209+
// Function must have at least one parameter.
7210+
if (getFunctionOrMethodNumParams(D) < 1) {
72107211
S.Diag(FT->getLocation(), diag::warn_sycl_kernel_num_of_function_params);
72117212
return;
72127213
}

clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6148,6 +6148,32 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
61486148
return D;
61496149
}
61506150

6151+
static void processSYCLKernel(Sema &S, FunctionDecl *FD, MangleContext &MC) {
6152+
if (S.LangOpts.SYCLIsDevice) {
6153+
S.ConstructOpenCLKernel(FD, MC);
6154+
} else if (S.LangOpts.SYCLIsHost) {
6155+
CXXRecordDecl *CRD = (*FD->param_begin())->getType()->getAsCXXRecordDecl();
6156+
for (auto *Method : CRD->methods())
6157+
if (Method->getOverloadedOperator() == OO_Call &&
6158+
!Method->hasAttr<AlwaysInlineAttr>())
6159+
Method->addAttr(AlwaysInlineAttr::CreateImplicit(S.getASTContext()));
6160+
}
6161+
}
6162+
6163+
static void processFunctionInstantiation(Sema &S,
6164+
SourceLocation PointOfInstantiation,
6165+
FunctionDecl *FD,
6166+
bool DefinitionRequired,
6167+
MangleContext &MC) {
6168+
S.InstantiateFunctionDefinition(/*FIXME:*/ PointOfInstantiation, FD, true,
6169+
DefinitionRequired, true);
6170+
if (!FD->isDefined())
6171+
return;
6172+
if (FD->hasAttr<SYCLKernelAttr>())
6173+
processSYCLKernel(S, FD, MC);
6174+
FD->setInstantiationIsPending(false);
6175+
}
6176+
61516177
/// Performs template instantiation for all implicit template
61526178
/// instantiations we have seen until this point.
61536179
void Sema::PerformPendingInstantiations(bool LocalOnly) {
@@ -6170,37 +6196,16 @@ void Sema::PerformPendingInstantiations(bool LocalOnly) {
61706196
if (FunctionDecl *Function = dyn_cast<FunctionDecl>(Inst.first)) {
61716197
bool DefinitionRequired = Function->getTemplateSpecializationKind() ==
61726198
TSK_ExplicitInstantiationDefinition;
6173-
if (Function->isMultiVersion()) {
6199+
if (Function->isMultiVersion())
61746200
getASTContext().forEachMultiversionedFunctionVersion(
61756201
Function, [this, Inst, DefinitionRequired,
61766202
MangleCtx = move(MangleCtx)](FunctionDecl *CurFD) {
6177-
InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true,
6178-
DefinitionRequired, true);
6179-
if (CurFD->isDefined()) {
6180-
// Because all SYCL kernel functions are template functions - they
6181-
// have deferred instantination. We need bodies of these functions
6182-
// so we are checking for SYCL kernel attribute after instantination.
6183-
if (getLangOpts().SYCLIsDevice &&
6184-
CurFD->hasAttr<SYCLKernelAttr>()) {
6185-
ConstructOpenCLKernel(CurFD, *MangleCtx);
6186-
}
6187-
CurFD->setInstantiationIsPending(false);
6188-
}
6203+
processFunctionInstantiation(*this, Inst.second, CurFD,
6204+
DefinitionRequired, *MangleCtx);
61896205
});
6190-
} else {
6191-
InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, Function, true,
6192-
DefinitionRequired, true);
6193-
if (Function->isDefined()) {
6194-
// Because all SYCL kernel functions are template functions - they
6195-
// have deferred instantination. We need bodies of these functions
6196-
// so we are checking for SYCL kernel attribute after instantination.
6197-
if (getLangOpts().SYCLIsDevice &&
6198-
Function->hasAttr<SYCLKernelAttr>()) {
6199-
ConstructOpenCLKernel(Function, *MangleCtx);
6200-
}
6201-
Function->setInstantiationIsPending(false);
6202-
}
6203-
}
6206+
else
6207+
processFunctionInstantiation(*this, Inst.second, Function,
6208+
DefinitionRequired, *MangleCtx);
62046209
// Definition of a PCH-ed template declaration may be available only in the TU.
62056210
if (!LocalOnly && LangOpts.PCHInstantiateTemplates &&
62066211
TUKind == TU_Prefix && Function->instantiationIsPending())
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// RUN: %clang_cc1 -fsycl -fsycl-is-host -triple spir64 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
2+
// Test that the kernel implementation routine marked with 'sycl_kernel'
3+
// has the attribute 'sycl_kernel' in the generated LLVM IR and that the
4+
// function object passed to the sycl kernel is marked 'alwaysinline'
5+
// on the host.
6+
7+
// CHECK: define spir_func void @{{.*}}func{{.*}}() #[[NOSKA:[0-9]+]] {
8+
// CHECK: define internal spir_func void @{{.*}}Kernel{{.*}}Foo{{.*}}({{.*}}) #[[SKA:[0-9]+]] {
9+
// CHECK: call spir_func void @{{.*}}KernelImpl{{.*}}({{.*}}, i32 1, double 2.000000e+00)
10+
// CHECK: define internal spir_func void @{{.*}}Kernel{{.*}}Bar{{.*}}({{.*}}) #[[SKA]] {
11+
// CHECK: call spir_func void @{{.*}}KernelImpl{{.*}}({{.*}}, i32 1, double 2.000000e+00)
12+
// CHECK: define internal spir_func void @{{.*}}KernelImpl{{.*}}({{.*}} %f, i32 %i, double %d) #[[SKA]] {
13+
// CHECK: call spir_func void @"{{.*}}func{{.*}}"(%class
14+
// CHECK: define internal spir_func void @{{.*}}func{{.*}}(%class.anon* %this, i32 %i, double %d) #[[ALWAYSINLINE:[0-9]+]]
15+
// CHECK: define linkonce_odr spir_func void @{{.*}}KernelImpl{{.*}}Functor{{.*}}({{.*}}, i32 %i, double %d) #[[SKA]] comdat {
16+
// CHECK: call spir_func void @{{.*}}Functor{{.*}}(%struct
17+
// CHECK: define linkonce_odr spir_func void @{{.*}}Functor{{.*}}(%struct.Functor* %this, i32 %i, double %d) #[[ALWAYSINLINE]]
18+
19+
template <typename Func>
20+
void __attribute__((sycl_kernel))
21+
KernelImpl(Func f, int i, double d) {
22+
// CHECK-NOT: call void
23+
f(i, d);
24+
}
25+
26+
template <typename Name, typename Func>
27+
void __attribute__((sycl_kernel))
28+
Kernel(Func f) {
29+
KernelImpl(f, 1, 2.0);
30+
}
31+
32+
struct Functor {
33+
void operator()(int i, double d) { d = i + 2; };
34+
} functionobj;
35+
36+
void func() {
37+
auto Lambda = [](int i, double d) { d += i; };
38+
Kernel<class Foo>(Lambda);
39+
Kernel<class Bar>(functionobj);
40+
}
41+
42+
// CHECK-NOT: attributes #[[NOSKA]] = { {{.*}}"sycl_kernel"{{.*}} }
43+
// CHECK: attributes #[[SKA]] = { {{.*}}"sycl_kernel"{{.*}} }
44+
// CHECK: attributes #[[ALWAYSINLINE]] = { {{.*}}alwaysinline{{.*}} }

clang/test/SemaSYCL/kernel-attribute.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fsycl -fsycl-is-device -verify %s
2+
// RUN: %clang_cc1 -fsycl -fsycl-is-host -DHOST -fsyntax-only -verify %s
23

34
// Only function templates
45
[[clang::sycl_kernel]] int gv2 = 0; // expected-warning {{'sycl_kernel' attribute only applies to function templates}}
@@ -13,11 +14,13 @@ __attribute__((sycl_kernel(1))) void foo(T P); // expected-error {{'sycl_kernel'
1314
template <typename T, typename A, int I>
1415
[[clang::sycl_kernel(1)]] void foo1(T P);// expected-error {{'sycl_kernel' attribute takes no arguments}}
1516

17+
#ifndef HOST
1618
// At least two template parameters
1719
template <typename T>
1820
__attribute__((sycl_kernel)) void foo(T P); // expected-warning {{'sycl_kernel' attribute only applies to a function template with at least two template parameters}}
1921
template <typename T>
2022
[[clang::sycl_kernel]] void foo1(T P); // expected-warning {{'sycl_kernel' attribute only applies to a function template with at least two template parameters}}
23+
#endif
2124

2225
// First two template parameters cannot be non-type template parameters
2326
template <typename T, int A>
@@ -33,12 +36,32 @@ template <typename T, typename A>
3336

3437
// Must take at least one argument
3538
template <typename T, typename A>
36-
__attribute__((sycl_kernel)) void foo(); // expected-warning {{function template with 'sycl_kernel' attribute must have a single parameter}}
39+
__attribute__((sycl_kernel)) void foo(); // expected-warning {{function template with 'sycl_kernel' attribute must have at least one parameter}}
3740
template <typename T, typename A>
38-
[[clang::sycl_kernel]] void foo1(T t, A a); // expected-warning {{function template with 'sycl_kernel' attribute must have a single parameter}}
41+
[[clang::sycl_kernel]] void foo1(T t, A a); // no diagnostics
3942

4043
// No diagnostics
4144
template <typename T, typename A>
4245
__attribute__((sycl_kernel)) void foo(T P);
4346
template <typename T, typename A, int I>
4447
[[clang::sycl_kernel]] void foo1(T P);
48+
49+
#ifdef HOST
50+
// No diagnostics
51+
template <typename Func>
52+
void __attribute__((sycl_kernel))
53+
KernelImpl4(Func f, int i, double d) {
54+
f(i, d);
55+
}
56+
57+
template <typename Name, typename Func>
58+
void __attribute__((sycl_kernel))
59+
Kernel(Func f) {
60+
KernelImpl4(f, 1, 2.0);
61+
}
62+
63+
void func() {
64+
auto Lambda = [](int i, double d) { d += i; };
65+
Kernel<class Foo>(Lambda);
66+
}
67+
#endif

0 commit comments

Comments
 (0)