Skip to content

Commit 4f75718

Browse files
authored
[SYCL] Optimize SYCL framework functions with -O0 (#7376)
1 parent 325bc4e commit 4f75718

File tree

11 files changed

+168
-0
lines changed

11 files changed

+168
-0
lines changed

clang/include/clang/Basic/CodeGenOptions.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,10 @@ CODEGENOPT(CtorDtorReturnThis, 1, 0)
515515
/// Whether to disable the standard optimization pipeline for the SYCL device compiler.
516516
CODEGENOPT(DisableSYCLEarlyOpts, 1, 0)
517517

518+
/// Optimize SYCL Framework functions. These are functions
519+
/// which do not contain "user" code.
520+
CODEGENOPT(OptimizeSYCLFramework, 1, 0)
521+
518522
#undef CODEGENOPT
519523
#undef ENUM_CODEGENOPT
520524
#undef VALUE_CODEGENOPT

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,8 @@ def warn_drv_sycl_target_missing : Warning<
356356
InGroup<SyclTarget>;
357357
def err_drv_no_rdc_sycl_target_missing : Error<
358358
"linked binaries do not contain expected '%0' target; found targets: '%1', this is not supported with '-fno-sycl-rdc'">;
359+
def err_drv_fsycl_wrong_optimization_options : Error<
360+
"-fsycl-optimize-non-user-code option can be used only in conjunction with %0">;
359361
def err_drv_multiple_target_with_forced_target : Error<
360362
"multiple target usage with '%0' is not supported with '%1'">;
361363
def err_drv_failed_to_deduce_target_from_arch : Error<

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3031,6 +3031,10 @@ def fsycl_max_parallel_jobs_EQ : Joined<["-"], "fsycl-max-parallel-link-jobs=">,
30313031
"or AOT compilation of each device image.">;
30323032
def : Flag<["-"], "fsycl-rdc">, Flags<[CoreOption]>, Alias<fgpu_rdc>;
30333033
def : Flag<["-"], "fno-sycl-rdc">, Flags<[CoreOption]>, Alias<fno_gpu_rdc>;
3034+
def fsycl_optimize_non_user_code : Flag<["-"], "fsycl-optimize-non-user-code">,
3035+
Flags<[CC1Option, CoreOption]>, MarshallingInfoFlag<CodeGenOpts<"OptimizeSYCLFramework">>,
3036+
HelpText<"Option used in conjunction with -O0 to "
3037+
"optimize SYCL framework utility functions and leave user's kernel code unoptimized. (experimental)">;
30343038
def fsyntax_only : Flag<["-"], "fsyntax-only">,
30353039
Flags<[NoXarchOption,CoreOption,CC1Option,FC1Option,FlangOption]>, Group<Action_Group>,
30363040
HelpText<"Run the preprocessor, parser and semantic analysis stages">;

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -851,6 +851,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
851851
// Only enable CGProfilePass when using integrated assembler, since
852852
// non-integrated assemblers don't recognize .cgprofile section.
853853
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
854+
// Enable a custom optimization pipeline for non-user SYCL code.
855+
PTO.OptimizeSYCLFramework =
856+
CodeGenOpts.OptimizeSYCLFramework && !CodeGenOpts.DisableLLVMPasses;
854857

855858
LoopAnalysisManager LAM;
856859
FunctionAnalysisManager FAM;

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2162,6 +2162,23 @@ CodeGenModule::GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr) {
21622162
return FTRTTIProxy;
21632163
}
21642164

2165+
/// Function checks whether given DeclContext contains a topmost
2166+
/// namespace with name "sycl"
2167+
static bool checkIfDeclaredInSYCLNamespace(const Decl *D) {
2168+
const DeclContext *DC = D->getDeclContext()->getEnclosingNamespaceContext();
2169+
const auto *ND = dyn_cast<NamespaceDecl>(DC);
2170+
if (!ND)
2171+
return false;
2172+
2173+
while (const DeclContext *Parent = ND->getParent()) {
2174+
if (!isa<NamespaceDecl>(Parent))
2175+
break;
2176+
ND = cast<NamespaceDecl>(Parent);
2177+
}
2178+
2179+
return ND && ND->getName() == "sycl";
2180+
}
2181+
21652182
void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
21662183
llvm::Function *F) {
21672184
llvm::AttrBuilder B(F->getContext());
@@ -2284,6 +2301,12 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
22842301

22852302
F->addFnAttrs(B);
22862303

2304+
if (getLangOpts().SYCLIsDevice && getCodeGenOpts().OptimizeSYCLFramework &&
2305+
checkIfDeclaredInSYCLNamespace(D)) {
2306+
F->removeFnAttr(llvm::Attribute::OptimizeNone);
2307+
F->removeFnAttr(llvm::Attribute::NoInline);
2308+
}
2309+
22872310
unsigned alignment = D->getMaxAlignment() / Context.getCharWidth();
22882311
if (alignment)
22892312
F->setAlignment(llvm::Align(alignment));

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5054,6 +5054,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
50545054

50555055
// Forward -fsycl-default-sub-group-size if in SYCL mode.
50565056
Args.AddLastArg(CmdArgs, options::OPT_fsycl_default_sub_group_size);
5057+
5058+
if (Args.hasArg(options::OPT_fsycl_optimize_non_user_code)) {
5059+
const Arg *OArg = Args.getLastArg(options::OPT_O_Group);
5060+
if (!OArg || !OArg->getOption().matches(options::OPT_O0)) {
5061+
bool isCLMode = C.getDriver().IsCLMode();
5062+
// Linux and Windows have different debug options.
5063+
const StringRef Option = isCLMode ? "-Od" : "-O0";
5064+
D.Diag(diag::err_drv_fsycl_wrong_optimization_options) << Option;
5065+
}
5066+
5067+
CmdArgs.push_back("-fsycl-optimize-non-user-code");
5068+
}
50575069
}
50585070

50595071
if (IsSYCL) {
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Test checks that noinline and optnone function's attributes aren't attached
2+
// to functions whose topmost namespace is sycl.
3+
4+
// RUN: %clang_cc1 -fsycl-is-device -O0 -fsycl-optimize-non-user-code -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
5+
6+
#include "sycl.hpp"
7+
8+
// Check that kernel marked with noinline and optnone func attrs.
9+
// CHECK: spir_kernel {{.*}} #[[KERNEL_ATTRS:[0-9]+]]
10+
11+
// Check that user code contain noinline and optnone func attrs.
12+
// CHECK: define {{.*}} @_Z3foov() #[[FOO_ATTRS:[0-9]+]]
13+
int foo() {
14+
return 123;
15+
}
16+
17+
// Check that all functions on sycl::* namespace do not contain
18+
// noinline and optnone func attrs.
19+
namespace sycl {
20+
// CHECK: define {{.*}} @_ZN4sycl4bar1Ev() #[[BAR1_ATTRS:[0-9]+]]
21+
void bar1() {}
22+
23+
namespace V1 {
24+
// bar1 and bar2 have common function attrs
25+
// CHECK: define {{.*}} @_ZN4sycl2V14bar2Ev() #[[BAR1_ATTRS]]
26+
void bar2() {}
27+
}
28+
}
29+
30+
// Check that V1::sycl::* functions do not contain noinline and optnone
31+
// func attrs since topmost namespace is V1 instead of sycl.
32+
namespace V1 {
33+
namespace sycl {
34+
// foo and bar3 have common function attrs
35+
// CHECK: define {{.*}} @_ZN2V14sycl4bar3Ev() #[[FOO_ATTRS]]
36+
void bar3() {}
37+
}
38+
}
39+
40+
// Check attributes
41+
// CHECK-DAG: attributes #[[KERNEL_ATTRS]] = {{.*}} {{noinline|optnone}} {{.*}} {{noinline|optnone}}
42+
// CHECK-DAG: attributes #[[FOO_ATTRS]] = {{.*}} noinline {{.*}} optnone
43+
// CHECK-NOT: attributes #[[BAR1_ATTRS]] = {{.*}} {{noinline|optnone}}
44+
45+
int main() {
46+
sycl::kernel_single_task<class kernel>([]() {
47+
foo();
48+
sycl::bar1();
49+
sycl::V1::bar2();
50+
V1::sycl::bar3();
51+
});
52+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Test checks that noinline and optnone function's attributes aren't attached
2+
// to functions whose topmost namespace is not sycl.
3+
4+
// RUN: %clang_cc1 -fsycl-is-device -O0 -fsycl-optimize-non-user-code -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
5+
6+
// Check that kernel contains noinline and optnone func attrs.
7+
// CHECK: define {{.*}} @_ZTSZ4mainE6kernel() #[[KERNEL_ATTRS:[0-9]+]]
8+
9+
// Check that 'anonymous namespace'::sycl::* functions contains
10+
// noinline and optnone func attrs since topmost namespace is anonymous
11+
// instead of sycl.
12+
namespace {
13+
namespace sycl {
14+
// CHECK: define {{.*}} @_ZN12_GLOBAL__N_14sycl4bar4Ev() #[[BAR4_ATTRS:[0-9]+]]
15+
void bar4() {}
16+
}
17+
}
18+
19+
template <typename name, typename Func>
20+
__attribute__((sycl_kernel)) void kernel_single_task(const Func &func) {
21+
func();
22+
}
23+
24+
// CHECK: attributes #[[KERNEL_ATTRS]] = {{.*}} noinline {{.*}} optnone
25+
// CHECK: attributes #[[BAR4_ATTRS]] = {{.*}} noinline {{.*}} optnone
26+
27+
int main() {
28+
kernel_single_task<class kernel>([]() {
29+
sycl::bar4();
30+
});
31+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: %clangxx -fsycl -O0 -### %s 2>&1 | FileCheck %s -check-prefix=NO-OPT-CHECK
2+
// NO-OPT-CHECK-NOT: fsycl-optimize-non-user-code
3+
4+
// RUN: %clangxx -fsycl -O0 -fsycl-optimize-non-user-code -### %s 2>&1 | FileCheck %s -check-prefix=OPT-CHECK
5+
// OPT-CHECK: fsycl-optimize-non-user-code
6+
7+
// RUN: not %clangxx -fsycl -O1 -fsycl-optimize-non-user-code %s 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
8+
// RUN: not %clangxx -fsycl -fsycl-optimize-non-user-code %s 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
9+
// CHECK-ERROR: error: -fsycl-optimize-non-user-code option can be used only in conjunction with -O0
10+
11+
// Check cases for Microsoft Windows Driver.
12+
// RUN: %clang_cl -fsycl -Od -### %s 2>&1 | FileCheck %s -check-prefix=NO-OPT-WIN-CHECK
13+
// NO-OPT-WIN-CHECK-NOT: fsycl-optimize-non-user-code
14+
15+
// RUN: %clang_cl -fsycl -Od -fsycl-optimize-non-user-code -### %s 2>&1 | FileCheck %s -check-prefix=OPT-WIN-CHECK
16+
// OPT-WIN-CHECK: fsycl-optimize-non-user-code
17+
18+
// RUN: not %clang_cl -fsycl -O1 -fsycl-optimize-non-user-code %s 2>&1 | FileCheck %s -check-prefix=CHECK-WIN-ERROR
19+
// RUN: not %clang_cl -fsycl -fsycl-optimize-non-user-code %s 2>&1 | FileCheck %s -check-prefix=CHECK-WIN-ERROR
20+
// CHECK-WIN-ERROR: error: -fsycl-optimize-non-user-code option can be used only in conjunction with -Od

llvm/include/llvm/Passes/PassBuilder.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ class PipelineTuningOptions {
8585
// analyses after various module->function or cgscc->function adaptors in the
8686
// default pipelines.
8787
bool EagerlyInvalidateAnalyses;
88+
89+
/// Tuning option to enable a subset of optimizations in O0 optimization
90+
/// mode for non-user SYCL code.
91+
bool OptimizeSYCLFramework = false;
8892
};
8993

9094
/// This class provides access to building LLVM's passes.
@@ -298,6 +302,10 @@ class PassBuilder {
298302
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level,
299303
bool LTOPreLink = false);
300304

305+
/// Constructs a optimization pipeline of a SYCL framework part of code
306+
/// and appends it to the given MPM.
307+
void addDefaultSYCLFrameworkOptimizationPipeline(ModulePassManager &MPM);
308+
301309
/// Build the default `AAManager` with the default alias analysis pipeline
302310
/// registered.
303311
///

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1989,9 +1989,18 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
19891989

19901990
MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
19911991

1992+
if (PTO.OptimizeSYCLFramework)
1993+
addDefaultSYCLFrameworkOptimizationPipeline(MPM);
1994+
19921995
return MPM;
19931996
}
19941997

1998+
void PassBuilder::addDefaultSYCLFrameworkOptimizationPipeline(
1999+
ModulePassManager &MPM) {
2000+
MPM.addPass(
2001+
buildInlinerPipeline(OptimizationLevel::O2, ThinOrFullLTOPhase::None));
2002+
}
2003+
19952004
AAManager PassBuilder::buildDefaultAAPipeline() {
19962005
AAManager AA;
19972006

0 commit comments

Comments
 (0)