-
Notifications
You must be signed in to change notification settings - Fork 769
Add kernel property to propagate compile options to backend #7373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4d58f01
d73a1da
54d0eb4
f4eefc5
8c8e0e1
a997862
fad0731
4ea7a9c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -9,7 +9,6 @@ | |||||||||||||||||||||||||
// This coordinates the per-function state used while generating code. | ||||||||||||||||||||||||||
// | ||||||||||||||||||||||||||
//===----------------------------------------------------------------------===// | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
#include "CodeGenFunction.h" | ||||||||||||||||||||||||||
#include "CGBlocks.h" | ||||||||||||||||||||||||||
#include "CGCUDARuntime.h" | ||||||||||||||||||||||||||
|
@@ -583,11 +582,24 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, | |||||||||||||||||||||||||
&& !FD->hasAttr<SYCLDeviceAttr>()) | ||||||||||||||||||||||||||
return; | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
// TODO Module identifier is not reliable for this purpose since two modules | ||||||||||||||||||||||||||
// can have the same ID, needs improvement | ||||||||||||||||||||||||||
if (getLangOpts().SYCLIsDevice) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
if (getLangOpts().SYCLIsDevice) { | ||||||||||||||||||||||||||
// TODO Module identifier is not reliable for this purpose since two modules | ||||||||||||||||||||||||||
// can have the same ID, needs improvement | ||||||||||||||||||||||||||
Fn->addFnAttr("sycl-module-id", Fn->getParent()->getModuleIdentifier()); | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
int SYCLDeviceCompileOptLevel = -1; | ||||||||||||||||||||||||||
switch (CGM.getCodeGenOpts().OptimizationLevel) { | ||||||||||||||||||||||||||
default: | ||||||||||||||||||||||||||
llvm_unreachable("Invalid optimization level!"); | ||||||||||||||||||||||||||
case 0: | ||||||||||||||||||||||||||
case 1: | ||||||||||||||||||||||||||
case 2: | ||||||||||||||||||||||||||
case 3: | ||||||||||||||||||||||||||
SYCLDeviceCompileOptLevel = CGM.getCodeGenOpts().OptimizationLevel; | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
Comment on lines
+590
to
+599
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Why not (something like) this instead? I am trying to understand the benefit of checking the 0,1,2,3 values in the switch. |
||||||||||||||||||||||||||
if (SYCLDeviceCompileOptLevel >= 0) | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Based on the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think 'assert' might be redundant. I can just remove the 'if' condition. |
||||||||||||||||||||||||||
Fn->addFnAttr("sycl-device-compile-optlevel", std::to_string(SYCLDeviceCompileOptLevel)); | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LIT test is missing for this change There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added the check inside the sycl test to check if he attribute is being added. Do you want me to split it up? Thanks There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Usually, we have unit-tests for each component. The test in Unit-tests which are limited to a single component will allow to highlight issues more precisely |
||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
llvm::LLVMContext &Context = getLLVMContext(); | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
if (FD->hasAttr<OpenCLKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) | ||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,9 @@ constexpr char ESIMD_MARKER_MD[] = "sycl_explicit_simd"; | |
|
||
constexpr char ATTR_SYCL_MODULE_ID[] = "sycl-module-id"; | ||
|
||
// Similar copy in sycl-post-link.cpp | ||
constexpr char ATTR_OPT_LEVEL[] = "sycl-device-compile-optlevel"; | ||
|
||
bool hasIndirectFunctionsOrCalls(const Module &M) { | ||
for (const auto &F : M.functions()) { | ||
// There are functions marked with [[intel::device_indirectly_callable]] | ||
|
@@ -261,6 +264,44 @@ EntryPointGroupVec groupEntryPointsByScope(ModuleDesc &MD, | |
return EntryPointGroups; | ||
} | ||
|
||
template <class EntryPoinGroupFunc> | ||
EntryPointGroupVec | ||
groupEntryPointsByOptLevel(ModuleDesc &MD, StringRef AttrName, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we expect to have more than one attribute related to optimization level? If not, then we should either rename this function to make it more generic, like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can remove AttrName argument. |
||
bool EmitOnlyKernelsAsEntryPoints, | ||
EntryPoinGroupFunc F) { | ||
EntryPointGroupVec EntryPointGroups{}; | ||
std::map<StringRef, EntryPointSet> EntryPointMap; | ||
Module &M = MD.getModule(); | ||
|
||
// Only process module entry points: | ||
for (auto &F : M.functions()) { | ||
if (!isEntryPoint(F, EmitOnlyKernelsAsEntryPoints) || | ||
!MD.isEntryPointCandidate(F)) { | ||
continue; | ||
} | ||
if (F.hasFnAttribute(AttrName)) { | ||
SmallString<16> stringConstName; | ||
StringRef OptLevelStr = F.getFnAttribute(AttrName).getValueAsString(); | ||
EntryPointMap[OptLevelStr].insert(&F); | ||
} else { | ||
EntryPointMap["-1"].insert(&F); | ||
} | ||
} | ||
if (!EntryPointMap.empty()) { | ||
EntryPointGroups.reserve(EntryPointMap.size()); | ||
for (auto &EPG : EntryPointMap) { | ||
EntryPointGroups.emplace_back(EntryPointGroup{ | ||
EPG.first, std::move(EPG.second), MD.getEntryPointGroup().Props}); | ||
F(EntryPointGroups.back()); | ||
} | ||
} else { | ||
// No entry points met, record this. | ||
EntryPointGroups.push_back({GLOBAL_SCOPE_NAME, {}}); | ||
F(EntryPointGroups.back()); | ||
} | ||
return EntryPointGroups; | ||
} | ||
|
||
// Represents a call graph between functions in a module. Nodes are functions, | ||
// edges are "calls" relation. | ||
class CallGraph { | ||
|
@@ -861,5 +902,28 @@ getSplitterByOptionalFeatures(ModuleDesc &&MD, | |
return std::make_unique<ModuleCopier>(std::move(MD), std::move(Groups)); | ||
} | ||
|
||
std::unique_ptr<ModuleSplitterBase> | ||
getOptLevelSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { | ||
EntryPointGroupVec Groups = groupEntryPointsByOptLevel( | ||
MD, ATTR_OPT_LEVEL, EmitOnlyKernelsAsEntryPoints, | ||
[](EntryPointGroup &G) { | ||
if (G.GroupId == "3") | ||
G.Props.OptLevel = 3; | ||
else if (G.GroupId == "2") | ||
G.Props.OptLevel = 2; | ||
else if (G.GroupId == "1") | ||
G.Props.OptLevel = 1; | ||
else if (G.GroupId == "0") | ||
G.Props.OptLevel = 0; | ||
Comment on lines
+910
to
+917
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. int OptLevel = -1; There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks. Looks better. Will make the change in the next commit. |
||
}); | ||
assert(!Groups.empty() && "At least one group is expected"); | ||
assert(Groups.size() <= 2 && "At most 2 groups are expected"); | ||
|
||
if (Groups.size() > 1) | ||
return std::make_unique<ModuleSplitter>(std::move(MD), std::move(Groups)); | ||
else | ||
return std::make_unique<ModuleCopier>(std::move(MD), std::move(Groups)); | ||
} | ||
|
||
} // namespace module_split | ||
} // namespace llvm |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,12 +60,17 @@ struct EntryPointGroup { | |
// Scope represented by EPs in a group | ||
EntryPointsGroupScope Scope = Scope_Global; | ||
|
||
// opt level | ||
int OptLevel = -1; | ||
|
||
Properties merge(const Properties &Other) const { | ||
Properties Res; | ||
Res.HasESIMD = HasESIMD == Other.HasESIMD | ||
? HasESIMD | ||
: SyclEsimdSplitStatus::SYCL_AND_ESIMD; | ||
Res.UsesLargeGRF = UsesLargeGRF || Other.UsesLargeGRF; | ||
// TODO What do we do about optimization levels while merging? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My understanding is that merge operation can only happen for modules which we produced by ESIMD split from a top-level module. That should mean that all properties except for I suppose that this code can be simplified because of introduction of optional kernel features split, but that is a topic for another PR |
||
// Opt Level remains at '-1' | ||
// Scope remains global | ||
return Res; | ||
} | ||
|
@@ -93,6 +98,9 @@ struct EntryPointGroup { | |
// Tells if some entry points use large GRF mode. | ||
bool isLargeGRF() const { return Props.UsesLargeGRF; } | ||
|
||
// Get opt level. | ||
int getOptLevel() const { return Props.OptLevel; } | ||
|
||
void saveNames(std::vector<std::string> &Dest) const; | ||
void rebuildFromNames(const std::vector<std::string> &Names, const Module &M); | ||
void rebuild(const Module &M); | ||
|
@@ -147,6 +155,7 @@ class ModuleDesc { | |
bool isESIMD() const { return EntryPoints.isEsimd(); } | ||
bool isSYCL() const { return EntryPoints.isSycl(); } | ||
bool isLargeGRF() const { return EntryPoints.isLargeGRF(); } | ||
int getOptLevel() const { return EntryPoints.getOptLevel(); } | ||
|
||
const EntryPointSet &entries() const { return EntryPoints.Functions; } | ||
const EntryPointGroup &getEntryPointGroup() const { return EntryPoints; } | ||
|
@@ -256,6 +265,9 @@ std::unique_ptr<ModuleSplitterBase> | |
getSplitterByOptionalFeatures(ModuleDesc &&MD, | ||
bool EmitOnlyKernelsAsEntryPoints); | ||
|
||
std::unique_ptr<ModuleSplitterBase> | ||
getOptLevelSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); | ||
|
||
#ifndef NDEBUG | ||
void dumpEntryPoints(const EntryPointSet &C, const char *msg = "", int Tab = 0); | ||
void dumpEntryPoints(const Module &M, bool OnlyKernelsAreEntryPoints = false, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,6 +91,9 @@ constexpr char COL_CODE[] = "Code"; | |
constexpr char COL_SYM[] = "Symbols"; | ||
constexpr char COL_PROPS[] = "Properties"; | ||
|
||
// Similar copy in ModuleSplitter.cpp | ||
constexpr char ATTR_OPT_LEVEL[] = "sycl-device-compile-optlevel"; | ||
|
||
// InputFilename - The filename to read from. | ||
cl::opt<std::string> InputFilename{cl::Positional, | ||
cl::desc("<input bitcode file>"), | ||
|
@@ -449,6 +452,11 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, | |
ProgramMetadata.insert({MetadataNames.back(), GV.getName()}); | ||
} | ||
} | ||
|
||
auto OptLevel = MD.getOptLevel(); | ||
if (OptLevel >= 0) | ||
PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"OptLevel", OptLevel}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LIT test is missing for this change There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought I am covering this in sycl-opt-level.cpp. Can you please take a look? Thanks |
||
|
||
if (MD.isESIMD()) { | ||
PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"isEsimdImage", true}); | ||
} | ||
|
@@ -775,6 +783,7 @@ processInputModule(std::unique_ptr<Module> M) { | |
|
||
const bool SplitByScope = ScopedSplitter->remainingSplits() > 1; | ||
bool SplitByOptionalFeatures = false; | ||
bool SplitByOptLevel = false; | ||
|
||
while (ScopedSplitter->hasMoreSplits()) { | ||
module_split::ModuleDesc MD = ScopedSplitter->nextSplit(); | ||
|
@@ -797,12 +806,22 @@ processInputModule(std::unique_ptr<Module> M) { | |
SplitByOptionalFeatures |= OptionalFeaturesSplitter->remainingSplits() > 1; | ||
|
||
while (OptionalFeaturesSplitter->hasMoreSplits()) { | ||
TopLevelModules.emplace_back(OptionalFeaturesSplitter->nextSplit()); | ||
// Here, we perform third-level splitting based on optimization level. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would vote for reusing optional features splitter here and extending it with support for optimization levels. It should make the splitting less nested and most likely will require less changes. Plus, we will do less module clones along the way, i.e. we will produce the final modules distribution in one pass instead of creating temporary modules which will be split further anyway. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be great if we can get this checked in in the current shape and take an action item to improve the code within an acceptable amount of time. It will be beneficial to have this feature out sooner than later. |
||
// This step is mandatory, as optimization level is at module level. | ||
module_split::ModuleDesc MDesc = OptionalFeaturesSplitter->nextSplit(); | ||
std::unique_ptr<module_split::ModuleSplitterBase> OptLevelSplitter = | ||
module_split::getOptLevelSplitter(std::move(MDesc), | ||
EmitOnlyKernelsAsEntryPoints); | ||
SplitByOptLevel |= OptLevelSplitter->remainingSplits() > 1; | ||
while (OptLevelSplitter->hasMoreSplits()) { | ||
TopLevelModules.emplace_back(OptLevelSplitter->nextSplit()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LIT tests for new split dimension are missing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am currently exploring the way to add LIT tests that include multiple source files. I will add this test soon. is that going to be a blocker o get this checked in? Thanks |
||
} | ||
} | ||
} | ||
|
||
Modified |= SplitByScope; | ||
Modified |= SplitByOptionalFeatures; | ||
Modified |= SplitByOptLevel; | ||
|
||
// TODO this nested splitting scheme will not scale well when other split | ||
// "dimensions" will be added. Some infra/"split manager" needs to be | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# Propagation of optimization levels used by front-end compiler to linker and backend compiler | ||
|
||
In order to ease the process of debugging, there is a user requirement to compile different modules with different levels of optimization. This document proposes a compiler flow that will enable propagation of compiler options specified for front-end to the linker and runtimes and eventually to the backend. Currently, only O0/O1/O2/O3 options are handled. | ||
|
||
**NOTE**: This is not a final version. The document is still in progress. | ||
|
||
## Background | ||
|
||
When building an application with several source and object files, it should be possible to specify the optimization parameters individually for each source file/object file (for each invocation of the DPCPP compiler). The linker should pass the original optimization options (e.g. -O0 or -O2) used when building an object file to the device backend compiler (IGC compiler). This will improve the debugging experience by selectively disabling/enabling optimizations for each source file, and therefore achieving better debuggability and better performance as needed. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that we should mention IGC here, because we have several device compilers There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. I will remove it. |
||
|
||
The current behavior, is that the device backend optimization options are determined by the linker optimization options. If the -O0 option is specified for linker, the linker will pass -cl-opt-disable option to IGC for {*}all kernels{*}, essentially disabling optimizations globally. Otherwise, if the -O0 option is not specified for linker, it will not pass -cl-opt-disable option at all, therefore making the kernels mostly undebuggable, regardless of the original front-end compiler options. | ||
|
||
Here is an example that demonstrates this pain point: | ||
|
||
``` | ||
icx -c -fsycl test1.c -o test1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Neither There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops..That's correct. I will change it. Good catch. Thanks |
||
icx -c -O0 -fsycl test2.c -o test2 | ||
icx -fsycl -o test test1.o test2.o | ||
``` | ||
|
||
In this scenario, the fat binary is 'test' and there are no compilation flags sent across to the backend compiler. Though the user wanted to have full debuggability with test2.c module, some of the debuggablity is lost. | ||
|
||
Another scenario is shown below: | ||
|
||
``` | ||
icpx -c -O0 -fsycl -g test.cpp -o test.o | ||
icpx -fsycl test.o | ||
``` | ||
|
||
In this scenario, the fat binary is 'test' and there are no compilation flags sent across to the backend compiler. Though the user wanted to have full debuggability with test.cpp module, some of the debuggablity is lost. The user was not able to set a breakpoint inside device code. | ||
|
||
## Requirements | ||
|
||
In order to support module-level debuggability, the user will compile different module files with different levels of optimization. These optimization levels must be preserved and made use of during every stage of compilation. Following are the requirements for this feature. | ||
- If the user specifies '-Ox' as a front-end compile option for a particular module, this option must be preserved during compilation, linking, AOT compilation as well as JIT compilation. | ||
- If the user specifies '-Ox' option as a front-end linker option, this option will override any front-end compile options and the linker option will be preserved during AOT and JIT compilation. | ||
- If the user specifies '-O0' option, we need to pass '-cl-opt-disable' to AOT and JIT compilation stages. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should either remove this or rewrite this. Level Zero does not support There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cl-opt-disable is an option being passed to the backend compiler. I don't think we need to consider whether it's L0 or OpenCL. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Level Zero spec does not list |
||
|
||
## Use case | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this section can be omitted, because it essentially duplicates examples from "Background" section There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will double check and may be remove. Thanks |
||
|
||
Following is a possible use case: | ||
|
||
``` | ||
A list of modules: | ||
test1.cpp | ||
test2.cpp | ||
test3.cpp | ||
``` | ||
|
||
``` | ||
Following are the compilation steps: | ||
# compiling | ||
icpx -c -O0 -fsycl test1.cpp -o test1.o | ||
icpx -c -O3 -fsycl test2.cpp -o test2.o | ||
icpx -c -fsycl test3.cpp -o test3.o | ||
# linking | ||
icpx -o test -fsycl test1.o test2.o test3.o | ||
# JIT compilation (For GPU backends, this calls igc-standalone compiler in the background) | ||
./test | ||
``` | ||
|
||
Since we have three modules with three different compiler options, we will need to end up with three device binaries, each with their own compiler option specified. | ||
|
||
## Proposed design | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This section should be expanded, because that's essentially the main point of this document. I suggest that we have a separate sub-section for each modified component as it is done in other design documents. Device global, for example. |
||
|
||
Following are changes required in various stages of the compilation pipeline: | ||
- Front-end code generation: For each SYCL kernel, identify the compilation option. Add an appropriate attribute to that kernel. Name of that attribute is 'sycl-device-compile-optlevel'. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Which exact "compilation option" are we identifying here? Is it I also suggest that we can omit |
||
- During the llvm-link stage, all modules are linked into a single module. This is an existing behavior. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest we only document changes to the existing implementation |
||
- During sycl-post-link stage, we first split the kernels into multiple modules based on their optimization level. For each split module, an entry corresponding to its optimization level is made in its .props file. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should document which exact property set is being modified (or new added), what are names of new properties, what is the format of their values; which information is used to generate values for those properties. |
||
- During ocloc call generation, the .props file will be parsed and appropriate option will be added to the list of compiler options. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should not Do I understand correctly, that compiler driver will be responsible for performing a mapping from generic optimization options to options specific for each AOT compiler? |
||
- In SYCL runtime, logic will be added to program manager to parse the .props file, extract the optimization level, and add '-cl-opt-disable' if the optimization level is 0. Otherwise, we do nothing. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please note that runtime has no access to I'm also concerned about adding |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -407,6 +407,14 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, | |
// TODO: Remove isDoubleGRF check in next ABI break | ||
bool isLargeGRF = getUint32PropAsBool(Img, "isLargeGRF") || | ||
getUint32PropAsBool(Img, "isDoubleGRF"); | ||
pi_device_binary_property Prop = Img.getProperty("OptLevel"); | ||
int OptLevel = Prop ? DeviceBinaryProperty(Prop).asUint32() : -1; | ||
std::string OptLevelStr = ""; | ||
// Currently, we do not do anything for other opt levels | ||
// TODO: Figure out a way to send some info across for other opt levels. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note: optimizations flags are different for each backend. FYI:
|
||
if (OptLevel == 0) | ||
OptLevelStr = "-cl-opt-disable"; | ||
|
||
// The -vc-codegen option is always preserved for ESIMD kernels, regardless | ||
// of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable. | ||
if (isEsimdImage) { | ||
|
@@ -425,6 +433,11 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, | |
// is fixed | ||
CompileOpts += isEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file"; | ||
} | ||
if (!OptLevelStr.empty()) { | ||
if (!CompileOpts.empty()) | ||
CompileOpts += " "; | ||
CompileOpts += OptLevelStr; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unit-tests are missing for RT changes |
||
} | ||
} | ||
|
||
static void applyOptionsFromImage(std::string &CompileOpts, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// RUN: %clangxx %s -O0 -S -o %t.ll -fsycl-device-only | ||
// RUN: FileCheck %s --input-file %t.ll -check-prefixes=CHECK-IR | ||
// CHECK-IR: define weak_odr dso_local spir_kernel void @{{.*}}main{{.*}}sycl{{.*}}handler{{.*}}() #[[ATTR:[0-9]+]] | ||
// CHECK-IR: attributes #[[ATTR]] = { {{.*}} "sycl-device-compile-optlevel"="0" {{.*}}} | ||
|
||
// RUN: %clangxx %s -O0 -o %t.bc -fsycl-device-only | ||
// RUN: sycl-post-link -split=source -symbols -S %t.bc -o %t.table | ||
// RUN: FileCheck %s -input-file=%t.table | ||
// RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-OPT-LEVEL-PROP | ||
|
||
// CHECK: [Code|Properties|Symbols] | ||
// CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym | ||
|
||
// CHECK-OPT-LEVEL-PROP: OptLevel=1|0 | ||
|
||
// This test checks adding of the attribute 'sycl-device-compile-optlevel' | ||
// by the clang front-end | ||
// This test also checks parsing of the attribute 'sycl-device-compile-optlevel' | ||
// by the sycl-post-link-tool: | ||
// Splitting happens as usual. | ||
// - sycl-post-link adds 'OptLevel' property to the device binary | ||
|
||
#include <sycl/sycl.hpp> | ||
|
||
int main() { | ||
sycl::queue q; | ||
q.submit([&](sycl::handler &h) { | ||
h.single_task([=]() {}); | ||
}); | ||
return 0; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should avoid doing unrelated changes