Skip to content

Commit f45fb51

Browse files
authored
[SYCL] Add support to propagate compile flags to device backend compiler (#8763)
This change is a second attempt to add this support. An earlier attempt was here: #7373 In this change, following changes have been made: 1. clang changes to add a new function attribute: sycl-optlevel 2. sycl-post-link changes to process this attribute, split modules based on optimization level, and add a new property named 'optLevel' to SYCL/misc properties' property set. 3. SYCL runtime and plugin changes to access this device image property and propagate a backend specific optimization flag to the backend compiler. 4. Documentation 5. 2 unit tests and 1 e2e test --------- Signed-off-by: Arvind Sudarsanam <arvind.sudarsanam@intel.com>
1 parent d867bd3 commit f45fb51

File tree

32 files changed

+619
-54
lines changed

32 files changed

+619
-54
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include "llvm/SYCLLowerIR/ESIMD/ESIMDVerifier.h"
4949
#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h"
5050
#include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h"
51+
#include "llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h"
5152
#include "llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h"
5253
#include "llvm/Support/BuryPointer.h"
5354
#include "llvm/Support/CommandLine.h"
@@ -60,8 +61,8 @@
6061
#include "llvm/Support/raw_ostream.h"
6162
#include "llvm/Target/TargetMachine.h"
6263
#include "llvm/Target/TargetOptions.h"
63-
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
6464
#include "llvm/TargetParser/Triple.h"
65+
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
6566
#include "llvm/Transforms/IPO/LowerTypeTests.h"
6667
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
6768
#include "llvm/Transforms/InstCombine/InstCombine.h"
@@ -1045,6 +1046,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
10451046
MPM.addPass(SYCLPropagateAspectsUsagePass(/*ExcludeAspects=*/{},
10461047
/*ValidateAspects=*/false));
10471048

1049+
// Add attribute corresponding to optimization level.
1050+
MPM.addPass(SYCLAddOptLevelAttributePass(CodeGenOpts.OptimizationLevel));
1051+
10481052
// Add SPIRITTAnnotations pass to the pass manager if
10491053
// -fsycl-instrument-device-code option was passed. This option can be
10501054
// used only with spir triple.
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: %clangxx %s -O0 -S -o %t.ll -fsycl-device-only
2+
// RUN: FileCheck %s --input-file %t.ll -check-prefixes=CHECK-IR
3+
// CHECK-IR: define {{.*}} spir_kernel void @{{.*}}main{{.*}}sycl{{.*}}handler{{.*}}() #[[ATTR:[0-9]+]]
4+
// CHECK-IR: attributes #[[ATTR]] = { {{.*}} "sycl-optlevel"="0" {{.*}}}
5+
6+
// This test checks adding of the attribute 'sycl-optlevel'
7+
// by the clang front-end
8+
9+
#include <sycl/sycl.hpp>
10+
11+
int main() {
12+
sycl::queue q;
13+
q.submit([&](sycl::handler &h) {
14+
h.single_task([=]() {});
15+
});
16+
return 0;
17+
}
18+
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//===----- SYCLAddOptLevelAttribute.h - SYCLAddOptLevelAttribute Pass -----===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Pass adds 'sycl-optlevel' function attribute based on optimization level
10+
// passed in.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
//
14+
#ifndef LLVM_SYCL_ADD_OPT_LEVEL_ATTRIBUTE_H
15+
#define LLVM_SYCL_ADD_OPT_LEVEL_ATTRIBUTE_H
16+
17+
#include "llvm/IR/PassManager.h"
18+
19+
namespace llvm {
20+
21+
class SYCLAddOptLevelAttributePass
22+
: public PassInfoMixin<SYCLAddOptLevelAttributePass> {
23+
public:
24+
SYCLAddOptLevelAttributePass(int OptLevel = -1) : OptLevel{OptLevel} {};
25+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
26+
27+
private:
28+
int OptLevel;
29+
};
30+
31+
} // namespace llvm
32+
33+
#endif // LLVM_SYCL_ADD_OPT_LEVEL_ATTRIBUTE_H

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h"
9191
#include "llvm/SYCLLowerIR/LowerWGScope.h"
9292
#include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h"
93+
#include "llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h"
9394
#include "llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h"
9495
#include "llvm/Support/CommandLine.h"
9596
#include "llvm/Support/Debug.h"

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ MODULE_PASS("deadargelim-sycl", DeadArgumentEliminationSYCLPass())
140140
MODULE_PASS("sycllowerwglocalmemory", SYCLLowerWGLocalMemoryPass())
141141
MODULE_PASS("lower-esimd-kernel-attrs", SYCLFixupESIMDKernelWrapperMDPass())
142142
MODULE_PASS("sycl-propagate-aspects-usage", SYCLPropagateAspectsUsagePass())
143+
MODULE_PASS("sycl-add-opt-level-attribute", SYCLAddOptLevelAttributePass())
143144
MODULE_PASS("compile-time-properties", CompileTimePropertiesPass())
144145
#undef MODULE_PASS
145146

llvm/lib/SYCLLowerIR/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ add_llvm_component_library(LLVMSYCLLowerIR
6363
LowerWGLocalMemory.cpp
6464
LowerWGScope.cpp
6565
MutatePrintfAddrspace.cpp
66+
SYCLAddOptLevelAttribute.cpp
6667
SYCLPropagateAspectsUsage.cpp
6768
SYCLUtils.cpp
6869

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
//===---- SYCLAddOptLevelAttribute.cpp - SYCLAddOptLevelAttribute Pass ---===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
//
9+
// Pass adds 'sycl-optlevel' function attribute based on optimization level
10+
// passed in.
11+
//===---------------------------------------------------------------------===//
12+
13+
#include "llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h"
14+
15+
#include "llvm/IR/Module.h"
16+
17+
using namespace llvm;
18+
19+
PreservedAnalyses
20+
SYCLAddOptLevelAttributePass::run(Module &M, ModuleAnalysisManager &MAM) {
21+
// Here, we add a function attribute 'sycl-optlevel' to store the
22+
// optimization level.
23+
assert(OptLevel >= 0 && "Invalid optimization level!");
24+
for (Function &F : M.functions()) {
25+
if (F.isDeclaration())
26+
continue;
27+
F.addFnAttr("sycl-optlevel", std::to_string(OptLevel));
28+
}
29+
return PreservedAnalyses::all();
30+
}

llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,29 +10,29 @@
1010
; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table
1111
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \
1212
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
13-
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \
13+
; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M1-IR \
1414
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
15-
; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \
15+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M2-IR \
1616
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
1717
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \
1818
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
19-
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \
19+
; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M1-SYMS \
2020
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
21-
; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \
21+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M2-SYMS \
2222
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
2323

2424
; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table
2525
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \
2626
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
27-
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \
27+
; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M1-IR \
2828
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
29-
; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \
29+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M2-IR \
3030
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
3131
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \
3232
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
33-
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \
33+
; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M1-SYMS \
3434
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
35-
; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \
35+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M2-SYMS \
3636
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
3737

3838
; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.table

llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table
55
; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE
66
;
7-
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS \
7+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M0-SYMS \
88
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 \
99
; RUN: --implicit-check-not kernel2
1010
;
11-
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M2-SYMS \
11+
; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M2-SYMS \
1212
; RUN: --implicit-check-not kernel0 --implicit-check-not kernel3
1313
;
14-
; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M1-SYMS \
14+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M1-SYMS \
1515
; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 \
1616
; RUN: --implicit-check-not kernel3
1717

llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@
99

1010
; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table
1111
; RUN: FileCheck %s -input-file=%t.table
12-
; RUN: FileCheck %s -input-file=%t_esimd_large_grf_0.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR --implicit-check-not='RegisterAllocMode'
13-
; RUN: FileCheck %s -input-file=%t_esimd_large_grf_0.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP
14-
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM
15-
; RUN: FileCheck %s -input-file=%t_esimd_1.sym --check-prefixes CHECK-ESIMD-SYM
16-
; RUN: FileCheck %s -input-file=%t_esimd_large_grf_0.sym --check-prefixes CHECK-ESIMD-LargeGRF-SYM
12+
; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR --implicit-check-not='RegisterAllocMode'
13+
; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP
14+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM
15+
; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-SYM
16+
; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.sym --check-prefixes CHECK-ESIMD-LargeGRF-SYM
1717

1818
; CHECK: [Code|Properties|Symbols]
19-
; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym
20-
; CHECK: {{.*}}esimd-large-grf.ll.tmp_1.ll|{{.*}}esimd-large-grf.ll.tmp_1.prop|{{.*}}esimd-large-grf.ll.tmp_1.sym
21-
; CHECK: {{.*}}esimd-large-grf.ll.tmp_esimd_1.ll|{{.*}}esimd-large-grf.ll.tmp_esimd_1.prop|{{.*}}esimd-large-grf.ll.tmp_esimd_1.sym
19+
; CHECK: {{.*}}esimd-large-grf.ll.tmp_0.ll|{{.*}}esimd-large-grf.ll.tmp_0.prop|{{.*}}esimd-large-grf.ll.tmp_0.sym
20+
; CHECK: {{.*}}esimd-large-grf.ll.tmp_esimd_0.ll|{{.*}}esimd-large-grf.ll.tmp_esimd_0.prop|{{.*}}esimd-large-grf.ll.tmp_esimd_0.sym
21+
; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym
2222

2323
; CHECK-ESIMD-LargeGRF-PROP: isEsimdImage=1|1
2424
; CHECK-ESIMD-LargeGRF-PROP: isLargeGRF=1|1

llvm/test/tools/sycl-post-link/sycl-large-grf.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@
99

1010
; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table
1111
; RUN: FileCheck %s -input-file=%t.table
12-
; RUN: FileCheck %s -input-file=%t_large_grf_0.ll --check-prefixes CHECK-LARGE-GRF-IR
13-
; RUN: FileCheck %s -input-file=%t_large_grf_0.prop --check-prefixes CHECK-LARGE-GRF-PROP
14-
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM
15-
; RUN: FileCheck %s -input-file=%t_large_grf_0.sym --check-prefixes CHECK-LARGE-GRF-SYM
12+
; RUN: FileCheck %s -input-file=%t_large_grf_1.ll --check-prefixes CHECK-LARGE-GRF-IR
13+
; RUN: FileCheck %s -input-file=%t_large_grf_1.prop --check-prefixes CHECK-LARGE-GRF-PROP
14+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM
15+
; RUN: FileCheck %s -input-file=%t_large_grf_1.sym --check-prefixes CHECK-LARGE-GRF-SYM
1616

1717
; CHECK: [Code|Properties|Symbols]
18-
; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym
19-
; CHECK: {{.*}}-large-grf.ll.tmp_1.ll|{{.*}}-large-grf.ll.tmp_1.prop|{{.*}}-large-grf.ll.tmp_1.sym
18+
; CHECK: {{.*}}-large-grf.ll.tmp_0.ll|{{.*}}-large-grf.ll.tmp_0.prop|{{.*}}-large-grf.ll.tmp_0.sym
19+
; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym
2020

2121
; CHECK-LARGE-GRF-PROP: isLargeGRF=1|1
2222

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; This test checks parsing of the attribute 'sycl-optlevel'
2+
; by the sycl-post-link-tool:
3+
; In addition to splitting requested by user, the kernels are also split based
4+
; on their optimization levels.
5+
; sycl-post-link adds 'optLevel' property to the device binary
6+
7+
; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table
8+
; RUN: FileCheck %s -input-file=%t.table
9+
; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-OPT-LEVEL-PROP-0
10+
; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes CHECK-OPT-LEVEL-PROP-1
11+
12+
; CHECK: [Code|Properties|Symbols]
13+
; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym
14+
; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym
15+
16+
; CHECK-OPT-LEVEL-PROP-0: optLevel=1|0
17+
; CHECK-OPT-LEVEL-PROP-1: optLevel=1|2
18+
19+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
20+
target triple = "spir64-unknown-unknown"
21+
22+
define dso_local spir_func noundef i32 @_Z3fooii(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 {
23+
entry:
24+
%sub = sub nsw i32 %a, %b
25+
ret i32 %sub
26+
}
27+
28+
define dso_local spir_func noundef i32 @_Z3booii(i32 noundef %a, i32 noundef %b) #1 {
29+
entry:
30+
%retval = alloca i32, align 4
31+
%a.addr = alloca i32, align 4
32+
%b.addr = alloca i32, align 4
33+
%retval.ascast = addrspacecast i32* %retval to i32 addrspace(4)*
34+
%a.addr.ascast = addrspacecast i32* %a.addr to i32 addrspace(4)*
35+
%b.addr.ascast = addrspacecast i32* %b.addr to i32 addrspace(4)*
36+
store i32 %a, i32 addrspace(4)* %a.addr.ascast, align 4
37+
store i32 %b, i32 addrspace(4)* %b.addr.ascast, align 4
38+
%0 = load i32, i32 addrspace(4)* %a.addr.ascast, align 4
39+
%1 = load i32, i32 addrspace(4)* %b.addr.ascast, align 4
40+
%add = add nsw i32 %0, %1
41+
ret i32 %add
42+
}
43+
44+
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "sycl-module-id"="test3.cpp" "sycl-optlevel"="2" }
45+
attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "sycl-module-id"="test2.cpp" "sycl-optlevel"="0" }
46+

llvm/tools/sycl-post-link/ModuleSplitter.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ constexpr char GLOBAL_SCOPE_NAME[] = "<GLOBAL>";
4242
constexpr char SYCL_SCOPE_NAME[] = "<SYCL>";
4343
constexpr char ESIMD_SCOPE_NAME[] = "<ESIMD>";
4444
constexpr char ESIMD_MARKER_MD[] = "sycl_explicit_simd";
45+
constexpr char ATTR_OPT_LEVEL[] = "sycl-optlevel";
4546

4647
bool hasIndirectFunctionsOrCalls(const Module &M) {
4748
for (const auto &F : M.functions()) {
@@ -674,7 +675,8 @@ void ModuleDesc::dump() const {
674675
llvm::errs() << " ESIMD:" << toString(EntryPoints.Props.HasESIMD)
675676
<< ", SpecConstMet:" << (Props.SpecConstsMet ? "YES" : "NO")
676677
<< ", LargeGRF:"
677-
<< (EntryPoints.Props.UsesLargeGRF ? "YES" : "NO") << "\n";
678+
<< (EntryPoints.Props.UsesLargeGRF ? "YES" : "NO")
679+
<< ", OptLevel:" << EntryPoints.getOptLevel() << "\n";
678680
dumpEntryPoints(entries(), EntryPoints.GroupId.c_str(), 1);
679681
llvm::errs() << "}\n";
680682
}
@@ -713,6 +715,7 @@ namespace {
713715
struct UsedOptionalFeatures {
714716
SmallVector<int, 4> Aspects;
715717
bool UsesLargeGRF = false;
718+
int OptLevel = -1;
716719
SmallVector<int, 3> ReqdWorkGroupSize;
717720
// TODO: extend this further with reqd-sub-group-size and other properties
718721

@@ -735,6 +738,12 @@ struct UsedOptionalFeatures {
735738
if (F->hasFnAttribute(::sycl::kernel_props::ATTR_LARGE_GRF))
736739
UsesLargeGRF = true;
737740

741+
if (F->hasFnAttribute(ATTR_OPT_LEVEL))
742+
if (F->getFnAttribute(ATTR_OPT_LEVEL)
743+
.getValueAsString()
744+
.getAsInteger(10, OptLevel))
745+
OptLevel = -1;
746+
738747
if (const MDNode *MDN = F->getMetadata("reqd_work_group_size")) {
739748
size_t NumOperands = MDN->getNumOperands();
740749
assert(NumOperands >= 1 && NumOperands <= 3 &&
@@ -750,8 +759,9 @@ struct UsedOptionalFeatures {
750759
llvm::hash_code LargeGRFHash = llvm::hash_value(UsesLargeGRF);
751760
llvm::hash_code ReqdWorkGroupSizeHash = llvm::hash_combine_range(
752761
ReqdWorkGroupSize.begin(), ReqdWorkGroupSize.end());
753-
Hash = static_cast<unsigned>(
754-
llvm::hash_combine(AspectsHash, LargeGRFHash, ReqdWorkGroupSizeHash));
762+
llvm::hash_code OptLevelHash = llvm::hash_value(OptLevel);
763+
Hash = static_cast<unsigned>(llvm::hash_combine(
764+
AspectsHash, LargeGRFHash, ReqdWorkGroupSizeHash, OptLevelHash));
755765
}
756766

757767
std::string generateModuleName(StringRef BaseName) const {
@@ -773,6 +783,9 @@ struct UsedOptionalFeatures {
773783
if (UsesLargeGRF)
774784
Ret += "-large-grf";
775785

786+
if (OptLevel != -1)
787+
Ret += "-O" + std::to_string(OptLevel);
788+
776789
return Ret;
777790
}
778791

@@ -808,7 +821,8 @@ struct UsedOptionalFeatures {
808821
return false;
809822
}
810823

811-
return IsEmpty == Other.IsEmpty && UsesLargeGRF == Other.UsesLargeGRF;
824+
return IsEmpty == Other.IsEmpty && UsesLargeGRF == Other.UsesLargeGRF &&
825+
OptLevel == Other.OptLevel;
812826
}
813827

814828
unsigned hash() const { return static_cast<unsigned>(Hash); }
@@ -869,6 +883,8 @@ getSplitterByOptionalFeatures(ModuleDesc &&MD,
869883
// Propagate LargeGRF flag to entry points group
870884
if (Features.UsesLargeGRF)
871885
MDProps.UsesLargeGRF = true;
886+
if (Features.OptLevel != -1)
887+
MDProps.OptLevel = Features.OptLevel;
872888
Groups.emplace_back(
873889
Features.generateModuleName(MD.getEntryPointGroup().GroupId),
874890
std::move(EntryPoints), MDProps);

0 commit comments

Comments
 (0)