Skip to content

Commit 5824f08

Browse files
[SYCL] Reduce list of potential indirect callee candidates (#9871)
A concern was raised during code review of #8589 that we may be adding too many functions into a module when trying to find all potential callees of an indirect call, thus bloating the module. This PR addresses that by limiting signature matching to functions which are marked with the `referenced-indirectly` attribute. This change is not expected to affect any E2E tests, because it restores the behavior we had before #8589, where only functions marked `referenced-indirectly` were considered potential indirect call targets. --------- Co-authored-by: asudarsa <arvind.sudarsanam@intel.com>
1 parent fe61b99 commit 5824f08

File tree

3 files changed

+39
-21
lines changed

3 files changed

+39
-21
lines changed

llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-3.ll

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
; placed in the same module as @_Z3foov.
77
;
88
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0-IR \
9-
; RUN: --implicit-check-not TU0_kernel --implicit-check-not _Z3foov
9+
; RUN: --implicit-check-not TU0_kernel --implicit-check-not _Z3foov \
10+
; RUN: --implicit-check-not _Z4foo3v
1011
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1-IR \
11-
; RUN: --implicit-check-not TU1_kernel --implicit-check-not _Z4foo2v
12+
; RUN: --implicit-check-not TU1_kernel --implicit-check-not _Z4foo2v \
13+
; RUN: --implicit-check-not _Z4foo1v
1214
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-SYM
1315
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-TU1-SYM
1416
;
@@ -25,7 +27,7 @@
2527
;
2628
; CHECK-TU1-IR: define dso_local spir_kernel void @_ZTSZ4mainE10TU0_kernel
2729
; CHECK-TU1-IR: define dso_local spir_func void @_Z3foov
28-
; CHECK-TU1-IR: define dso_local spir_func i32 @_Z4foo1v
30+
; CHECK-TU1-IR: define dso_local spir_func i32 @_Z4foo3v
2931

3032
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
3133
target triple = "spir64-unknown-linux"
@@ -75,6 +77,14 @@ entry:
7577
ret i32 %arg
7678
}
7779

80+
; Function Attrs: nounwind
81+
define dso_local spir_func i32 @_Z4foo3v(i32 %arg) #2 {
82+
entry:
83+
%a = alloca i32, align 4
84+
store i32 %arg, i32* %a, align 4
85+
ret i32 %arg
86+
}
87+
7888
define dso_local spir_kernel void @_ZTSZ4mainE11TU1_kernel1() #1 {
7989
entry:
8090
call spir_func void @_Z4foo2v()
@@ -93,6 +103,7 @@ entry:
93103

94104
attributes #0 = { "sycl-module-id"="TU1.cpp" }
95105
attributes #1 = { "sycl-module-id"="TU2.cpp" }
106+
attributes #2 = { "referenced-indirectly" }
96107

97108
!opencl.spir.version = !{!0, !0}
98109
!spirv.Source = !{!1, !1}

llvm/test/tools/sycl-post-link/device-code-split/complex-indirect-call-chain.ll

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,46 +4,44 @@
44
; RUN: sycl-post-link -split=auto -S < %s -o %t.table
55
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \
66
; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \
7-
; RUN: --implicit-check-not @kernel_B
7+
; RUN: --implicit-check-not @kernel_B --implicit-check-not @baz
88
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix CHECK1 \
99
; RUN: --implicit-check-not @kernel_A --implicit-check-not @kernel_C
1010
; RUN: FileCheck %s -input-file=%t_2.ll --check-prefix CHECK2 \
1111
; RUN: --implicit-check-not @foo --implicit-check-not @bar \
12-
; RUN: --implicit-check-not @baz_2 --implicit-check-not @kernel_B \
12+
; RUN: --implicit-check-not @BAZ --implicit-check-not @kernel_B \
1313
; RUN: --implicit-check-not @kernel_C
1414
;
1515
; RUN: sycl-post-link -split=source -S < %s -o %t.table
1616
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \
1717
; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \
18-
; RUN: --implicit-check-not @kernel_B
18+
; RUN: --implicit-check-not @kernel_B --implicit-check-not @baz
1919
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix CHECK1 \
2020
; RUN: --implicit-check-not @kernel_A --implicit-check-not @kernel_C
2121
; RUN: FileCheck %s -input-file=%t_2.ll --check-prefix CHECK2 \
2222
; RUN: --implicit-check-not @foo --implicit-check-not @bar \
23-
; RUN: --implicit-check-not @baz_2 --implicit-check-not @kernel_B \
23+
; RUN: --implicit-check-not @BAZ --implicit-check-not @kernel_B \
2424
; RUN: --implicit-check-not @kernel_C
2525
;
2626
; RUN: sycl-post-link -split=kernel -S < %s -o %t.table
2727
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \
2828
; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \
29-
; RUN: --implicit-check-not @kernel_B
29+
; RUN: --implicit-check-not @kernel_B --implicit-check-not @baz
3030
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix CHECK1 \
3131
; RUN: --implicit-check-not @kernel_A --implicit-check-not @kernel_C
3232
; RUN: FileCheck %s -input-file=%t_2.ll --check-prefix CHECK2 \
3333
; RUN: --implicit-check-not @foo --implicit-check-not @bar \
34-
; RUN: --implicit-check-not @baz_2 --implicit-check-not @kernel_B \
34+
; RUN: --implicit-check-not @BAZ --implicit-check-not @kernel_B \
3535
; RUN: --implicit-check-not @kernel_C
3636

3737
; CHECK0-DAG: define spir_kernel void @kernel_C
3838
; CHECK0-DAG: define spir_func i32 @bar
39-
; CHECK0-DAG: define spir_func void @baz
40-
; CHECK0-DAG: define spir_func void @baz_2
39+
; CHECK0-DAG: define spir_func void @BAZ
4140

4241
; CHECK1-DAG: define spir_kernel void @kernel_B
4342
; CHECK1-DAG: define spir_func i32 @foo
4443
; CHECK1-DAG: define spir_func i32 @bar
45-
; CHECK1-DAG: define spir_func void @baz
46-
; CHECK1-DAG: define spir_func void @baz_2
44+
; CHECK1-DAG: define spir_func void @BAZ
4745

4846
; CHECK2-DAG: define spir_kernel void @kernel_A
4947
; CHECK2-DAG: define spir_func void @baz
@@ -56,7 +54,7 @@ define spir_func i32 @foo(i32 (i32, void ()*)* %ptr1, void ()* %ptr2) {
5654
ret i32 %1
5755
}
5856

59-
define spir_func i32 @bar(i32 %arg, void ()* %ptr) {
57+
define spir_func i32 @bar(i32 %arg, void ()* %ptr) #3 {
6058
call spir_func void %ptr()
6159
ret i32 %arg
6260
}
@@ -65,7 +63,7 @@ define spir_func void @baz() {
6563
ret void
6664
}
6765

68-
define spir_func void @baz_2() {
66+
define spir_func void @BAZ() #3 {
6967
ret void
7068
}
7169

@@ -87,3 +85,4 @@ define spir_kernel void @kernel_C() #2 {
8785
attributes #0 = { "sycl-module-id"="TU1.cpp" }
8886
attributes #1 = { "sycl-module-id"="TU2.cpp" }
8987
attributes #2 = { "sycl-module-id"="TU3.cpp" }
88+
attributes #3 = { "referenced-indirectly" }

llvm/tools/sycl-post-link/ModuleSplitter.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,8 @@ groupEntryPointsByKernelType(ModuleDesc &MD,
191191
// bitcast, phi node, call, etc.): "A" -> "B" edge will be added to the
192192
// graph;
193193
// 2. function A performs an indirect call of a function with signature S and
194-
// there is a function B with signature S. "A" -> "B" edge will be added to
195-
// the graph;
194+
// there is a function B with signature S marked with "referenced-indirectly"
195+
// attribute. "A" -> "B" edge will be added to the graph;
196196
class DependencyGraph {
197197
public:
198198
using GlobalSet = SmallPtrSet<const GlobalValue *, 16>;
@@ -203,8 +203,15 @@ class DependencyGraph {
203203
FuncTypeToFuncsMap;
204204
for (const auto &F : M.functions()) {
205205
// Kernels can't be called (either directly or indirectly) in SYCL
206-
if (!isKernel(F))
207-
FuncTypeToFuncsMap[F.getFunctionType()].insert(&F);
206+
if (isKernel(F))
207+
continue;
208+
209+
// Only functions which are marked with "referenced-indirectly" attribute
210+
// are considered to be indirect callee candidates.
211+
if (!F.hasFnAttribute("referenced-indirectly"))
212+
continue;
213+
214+
FuncTypeToFuncsMap[F.getFunctionType()].insert(&F);
208215
}
209216

210217
// We add every function into the graph
@@ -335,8 +342,9 @@ ModuleDesc extractSubModule(const ModuleDesc &MD,
335342
return ModuleDesc{std::move(SubM), std::move(ModuleEntryPoints), MD.Props};
336343
}
337344

338-
// The function produces a copy of input LLVM IR module M with only those entry
339-
// points that are specified in ModuleEntryPoints vector.
345+
// The function produces a copy of input LLVM IR module M with only those
346+
// functions and globals that can be called from entry points that are specified
347+
// in ModuleEntryPoints vector, in addition to the entry point functions.
340348
ModuleDesc extractCallGraph(const ModuleDesc &MD,
341349
EntryPointGroup &&ModuleEntryPoints,
342350
const DependencyGraph &CG,

0 commit comments

Comments
 (0)