Skip to content

[SYCL] Enhance device code split call graph analysis #8589

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
07d737d
Initial prototype
AlexeySachkov Mar 8, 2023
b917080
More through analysis
AlexeySachkov Mar 9, 2023
93ad79f
some fixes
AlexeySachkov Mar 9, 2023
1ec9e77
further fixes
AlexeySachkov Mar 9, 2023
574295b
cleanup dead code
AlexeySachkov Mar 9, 2023
a0e46a6
Merge remote-tracking branch 'origin/sycl' into private/asachkov/devi…
AlexeySachkov Mar 9, 2023
afc3add
WIP
AlexeySachkov Mar 13, 2023
8abc480
introduce dependency graph
AlexeySachkov Mar 17, 2023
afcfaed
Refactor ESIMD splitter; it compiles
AlexeySachkov Mar 21, 2023
6c2f625
less debug prints; more fixes
AlexeySachkov Mar 22, 2023
c5bc4b4
fixes
AlexeySachkov Mar 22, 2023
b0f9a13
Merge remote-tracking branch 'origin/sycl' into private/asachkov/devi…
AlexeySachkov Mar 22, 2023
231ffc1
clang-format
AlexeySachkov Mar 22, 2023
946d39b
more clang-format
AlexeySachkov Mar 22, 2023
31d62c1
fix a couple of tests
AlexeySachkov Mar 23, 2023
f22b98e
Merge remote-tracking branch 'origin/sycl' into private/asachkov/devi…
AlexeySachkov Mar 29, 2023
78b8a5f
Merge remote-tracking branch 'origin/sycl' into private/asachkov/devi…
AlexeySachkov May 4, 2023
7e8623a
compilation fix
AlexeySachkov May 8, 2023
a4d5be3
Merge remote-tracking branch 'origin/sycl' into private/asachkov/devi…
AlexeySachkov May 9, 2023
d309a36
Fix test after merge
AlexeySachkov May 10, 2023
e83fd96
Fix another test after merge
AlexeySachkov May 10, 2023
45c497b
Implement global variables tracking in device code split
AlexeySachkov May 10, 2023
bbc2e66
Fix remaining sycl-post-link LIT tests
AlexeySachkov May 10, 2023
dbf7d72
Revert some unnecessary changes
AlexeySachkov May 11, 2023
21be8aa
Refactoring to remove some weird changes
AlexeySachkov May 11, 2023
bbba4cb
Merge remote-tracking branch 'origin/sycl' into private/asachkov/devi…
AlexeySachkov May 11, 2023
db50c71
Resolve a few TODOs
AlexeySachkov May 11, 2023
1a6b8c8
Merge remote-tracking branch 'origin/sycl' into private/asachkov/devi…
AlexeySachkov May 24, 2023
3c7a4d5
Apply comments
AlexeySachkov May 24, 2023
0b368da
Add invoke-esimd-double test
jzc May 22, 2023
ff59041
Enchance new test with double data type check
AlexeySachkov May 24, 2023
df0b4e3
Add more complex test; remove kernels from list of indirectly called …
AlexeySachkov May 25, 2023
e2aecc9
Add comments to resolve some concerns with the algorithm
AlexeySachkov May 26, 2023
07371ee
remove unnecessary fixme
AlexeySachkov May 26, 2023
8454a2c
Merge remote-tracking branch 'origin/sycl' into private/asachkov/devi…
AlexeySachkov Jun 6, 2023
ece594b
Add one more LIT test
AlexeySachkov Jun 6, 2023
0791c88
Fix typos; remove duplicated example
AlexeySachkov Jun 6, 2023
9190fc2
Improve references between comments
AlexeySachkov Jun 6, 2023
026d79e
Apply comments: renaming
AlexeySachkov Jun 6, 2023
14deb76
Apply comments: move comment around
AlexeySachkov Jun 6, 2023
e067e3c
Fix some typos
AlexeySachkov Jun 6, 2023
800bd77
Add more comments to the code
AlexeySachkov Jun 6, 2023
f41d34b
Some renaming
AlexeySachkov Jun 6, 2023
b1b3910
Restore some missing pieces of code
AlexeySachkov Jun 6, 2023
5929e4d
opimize includes
AlexeySachkov Jun 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions llvm/test/tools/sycl-post-link/assert/indirect-with-split-2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,16 @@
; marked as using asserts.

; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table
; RUN: FileCheck %s -input-file=%t_0.prop -check-prefix=PRESENCE-CHECK
; RUN: FileCheck %s -input-file=%t_0.prop -check-prefix=ABSENCE-CHECK
; RUN: FileCheck %s -input-file=%t_0.prop -check-prefixes=CHECK,CHECK0 \
; RUN: --implicit-check-not TU1
; RUN: FileCheck %s -input-file=%t_1.prop -check-prefixes=CHECK,CHECK1 \
; RUN: --implicit-check-not TU0
;
; CHECK: [SYCL/assert used]
; CHECK0-DAG: main_TU1_kernel0
; CHECK0-DAG: main_TU1_kernel1
;
; CHECK1: main_TU0_kernel0

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-linux"
Expand Down Expand Up @@ -40,7 +48,7 @@ entry:
}

; ABSENCE-CHECK-NOT: empty_kernel
define dso_local spir_kernel void @empty_kernel() {
define dso_local spir_kernel void @empty_kernel() #2 {
%1 = ptrtoint void ()* @bar to i64
ret void
}
Expand Down
22 changes: 15 additions & 7 deletions llvm/test/tools/sycl-post-link/assert/indirect-with-split.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,21 @@
; marked as using asserts.

; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table
; RUN: FileCheck %s -input-file=%t_0.prop
; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes=CHECK,CHECK1 \
; RUN: --implicit-check-not TU0
; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes=CHECK,CHECK0 \
; RUN: --implicit-check-not TU1 --implicit-check-not kernel1
;
; With recent improvements to device code split, this file is actually being
; split into two modules, and one of them does not contain the
; "indirectly-referenced" function, meaning that only direct users of 'assert'
; will be mentioned in device image properties.
;
; CHECK: [SYCL/assert used]
; CHECK0: main_TU0_kernel0
;
; CHECK1-DAG: main_TU1_kernel0
; CHECK1-DAG: main_TU1_kernel1

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-linux"
Expand All @@ -20,9 +34,6 @@ target triple = "spir64-unknown-linux"
@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
@_ZL10assert_fmt = internal addrspace(2) constant [85 x i8] c"%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] Assertion `%s` failed.\0A\00", align 1

; CHECK: [SYCL/assert used]

; CHECK-DAG: main_TU0_kernel0
define dso_local spir_kernel void @main_TU0_kernel0() #0 {
entry:
call spir_func void @_Z3foov()
Expand All @@ -40,7 +51,6 @@ entry:
ret void
}

; CHECK-DAG: main_TU0_kernel1
define dso_local spir_kernel void @main_TU0_kernel1() #0 {
entry:
call spir_func void @_Z4foo1v()
Expand All @@ -55,14 +65,12 @@ entry:
ret void
}

; CHECK-DAG: main_TU1_kernel0
define dso_local spir_kernel void @main_TU1_kernel0() #2 {
entry:
call spir_func void @_Z3foov()
ret void
}

; CHECK-DAG: main_TU1_kernel1
define dso_local spir_kernel void @main_TU1_kernel1() #2 {
entry:
call spir_func void @_Z4foo2v()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,42 @@
; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table
; In the presence of an indirectly callable function, auto mode is equal to no split,
; which means that separate LLVM IR file for device is not generated and we only
; need to check generated symbol table
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK
;
; This is the same as the auto-module-split-1 test; the only difference is that
; @_Z3foov is marked with the "referenced-indirectly" attribute.
; The purpose of this test is to make sure that we can still perform device code
; split as usual, because that function is not a part of any indirect calls.
;
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-TXT
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-TU1-TXT

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-linux"

$_Z3barIiET_S0_ = comdat any

; CHECK-TU1-NOT: @{{.*}}GV{{.*}}
; CHECK-TU0: @{{.*}}GV{{.*}} = internal addrspace(1) constant [1 x i32] [i32 42], align 4
@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4

; CHECK: {{.*}}TU0_kernel0{{.*}}
; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
; CHECK-TU1-TXT: {{.*}}TU0_kernel0{{.*}}
; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel0{{.*}}

; CHECK-TU1: call spir_func void @{{.*}}foo{{.*}}()

define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel0() #0 {
entry:
call spir_func void @_Z3foov()
ret void
}

; CHECK-TU1: define dso_local spir_func void @{{.*}}foo{{.*}}()
; CHECK-TU0-NOT: define dso_local spir_func void @{{.*}}foo{{.*}}()

; CHECK-TU1: call spir_func i32 @{{.*}}bar{{.*}}(i32 1)

define dso_local spir_func void @_Z3foov() #2 {
entry:
%a = alloca i32, align 4
Expand All @@ -28,6 +46,9 @@ entry:
ret void
}

; CHECK-TU1: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
; CHECK-TU0-NOT: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)

; Function Attrs: nounwind
define linkonce_odr dso_local spir_func i32 @_Z3barIiET_S0_(i32 %arg) comdat {
entry:
Expand All @@ -37,33 +58,51 @@ entry:
ret i32 %0
}

; CHECK: {{.*}}TU0_kernel1{{.*}}
; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
; CHECK-TU1-TXT: {{.*}}TU0_kernel1{{.*}}
; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel1{{.*}}

; CHECK-TU1: call spir_func void @{{.*}}foo1{{.*}}()

define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel1() #0 {
entry:
call spir_func void @_Z4foo1v()
ret void
}

; CHECK-TU1: define dso_local spir_func void @{{.*}}foo1{{.*}}()
; CHECK-TU0-NOT: define dso_local spir_func void @{{.*}}foo1{{.*}}()

; Function Attrs: nounwind
define dso_local spir_func void @_Z4foo1v() {
entry:
%a = alloca i32, align 4
store i32 2, i32* %a, align 4
ret void
}
; CHECK: {{.*}}TU1_kernel{{.*}}

; CHECK-TU1-NOT: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
; CHECK-TU1-TXT-NOT: {{.*}}TU1_kernel{{.*}}
; CHECK-TU0: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
; CHECK-TU0-TXT: {{.*}}TU1_kernel{{.*}}

; CHECK-TU0: call spir_func void @{{.*}}foo2{{.*}}()

define dso_local spir_kernel void @_ZTSZ4mainE10TU1_kernel() #1 {
entry:
call spir_func void @_Z4foo2v()
ret void
}

; CHECK-TU1-NOT: define dso_local spir_func void @{{.*}}foo2{{.*}}()
; CHECK-TU0: define dso_local spir_func void @{{.*}}foo2{{.*}}()

; Function Attrs: nounwind
define dso_local spir_func void @_Z4foo2v() {
entry:
%a = alloca i32, align 4
; CHECK-TU0: %0 = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @{{.*}}GV{{.*}} to [1 x i32] addrspace(4)*), i64 0, i64 0), align 4
%0 = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @_ZL2GV to [1 x i32] addrspace(4)*), i64 0, i64 0), align 4
%add = add nsw i32 4, %0
store i32 %add, i32* %a, align 4
Expand All @@ -74,8 +113,15 @@ attributes #0 = { "sycl-module-id"="TU1.cpp" }
attributes #1 = { "sycl-module-id"="TU2.cpp" }
attributes #2 = { "referenced-indirectly" }

; Metadata is saved in both modules.
; CHECK: !opencl.spir.version = !{!0, !0}
; CHECK: !spirv.Source = !{!1, !1}

!opencl.spir.version = !{!0, !0}
!spirv.Source = !{!1, !1}

; CHECK: !0 = !{i32 1, i32 2}
; CHECK: !1 = !{i32 4, i32 100000}

!0 = !{i32 1, i32 2}
!1 = !{i32 4, i32 100000}
Original file line number Diff line number Diff line change
@@ -1,8 +1,31 @@
; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table
; In the presence of indirect calls, auto mode is equal to no split,
; which means that separate LLVM IR file for device is not generated and we only
; need to check generated symbol table
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK
;
; In the presence of indirect calls we start matching functions using their
; signatures, i.e. we have an indirect call to an i32(i32) function within
; @_Z3foov, which means that all functions with the i32(i32) signature should be
; placed in the same module as @_Z3foov.
;
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0-IR \
; RUN: --implicit-check-not TU0_kernel --implicit-check-not _Z3foov
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1-IR \
; RUN: --implicit-check-not TU1_kernel --implicit-check-not _Z4foo2v
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-SYM
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-TU1-SYM
;
; CHECK-TU0-SYM: _ZTSZ4mainE11TU1_kernel0
; CHECK-TU0-SYM: _ZTSZ4mainE11TU1_kernel1
;
; CHECK-TU1-SYM: _ZTSZ4mainE10TU0_kernel
;
; CHECK-TU0-IR: @_ZL2GV = internal addrspace(1) constant
; CHECK-TU0-IR: define dso_local spir_kernel void @_ZTSZ4mainE11TU1_kernel0
; CHECK-TU0-IR: define dso_local spir_func i32 @_Z4foo1v
; CHECK-TU0-IR: define dso_local spir_kernel void @_ZTSZ4mainE11TU1_kernel1
; CHECK-TU0-IR: define dso_local spir_func void @_Z4foo2v
;
; CHECK-TU1-IR: define dso_local spir_kernel void @_ZTSZ4mainE10TU0_kernel
; CHECK-TU1-IR: define dso_local spir_func void @_Z3foov
; CHECK-TU1-IR: define dso_local spir_func i32 @_Z4foo1v

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-linux"
Expand All @@ -11,9 +34,7 @@ $_Z3barIiET_S0_ = comdat any

@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4

; CHECK: {{.*}}TU0_kernel0{{.*}}

define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel0() #0 {
define dso_local spir_kernel void @_ZTSZ4mainE10TU0_kernel() #0 {
entry:
call spir_func void @_Z3foov()
ret void
Expand All @@ -38,24 +59,23 @@ entry:
ret i32 %0
}

; CHECK: {{.*}}TU0_kernel1{{.*}}

define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel1() #0 {
define dso_local spir_kernel void @_ZTSZ4mainE11TU1_kernel0() #1 {
entry:
call spir_func void @_Z4foo1v()
%a = alloca i32, align 4
%arg = load i32, i32* %a, align 4
%call = call spir_func i32 @_Z4foo1v(i32 %arg)
ret void
}

; Function Attrs: nounwind
define dso_local spir_func void @_Z4foo1v() {
define dso_local spir_func i32 @_Z4foo1v(i32 %arg) {
entry:
%a = alloca i32, align 4
store i32 2, i32* %a, align 4
ret void
store i32 %arg, i32* %a, align 4
ret i32 %arg
}
; CHECK: {{.*}}TU1_kernel{{.*}}

define dso_local spir_kernel void @_ZTSZ4mainE10TU1_kernel() #1 {
define dso_local spir_kernel void @_ZTSZ4mainE11TU1_kernel1() #1 {
entry:
call spir_func void @_Z4foo2v()
ret void
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table
; RUN: FileCheck %s -input-file=%t_0.sym

; This test checks that the module is not split if a function pointer's user is
; not a CallInst.
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix=CHECK-SYM0
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix=CHECK-SYM1
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix=CHECK-IR0
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix=CHECK-IR1

; This test checks that we can properly perform device code split by tracking
; all uses of functions (not only direct calls).

; CHECK-SYM0: kernel2
; CHECK-SYM1: kernel1
;
; CHECK-IR0: define dso_local spir_kernel void @kernel2
;
; CHECK-IR1: @_Z2f1iTable = weak global [1 x i32 (i32)*] [i32 (i32)* @_Z2f1i]
; CHECK-IR1: define dso_local spir_func i32 @_Z2f1i
; CHECK-IR1: define weak_odr dso_local spir_kernel void @kernel1

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64_x86_64-unknown-unknown"
Expand Down
Loading