Skip to content

Commit 6144d82

Browse files
AMDGPU/GlobalISel: Add regbanklegalize rule for load and readanylane tests
Add tests for missing readanylane combines from regbanklegalize. Add a rule for a uniform 64-bit addrspace(1) load that has to be regbanklegalized to a vgpr dst that is then readanylaned back to an sgpr.
1 parent 54d544b commit 6144d82

File tree

3 files changed

+560
-0
lines changed

3 files changed

+560
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
646646
.Any({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
647647
.Any({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
648648
.Any({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}})
649+
.Any({{{UniB64, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
649650
.Any({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}})
650651
.Any({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}})
651652

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck %s
3+
4+
; Volatile float load through an inreg global pointer, stored straight to %ptr1.
; The expected output stores the loaded VGPR (v1) directly; no
; v_readfirstlane/v_mov round-trip appears between the load and the store.
define amdgpu_ps void @readanylane_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
5+
; CHECK-LABEL: readanylane_to_virtual_vgpr:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
8+
; CHECK-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
9+
; CHECK-NEXT: s_waitcnt vmcnt(0)
10+
; CHECK-NEXT: global_store_dword v0, v1, s[2:3]
11+
; CHECK-NEXT: s_endpgm
12+
%load = load volatile float, ptr addrspace(1) %ptr0
13+
store float %load, ptr addrspace(1) %ptr1
14+
ret void
15+
}
16+
17+
; Volatile float load through an inreg global pointer, returned as the shader's
; float result. The expected output currently copies the loaded value
; VGPR -> SGPR (v_readfirstlane_b32) and back SGPR -> VGPR (v_mov_b32) before
; returning.
; NOTE(review): presumably this round-trip is one of the "missing readanylane
; combines" the test file tracks -- confirm against the combine implementation.
define amdgpu_ps float @readanylane_to_physical_vgpr(ptr addrspace(1) inreg %ptr) {
18+
; CHECK-LABEL: readanylane_to_physical_vgpr:
19+
; CHECK: ; %bb.0:
20+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
21+
; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc
22+
; CHECK-NEXT: s_waitcnt vmcnt(0)
23+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
24+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
25+
; CHECK-NEXT: ; return to shader part epilog
26+
%load = load volatile float, ptr addrspace(1) %ptr
27+
ret float %load
28+
}
29+
30+
; Volatile <2 x i16> load through an inreg global pointer, bitcast to i32 and
; stored to %ptr1. The expected output currently contains a
; v_readfirstlane_b32 / v_mov_b32 round-trip on the loaded dword before the
; store.
define amdgpu_ps void @readanylane_to_bitcast_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
31+
; CHECK-LABEL: readanylane_to_bitcast_to_virtual_vgpr:
32+
; CHECK: ; %bb.0:
33+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
34+
; CHECK-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
35+
; CHECK-NEXT: s_waitcnt vmcnt(0)
36+
; CHECK-NEXT: v_readfirstlane_b32 s0, v1
37+
; CHECK-NEXT: v_mov_b32_e32 v1, s0
38+
; CHECK-NEXT: global_store_dword v0, v1, s[2:3]
39+
; CHECK-NEXT: s_endpgm
40+
%load = load volatile <2 x i16>, ptr addrspace(1) %ptr0
41+
%bitcast = bitcast <2 x i16> %load to i32
42+
store i32 %bitcast, ptr addrspace(1) %ptr1
43+
ret void
44+
}
45+
46+
; Volatile <2 x i16> load through an inreg global pointer, bitcast to float and
; returned as the shader result (%ptr1 is not used by the body). The expected
; output currently contains a v_readfirstlane_b32 / v_mov_b32 round-trip before
; returning.
define amdgpu_ps float @readanylane_to_bitcast_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
47+
; CHECK-LABEL: readanylane_to_bitcast_to_physical_vgpr:
48+
; CHECK: ; %bb.0:
49+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
50+
; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc
51+
; CHECK-NEXT: s_waitcnt vmcnt(0)
52+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
53+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
54+
; CHECK-NEXT: ; return to shader part epilog
55+
%load = load volatile <2 x i16>, ptr addrspace(1) %ptr0
56+
%bitcast = bitcast <2 x i16> %load to float
57+
ret float %bitcast
58+
}
59+
60+
; Volatile i64 load through an inreg global pointer, stored to %ptr1. The
; expected output loads both dwords into v[0:1], then moves each half
; VGPR -> SGPR -> VGPR (two v_readfirstlane_b32 plus two v_mov_b32) before the
; 64-bit store.
; NOTE(review): presumably this covers the 64-bit uniform addrspace(1) load
; rule added in the same commit -- confirm.
define amdgpu_ps void @unmerge_readanylane_merge_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
61+
; CHECK-LABEL: unmerge_readanylane_merge_to_virtual_vgpr:
62+
; CHECK: ; %bb.0:
63+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
64+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
65+
; CHECK-NEXT: s_waitcnt vmcnt(0)
66+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
67+
; CHECK-NEXT: v_readfirstlane_b32 s1, v1
68+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
69+
; CHECK-NEXT: v_mov_b32_e32 v1, s1
70+
; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
71+
; CHECK-NEXT: s_endpgm
72+
%load = load volatile i64, ptr addrspace(1) %ptr0
73+
store i64 %load, ptr addrspace(1) %ptr1
74+
ret void
75+
}
76+
77+
;define amdgpu_ps double @unmerge_readanylane_merge_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
78+
; %load = load volatile double, ptr addrspace(1) %ptr0
79+
; ret double %load
80+
;}
81+
82+
; Volatile <2 x i32> load through an inreg global pointer, bitcast to double
; and stored to %ptr1. The expected output matches the plain i64 case: both
; loaded dwords go through v_readfirstlane_b32 and are copied back with
; v_mov_b32 before the 64-bit store.
define amdgpu_ps void @unmerge_readanylane_merge_bitcast_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
83+
; CHECK-LABEL: unmerge_readanylane_merge_bitcast_to_virtual_vgpr:
84+
; CHECK: ; %bb.0:
85+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
86+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
87+
; CHECK-NEXT: s_waitcnt vmcnt(0)
88+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
89+
; CHECK-NEXT: v_readfirstlane_b32 s1, v1
90+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
91+
; CHECK-NEXT: v_mov_b32_e32 v1, s1
92+
; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
93+
; CHECK-NEXT: s_endpgm
94+
%load = load volatile <2 x i32>, ptr addrspace(1) %ptr0
95+
%bitcast = bitcast <2 x i32> %load to double
96+
store double %bitcast, ptr addrspace(1) %ptr1
97+
ret void
98+
}
99+
100+
;define amdgpu_ps double @unmerge_readanylane_merge_bitcast_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
101+
; %load = load volatile <2 x i32>, ptr addrspace(1) %ptr0
102+
; %bitcast = bitcast <2 x i32> %load to double
103+
; ret double %bitcast
104+
;}
105+
106+
; Volatile <2 x i32> load through an inreg global pointer; element 1 is
; extracted and stored to %ptr1. In the expected output only the high dword
; (v1) goes through v_readfirstlane_b32 and is copied back with v_mov_b32
; before the 32-bit store.
define amdgpu_ps void @unmerge_readanylane_merge_extract_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
107+
; CHECK-LABEL: unmerge_readanylane_merge_extract_to_virtual_vgpr:
108+
; CHECK: ; %bb.0:
109+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
110+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
111+
; CHECK-NEXT: s_waitcnt vmcnt(0)
112+
; CHECK-NEXT: v_readfirstlane_b32 s0, v1
113+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
114+
; CHECK-NEXT: global_store_dword v2, v0, s[2:3]
115+
; CHECK-NEXT: s_endpgm
116+
%load = load volatile <2 x i32>, ptr addrspace(1) %ptr0
117+
%extracted = extractelement <2 x i32> %load, i32 1
118+
store i32 %extracted, ptr addrspace(1) %ptr1
119+
ret void
120+
}
121+
122+
; Volatile <2 x float> load through an inreg global pointer; element 1 is
; extracted and returned as the shader result (%ptr1 is not used by the body).
; In the expected output the high dword (v1) goes through v_readfirstlane_b32
; and is copied into v0 with v_mov_b32 before returning.
define amdgpu_ps float @unmerge_readanylane_merge_extract_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
123+
; CHECK-LABEL: unmerge_readanylane_merge_extract_to_physical_vgpr:
124+
; CHECK: ; %bb.0:
125+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
126+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
127+
; CHECK-NEXT: s_waitcnt vmcnt(0)
128+
; CHECK-NEXT: v_readfirstlane_b32 s0, v1
129+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
130+
; CHECK-NEXT: ; return to shader part epilog
131+
%load = load volatile <2 x float>, ptr addrspace(1) %ptr0
132+
%extracted = extractelement <2 x float> %load, i32 1
133+
ret float %extracted
134+
}
135+
136+
; Volatile <4 x i16> load through an inreg global pointer; a shufflevector
; picks elements 0 and 1, the resulting <2 x i16> is bitcast to float and
; stored to %ptr1. In the expected output only the low dword (v0) goes through
; v_readfirstlane_b32 and is copied back with v_mov_b32 before the 32-bit
; store.
define amdgpu_ps void @unmerge_readanylane_merge_extract_bitcast_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
137+
; CHECK-LABEL: unmerge_readanylane_merge_extract_bitcast_to_virtual_vgpr:
138+
; CHECK: ; %bb.0:
139+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
140+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
141+
; CHECK-NEXT: s_waitcnt vmcnt(0)
142+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
143+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
144+
; CHECK-NEXT: global_store_dword v2, v0, s[2:3]
145+
; CHECK-NEXT: s_endpgm
146+
%load = load volatile <4 x i16>, ptr addrspace(1) %ptr0
147+
%extracted = shufflevector <4 x i16> %load, <4 x i16> %load, <2 x i32> <i32 0, i32 1>
148+
%bitcast = bitcast <2 x i16> %extracted to float
149+
store float %bitcast, ptr addrspace(1) %ptr1
150+
ret void
151+
}
152+
153+
; Volatile <4 x i16> load through an inreg global pointer; a shufflevector
; picks elements 0 and 1, the resulting <2 x i16> is bitcast to float and
; returned as the shader result (%ptr1 is not used by the body). In the
; expected output the low dword (v0) goes through v_readfirstlane_b32 and is
; copied back with v_mov_b32 before returning.
define amdgpu_ps float @unmerge_readanylane_merge_extract_bitcast_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
154+
; CHECK-LABEL: unmerge_readanylane_merge_extract_bitcast_to_physical_vgpr:
155+
; CHECK: ; %bb.0:
156+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
157+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
158+
; CHECK-NEXT: s_waitcnt vmcnt(0)
159+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
160+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
161+
; CHECK-NEXT: ; return to shader part epilog
162+
%load = load volatile <4 x i16>, ptr addrspace(1) %ptr0
163+
%extracted = shufflevector <4 x i16> %load, <4 x i16> %load, <2 x i32> <i32 0, i32 1>
164+
%bitcast = bitcast <2 x i16> %extracted to float
165+
ret float %bitcast
166+
}

0 commit comments

Comments
 (0)