Skip to content

Commit e1069c1

Browse files
committed
[AMDGPU] Ensure return address is save/restored if clobbered or when function has calls
This test makes sure that the return address registers are saved and restored, if they are clobbered in the function or when the function has calls, irrespective of whether IPRA is enabled or disabled. With the corresponding downstream changes of D114652, the return address registers were found not to be saved/restored in this test when clobbered in the function. The test could not be reduced further because the register allocator needs enough register pressure to allocate the return address registers as well. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D120922
1 parent 8d0c34f commit e1069c1

File tree

1 file changed

+199
-0
lines changed

1 file changed

+199
-0
lines changed
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra=1 < %s | FileCheck -check-prefix=GCN %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra=0 < %s | FileCheck -check-prefix=GCN %s
3+
4+
; This test makes sure that the return address registers are saved/restored, whether IPRA
5+
; is enabled or disabled, if they are clobbered in the function or the function has calls.
6+
7+
; TODO: An artificial test with high register pressure would be more reliable in the
8+
; long run as branches on constants could be fragile.
9+
10+
; Aggregate types referenced by the function signatures, getelementptr and bitcast
; instructions in this test. In %struct.ShaderData, field 25 is an i32 and field 30 is
; the trailing [1 x %struct.ShaderClosure] array; both are indexed by GEPs below.
%struct.ShaderData = type { <3 x float>, <3 x float>, <3 x float>, <3 x float>, i32, i32, i32, i32, i32, float, float, i32, i32, float, float, %struct.differential3, %struct.differential3, %struct.differential, %struct.differential, <3 x float>, <3 x float>, <3 x float>, %struct.differential3, i32, i32, i32, float, <3 x float>, <3 x float>, <3 x float>, [1 x %struct.ShaderClosure] }
11+
%struct.differential = type { float, float }
12+
%struct.differential3 = type { <3 x float>, <3 x float> }
13+
%struct.ShaderClosure = type { <3 x float>, i32, float, <3 x float>, [10 x float], [8 x i8] }
14+
%struct.MicrofacetExtra = type { <3 x float>, <3 x float>, <3 x float>, float, [12 x i8] }
15+
16+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
17+
declare float @llvm.fmuladd.f32(float, float, float) #0
18+
19+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
20+
declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #0
21+
22+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
23+
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
24+
25+
; Function Attrs: argmemonly nofree nosync nounwind willreturn
26+
declare void @llvm.lifetime.end.p5i8(i64 immarg, i8 addrspace(5)* nocapture) #1
27+
28+
; Function Attrs: norecurse
29+
; svm_node_closure_bsdf: internal fastcc callee whose only calls are to the
; llvm.amdgcn.fmed3.f32 intrinsic. The FileCheck directives at the top of the body
; require that neither "s30," nor "s31," appears between the function label and the
; matched s_waitcnt, and that the function then returns through s[30:31].
; NOTE(review): presumably this asserts that the return address pair is neither
; spilled nor reused as a destination in this function -- confirm against the
; intent described in the file header.
; Most branch conditions are constants or undef, and several computed values are
; never used; the file header says the test could benefit from a more artificial
; high-register-pressure replacement.
define internal fastcc void @svm_node_closure_bsdf(%struct.ShaderData addrspace(1)* %sd, float* %stack, <4 x i32> %node, i32* %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, %struct.ShaderClosure addrspace(1)* %arrayidx.i.i2202, %struct.ShaderClosure addrspace(1)* %retval.0.i.i22089, %struct.ShaderClosure addrspace(1)* %retval.1.i221310, i1 %cmp575, i32 addrspace(1)* %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) #2 {
30+
; GCN-LABEL: {{^}}svm_node_closure_bsdf:
31+
; GCN-NOT: s30,
32+
; GCN-NOT: s31,
33+
; GCN: s_waitcnt vmcnt(0)
34+
; GCN: s_setpc_b64 s[30:31]
35+
; GCN: .size svm_node_closure_bsdf
36+
entry:
37+
%8 = extractelement <4 x i32> %node, i64 0
38+
%cmp.i.not = icmp eq i32 undef, 0
39+
br i1 undef, label %common.ret.critedge, label %cond.true
40+
41+
cond.true: ; preds = %entry
42+
%9 = load float, float* null, align 4
43+
%phi.cmp = fcmp oeq float %9, 0.000000e+00
44+
br i1 %phi.cmp, label %common.ret, label %cond.true20
45+
46+
cond.true20: ; preds = %cond.true
47+
%trunc1 = trunc i32 %0 to i8
48+
switch i8 %trunc, label %common.ret [
49+
i8 44, label %sw.bb
50+
i8 0, label %if.end.i.i2285
51+
]
52+
53+
sw.bb: ; preds = %cond.true20
54+
%10 = load float, float* null, align 4
55+
%11 = load float, float* null, align 4
56+
%12 = tail call float @llvm.amdgcn.fmed3.f32(float %1, float 0.000000e+00, float 0.000000e+00)
57+
%mul802 = fmul nsz float %1, 0.000000e+00
58+
%cmp412.old3 = fcmp nsz ogt float %1, 0.000000e+00
59+
br i1 %cmp412.old, label %if.then413, label %common.ret
60+
61+
if.then413: ; preds = %sw.bb
62+
%13 = load <4 x i32>, <4 x i32> addrspace(1)* null, align 16
63+
%14 = extractelement <4 x i32> %node, i64 0
64+
%cmp4404 = fcmp nsz ole float %1, 0.000000e+00
65+
%cmp4425 = icmp eq i32 %0, 0
66+
%or.cond13066 = select i1 %cmp412.old, i1 false, i1 %cmp412.old
67+
br i1 %or.cond1306, label %if.then443, label %if.else568
68+
69+
if.then443: ; preds = %if.then413
70+
br i1 true, label %if.end511, label %common.ret
71+
72+
common.ret.critedge: ; preds = %entry
73+
store i32 0, i32* null, align 4
74+
br label %common.ret
75+
76+
common.ret: ; preds = %if.end.i.i2285, %if.end627.sink.split, %cond.end579, %bsdf_alloc.exit2188, %if.end511, %common.ret.critedge, %if.then443, %sw.bb, %cond.true20, %cond.true
77+
ret void
78+
79+
if.end511: ; preds = %if.then443
80+
br i1 false, label %common.ret, label %if.then519
81+
82+
if.then519: ; preds = %if.end511
83+
br i1 false, label %bsdf_alloc.exit2188, label %if.then.i2172
84+
85+
if.then.i2172: ; preds = %if.then519
86+
br i1 false, label %closure_alloc.exit.i2184, label %if.end.i.i2181
87+
88+
if.end.i.i2181: ; preds = %if.then.i2172
89+
br label %closure_alloc.exit.i2184
90+
91+
closure_alloc.exit.i2184: ; preds = %if.end.i.i2181, %if.then.i2172
92+
br i1 false, label %bsdf_alloc.exit2188, label %if.end.i2186
93+
94+
if.end.i2186: ; preds = %closure_alloc.exit.i2184
95+
br label %bsdf_alloc.exit2188
96+
97+
bsdf_alloc.exit2188: ; preds = %if.end.i2186, %closure_alloc.exit.i2184, %if.then519
98+
br i1 false, label %common.ret, label %if.then534
99+
100+
if.then534: ; preds = %bsdf_alloc.exit2188
101+
%.op7 = fmul nsz float undef, 0.000000e+00
102+
%mul558 = select i1 %cmp440, float 0.000000e+00, float %1
103+
%15 = tail call float @llvm.amdgcn.fmed3.f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
104+
store float %mul558, float addrspace(1)* null, align 4
105+
br label %if.end627.sink.split
106+
107+
if.else568: ; preds = %if.then413
108+
br i1 undef, label %bsdf_alloc.exit2214, label %if.then.i2198
109+
110+
if.then.i2198: ; preds = %if.else568
111+
br i1 undef, label %closure_alloc.exit.i2210, label %if.end.i.i2207
112+
113+
if.end.i.i2207: ; preds = %if.then.i2198
114+
%arrayidx.i.i22028 = getelementptr inbounds %struct.ShaderData, %struct.ShaderData addrspace(1)* %sd, i64 0, i32 30, i64 undef
115+
br label %closure_alloc.exit.i2210
116+
117+
closure_alloc.exit.i2210: ; preds = %if.end.i.i2207, %if.then.i2198
118+
%retval.0.i.i220899 = phi %struct.ShaderClosure addrspace(1)* [ %arrayidx.i.i2202, %if.end.i.i2207 ], [ null, %if.then.i2198 ]
119+
br i1 false, label %bsdf_alloc.exit2214, label %if.end.i2212
120+
121+
if.end.i2212: ; preds = %closure_alloc.exit.i2210
122+
br label %bsdf_alloc.exit2214
123+
124+
bsdf_alloc.exit2214: ; preds = %if.end.i2212, %closure_alloc.exit.i2210, %if.else568
125+
%retval.1.i22131010 = phi %struct.ShaderClosure addrspace(1)* [ %arrayidx.i.i2202, %if.end.i2212 ], [ null, %closure_alloc.exit.i2210 ], [ null, %if.else568 ]
126+
%cmp57511 = icmp ne %struct.ShaderClosure addrspace(1)* %arrayidx.i.i2202, null
127+
br i1 %cmp442, label %cond.true576, label %cond.end579
128+
129+
cond.true576: ; preds = %bsdf_alloc.exit2214
130+
%num_closure_left.i221512 = getelementptr inbounds %struct.ShaderData, %struct.ShaderData addrspace(1)* %sd, i64 0, i32 25
131+
%16 = load i32, i32 addrspace(1)* %num_closure_left.i2215, align 8
132+
%cmp.i221613 = icmp slt i32 %0, 0
133+
br i1 %cmp440, label %cond.end579, label %if.end.i2227
134+
135+
if.end.i2227: ; preds = %cond.true576
136+
%sub5.i222114 = add nuw nsw i32 %0, 0
137+
%17 = load i32, i32 addrspace(1)* null, align 4294967296
138+
%idx.ext.i222315 = sext i32 %0 to i64
139+
%add.ptr.i2224 = getelementptr inbounds %struct.ShaderData, %struct.ShaderData addrspace(1)* %sd, i64 0, i32 30, i64 %idx.ext.i2223
140+
%idx.ext8.i22252724 = zext i32 %0 to i64
141+
%add.ptr9.i2226 = getelementptr inbounds %struct.ShaderClosure, %struct.ShaderClosure addrspace(1)* %add.ptr.i2224, i64 %idx.ext8.i22252724
142+
%phi.cast2731 = bitcast %struct.ShaderClosure addrspace(1)* %add.ptr9.i2226 to %struct.MicrofacetExtra addrspace(1)*
143+
br label %cond.end579
144+
145+
cond.end579: ; preds = %if.end.i2227, %cond.true576, %bsdf_alloc.exit2214
146+
%cond580 = phi %struct.MicrofacetExtra addrspace(1)* [ null, %bsdf_alloc.exit2214 ], [ %phi.cast2731, %if.end.i2227 ], [ null, %cond.true576 ]
147+
%tobool583 = icmp ne %struct.MicrofacetExtra addrspace(1)* %cond580, null
148+
%or.cond1308 = select i1 %cmp442, i1 %tobool583, i1 false
149+
br i1 %or.cond1308, label %if.then584, label %common.ret
150+
151+
if.then584: ; preds = %cond.end579
152+
store %struct.MicrofacetExtra addrspace(1)* null, %struct.MicrofacetExtra addrspace(1)* addrspace(1)* null, align 4294967296
153+
br label %if.end627.sink.split
154+
155+
if.end627.sink.split: ; preds = %if.then584, %if.then534
156+
store i32 0, i32 addrspace(1)* null, align 4
157+
br label %common.ret
158+
159+
if.end.i.i2285: ; preds = %cond.true20
160+
store i32 0, i32 addrspace(1)* null, align 4294967296
161+
br label %common.ret
162+
}
163+
164+
; svm_eval_nodes: internal fastcc function that calls svm_node_closure_bsdf, passing
; only null, zeroinitializer and undef arguments. The directives below expect s30 and
; s31 to be written into lanes of one VGPR (captured as CSR_VGPR) before the
; s_swappc_b64 call, then read back from that same VGPR into s4 and s5 after the
; call, with the function returning through s[4:5].
; NOTE(review): the s4/s5 reload registers are hard-coded in the checks; a different
; register choice by the allocator would break them -- confirm this is intended.
define internal fastcc void @svm_eval_nodes(%struct.ShaderData addrspace(1)* %sd) {
165+
sw.bb10:
166+
; GCN-LABEL: {{^}}svm_eval_nodes:
167+
; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30,
168+
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
169+
; GCN: s_swappc_b64 s[30:31]
170+
; GCN-DAG: v_readlane_b32 s4, [[CSR_VGPR]],
171+
; GCN-DAG: v_readlane_b32 s5, [[CSR_VGPR]],
172+
; GCN: s_waitcnt vmcnt(0)
173+
; GCN: s_setpc_b64 s[4:5]
174+
call fastcc void @svm_node_closure_bsdf(%struct.ShaderData addrspace(1)* null, float* null, <4 x i32> zeroinitializer, i32* null, i32 undef, i8 undef, float undef, float undef, float undef, i1 undef, <4 x i32> undef, float undef, i32 undef, i1 undef, i1 undef, i1 undef, float undef, %struct.ShaderClosure addrspace(1)* undef, %struct.ShaderClosure addrspace(1)* undef, %struct.ShaderClosure addrspace(1)* undef, i1 undef, i32 addrspace(1)* undef, i32 undef, i1 undef, i32 undef, i64 undef, i32 undef)
175+
ret void
176+
}
177+
178+
; kernel_ocl_path_trace_shadow_blocked_dl: amdgpu_kernel entry point whose body is a
; single call to svm_eval_nodes with a null pointer argument. The directives only
; require that a call through s_swappc_b64 s[30:31] is emitted and is later followed
; by a line containing "endpgm".
define amdgpu_kernel void @kernel_ocl_path_trace_shadow_blocked_dl() {
179+
kernel_set_buffer_pointers.exit:
180+
; GCN-LABEL: {{^}}kernel_ocl_path_trace_shadow_blocked_dl:
181+
; GCN: s_swappc_b64 s[30:31]
182+
; GCN: endpgm
183+
tail call fastcc void @svm_eval_nodes(%struct.ShaderData addrspace(1)* null)
184+
ret void
185+
}
186+
187+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
188+
declare float @llvm.fabs.f32(float) #0
189+
190+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
191+
declare float @llvm.maxnum.f32(float, float) #0
192+
193+
; Function Attrs: nounwind readnone speculatable willreturn
194+
declare float @llvm.amdgcn.fmed3.f32(float, float, float) #3
195+
196+
attributes #0 = { nofree nosync nounwind readnone speculatable willreturn }
197+
attributes #1 = { argmemonly nofree nosync nounwind willreturn }
198+
attributes #2 = { norecurse }
199+
attributes #3 = { nounwind readnone speculatable willreturn }

0 commit comments

Comments
 (0)