Skip to content

Commit 3e605b1

Browse files
diggerlin and amy-kwan authored
[NFC] Add a pre-commit test case for #111696 (#136730)
Add a pre-commit test case for patch #111696. It tests that the ppc-vsx-fma-mutate pass, when run with -schedule-ppc-vsx-fma-mutation-early, does not hoist the instruction `xxspltiw vs2, 1170469888` out of the loop. --------- Co-authored-by: Amy Kwan <amy.kwan1@ibm.com>
1 parent 52a9649 commit 3e605b1

File tree

1 file changed

+178
-0
lines changed

1 file changed

+178
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
;; Tests that the ppc-vsx-fma-mutate pass, when run with the -schedule-ppc-vsx-fma-mutation-early option, does not hoist xxspltiw out of loops.
2+
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
3+
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
4+
; RUN: -mtriple powerpc64-ibm-aix < %s | FileCheck --check-prefixes=CHECK64,AIX64 %s
5+
6+
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
7+
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
8+
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefixes=CHECK64,LINUX64 %s
9+
10+
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
11+
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
12+
; RUN: -mtriple powerpc-ibm-aix < %s | FileCheck --check-prefix=CHECK32 %s
13+
14+
; @bar: counted vector-FMA loop.
; For i in [0, *%n): loads the <4 x float> located 4*i floats past
; %var1321In_a, computes fma(x, splat(0x3FF7154760000000), splat(6.2705e+03)),
; and stores the result 4*i floats past %__output_a. Returns without touching
; memory when *%n <= 0.
; The addend 6.2705e+03 has the single-precision bit pattern 0x45C3F400
; (1170469888), which is the immediate of the `xxspltiw` that the CHECK lines
; for this function expect to see after the %for.body label.
define void @bar(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) {
15+
entry:
16+
; Read the trip count and skip the loop unless it is strictly positive.
%0 = load i32, ptr %n, align 4
17+
%cmp11 = icmp sgt i32 %0, 0
18+
br i1 %cmp11, label %for.body.preheader, label %for.end
19+
20+
for.body.preheader:
21+
; Widen the (known positive) trip count to match the 64-bit induction variable.
%wide.trip.count = zext i32 %0 to i64
22+
br label %for.body
23+
24+
for.body:
25+
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
26+
; Element index = 4 * iv, i.e. one <4 x float> per iteration.
%1 = shl nsw i64 %indvars.iv, 2
27+
%add.ptr = getelementptr inbounds float, ptr %var1321In_a, i64 %1
28+
%add.ptr.val = load <4 x float>, ptr %add.ptr, align 1
29+
; Both the multiplier and the addend are loop-invariant constant splats.
%2 = tail call contract <4 x float> @llvm.fma.v4f32(<4 x float> %add.ptr.val, <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, <4 x float> <float 6.270500e+03, float 6.270500e+03, float 6.270500e+03, float 6.270500e+03>)
30+
%add.ptr6 = getelementptr inbounds float, ptr %__output_a, i64 %1
31+
store <4 x float> %2, ptr %add.ptr6, align 1
32+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
33+
; Exit once the incremented induction variable reaches the trip count.
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
34+
br i1 %exitcond.not, label %for.end, label %for.body
35+
36+
for.end:
37+
ret void
38+
}
39+
40+
; @foo: loop with a loop-carried scalar fed through a vector FMA.
; When %cmp97 is true, control enters %for.body, which branches back to itself
; unconditionally; %for.end is reachable only from %entry.
; Each iteration places the loop-carried float in lane 0 of a zero vector,
; passes it to llvm.fma.v4f32 with a zero multiplier and a splat(6.2705e+03)
; addend, ANDs the FMA result with the xvcmpgtsp(zero, vector) compare mask,
; and carries lane 0 of the masked value into the next iteration.
define void @foo(i1 %cmp97) {
41+
entry:
42+
br i1 %cmp97, label %for.body, label %for.end
43+
44+
for.body: ; preds = %for.body, %entry
45+
; Loop-carried value: 0.0 on entry, lane 0 of the masked result afterwards.
%0 = phi float [ %vecext.i, %for.body ], [ 0.000000e+00, %entry ]
46+
%splat.splatinsert.i = insertelement <4 x float> zeroinitializer, float %0, i64 0
47+
; The addend is a loop-invariant constant splat.
%1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat.splatinsert.i, <4 x float> zeroinitializer, <4 x float> splat (float 6.270500e+03))
48+
%2 = tail call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> zeroinitializer, <4 x float> %splat.splatinsert.i)
49+
; Mask the FMA result with the compare result in the integer domain.
%3 = bitcast <4 x float> %1 to <4 x i32>
50+
%and1.i8896 = and <4 x i32> %2, %3
51+
%4 = bitcast <4 x i32> %and1.i8896 to <4 x float>
52+
%vecext.i = extractelement <4 x float> %4, i64 0
53+
br label %for.body
54+
55+
for.end: ; preds = %entry
56+
ret void
57+
}
58+
59+
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
60+
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
61+
62+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
63+
declare <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float>, <4 x float>)
64+
65+
; CHECK64: bar:
66+
; CHECK64: # %bb.0: # %entry
67+
; CHECK64-NEXT: lwz r5, 0(r5)
68+
; CHECK64-NEXT: cmpwi r5, 1
69+
; CHECK64-NEXT: bltlr cr0
70+
; CHECK64-NEXT: # %bb.1: # %for.body.preheader
71+
; CHECK64-NEXT: xxspltiw vs0, 1069066811
72+
; CHECK64-NEXT: mtctr r5
73+
; CHECK64-NEXT: li r5, 0
74+
; CHECK64-NEXT: {{.*}}align 5
75+
; CHECK64-NEXT: [[L2_bar:.*]]: # %for.body
76+
; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1
77+
; CHECK64-NEXT: lxvx vs1, r4, r5
78+
; CHECK64-NEXT: xxspltiw vs2, 1170469888
79+
; CHECK64-NEXT: xvmaddasp vs2, vs1, vs0
80+
; CHECK64-NEXT: stxvx vs2, r3, r5
81+
; CHECK64-NEXT: addi r5, r5, 16
82+
; CHECK64-NEXT: bdnz [[L2_bar]]
83+
; CHECK64-NEXT: # %bb.3: # %for.end
84+
; CHECK64-NEXT: blr
85+
86+
; AIX64: .foo:
87+
; AIX64-NEXT: # %bb.0: # %entry
88+
; AIX64-NEXT: andi. r3, r3, 1
89+
; AIX64-NEXT: bclr 4, gt, 0
90+
; AIX64-NEXT: # %bb.1: # %for.body.preheader
91+
; AIX64-NEXT: xxlxor f0, f0, f0
92+
; AIX64-NEXT: xxlxor vs1, vs1, vs1
93+
; AIX64-NEXT: xxlxor f2, f2, f2
94+
; AIX64-NEXT: .align 4
95+
; AIX64-NEXT: L..BB1_2: # %for.body
96+
; AIX64-NEXT: # =>This Inner Loop Header: Depth=1
97+
; AIX64-NEXT: xxmrghd vs2, vs2, vs0
98+
; AIX64-NEXT: xvcvdpsp vs34, vs2
99+
; AIX64-NEXT: xxmrghd vs2, vs0, vs0
100+
; AIX64-NEXT: xvcvdpsp vs35, vs2
101+
; AIX64-NEXT: xxspltiw vs2, 1170469888
102+
; AIX64-NEXT: vmrgew v2, v2, v3
103+
; AIX64-NEXT: xvcmpgtsp vs3, vs1, vs34
104+
; AIX64-NEXT: xvmaddasp vs2, vs34, vs1
105+
; AIX64-NEXT: xxland vs2, vs3, vs2
106+
; AIX64-NEXT: xscvspdpn f2, vs2
107+
; AIX64-NEXT: b L..BB1_2
108+
109+
; LINUX64: foo: # @foo
110+
; LINUX64-NEXT: .Lfunc_begin1:
111+
; LINUX64-NEXT: .cfi_startproc
112+
; LINUX64-NEXT: # %bb.0: # %entry
113+
; LINUX64-NEXT: andi. r3, r3, 1
114+
; LINUX64-NEXT: bclr 4, gt, 0
115+
; LINUX64-NEXT: # %bb.1: # %for.body.preheader
116+
; LINUX64-NEXT: xxlxor f0, f0, f0
117+
; LINUX64-NEXT: xxlxor vs1, vs1, vs1
118+
; LINUX64-NEXT: xxlxor f2, f2, f2
119+
; LINUX64-NEXT: .p2align 4
120+
; LINUX64-NEXT: .LBB1_2: # %for.body
121+
; LINUX64-NEXT: # =>This Inner Loop Header: Depth=1
122+
; LINUX64-NEXT: xxmrghd vs2, vs0, vs2
123+
; LINUX64-NEXT: xvcvdpsp vs34, vs2
124+
; LINUX64-NEXT: xxspltd vs2, vs0, 0
125+
; LINUX64-NEXT: xvcvdpsp vs35, vs2
126+
; LINUX64-NEXT: xxspltiw vs2, 1170469888
127+
; LINUX64-NEXT: vmrgew v2, v3, v2
128+
; LINUX64-NEXT: xvcmpgtsp vs3, vs1, vs34
129+
; LINUX64-NEXT: xvmaddasp vs2, vs34, vs1
130+
; LINUX64-NEXT: xxland vs2, vs3, vs2
131+
; LINUX64-NEXT: xxsldwi vs2, vs2, vs2, 3
132+
; LINUX64-NEXT: xscvspdpn f2, vs2
133+
; LINUX64-NEXT: b .LBB1_2
134+
135+
; CHECK32: .bar:
136+
; CHECK32-NEXT: # %bb.0: # %entry
137+
; CHECK32-NEXT: lwz r5, 0(r5)
138+
; CHECK32-NEXT: cmpwi r5, 0
139+
; CHECK32-NEXT: blelr cr0
140+
; CHECK32-NEXT: # %bb.1: # %for.body.preheader
141+
; CHECK32-NEXT: xxspltiw vs0, 1069066811
142+
; CHECK32-NEXT: li r6, 0
143+
; CHECK32-NEXT: li r7, 0
144+
; CHECK32-NEXT: .align 4
145+
; CHECK32-NEXT: [[L2_bar:.*]]: # %for.body
146+
; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
147+
; CHECK32-NEXT: slwi r8, r7, 4
148+
; CHECK32-NEXT: xxspltiw vs2, 1170469888
149+
; CHECK32-NEXT: addic r7, r7, 1
150+
; CHECK32-NEXT: addze r6, r6
151+
; CHECK32-NEXT: lxvx vs1, r4, r8
152+
; CHECK32-NEXT: xvmaddasp vs2, vs1, vs0
153+
; CHECK32-NEXT: stxvx vs2, r3, r8
154+
; CHECK32-NEXT: xor r8, r7, r5
155+
; CHECK32-NEXT: or. r8, r8, r6
156+
; CHECK32-NEXT: bne cr0, [[L2_bar]]
157+
158+
; CHECK32: .foo:
159+
; CHECK32-NEXT: # %bb.0: # %entry
160+
; CHECK32-NEXT: andi. r3, r3, 1
161+
; CHECK32-NEXT: bclr 4, gt, 0
162+
; CHECK32-NEXT: # %bb.1: # %for.body.preheader
163+
; CHECK32-NEXT: lwz r3, L..C0(r2) # %const.0
164+
; CHECK32-NEXT: xxlxor f1, f1, f1
165+
; CHECK32-NEXT: xxlxor vs0, vs0, vs0
166+
; CHECK32-NEXT: xscvdpspn vs35, f1
167+
; CHECK32-NEXT: lxv vs34, 0(r3)
168+
; CHECK32-NEXT: .align 4
169+
; CHECK32-NEXT: L..BB1_2: # %for.body
170+
; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
171+
; CHECK32-NEXT: xscvdpspn vs36, f1
172+
; CHECK32-NEXT: xxspltiw vs1, 1170469888
173+
; CHECK32-NEXT: vperm v4, v4, v3, v2
174+
; CHECK32-NEXT: xvcmpgtsp vs2, vs0, vs36
175+
; CHECK32-NEXT: xvmaddasp vs1, vs36, vs0
176+
; CHECK32-NEXT: xxland vs1, vs2, vs1
177+
; CHECK32-NEXT: xscvspdpn f1, vs1
178+
; CHECK32-NEXT: b L..BB1_2

0 commit comments

Comments
 (0)