Skip to content

Commit 6ff3fef

Browse files
committed
Add a pre-commit test case for patch llvm#111696
1 parent dba8acd commit 6ff3fef

File tree

1 file changed

+84
-0
lines changed

1 file changed

+84
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
;; Test that the ppc-vsx-fma-mutate pass, when run with -schedule-ppc-vsx-fma-mutation-early, does not hoist some xxspltiw instructions.
2+
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
3+
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
4+
; RUN: -mtriple powerpc64-ibm-aix < %s | FileCheck --check-prefix=CHECK64-M %s
5+
6+
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
7+
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
8+
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK64-M %s
9+
10+
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
11+
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
12+
; RUN: -mtriple powerpc-ibm-aix < %s | FileCheck --check-prefix=CHECK32-M %s
13+
14+
; @vsexp: loads an i32 count from %n and, if it is positive, runs a counted
; loop. Each iteration loads a <4 x float> from %var1321In_a, feeds it through
; llvm.fma.v4f32 with two constant splat operands, and stores the result at
; the same element offset in %__output_a.
; The block and value names show up as comments in the llc output that the
; CHECK lines match (e.g. "# %for.body"), so they must not be renamed.
define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) {
15+
entry:
16+
; Iteration count is read from memory through %n.
%0 = load i32, ptr %n, align 4
17+
; Signed test: a zero or negative count skips the loop and returns directly.
%cmp11 = icmp sgt i32 %0, 0
18+
br i1 %cmp11, label %for.body.preheader, label %for.end
19+
20+
for.body.preheader:
21+
; Widen the (known positive) count to i64 to compare against the i64 induction variable.
%wide.trip.count = zext i32 %0 to i64
22+
br label %for.body
23+
24+
for.body:
25+
; Induction variable: starts at 0 and is incremented by 1 per iteration.
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
26+
; Float-element index = iv * 4, i.e. one <4 x float> (16 bytes) per iteration.
%1 = shl nsw i64 %indvars.iv, 2
27+
%add.ptr = getelementptr inbounds float, ptr %var1321In_a, i64 %1
28+
; align 1: the vector load makes no alignment assumption about the input.
%add.ptr.val = load <4 x float>, ptr %add.ptr, align 1
29+
; fma(x, splat(0x3FF7154760000000 ~= 1.442695), splat(6270.5)) = x * multiplier + addend.
; The CHECK lines expect each splat to be materialised by xxspltiw:
; 1069066811 (the multiplier's float bit pattern) in the preheader and
; 1170469888 (the bit pattern of 6270.5) inside the loop body.
%2 = tail call contract <4 x float> @llvm.fma.v4f32(<4 x float> %add.ptr.val, <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, <4 x float> <float 6.270500e+03, float 6.270500e+03, float 6.270500e+03, float 6.270500e+03>)
30+
; Output address uses the same element index as the input address.
%add.ptr6 = getelementptr inbounds float, ptr %__output_a, i64 %1
31+
store <4 x float> %2, ptr %add.ptr6, align 1
32+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
33+
; Leave the loop once the incremented induction variable equals the widened count.
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
34+
br i1 %exitcond.not, label %for.end, label %for.body
35+
36+
for.end:
37+
ret void
38+
}
39+
40+
; Declaration of the <4 x float> FMA intrinsic that @vsexp calls in its for.body block.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
41+
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
42+
43+
; CHECK64-M: # %bb.0: # %entry
44+
; CHECK64-M-NEXT: lwz r5, 0(r5)
45+
; CHECK64-M-NEXT: cmpwi r5, 1
46+
; CHECK64-M-NEXT: bltlr cr0
47+
; CHECK64-M-NEXT: # %bb.1: # %for.body.preheader
48+
; CHECK64-M-NEXT: xxspltiw vs0, 1069066811
49+
; CHECK64-M-NEXT: mtctr r5
50+
; CHECK64-M-NEXT: li r5, 0
51+
; CHECK64-M-NEXT: {{.*}}align 5
52+
; CHECK64-M-NEXT: [[L2:.*]]: # %for.body
53+
; CHECK64-M-NEXT: # =>This Inner Loop Header: Depth=1
54+
; CHECK64-M-NEXT: lxvx vs1, r4, r5
55+
; CHECK64-M-NEXT: xxspltiw vs2, 1170469888
56+
; CHECK64-M-NEXT: xvmaddasp vs2, vs1, vs0
57+
; CHECK64-M-NEXT: stxvx vs2, r3, r5
58+
; CHECK64-M-NEXT: addi r5, r5, 16
59+
; CHECK64-M-NEXT: bdnz [[L2]]
60+
; CHECK64-M-NEXT: # %bb.3: # %for.end
61+
; CHECK64-M-NEXT: blr
62+
63+
; CHECK32-M: .vsexp:
64+
; CHECK32-M-NEXT: # %bb.0: # %entry
65+
; CHECK32-M-NEXT: lwz r5, 0(r5)
66+
; CHECK32-M-NEXT: cmpwi r5, 0
67+
; CHECK32-M-NEXT: blelr cr0
68+
; CHECK32-M-NEXT: # %bb.1: # %for.body.preheader
69+
; CHECK32-M-NEXT: xxspltiw vs0, 1069066811
70+
; CHECK32-M-NEXT: li r6, 0
71+
; CHECK32-M-NEXT: li r7, 0
72+
; CHECK32-M-NEXT: .align 4
73+
; CHECK32-M-NEXT: L..BB0_2: # %for.body
74+
; CHECK32-M-NEXT: # =>This Inner Loop Header: Depth=1
75+
; CHECK32-M-NEXT: slwi r8, r7, 4
76+
; CHECK32-M-NEXT: xxspltiw vs2, 1170469888
77+
; CHECK32-M-NEXT: addic r7, r7, 1
78+
; CHECK32-M-NEXT: addze r6, r6
79+
; CHECK32-M-NEXT: lxvx vs1, r4, r8
80+
; CHECK32-M-NEXT: xvmaddasp vs2, vs1, vs0
81+
; CHECK32-M-NEXT: stxvx vs2, r3, r8
82+
; CHECK32-M-NEXT: xor r8, r7, r5
83+
; CHECK32-M-NEXT: or. r8, r8, r6
84+
; CHECK32-M-NEXT: bne cr0, L..BB0_2

0 commit comments

Comments
 (0)