|
| 1 | +;; Test that the ppc-vsx-fma-mutate pass with -schedule-ppc-vsx-fma-mutation-early does not hoist some xxspltiw instructions. |
| 2 | +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \ |
| 3 | +; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \ |
| 4 | +; RUN: -mtriple powerpc64-ibm-aix < %s | FileCheck --check-prefix=CHECK64-M %s |
| 5 | + |
| 6 | +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \ |
| 7 | +; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \ |
| 8 | +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK64-M %s |
| 9 | + |
| 10 | +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \ |
| 11 | +; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \ |
| 12 | +; RUN: -mtriple powerpc-ibm-aix < %s | FileCheck --check-prefix=CHECK32-M %s |
| 13 | + |
| | +; Loop kernel used to exercise the VSX FMA mutation pass. For each iteration (the count is |
| | +; loaded from %n) it loads a <4 x float> from %var1321In_a, computes |
| | +; fma(x, splat(0x3FF7154760000000), splat(6270.5)) and stores the result to %__output_a. |
| | +; In the expected assembly below, the multiplier splat (xxspltiw 1069066811) sits in the |
| | +; preheader, while the addend splat (xxspltiw 1170469888) stays inside the loop because |
| | +; xvmaddasp overwrites the register that holds it. |
| 14 | +define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) { |
| 15 | +entry: |
| | +; Skip the loop entirely when the count loaded from %n is not positive. |
| 16 | + %0 = load i32, ptr %n, align 4 |
| 17 | + %cmp11 = icmp sgt i32 %0, 0 |
| 18 | + br i1 %cmp11, label %for.body.preheader, label %for.end |
| 19 | + |
| 20 | +for.body.preheader: |
| 21 | + %wide.trip.count = zext i32 %0 to i64 |
| 22 | + br label %for.body |
| 23 | + |
| 24 | +for.body: |
| 25 | + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] |
| | +; The float index is iv * 4, so each iteration addresses the next full <4 x float> vector. |
| 26 | + %1 = shl nsw i64 %indvars.iv, 2 |
| 27 | + %add.ptr = getelementptr inbounds float, ptr %var1321In_a, i64 %1 |
| 28 | + %add.ptr.val = load <4 x float>, ptr %add.ptr, align 1 |
| | +; Both the multiplier and the addend are constant splats; the addend is the one that must not be hoisted. |
| 29 | + %2 = tail call contract <4 x float> @llvm.fma.v4f32(<4 x float> %add.ptr.val, <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, <4 x float> <float 6.270500e+03, float 6.270500e+03, float 6.270500e+03, float 6.270500e+03>) |
| 30 | + %add.ptr6 = getelementptr inbounds float, ptr %__output_a, i64 %1 |
| 31 | + store <4 x float> %2, ptr %add.ptr6, align 1 |
| 32 | + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| 33 | + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count |
| 34 | + br i1 %exitcond.not, label %for.end, label %for.body |
| 35 | + |
| 36 | +for.end: |
| 37 | + ret void |
| 38 | +} |
| 39 | + |
| 40 | +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) |
| 41 | +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) |
| 42 | + |
| 43 | +; CHECK64-M: # %bb.0: # %entry |
| 44 | +; CHECK64-M-NEXT: lwz r5, 0(r5) |
| 45 | +; CHECK64-M-NEXT: cmpwi r5, 1 |
| 46 | +; CHECK64-M-NEXT: bltlr cr0 |
| 47 | +; CHECK64-M-NEXT: # %bb.1: # %for.body.preheader |
| 48 | +; CHECK64-M-NEXT: xxspltiw vs0, 1069066811 |
| 49 | +; CHECK64-M-NEXT: mtctr r5 |
| 50 | +; CHECK64-M-NEXT: li r5, 0 |
| 51 | +; CHECK64-M-NEXT: {{.*}}align 5 |
| 52 | +; CHECK64-M-NEXT: [[L2:.*]]: # %for.body |
| 53 | +; CHECK64-M-NEXT: # =>This Inner Loop Header: Depth=1 |
| 54 | +; CHECK64-M-NEXT: lxvx vs1, r4, r5 |
| 55 | +; CHECK64-M-NEXT: xxspltiw vs2, 1170469888 |
| 56 | +; CHECK64-M-NEXT: xvmaddasp vs2, vs1, vs0 |
| 57 | +; CHECK64-M-NEXT: stxvx vs2, r3, r5 |
| 58 | +; CHECK64-M-NEXT: addi r5, r5, 16 |
| 59 | +; CHECK64-M-NEXT: bdnz [[L2]] |
| 60 | +; CHECK64-M-NEXT: # %bb.3: # %for.end |
| 61 | +; CHECK64-M-NEXT: blr |
| 62 | + |
| 63 | +; CHECK32-M: .vsexp: |
| 64 | +; CHECK32-M-NEXT: # %bb.0: # %entry |
| 65 | +; CHECK32-M-NEXT: lwz r5, 0(r5) |
| 66 | +; CHECK32-M-NEXT: cmpwi r5, 0 |
| 67 | +; CHECK32-M-NEXT: blelr cr0 |
| 68 | +; CHECK32-M-NEXT: # %bb.1: # %for.body.preheader |
| 69 | +; CHECK32-M-NEXT: xxspltiw vs0, 1069066811 |
| 70 | +; CHECK32-M-NEXT: li r6, 0 |
| 71 | +; CHECK32-M-NEXT: li r7, 0 |
| 72 | +; CHECK32-M-NEXT: .align 4 |
| 73 | +; CHECK32-M-NEXT: L..BB0_2: # %for.body |
| 74 | +; CHECK32-M-NEXT: # =>This Inner Loop Header: Depth=1 |
| 75 | +; CHECK32-M-NEXT: slwi r8, r7, 4 |
| 76 | +; CHECK32-M-NEXT: xxspltiw vs2, 1170469888 |
| 77 | +; CHECK32-M-NEXT: addic r7, r7, 1 |
| 78 | +; CHECK32-M-NEXT: addze r6, r6 |
| 79 | +; CHECK32-M-NEXT: lxvx vs1, r4, r8 |
| 80 | +; CHECK32-M-NEXT: xvmaddasp vs2, vs1, vs0 |
| 81 | +; CHECK32-M-NEXT: stxvx vs2, r3, r8 |
| 82 | +; CHECK32-M-NEXT: xor r8, r7, r5 |
| 83 | +; CHECK32-M-NEXT: or. r8, r8, r6 |
| 84 | +; CHECK32-M-NEXT: bne cr0, L..BB0_2 |
0 commit comments