-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[PowerPC] hoist xxspltiw instruction out of the loop with FMA mutation pass. #111696
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-powerpc Author: zhijian lin (diggerlin) ChangesFull diff: https://github.com/llvm/llvm-project/pull/111696.diff 2 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 7d0455942923dd..a9e8d038ffd8bd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -572,7 +572,8 @@ void PPCPassConfig::addMachineSSAOptimization() {
void PPCPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOptLevel::None) {
initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
- insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
+ insertPass(VSXFMAMutateEarly ? &TwoAddressInstructionPassID
+ : &MachineSchedulerID,
&PPCVSXFMAMutateID);
}
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
new file mode 100644
index 00000000000000..fa86fe7664e41c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
@@ -0,0 +1,77 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early < %s | \
+; RUN: FileCheck --check-prefix=CHECK-M %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false -ppc-asm-full-reg-names < %s | \
+; RUN: FileCheck --check-prefix=CHECK-A %s
+
+target triple = "powerpc64-ibm-aix7.2.0.0"
+define void @vsexp(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, ptr %n, align 4
+ %cmp11 = icmp sgt i32 %0, 0
+ br i1 %cmp11, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext i32 %0 to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %1 = shl nsw i64 %indvars.iv, 2
+ %add.ptr = getelementptr inbounds float, ptr %var1321In_a, i64 %1
+ %add.ptr.val = load <4 x float>, ptr %add.ptr, align 1
+ %2 = tail call contract <4 x float> @llvm.fma.v4f32(<4 x float> %add.ptr.val, <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, <4 x float> <float 6.270500e+03, float 6.270500e+03, float 6.270500e+03, float 6.270500e+03>)
+ %add.ptr6 = getelementptr inbounds float, ptr %__output_a, i64 %1
+ store <4 x float> %2, ptr %add.ptr6, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+; CHECK-M: .csect ..text..[PR],5{{[[:space:]].*}}.vsexp:
+; CHECK-M-NEXT: # %bb.0: # %entry
+; CHECK-M-NEXT: lwz r5, 0(r5)
+; CHECK-M-NEXT: cmpwi r5, 1
+; CHECK-M-NEXT: bltlr cr0
+; CHECK-M-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-M-NEXT: xxspltiw vs0, 1069066811
+; CHECK-M-NEXT: xxspltiw vs1, 1170469888
+; CHECK-M-NEXT: mtctr r5
+; CHECK-M-NEXT: li r5, 0
+; CHECK-M-NEXT: .align 5
+; CHECK-M-NEXT: L..BB0_2: # %for.body
+; CHECK-M-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-M-NEXT: lxvx vs2, r4, r5
+; CHECK-M-NEXT: xvmaddmsp vs2, vs0, vs1
+; CHECK-M-NEXT: stxvx vs2, r3, r5
+; CHECK-M-NEXT: addi r5, r5, 16
+; CHECK-M-NEXT: bdnz L..BB0_2
+; CHECK-M-NEXT: # %bb.3: # %for.end
+; CHECK-M-NEXT: blr
+
+; CHECK-A: .csect ..text..[PR],5{{[[:space:]].*}}.vsexp:
+; CHECK-A-NEXT: # %bb.0: # %entry
+; CHECK-A-NEXT: lwz r5, 0(r5)
+; CHECK-A-NEXT: cmpwi r5, 1
+; CHECK-A-NEXT: bltlr cr0
+; CHECK-A-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-A-NEXT: xxspltiw vs0, 1069066811
+; CHECK-A-NEXT: mtctr r5
+; CHECK-A-NEXT: li r5, 0
+; CHECK-A-NEXT: .align 5
+; CHECK-A-NEXT: L..BB0_2: # %for.body
+; CHECK-A-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-A-NEXT: lxvx vs1, r4, r5
+; CHECK-A-NEXT: xxspltiw vs2, 1170469888
+; CHECK-A-NEXT: xvmaddasp vs2, vs1, vs0
+; CHECK-A-NEXT: stxvx vs2, r3, r5
+; CHECK-A-NEXT: addi r5, r5, 16
+; CHECK-A-NEXT: bdnz L..BB0_2
+; CHECK-A-NEXT: # %bb.3: # %for.end
+; CHECK-A-NEXT: blr
|
PowerPC VSX FMA Mutation pass
head of Register Coalescer pass
PowerPC VSX FMA Mutation pass
head of Register Coalescer pass
for option -schedule-ppc-vsx-fma-mutation-early
The description for this PR is way too long to be used as a commit message. I think it's better if you put the details in the issue this PR fixes (open one if there isn't one) and provide a more general commit message here. |
PowerPC VSX FMA Mutation pass
head of Register Coalescer pass
for option -schedule-ppc-vsx-fma-mutation-early
PowerPC VSX FMA Mutation
pass ahead of the Register Coalescer pas
s when the -schedule-ppc-vsx-fma-mutation-early option is enabled.
PowerPC VSX FMA Mutation
pass ahead of the Register Coalescer pas
s when the -schedule-ppc-vsx-fma-mutation-early option is enabled.-schedule-ppc-vsx-fma-mutation-early
-schedule-ppc-vsx-fma-mutation-early
-schedule-ppc-vsx-fma-mutation-early
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with Lei, the description is a bit too long and should concisely summarize the patch.
b3f6120
to
811c86c
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
811c86c
to
c99bff3
Compare
-schedule-ppc-vsx-fma-mutation-early
Add a pre- commit test case for Patch #111696 Test ppc-vsx-fma-mutate pass work with -schedule-ppc-vsx-fma-mutation-early not hoist the instruction `xxspltiw vs2, 1170469888` out the loop. --------- Co-authored-by: Amy Kwan <amy.kwan1@ibm.com>
c99bff3
to
3e605b1
Compare
Summary:
The patch fixes the issue [PowerPC] missing VSX FMA Mutation optimize in some case for option -schedule-ppc-vsx-fma-mutation-early #111906
The Register Coalescer pass, which eliminates COPY instructions, can prevent the PowerPC VSX FMA Mutation pass from converting a COPY adjacent to an XSMADDADP into a single XSMADDMDP instruction in some cases. This results in the xxspltiw instruction being hoisted out of the loop.
This patch allows the VSX FMA Mutation pass to run before the Register Coalescer pass to address the problem.