[LegalizeVectorOps][PowerPC] Use xor to expand fneg. #106595
Conversation
This preserves the semantics of fneg and matches what we do in LegalizeDAG. I kept the legal FSUB check to force unrolling for some targets, like AArch64, that don't have FSUB but do have XOR.
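For intuition, here is a minimal scalar sketch of what the new expansion does at the SelectionDAG level (bitcast to the integer type, xor with the sign mask, bitcast back). This is only an illustration, not code from the patch; it assumes C++20 for std::bit_cast, and the helper name is made up:

```cpp
#include <bit>
#include <cstdint>

// Scalar model of the expansion: fneg(x) == bitcast(bitcast(x) ^ sign_bit).
float fnegViaXor(float X) {
  uint32_t Bits = std::bit_cast<uint32_t>(X); // ISD::BITCAST to the integer VT
  Bits ^= UINT32_C(0x80000000);               // ISD::XOR with APInt::getSignMask
  return std::bit_cast<float>(Bits);          // ISD::BITCAST back to the FP VT
}
```

Unlike FSUB(-0.0, X), this flips only the sign bit, so it is a pure bit operation with no rounding or exception behavior, which is what fneg semantics require.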
@llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-llvm-selectiondag

Author: Craig Topper (topperc)

Changes

This preserves the semantics of fneg and matches what we do in LegalizeDAG. I kept the legal FSUB check to force unrolling for some targets that don't have FSUB but have XOR. On AArch64, using xor broke some tests that expected to see a (v1f64 (fma (insert_vector_elt (f64 (fneg (extract_vector_elt X)))))) pattern.

Full diff: https://github.com/llvm/llvm-project/pull/106595.diff

5 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3f104baed97b1a..2557fa288606e7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1669,12 +1669,18 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
}
SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
- if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+ // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
+ if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) {
SDLoc DL(Node);
- SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
- // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
- return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
- Node->getOperand(0));
+ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue SignMask = DAG.getConstant(
+ APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, IntVT, Cast, SignMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
}
return DAG.UnrollVectorOp(Node);
}
diff --git a/llvm/test/CodeGen/PowerPC/fma-negate.ll b/llvm/test/CodeGen/PowerPC/fma-negate.ll
index 22118c44ece706..1f8e0968ca98ea 100644
--- a/llvm/test/CodeGen/PowerPC/fma-negate.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-negate.ll
@@ -155,7 +155,7 @@ define <4 x float> @test_neg_fma_v4f32(<4 x float> %a, <4 x float> %b,
; NO-VSX: # %bb.0: # %entry
; NO-VSX-NEXT: vspltisb 5, -1
; NO-VSX-NEXT: vslw 5, 5, 5
-; NO-VSX-NEXT: vsubfp 2, 5, 2
+; NO-VSX-NEXT: vxor 2, 2, 5
; NO-VSX-NEXT: vmaddfp 2, 2, 3, 4
; NO-VSX-NEXT: blr
<4 x float> %c) {
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll b/llvm/test/CodeGen/PowerPC/fp-strict.ll
index 124c588ba242c1..d3025f1da658af 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll
@@ -915,7 +915,7 @@ define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
; NOVSX-NEXT: vslw v5, v5, v5
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: addi r3, r1, -64
-; NOVSX-NEXT: vsubfp v4, v5, v4
+; NOVSX-NEXT: vxor v4, v4, v5
; NOVSX-NEXT: stvx v2, 0, r3
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: stvx v4, 0, r3
@@ -1213,7 +1213,7 @@ define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
; NOVSX-NEXT: fmadds f0, f2, f1, f0
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
-; NOVSX-NEXT: vsubfp v2, v3, v2
+; NOVSX-NEXT: vxor v2, v2, v3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmadd_v4f32:
@@ -1462,7 +1462,7 @@ define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
; NOVSX-NEXT: vslw v5, v5, v5
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: addi r3, r1, -64
-; NOVSX-NEXT: vsubfp v4, v5, v4
+; NOVSX-NEXT: vxor v4, v4, v5
; NOVSX-NEXT: stvx v2, 0, r3
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: stvx v4, 0, r3
@@ -1488,7 +1488,7 @@ define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
-; NOVSX-NEXT: vsubfp v2, v5, v2
+; NOVSX-NEXT: vxor v2, v2, v5
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmsub_v4f32:
diff --git a/llvm/test/CodeGen/PowerPC/vec_abs.ll b/llvm/test/CodeGen/PowerPC/vec_abs.ll
index f7ff18f3ce1790..50dcfc3faf62e9 100644
--- a/llvm/test/CodeGen/PowerPC/vec_abs.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_abs.ll
@@ -44,7 +44,7 @@ define <4 x float> @test2_float(<4 x float> %aa) #0 {
; CHECK-NOVSX: fabs
; CHECK-NOVSX: fabs
; CHECK-NOVSX: fabs
-; CHECK-NOVSX: vsubfp
+; CHECK-NOVSX: vxor
; CHECK-NOVSX: blr
define <2 x double> @test_double(<2 x double> %aa) #0 {
diff --git a/llvm/test/CodeGen/PowerPC/vec_fneg.ll b/llvm/test/CodeGen/PowerPC/vec_fneg.ll
index 2854a31cad9e17..bbbdd45cbb01ac 100644
--- a/llvm/test/CodeGen/PowerPC/vec_fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_fneg.ll
@@ -15,7 +15,7 @@ define void @test_float(ptr %A) {
; CHECK: xvnegsp
; CHECK: blr
-; CHECK-NOVSX: vsubfp
+; CHECK-NOVSX: vxor
; CHECK-NOVSX: blr
}
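A note on the NOVSX check lines above: the two-instruction constant materialization is unchanged. `vspltisb` with -1 fills a register with 0xFF bytes, and `vslw` then shifts each 32-bit word left by 31 (the shift amount is taken from the low 5 bits of the same all-ones word), leaving 0x80000000 in every lane. That bit pattern is both a splat of -0.0f (the old vsubfp operand) and the sign mask the new vxor needs, so only the arithmetic instruction changes. A per-lane sketch in plain C++, assuming nothing beyond standard integer semantics:

```cpp
#include <cstdint>

// Per-lane model of vspltisb 5, -1 followed by vslw 5, 5, 5.
uint32_t signMaskLane() {
  uint32_t AllOnes = 0xFFFFFFFFu;  // vspltisb -1: every byte is 0xFF
  uint32_t Amount = AllOnes & 31u; // vslw uses the low 5 bits => 31
  return AllOnes << Amount;        // 0x80000000, the fneg sign mask / -0.0f
}
```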