Skip to content

Commit 62ac736

Browse files
author
Hal Finkel
committed
Optionally enable more-aggressive FMA formation in DAGCombine
The heuristic used by DAGCombine to form FMAs checks that the FMUL has only one use, but this is overly-conservative on some systems. Specifically, if the FMA and the FADD have the same latency (and the FMA does not compete for resources with the FMUL any more than the FADD does), there is no need for the restriction, and furthermore, forming the FMA leaving the FMUL can still allow for higher overall throughput and decreased critical-path length. Here we add a new TLI callback, enableAggressiveFMAFusion, false by default, to elide the hasOneUse check. This is enabled for PowerPC by default, as most PowerPC systems will benefit. Patch by Olivier Sallenave, thanks! llvm-svn: 218120
1 parent 3549ea1 commit 62ac736

File tree

5 files changed

+45
-8
lines changed

5 files changed

+45
-8
lines changed

llvm/include/llvm/Target/TargetLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,13 @@ class TargetLoweringBase {
268268
return HasFloatingPointExceptions;
269269
}
270270

271+
/// Return true if the target always benefits from combining into FMA for a
272+
/// given value type. This must typically return false on targets where FMA
273+
/// takes more cycles to execute than FADD.
274+
virtual bool enableAggressiveFMAFusion(EVT VT) const {
275+
return false; // Default: DAGCombine keeps requiring the FMUL to have one use.
276+
}
277+
271278
/// Return the ValueType of the result of SETCC operations. Also used to
272279
/// obtain the target's preferred type for the condition operand of SELECT and
273280
/// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6684,13 +6684,15 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
66846684
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
66856685

66866686
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
6687-
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
6687+
if (N0.getOpcode() == ISD::FMUL &&
6688+
(N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
66886689
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
66896690
N0.getOperand(0), N0.getOperand(1), N1);
66906691

66916692
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
66926693
// Note: Commutes FADD operands.
6693-
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
6694+
if (N1.getOpcode() == ISD::FMUL &&
6695+
(N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
66946696
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
66956697
N1.getOperand(0), N1.getOperand(1), N0);
66966698
}
@@ -6762,14 +6764,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
67626764
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
67636765

67646766
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
6765-
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
6767+
if (N0.getOpcode() == ISD::FMUL &&
6768+
(N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
67666769
return DAG.getNode(ISD::FMA, dl, VT,
67676770
N0.getOperand(0), N0.getOperand(1),
67686771
DAG.getNode(ISD::FNEG, dl, VT, N1));
67696772

67706773
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
67716774
// Note: Commutes FSUB operands.
6772-
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
6775+
if (N1.getOpcode() == ISD::FMUL &&
6776+
(N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
67736777
return DAG.getNode(ISD::FMA, dl, VT,
67746778
DAG.getNode(ISD::FNEG, dl, VT,
67756779
N1.getOperand(0)),
@@ -6778,7 +6782,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
67786782
// fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
67796783
if (N0.getOpcode() == ISD::FNEG &&
67806784
N0.getOperand(0).getOpcode() == ISD::FMUL &&
6781-
N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
6785+
((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
6786+
TLI.enableAggressiveFMAFusion(VT))) {
67826787
SDValue N00 = N0.getOperand(0).getOperand(0);
67836788
SDValue N01 = N0.getOperand(0).getOperand(1);
67846789
return DAG.getNode(ISD::FMA, dl, VT,

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,11 @@ EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
826826
return VT.changeVectorElementTypeToInteger();
827827
}
828828

829+
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
830+
assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); // Sanity check: only FP types are expected here.
831+
return true; // Opt in unconditionally; VT is not otherwise inspected.
832+
}
833+
829834
//===----------------------------------------------------------------------===//
830835
// Node matching predicates, for use by the tblgen matching code.
831836
//===----------------------------------------------------------------------===//

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,11 @@ namespace llvm {
360360
/// getSetCCResultType - Return the ISD::SETCC ValueType
361361
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
362362

363+
/// Return true if the target always benefits from combining into FMA for a
364+
/// given value type. This must typically return false on targets where FMA
365+
/// takes more cycles to execute than FADD.
366+
bool enableAggressiveFMAFusion(EVT VT) const override;
367+
363368
/// getPreIndexedAddressParts - returns true by value, base pointer and
364369
/// offset pointer and addressing mode by reference if the node's address
365370
/// can be legally represented as pre-indexed load / store address.

llvm/test/CodeGen/PowerPC/fma.ll

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
; RUN: llc < %s -march=ppc32 -fp-contract=fast | FileCheck %s
22

3+
declare double @dummy1(double) #0
4+
declare double @dummy2(double, double) #0
5+
declare double @dummy3(double, double, double) #0
6+
37
define double @test_FMADD1(double %A, double %B, double %C) {
48
%D = fmul double %A, %B ; <double> [#uses=1]
5-
%E = fadd double %D, %C ; <double> [#uses=1]
9+
%E = fadd double %C, %D ; <double> [#uses=1]
610
ret double %E
711
; CHECK-LABEL: test_FMADD1:
812
; CHECK: fmadd
@@ -18,15 +22,26 @@ define double @test_FMADD2(double %A, double %B, double %C) {
1822
; CHECK-NEXT: blr
1923
}
2024

21-
define double @test_FMSUB(double %A, double %B, double %C) {
25+
define double @test_FMSUB1(double %A, double %B, double %C) {
2226
%D = fmul double %A, %B ; <double> [#uses=1]
2327
%E = fsub double %D, %C ; <double> [#uses=1]
2428
ret double %E
25-
; CHECK-LABEL: test_FMSUB:
29+
; CHECK-LABEL: test_FMSUB1:
2630
; CHECK: fmsub
2731
; CHECK-NEXT: blr
2832
}
2933

34+
define double @test_FMSUB2(double %A, double %B, double %C, double %D) {
35+
%E = fmul double %A, %B ; <double> [#uses=2]
36+
%F = fadd double %E, %C ; <double> [#uses=1]
37+
%G = fsub double %E, %D ; <double> [#uses=1]
38+
%H = call double @dummy2(double %F, double %G) ; <double> [#uses=1]
39+
ret double %H
40+
; CHECK-LABEL: test_FMSUB2:
41+
; CHECK: fmadd
42+
; CHECK-NEXT: fmsub
43+
}
44+
3045
define double @test_FNMADD1(double %A, double %B, double %C) {
3146
%D = fmul double %A, %B ; <double> [#uses=1]
3247
%E = fadd double %D, %C ; <double> [#uses=1]

0 commit comments

Comments
 (0)