Skip to content

Commit a10f795

Browse files
committed
Use FMA for v8f16
1 parent 2bf7173 commit a10f795

File tree

5 files changed

+58
-27
lines changed

5 files changed

+58
-27
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -317,10 +317,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
317317
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
318318
}
319319

320+
if (Subtarget->hasFP16()) {
321+
setOperationAction(ISD::FMA, MVT::v8f16, Legal);
322+
}
323+
320324
if (Subtarget->hasRelaxedSIMD()) {
321-
if (Subtarget->hasFP16()) {
322-
setOperationAction(ISD::FMULADD, MVT::v8f16, Legal);
323-
}
324325
setOperationAction(ISD::FMULADD, MVT::v4f32, Legal);
325326
setOperationAction(ISD::FMULADD, MVT::v2f64, Legal);
326327
}
@@ -1128,6 +1129,18 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
11281129
return TargetLoweringBase::getPreferredVectorAction(VT);
11291130
}
11301131

1132+
// Override of the TargetLowering hook of the same name (declared `override`
// in WebAssemblyISelLowering.h).
//
// Returns true only when the subtarget has FP16 and VT is a vector type whose
// scalar element type is f16; every other type yields false. MF is not
// consulted.
bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1133+
const MachineFunction &MF, EVT VT) const {
1134+
// Scalars, and any type on a subtarget without FP16, never qualify.
if (!Subtarget->hasFP16() || !VT.isVector())
1135+
return false;
1136+
1137+
EVT ScalarVT = VT.getScalarType();
1138+
// getSimpleVT() is only called below once the element type is known to be
// a simple type, so extended element types are rejected here first.
if (!ScalarVT.isSimple())
1139+
return false;
1140+
1141+
// The check is on the element type only; the lane count is not inspected.
return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1142+
}
1143+
11311144
bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
11321145
SDValue Op, const TargetLoweringOpt &TLO) const {
11331146
// ISel process runs DAGCombiner after legalization; this step is called

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ class WebAssemblyTargetLowering final : public TargetLowering {
8181

8282
TargetLoweringBase::LegalizeTypeAction
8383
getPreferredVectorAction(MVT VT) const override;
84+
// Returns true only for vector types with f16 elements when the subtarget
// has FP16 (see the definition in WebAssemblyISelLowering.cpp).
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
85+
EVT VT) const override;
8486

8587
SDValue LowerCall(CallLoweringInfo &CLI,
8688
SmallVectorImpl<SDValue> &InVals) const override;

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1583,7 +1583,8 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
15831583
// Relaxed (Negative) Multiply-Add (madd/nmadd)
15841584
//===----------------------------------------------------------------------===//
15851585

1586-
multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> {
1586+
multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
1587+
list<Predicate> reqs> {
15871588
defm MADD_#vec :
15881589
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
15891590
[(set (vec.vt V128:$dst), (int_wasm_relaxed_madd
@@ -1608,9 +1609,29 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
16081609
(!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
16091610
}
16101611

1611-
defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
1612-
defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
1613-
defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasRelaxedSIMD, HasFP16]>;
1612+
defm "" : RELAXED_SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
1613+
defm "" : RELAXED_SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
1614+
1615+
//===----------------------------------------------------------------------===//
1616+
// FP16 (Negative) Multiply-Add (madd/nmadd)
1617+
//===----------------------------------------------------------------------===//
1618+
1619+
// Defines a MADD_<vec> / NMADD_<vec> instruction pair for the vector type
// `vec`. Unlike RELAXED_SIMDMADD, which matches the int_wasm_relaxed_madd
// intrinsic, both patterns here match the generic `fma` node.
//   simdopA - opcode argument passed to the madd instruction
//   simdopS - opcode argument passed to the nmadd instruction
//   reqs    - predicate list forwarded to both instructions
multiclass HALF_PRECISION_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
1620+
list<Predicate> reqs> {
1621+
// madd: selected from fma(a, b, c).
defm MADD_#vec :
1622+
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
1623+
[(set (vec.vt V128:$dst), (fma
1624+
(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
1625+
vec.prefix#".madd\t$dst, $a, $b, $c",
1626+
vec.prefix#".madd", simdopA, reqs>;
1627+
// nmadd: selected from fma(fneg(a), b, c), i.e. the first multiplicand is
// negated before the fused multiply-add.
defm NMADD_#vec :
1628+
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
1629+
[(set (vec.vt V128:$dst), (fma
1630+
(fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)))],
1631+
vec.prefix#".nmadd\t$dst, $a, $b, $c",
1632+
vec.prefix#".nmadd", simdopS, reqs>;
1633+
}
1634+
defm "" : HALF_PRECISION_SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
16141635

16151636
//===----------------------------------------------------------------------===//
16161637
// Laneselect

llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -417,15 +417,14 @@ define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
417417
; RELAXED-LABEL: fadd_fmul_contract_8xf16:
418418
; RELAXED: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
419419
; RELAXED-NEXT: # %bb.0:
420-
; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $1, $0, $2
420+
; RELAXED-NEXT: f16x8.madd $push0=, $1, $0, $2
421421
; RELAXED-NEXT: return $pop0
422422
;
423423
; STRICT-LABEL: fadd_fmul_contract_8xf16:
424424
; STRICT: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
425425
; STRICT-NEXT: # %bb.0:
426-
; STRICT-NEXT: f16x8.mul $push0=, $1, $0
427-
; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
428-
; STRICT-NEXT: return $pop1
426+
; STRICT-NEXT: f16x8.madd $push0=, $1, $0, $2
427+
; STRICT-NEXT: return $pop0
429428
;
430429
; NOFP16-LABEL: fadd_fmul_contract_8xf16:
431430
; NOFP16: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
@@ -648,15 +647,14 @@ define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x hal
648647
; RELAXED-LABEL: fmuladd_contract_8xf16:
649648
; RELAXED: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
650649
; RELAXED-NEXT: # %bb.0:
651-
; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $0, $1, $2
650+
; RELAXED-NEXT: f16x8.madd $push0=, $0, $1, $2
652651
; RELAXED-NEXT: return $pop0
653652
;
654653
; STRICT-LABEL: fmuladd_contract_8xf16:
655654
; STRICT: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
656655
; STRICT-NEXT: # %bb.0:
657-
; STRICT-NEXT: f16x8.mul $push0=, $0, $1
658-
; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
659-
; STRICT-NEXT: return $pop1
656+
; STRICT-NEXT: f16x8.madd $push0=, $0, $1, $2
657+
; STRICT-NEXT: return $pop0
660658
;
661659
; NOFP16-LABEL: fmuladd_contract_8xf16:
662660
; NOFP16: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
@@ -835,15 +833,14 @@ define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
835833
; RELAXED-LABEL: fmuladd_8xf16:
836834
; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
837835
; RELAXED-NEXT: # %bb.0:
838-
; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $0, $1, $2
836+
; RELAXED-NEXT: f16x8.madd $push0=, $0, $1, $2
839837
; RELAXED-NEXT: return $pop0
840838
;
841839
; STRICT-LABEL: fmuladd_8xf16:
842840
; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
843841
; STRICT-NEXT: # %bb.0:
844-
; STRICT-NEXT: f16x8.mul $push0=, $0, $1
845-
; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
846-
; STRICT-NEXT: return $pop1
842+
; STRICT-NEXT: f16x8.madd $push0=, $0, $1, $2
843+
; STRICT-NEXT: return $pop0
847844
;
848845
; NOFP16-LABEL: fmuladd_8xf16:
849846
; NOFP16: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()

llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,14 @@ define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
4646
; RELAXED-LABEL: fsub_fmul_contract_8xf16:
4747
; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
4848
; RELAXED-NEXT: # %bb.0:
49-
; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $1, $0, $2
49+
; RELAXED-NEXT: f16x8.nmadd $push0=, $1, $0, $2
5050
; RELAXED-NEXT: return $pop0
5151
;
5252
; STRICT-LABEL: fsub_fmul_contract_8xf16:
5353
; STRICT: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
5454
; STRICT-NEXT: # %bb.0:
55-
; STRICT-NEXT: f16x8.mul $push0=, $1, $0
56-
; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0
57-
; STRICT-NEXT: return $pop1
55+
; STRICT-NEXT: f16x8.nmadd $push0=, $1, $0, $2
56+
; STRICT-NEXT: return $pop0
5857
%mul = fmul contract <8 x half> %b, %a
5958
%sub = fsub contract <8 x half> %c, %mul
6059
ret <8 x half> %sub
@@ -147,15 +146,14 @@ define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
147146
; RELAXED-LABEL: fmuladd_8xf16:
148147
; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
149148
; RELAXED-NEXT: # %bb.0:
150-
; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $0, $1, $2
149+
; RELAXED-NEXT: f16x8.nmadd $push0=, $0, $1, $2
151150
; RELAXED-NEXT: return $pop0
152151
;
153152
; STRICT-LABEL: fmuladd_8xf16:
154153
; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
155154
; STRICT-NEXT: # %bb.0:
156-
; STRICT-NEXT: f16x8.mul $push0=, $0, $1
157-
; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0
158-
; STRICT-NEXT: return $pop1
155+
; STRICT-NEXT: f16x8.nmadd $push0=, $0, $1, $2
156+
; STRICT-NEXT: return $pop0
159157
%fneg = fneg <8 x half> %a
160158
%fma = call <8 x half> @llvm.fmuladd(<8 x half> %fneg, <8 x half> %b, <8 x half> %c)
161159
ret <8 x half> %fma

0 commit comments

Comments
 (0)