|
30 | 30 | // Altivec transformation functions and pattern fragments. |
31 | 31 | // |
32 | 32 |
|
| 33 | +// fneg is not legal, and is desugared as an xor of the bitcast operand with (vslw all-ones, all-ones); this fragment matches that form and yields v4f32. NOTE(review): the mask is presumably the per-word sign bit, assuming vslw uses only the low 5 bits of each shift count - confirm. |
| 34 | +def desugared_fneg : PatFrag<(ops node:$x), (v4f32 (bitconvert (xor (bitconvert $x), |
| 35 | + (int_ppc_altivec_vslw (bitconvert (v16i8 immAllOnesV)), |
| 36 | + (bitconvert (v16i8 immAllOnesV))))))>; |
| 37 | + |
33 | 38 | def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), |
34 | 39 | (vector_shuffle node:$lhs, node:$rhs), [{ |
35 | 40 | return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); |
@@ -467,11 +472,12 @@ def VMADDFP : VAForm_1<46, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB), |
467 | 472 | [(set v4f32:$RT, |
468 | 473 | (fma v4f32:$RA, v4f32:$RC, v4f32:$RB))]>; |
469 | 474 |
|
470 | | -// FIXME: The fma+fneg pattern won't match because fneg is not legal. |
| 475 | +// fneg is not legal, hence the selection pattern matches desugared_fneg(fma(RA, RC, desugared_fneg(RB))) rather than the plain fneg form. |
471 | 476 | def VNMSUBFP: VAForm_1<47, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB), |
472 | 477 | "vnmsubfp $RT, $RA, $RC, $RB", IIC_VecFP, |
473 | | - [(set v4f32:$RT, (fneg (fma v4f32:$RA, v4f32:$RC, |
474 | | - (fneg v4f32:$RB))))]>; |
| 478 | + [(set v4f32:$RT, (desugared_fneg (fma v4f32:$RA, v4f32:$RC, |
| 479 | + (desugared_fneg v4f32:$RB))))]>; |
| 480 | + |
475 | 481 | let hasSideEffects = 1 in { |
476 | 482 | def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; |
477 | 483 | def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, |
@@ -892,6 +898,13 @@ def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>; |
892 | 898 | // Multiply-add: select (add (mul vA, vB), vC) on v8i16 as a single VMLADDUHM. |
893 | 899 | def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>; |
894 | 900 |
|
| 901 | + |
| 902 | +// Fused negated multiply-subtract: select VNMSUBFP when the inner multiply-add is the int_ppc_altivec_vmaddfp intrinsic and both negations appear in desugared_fneg form. |
| 903 | +def : Pat<(v4f32 (desugared_fneg |
| 904 | + (int_ppc_altivec_vmaddfp v4f32:$RA, v4f32:$RC, |
| 905 | + (desugared_fneg v4f32:$RB)))), |
| 906 | + (VNMSUBFP $RA, $RC, $RB)>; |
| 907 | + |
895 | 908 | // Saturating adds/subtracts. |
896 | 909 | def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>; |
897 | 910 | def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>; |
|
0 commit comments