@@ -2034,9 +2034,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
2034
2034
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId ();
2035
2035
var_types baseType = node->GetSimdBaseType ();
2036
2036
emitAttr attr = emitActualTypeSize (Compiler::getSIMDTypeForSize (node->GetSimdSize ()));
2037
- instruction ins = HWIntrinsicInfo::lookupIns (intrinsicId, baseType); // 213 form
2038
- instruction _132form = (instruction)(ins - 1 );
2039
- instruction _231form = (instruction)(ins + 1 );
2037
+ instruction _213form = HWIntrinsicInfo::lookupIns (intrinsicId, baseType); // 213 form
2038
+ instruction _132form = (instruction)(_213form - 1 );
2039
+ instruction _231form = (instruction)(_213form + 1 );
2040
2040
GenTree* op1 = node->Op (1 );
2041
2041
GenTree* op2 = node->Op (2 );
2042
2042
GenTree* op3 = node->Op (3 );
@@ -2058,57 +2058,81 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
2058
2058
// Intrinsics with CopyUpperBits semantics cannot have op1 be contained
2059
2059
assert (!copiesUpperBits || !op1->isContained ());
2060
2060
2061
+ // We need to keep this in sync with lsraxarch.cpp
2062
+ // Ideally we'd actually swap the operands in lsra and simplify codegen
2063
+ // but it's a bit more complicated to do so for many operands as well
2064
+ // as being complicated to tell codegen how to pick the right instruction
2065
+
2066
+ instruction ins = INS_invalid;
2067
+
2061
2068
if (op1->isContained () || op1->isUsedFromSpillTemp ())
2062
2069
{
2070
+ // targetReg == op3NodeReg or targetReg == ?
2071
+ // op3 = ([op1] * op2) + op3
2072
+ // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1
2073
+ ins = _231form;
2074
+ std::swap (emitOp1, emitOp3);
2075
+
2063
2076
if (targetReg == op2NodeReg)
2064
2077
{
2065
- std::swap (emitOp1, emitOp2);
2066
2078
// op2 = ([op1] * op2) + op3
2067
2079
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
2068
2080
ins = _132form;
2069
- std::swap (emitOp2, emitOp3 );
2081
+ std::swap (emitOp1, emitOp2 );
2070
2082
}
2071
- else
2083
+ }
2084
+ else if (op3->isContained () || op3->isUsedFromSpillTemp ())
2085
+ {
2086
+ // targetReg could be op1NodeReg, op2NodeReg, or not equal to any op
2087
+ // op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3]
2088
+ // ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3
2089
+ // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
2090
+ ins = _213form;
2091
+
2092
+ if (!copiesUpperBits && (targetReg == op2NodeReg))
2072
2093
{
2073
- // targetReg == op3NodeReg or targetReg == ?
2074
- // op3 = ([op1] * op2) + op3
2075
- // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1
2076
- ins = _231form;
2077
- std::swap (emitOp1, emitOp3);
2094
+ // op2 = (op1 * op2) + [op3]
2095
+ // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
2096
+ std::swap (emitOp1, emitOp2);
2078
2097
}
2079
2098
}
2080
2099
else if (op2->isContained () || op2->isUsedFromSpillTemp ())
2081
2100
{
2101
+ // targetReg == op1NodeReg or targetReg == ?
2102
+ // op1 = (op1 * [op2]) + op3
2103
+ // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
2104
+ ins = _132form;
2105
+ std::swap (emitOp2, emitOp3);
2106
+
2082
2107
if (!copiesUpperBits && (targetReg == op3NodeReg))
2083
2108
{
2084
2109
// op3 = (op1 * [op2]) + op3
2085
2110
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1
2086
2111
ins = _231form;
2087
- std::swap (emitOp1, emitOp3);
2088
- }
2089
- else
2090
- {
2091
- // targetReg == op1NodeReg or targetReg == ?
2092
- // op1 = (op1 * [op2]) + op3
2093
- // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
2094
- ins = _132form;
2112
+ std::swap (emitOp1, emitOp2);
2095
2113
}
2096
- std::swap (emitOp2, emitOp3);
2097
2114
}
2098
2115
else
2099
2116
{
2100
- // targetReg could be op1NodeReg, op2NodeReg, or not equal to any op
2101
- // op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3]
2102
- // ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3
2103
- // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
2104
- if (!copiesUpperBits && (targetReg == op2NodeReg))
2117
+ // containedOpNum == 0
2118
+ // no extra work when resultOpNum is 0 or 1
2119
+ if (targetReg == op2NodeReg)
2105
2120
{
2106
- // op2 = (op1 * op2) + [op3]
2107
- // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
2121
+ ins = _213form;
2108
2122
std::swap (emitOp1, emitOp2);
2109
2123
}
2124
+ else if (targetReg == op3NodeReg)
2125
+ {
2126
+ ins = _231form;
2127
+ std::swap (emitOp1, emitOp3);
2128
+ }
2129
+ else
2130
+ {
2131
+ ins = _213form;
2132
+ }
2110
2133
}
2111
2134
2135
+ assert (ins != INS_invalid);
2112
2136
genHWIntrinsic_R_R_R_RM (ins, attr, targetReg, emitOp1->GetRegNum (), emitOp2->GetRegNum (), emitOp3);
2113
2137
genProduceReg (node);
2114
2138
}
0 commit comments