Skip to content

Commit c9f56b8

Browse files
Ensure that FMA codegen operand swapping matches the lsra selections (#62382)
* Ensure that FMA codegen operand swapping matches the lsra selections * Ensure operands end up in the right slots
1 parent 1872ef5 commit c9f56b8

File tree

2 files changed

+57
-28
lines changed

2 files changed

+57
-28
lines changed

src/coreclr/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2034,9 +2034,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
20342034
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
20352035
var_types baseType = node->GetSimdBaseType();
20362036
emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
2037-
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // 213 form
2038-
instruction _132form = (instruction)(ins - 1);
2039-
instruction _231form = (instruction)(ins + 1);
2037+
instruction _213form = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // 213 form
2038+
instruction _132form = (instruction)(_213form - 1);
2039+
instruction _231form = (instruction)(_213form + 1);
20402040
GenTree* op1 = node->Op(1);
20412041
GenTree* op2 = node->Op(2);
20422042
GenTree* op3 = node->Op(3);
@@ -2058,57 +2058,81 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
20582058
// Intrinsics with CopyUpperBits semantics cannot have op1 be contained
20592059
assert(!copiesUpperBits || !op1->isContained());
20602060

2061+
// We need to keep this in sync with lsraxarch.cpp
2062+
// Ideally we'd actually swap the operands in lsra and simplify codegen
2063+
// but its a bit more complicated to do so for many operands as well
2064+
// as being complicated to tell codegen how to pick the right instruction
2065+
2066+
instruction ins = INS_invalid;
2067+
20612068
if (op1->isContained() || op1->isUsedFromSpillTemp())
20622069
{
2070+
// targetReg == op3NodeReg or targetReg == ?
2071+
// op3 = ([op1] * op2) + op3
2072+
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1
2073+
ins = _231form;
2074+
std::swap(emitOp1, emitOp3);
2075+
20632076
if (targetReg == op2NodeReg)
20642077
{
2065-
std::swap(emitOp1, emitOp2);
20662078
// op2 = ([op1] * op2) + op3
20672079
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
20682080
ins = _132form;
2069-
std::swap(emitOp2, emitOp3);
2081+
std::swap(emitOp1, emitOp2);
20702082
}
2071-
else
2083+
}
2084+
else if (op3->isContained() || op3->isUsedFromSpillTemp())
2085+
{
2086+
// targetReg could be op1NodeReg, op2NodeReg, or not equal to any op
2087+
// op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3]
2088+
// ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3
2089+
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
2090+
ins = _213form;
2091+
2092+
if (!copiesUpperBits && (targetReg == op2NodeReg))
20722093
{
2073-
// targetReg == op3NodeReg or targetReg == ?
2074-
// op3 = ([op1] * op2) + op3
2075-
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1
2076-
ins = _231form;
2077-
std::swap(emitOp1, emitOp3);
2094+
// op2 = (op1 * op2) + [op3]
2095+
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
2096+
std::swap(emitOp1, emitOp2);
20782097
}
20792098
}
20802099
else if (op2->isContained() || op2->isUsedFromSpillTemp())
20812100
{
2101+
// targetReg == op1NodeReg or targetReg == ?
2102+
// op1 = (op1 * [op2]) + op3
2103+
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
2104+
ins = _132form;
2105+
std::swap(emitOp2, emitOp3);
2106+
20822107
if (!copiesUpperBits && (targetReg == op3NodeReg))
20832108
{
20842109
// op3 = (op1 * [op2]) + op3
20852110
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1
20862111
ins = _231form;
2087-
std::swap(emitOp1, emitOp3);
2088-
}
2089-
else
2090-
{
2091-
// targetReg == op1NodeReg or targetReg == ?
2092-
// op1 = (op1 * [op2]) + op3
2093-
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
2094-
ins = _132form;
2112+
std::swap(emitOp1, emitOp2);
20952113
}
2096-
std::swap(emitOp2, emitOp3);
20972114
}
20982115
else
20992116
{
2100-
// targetReg could be op1NodeReg, op2NodeReg, or not equal to any op
2101-
// op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3]
2102-
// ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3
2103-
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
2104-
if (!copiesUpperBits && (targetReg == op2NodeReg))
2117+
// containedOpNum == 0
2118+
// no extra work when resultOpNum is 0 or 1
2119+
if (targetReg == op2NodeReg)
21052120
{
2106-
// op2 = (op1 * op2) + [op3]
2107-
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
2121+
ins = _213form;
21082122
std::swap(emitOp1, emitOp2);
21092123
}
2124+
else if (targetReg == op3NodeReg)
2125+
{
2126+
ins = _231form;
2127+
std::swap(emitOp1, emitOp3);
2128+
}
2129+
else
2130+
{
2131+
ins = _213form;
2132+
}
21102133
}
21112134

2135+
assert(ins != INS_invalid);
21122136
genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3);
21132137
genProduceReg(node);
21142138
}

src/coreclr/jit/lsraxarch.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2305,6 +2305,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
23052305
// Intrinsics with CopyUpperBits semantics must have op1 as target
23062306
assert(containedOpNum != 1 || !copiesUpperBits);
23072307

2308+
// We need to keep this in sync with hwintrinsiccodegenxarch.cpp
2309+
// Ideally we'd actually swap the operands here and simplify codegen
2310+
// but its a bit more complicated to do so for many operands as well
2311+
// as being complicated to tell codegen how to pick the right instruction
2312+
23082313
if (containedOpNum == 1)
23092314
{
23102315
// https://github.com/dotnet/runtime/issues/62215
@@ -2316,7 +2321,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
23162321
if (resultOpNum == 2)
23172322
{
23182323
// op2 = ([op1] * op2) + op3
2319-
std::swap(emitOp2, emitOp3);
2324+
std::swap(emitOp1, emitOp2);
23202325
}
23212326
}
23222327
else if (containedOpNum == 3)

0 commit comments

Comments
 (0)