Skip to content

Commit 25b9210

Browse files
Adding support for morphing associative hwintrinsics (#104224)
1 parent 1fe7d18 commit 25b9210

File tree

3 files changed

+189
-18
lines changed

3 files changed

+189
-18
lines changed

src/coreclr/jit/compiler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6633,6 +6633,7 @@ class Compiler
66336633
#if defined(FEATURE_HW_INTRINSICS)
66346634
GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree);
66356635
GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
6636+
GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node);
66366637
#endif // FEATURE_HW_INTRINSICS
66376638
GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree);
66386639
GenTree* fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp);

src/coreclr/jit/gentree.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20213,6 +20213,12 @@ var_types GenTreeJitIntrinsic::GetSimdBaseType() const
2021320213
#endif // FEATURE_SIMD
2021420214

2021520215
#ifdef FEATURE_HW_INTRINSICS
20216+
//------------------------------------------------------------------------
20217+
// isCommutativeHWIntrinsic: Checks if the intrinsic is commutative
20218+
//
20219+
// Return Value:
20220+
// true if the intrinsic is commutative
20221+
//
2021620222
bool GenTree::isCommutativeHWIntrinsic() const
2021720223
{
2021820224
assert(gtOper == GT_HWINTRINSIC);

src/coreclr/jit/morph.cpp

Lines changed: 182 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9809,9 +9809,30 @@ GenTree* Compiler::fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp)
98099809
//
98109810
GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
98119811
{
9812-
assert(!optValnumCSE_phase);
98139812
assert(opts.OptimizationEnabled());
98149813

9814+
GenTree* optimizedTree = fgOptimizeHWIntrinsicAssociative(node);
9815+
9816+
if (optimizedTree != nullptr)
9817+
{
9818+
if (optimizedTree != node)
9819+
{
9820+
assert(!fgIsCommaThrow(optimizedTree));
9821+
INDEBUG(optimizedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
9822+
return optimizedTree;
9823+
}
9824+
else if (!optimizedTree->OperIsHWIntrinsic())
9825+
{
9826+
INDEBUG(optimizedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
9827+
return optimizedTree;
9828+
}
9829+
}
9830+
9831+
if (optValnumCSE_phase)
9832+
{
9833+
return node;
9834+
}
9835+
98159836
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
98169837
var_types retType = node->TypeGet();
98179838
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
@@ -10366,7 +10387,153 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
1036610387
return node;
1036710388
}
1036810389

10369-
#endif
10390+
//------------------------------------------------------------------------
10391+
// fgOptimizeHWIntrinsicAssociative: Morph an associative GenTreeHWIntrinsic tree.
10392+
//
10393+
// Arguments:
10394+
// tree - The tree to morph
10395+
//
10396+
// Return Value:
10397+
// The morphed tree if the optimization was applied; otherwise, nullptr.
10398+
//
10399+
GenTree* Compiler::fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* tree)
10400+
{
10401+
// In general this tries to simplify `(v1 op c1) op c2` into `v1 op (c1 op c2)`
10402+
// so that we can fold it down to `v1 op c3`
//
// Returns nullptr whenever the pattern is not matched; otherwise returns either
// `tree` itself (rewritten in place) or, when op1 is a comma, that comma node.
10403+
assert(opts.OptimizationEnabled());
10404+
10405+
NamedIntrinsic intrinsicId = tree->GetHWIntrinsicId();
10406+
var_types simdType = tree->TypeGet();
10407+
CorInfoType simdBaseJitType = tree->GetSimdBaseJitType();
10408+
var_types simdBaseType = tree->GetSimdBaseType();
10409+
unsigned simdSize = tree->GetSimdSize();
10410+
10411+
// Only intrinsics that produce a SIMD-typed result are considered.
if (!varTypeIsSIMD(simdType))
10412+
{
10413+
return nullptr;
10414+
}
10415+
10416+
bool isScalar = false;
10417+
genTreeOps oper = tree->HWOperGet(&isScalar);
10418+
bool needsMatchingBaseType = false;
10419+
10420+
// Only ADD/MUL over integral base types and AND/OR/XOR are handled. ADD/MUL
// additionally require the inner intrinsic to use the same base type, which is
// checked further below once the inner intrinsic is known.
switch (oper)
10421+
{
10422+
case GT_ADD:
10423+
case GT_MUL:
10424+
{
10425+
if (varTypeIsIntegral(simdBaseType))
10426+
{
10427+
needsMatchingBaseType = true;
10428+
break;
10429+
}
10430+
// Non-integral ADD/MUL is rejected (presumably because floating-point
// arithmetic is not associative -- TODO confirm).
return nullptr;
10431+
}
10432+
10433+
case GT_AND:
10434+
case GT_OR:
10435+
case GT_XOR:
10436+
{
10437+
break;
10438+
}
10439+
10440+
default:
10441+
{
10442+
return nullptr;
10443+
}
10444+
}
10445+
10446+
// op1 can be GT_COMMA, in which case we're going to fold
10447+
// `(..., (v1 op c1)) op c2` to `(..., (v1 op c3))`
10448+
10449+
GenTree* op1 = tree->Op(1);
10450+
GenTree* effectiveOp1 = op1->gtEffectiveVal();
10451+
10452+
if (!effectiveOp1->OperIsHWIntrinsic())
10453+
{
10454+
return nullptr;
10455+
}
10456+
10457+
GenTreeHWIntrinsic* intrinOp1 = effectiveOp1->AsHWIntrinsic();
10458+
10459+
bool op1IsScalar = false;
10460+
genTreeOps op1Oper = intrinOp1->HWOperGet(&op1IsScalar);
10461+
10462+
// The inner node must perform the same operation and agree on scalar-ness.
if ((op1Oper != oper) || (op1IsScalar != isScalar))
10463+
{
10464+
return nullptr;
10465+
}
10466+
assert(intrinOp1->GetHWIntrinsicId() == intrinsicId);
10467+
10468+
if (needsMatchingBaseType && (intrinOp1->GetSimdBaseType() != simdBaseType))
10469+
{
10470+
return nullptr;
10471+
}
10472+
10473+
// Both the inner and the outer second operands must be constant vectors.
if (!intrinOp1->Op(2)->IsCnsVec() || !tree->Op(2)->IsCnsVec())
10474+
{
10475+
return nullptr;
10476+
}
10477+
10478+
if (!fgGlobalMorph && (effectiveOp1 != op1))
10479+
{
10480+
// Since 'tree->Op(1)' can have complex structure; e.g. `(.., (.., op1))`
10481+
// don't run the optimization for such trees outside of global morph.
10482+
// Otherwise, there is a chance of violating VNs invariants and/or modifying a tree
10483+
// that is an active CSE candidate.
10484+
return nullptr;
10485+
}
10486+
10487+
if (gtIsActiveCSE_Candidate(tree) || gtIsActiveCSE_Candidate(effectiveOp1))
10488+
{
10489+
// In the case op1 is a comma, the optimization removes 'tree' from IR and changes
10490+
// the value of op1 and otherwise we're changing the value of 'tree' instead
10491+
return nullptr;
10492+
}
10493+
10494+
GenTreeVecCon* cns1 = intrinOp1->Op(2)->AsVecCon();
10495+
GenTreeVecCon* cns2 = tree->Op(2)->AsVecCon();
10496+
10497+
assert(cns1->TypeIs(simdType));
10498+
assert(cns2->TypeIs(simdType));
10499+
10500+
if (gtIsActiveCSE_Candidate(cns1) || gtIsActiveCSE_Candidate(cns2))
10501+
{
10502+
// The optimization removes 'cns2' from IR and changes the value of 'cns1'.
10503+
return nullptr;
10504+
}
10505+
10506+
// Build `c1 op c2` and fold it; the asserts below expect the folded constant
// to be handed back in the existing `cns1` node.
GenTree* res = gtNewSimdHWIntrinsicNode(simdType, cns1, cns2, intrinsicId, simdBaseJitType, simdSize);
10507+
res = gtFoldExprHWIntrinsic(res->AsHWIntrinsic());
10508+
10509+
assert(res == cns1);
10510+
assert(res->IsCnsVec());
10511+
10512+
if (effectiveOp1 != op1)
10513+
{
10514+
// We had a comma, so pull the VNs from node
10515+
op1->SetVNsFromNode(tree);
10516+
10517+
DEBUG_DESTROY_NODE(cns2);
10518+
DEBUG_DESTROY_NODE(tree);
10519+
10520+
// The comma (whose inner intrinsic still references `cns1`) replaces `tree`.
return op1;
10521+
}
10522+
else
10523+
{
10524+
// We had a simple tree, so pull the value and new constant up
10525+
10526+
// Rewire `tree` to take the inner intrinsic's operands directly, bypassing `intrinOp1`.
tree->Op(1) = intrinOp1->Op(1);
10527+
tree->Op(2) = intrinOp1->Op(2);
10528+
10529+
DEBUG_DESTROY_NODE(cns2);
10530+
DEBUG_DESTROY_NODE(intrinOp1);
10531+
10532+
assert(tree->Op(2) == cns1);
10533+
return tree;
10534+
}
10535+
}
10536+
#endif // FEATURE_HW_INTRINSICS
1037010537

1037110538
//------------------------------------------------------------------------
1037210539
// fgOptimizeCommutativeArithmetic: Optimizes commutative operations.
@@ -11408,6 +11575,18 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree)
1140811575

1140911576
if (opts.OptimizationEnabled())
1141011577
{
11578+
if (tree->isCommutativeHWIntrinsic())
11579+
{
11580+
assert(tree->GetOperandCount() == 2);
11581+
GenTree*& op1 = tree->Op(1);
11582+
11583+
if (op1->IsVectorConst())
11584+
{
11585+
// Move constant vectors from op1 to op2 for commutative operations
11586+
std::swap(op1, tree->Op(2));
11587+
}
11588+
}
11589+
1141111590
// Try to fold it, maybe we get lucky,
1141211591
GenTree* foldedTree = gtFoldExpr(tree);
1141311592

@@ -11423,18 +11602,6 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree)
1142311602
return foldedTree;
1142411603
}
1142511604

11426-
// Move constant vectors from op1 to op2 for commutative and compare operations
11427-
if (tree->isCommutativeHWIntrinsic())
11428-
{
11429-
assert(tree->GetOperandCount() == 2);
11430-
GenTree*& op1 = tree->Op(1);
11431-
11432-
if (op1->IsVectorConst())
11433-
{
11434-
std::swap(op1, tree->Op(2));
11435-
}
11436-
}
11437-
1143811605
if (allArgsAreConst && tree->IsVectorCreate())
1143911606
{
1144011607
// Avoid unexpected CSE for constant arguments for Vector_.Create
@@ -11446,10 +11613,7 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree)
1144611613
}
1144711614
}
1144811615

11449-
if (!optValnumCSE_phase)
11450-
{
11451-
return fgOptimizeHWIntrinsic(tree->AsHWIntrinsic());
11452-
}
11616+
return fgOptimizeHWIntrinsic(tree->AsHWIntrinsic());
1145311617
}
1145411618

1145511619
return tree;

0 commit comments

Comments
 (0)