@@ -9809,9 +9809,30 @@ GenTree* Compiler::fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp)
9809
9809
//
9810
9810
GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
9811
9811
{
9812
- assert(!optValnumCSE_phase);
9813
9812
assert(opts.OptimizationEnabled());
9814
9813
9814
+ GenTree* optimizedTree = fgOptimizeHWIntrinsicAssociative(node);
9815
+
9816
+ if (optimizedTree != nullptr)
9817
+ {
9818
+ if (optimizedTree != node)
9819
+ {
9820
+ assert(!fgIsCommaThrow(optimizedTree));
9821
+ INDEBUG(optimizedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
9822
+ return optimizedTree;
9823
+ }
9824
+ else if (!optimizedTree->OperIsHWIntrinsic())
9825
+ {
9826
+ INDEBUG(optimizedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
9827
+ return optimizedTree;
9828
+ }
9829
+ }
9830
+
9831
+ if (optValnumCSE_phase)
9832
+ {
9833
+ return node;
9834
+ }
9835
+
9815
9836
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
9816
9837
var_types retType = node->TypeGet();
9817
9838
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
@@ -10366,7 +10387,153 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
10366
10387
return node;
10367
10388
}
10368
10389
10369
- #endif
10390
+ //------------------------------------------------------------------------
10391
+ // fgOptimizeHWIntrinsicAssociative: Morph an associative GenTreeHWIntrinsic tree.
10392
+ //
10393
+ // Arguments:
10394
+ // tree - The tree to morph
10395
+ //
10396
+ // Return Value:
10397
+ // The optimized tree (either 'tree' itself or its first operand), or nullptr if the pattern does not apply.
10398
+ //
10399
+ GenTree* Compiler::fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* tree)
10400
+ {
10401
+ // In general this tries to simplify `(v1 op c1) op c2` into `v1 op (c1 op c2)`
10402
+ // so that we can fold it down to `v1 op c3`. Returns nullptr whenever the pattern does not apply.
10403
+ assert(opts.OptimizationEnabled());
10404
+
10405
+ NamedIntrinsic intrinsicId = tree->GetHWIntrinsicId();
10406
+ var_types simdType = tree->TypeGet();
10407
+ CorInfoType simdBaseJitType = tree->GetSimdBaseJitType();
10408
+ var_types simdBaseType = tree->GetSimdBaseType();
10409
+ unsigned simdSize = tree->GetSimdSize();
10410
+
10411
+ if (!varTypeIsSIMD(simdType)) // only trees producing a SIMD-typed value are handled
10412
+ {
10413
+ return nullptr;
10414
+ }
10415
+
10416
+ bool isScalar = false;
10417
+ genTreeOps oper = tree->HWOperGet(&isScalar); // equivalent generic operator for this intrinsic
10418
+ bool needsMatchingBaseType = false;
10419
+
10420
+ switch (oper)
10421
+ {
10422
+ case GT_ADD:
10423
+ case GT_MUL:
10424
+ {
10425
+ if (varTypeIsIntegral(simdBaseType)) // ADD/MUL are only reassociated for integral base types
10426
+ {
10427
+ needsMatchingBaseType = true; // the inner node must use the same base type (checked below)
10428
+ break;
10429
+ }
10430
+ return nullptr; // non-integral ADD/MUL is left alone
10431
+ }
10432
+
10433
+ case GT_AND:
10434
+ case GT_OR:
10435
+ case GT_XOR:
10436
+ {
10437
+ break; // bitwise operations: no base type restriction is applied
10438
+ }
10439
+
10440
+ default:
10441
+ {
10442
+ return nullptr; // any other operation is not handled here
10443
+ }
10444
+ }
10445
+
10446
+ // op1 can be GT_COMMA, in which case we're going to fold
10447
+ // `(..., (v1 op c1)) op c2` to `(..., (v1 op c3))`
10448
+
10449
+ GenTree* op1 = tree->Op(1);
10450
+ GenTree* effectiveOp1 = op1->gtEffectiveVal(); // differs from op1 when op1 is a comma (see the check near the end)
10451
+
10452
+ if (!effectiveOp1->OperIsHWIntrinsic())
10453
+ {
10454
+ return nullptr;
10455
+ }
10456
+
10457
+ GenTreeHWIntrinsic* intrinOp1 = effectiveOp1->AsHWIntrinsic();
10458
+
10459
+ bool op1IsScalar = false;
10460
+ genTreeOps op1Oper = intrinOp1->HWOperGet(&op1IsScalar);
10461
+
10462
+ if ((op1Oper != oper) || (op1IsScalar != isScalar)) // inner node must be the same operation with the same scalar-ness
10463
+ {
10464
+ return nullptr;
10465
+ }
10466
+ assert(intrinOp1->GetHWIntrinsicId() == intrinsicId); // matching oper and scalar-ness is expected to imply the same intrinsic
10467
+
10468
+ if (needsMatchingBaseType && (intrinOp1->GetSimdBaseType() != simdBaseType))
10469
+ {
10470
+ return nullptr;
10471
+ }
10472
+
10473
+ if (!intrinOp1->Op(2)->IsCnsVec() || !tree->Op(2)->IsCnsVec()) // both second operands (c1 and c2) must be vector constants
10474
+ {
10475
+ return nullptr;
10476
+ }
10477
+
10478
+ if (!fgGlobalMorph && (effectiveOp1 != op1))
10479
+ {
10480
+ // Since 'tree->Op(1)' can have complex structure; e.g. `(.., (.., op1))`
10481
+ // don't run the optimization for such trees outside of global morph.
10482
+ // Otherwise, there is a chance of violating VNs invariants and/or modifying a tree
10483
+ // that is an active CSE candidate.
10484
+ return nullptr;
10485
+ }
10486
+
10487
+ if (gtIsActiveCSE_Candidate(tree) || gtIsActiveCSE_Candidate(effectiveOp1))
10488
+ {
10489
+ // In the case op1 is a comma, the optimization removes 'tree' from IR and changes
10490
+ // the value of op1 and otherwise we're changing the value of 'tree' instead
10491
+ return nullptr;
10492
+ }
10493
+
10494
+ GenTreeVecCon* cns1 = intrinOp1->Op(2)->AsVecCon(); // c1: constant of the inner operation
10495
+ GenTreeVecCon* cns2 = tree->Op(2)->AsVecCon(); // c2: constant of the outer operation
10496
+
10497
+ assert(cns1->TypeIs(simdType));
10498
+ assert(cns2->TypeIs(simdType));
10499
+
10500
+ if (gtIsActiveCSE_Candidate(cns1) || gtIsActiveCSE_Candidate(cns2))
10501
+ {
10502
+ // The optimization removes 'cns2' from IR and changes the value of 'cns1'.
10503
+ return nullptr;
10504
+ }
10505
+
10506
+ GenTree* res = gtNewSimdHWIntrinsicNode(simdType, cns1, cns2, intrinsicId, simdBaseJitType, simdSize); // temporary `c1 op c2` node
10507
+ res = gtFoldExprHWIntrinsic(res->AsHWIntrinsic()); // fold it down to the single constant c3
10508
+
10509
+ assert(res == cns1); // folding is expected to hand back the cns1 node, now holding c3
10510
+ assert(res->IsCnsVec());
10511
+
10512
+ if (effectiveOp1 != op1)
10513
+ {
10514
+ // We had a comma, so pull the VNs from node
10515
+ op1->SetVNsFromNode(tree);
10516
+
10517
+ DEBUG_DESTROY_NODE(cns2);
10518
+ DEBUG_DESTROY_NODE(tree);
10519
+
10520
+ return op1; // the comma tree, whose inner node now uses c3, replaces 'tree'
10521
+ }
10522
+ else
10523
+ {
10524
+ // We had a simple tree, so pull the value and new constant up
10525
+
10526
+ tree->Op(1) = intrinOp1->Op(1); // v1
10527
+ tree->Op(2) = intrinOp1->Op(2); // c3 (the cns1 node)
10528
+
10529
+ DEBUG_DESTROY_NODE(cns2);
10530
+ DEBUG_DESTROY_NODE(intrinOp1);
10531
+
10532
+ assert(tree->Op(2) == cns1);
10533
+ return tree; // 'tree' is reused in place as `v1 op c3`
10534
+ }
10535
+ }
10536
+ #endif // FEATURE_HW_INTRINSICS
10370
10537
10371
10538
//------------------------------------------------------------------------
10372
10539
// fgOptimizeCommutativeArithmetic: Optimizes commutative operations.
@@ -11408,6 +11575,18 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree)
11408
11575
11409
11576
if (opts.OptimizationEnabled())
11410
11577
{
11578
+ if (tree->isCommutativeHWIntrinsic())
11579
+ {
11580
+ assert(tree->GetOperandCount() == 2);
11581
+ GenTree*& op1 = tree->Op(1);
11582
+
11583
+ if (op1->IsVectorConst())
11584
+ {
11585
+ // Move constant vectors from op1 to op2 for commutative operations
11586
+ std::swap(op1, tree->Op(2));
11587
+ }
11588
+ }
11589
+
11411
11590
// Try to fold it, maybe we get lucky,
11412
11591
GenTree* foldedTree = gtFoldExpr(tree);
11413
11592
@@ -11423,18 +11602,6 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree)
11423
11602
return foldedTree;
11424
11603
}
11425
11604
11426
- // Move constant vectors from op1 to op2 for commutative and compare operations
11427
- if (tree->isCommutativeHWIntrinsic())
11428
- {
11429
- assert(tree->GetOperandCount() == 2);
11430
- GenTree*& op1 = tree->Op(1);
11431
-
11432
- if (op1->IsVectorConst())
11433
- {
11434
- std::swap(op1, tree->Op(2));
11435
- }
11436
- }
11437
-
11438
11605
if (allArgsAreConst && tree->IsVectorCreate())
11439
11606
{
11440
11607
// Avoid unexpected CSE for constant arguments for Vector_.Create
@@ -11446,10 +11613,7 @@ GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree)
11446
11613
}
11447
11614
}
11448
11615
11449
- if (!optValnumCSE_phase)
11450
- {
11451
- return fgOptimizeHWIntrinsic(tree->AsHWIntrinsic());
11452
- }
11616
+ return fgOptimizeHWIntrinsic(tree->AsHWIntrinsic());
11453
11617
}
11454
11618
11455
11619
return tree;
0 commit comments