@@ -21492,19 +21492,63 @@ GenTree* Compiler::gtNewSimdBinOpNode(
21492
21492
{
21493
21493
assert((simdSize == 16) || (simdSize == 32) || (simdSize == 64));
21494
21494
21495
- if (simdSize == 64)
21495
+ bool isV512Supported = false;
21496
+ if (compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ_VL))
21496
21497
{
21497
- assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ));
21498
- intrinsic = NI_AVX512DQ_MultiplyLow;
21499
- }
21500
- else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
21501
- {
21502
- intrinsic = NI_AVX10v1_MultiplyLow;
21498
+ if (simdSize == 64)
21499
+ {
21500
+ assert(isV512Supported);
21501
+ intrinsic = NI_AVX512DQ_MultiplyLow;
21502
+ }
21503
+ else
21504
+ {
21505
+ intrinsic = !isV512Supported ? NI_AVX10v1_MultiplyLow : NI_AVX512DQ_VL_MultiplyLow;
21506
+ }
21503
21507
}
21504
21508
else
21505
21509
{
21506
- assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ_VL));
21507
- intrinsic = NI_AVX512DQ_VL_MultiplyLow;
21510
+ assert(((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41)) ||
21511
+ ((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2)));
21512
+
21513
+ // Make op1 and op2 multi-use:
21514
+ GenTree* op1Dup = fgMakeMultiUse(&op1);
21515
+ GenTree* op2Dup = fgMakeMultiUse(&op2);
21516
+
21517
+ const bool is256 = simdSize == 32;
21518
+
21519
+ // Vector256<ulong> tmp0 = Avx2.Multiply(left, right);
21520
+ GenTreeHWIntrinsic* tmp0 =
21521
+ gtNewSimdHWIntrinsicNode(type, op1, op2, is256 ? NI_AVX2_Multiply : NI_SSE2_Multiply,
21522
+ CORINFO_TYPE_ULONG, simdSize);
21523
+
21524
+ // Vector256<uint> tmp1 = Avx2.Shuffle(right.AsUInt32(), ZWXY);
21525
+ GenTree* shuffleMask = gtNewIconNode(SHUFFLE_ZWXY, TYP_INT);
21526
+ GenTreeHWIntrinsic* tmp1 = gtNewSimdHWIntrinsicNode(type, op2Dup, shuffleMask,
21527
+ is256 ? NI_AVX2_Shuffle : NI_SSE2_Shuffle,
21528
+ CORINFO_TYPE_UINT, simdSize);
21529
+
21530
+ // Vector256<uint> tmp2 = Avx2.MultiplyLow(left.AsUInt32(), tmp1);
21531
+ GenTreeHWIntrinsic* tmp2 =
21532
+ gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1,
21533
+ is256 ? NI_AVX2_MultiplyLow : NI_SSE41_MultiplyLow,
21534
+ CORINFO_TYPE_UINT, simdSize);
21535
+
21536
+ // Vector256<int> tmp3 = Avx2.HorizontalAdd(tmp2.AsInt32(), Vector256<int>.Zero);
21537
+ GenTreeHWIntrinsic* tmp3 =
21538
+ gtNewSimdHWIntrinsicNode(type, tmp2, gtNewZeroConNode(type),
21539
+ is256 ? NI_AVX2_HorizontalAdd : NI_SSSE3_HorizontalAdd,
21540
+ CORINFO_TYPE_UINT, simdSize);
21541
+
21542
+ // Vector256<int> tmp4 = Avx2.Shuffle(tmp3, YWXW);
21543
+ shuffleMask = gtNewIconNode(SHUFFLE_YWXW, TYP_INT);
21544
+ GenTreeHWIntrinsic* tmp4 =
21545
+ gtNewSimdHWIntrinsicNode(type, tmp3, shuffleMask, is256 ? NI_AVX2_Shuffle : NI_SSE2_Shuffle,
21546
+ CORINFO_TYPE_UINT, simdSize);
21547
+
21548
+ // result = tmp0 + tmp4;
21549
+ op1 = tmp0;
21550
+ op2 = tmp4;
21551
+ intrinsic = simdSize == 32 ? NI_AVX2_Add : NI_SSE2_Add;
21508
21552
}
21509
21553
21510
21554
break;
0 commit comments