Skip to content

Commit 33ca32d

Browse files
Accelerate Vector128<long>::op_Multiply on x64 (#103555)
Co-authored-by: Tanner Gooding <tagoo@outlook.com>
1 parent 9501cce commit 33ca32d

File tree

4 files changed

+70
-17
lines changed

4 files changed

+70
-17
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21492,19 +21492,63 @@ GenTree* Compiler::gtNewSimdBinOpNode(
2149221492
{
2149321493
assert((simdSize == 16) || (simdSize == 32) || (simdSize == 64));
2149421494

21495-
if (simdSize == 64)
21495+
bool isV512Supported = false;
21496+
if (compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ_VL))
2149621497
{
21497-
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ));
21498-
intrinsic = NI_AVX512DQ_MultiplyLow;
21499-
}
21500-
else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
21501-
{
21502-
intrinsic = NI_AVX10v1_MultiplyLow;
21498+
if (simdSize == 64)
21499+
{
21500+
assert(isV512Supported);
21501+
intrinsic = NI_AVX512DQ_MultiplyLow;
21502+
}
21503+
else
21504+
{
21505+
intrinsic = !isV512Supported ? NI_AVX10v1_MultiplyLow : NI_AVX512DQ_VL_MultiplyLow;
21506+
}
2150321507
}
2150421508
else
2150521509
{
21506-
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ_VL));
21507-
intrinsic = NI_AVX512DQ_VL_MultiplyLow;
21510+
assert(((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41)) ||
21511+
((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2)));
21512+
21513+
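// Make op1 and op2 multi-use, since each operand is consumed twice below (once by the widening multiply and once by the cross-term computation):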
21514+
GenTree* op1Dup = fgMakeMultiUse(&op1);
21515+
GenTree* op2Dup = fgMakeMultiUse(&op2);
21516+
21517+
const bool is256 = simdSize == 32;
21518+
21519+
// Vector256<ulong> tmp0 = Avx2.Multiply(left, right);
21520+
GenTreeHWIntrinsic* tmp0 =
21521+
gtNewSimdHWIntrinsicNode(type, op1, op2, is256 ? NI_AVX2_Multiply : NI_SSE2_Multiply,
21522+
CORINFO_TYPE_ULONG, simdSize);
21523+
21524+
// Vector256<uint> tmp1 = Avx2.Shuffle(right.AsUInt32(), ZWXY);
21525+
GenTree* shuffleMask = gtNewIconNode(SHUFFLE_ZWXY, TYP_INT);
21526+
GenTreeHWIntrinsic* tmp1 = gtNewSimdHWIntrinsicNode(type, op2Dup, shuffleMask,
21527+
is256 ? NI_AVX2_Shuffle : NI_SSE2_Shuffle,
21528+
CORINFO_TYPE_UINT, simdSize);
21529+
21530+
// Vector256<uint> tmp2 = Avx2.MultiplyLow(left.AsUInt32(), tmp1);
21531+
GenTreeHWIntrinsic* tmp2 =
21532+
gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1,
21533+
is256 ? NI_AVX2_MultiplyLow : NI_SSE41_MultiplyLow,
21534+
CORINFO_TYPE_UINT, simdSize);
21535+
21536+
// Vector256<int> tmp3 = Avx2.HorizontalAdd(tmp2.AsInt32(), Vector256<int>.Zero);
21537+
GenTreeHWIntrinsic* tmp3 =
21538+
gtNewSimdHWIntrinsicNode(type, tmp2, gtNewZeroConNode(type),
21539+
is256 ? NI_AVX2_HorizontalAdd : NI_SSSE3_HorizontalAdd,
21540+
CORINFO_TYPE_UINT, simdSize);
21541+
21542+
// Vector256<int> tmp4 = Avx2.Shuffle(tmp3, YWXW);
21543+
shuffleMask = gtNewIconNode(SHUFFLE_YWXW, TYP_INT);
21544+
GenTreeHWIntrinsic* tmp4 =
21545+
gtNewSimdHWIntrinsicNode(type, tmp3, shuffleMask, is256 ? NI_AVX2_Shuffle : NI_SSE2_Shuffle,
21546+
CORINFO_TYPE_UINT, simdSize);
21547+
21548+
// result = tmp0 + tmp4;
21549+
op1 = tmp0;
21550+
op2 = tmp4;
21551+
intrinsic = simdSize == 32 ? NI_AVX2_Add : NI_SSE2_Add;
2150821552
}
2150921553

2151021554
break;

src/coreclr/jit/hwintrinsicxarch.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2750,17 +2750,25 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
27502750

27512751
if (varTypeIsLong(simdBaseType))
27522752
{
2753-
if (simdSize != 64 && !canUseEvexEncoding())
2753+
if (TARGET_POINTER_SIZE == 4)
27542754
{
2755-
// TODO-XARCH-CQ: We should support long/ulong multiplication
2755+
// TODO-XARCH-CQ: Support long/ulong multiplication on 32-bit targets
27562756
break;
27572757
}
2758-
// else if simdSize == 64 then above assert would check if baseline isa supported
27592758

2760-
#if defined(TARGET_X86)
2761-
// TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast
2762-
break;
2763-
#endif // TARGET_X86
2759+
if ((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2))
2760+
{
2761+
// Emulate NI_AVX512DQ_VL_MultiplyLow with AVX2 for SIMD32
2762+
}
2763+
else if ((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41))
2764+
{
2765+
// Emulate NI_AVX512DQ_VL_MultiplyLow with SSE41 for SIMD16
2766+
}
2767+
else
2768+
{
2769+
// Neither emulation path above applies for this simdSize/ISA combination; use the software fallback
2770+
break;
2771+
}
27642772
}
27652773

27662774
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;

src/coreclr/jit/simd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,6 +1019,7 @@ void BroadcastConstantToSimd(TSimd* result, TBase arg0)
10191019
#define SHUFFLE_XYZW 0x1B // 00 01 10 11
10201020
#define SHUFFLE_YXYX 0x44 // 01 00 01 00
10211021
#define SHUFFLE_YWXZ 0x72 // 01 11 00 10
1022+
#define SHUFFLE_YWXW 0x73 // 01 11 00 11
10221023
#define SHUFFLE_YYZZ 0x5A // 01 01 10 10
10231024
#define SHUFFLE_ZXXX 0x80 // 10 00 00 00
10241025
#define SHUFFLE_ZXXY 0x81 // 10 00 00 01

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Licensed to the .NET Foundation under one or more agreements.
1+
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System.Diagnostics;

0 commit comments

Comments
 (0)