Skip to content

Remove more unnecessary scenarios from HWIntrinsic test templates and fix timeout/failure #85026

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ bool emitter::IsSSEOrAVXInstruction(instruction ins)
}

//------------------------------------------------------------------------
// IsKInstruction: Does this instruction require K register.
// IsKInstruction: Does this instruction require K register?
//
// Arguments:
// ins - The instruction to check.
Expand Down
27 changes: 15 additions & 12 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1786,12 +1786,22 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_AVX512F_ConvertToVector256Int32:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why was this moved up?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be consistent with the other places that specially handle this one path and to avoid the cost of the varTypeIsFloating check for the many intrinsics where it can't be true.

{
if (varTypeIsFloating(baseType))
{
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1);
break;
}
FALLTHROUGH;
}

case NI_AVX512F_ConvertToVector128Int16:
case NI_AVX512F_ConvertToVector128Int32:
case NI_AVX512F_ConvertToVector128UInt16:
case NI_AVX512F_ConvertToVector128UInt32:
case NI_AVX512F_ConvertToVector256Int16:
case NI_AVX512F_ConvertToVector256Int32:
case NI_AVX512F_ConvertToVector256UInt16:
case NI_AVX512F_ConvertToVector256UInt32:
case NI_AVX512BW_ConvertToVector128Byte:
Expand All @@ -1801,18 +1811,11 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node)
{
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);

if (varTypeIsFloating(baseType))
{
genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1);
}
else
{
// These instructions are RM_R and so we need to ensure the targetReg
// is passed in as the RM register and op1 is passed as the R register
// These instructions are RM_R and so we need to ensure the targetReg
// is passed in as the RM register and op1 is passed as the R register

op1Reg = op1->GetRegNum();
emit->emitIns_R_R(ins, attr, op1Reg, targetReg);
}
op1Reg = op1->GetRegNum();
emit->emitIns_R_R(ins, attr, op1Reg, targetReg);
break;
}

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,7 @@ HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt16,
HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so we don't need ConvertToVector128Int32WithTruncation at all for AVX512F?

Copy link
Member Author

@tannergooding tannergooding Apr 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no such API for Avx512F

You have Sse2.ConvertToVector128Int32WithTruncation which handles Vector128<float> to Vector128<int> (4x32 to 4x32) and Avx.ConvertToVector128Int32WithTruncation which handles Vector256<double> to Vector128<int> (4x64 to 4x32)

There is then no need for any Avx512F API since we don't have any 4x128 to 4x32 like scenario. This was jsut a typo that should've been Vector256Int32 since it handles Vector512<double> to Vector256<int> (8x64 to 8x32)

HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Single, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
Expand Down
22 changes: 20 additions & 2 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5759,12 +5759,20 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node)
break;
}

case NI_AVX512F_ConvertToVector256Int32:
{
if (varTypeIsFloating(simdBaseType))
{
break;
}
FALLTHROUGH;
}

case NI_AVX512F_ConvertToVector128Int16:
case NI_AVX512F_ConvertToVector128Int32:
case NI_AVX512F_ConvertToVector128UInt16:
case NI_AVX512F_ConvertToVector128UInt32:
case NI_AVX512F_ConvertToVector256Int16:
case NI_AVX512F_ConvertToVector256Int32:
case NI_AVX512F_ConvertToVector256UInt16:
case NI_AVX512F_ConvertToVector256UInt32:
case NI_AVX512BW_ConvertToVector128Byte:
Expand Down Expand Up @@ -7402,12 +7410,22 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_AVX512F_ConvertToVector256Int32:
{
if (varTypeIsFloating(simdBaseType))
{
// This version is "ins xmm, xmm/mem" and
// gets the default containment handling
break;
}
FALLTHROUGH;
}

case NI_AVX512F_ConvertToVector128Int16:
case NI_AVX512F_ConvertToVector128Int32:
case NI_AVX512F_ConvertToVector128UInt16:
case NI_AVX512F_ConvertToVector128UInt32:
case NI_AVX512F_ConvertToVector256Int16:
case NI_AVX512F_ConvertToVector256Int32:
case NI_AVX512F_ConvertToVector256UInt16:
case NI_AVX512F_ConvertToVector256UInt32:
case NI_AVX512BW_ConvertToVector128Byte:
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,17 @@ public static string GetHardwareIntrinsicId(TargetArchitecture architecture, Typ
{
if (potentialType.Name == "X64")
potentialType = (MetadataType)potentialType.ContainingType;
if (potentialType.Name == "VL")
potentialType = (MetadataType)potentialType.ContainingType;
if (potentialType.Namespace != "System.Runtime.Intrinsics.X86")
return "";
}
else if (architecture == TargetArchitecture.X86)
{
if (potentialType.Name == "X64")
potentialType = (MetadataType)potentialType.ContainingType;
if (potentialType.Name == "VL")
potentialType = (MetadataType)potentialType.ContainingType;
if (potentialType.Namespace != "System.Runtime.Intrinsics.X86")
return "";
}
Expand Down
Loading