Skip to content

Commit 8ac975e

Browse files
committed
allow any baseline intrinsics in lowering
1 parent fd8933a commit 8ac975e

File tree

6 files changed

+78
-56
lines changed

6 files changed

+78
-56
lines changed

src/coreclr/jit/hwintrinsic.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -546,6 +546,7 @@ struct HWIntrinsicInfo
546546
static bool isScalarIsa(CORINFO_InstructionSet isa);
547547

548548
#ifdef TARGET_XARCH
549+
static bool isBaselineIsa(CORINFO_InstructionSet isa);
549550
static bool isAVX2GatherIntrinsic(NamedIntrinsic id);
550551
static FloatComparisonMode lookupFloatComparisonModeForSwappedArgs(FloatComparisonMode comparison);
551552
static NamedIntrinsic lookupIdForFloatComparisonMode(NamedIntrinsic intrinsic,

src/coreclr/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -403,8 +403,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
403403
GenTree* embMaskNode = nullptr;
404404
GenTree* embMaskOp = nullptr;
405405

406-
// We need to validate that other phases of the compiler haven't introduced unsupported intrinsics
407-
assert(compiler->compIsaSupportedDebugOnly(isa));
406+
// We need to validate that other phases of the compiler haven't introduced unsupported intrinsics.
407+
// We allow an exception for baseline intrinsics to be introduced unconditionally in LIR.
408+
assert(compiler->compIsaSupportedDebugOnly(isa) || HWIntrinsicInfo::isBaselineIsa(isa));
408409
assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId));
409410
assert(!HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) || !varTypeIsSmall(node->GetSimdBaseType()));
410411

@@ -1827,7 +1828,6 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
18271828
regNumber targetReg = node->GetRegNum();
18281829
var_types baseType = node->GetSimdBaseType();
18291830

1830-
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE));
18311831
assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE));
18321832

18331833
GenTree* op1 = (node->GetOperandCount() >= 1) ? node->Op(1) : nullptr;

src/coreclr/jit/hwintrinsicxarch.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,39 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic
855855
}
856856
}
857857

858+
//------------------------------------------------------------------------
859+
// isBaselineIsa: Gets a value that indicates whether the InstructionSet is
860+
// part of the required hardware support for this platform
861+
//
862+
// Arguments:
863+
// isa - The InstructionSet to check
864+
//
865+
// Return Value:
866+
// true if isa is part of the baseline; otherwise, false
867+
bool HWIntrinsicInfo::isBaselineIsa(CORINFO_InstructionSet isa)
868+
{
869+
switch (isa)
870+
{
871+
case InstructionSet_X86Base:
872+
case InstructionSet_SSE:
873+
case InstructionSet_SSE2:
874+
#ifdef TARGET_AMD64
875+
case InstructionSet_X86Base_X64:
876+
case InstructionSet_SSE_X64:
877+
case InstructionSet_SSE2_X64:
878+
#endif // TARGET_AMD64
879+
case InstructionSet_Vector128:
880+
{
881+
return true;
882+
}
883+
884+
default:
885+
{
886+
return false;
887+
}
888+
}
889+
}
890+
858891
//------------------------------------------------------------------------
859892
// isFullyImplementedIsa: Gets a value that indicates whether the InstructionSet is fully implemented
860893
//

src/coreclr/jit/lowerxarch.cpp

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -868,6 +868,11 @@ void Lowering::LowerCast(GenTree* tree)
868868
{
869869
// If we don't have AVX10v2 saturating conversion instructions for
870870
// floating->integral, we have to handle the saturation logic here.
871+
//
872+
// Since this implements ordinary casts, we bend the normal rules around ISA support
873+
// for HWIntrinsics and assume the baseline ISA set (SSE2 and below) is available.
874+
// For this reason, we eschew most gentree convenience methods (e.g. gtNewSimdBinOpNode)
875+
// and create the HWIntrinsic nodes explicitly, as most helpers assert ISA support.
871876

872877
JITDUMP("LowerCast before:\n");
873878
DISPTREERANGE(BlockRange(), tree);
@@ -904,8 +909,8 @@ void Lowering::LowerCast(GenTree* tree)
904909
GenTree* zero = comp->gtNewZeroConNode(TYP_SIMD16);
905910
GenTree* fixupVal =
906911
comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, srcVector, zero, maxScalarIntrinsic, srcBaseType, 16);
907-
908-
GenTree* toScalar = comp->gtNewSimdToScalarNode(srcType, fixupVal, srcBaseType, 16);
912+
GenTree* toScalar =
913+
comp->gtNewSimdHWIntrinsicNode(srcType, fixupVal, NI_Vector128_ToScalar, srcBaseType, 16);
909914

910915
castRange.InsertAtEnd(zero);
911916
castRange.InsertAtEnd(fixupVal);
@@ -915,9 +920,6 @@ void Lowering::LowerCast(GenTree* tree)
915920
}
916921
else
917922
{
918-
assert(comp->IsBaselineSimdIsaSupportedDebugOnly());
919-
assert(!TargetArchitecture::Is64Bit || comp->compIsaSupportedDebugOnly(InstructionSet_SSE2_X64));
920-
921923
// We need to fix up NaN as well as handle possible overflow. Signed conversions
922924
// return int/long.MinValue for any overflow, which is correct for saturation of
923925
// negative, but the result must be replaced with MaxValue for positive overflow.
@@ -953,16 +955,14 @@ void Lowering::LowerCast(GenTree* tree)
953955
if (srcType == TYP_FLOAT)
954956
{
955957
maxFloatSimdVal->f32[0] = 4294967296.0f;
956-
convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_SSE_X64)
957-
? NI_SSE_X64_ConvertToInt64WithTruncation
958-
: NI_SSE2_ConvertToVector128Int32WithTruncation;
958+
convertIntrinsic = TargetArchitecture::Is64Bit ? NI_SSE_X64_ConvertToInt64WithTruncation
959+
: NI_SSE2_ConvertToVector128Int32WithTruncation;
959960
}
960961
else
961962
{
962963
maxFloatSimdVal->f64[0] = 4294967296.0;
963-
convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_SSE2_X64)
964-
? NI_SSE2_X64_ConvertToInt64WithTruncation
965-
: NI_SSE2_ConvertToVector128Int32WithTruncation;
964+
convertIntrinsic = TargetArchitecture::Is64Bit ? NI_SSE2_X64_ConvertToInt64WithTruncation
965+
: NI_SSE2_ConvertToVector128Int32WithTruncation;
966966
}
967967
break;
968968
}
@@ -1023,6 +1023,7 @@ void Lowering::LowerCast(GenTree* tree)
10231023
// var fixupVal = Sse.And(srcVec, nanMask);
10241024
// convertResult = Sse.ConvertToInt32WithTruncation(fixupVal);
10251025

1026+
NamedIntrinsic andIntrinsic = (srcType == TYP_FLOAT) ? NI_SSE_And : NI_SSE2_And;
10261027
NamedIntrinsic compareNaNIntrinsic =
10271028
(srcType == TYP_FLOAT) ? NI_SSE_CompareScalarOrdered : NI_SSE2_CompareScalarOrdered;
10281029

@@ -1033,8 +1034,9 @@ void Lowering::LowerCast(GenTree* tree)
10331034
castRange.InsertAtEnd(srcClone);
10341035
castRange.InsertAtEnd(nanMask);
10351036

1036-
srcClone = comp->gtClone(srcVector);
1037-
GenTree* fixupVal = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, nanMask, srcClone, srcBaseType, 16);
1037+
srcClone = comp->gtClone(srcVector);
1038+
GenTree* fixupVal =
1039+
comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, nanMask, srcClone, andIntrinsic, srcBaseType, 16);
10381040

10391041
castRange.InsertAtEnd(srcClone);
10401042
castRange.InsertAtEnd(fixupVal);
@@ -1120,15 +1122,16 @@ void Lowering::LowerCast(GenTree* tree)
11201122
// This creates the equivalent of the following C# code:
11211123
// floorVal = ((srcVector.AsUInt64() >>> 21) << 21).AsDouble();
11221124

1123-
GenTree* twentyOne = comp->gtNewIconNode(21);
1124-
GenTree* rightShift = comp->gtNewSimdBinOpNode(GT_RSZ, TYP_SIMD16, floorVal, twentyOne,
1125-
CORINFO_TYPE_ULONG, 16);
1125+
GenTree* twentyOne = comp->gtNewIconNode(21);
1126+
GenTree* rightShift =
1127+
comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, floorVal, twentyOne,
1128+
NI_SSE2_ShiftRightLogical, CORINFO_TYPE_ULONG, 16);
11261129
castRange.InsertAtEnd(twentyOne);
11271130
castRange.InsertAtEnd(rightShift);
11281131

11291132
twentyOne = comp->gtClone(twentyOne);
1130-
floorVal = comp->gtNewSimdBinOpNode(GT_LSH, TYP_SIMD16, rightShift, twentyOne,
1131-
CORINFO_TYPE_ULONG, 16);
1133+
floorVal = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, rightShift, twentyOne,
1134+
NI_SSE2_ShiftLeftLogical, CORINFO_TYPE_ULONG, 16);
11321135
castRange.InsertAtEnd(twentyOne);
11331136
castRange.InsertAtEnd(floorVal);
11341137
}
@@ -1191,21 +1194,23 @@ void Lowering::LowerCast(GenTree* tree)
11911194

11921195
GenTree* thirtyOne = comp->gtNewIconNode(31);
11931196
GenTree* mask =
1194-
comp->gtNewSimdBinOpNode(GT_RSH, TYP_SIMD16, result, thirtyOne, CORINFO_TYPE_INT, 16);
1197+
comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, result, thirtyOne,
1198+
NI_SSE2_ShiftRightArithmetic, CORINFO_TYPE_INT, 16);
11951199
GenTree* andMask =
1196-
comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, mask, negated, dstBaseType, 16);
1200+
comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, mask, negated, NI_SSE2_And, dstBaseType, 16);
11971201

11981202
castRange.InsertAtEnd(thirtyOne);
11991203
castRange.InsertAtEnd(mask);
12001204
castRange.InsertAtEnd(andMask);
12011205

1202-
convertResult =
1203-
comp->gtNewSimdBinOpNode(GT_OR, TYP_SIMD16, andMask, resultClone, dstBaseType, 16);
1206+
convertResult = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, andMask, resultClone, NI_SSE2_Or,
1207+
dstBaseType, 16);
12041208
}
12051209

12061210
// Because the results are in a SIMD register, we need to ToScalar() them out.
12071211
castRange.InsertAtEnd(convertResult);
1208-
convertResult = comp->gtNewSimdToScalarNode(TYP_INT, convertResult, dstBaseType, 16);
1212+
convertResult = comp->gtNewSimdHWIntrinsicNode(TYP_INT, convertResult, NI_Vector128_ToScalar,
1213+
dstBaseType, 16);
12091214
}
12101215
else
12111216
{

src/coreclr/jit/morph.cpp

Lines changed: 9 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -294,20 +294,14 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
294294
if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
295295
{
296296
if (srcType == TYP_FLOAT
297-
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
298-
// Arm64: src = float, dst is overflow conversion.
297+
#if defined(TARGET_64BIT)
298+
// 64-bit: src = float, dst is overflow conversion.
299299
// This goes through helper and hence src needs to be converted to double.
300300
&& tree->gtOverflow()
301-
#elif defined(TARGET_AMD64)
302-
// Amd64: src = float, dst = overflow conversion or SSE2 is not enabled
303-
&& (tree->gtOverflow() || !IsBaselineSimdIsaSupported())
304-
#elif defined(TARGET_ARM)
305-
// Arm: src = float, dst = int64/uint64 or overflow conversion.
306-
&& (tree->gtOverflow() || varTypeIsLong(dstType))
307301
#else
308-
// x86: src = float, dst = int64/uint64 or overflow conversion or SSE2 is not enabled
309-
&& (tree->gtOverflow() || varTypeIsLong(dstType) || !IsBaselineSimdIsaSupported())
310-
#endif
302+
// 32-bit: src = float, dst = int64/uint64 or overflow conversion.
303+
&& (tree->gtOverflow() || varTypeIsLong(dstType))
304+
#endif // TARGET_64BIT
311305
)
312306
{
313307
oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
@@ -328,39 +322,24 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
328322
{
329323
if (!tree->gtOverflow())
330324
{
331-
// ARM64 and LoongArch64 optimize all non-overflow checking conversions
332-
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
325+
#ifdef TARGET_64BIT
333326
return nullptr;
334327
#else
335-
#if defined(TARGET_XARCH)
336-
if (IsBaselineSimdIsaSupported() && (!varTypeIsLong(dstType) || TargetArchitecture::Is64Bit))
328+
if (!varTypeIsLong(dstType))
337329
{
338330
return nullptr;
339331
}
340-
#endif // TARGET_XARCH
332+
341333
switch (dstType)
342334
{
343-
case TYP_INT:
344-
#ifdef TARGET_XARCH
345-
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
346-
#endif // TARGET_XARCH
347-
return nullptr;
348-
349-
case TYP_UINT:
350-
#if defined(TARGET_ARM)
351-
return nullptr;
352-
#endif
353-
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
354-
355335
case TYP_LONG:
356336
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
357-
358337
case TYP_ULONG:
359338
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
360339
default:
361340
unreached();
362341
}
363-
#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
342+
#endif // TARGET_64BIT
364343
}
365344
else
366345
{

src/coreclr/jit/rationalize.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,10 @@ Compiler::fgWalkResult Rationalizer::RationalizeVisitor::PreOrderVisit(GenTree**
826826
#if defined(FEATURE_HW_INTRINSICS)
827827
else if (node->OperIsHWIntrinsic())
828828
{
829+
// All intrinsics introduced in HIR must be explicitly supported.
830+
NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->GetHWIntrinsicId();
831+
assert(m_compiler->compIsaSupportedDebugOnly(HWIntrinsicInfo::lookupIsa(intrinsicId)));
832+
829833
if (node->AsHWIntrinsic()->IsUserCall())
830834
{
831835
m_rationalizer.RewriteHWIntrinsicAsUserCall(use, this->m_ancestors);

0 commit comments

Comments
 (0)