@@ -868,6 +868,11 @@ void Lowering::LowerCast(GenTree* tree)
     {
         // If we don't have AVX10v2 saturating conversion instructions for
         // floating->integral, we have to handle the saturation logic here.
+        //
+        // Since this implements ordinary casts, we bend the normal rules around ISA support
+        // for HWIntrinsics and assume the baseline ISA set (SSE2 and below) is available.
+        // For this reason, we eschew most gentree convenience methods (e.g. gtNewSimdBinOpNode)
+        // and create the HWIntrinsic nodes explicitly, as most helpers assert ISA support.
 
         JITDUMP("LowerCast before:\n");
         DISPTREERANGE(BlockRange(), tree);
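For context, a minimal scalar model of the saturating semantics this lowering emulates (a sketch, not code from this change; it assumes .NET's saturating-cast definition, where NaN converts to 0 and out-of-range values clamp to the destination type's limits; the function name is illustrative):

#include <cmath>
#include <cstdint>
#include <limits>

// Scalar reference for a saturating float->int32 cast: NaN -> 0,
// overflow clamps to int32 min/max, in-range values truncate toward zero.
static int32_t SaturatingCastToInt32(float value)
{
    if (std::isnan(value))
    {
        return 0;
    }
    if (value <= static_cast<float>(std::numeric_limits<int32_t>::min()))
    {
        return std::numeric_limits<int32_t>::min();
    }
    if (value >= static_cast<float>(std::numeric_limits<int32_t>::max()))
    {
        return std::numeric_limits<int32_t>::max();
    }
    return static_cast<int32_t>(value);
}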
@@ -904,8 +909,8 @@ void Lowering::LowerCast(GenTree* tree)
             GenTree* zero     = comp->gtNewZeroConNode(TYP_SIMD16);
             GenTree* fixupVal =
                 comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, srcVector, zero, maxScalarIntrinsic, srcBaseType, 16);
-
-            GenTree* toScalar = comp->gtNewSimdToScalarNode(srcType, fixupVal, srcBaseType, 16);
+            GenTree* toScalar =
+                comp->gtNewSimdHWIntrinsicNode(srcType, fixupVal, NI_Vector128_ToScalar, srcBaseType, 16);
 
             castRange.InsertAtEnd(zero);
             castRange.InsertAtEnd(fixupVal);
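The zero/fixupVal pair above clamps the source against zero before conversion, which folds both negative inputs and NaN to 0: maxss returns its second source operand when an operand is NaN, so the operand order matters. Expressed directly with SSE intrinsics (a sketch of the equivalent scalar computation, not the JIT's code; the function name is illustrative):

#include <xmmintrin.h>

// Clamp negatives and NaN in the low lane to 0.0f before conversion.
static float FixupAgainstZero(float src)
{
    __m128 srcVector = _mm_set_ss(src);
    __m128 zero      = _mm_setzero_ps();
    __m128 fixupVal  = _mm_max_ss(srcVector, zero); // NaN or negative -> 0.0f
    return _mm_cvtss_f32(fixupVal);                 // ToScalar()
}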
@@ -915,9 +920,6 @@ void Lowering::LowerCast(GenTree* tree)
         }
         else
         {
-            assert(comp->IsBaselineSimdIsaSupportedDebugOnly());
-            assert(!TargetArchitecture::Is64Bit || comp->compIsaSupportedDebugOnly(InstructionSet_SSE2_X64));
-
             // We need to fix up NaN as well as handle possible overflow. Signed conversions
             // return int/long.MinValue for any overflow, which is correct for saturation of
             // negative, but the result must be replaced with MaxValue for positive overflow.
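The fixups are needed because the SSE truncating conversions return the "integer indefinite" value (0x80000000, i.e. int.MinValue) for any input they cannot represent. A small standalone demonstration using compiler intrinsics (a sketch, not part of the change):

#include <cmath>
#include <cstdio>
#include <xmmintrin.h>

int main()
{
    // cvttss2si returns int.MinValue for NaN and for both overflow
    // directions, so only NaN and positive overflow need patching.
    printf("%d\n", _mm_cvttss_si32(_mm_set_ss(1e10f)));         // -2147483648
    printf("%d\n", _mm_cvttss_si32(_mm_set_ss(-1e10f)));        // -2147483648
    printf("%d\n", _mm_cvttss_si32(_mm_set_ss(std::nanf("")))); // -2147483648
    return 0;
}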
@@ -953,16 +955,14 @@ void Lowering::LowerCast(GenTree* tree)
                 if (srcType == TYP_FLOAT)
                 {
                     maxFloatSimdVal->f32[0] = 4294967296.0f;
-                    convertIntrinsic        = comp->compOpportunisticallyDependsOn(InstructionSet_SSE_X64)
-                                                  ? NI_SSE_X64_ConvertToInt64WithTruncation
-                                                  : NI_SSE2_ConvertToVector128Int32WithTruncation;
+                    convertIntrinsic        = TargetArchitecture::Is64Bit ? NI_SSE_X64_ConvertToInt64WithTruncation
+                                                                          : NI_SSE2_ConvertToVector128Int32WithTruncation;
                 }
                 else
                 {
                     maxFloatSimdVal->f64[0] = 4294967296.0;
-                    convertIntrinsic        = comp->compOpportunisticallyDependsOn(InstructionSet_SSE2_X64)
-                                                  ? NI_SSE2_X64_ConvertToInt64WithTruncation
-                                                  : NI_SSE2_ConvertToVector128Int32WithTruncation;
+                    convertIntrinsic        = TargetArchitecture::Is64Bit ? NI_SSE2_X64_ConvertToInt64WithTruncation
+                                                                          : NI_SSE2_ConvertToVector128Int32WithTruncation;
                 }
                 break;
             }
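The 4294967296.0 bound here is exactly 2^32, one past uint.MaxValue, which suggests this case handles an unsigned 32-bit destination: on 64-bit targets the result is produced by a signed 64-bit truncating conversion, which is exact across the whole uint range, so anything at or above 2^32 is positive overflow that must saturate. A scalar sketch of that 64-bit path (assuming the NaN/negative fixup has already run; not code from this change):

#include <cstdint>

static uint32_t SaturateToUInt32(double fixupVal)
{
    const double maxFloatVal = 4294967296.0; // 2^32
    if (fixupVal >= maxFloatVal)
    {
        return UINT32_MAX; // positive overflow saturates to all-ones
    }
    // In-range values fit in int64, so the truncating conversion is exact.
    return static_cast<uint32_t>(static_cast<int64_t>(fixupVal));
}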
@@ -1023,6 +1023,7 @@ void Lowering::LowerCast(GenTree* tree)
             // var fixupVal = Sse.And(srcVec, nanMask);
             // convertResult = Sse.ConvertToInt32WithTruncation(fixupVal);
 
+            NamedIntrinsic andIntrinsic = (srcType == TYP_FLOAT) ? NI_SSE_And : NI_SSE2_And;
             NamedIntrinsic compareNaNIntrinsic =
                 (srcType == TYP_FLOAT) ? NI_SSE_CompareScalarOrdered : NI_SSE2_CompareScalarOrdered;
 
@@ -1033,8 +1034,9 @@ void Lowering::LowerCast(GenTree* tree)
             castRange.InsertAtEnd(srcClone);
             castRange.InsertAtEnd(nanMask);
 
-            srcClone          = comp->gtClone(srcVector);
-            GenTree* fixupVal = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, nanMask, srcClone, srcBaseType, 16);
+            srcClone          = comp->gtClone(srcVector);
+            GenTree* fixupVal =
+                comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, nanMask, srcClone, andIntrinsic, srcBaseType, 16);
 
             castRange.InsertAtEnd(srcClone);
             castRange.InsertAtEnd(fixupVal);
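This is the NaN fixup from the C# comment earlier in the function: an ordered compare of the value against itself yields an all-ones mask exactly when it is not NaN, so ANDing the mask with the source zeroes out NaN and passes every other value through. A direct intrinsics sketch of the same idea (low lane only, mirroring the scalar compare; not the JIT's code):

#include <xmmintrin.h>

// Replace NaN in the low lane with +0.0f; other values are unchanged.
static __m128 FixupNaN(__m128 srcVec)
{
    __m128 nanMask = _mm_cmpord_ss(srcVec, srcVec); // all-ones unless NaN
    return _mm_and_ps(nanMask, srcVec);             // NaN lane -> 0.0f
}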
@@ -1120,15 +1122,16 @@ void Lowering::LowerCast(GenTree* tree)
                 // This creates the equivalent of the following C# code:
                 // floorVal = ((srcVector.AsUInt64() >>> 21) << 21).AsDouble();
 
-                GenTree* twentyOne  = comp->gtNewIconNode(21);
-                GenTree* rightShift = comp->gtNewSimdBinOpNode(GT_RSZ, TYP_SIMD16, floorVal, twentyOne,
-                                                               CORINFO_TYPE_ULONG, 16);
+                GenTree* twentyOne  = comp->gtNewIconNode(21);
+                GenTree* rightShift =
+                    comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, floorVal, twentyOne,
+                                                   NI_SSE2_ShiftRightLogical, CORINFO_TYPE_ULONG, 16);
                 castRange.InsertAtEnd(twentyOne);
                 castRange.InsertAtEnd(rightShift);
 
                 twentyOne = comp->gtClone(twentyOne);
-                floorVal  = comp->gtNewSimdBinOpNode(GT_LSH, TYP_SIMD16, rightShift, twentyOne,
-                                                     CORINFO_TYPE_ULONG, 16);
+                floorVal  = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, rightShift, twentyOne,
+                                                           NI_SSE2_ShiftLeftLogical, CORINFO_TYPE_ULONG, 16);
                 castRange.InsertAtEnd(twentyOne);
                 castRange.InsertAtEnd(floorVal);
             }
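The shift pair implements exactly the C# comment above: treating the double as raw bits and clearing its low 21 bits truncates the mantissa, rounding the value toward zero at a power-of-two granularity determined by its exponent. A plain scalar rendering of the same bit manipulation (a sketch, not the JIT's code):

#include <cstdint>
#include <cstring>

// floorVal = ((srcVector.AsUInt64() >>> 21) << 21).AsDouble(), scalar form.
static double ClearLow21Bits(double srcVal)
{
    uint64_t bits;
    std::memcpy(&bits, &srcVal, sizeof(bits)); // AsUInt64()
    bits = (bits >> 21) << 21;                 // zero the low 21 mantissa bits
    std::memcpy(&srcVal, &bits, sizeof(bits)); // AsDouble()
    return srcVal;
}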
@@ -1191,21 +1194,23 @@ void Lowering::LowerCast(GenTree* tree)
 
                     GenTree* thirtyOne = comp->gtNewIconNode(31);
                     GenTree* mask =
-                        comp->gtNewSimdBinOpNode(GT_RSH, TYP_SIMD16, result, thirtyOne, CORINFO_TYPE_INT, 16);
+                        comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, result, thirtyOne,
+                                                       NI_SSE2_ShiftRightArithmetic, CORINFO_TYPE_INT, 16);
                     GenTree* andMask =
-                        comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, mask, negated, dstBaseType, 16);
+                        comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, mask, negated, NI_SSE2_And, dstBaseType, 16);
 
                     castRange.InsertAtEnd(thirtyOne);
                     castRange.InsertAtEnd(mask);
                     castRange.InsertAtEnd(andMask);
 
-                    convertResult =
-                        comp->gtNewSimdBinOpNode(GT_OR, TYP_SIMD16, andMask, resultClone, dstBaseType, 16);
+                    convertResult = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, andMask, resultClone, NI_SSE2_Or,
+                                                                   dstBaseType, 16);
                 }
 
                 // Because the results are in a SIMD register, we need to ToScalar() them out.
                 castRange.InsertAtEnd(convertResult);
-                convertResult = comp->gtNewSimdToScalarNode(TYP_INT, convertResult, dstBaseType, 16);
+                convertResult = comp->gtNewSimdHWIntrinsicNode(TYP_INT, convertResult, NI_Vector128_ToScalar,
+                                                               dstBaseType, 16);
             }
             else
             {
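The shift/and/or triple above is the classic branchless select-on-sign idiom: the arithmetic shift smears the sign bit of result into an all-ones or all-zero mask, and the OR merges in correction bits only when the mask is set. A scalar model of the node sequence (names follow the diff, but this scalar form is illustrative only; how 'negated' is computed lies outside these hunks):

#include <cstdint>

static int32_t BlendOnSign(int32_t result, int32_t negated)
{
    int32_t mask    = result >> 31; // arithmetic shift: all-ones if result < 0
    int32_t andMask = mask & negated;
    return andMask | result;        // correction bits OR'd in only when negative
}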