Commit 88c0c48

Ensure Vector<T>.op_Multiply is handled as an intrinsic in appropriate cases (#49503)
* Ensure Vector<T>.op_Multiply is handled as an intrinsic in appropriate cases
* Applying formatting patch
* Ensure TYP_BYTE and TYP_UBYTE are handled for Vector<T>.op_Multiply on ARM64
* Ensure broadcast nodes are inserted for all `operator *(Vector<T>, T)`
* Ensure ARM64 uses MultiplyByScalar when it is available
* Applying formatting patch
* Ensure the scalar for op_Multiply is op2 on ARM64
* Ensure we do a full multiply for `Vector<T> * Vector<T>` on ARM64
* Applying formatting patch
Parent: c1c65b2 · Commit: 88c0c48
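
For context: `Vector<T>.operator *(Vector<T>, T)` has broadcast semantics, so the scalar operand is splatted across every lane before an ordinary element-wise multiply is performed. Below is a minimal C++ intrinsics sketch of that shape, assuming SSE4.1 is available; the helper name is invented for illustration and this is not the JIT's own code:

    #include <emmintrin.h> // SSE2: _mm_set1_epi32
    #include <smmintrin.h> // SSE4.1: _mm_mullo_epi32 (pmulld)

    // Illustrative only: what Vector<int> * int reduces to once the scalar is broadcast.
    __m128i MultiplyVectorByScalar(__m128i left, int right)
    {
        __m128i broadcast = _mm_set1_epi32(right); // broadcast the scalar into all four lanes
        return _mm_mullo_epi32(left, broadcast);   // 32-bit element-wise multiply (low 32 bits kept)
    }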

4 files changed: +214 additions, -38 deletions

src/coreclr/jit/simdashwintrinsic.cpp

Lines changed: 208 additions & 33 deletions
@@ -879,56 +879,154 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
 
         case NI_VectorT128_op_Multiply:
         {
-            assert(baseType == TYP_INT);
-
             NamedIntrinsic hwIntrinsic = NI_Illegal;
+            GenTree** broadcastOp = nullptr;
 
-            if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+            if (varTypeIsArithmetic(op1->TypeGet()))
             {
-                hwIntrinsic = NI_SSE41_MultiplyLow;
+                broadcastOp = &op1;
             }
-            else
+            else if (varTypeIsArithmetic(op2->TypeGet()))
             {
-                // op1Dup = op1
-                GenTree* op1Dup;
-                op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
-                                   nullptr DEBUGARG("Clone op1 for Vector<T>.Multiply"));
+                broadcastOp = &op2;
+            }
 
-                // op2Dup = op2
-                GenTree* op2Dup;
-                op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
-                                   nullptr DEBUGARG("Clone op2 for Vector<T>.Multiply"));
+            if (broadcastOp != nullptr)
+            {
+                *broadcastOp = gtNewSimdCreateBroadcastNode(simdType, *broadcastOp, baseType, simdSize,
+                                                            /* isSimdAsHWIntrinsic */ true);
+            }
 
-                // op1 = Sse2.ShiftRightLogical128BitLane(op1, 4)
-                op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(4, TYP_INT),
-                                                 NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize);
+            switch (baseType)
+            {
+                case TYP_SHORT:
+                case TYP_USHORT:
+                {
+                    hwIntrinsic = NI_SSE2_MultiplyLow;
+                    break;
+                }
 
-                // op2 = Sse2.ShiftRightLogical128BitLane(op1, 4)
-                op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, gtNewIconNode(4, TYP_INT),
-                                                 NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize);
+                case TYP_INT:
+                case TYP_UINT:
+                {
+                    if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+                    {
+                        hwIntrinsic = NI_SSE41_MultiplyLow;
+                    }
+                    else
+                    {
+                        // op1Dup = op1
+                        GenTree* op1Dup;
+                        op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                           nullptr DEBUGARG("Clone op1 for Vector<T>.Multiply"));
+
+                        // op2Dup = op2
+                        GenTree* op2Dup;
+                        op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                           nullptr DEBUGARG("Clone op2 for Vector<T>.Multiply"));
+
+                        // op1 = Sse2.ShiftRightLogical128BitLane(op1, 4)
+                        op1 =
+                            gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(4, TYP_INT),
+                                                       NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize);
+
+                        // op2 = Sse2.ShiftRightLogical128BitLane(op1, 4)
+                        op2 =
+                            gtNewSimdAsHWIntrinsicNode(retType, op2, gtNewIconNode(4, TYP_INT),
+                                                       NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize);
+
+                        // op2 = Sse2.Multiply(op2.AsUInt64(), op1.AsUInt64()).AsInt32()
+                        op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, op1, NI_SSE2_Multiply, TYP_ULONG,
+                                                         simdSize);
+
+                        // op2 = Sse2.Shuffle(op2, (0, 0, 2, 0))
+                        op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, gtNewIconNode(SHUFFLE_XXZX, TYP_INT),
+                                                         NI_SSE2_Shuffle, baseType, simdSize);
+
+                        // op1 = Sse2.Multiply(op1Dup.AsUInt64(), op2Dup.AsUInt64()).AsInt32()
+                        op1 = gtNewSimdAsHWIntrinsicNode(retType, op1Dup, op2Dup, NI_SSE2_Multiply, TYP_ULONG,
+                                                         simdSize);
+
+                        // op1 = Sse2.Shuffle(op1, (0, 0, 2, 0))
+                        op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT),
+                                                         NI_SSE2_Shuffle, baseType, simdSize);
+
+                        // result = Sse2.UnpackLow(op1, op2)
+                        hwIntrinsic = NI_SSE2_UnpackLow;
+                    }
+                    break;
+                }
 
-                // op2 = Sse2.Multiply(op2.AsUInt64(), op1.AsUInt64()).AsInt32()
-                op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, op1, NI_SSE2_Multiply, TYP_ULONG, simdSize);
+                case TYP_FLOAT:
+                {
+                    hwIntrinsic = NI_SSE_Multiply;
+                    break;
+                }
 
-                // op2 = Sse2.Shuffle(op2, (0, 0, 2, 0))
-                op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, gtNewIconNode(SHUFFLE_XXZX, TYP_INT),
-                                                 NI_SSE2_Shuffle, baseType, simdSize);
+                case TYP_DOUBLE:
+                {
+                    hwIntrinsic = NI_SSE2_Multiply;
+                    break;
+                }
 
-                // op1 = Sse2.Multiply(op1Dup.AsUInt64(), op2Dup.AsUInt64()).AsInt32()
-                op1 =
-                    gtNewSimdAsHWIntrinsicNode(retType, op1Dup, op2Dup, NI_SSE2_Multiply, TYP_ULONG, simdSize);
+                default:
+                {
+                    unreached();
+                }
+            }
+
+            assert(hwIntrinsic != NI_Illegal);
+            return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize);
+        }
 
-                // op1 = Sse2.Shuffle(op1, (0, 0, 2, 0))
-                op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT),
-                                                 NI_SSE2_Shuffle, baseType, simdSize);
+        case NI_VectorT256_op_Multiply:
+        {
+            NamedIntrinsic hwIntrinsic = NI_Illegal;
+            GenTree** broadcastOp = nullptr;
 
-                // result = Sse2.UnpackLow(op1, op2)
-                hwIntrinsic = NI_SSE2_UnpackLow;
+            if (varTypeIsArithmetic(op1->TypeGet()))
+            {
+                broadcastOp = &op1;
+            }
+            else if (varTypeIsArithmetic(op2->TypeGet()))
+            {
+                broadcastOp = &op2;
             }
-            assert(hwIntrinsic != NI_Illegal);
 
+            if (broadcastOp != nullptr)
+            {
+                *broadcastOp = gtNewSimdCreateBroadcastNode(simdType, *broadcastOp, baseType, simdSize,
+                                                            /* isSimdAsHWIntrinsic */ true);
+            }
+
+            switch (baseType)
+            {
+                case TYP_SHORT:
+                case TYP_USHORT:
+                case TYP_INT:
+                case TYP_UINT:
+                {
+                    hwIntrinsic = NI_AVX2_MultiplyLow;
+                    break;
+                }
+
+                case TYP_FLOAT:
+                case TYP_DOUBLE:
+                {
+                    hwIntrinsic = NI_AVX_Multiply;
+                    break;
+                }
+
+                default:
+                {
+                    unreached();
+                }
+            }
+
+            assert(hwIntrinsic != NI_Illegal);
             return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize);
         }
+
 #elif defined(TARGET_ARM64)
         case NI_Vector2_CreateBroadcast:
         case NI_Vector3_CreateBroadcast:
@@ -969,6 +1067,83 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
             // result = ConditionalSelect(op1, op1Dup, op2Dup)
             return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op1Dup, op2Dup);
         }
+
+        case NI_VectorT128_op_Multiply:
+        {
+            NamedIntrinsic hwIntrinsic = NI_Illegal;
+            NamedIntrinsic scalarIntrinsic = NI_Illegal;
+            GenTree** scalarOp = nullptr;
+
+            if (varTypeIsArithmetic(op1->TypeGet()))
+            {
+                // MultiplyByScalar requires the scalar op to be op2
+                std::swap(op1, op2);
+
+                scalarOp = &op2;
+            }
+            else if (varTypeIsArithmetic(op2->TypeGet()))
+            {
+                scalarOp = &op2;
+            }
+
+            switch (baseType)
+            {
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                {
+                    if (scalarOp != nullptr)
+                    {
+                        *scalarOp = gtNewSimdCreateBroadcastNode(simdType, *scalarOp, baseType, simdSize,
+                                                                 /* isSimdAsHWIntrinsic */ true);
+                    }
+
+                    hwIntrinsic = NI_AdvSimd_Multiply;
+                    break;
+                }
+
+                case TYP_SHORT:
+                case TYP_USHORT:
+                case TYP_INT:
+                case TYP_UINT:
+                case TYP_FLOAT:
+                {
+                    if (scalarOp != nullptr)
+                    {
+                        hwIntrinsic = NI_AdvSimd_MultiplyByScalar;
+                        *scalarOp = gtNewSimdAsHWIntrinsicNode(TYP_SIMD8, *scalarOp,
+                                                               NI_Vector64_CreateScalarUnsafe, baseType, 8);
+                    }
+                    else
+                    {
+                        hwIntrinsic = NI_AdvSimd_Multiply;
+                    }
+                    break;
+                }
+
+                case TYP_DOUBLE:
+                {
+                    if (scalarOp != nullptr)
+                    {
+                        hwIntrinsic = NI_AdvSimd_Arm64_MultiplyByScalar;
+                        *scalarOp =
+                            gtNewSimdAsHWIntrinsicNode(TYP_SIMD8, *scalarOp, NI_Vector64_Create, baseType, 8);
+                    }
+                    else
+                    {
+                        hwIntrinsic = NI_AdvSimd_Arm64_Multiply;
+                    }
+                    break;
+                }
+
+                default:
+                {
+                    unreached();
+                }
+            }
+
+            assert(hwIntrinsic != NI_Illegal);
+            return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize);
+        }
 #else
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64

src/coreclr/jit/simdashwintrinsiclistarm64.h

Lines changed: 1 addition & 1 deletion
@@ -128,7 +128,7 @@ SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Equality,
 SIMD_AS_HWINTRINSIC_ID(VectorT128, op_ExclusiveOr, 2, {NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Inequality, 2, {NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Multiply, 2, {NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_AdvSimd_Arm64_Multiply}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Multiply, 2, {NI_VectorT128_op_Multiply, NI_VectorT128_op_Multiply, NI_VectorT128_op_Multiply, NI_VectorT128_op_Multiply, NI_VectorT128_op_Multiply, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_VectorT128_op_Multiply, NI_VectorT128_op_Multiply}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Subtraction, 2, {NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Arm64_Subtract}, SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_AdvSimd_Arm64_Sqrt}, SimdAsHWIntrinsicFlag::None)
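
With this change the ARM64 table entry no longer hard-codes NI_AdvSimd_Multiply; every supported element type is routed to the NI_VectorT128_op_Multiply special case above, which emits a full vector multiply for `Vector<T> * Vector<T>` and MultiplyByScalar when one operand is a scalar, so the scalar only has to be placed in lane 0 (Vector64.CreateScalarUnsafe) rather than broadcast to every lane. Roughly the difference in NEON terms, as an illustrative C++ sketch (invented helper names, not the code the JIT emits):

    #include <arm_neon.h>

    // Full vector * vector multiply: MUL Vd.4S, Vn.4S, Vm.4S
    int32x4_t MultiplyVectors(int32x4_t left, int32x4_t right)
    {
        return vmulq_s32(left, right);
    }

    // Vector * scalar multiply: can use the by-element form MUL Vd.4S, Vn.4S, Vm.S[0],
    // which only reads lane 0 of the second operand.
    int32x4_t MultiplyVectorByScalar(int32x4_t left, int32_t right)
    {
        return vmulq_n_s32(left, right);
    }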

0 commit comments
