@@ -879,56 +879,154 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
879879
880880 case NI_VectorT128_op_Multiply:
881881 {
882- assert (baseType == TYP_INT);
883-
884882 NamedIntrinsic hwIntrinsic = NI_Illegal;
883+ GenTree** broadcastOp = nullptr ;
885884
886- if (compOpportunisticallyDependsOn (InstructionSet_SSE41 ))
885+ if (varTypeIsArithmetic (op1-> TypeGet () ))
887886 {
888- hwIntrinsic = NI_SSE41_MultiplyLow ;
887+ broadcastOp = &op1 ;
889888 }
890- else
889+ else if ( varTypeIsArithmetic (op2-> TypeGet ()))
891890 {
892- // op1Dup = op1
893- GenTree* op1Dup;
894- op1 = impCloneExpr (op1, &op1Dup, clsHnd, (unsigned )CHECK_SPILL_ALL,
895- nullptr DEBUGARG (" Clone op1 for Vector<T>.Multiply" ));
891+ broadcastOp = &op2;
892+ }
896893
897- // op2Dup = op2
898- GenTree* op2Dup;
899- op2 = impCloneExpr (op2, &op2Dup, clsHnd, (unsigned )CHECK_SPILL_ALL,
900- nullptr DEBUGARG (" Clone op2 for Vector<T>.Multiply" ));
894+ if (broadcastOp != nullptr )
895+ {
896+ *broadcastOp = gtNewSimdCreateBroadcastNode (simdType, *broadcastOp, baseType, simdSize,
897+ /* isSimdAsHWIntrinsic */ true );
898+ }
901899
902- // op1 = Sse2.ShiftRightLogical128BitLane(op1, 4)
903- op1 = gtNewSimdAsHWIntrinsicNode (retType, op1, gtNewIconNode (4 , TYP_INT),
904- NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize);
900+ switch (baseType)
901+ {
902+ case TYP_SHORT:
903+ case TYP_USHORT:
904+ {
905+ hwIntrinsic = NI_SSE2_MultiplyLow;
906+ break ;
907+ }
905908
906- // op2 = Sse2.ShiftRightLogical128BitLane(op1, 4)
907- op2 = gtNewSimdAsHWIntrinsicNode (retType, op2, gtNewIconNode (4 , TYP_INT),
908- NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize);
909+ case TYP_INT:
910+ case TYP_UINT:
911+ {
912+ if (compOpportunisticallyDependsOn (InstructionSet_SSE41))
913+ {
914+ hwIntrinsic = NI_SSE41_MultiplyLow;
915+ }
916+ else
917+ {
918+ // op1Dup = op1
919+ GenTree* op1Dup;
920+ op1 = impCloneExpr (op1, &op1Dup, clsHnd, (unsigned )CHECK_SPILL_ALL,
921+ nullptr DEBUGARG (" Clone op1 for Vector<T>.Multiply" ));
922+
923+ // op2Dup = op2
924+ GenTree* op2Dup;
925+ op2 = impCloneExpr (op2, &op2Dup, clsHnd, (unsigned )CHECK_SPILL_ALL,
926+ nullptr DEBUGARG (" Clone op2 for Vector<T>.Multiply" ));
927+
928+ // op1 = Sse2.ShiftRightLogical128BitLane(op1, 4)
929+ op1 =
930+ gtNewSimdAsHWIntrinsicNode (retType, op1, gtNewIconNode (4 , TYP_INT),
931+ NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize);
932+
 933+                         // op2 = Sse2.ShiftRightLogical128BitLane(op2, 4)
934+ op2 =
935+ gtNewSimdAsHWIntrinsicNode (retType, op2, gtNewIconNode (4 , TYP_INT),
936+ NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize);
937+
938+ // op2 = Sse2.Multiply(op2.AsUInt64(), op1.AsUInt64()).AsInt32()
939+ op2 = gtNewSimdAsHWIntrinsicNode (retType, op2, op1, NI_SSE2_Multiply, TYP_ULONG,
940+ simdSize);
941+
942+ // op2 = Sse2.Shuffle(op2, (0, 0, 2, 0))
943+ op2 = gtNewSimdAsHWIntrinsicNode (retType, op2, gtNewIconNode (SHUFFLE_XXZX, TYP_INT),
944+ NI_SSE2_Shuffle, baseType, simdSize);
945+
946+ // op1 = Sse2.Multiply(op1Dup.AsUInt64(), op2Dup.AsUInt64()).AsInt32()
947+ op1 = gtNewSimdAsHWIntrinsicNode (retType, op1Dup, op2Dup, NI_SSE2_Multiply, TYP_ULONG,
948+ simdSize);
949+
950+ // op1 = Sse2.Shuffle(op1, (0, 0, 2, 0))
951+ op1 = gtNewSimdAsHWIntrinsicNode (retType, op1, gtNewIconNode (SHUFFLE_XXZX, TYP_INT),
952+ NI_SSE2_Shuffle, baseType, simdSize);
953+
954+ // result = Sse2.UnpackLow(op1, op2)
955+ hwIntrinsic = NI_SSE2_UnpackLow;
956+ }
957+ break ;
958+ }
909959
910- // op2 = Sse2.Multiply(op2.AsUInt64(), op1.AsUInt64()).AsInt32()
911- op2 = gtNewSimdAsHWIntrinsicNode (retType, op2, op1, NI_SSE2_Multiply, TYP_ULONG, simdSize);
960+ case TYP_FLOAT:
961+ {
962+ hwIntrinsic = NI_SSE_Multiply;
963+ break ;
964+ }
912965
913- // op2 = Sse2.Shuffle(op2, (0, 0, 2, 0))
914- op2 = gtNewSimdAsHWIntrinsicNode (retType, op2, gtNewIconNode (SHUFFLE_XXZX, TYP_INT),
915- NI_SSE2_Shuffle, baseType, simdSize);
966+ case TYP_DOUBLE:
967+ {
968+ hwIntrinsic = NI_SSE2_Multiply;
969+ break ;
970+ }
916971
917- // op1 = Sse2.Multiply(op1Dup.AsUInt64(), op2Dup.AsUInt64()).AsInt32()
918- op1 =
919- gtNewSimdAsHWIntrinsicNode (retType, op1Dup, op2Dup, NI_SSE2_Multiply, TYP_ULONG, simdSize);
972+ default :
973+ {
974+ unreached ();
975+ }
976+ }
977+
978+ assert (hwIntrinsic != NI_Illegal);
979+ return gtNewSimdAsHWIntrinsicNode (retType, op1, op2, hwIntrinsic, baseType, simdSize);
980+ }
920981
921- // op1 = Sse2.Shuffle(op1, (0, 0, 2, 0))
922- op1 = gtNewSimdAsHWIntrinsicNode (retType, op1, gtNewIconNode (SHUFFLE_XXZX, TYP_INT),
923- NI_SSE2_Shuffle, baseType, simdSize);
982+ case NI_VectorT256_op_Multiply:
983+ {
984+ NamedIntrinsic hwIntrinsic = NI_Illegal;
985+ GenTree** broadcastOp = nullptr ;
924986
925- // result = Sse2.UnpackLow(op1, op2)
926- hwIntrinsic = NI_SSE2_UnpackLow;
987+ if (varTypeIsArithmetic (op1->TypeGet ()))
988+ {
989+ broadcastOp = &op1;
990+ }
991+ else if (varTypeIsArithmetic (op2->TypeGet ()))
992+ {
993+ broadcastOp = &op2;
927994 }
928- assert (hwIntrinsic != NI_Illegal);
929995
996+ if (broadcastOp != nullptr )
997+ {
998+ *broadcastOp = gtNewSimdCreateBroadcastNode (simdType, *broadcastOp, baseType, simdSize,
999+ /* isSimdAsHWIntrinsic */ true );
1000+ }
1001+
1002+ switch (baseType)
1003+ {
1004+ case TYP_SHORT:
1005+ case TYP_USHORT:
1006+ case TYP_INT:
1007+ case TYP_UINT:
1008+ {
1009+ hwIntrinsic = NI_AVX2_MultiplyLow;
1010+ break ;
1011+ }
1012+
1013+ case TYP_FLOAT:
1014+ case TYP_DOUBLE:
1015+ {
1016+ hwIntrinsic = NI_AVX_Multiply;
1017+ break ;
1018+ }
1019+
1020+ default :
1021+ {
1022+ unreached ();
1023+ }
1024+ }
1025+
1026+ assert (hwIntrinsic != NI_Illegal);
9301027 return gtNewSimdAsHWIntrinsicNode (retType, op1, op2, hwIntrinsic, baseType, simdSize);
9311028 }
1029+
9321030#elif defined(TARGET_ARM64)
9331031 case NI_Vector2_CreateBroadcast:
9341032 case NI_Vector3_CreateBroadcast:
@@ -969,6 +1067,83 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
9691067 // result = ConditionalSelect(op1, op1Dup, op2Dup)
9701068 return impSimdAsHWIntrinsicCndSel (clsHnd, retType, baseType, simdSize, op1, op1Dup, op2Dup);
9711069 }
1070+
1071+ case NI_VectorT128_op_Multiply:
1072+ {
1073+ NamedIntrinsic hwIntrinsic = NI_Illegal;
1074+ NamedIntrinsic scalarIntrinsic = NI_Illegal;
1075+ GenTree** scalarOp = nullptr ;
1076+
1077+ if (varTypeIsArithmetic (op1->TypeGet ()))
1078+ {
1079+ // MultiplyByScalar requires the scalar op to be op2
1080+ std::swap (op1, op2);
1081+
1082+ scalarOp = &op2;
1083+ }
1084+ else if (varTypeIsArithmetic (op2->TypeGet ()))
1085+ {
1086+ scalarOp = &op2;
1087+ }
1088+
1089+ switch (baseType)
1090+ {
1091+ case TYP_BYTE:
1092+ case TYP_UBYTE:
1093+ {
1094+ if (scalarOp != nullptr )
1095+ {
1096+ *scalarOp = gtNewSimdCreateBroadcastNode (simdType, *scalarOp, baseType, simdSize,
1097+ /* isSimdAsHWIntrinsic */ true );
1098+ }
1099+
1100+ hwIntrinsic = NI_AdvSimd_Multiply;
1101+ break ;
1102+ }
1103+
1104+ case TYP_SHORT:
1105+ case TYP_USHORT:
1106+ case TYP_INT:
1107+ case TYP_UINT:
1108+ case TYP_FLOAT:
1109+ {
1110+ if (scalarOp != nullptr )
1111+ {
1112+ hwIntrinsic = NI_AdvSimd_MultiplyByScalar;
1113+ *scalarOp = gtNewSimdAsHWIntrinsicNode (TYP_SIMD8, *scalarOp,
1114+ NI_Vector64_CreateScalarUnsafe, baseType, 8 );
1115+ }
1116+ else
1117+ {
1118+ hwIntrinsic = NI_AdvSimd_Multiply;
1119+ }
1120+ break ;
1121+ }
1122+
1123+ case TYP_DOUBLE:
1124+ {
1125+ if (scalarOp != nullptr )
1126+ {
1127+ hwIntrinsic = NI_AdvSimd_Arm64_MultiplyByScalar;
1128+ *scalarOp =
1129+ gtNewSimdAsHWIntrinsicNode (TYP_SIMD8, *scalarOp, NI_Vector64_Create, baseType, 8 );
1130+ }
1131+ else
1132+ {
1133+ hwIntrinsic = NI_AdvSimd_Arm64_Multiply;
1134+ }
1135+ break ;
1136+ }
1137+
1138+ default :
1139+ {
1140+ unreached ();
1141+ }
1142+ }
1143+
1144+ assert (hwIntrinsic != NI_Illegal);
1145+ return gtNewSimdAsHWIntrinsicNode (retType, op1, op2, hwIntrinsic, baseType, simdSize);
1146+ }
9721147#else
9731148#error Unsupported platform
9741149#endif // !TARGET_XARCH && !TARGET_ARM64
0 commit comments