@@ -893,17 +893,21 @@ void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
893893// Compare strings, used for char[] and byte[].
894894void C2_MacroAssembler::string_compare (Register str1, Register str2,
895895 Register cnt1, Register cnt2, Register result,
896- int ae, Register tmp1, Register tmp2) {
897- Label L, Loop, LoopEnd, HaveResult, Done;
896+ int ae, Register tmp1, Register tmp2,
897+ FloatRegister vtmp1, FloatRegister vtmp2) {
898+ Label L, Loop, LoopEnd, HaveResult, Done, Loop_Start,
899+ V_L, V_Loop, V_Result, V_Start;
898900
899901 bool isLL = ae == StrIntrinsicNode::LL;
900902 bool isLU = ae == StrIntrinsicNode::LU;
901903 bool isUL = ae == StrIntrinsicNode::UL;
904+ bool isUU = ae == StrIntrinsicNode::UU;
902905
903906 bool str1_isL = isLL || isLU;
904907 bool str2_isL = isLL || isUL;
905908
906909 int charsInWord = isLL ? wordSize : wordSize/2 ;
910+ int charsInFloatRegister = (UseLASX && (isLL||isUU))?(isLL? 32 : 16 ):(isLL? 16 : 8 );
907911
908912 if (!str1_isL) srli_w (cnt1, cnt1, 1 );
909913 if (!str2_isL) srli_w (cnt2, cnt2, 1 );
@@ -912,10 +916,190 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
912916 sub_d (result, cnt1, cnt2); // result holds the difference of two lengths
913917
914918 // compute the shorter length (in cnt1)
915- ori (AT, R0, charsInWord);
916- bge (cnt2, cnt1, Loop);
919+ bge (cnt2, cnt1, V_Start);
917920 move (cnt1, cnt2);
918921
922+ bind (V_Start);
923+ // It is hard to apply xvilvl to inflate 16 bytes into 32 bytes,
924+ // so we employ LASX only for the LL and UU StrIntrinsicNode cases.
925+ if (UseLASX && (isLL || isUU)) {
926+ ori (AT, R0, charsInFloatRegister);
927+ addi_d (tmp1, R0, 16 );
928+ xvinsgr2vr_d (fscratch, R0, 0 );
929+ xvinsgr2vr_d (fscratch, tmp1, 2 );
930+ bind (V_Loop);
931+ blt (cnt1, AT, Loop_Start);
932+ if (isLL) {
933+ xvld (vtmp1, str1, 0 );
934+ xvld (vtmp2, str2, 0 );
935+ xvxor_v (vtmp1, vtmp1, vtmp2);
936+ xvseteqz_v (FCC0, vtmp1);
937+ bceqz (FCC0, V_L);
938+
939+ addi_d (str1, str1, 32 );
940+ addi_d (str2, str2, 32 );
941+ addi_d (cnt1, cnt1, -charsInFloatRegister);
942+ b (V_Loop);
943+
944+ bind (V_L);
945+ xvxor_v (vtmp2, vtmp2, vtmp2);
946+ xvabsd_b (vtmp1, vtmp1, vtmp2);
947+ xvneg_b (vtmp1, vtmp1);
948+ xvfrstp_b (vtmp2, vtmp1, fscratch);
949+ xvpickve2gr_du (tmp1, vtmp2, 0 );
950+ addi_d (cnt2, R0, 16 );
951+ bne (tmp1, cnt2, V_Result);
952+
953+ xvpickve2gr_du (tmp1, vtmp2, 2 );
954+ addi_d (tmp1, tmp1, 16 );
955+
956+ // the index value was stored in tmp1
957+ bind (V_Result);
958+ ldx_bu (result, str1, tmp1);
959+ ldx_bu (tmp2, str2, tmp1);
960+ sub_d (result, result, tmp2);
961+ b (Done);
962+ } else if (isUU) {
963+ xvld (vtmp1, str1, 0 );
964+ xvld (vtmp2, str2, 0 );
965+ xvxor_v (vtmp1, vtmp1, vtmp2);
966+ xvseteqz_v (FCC0, vtmp1);
967+ bceqz (FCC0, V_L);
968+
969+ addi_d (str1, str1, 32 );
970+ addi_d (str2, str2, 32 );
971+ addi_d (cnt1, cnt1, -charsInFloatRegister);
972+ b (V_Loop);
973+
974+ bind (V_L);
975+ xvxor_v (vtmp2, vtmp2, vtmp2);
976+ xvabsd_h (vtmp1, vtmp1, vtmp2);
977+ xvneg_h (vtmp1, vtmp1);
978+ xvfrstp_h (vtmp2, vtmp1, fscratch);
979+ xvpickve2gr_du (tmp1, vtmp2, 0 );
980+ addi_d (cnt2, R0, 8 );
981+ bne (tmp1, cnt2, V_Result);
982+
983+ xvpickve2gr_du (tmp1, vtmp2, 2 );
984+ addi_d (tmp1, tmp1, 8 );
985+
986+ // the index value was stored in tmp1
987+ bind (V_Result);
988+ slli_d (tmp1, tmp1, 1 );
989+ ldx_hu (result, str1, tmp1);
990+ ldx_hu (tmp2, str2, tmp1);
991+ sub_d (result, result, tmp2);
992+ b (Done);
993+ }
994+ } else if (UseLSX) {
995+ ori (AT, R0, charsInFloatRegister);
996+ vxor_v (fscratch, fscratch, fscratch);
997+ bind (V_Loop);
998+ blt (cnt1, AT, Loop_Start);
999+ if (isLL) {
1000+ vld (vtmp1, str1, 0 );
1001+ vld (vtmp2, str2, 0 );
1002+ vxor_v (vtmp1, vtmp1, vtmp2);
1003+ vseteqz_v (FCC0, vtmp1);
1004+ bceqz (FCC0, V_L);
1005+
1006+ addi_d (str1, str1, 16 );
1007+ addi_d (str2, str2, 16 );
1008+ addi_d (cnt1, cnt1, -charsInFloatRegister);
1009+ b (V_Loop);
1010+
1011+ bind (V_L);
1012+ vxor_v (vtmp2, vtmp2, vtmp2);
1013+ vabsd_b (vtmp1, vtmp1, vtmp2);
1014+ vneg_b (vtmp1, vtmp1);
1015+ vfrstpi_b (vtmp2, vtmp1, 0 );
1016+ vpickve2gr_bu (tmp1, vtmp2, 0 );
1017+
1018+ // the index value was stored in tmp1
1019+ ldx_bu (result, str1, tmp1);
1020+ ldx_bu (tmp2, str2, tmp1);
1021+ sub_d (result, result, tmp2);
1022+ b (Done);
1023+ } else if (isLU) {
1024+ vld (vtmp1, str1, 0 );
1025+ vld (vtmp2, str2, 0 );
1026+ vilvl_b (vtmp1, fscratch, vtmp1);
1027+ vxor_v (vtmp1, vtmp1, vtmp2);
1028+ vseteqz_v (FCC0, vtmp1);
1029+ bceqz (FCC0, V_L);
1030+
1031+ addi_d (str1, str1, 8 );
1032+ addi_d (str2, str2, 16 );
1033+ addi_d (cnt1, cnt1, -charsInFloatRegister);
1034+ b (V_Loop);
1035+
1036+ bind (V_L);
1037+ vxor_v (vtmp2, vtmp2, vtmp2);
1038+ vabsd_h (vtmp1, vtmp1, vtmp2);
1039+ vneg_h (vtmp1, vtmp1);
1040+ vfrstpi_h (vtmp2, vtmp1, 0 );
1041+ vpickve2gr_bu (tmp1, vtmp2, 0 );
1042+
1043+ // the index value was stored in tmp1
1044+ ldx_bu (result, str1, tmp1);
1045+ slli_d (tmp1, tmp1, 1 );
1046+ ldx_hu (tmp2, str2, tmp1);
1047+ sub_d (result, result, tmp2);
1048+ b (Done);
1049+ } else if (isUL) {
1050+ vld (vtmp1, str1, 0 );
1051+ vld (vtmp2, str2, 0 );
1052+ vilvl_b (vtmp2, fscratch, vtmp2);
1053+ vxor_v (vtmp1, vtmp1, vtmp2);
1054+ vseteqz_v (FCC0, vtmp1);
1055+ bceqz (FCC0, V_L);
1056+
1057+ addi_d (str1, str1, 16 );
1058+ addi_d (str2, str2, 8 );
1059+ addi_d (cnt1, cnt1, -charsInFloatRegister);
1060+ b (V_Loop);
1061+
1062+ bind (V_L);
1063+ vxor_v (vtmp2, vtmp2, vtmp2);
1064+ vabsd_h (vtmp1, vtmp1, vtmp2);
1065+ vneg_h (vtmp1, vtmp1);
1066+ vfrstpi_h (vtmp2, vtmp1, 0 );
1067+ vpickve2gr_bu (tmp1, vtmp2, 0 );
1068+
1069+ // the index value was stored in tmp1
1070+ ldx_bu (tmp2, str2, tmp1);
1071+ slli_d (tmp1, tmp1, 1 );
1072+ ldx_hu (result, str1, tmp1);
1073+ sub_d (result, result, tmp2);
1074+ b (Done);
1075+ } else if (isUU) {
1076+ vld (vtmp1, str1, 0 );
1077+ vld (vtmp2, str2, 0 );
1078+ vxor_v (vtmp1, vtmp1, vtmp2);
1079+ vseteqz_v (FCC0, vtmp1);
1080+ bceqz (FCC0, V_L);
1081+
1082+ addi_d (str1, str1, 16 );
1083+ addi_d (str2, str2, 16 );
1084+ addi_d (cnt1, cnt1, -charsInFloatRegister);
1085+ b (V_Loop);
1086+
1087+ bind (V_L);
1088+ vxor_v (vtmp2, vtmp2, vtmp2);
1089+ vabsd_h (vtmp1, vtmp1, vtmp2);
1090+ vneg_h (vtmp1, vtmp1);
1091+ vfrstpi_h (vtmp2, vtmp1, 0 );
1092+ vpickve2gr_bu (tmp1, vtmp2, 0 );
1093+
1094+ // the index value was stored in tmp1
1095+ slli_d (tmp1, tmp1, 1 );
1096+ ldx_hu (result, str1, tmp1);
1097+ ldx_hu (tmp2, str2, tmp1);
1098+ sub_d (result, result, tmp2);
1099+ b (Done);
1100+ }
1101+ }
1102+
9191103 // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register
9201104 //
9211105 // For example:
@@ -929,6 +1113,9 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
9291113 //
9301114 // Fetch 0 to 7 bits of tmp1 and tmp2, subtract to get the result.
9311115 // Other types are similar to isLL.
1116+
1117+ bind (Loop_Start);
1118+ ori (AT, R0, charsInWord);
9321119 bind (Loop);
9331120 blt (cnt1, AT, LoopEnd);
9341121 if (isLL) {
0 commit comments