Skip to content

Commit 8ff0b92

Browse files
authored
Update (2023.12.11, 4th)
33080: Fix byte_array_inflate vld is out of range; 33031: Refresh the CountPositives macro; 33068: Vectorize StringCompareToDifferentLength; 24527: Fix a typo for invokeinterface in 8604; 32871: Supplement 32674
1 parent f542478 commit 8ff0b92

File tree

8 files changed

+312
-121
lines changed

8 files changed

+312
-121
lines changed

src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp

Lines changed: 191 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -893,17 +893,21 @@ void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
893893
// Compare strings, used for char[] and byte[].
894894
void C2_MacroAssembler::string_compare(Register str1, Register str2,
895895
Register cnt1, Register cnt2, Register result,
896-
int ae, Register tmp1, Register tmp2) {
897-
Label L, Loop, LoopEnd, HaveResult, Done;
896+
int ae, Register tmp1, Register tmp2,
897+
FloatRegister vtmp1, FloatRegister vtmp2) {
898+
Label L, Loop, LoopEnd, HaveResult, Done, Loop_Start,
899+
V_L, V_Loop, V_Result, V_Start;
898900

899901
bool isLL = ae == StrIntrinsicNode::LL;
900902
bool isLU = ae == StrIntrinsicNode::LU;
901903
bool isUL = ae == StrIntrinsicNode::UL;
904+
bool isUU = ae == StrIntrinsicNode::UU;
902905

903906
bool str1_isL = isLL || isLU;
904907
bool str2_isL = isLL || isUL;
905908

906909
int charsInWord = isLL ? wordSize : wordSize/2;
910+
int charsInFloatRegister = (UseLASX && (isLL||isUU))?(isLL? 32 : 16):(isLL? 16 : 8);
907911

908912
if (!str1_isL) srli_w(cnt1, cnt1, 1);
909913
if (!str2_isL) srli_w(cnt2, cnt2, 1);
@@ -912,10 +916,190 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
912916
sub_d(result, cnt1, cnt2); // result holds the difference of two lengths
913917

914918
// compute the shorter length (in cnt1)
915-
ori(AT, R0, charsInWord);
916-
bge(cnt2, cnt1, Loop);
919+
bge(cnt2, cnt1, V_Start);
917920
move(cnt1, cnt2);
918921

922+
bind(V_Start);
923+
// It is hard to use xvilvl to inflate 16 bytes into 32 bytes,
924+
// so we employ the LASX only for the LL or UU StrIntrinsicNode.
925+
if (UseLASX && (isLL || isUU)) {
926+
ori(AT, R0, charsInFloatRegister);
927+
addi_d(tmp1, R0, 16);
928+
xvinsgr2vr_d(fscratch, R0, 0);
929+
xvinsgr2vr_d(fscratch, tmp1, 2);
930+
bind(V_Loop);
931+
blt(cnt1, AT, Loop_Start);
932+
if (isLL) {
933+
xvld(vtmp1, str1, 0);
934+
xvld(vtmp2, str2, 0);
935+
xvxor_v(vtmp1, vtmp1, vtmp2);
936+
xvseteqz_v(FCC0, vtmp1);
937+
bceqz(FCC0, V_L);
938+
939+
addi_d(str1, str1, 32);
940+
addi_d(str2, str2, 32);
941+
addi_d(cnt1, cnt1, -charsInFloatRegister);
942+
b(V_Loop);
943+
944+
bind(V_L);
945+
xvxor_v(vtmp2, vtmp2, vtmp2);
946+
xvabsd_b(vtmp1, vtmp1, vtmp2);
947+
xvneg_b(vtmp1, vtmp1);
948+
xvfrstp_b(vtmp2, vtmp1, fscratch);
949+
xvpickve2gr_du(tmp1, vtmp2, 0);
950+
addi_d(cnt2, R0, 16);
951+
bne(tmp1, cnt2, V_Result);
952+
953+
xvpickve2gr_du(tmp1, vtmp2, 2);
954+
addi_d(tmp1, tmp1, 16);
955+
956+
// the index value was stored in tmp1
957+
bind(V_Result);
958+
ldx_bu(result, str1, tmp1);
959+
ldx_bu(tmp2, str2, tmp1);
960+
sub_d(result, result, tmp2);
961+
b(Done);
962+
} else if (isUU) {
963+
xvld(vtmp1, str1, 0);
964+
xvld(vtmp2, str2, 0);
965+
xvxor_v(vtmp1, vtmp1, vtmp2);
966+
xvseteqz_v(FCC0, vtmp1);
967+
bceqz(FCC0, V_L);
968+
969+
addi_d(str1, str1, 32);
970+
addi_d(str2, str2, 32);
971+
addi_d(cnt1, cnt1, -charsInFloatRegister);
972+
b(V_Loop);
973+
974+
bind(V_L);
975+
xvxor_v(vtmp2, vtmp2, vtmp2);
976+
xvabsd_h(vtmp1, vtmp1, vtmp2);
977+
xvneg_h(vtmp1, vtmp1);
978+
xvfrstp_h(vtmp2, vtmp1, fscratch);
979+
xvpickve2gr_du(tmp1, vtmp2, 0);
980+
addi_d(cnt2, R0, 8);
981+
bne(tmp1, cnt2, V_Result);
982+
983+
xvpickve2gr_du(tmp1, vtmp2, 2);
984+
addi_d(tmp1, tmp1, 8);
985+
986+
// the index value was stored in tmp1
987+
bind(V_Result);
988+
slli_d(tmp1, tmp1, 1);
989+
ldx_hu(result, str1, tmp1);
990+
ldx_hu(tmp2, str2, tmp1);
991+
sub_d(result, result, tmp2);
992+
b(Done);
993+
}
994+
} else if (UseLSX) {
995+
ori(AT, R0, charsInFloatRegister);
996+
vxor_v(fscratch, fscratch, fscratch);
997+
bind(V_Loop);
998+
blt(cnt1, AT, Loop_Start);
999+
if (isLL) {
1000+
vld(vtmp1, str1, 0);
1001+
vld(vtmp2, str2, 0);
1002+
vxor_v(vtmp1, vtmp1, vtmp2);
1003+
vseteqz_v(FCC0, vtmp1);
1004+
bceqz(FCC0, V_L);
1005+
1006+
addi_d(str1, str1, 16);
1007+
addi_d(str2, str2, 16);
1008+
addi_d(cnt1, cnt1, -charsInFloatRegister);
1009+
b(V_Loop);
1010+
1011+
bind(V_L);
1012+
vxor_v(vtmp2, vtmp2, vtmp2);
1013+
vabsd_b(vtmp1, vtmp1, vtmp2);
1014+
vneg_b(vtmp1, vtmp1);
1015+
vfrstpi_b(vtmp2, vtmp1, 0);
1016+
vpickve2gr_bu(tmp1, vtmp2, 0);
1017+
1018+
// the index value was stored in tmp1
1019+
ldx_bu(result, str1, tmp1);
1020+
ldx_bu(tmp2, str2, tmp1);
1021+
sub_d(result, result, tmp2);
1022+
b(Done);
1023+
} else if (isLU) {
1024+
vld(vtmp1, str1, 0);
1025+
vld(vtmp2, str2, 0);
1026+
vilvl_b(vtmp1, fscratch, vtmp1);
1027+
vxor_v(vtmp1, vtmp1, vtmp2);
1028+
vseteqz_v(FCC0, vtmp1);
1029+
bceqz(FCC0, V_L);
1030+
1031+
addi_d(str1, str1, 8);
1032+
addi_d(str2, str2, 16);
1033+
addi_d(cnt1, cnt1, -charsInFloatRegister);
1034+
b(V_Loop);
1035+
1036+
bind(V_L);
1037+
vxor_v(vtmp2, vtmp2, vtmp2);
1038+
vabsd_h(vtmp1, vtmp1, vtmp2);
1039+
vneg_h(vtmp1, vtmp1);
1040+
vfrstpi_h(vtmp2, vtmp1, 0);
1041+
vpickve2gr_bu(tmp1, vtmp2, 0);
1042+
1043+
// the index value was stored in tmp1
1044+
ldx_bu(result, str1, tmp1);
1045+
slli_d(tmp1, tmp1, 1);
1046+
ldx_hu(tmp2, str2, tmp1);
1047+
sub_d(result, result, tmp2);
1048+
b(Done);
1049+
} else if (isUL) {
1050+
vld(vtmp1, str1, 0);
1051+
vld(vtmp2, str2, 0);
1052+
vilvl_b(vtmp2, fscratch, vtmp2);
1053+
vxor_v(vtmp1, vtmp1, vtmp2);
1054+
vseteqz_v(FCC0, vtmp1);
1055+
bceqz(FCC0, V_L);
1056+
1057+
addi_d(str1, str1, 16);
1058+
addi_d(str2, str2, 8);
1059+
addi_d(cnt1, cnt1, -charsInFloatRegister);
1060+
b(V_Loop);
1061+
1062+
bind(V_L);
1063+
vxor_v(vtmp2, vtmp2, vtmp2);
1064+
vabsd_h(vtmp1, vtmp1, vtmp2);
1065+
vneg_h(vtmp1, vtmp1);
1066+
vfrstpi_h(vtmp2, vtmp1, 0);
1067+
vpickve2gr_bu(tmp1, vtmp2, 0);
1068+
1069+
// the index value was stored in tmp1
1070+
ldx_bu(tmp2, str2, tmp1);
1071+
slli_d(tmp1, tmp1, 1);
1072+
ldx_hu(result, str1, tmp1);
1073+
sub_d(result, result, tmp2);
1074+
b(Done);
1075+
} else if (isUU) {
1076+
vld(vtmp1, str1, 0);
1077+
vld(vtmp2, str2, 0);
1078+
vxor_v(vtmp1, vtmp1, vtmp2);
1079+
vseteqz_v(FCC0, vtmp1);
1080+
bceqz(FCC0, V_L);
1081+
1082+
addi_d(str1, str1, 16);
1083+
addi_d(str2, str2, 16);
1084+
addi_d(cnt1, cnt1, -charsInFloatRegister);
1085+
b(V_Loop);
1086+
1087+
bind(V_L);
1088+
vxor_v(vtmp2, vtmp2, vtmp2);
1089+
vabsd_h(vtmp1, vtmp1, vtmp2);
1090+
vneg_h(vtmp1, vtmp1);
1091+
vfrstpi_h(vtmp2, vtmp1, 0);
1092+
vpickve2gr_bu(tmp1, vtmp2, 0);
1093+
1094+
// the index value was stored in tmp1
1095+
slli_d(tmp1, tmp1, 1);
1096+
ldx_hu(result, str1, tmp1);
1097+
ldx_hu(tmp2, str2, tmp1);
1098+
sub_d(result, result, tmp2);
1099+
b(Done);
1100+
}
1101+
}
1102+
9191103
// Now the shorter length is in cnt1 and cnt2 can be used as a tmp register
9201104
//
9211105
// For example:
@@ -929,6 +1113,9 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
9291113
//
9301114
// Fetch 0 to 7 bits of tmp1 and tmp2, subtract to get the result.
9311115
// Other types are similar to isLL.
1116+
1117+
bind(Loop_Start);
1118+
ori(AT, R0, charsInWord);
9321119
bind(Loop);
9331120
blt(cnt1, AT, LoopEnd);
9341121
if (isLL) {

src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@
4141
// Compare strings.
4242
void string_compare(Register str1, Register str2,
4343
Register cnt1, Register cnt2, Register result,
44-
int ae, Register tmp1, Register tmp2);
44+
int ae, Register tmp1, Register tmp2,
45+
FloatRegister vtmp1, FloatRegister vtmp2);
4546

4647
// Find index of char in Latin-1 string
4748
void stringL_indexof_char(Register str1, Register cnt1,

src/hotspot/cpu/loongarch/loongarch_64.ad

Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8619,61 +8619,65 @@ instruct clear_array(a2RegL cnt, a0_RegP base, Universe dummy, a1RegL value) %{
86198619
ins_pipe( pipe_slow );
86208620
%}
86218621

8622-
instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2) %{
8622+
instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2, regF vtmp1, regF vtmp2) %{
86238623
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
86248624
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
8625-
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP tmp1,TEMP tmp2, TEMP_DEF result);
8625+
effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2);
86268626

8627-
format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_compareL" %}
8627+
format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> $result @ string_compareL" %}
86288628
ins_encode %{
86298629
__ string_compare($str1$$Register, $str2$$Register,
86308630
$cnt1$$Register, $cnt2$$Register, $result$$Register,
8631-
StrIntrinsicNode::LL, $tmp1$$Register, $tmp2$$Register);
8631+
StrIntrinsicNode::LL, $tmp1$$Register, $tmp2$$Register,
8632+
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister);
86328633
%}
86338634

86348635
ins_pipe( pipe_slow );
86358636
%}
86368637

8637-
instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2) %{
8638+
instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2, regF vtmp1, regF vtmp2) %{
86388639
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
86398640
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
8640-
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP tmp1, TEMP tmp2, TEMP_DEF result);
8641+
effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2);
86418642

8642-
format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_compareU" %}
8643+
format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> $result @ string_compareU" %}
86438644
ins_encode %{
86448645
__ string_compare($str1$$Register, $str2$$Register,
86458646
$cnt1$$Register, $cnt2$$Register, $result$$Register,
8646-
StrIntrinsicNode::UU, $tmp1$$Register, $tmp2$$Register);
8647+
StrIntrinsicNode::UU, $tmp1$$Register, $tmp2$$Register,
8648+
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister);
86478649
%}
86488650

86498651
ins_pipe( pipe_slow );
86508652
%}
86518653

8652-
instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2) %{
8654+
instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2, regF vtmp1, regF vtmp2) %{
86538655
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
86548656
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
8655-
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP tmp1, TEMP tmp2, TEMP_DEF result);
8657+
effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2);
86568658

8657-
format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_compareLU" %}
8659+
format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> $result @ string_compareLU" %}
86588660
ins_encode %{
86598661
__ string_compare($str1$$Register, $str2$$Register,
86608662
$cnt1$$Register, $cnt2$$Register, $result$$Register,
8661-
StrIntrinsicNode::LU, $tmp1$$Register, $tmp2$$Register);
8663+
StrIntrinsicNode::LU, $tmp1$$Register, $tmp2$$Register,
8664+
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister);
86628665
%}
86638666

86648667
ins_pipe( pipe_slow );
86658668
%}
86668669

8667-
instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2) %{
8670+
instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2, regF vtmp1, regF vtmp2) %{
86688671
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
86698672
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
8670-
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP tmp1, TEMP tmp2, TEMP_DEF result);
8673+
effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2);
86718674

8672-
format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1 tmp2:$tmp2 -> $result @ string_compareUL" %}
8675+
format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> $result @ string_compareUL" %}
86738676
ins_encode %{
86748677
__ string_compare($str1$$Register, $str2$$Register,
86758678
$cnt1$$Register, $cnt2$$Register, $result$$Register,
8676-
StrIntrinsicNode::UL, $tmp1$$Register, $tmp2$$Register);
8679+
StrIntrinsicNode::UL, $tmp1$$Register, $tmp2$$Register,
8680+
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister);
86778681
%}
86788682

86798683
ins_pipe( pipe_slow );
@@ -8811,28 +8815,16 @@ instruct stringL_indexof_char(a4_RegP str1, mA5RegI cnt1, mA6RegI ch, mRegI resu
88118815
ins_pipe( pipe_slow );
88128816
%}
88138817

8814-
instruct count_positives(a4_RegP src, mA5RegI len, mRegI result, mRegL tmp1) %{
8818+
instruct count_positives(mRegP src, mRegI len, mRegI result,
8819+
mRegL tmp1, mRegL tmp2) %{
88158820
match(Set result (CountPositives src len));
8816-
effect(USE src, USE len, TEMP_DEF result, TEMP tmp1);
8817-
format %{ "count positives byte[] src:$src, len:$len -> $result TEMP($tmp1) @ count_positives" %}
8818-
ins_cost(200);
8819-
8820-
ins_encode %{
8821-
__ count_positives($src$$Register, $len$$Register, $result$$Register, $tmp1$$Register);
8822-
%}
8823-
8824-
ins_pipe( pipe_slow );
8825-
%}
8821+
effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2);
88268822

8827-
instruct count_positives_v(a4_RegP src, mA5RegI len, mRegI result, mRegL tmp1) %{
8828-
predicate(UseLSX);
8829-
match(Set result (CountPositives src len));
8830-
effect(USE src, USE len, TEMP_DEF result, TEMP tmp1);
8831-
format %{ "vectorized count positives byte[] src:$src, len:$len -> $result TEMP($tmp1) @ count_positives_v" %}
8832-
ins_cost(150);
8823+
format %{ "count positives byte[] src:$src, len:$len -> $result TEMP($tmp1, $tmp2) @ count_positives" %}
88338824

88348825
ins_encode %{
8835-
__ count_positives_v($src$$Register, $len$$Register, $result$$Register, $tmp1$$Register);
8826+
__ count_positives($src$$Register, $len$$Register, $result$$Register,
8827+
$tmp1$$Register, $tmp2$$Register);
88368828
%}
88378829

88388830
ins_pipe( pipe_slow );
@@ -8863,16 +8855,19 @@ instruct string_compress(a2_RegP src, mRegP dst, mRegI len, mRegI result,
88638855

88648856
// byte[] to char[] inflation
88658857
instruct string_inflate(Universe dummy, a4_RegP src, a5_RegP dst, mA6RegI len,
8866-
regF vtemp1, regF vtemp2)
8858+
mRegL tmp1, mRegL tmp2, regF vtemp1, regF vtemp2)
88678859
%{
88688860
predicate(UseLSX);
88698861
match(Set dummy (StrInflatedCopy src (Binary dst len)));
8870-
effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP vtemp1, TEMP vtemp2);
8862+
effect(TEMP tmp1, TEMP tmp2, TEMP vtemp1, TEMP vtemp2,
8863+
USE_KILL src, USE_KILL dst, USE_KILL len);
88718864

8872-
format %{ "String Inflate $src,$dst @ string_inflate " %}
8865+
format %{ "String Inflate $src, $dst, len:$len "
8866+
"TEMP($tmp1, $tmp2, $vtemp1, $vtemp2) @ string_inflate " %}
88738867

88748868
ins_encode %{
88758869
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
8870+
$tmp1$$Register, $tmp2$$Register,
88768871
$vtemp1$$FloatRegister, $vtemp2$$FloatRegister);
88778872
%}
88788873

0 commit comments

Comments
 (0)