Skip to content

Commit f4b834a

Browse files
authored
Fix vreduce for small width vector operations (#7)
1 parent 170a5b6 commit f4b834a

File tree

4 files changed

+60
-28
lines changed

4 files changed

+60
-28
lines changed

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1635,11 +1635,16 @@ void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, Vec
16351635
void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst,
16361636
FloatRegister src1, VectorRegister src2,
16371637
VectorRegister tmp1, VectorRegister tmp2,
1638-
bool is_double, bool is_min) {
1638+
bool is_double, bool is_min, int length_in_bytes) {
16391639
assert_different_registers(src2, tmp1, tmp2);
16401640

16411641
Label L_done, L_NaN;
1642-
vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32);
1642+
if (length_in_bytes != MaxVectorSize) {
1643+
mv(t0, length_in_bytes / type2aelembytes(is_double ? T_DOUBLE : T_FLOAT));
1644+
vsetvli(t0, t0, is_double ? Assembler::e64 : Assembler::e32);
1645+
} else {
1646+
vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32);
1647+
}
16431648
vfmv_s_f(tmp2, src1);
16441649

16451650
is_min ? vfredmin_vs(tmp1, src2, tmp2)
@@ -1672,11 +1677,16 @@ bool C2_MacroAssembler::in_scratch_emit_size() {
16721677

16731678
void C2_MacroAssembler::rvv_reduce_integral(Register dst, VectorRegister tmp,
16741679
Register src1, VectorRegister src2,
1675-
BasicType bt, int opc) {
1680+
BasicType bt, int opc, int length_in_bytes) {
16761681
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported element type");
16771682

16781683
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
1679-
vsetvli(t0, x0, sew);
1684+
if (length_in_bytes != MaxVectorSize) {
1685+
mv(t0, length_in_bytes / type2aelembytes(bt));
1686+
vsetvli(t0, t0, sew);
1687+
} else {
1688+
vsetvli(t0, x0, sew);
1689+
}
16801690

16811691
vmv_s_x(tmp, src1);
16821692

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,10 @@
190190
void reduce_minmax_FD_v(FloatRegister dst,
191191
FloatRegister src1, VectorRegister src2,
192192
VectorRegister tmp1, VectorRegister tmp2,
193-
bool is_double, bool is_min);
193+
bool is_double, bool is_min, int length_in_bytes);
194194

195195
void rvv_reduce_integral(Register dst, VectorRegister tmp,
196196
Register src1, VectorRegister src2,
197-
BasicType bt, int opc);
197+
BasicType bt, int opc, int length_in_bytes);
198198

199199
#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP

src/hotspot/cpu/riscv/riscv.ad

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1936,14 +1936,12 @@ const int Matcher::vector_width_in_bytes(BasicType bt) {
19361936
const int Matcher::max_vector_size(const BasicType bt) {
19371937
return vector_width_in_bytes(bt) / type2aelembytes(bt);
19381938
}
1939+
19391940
const int Matcher::min_vector_size(const BasicType bt) {
19401941
int max_size = max_vector_size(bt);
19411942
// Limit the min vector size to 8 bytes.
19421943
int size = 8 / type2aelembytes(bt);
1943-
if (bt == T_BYTE) {
1944-
// To support vector api shuffle/rearrange.
1945-
size = 4;
1946-
} else if (bt == T_BOOLEAN) {
1944+
if (bt == T_BOOLEAN) {
19471945
// To support vector api load/store mask.
19481946
size = 2;
19491947
}

src/hotspot/cpu/riscv/riscv_v.ad

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,8 @@ instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
828828
ins_encode %{
829829
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
830830
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
831-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
831+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
832+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
832833
%}
833834
ins_pipe(pipe_slow);
834835
%}
@@ -844,7 +845,8 @@ instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
844845
ins_encode %{
845846
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
846847
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
847-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
848+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
849+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
848850
%}
849851
ins_pipe(pipe_slow);
850852
%}
@@ -864,7 +866,8 @@ instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
864866
ins_encode %{
865867
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
866868
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
867-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
869+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
870+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
868871
%}
869872
ins_pipe(pipe_slow);
870873
%}
@@ -880,7 +883,8 @@ instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
880883
ins_encode %{
881884
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
882885
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
883-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
886+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
887+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
884888
%}
885889
ins_pipe(pipe_slow);
886890
%}
@@ -900,7 +904,8 @@ instruct reduce_xorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
900904
ins_encode %{
901905
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
902906
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
903-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
907+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
908+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
904909
%}
905910
ins_pipe(pipe_slow);
906911
%}
@@ -916,7 +921,8 @@ instruct reduce_xorL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
916921
ins_encode %{
917922
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
918923
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
919-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
924+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
925+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
920926
%}
921927
ins_pipe(pipe_slow);
922928
%}
@@ -936,7 +942,8 @@ instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
936942
ins_encode %{
937943
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
938944
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
939-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
945+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
946+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
940947
%}
941948
ins_pipe(pipe_slow);
942949
%}
@@ -952,7 +959,8 @@ instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
952959
ins_encode %{
953960
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
954961
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
955-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
962+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
963+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
956964
%}
957965
ins_pipe(pipe_slow);
958966
%}
@@ -965,7 +973,13 @@ instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{
965973
"vfredosum.vs $tmp, $src2, $tmp\n\t"
966974
"vfmv.f.s $src1_dst, $tmp" %}
967975
ins_encode %{
968-
__ vsetvli(t0, x0, Assembler::e32);
976+
int length_in_bytes = Matcher::vector_length_in_bytes(this, $src2);
977+
if (length_in_bytes != MaxVectorSize) {
978+
__ mv(t0, length_in_bytes / type2aelembytes(T_FLOAT));
979+
__ vsetvli(t0, t0, Assembler::e32);
980+
} else {
981+
__ vsetvli(t0, x0, Assembler::e32);
982+
}
969983
__ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister);
970984
__ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
971985
as_VectorRegister($tmp$$reg));
@@ -982,7 +996,13 @@ instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{
982996
"vfredosum.vs $tmp, $src2, $tmp\n\t"
983997
"vfmv.f.s $src1_dst, $tmp" %}
984998
ins_encode %{
985-
__ vsetvli(t0, x0, Assembler::e64);
999+
int length_in_bytes = Matcher::vector_length_in_bytes(this, $src2);
1000+
if (length_in_bytes != MaxVectorSize) {
1001+
__ mv(t0, length_in_bytes / type2aelembytes(T_DOUBLE));
1002+
__ vsetvli(t0, t0, Assembler::e64);
1003+
} else {
1004+
__ vsetvli(t0, x0, Assembler::e64);
1005+
}
9861006
__ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister);
9871007
__ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
9881008
as_VectorRegister($tmp$$reg));
@@ -1004,7 +1024,8 @@ instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
10041024
ins_encode %{
10051025
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
10061026
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
1007-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
1027+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
1028+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
10081029
%}
10091030
ins_pipe(pipe_slow);
10101031
%}
@@ -1018,7 +1039,8 @@ instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
10181039
ins_encode %{
10191040
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
10201041
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
1021-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
1042+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
1043+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
10221044
%}
10231045
ins_pipe(pipe_slow);
10241046
%}
@@ -1036,7 +1058,8 @@ instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
10361058
ins_encode %{
10371059
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
10381060
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
1039-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
1061+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
1062+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
10401063
%}
10411064
ins_pipe(pipe_slow);
10421065
%}
@@ -1050,7 +1073,8 @@ instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
10501073
ins_encode %{
10511074
BasicType bt = Matcher::vector_element_basic_type(this, $src2);
10521075
__ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
1053-
$src1$$Register, as_VectorRegister($src2$$reg), bt, this->ideal_Opcode());
1076+
$src1$$Register, as_VectorRegister($src2$$reg), bt,
1077+
this->ideal_Opcode(), Matcher::vector_length_in_bytes(this, $src2));
10541078
%}
10551079
ins_pipe(pipe_slow);
10561080
%}
@@ -1067,7 +1091,7 @@ instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{
10671091
__ reduce_minmax_FD_v($dst$$FloatRegister,
10681092
$src1$$FloatRegister, as_VectorRegister($src2$$reg),
10691093
as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
1070-
false /* is_double */, false /* is_min */);
1094+
false /* is_double */, false /* is_min */, Matcher::vector_length_in_bytes(this, $src2));
10711095
%}
10721096
ins_pipe(pipe_slow);
10731097
%}
@@ -1082,7 +1106,7 @@ instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{
10821106
__ reduce_minmax_FD_v($dst$$FloatRegister,
10831107
$src1$$FloatRegister, as_VectorRegister($src2$$reg),
10841108
as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
1085-
true /* is_double */, false /* is_min */);
1109+
true /* is_double */, false /* is_min */, Matcher::vector_length_in_bytes(this, $src2));
10861110
%}
10871111
ins_pipe(pipe_slow);
10881112
%}
@@ -1099,7 +1123,7 @@ instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{
10991123
__ reduce_minmax_FD_v($dst$$FloatRegister,
11001124
$src1$$FloatRegister, as_VectorRegister($src2$$reg),
11011125
as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
1102-
false /* is_double */, true /* is_min */);
1126+
false /* is_double */, true /* is_min */, Matcher::vector_length_in_bytes(this, $src2));
11031127
%}
11041128
ins_pipe(pipe_slow);
11051129
%}
@@ -1114,7 +1138,7 @@ instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{
11141138
__ reduce_minmax_FD_v($dst$$FloatRegister,
11151139
$src1$$FloatRegister, as_VectorRegister($src2$$reg),
11161140
as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
1117-
true /* is_double */, true /* is_min */);
1141+
true /* is_double */, true /* is_min */, Matcher::vector_length_in_bytes(this, $src2));
11181142
%}
11191143
ins_pipe(pipe_slow);
11201144
%}

0 commit comments

Comments
 (0)