Skip to content

Commit 9a352ac

Browse files
authored
Update (2023.12.27, 2nd)
33272: Implement ensureMaterializedForStackWalk intrinsic; 32033: Add vector intrinsics for unsigned (zero-extended) casts; 33177: Implement poly1305 intrinsic; 33257: java/lang/Math/RoundTests.java failed with -Xcomp after 32796; 33134: 8320959: jdk/jfr/event/runtime/TestShutdownEvent.java crash with CONF=fastdebug -Xcomp
1 parent 64e0649 commit 9a352ac

File tree

9 files changed

+527
-86
lines changed

9 files changed

+527
-86
lines changed

src/hotspot/cpu/loongarch/loongarch_64.ad

Lines changed: 78 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,9 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
10401040
if (vlen < 4 || !UseLASX)
10411041
return false;
10421042
break;
1043+
case Op_VectorUCastB2X:
1044+
case Op_VectorUCastS2X:
1045+
case Op_VectorUCastI2X:
10431046
case Op_VectorCastB2X:
10441047
case Op_VectorCastS2X:
10451048
case Op_VectorCastI2X:
@@ -2434,6 +2437,11 @@ encode %{
24342437
ciEnv::current()->record_failure("CodeCache is full");
24352438
return;
24362439
}
2440+
} else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
2441+
// The NOP here is purely to ensure that eliding a call to
2442+
// JVM_EnsureMaterializedForStackWalk doesn't change the code size.
2443+
__ nop();
2444+
__ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
24372445
} else {
24382446
int method_index = resolved_method_index(cbuf);
24392447
RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
@@ -10885,24 +10893,30 @@ instruct convHF2F_reg_reg(regF dst, mRegI src, regF tmp) %{
1088510893
ins_pipe(pipe_slow);
1088610894
%}
1088710895

10888-
instruct round_float_reg(mRegI dst, regF src, mRegL tmp)
10896+
instruct round_float_reg(mRegI dst, regF src, regF vtemp1)
1088910897
%{
1089010898
match(Set dst (RoundF src));
10891-
effect(TEMP_DEF dst, TEMP tmp);
10892-
format %{ "round_float $dst, $src\t# @round_float_reg" %}
10899+
effect(TEMP_DEF dst, TEMP vtemp1);
10900+
format %{ "round_float $dst, $src\t# "
10901+
"TEMP($vtemp1) @round_float_reg" %}
1089310902
ins_encode %{
10894-
__ java_round_float($dst$$Register, $src$$FloatRegister, $tmp$$Register);
10903+
__ java_round_float($dst$$Register,
10904+
$src$$FloatRegister,
10905+
$vtemp1$$FloatRegister);
1089510906
%}
1089610907
ins_pipe( pipe_slow );
1089710908
%}
1089810909

10899-
instruct round_double_reg(mRegL dst, regD src, mRegL tmp)
10910+
instruct round_double_reg(mRegL dst, regD src, regD vtemp1)
1090010911
%{
1090110912
match(Set dst (RoundD src));
10902-
effect(TEMP_DEF dst, TEMP tmp);
10903-
format %{ "round_double $dst, $src\t# @round_double_reg" %}
10913+
effect(TEMP_DEF dst, TEMP vtemp1);
10914+
format %{ "round_double $dst, $src\t# "
10915+
"TEMP($vtemp1) @round_double_reg" %}
1090410916
ins_encode %{
10905-
__ java_round_double($dst$$Register, $src$$FloatRegister, $tmp$$Register);
10917+
__ java_round_double($dst$$Register,
10918+
$src$$FloatRegister,
10919+
$vtemp1$$FloatRegister);
1090610920
%}
1090710921
ins_pipe( pipe_slow );
1090810922
%}
@@ -14443,7 +14457,8 @@ instruct round_float_lsx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
1444314457
predicate(Matcher::vector_length_in_bytes(n) <= 16);
1444414458
match(Set dst (RoundVF src));
1444514459
effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
14446-
format %{ "round_float_lsx $dst, $src\t# @round_float_lsx" %}
14460+
format %{ "round_float_lsx $dst, $src\t# "
14461+
"TEMP($vtemp1, $vtemp2) @round_float_lsx" %}
1444714462
ins_encode %{
1444814463
__ java_round_float_lsx($dst$$FloatRegister,
1444914464
$src$$FloatRegister,
@@ -14457,7 +14472,8 @@ instruct round_float_lasx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
1445714472
predicate(Matcher::vector_length_in_bytes(n) > 16);
1445814473
match(Set dst (RoundVF src));
1445914474
effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
14460-
format %{ "round_float_lasx $dst, $src\t# @round_float_lasx" %}
14475+
format %{ "round_float_lasx $dst, $src\t# "
14476+
"TEMP($vtemp1, $vtemp2) @round_float_lasx" %}
1446114477
ins_encode %{
1446214478
__ java_round_float_lasx($dst$$FloatRegister,
1446314479
$src$$FloatRegister,
@@ -14471,7 +14487,8 @@ instruct round_double_lsx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
1447114487
predicate(Matcher::vector_length_in_bytes(n) <= 16);
1447214488
match(Set dst (RoundVD src));
1447314489
effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
14474-
format %{ "round_double_lsx $dst, $src\t# @round_double_lsx" %}
14490+
format %{ "round_double_lsx $dst, $src\t# "
14491+
"TEMP($vtemp1, $vtemp2) @round_double_lsx" %}
1447514492
ins_encode %{
1447614493
__ java_round_double_lsx($dst$$FloatRegister,
1447714494
$src$$FloatRegister,
@@ -14485,7 +14502,8 @@ instruct round_double_lasx(vReg dst, vReg src, vReg vtemp1, vReg vtemp2) %{
1448514502
predicate(Matcher::vector_length_in_bytes(n) > 16);
1448614503
match(Set dst (RoundVD src));
1448714504
effect(TEMP_DEF dst, TEMP vtemp1, TEMP vtemp2);
14488-
format %{ "round_double_lasx $dst, $src\t# @round_double_lasx" %}
14505+
format %{ "round_double_lasx $dst, $src\t# "
14506+
"TEMP($vtemp1, $vtemp2) @round_double_lasx" %}
1448914507
ins_encode %{
1449014508
__ java_round_double_lasx($dst$$FloatRegister,
1449114509
$src$$FloatRegister,
@@ -14520,6 +14538,54 @@ instruct roundVD(vReg dst, vReg src, immI rmode) %{
1452014538
ins_pipe( pipe_slow );
1452114539
%}
1452214540

14541+
// ---------------------------- Vector UCast B2X -------------------------------
14542+
14543+
// Matches a VectorUCastB2X node and emits a single vext2xv_*_bu instruction
// from $src into $dst. The instruction variant is selected by the basic type
// returned from Matcher::vector_element_basic_type(this): T_SHORT, T_INT or
// T_LONG. Any other type hits ShouldNotReachHere().
// NOTE(review): presumably that basic type is the destination element type and
// the *_bu variants zero-extend each unsigned byte lane -- confirm against the
// LoongArch LASX manual.
instruct cvtVUB(vReg dst, vReg src) %{
14544+
match(Set dst (VectorUCastB2X src));
14545+
format %{ "(x)vconvert $dst, $src\t# @cvtVUB" %}
14546+
ins_encode %{
14547+
switch (Matcher::vector_element_basic_type(this)) {
14548+
case T_SHORT : __ vext2xv_hu_bu($dst$$FloatRegister, $src$$FloatRegister); break;
14549+
case T_INT : __ vext2xv_wu_bu($dst$$FloatRegister, $src$$FloatRegister); break;
14550+
case T_LONG : __ vext2xv_du_bu($dst$$FloatRegister, $src$$FloatRegister); break;
14551+
default:
14552+
ShouldNotReachHere();
14553+
}
14554+
%}
14555+
ins_pipe( pipe_slow );
14556+
%}
14557+
14558+
// ---------------------------- Vector UCast S2X -------------------------------
14559+
14560+
// Matches a VectorUCastS2X node and emits a single vext2xv_*_hu instruction
// from $src into $dst. The instruction variant is selected by the basic type
// returned from Matcher::vector_element_basic_type(this): T_INT or T_LONG.
// Any other type hits ShouldNotReachHere().
// NOTE(review): presumably that basic type is the destination element type and
// the *_hu variants zero-extend each unsigned halfword lane -- confirm against
// the LoongArch LASX manual.
instruct cvtVUS(vReg dst, vReg src) %{
14561+
match(Set dst (VectorUCastS2X src));
14562+
format %{ "(x)vconvert $dst, $src\t# @cvtVUS" %}
14563+
ins_encode %{
14564+
switch (Matcher::vector_element_basic_type(this)) {
14565+
case T_INT : __ vext2xv_wu_hu($dst$$FloatRegister, $src$$FloatRegister); break;
14566+
case T_LONG : __ vext2xv_du_hu($dst$$FloatRegister, $src$$FloatRegister); break;
14567+
default:
14568+
ShouldNotReachHere();
14569+
}
14570+
%}
14571+
ins_pipe( pipe_slow );
14572+
%}
14573+
14574+
// ---------------------------- Vector UCast I2X -------------------------------
14575+
14576+
// Matches a VectorUCastI2X node and emits vext2xv_du_wu from $src into $dst.
// T_LONG is the only basic type accepted from
// Matcher::vector_element_basic_type(this); any other type hits
// ShouldNotReachHere().
// NOTE(review): presumably that basic type is the destination element type and
// vext2xv_du_wu zero-extends each unsigned word lane to a doubleword -- confirm
// against the LoongArch LASX manual.
instruct cvtVUI(vReg dst, vReg src) %{
14577+
match(Set dst (VectorUCastI2X src));
14578+
format %{ "(x)vconvert $dst, $src\t# @cvtVUI" %}
14579+
ins_encode %{
14580+
switch (Matcher::vector_element_basic_type(this)) {
14581+
case T_LONG : __ vext2xv_du_wu($dst$$FloatRegister, $src$$FloatRegister); break;
14582+
default:
14583+
ShouldNotReachHere();
14584+
}
14585+
%}
14586+
ins_pipe( pipe_slow );
14587+
%}
14588+
1452314589
// ---------------------------- Vector Cast B2X -------------------------------
1452414590

1452514591
instruct cvtVB(vReg dst, vReg src) %{

src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp

Lines changed: 99 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -3717,25 +3717,31 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
37173717
// in the IEEE-754-2008. For single-precision floatings,
37183718
// the following algorithm can be used to effectively
37193719
// implement rounding via standard operations.
3720-
//
3721-
// if src >= 0:
3722-
// dst = floor(src + 0.49999997f)
3723-
// else:
3724-
// dst = floor(src + 0.5f)
37253720
void MacroAssembler::java_round_float(Register dst,
37263721
FloatRegister src,
3727-
Register tmp) {
3722+
FloatRegister vtemp1) {
37283723
block_comment("java_round_float: { ");
3724+
3725+
Label L_abnormal, L_done;
3726+
37293727
li(AT, StubRoutines::la::round_float_imm());
37303728

3731-
movfr2gr_s(tmp, src);
3732-
bstrpick_w(tmp, tmp, 31, 31);
3733-
slli_w(tmp, tmp, 2);
3734-
fldx_s(fscratch, AT, tmp);
3735-
fadd_s(fscratch, fscratch, src);
3729+
// if src is -0.5f, return 0 as result
3730+
fld_s(vtemp1, AT, 0);
3731+
fcmp_ceq_s(FCC0, vtemp1, src);
3732+
bceqz(FCC0, L_abnormal);
3733+
move(dst, R0);
3734+
b(L_done);
37363735

3736+
// else, floor src with the magic number
3737+
bind(L_abnormal);
3738+
fld_s(vtemp1, AT, 4);
3739+
fadd_s(fscratch, vtemp1, src);
37373740
ftintrm_w_s(fscratch, fscratch);
37383741
movfr2gr_s(dst, fscratch);
3742+
3743+
bind(L_done);
3744+
37393745
block_comment("} java_round_float");
37403746
}
37413747

@@ -3745,18 +3751,13 @@ void MacroAssembler::java_round_float_lsx(FloatRegister dst,
37453751
FloatRegister vtemp2) {
37463752
block_comment("java_round_float_lsx: { ");
37473753
li(AT, StubRoutines::la::round_float_imm());
3754+
vldrepl_w(vtemp1, AT, 0); // repl -0.5f
3755+
vldrepl_w(vtemp2, AT, 1); // repl 0.49999997f
37483756

3749-
vldrepl_w(vtemp2, AT, 1); // repl 0.5f
3750-
vslti_w(fscratch, src, 0); // masked add
3751-
vand_v(vtemp2, fscratch, vtemp2);
3752-
vfadd_s(dst, src, vtemp2);
3753-
3754-
vldrepl_w(vtemp1, AT, 0); // repl 0.49999997f
3755-
vnor_v(fscratch, fscratch, fscratch); // rev mask
3756-
vand_v(vtemp1, fscratch, vtemp1);
3757-
vfadd_s(dst, dst, vtemp1);
3758-
3759-
vftintrm_w_s(dst, dst);
3757+
vfcmp_cne_s(fscratch, src, vtemp1); // generate the mask
3758+
vand_v(fscratch, fscratch, src); // clear the special
3759+
vfadd_s(dst, fscratch, vtemp2); // plus the magic
3760+
vftintrm_w_s(dst, dst); // floor the result
37603761
block_comment("} java_round_float_lsx");
37613762
}
37623763

@@ -3766,18 +3767,13 @@ void MacroAssembler::java_round_float_lasx(FloatRegister dst,
37663767
FloatRegister vtemp2) {
37673768
block_comment("java_round_float_lasx: { ");
37683769
li(AT, StubRoutines::la::round_float_imm());
3770+
xvldrepl_w(vtemp1, AT, 0); // repl -0.5f
3771+
xvldrepl_w(vtemp2, AT, 1); // repl 0.49999997f
37693772

3770-
xvldrepl_w(vtemp2, AT, 1); // repl 0.5f
3771-
xvslti_w(fscratch, src, 0); // masked add
3772-
xvand_v(vtemp2, fscratch, vtemp2);
3773-
xvfadd_s(dst, src, vtemp2);
3774-
3775-
xvldrepl_w(vtemp1, AT, 0); // repl 0.49999997f
3776-
xvnor_v(fscratch, fscratch, fscratch); // rev mask
3777-
xvand_v(vtemp1, fscratch, vtemp1);
3778-
xvfadd_s(dst, dst, vtemp1);
3779-
3780-
xvftintrm_w_s(dst, dst);
3773+
xvfcmp_cne_s(fscratch, src, vtemp1); // generate the mask
3774+
xvand_v(fscratch, fscratch, src); // clear the special
3775+
xvfadd_s(dst, fscratch, vtemp2); // plus the magic
3776+
xvftintrm_w_s(dst, dst); // floor the result
37813777
block_comment("} java_round_float_lasx");
37823778
}
37833779

@@ -3786,25 +3782,31 @@ void MacroAssembler::java_round_float_lasx(FloatRegister dst,
37863782
// in the IEEE-754-2008. For double-precision floatings,
37873783
// the following algorithm can be used to effectively
37883784
// implement rounding via standard operations.
3789-
//
3790-
// if src >= 0:
3791-
// dst = floor(src + 0.49999999999999994d)
3792-
// else:
3793-
// dst = floor(src + 0.5d)
37943785
void MacroAssembler::java_round_double(Register dst,
37953786
FloatRegister src,
3796-
Register tmp) {
3787+
FloatRegister vtemp1) {
37973788
block_comment("java_round_double: { ");
3789+
3790+
Label L_abnormal, L_done;
3791+
37983792
li(AT, StubRoutines::la::round_double_imm());
37993793

3800-
movfr2gr_d(tmp, src);
3801-
bstrpick_d(tmp, tmp, 63, 63);
3802-
slli_d(tmp, tmp, 3);
3803-
fldx_d(fscratch, AT, tmp);
3804-
fadd_d(fscratch, fscratch, src);
3794+
// if src is -0.5d, return 0 as result
3795+
fld_d(vtemp1, AT, 0);
3796+
fcmp_ceq_d(FCC0, vtemp1, src);
3797+
bceqz(FCC0, L_abnormal);
3798+
move(dst, R0);
3799+
b(L_done);
38053800

3801+
// else, floor src with the magic number
3802+
bind(L_abnormal);
3803+
fld_d(vtemp1, AT, 8);
3804+
fadd_d(fscratch, vtemp1, src);
38063805
ftintrm_l_d(fscratch, fscratch);
38073806
movfr2gr_d(dst, fscratch);
3807+
3808+
bind(L_done);
3809+
38083810
block_comment("} java_round_double");
38093811
}
38103812

@@ -3814,18 +3816,13 @@ void MacroAssembler::java_round_double_lsx(FloatRegister dst,
38143816
FloatRegister vtemp2) {
38153817
block_comment("java_round_double_lsx: { ");
38163818
li(AT, StubRoutines::la::round_double_imm());
3819+
vldrepl_d(vtemp1, AT, 0); // repl -0.5d
3820+
vldrepl_d(vtemp2, AT, 1); // repl 0.49999999999999994d
38173821

3818-
vldrepl_d(vtemp2, AT, 1); // repl 0.5d
3819-
vslti_d(fscratch, src, 0); // masked add
3820-
vand_v(vtemp2, fscratch, vtemp2);
3821-
vfadd_d(dst, src, vtemp2);
3822-
3823-
vldrepl_d(vtemp1, AT, 0); // repl 0.49999999999999994d
3824-
vnor_v(fscratch, fscratch, fscratch); // rev mask
3825-
vand_v(vtemp1, fscratch, vtemp1);
3826-
vfadd_d(dst, dst, vtemp1);
3827-
3828-
vftintrm_l_d(dst, dst);
3822+
vfcmp_cne_d(fscratch, src, vtemp1); // generate the mask
3823+
vand_v(fscratch, fscratch, src); // clear the special
3824+
vfadd_d(dst, fscratch, vtemp2); // plus the magic
3825+
vftintrm_l_d(dst, dst); // floor the result
38293826
block_comment("} java_round_double_lsx");
38303827
}
38313828

@@ -3835,18 +3832,13 @@ void MacroAssembler::java_round_double_lasx(FloatRegister dst,
38353832
FloatRegister vtemp2) {
38363833
block_comment("java_round_double_lasx: { ");
38373834
li(AT, StubRoutines::la::round_double_imm());
3835+
xvldrepl_d(vtemp1, AT, 0); // repl -0.5d
3836+
xvldrepl_d(vtemp2, AT, 1); // repl 0.49999999999999994d
38383837

3839-
xvldrepl_d(vtemp2, AT, 1); // repl 0.5d
3840-
xvslti_d(fscratch, src, 0); // masked add
3841-
xvand_v(vtemp2, fscratch, vtemp2);
3842-
xvfadd_d(dst, src, vtemp2);
3843-
3844-
xvldrepl_d(vtemp1, AT, 0); // repl 0.49999999999999994d
3845-
xvnor_v(fscratch, fscratch, fscratch); // rev mask
3846-
xvand_v(vtemp1, fscratch, vtemp1);
3847-
xvfadd_d(dst, dst, vtemp1);
3848-
3849-
xvftintrm_l_d(dst, dst);
3838+
xvfcmp_cne_d(fscratch, src, vtemp1); // generate the mask
3839+
xvand_v(fscratch, fscratch, src); // clear the special
3840+
xvfadd_d(dst, fscratch, vtemp2); // plus the magic
3841+
xvftintrm_l_d(dst, dst); // floor the result
38503842
block_comment("} java_round_double_lasx");
38513843
}
38523844

@@ -3922,6 +3914,47 @@ void MacroAssembler::mul_add(Register out, Register in, Register offset,
39223914
bind(L_end);
39233915
}
39243916

3917+
// Add two unsigned inputs and output the carry:
//   dst   = src1 + src2
//   carry = (dst < src2), compared as unsigned by sltu after the add
// src2 is read again after dst has been written, and carry is written last,
// so dst must differ from both src2 and carry (asserted below). carry may
// alias src2; wide_madd relies on that.
3918+
void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry)
3919+
{
3920+
assert_different_registers(dst, carry);
3921+
assert_different_registers(dst, src2);
3922+
add_d(dst, src1, src2);
3923+
sltu(carry, dst, src2);
3924+
}
3925+
3926+
// Add two inputs together with an incoming carry:
//   dst = src1 + src2 + carry
// No carry-out is produced. carry is read after dst has been written by the
// first add, so dst must differ from carry (asserted below).
3927+
void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) {
3928+
assert_different_registers(dst, carry);
3929+
add_d(dst, src1, src2);
3930+
add_d(dst, dst, carry);
3931+
}
3932+
3933+
// Add two unsigned inputs with an incoming carry and output the carry:
//   dst   = src1 + src2 + carry   (via adc)
//   carry = (dst < src2), compared as unsigned by sltu after the add
// src2 is read again after dst has been written, so dst must differ from src2
// (asserted below); adc additionally asserts that dst differs from carry.
// NOTE(review): the carry-out test looks incomplete when the incoming carry is
// 1 and src1 is all ones: dst then equals src2, so carry is set to 0 although
// the true sum overflowed. Confirm that callers never reach that input.
3934+
void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) {
3935+
assert_different_registers(dst, src2);
3936+
adc(dst, src1, src2, carry);
3937+
sltu(carry, dst, src2);
3938+
}
3939+
3940+
// Multiply and multiply-accumulate unsigned 64-bit registers.
//
// wide_mul: multiplies n by m, placing the result of mul_d in prod_lo and the
// result of mulh_du in prod_hi (presumably the low and high 64 bits of the
// unsigned 128-bit product -- confirm against the ISA manual).
// prod_lo and prod_hi must be different registers (asserted below).
// NOTE(review): prod_lo is written before mulh_du reads n and m, so prod_lo
// aliasing n or m would feed a clobbered operand into the second multiply;
// this is not asserted -- verify that callers never alias them.
3941+
void MacroAssembler::wide_mul(Register prod_lo, Register prod_hi, Register n, Register m) {
3942+
assert_different_registers(prod_lo, prod_hi);
3943+
3944+
mul_d(prod_lo, n, m);
3945+
mulh_du(prod_hi, n, m);
3946+
}
3947+
3948+
// Multiply-accumulate: forms the product of n and m with wide_mul
// (tmp1 = low part, tmp2 = high part) and adds it to the register pair
// sum_hi:sum_lo. The carry out of the low-part addition is propagated into
// sum_hi; no carry out of sum_hi is produced, because adc has no carry output.
// tmp1 and tmp2 are clobbered: tmp1 ends up holding the low-part carry.
// sum_lo must differ from sum_hi, and sum_hi must differ from tmp2 (asserted
// below); cad and adc apply their own aliasing asserts to the registers passed.
void MacroAssembler::wide_madd(Register sum_lo, Register sum_hi, Register n,
3949+
Register m, Register tmp1, Register tmp2) {
3950+
assert_different_registers(sum_lo, sum_hi);
3951+
assert_different_registers(sum_hi, tmp2);
3952+
3953+
wide_mul(tmp1, tmp2, n, m);
3954+
cad(sum_lo, sum_lo, tmp1, tmp1); // Add tmp1 to sum_lo with carry output to tmp1
3955+
adc(sum_hi, sum_hi, tmp2, tmp1); // Add tmp2 with carry to sum_hi
3956+
}
3957+
39253958
#ifndef PRODUCT
39263959
void MacroAssembler::verify_cross_modify_fence_not_required() {
39273960
if (VerifyCrossModifyFence) {

0 commit comments

Comments
 (0)