Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8318158: RISC-V: implement roundD/roundF intrinsics #16382

Closed
wants to merge 14 commits into from
Closed
51 changes: 51 additions & 0 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4239,6 +4239,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
bge(cnt, tmp1, loop);
}

// java.lang.Math.round(float a)
// Returns the closest int to the argument, with ties rounding to positive infinity.
void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
// this instructions calling sequence provides performance improvement on all tested devices;
// don't change it without re-verification
Label done;
mv(t0, jint_cast(0.5f));
fmv_w_x(ftmp, t0);

// dst = 0 if NaN
feq_s(t0, src, src); // replacing fclass with feq as performance optimization
mv(dst, zr);
beqz(t0, done);

// dst = (src + 0.5f) rounded down towards negative infinity
// Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
// RDN is required for fadd_s, RNE gives incorrect results:
// --------------------------------------------------------------------
// fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
// fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
// --------------------------------------------------------------------
// fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
// fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
// --------------------------------------------------------------------
fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
fcvt_w_s(dst, ftmp, RoundingMode::rdn);

bind(done);
}

// java.lang.Math.round(double a)
// Returns the closest long to the argument, with ties rounding to positive infinity.
void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
// this instructions calling sequence provides performance improvement on all tested devices;
// don't change it without re-verification
Label done;
mv(t0, julong_cast(0.5));
fmv_d_x(ftmp, t0);

// dst = 0 if NaN
feq_d(t0, src, src); // replacing fclass with feq as performance optimization
mv(dst, zr);
beqz(t0, done);

// dst = (src + 0.5) rounded down towards negative infinity
fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
fcvt_l_d(dst, ftmp, RoundingMode::rdn);

bind(done);
}

#define FCVT_SAFE(FLOATCVT, FLOATSIG) \
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
Label done; \
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1251,6 +1251,9 @@ class MacroAssembler: public Assembler {
void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);

void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);

// vector load/store unit-stride instructions
void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
switch (sew) {
Expand Down
28 changes: 28 additions & 0 deletions src/hotspot/cpu/riscv/riscv.ad
Original file line number Diff line number Diff line change
Expand Up @@ -8353,6 +8353,34 @@ instruct convN2I(iRegINoSp dst, iRegN src)
ins_pipe(ialu_reg);
%}

instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{
match(Set dst (RoundD src));

ins_cost(XFER_COST + BRANCH_COST);
effect(TEMP ftmp);
format %{ "java_round_double $dst, $src\t#@round_double_reg" %}

ins_encode %{
__ java_round_double($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
%}

ins_pipe(pipe_slow);
%}

instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{
match(Set dst (RoundF src));

ins_cost(XFER_COST + BRANCH_COST);
effect(TEMP ftmp);
format %{ "java_round_float $dst, $src\t#@round_float_reg" %}

ins_encode %{
__ java_round_float($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
%}

ins_pipe(pipe_slow);
%}

// Convert oop pointer into compressed form
instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
match(Set dst (EncodeP src));
Expand Down