From 30ddc67fecfea86c90032df93a62d1e0e0031d80 Mon Sep 17 00:00:00 2001 From: Jan Vrany Date: Fri, 1 May 2020 22:51:37 +0100 Subject: [PATCH 1/4] RISC-V: decrement refcount on second child in compare evaluators Signed-off-by: Jan Vrany --- compiler/riscv/codegen/FPTreeEvaluator.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler/riscv/codegen/FPTreeEvaluator.cpp b/compiler/riscv/codegen/FPTreeEvaluator.cpp index d6e1381aac..9ee8cab6ff 100644 --- a/compiler/riscv/codegen/FPTreeEvaluator.cpp +++ b/compiler/riscv/codegen/FPTreeEvaluator.cpp @@ -470,6 +470,7 @@ compareHelper(TR::Node *node, TR::InstOpCode::Mnemonic op, bool reverse, TR::Cod generateRTYPE(op, node, trgReg, src1Reg, src2Reg, cg); cg->decReferenceCount(firstChild); + cg->decReferenceCount(secondChild); node->setRegister(trgReg); return trgReg; } From 4963e7c86878cb00fff672a1e84d84c77c64fb46 Mon Sep 17 00:00:00 2001 From: Jan Vrany Date: Fri, 1 May 2020 23:13:20 +0100 Subject: [PATCH 2/4] RISC-V: fix fcmpne / dcmpne w.r.t NaN values fcmpne / dcmpne should return 0 if either operand is NaN. Since feq.s / feq.d with either operand NaN return 0, we have to explicitly check for NaN value. Signed-off-by: Jan Vrany --- compiler/riscv/codegen/FPTreeEvaluator.cpp | 51 ++++++++++++++++++---- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/compiler/riscv/codegen/FPTreeEvaluator.cpp b/compiler/riscv/codegen/FPTreeEvaluator.cpp index 9ee8cab6ff..4a81a15180 100644 --- a/compiler/riscv/codegen/FPTreeEvaluator.cpp +++ b/compiler/riscv/codegen/FPTreeEvaluator.cpp @@ -475,6 +475,45 @@ compareHelper(TR::Node *node, TR::InstOpCode::Mnemonic op, bool reverse, TR::Cod return trgReg; } +static TR::Register * +compareNotEqualHelper(TR::Node *node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator *cg) + { + TR::Node *firstChild = node->getFirstChild(); + TR::Register *src1Reg = cg->evaluate(firstChild); + TR::Node *secondChild = node->getSecondChild(); + TR::Register *src2Reg = cg->evaluate(secondChild); + + TR::Register *tmpReg = cg->allocateRegister(); + TR::Register *trgReg = cg->allocateRegister(); + + /* + * fcmpne / dcmpne should return 0 if either operand is + * NaN. Since feq.s / feq.d returns 0 in that case, simply + * comparing for equality (using feq.s / feq.d) and negating + * the result is not enough. + * + * We have to explicitly check for both operands to be non-NaN + * before negating result of feq.s / feq.d. To check whether a value + * is NaN, we again use feq.s / feq.d on the same value, this returns + * 0 only when the value is NaN. The result of fcmpne / dcmpne is then + * logical and of all three comparisons. This way, we avoid branching. + */ + + generateRTYPE(op, node, trgReg, src1Reg, src1Reg, cg); + generateRTYPE(op, node, tmpReg, src2Reg, src2Reg, cg); + generateRTYPE(TR::InstOpCode::_and, node, trgReg, trgReg, tmpReg, cg); + + generateRTYPE(op, node, tmpReg, src1Reg, src2Reg, cg); + generateITYPE(TR::InstOpCode::_xori, node, tmpReg, tmpReg, 1, cg); + generateRTYPE(TR::InstOpCode::_and, node, trgReg, trgReg, tmpReg, cg); + + cg->decReferenceCount(firstChild); + cg->decReferenceCount(secondChild); + cg->stopUsingRegister(tmpReg); + node->setRegister(trgReg); + return trgReg; + } + TR::Register * OMR::RV::TreeEvaluator::fcmpeqEvaluator(TR::Node *node, TR::CodeGenerator *cg) { @@ -484,9 +523,7 @@ OMR::RV::TreeEvaluator::fcmpeqEvaluator(TR::Node *node, TR::CodeGenerator *cg) TR::Register * OMR::RV::TreeEvaluator::fcmpneEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - TR::Register *trgReg = compareHelper(node, TR::InstOpCode::_feq_s, false, cg); - generateITYPE(TR::InstOpCode::_xori, node, trgReg, trgReg, 1, cg); - return trgReg; + return compareNotEqualHelper(node, TR::InstOpCode::_feq_s, cg); } TR::Register * @@ -521,11 +558,9 @@ OMR::RV::TreeEvaluator::dcmpeqEvaluator(TR::Node *node, TR::CodeGenerator *cg) TR::Register * OMR::RV::TreeEvaluator::dcmpneEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - TR::Register *trgReg = compareHelper(node, TR::InstOpCode::_feq_d, false, cg); - generateITYPE(TR::InstOpCode::_xori, node, trgReg, trgReg, 1, cg); - return trgReg; - } + { + return compareNotEqualHelper(node, TR::InstOpCode::_feq_d, cg); + } TR::Register * OMR::RV::TreeEvaluator::dcmpltEvaluator(TR::Node *node, TR::CodeGenerator *cg) From 1f3fd6269d18a9aadaec550e70756bb35e4dd642 Mon Sep 17 00:00:00 2001 From: Jan Vrany Date: Fri, 8 May 2020 10:50:25 +0100 Subject: [PATCH 3/4] RISC-V: fix [fd]min / [fd]max w.r.t NaN values [fd]min / [fd]max should return 0 if either operand is NaN. This commit fixes evaluators by first checking for NaN before doing actual comparison. Signed-off-by: Jan Vrany --- compiler/riscv/codegen/FPTreeEvaluator.cpp | 40 +++++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/compiler/riscv/codegen/FPTreeEvaluator.cpp b/compiler/riscv/codegen/FPTreeEvaluator.cpp index 4a81a15180..6845c75748 100644 --- a/compiler/riscv/codegen/FPTreeEvaluator.cpp +++ b/compiler/riscv/codegen/FPTreeEvaluator.cpp @@ -604,7 +604,7 @@ OMR::RV::TreeEvaluator::fRegLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg) } static TR::Register * -commonFpMinMaxEvaluator(TR::Node *node, TR::InstOpCode::Mnemonic op, bool reverse, bool isDouble, TR::CodeGenerator *cg) +commonFpMinMaxEvaluator(TR::Node *node, TR::InstOpCode::Mnemonic cmpOp, bool reverse, bool isDouble, TR::CodeGenerator *cg) { TR_ASSERT(node->getNumChildren() == 2, "The number of children for fmax/fmin/dmax/dmin must be 2."); @@ -617,6 +617,8 @@ commonFpMinMaxEvaluator(TR::Node *node, TR::InstOpCode::Mnemonic op, bool revers TR::RealRegister *zero = cg->machine()->getRealRegister(TR::RealRegister::zero); TR::Register *trgReg; + TR::InstOpCode::Mnemonic feqOp = isDouble ? TR::InstOpCode::_feq_d : TR::InstOpCode::_feq_s; + if (cg->canClobberNodesRegister(firstChild)) { trgReg = src1Reg; // use the first child as the target @@ -631,27 +633,55 @@ commonFpMinMaxEvaluator(TR::Node *node, TR::InstOpCode::Mnemonic op, bool revers } TR::LabelSymbol *startLabel = generateLabelSymbol(cg); + TR::LabelSymbol *moveSrc2RegToTrgReg = generateLabelSymbol(cg); TR::LabelSymbol *joinLabel = generateLabelSymbol(cg); startLabel->setStartInternalControlFlow(); joinLabel->setEndInternalControlFlow(); - TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 3, cg->trMemory()); + TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg->trMemory()); deps->addPostCondition(cmpReg, TR::RealRegister::NoReg); deps->addPostCondition(trgReg, TR::RealRegister::NoReg); + deps->addPostCondition(src1Reg, TR::RealRegister::NoReg); deps->addPostCondition(src2Reg, TR::RealRegister::NoReg); + generateLABEL(cg, TR::InstOpCode::label, node, startLabel); + + /* + * Check if src1Reg is NaN, if so then result is NaN. + * + * Note, that the value is already stored in trgReg, so we check + * src1Reg (using feq.s / feq.d) and if it's NaN, we just jump + * to the end (joinLabel) + */ + generateRTYPE(feqOp, node, cmpReg, src1Reg, src1Reg, cg); + generateBTYPE(TR::InstOpCode::_beq, node, joinLabel, cmpReg, zero, cg); + + /* + * Check if src2Reg is NaN, if so then result is NaN. + * + * If src2Reg is NaN, we have to move it to trgReg, just like + * later on when we compare and (eventually) move src2Reg into trgReg. + * So if src2Reg is NaN, just jump over comparison to that move. + */ + generateRTYPE(feqOp, node, cmpReg, src2Reg, src2Reg, cg); + generateBTYPE(TR::InstOpCode::_beq, node, moveSrc2RegToTrgReg, cmpReg, zero, cg); + + /* + * Finally, compare the two values. + */ if (reverse) - generateRTYPE(op, node, cmpReg, src2Reg, src1Reg, cg); + generateRTYPE(cmpOp, node, cmpReg, src2Reg, src1Reg, cg); else - generateRTYPE(op, node, cmpReg, src1Reg, src2Reg, cg); + generateRTYPE(cmpOp, node, cmpReg, src1Reg, src2Reg, cg); - generateLABEL(cg, TR::InstOpCode::label, node, startLabel); generateBTYPE(TR::InstOpCode::_bne, node, joinLabel, cmpReg, zero, cg); + generateLABEL(cg, TR::InstOpCode::label, node, moveSrc2RegToTrgReg); if (isDouble) generateRTYPE(TR::InstOpCode::_fsgnj_d, node, trgReg, src2Reg, src2Reg, cg); else generateRTYPE(TR::InstOpCode::_fsgnj_s, node, trgReg, src2Reg, src2Reg, cg); + generateLABEL(cg, TR::InstOpCode::label, node, joinLabel, deps); cg->stopUsingRegister(cmpReg); From 0a505f87c2470fbd37be063999e0f76b77170169 Mon Sep 17 00:00:00 2001 From: Jan Vrany Date: Fri, 29 May 2020 10:56:36 +0100 Subject: [PATCH 4/4] RISC-V: fix [fd]2[il] w.r.t NaN values Converting floating point value to integral-typed value should produce 0. To do so, we have to check for NaN value explicitly. Signed-off-by: Jan Vrany --- compiler/riscv/codegen/FPTreeEvaluator.cpp | 181 ++++++++++++++------- 1 file changed, 126 insertions(+), 55 deletions(-) diff --git a/compiler/riscv/codegen/FPTreeEvaluator.cpp b/compiler/riscv/codegen/FPTreeEvaluator.cpp index 6845c75748..5fc22ba8ab 100644 --- a/compiler/riscv/codegen/FPTreeEvaluator.cpp +++ b/compiler/riscv/codegen/FPTreeEvaluator.cpp @@ -316,101 +316,172 @@ OMR::RV::TreeEvaluator::dnegEvaluator(TR::Node *node, TR::CodeGenerator *cg) return doublePrecisionUnaryEvaluator(node, TR::InstOpCode::_fsgnjn_d, cg); } -static TR::Register * -conversionHelper(TR::Node *node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator *cg) -{ +static TR::Register* +commonFPtoINTconversionEvaluator(TR::Node *node, TR::InstOpCode::Mnemonic op, + TR::CodeGenerator *cg) + { TR::Node *firstChild = node->getFirstChild(); TR::Register *src1Reg = cg->evaluate(firstChild); TR::Register *zero = cg->machine()->getRealRegister(TR::RealRegister::zero); - TR::Register *trgReg = cg->allocateRegister(node->getDataType().isFloatingPoint() ? TR_FPR : TR_GPR); + TR::Register *trgReg = cg->allocateRegister(TR_GPR); + + /* + * Converting NaN to integral type should produce 0. Since + * RISC-V fcvt* instructions don't do it, we have to test + * for NaN before and eventually load 0 into result (target) + * register. + */ + TR::LabelSymbol *startLabel = generateLabelSymbol(cg); + TR::LabelSymbol *joinLabel = generateLabelSymbol(cg); + + startLabel->setStartInternalControlFlow(); + joinLabel->setEndInternalControlFlow(); + + TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg->trMemory()); + + deps->addPostCondition(trgReg, TR::RealRegister::NoReg); + deps->addPostCondition(src1Reg, TR::RealRegister::NoReg); + + generateLABEL(cg, TR::InstOpCode::label, node, startLabel); + + /* + * Check if srcReg is NaN, if so then result 0. + * + * To do so, we use _feq_d / _feq_s on the same register + * which for NaN value returns 0. This is very convenient + * since 0 is the desired result value, so by using target + * register to hold the result of comparison, we can just + * jump over the actual conversion via fcvt_?_?. + */ + TR::InstOpCode::Mnemonic feqOp = firstChild->getDataType().isDouble() + ? TR::InstOpCode::_feq_d + : TR::InstOpCode::_feq_s; + generateRTYPE(feqOp, node, trgReg, src1Reg, src1Reg, cg); + generateBTYPE(TR::InstOpCode::_beq, node, joinLabel, trgReg, zero, cg); + /* + * Now do the actual conversion + */ generateRTYPE(op, node, trgReg, src1Reg, zero, cg); + generateLABEL(cg, TR::InstOpCode::label, node, joinLabel, deps); + cg->decReferenceCount(firstChild); node->setRegister(trgReg); return trgReg; -} + } + +static TR::Register* +commonINTtoFPconversionEvaluator(TR::Node *node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator *cg) + { + TR::Node *firstChild = node->getFirstChild(); + TR::Register *src1Reg = cg->evaluate(firstChild); + TR::Register *zero = cg->machine()->getRealRegister(TR::RealRegister::zero); + TR::Register *trgReg = cg->allocateRegister(TR_FPR); + + generateRTYPE(op, node, trgReg, src1Reg, zero, cg); + + cg->decReferenceCount(firstChild); + node->setRegister(trgReg); + return trgReg; + } + +static TR::Register* +commonFPPtoFPconversionEvaluator(TR::Node *node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator *cg) + { + TR::Node *firstChild = node->getFirstChild(); + TR::Register *src1Reg = cg->evaluate(firstChild); + TR::Register *zero = cg->machine()->getRealRegister(TR::RealRegister::zero); + TR::Register *trgReg = cg->allocateRegister(TR_FPR); + + generateRTYPE(op, node, trgReg, src1Reg, zero, cg); + + cg->decReferenceCount(firstChild); + node->setRegister(trgReg); + return trgReg; + } + TR::Register * OMR::RV::TreeEvaluator::i2fEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return conversionHelper(node, TR::InstOpCode::_fcvt_s_w, cg); - } + { + return commonINTtoFPconversionEvaluator(node, TR::InstOpCode::_fcvt_s_w, cg); + } TR::Register * OMR::RV::TreeEvaluator::i2dEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return conversionHelper(node, TR::InstOpCode::_fcvt_d_w, cg); + return commonINTtoFPconversionEvaluator(node, TR::InstOpCode::_fcvt_d_w, cg); } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::l2fEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return conversionHelper(node, TR::InstOpCode::_fcvt_s_l, cg); - } + { + return commonINTtoFPconversionEvaluator(node, TR::InstOpCode::_fcvt_s_l, cg); + } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::l2dEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return conversionHelper(node, TR::InstOpCode::_fcvt_d_l, cg); - } + { + return commonINTtoFPconversionEvaluator(node, TR::InstOpCode::_fcvt_d_l, cg); + } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::f2dEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return conversionHelper(node, TR::InstOpCode::_fcvt_d_s, cg); - } + { + return commonFPPtoFPconversionEvaluator(node, TR::InstOpCode::_fcvt_d_s, cg); + } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::f2iEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return conversionHelper(node, TR::InstOpCode::_fcvt_w_s, cg); - } + { + return commonFPtoINTconversionEvaluator(node, TR::InstOpCode::_fcvt_w_s, cg); + } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::d2iEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return conversionHelper(node, TR::InstOpCode::_fcvt_w_d, cg); + { + return commonFPtoINTconversionEvaluator(node, TR::InstOpCode::_fcvt_w_d, cg); } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::d2cEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - // TODO:RV: Enable TR::TreeEvaluator::d2cEvaluator in compiler/aarch64/codegen/TreeEvaluatorTable.hpp when Implemented. - return OMR::RV::TreeEvaluator::unImpOpEvaluator(node, cg); - } + { + // TODO:RV: Enable TR::TreeEvaluator::d2cEvaluator in compiler/aarch64/codegen/TreeEvaluatorTable.hpp when Implemented. + return OMR::RV::TreeEvaluator::unImpOpEvaluator(node, cg); + } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::d2sEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - // TODO:RV: Enable TR::TreeEvaluator::d2sEvaluator in compiler/aarch64/codegen/TreeEvaluatorTable.hpp when Implemented. - return OMR::RV::TreeEvaluator::unImpOpEvaluator(node, cg); - } + { + // TODO:RV: Enable TR::TreeEvaluator::d2sEvaluator in compiler/aarch64/codegen/TreeEvaluatorTable.hpp when Implemented. + return OMR::RV::TreeEvaluator::unImpOpEvaluator(node, cg); + } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::d2bEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - // TODO:RV: Enable TR::TreeEvaluator::d2bEvaluator in compiler/aarch64/codegen/TreeEvaluatorTable.hpp when Implemented. - return OMR::RV::TreeEvaluator::unImpOpEvaluator(node, cg); - } + { + // TODO:RV: Enable TR::TreeEvaluator::d2bEvaluator in compiler/aarch64/codegen/TreeEvaluatorTable.hpp when Implemented. + return OMR::RV::TreeEvaluator::unImpOpEvaluator(node, cg); + } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::f2lEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return conversionHelper(node, TR::InstOpCode::_fcvt_l_s, cg); - } + { + return commonFPtoINTconversionEvaluator(node, TR::InstOpCode::_fcvt_l_s, cg); + } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::d2lEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return conversionHelper(node, TR::InstOpCode::_fcvt_l_d, cg); - } + { + return commonFPtoINTconversionEvaluator(node, TR::InstOpCode::_fcvt_l_d, cg); + } -TR::Register * +TR::Register* OMR::RV::TreeEvaluator::d2fEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return conversionHelper(node, TR::InstOpCode::_fcvt_s_d, cg); - } + { + return commonFPPtoFPconversionEvaluator(node, TR::InstOpCode::_fcvt_s_d, cg); + } TR::Register * OMR::RV::TreeEvaluator::ifdcmpeqEvaluator(TR::Node *node, TR::CodeGenerator *cg)