// Patch summary (commentary only; the diff text below is unchanged).
//
// Replaces the unImpOpEvaluator stubs for vpopcnt / vmpopcnt in the AArch64 tree
// evaluator with real implementations. Four files under compiler/aarch64/codegen
// are touched:
//
//  * OMRCodeGenerator.cpp
//      getSupportsOpCodeForAutoSIMD: TR::vpopcnt and TR::vmpopcnt join the case
//      group that returns true only for Int8, Int16, Int32 and Int64 element types.
//
//  * OMRTreeEvaluator.cpp
//      - inlineVectorMaskedUnaryOp changes from a file-static function to a member
//        of OMR::ARM64::TreeEvaluator and takes an extra unaryEvaluatorHelper
//        argument. When the helper is non-NULL it is called as
//        (node, resReg, srcReg, cg); otherwise the single instruction `op` is
//        generated as before. A fatal assert fires if `op` is TR::InstOpCode::bad
//        and no helper was supplied.
//      - New static vpopcntEvaluatorHelper emits vcnt16b into the result register,
//        then vuaddlp16b; for element types other than Int16 it also emits
//        vuaddlp8h, and for Int64 additionally vuaddlp4s.
//        (Presumably the vuaddlp* steps pairwise-add the per-byte counts up to the
//        element width -- confirm against the A64 CNT / UADDLP descriptions.)
//      - vpopcntEvaluator and vmpopcntEvaluator select by vector element type:
//        Int8 uses opcode vcnt16b directly; Int16/Int32/Int64 use
//        vpopcntEvaluatorHelper; any other type hits a fatal assert and returns
//        NULL. They then delegate to inlineVectorUnaryOp and
//        inlineVectorMaskedUnaryOp respectively.
//
//  * OMRTreeEvaluator.hpp
//      Declares the unaryEvaluatorHelper function-pointer typedef and the static
//      members inlineVectorUnaryOp / inlineVectorMaskedUnaryOp, each with
//      evaluatorHelper defaulting to NULL.
//
//  * UnaryEvaluator.cpp
//      inlineVectorUnaryOp becomes a TreeEvaluator member with the same optional
//      helper dispatch and assert. It now always allocates a new TR_VRF result
//      register; the removed line reused srcReg when the first child's reference
//      count was 1.
//
// NOTE(review): the element-type switch is duplicated verbatim in vpopcntEvaluator
//   and vmpopcntEvaluator; a shared selector would keep the two from drifting.
// NOTE(review): the local is named `evaluationHelper` while the parameter and the
//   typedef use `evaluatorHelper` -- consider aligning the names.
// NOTE(review): dropping the srcReg reuse in inlineVectorUnaryOp affects every
//   caller of that function, not only vpopcnt -- confirm the extra register
//   allocation is intended.
// NOTE(review): in this copy the patch's line breaks appear collapsed onto four
//   physical lines; restore the original line structure before applying.
diff --git a/compiler/aarch64/codegen/OMRCodeGenerator.cpp b/compiler/aarch64/codegen/OMRCodeGenerator.cpp index 7fc191512d9..1e8ed9ea3f4 100644 --- a/compiler/aarch64/codegen/OMRCodeGenerator.cpp +++ b/compiler/aarch64/codegen/OMRCodeGenerator.cpp @@ -700,6 +700,8 @@ bool OMR::ARM64::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::I case TR::vmushr: case TR::vrol: case TR::vmrol: + case TR::vpopcnt: + case TR::vmpopcnt: // Float/ Double are not supported return (et == TR::Int8 || et == TR::Int16 || et == TR::Int32 || et == TR::Int64); case TR::vload: diff --git a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp index 16759934398..f9d593701e3 100644 --- a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp +++ b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp @@ -3146,16 +3146,8 @@ OMR::ARM64::TreeEvaluator::inlineVectorMaskedBinaryOp(TR::Node *node, TR::CodeGe return resReg; } -/** - * @brief Helper functions for generating instruction sequence for masked unary operations - * - * @param[in] node: node - * @param[in] cg: CodeGenerator - * @param[in] op: unary opcode - * @return vector register containing the result - */ -static TR::Register * -inlineVectorMaskedUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op) +TR::Register * +OMR::ARM64::TreeEvaluator::inlineVectorMaskedUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, unaryEvaluatorHelper evaluatorHelper) { TR::Node *firstChild = node->getFirstChild(); TR::Node *secondChild = node->getSecondChild(); @@ -3167,7 +3159,15 @@ inlineVectorMaskedUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode: TR::Register *resReg = cg->allocateRegister(TR_VRF); node->setRegister(resReg); - generateTrg1Src1Instruction(cg, op, node, resReg, srcReg); + TR_ASSERT_FATAL_WITH_NODE(node, (op != TR::InstOpCode::bad) || (evaluatorHelper != NULL), "If op is TR::InstOpCode::bad, evaluatorHelper must not be NULL"); + if 
(evaluatorHelper != NULL) + { + (*evaluatorHelper)(node, resReg, srcReg, cg); + } + else + { + generateTrg1Src1Instruction(cg, op, node, resReg, srcReg); + } bool flipMask = false; TR::Register *maskReg = evaluateMaskNode(secondChild, flipMask, cg); @@ -4115,16 +4115,77 @@ OMR::ARM64::TreeEvaluator::vmfirstNonZeroEvaluator(TR::Node *node, TR::CodeGener return TR::TreeEvaluator::unImpOpEvaluator(node, cg); } +/** + * @brief Helper function for vector population count operation + * + * @param[in] node: node + * @param[in] resultReg: the result register + * @param[in] srcReg: the argument register + * @param[in] cg: CodeGenerator + * @return the result register + */ +static TR::Register * +vpopcntEvaluatorHelper(TR::Node *node, TR::Register *resultReg, TR::Register *srcReg, TR::CodeGenerator *cg) + { + TR::DataType et = node->getDataType().getVectorElementType(); + + generateTrg1Src1Instruction(cg, TR::InstOpCode::vcnt16b, node, resultReg, srcReg); + generateTrg1Src1Instruction(cg, TR::InstOpCode::vuaddlp16b, node, resultReg, resultReg); + if (et != TR::Int16) + { + generateTrg1Src1Instruction(cg, TR::InstOpCode::vuaddlp8h, node, resultReg, resultReg); + if (et == TR::Int64) + { + generateTrg1Src1Instruction(cg, TR::InstOpCode::vuaddlp4s, node, resultReg, resultReg); + } + } + return resultReg; + } + TR::Register* OMR::ARM64::TreeEvaluator::vpopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + TR::InstOpCode::Mnemonic op = TR::InstOpCode::bad; + unaryEvaluatorHelper evaluationHelper = NULL; + switch (node->getDataType().getVectorElementType()) + { + case TR::Int8: + op = TR::InstOpCode::vcnt16b; + break; + case TR::Int16: + case TR::Int32: + case TR::Int64: + evaluationHelper = vpopcntEvaluatorHelper; + break; + + default: + TR_ASSERT_FATAL_WITH_NODE(node, false, "unrecognized vector type %s", node->getDataType().toString()); + return NULL; + } + return inlineVectorUnaryOp(node, cg, op, evaluationHelper); } 
TR::Register* OMR::ARM64::TreeEvaluator::vmpopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + TR::InstOpCode::Mnemonic op = TR::InstOpCode::bad; + unaryEvaluatorHelper evaluationHelper = NULL; + switch (node->getDataType().getVectorElementType()) + { + case TR::Int8: + op = TR::InstOpCode::vcnt16b; + break; + case TR::Int16: + case TR::Int32: + case TR::Int64: + evaluationHelper = vpopcntEvaluatorHelper; + break; + + default: + TR_ASSERT_FATAL_WITH_NODE(node, false, "unrecognized vector type %s", node->getDataType().toString()); + return NULL; + } + return inlineVectorMaskedUnaryOp(node, cg, op, evaluationHelper); } TR::Register* diff --git a/compiler/aarch64/codegen/OMRTreeEvaluator.hpp b/compiler/aarch64/codegen/OMRTreeEvaluator.hpp index 6d4fcf6f6fc..ef5ca35dd35 100644 --- a/compiler/aarch64/codegen/OMRTreeEvaluator.hpp +++ b/compiler/aarch64/codegen/OMRTreeEvaluator.hpp @@ -438,6 +438,29 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator */ static TR::Register *inlineVectorMaskedBinaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, binaryEvaluatorHelper evaluatorHelper = NULL); + typedef TR::Register *(*unaryEvaluatorHelper)(TR::Node *node, TR::Register *resReg, TR::Register *srcReg, TR::CodeGenerator *cg); + /** + * @brief Helper function for generating instruction sequence for unary operations + * + * @param[in] node: node + * @param[in] cg: CodeGenerator + * @param[in] op: unary opcode + * @param[in] evaluatorHelper: optional pointer to helper function which generates instruction stream for operation + * @return vector register containing the result + */ + static TR::Register *inlineVectorUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, unaryEvaluatorHelper evaluatorHelper = NULL); + + /** + * @brief Helper function for generating instruction sequence for masked unary operations + * + * @param[in] node: node + * @param[in] cg: 
CodeGenerator + * @param[in] op: unary opcode + * @param[in] evaluatorHelper: optional pointer to helper function which generates instruction stream for operation + * @return vector register containing the result + */ + static TR::Register *inlineVectorMaskedUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, unaryEvaluatorHelper evaluatorHelper = NULL); + static TR::Register *f2iuEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *f2luEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *f2buEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/aarch64/codegen/UnaryEvaluator.cpp b/compiler/aarch64/codegen/UnaryEvaluator.cpp index 32b231485d7..145e22882e5 100644 --- a/compiler/aarch64/codegen/UnaryEvaluator.cpp +++ b/compiler/aarch64/codegen/UnaryEvaluator.cpp @@ -116,14 +116,22 @@ TR::Register *OMR::ARM64::TreeEvaluator::lnegEvaluator(TR::Node *node, TR::CodeG return tempReg; } -static TR::Register *inlineVectorUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op) +TR::Register *OMR::ARM64::TreeEvaluator::inlineVectorUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, unaryEvaluatorHelper evaluatorHelper) { TR::Node *firstChild = node->getFirstChild(); TR::Register *srcReg = cg->evaluate(firstChild); - TR::Register *resReg = (firstChild->getReferenceCount() == 1) ? srcReg : cg->allocateRegister(TR_VRF); + TR::Register *resReg = cg->allocateRegister(TR_VRF); node->setRegister(resReg); - generateTrg1Src1Instruction(cg, op, node, resReg, srcReg); + TR_ASSERT_FATAL_WITH_NODE(node, (op != TR::InstOpCode::bad) || (evaluatorHelper != NULL), "If op is TR::InstOpCode::bad, evaluatorHelper must not be NULL"); + if (evaluatorHelper != NULL) + { + (*evaluatorHelper)(node, resReg, srcReg, cg); + } + else + { + generateTrg1Src1Instruction(cg, op, node, resReg, srcReg); + } cg->decReferenceCount(firstChild); return resReg; }