Skip to content

Commit

Permalink
AArch64: Implement Vector Population Count evaluator
Browse files Browse the repository at this point in the history
This commit implements the vpopcnt and vmpopcnt (masked) evaluators on AArch64.

Signed-off-by: Akira Saitoh <saiaki@jp.ibm.com>
  • Loading branch information
Akira Saitoh committed Jun 7, 2023
1 parent 9921159 commit 5949b86
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 16 deletions.
2 changes: 2 additions & 0 deletions compiler/aarch64/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,8 @@ bool OMR::ARM64::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::I
case TR::vmushr:
case TR::vrol:
case TR::vmrol:
case TR::vpopcnt:
case TR::vmpopcnt:
// Float/ Double are not supported
return (et == TR::Int8 || et == TR::Int16 || et == TR::Int32 || et == TR::Int64);
case TR::vload:
Expand Down
87 changes: 74 additions & 13 deletions compiler/aarch64/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3146,16 +3146,8 @@ OMR::ARM64::TreeEvaluator::inlineVectorMaskedBinaryOp(TR::Node *node, TR::CodeGe
return resReg;
}

/**
* @brief Helper functions for generating instruction sequence for masked unary operations
*
* @param[in] node: node
* @param[in] cg: CodeGenerator
* @param[in] op: unary opcode
* @return vector register containing the result
*/
static TR::Register *
inlineVectorMaskedUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op)
TR::Register *
OMR::ARM64::TreeEvaluator::inlineVectorMaskedUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, unaryEvaluatorHelper evaluatorHelper)
{
TR::Node *firstChild = node->getFirstChild();
TR::Node *secondChild = node->getSecondChild();
Expand All @@ -3167,7 +3159,15 @@ inlineVectorMaskedUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode:
TR::Register *resReg = cg->allocateRegister(TR_VRF);
node->setRegister(resReg);

generateTrg1Src1Instruction(cg, op, node, resReg, srcReg);
TR_ASSERT_FATAL_WITH_NODE(node, (op != TR::InstOpCode::bad) || (evaluatorHelper != NULL), "If op is TR::InstOpCode::bad, evaluatorHelper must not be NULL");
if (evaluatorHelper != NULL)
{
(*evaluatorHelper)(node, resReg, srcReg, cg);
}
else
{
generateTrg1Src1Instruction(cg, op, node, resReg, srcReg);
}

bool flipMask = false;
TR::Register *maskReg = evaluateMaskNode(secondChild, flipMask, cg);
Expand Down Expand Up @@ -4115,16 +4115,77 @@ OMR::ARM64::TreeEvaluator::vmfirstNonZeroEvaluator(TR::Node *node, TR::CodeGener
return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
}

/**
 * @brief Generates the vector population count sequence for elements wider than one byte
 *
 * The sequence starts with vcnt16b (CNT) from srcReg into resultReg, then applies
 * in-place pairwise widening adds (UADDLP) on resultReg until the lane width
 * matches the element type of the node:
 *   - Int16: vuaddlp16b
 *   - Int32: vuaddlp16b, vuaddlp8h
 *   - Int64: vuaddlp16b, vuaddlp8h, vuaddlp4s
 *
 * @param[in] node: node whose vector element type selects the sequence length
 * @param[in] resultReg: the result register
 * @param[in] srcReg: the argument register
 * @param[in] cg: CodeGenerator
 * @return the result register
 */
static TR::Register *
vpopcntEvaluatorHelper(TR::Node *node, TR::Register *resultReg, TR::Register *srcReg, TR::CodeGenerator *cg)
   {
   TR::DataType elementType = node->getDataType().getVectorElementType();

   // Common prefix: count, then the first widening step.
   generateTrg1Src1Instruction(cg, TR::InstOpCode::vcnt16b, node, resultReg, srcReg);
   generateTrg1Src1Instruction(cg, TR::InstOpCode::vuaddlp16b, node, resultReg, resultReg);
   if (elementType == TR::Int16)
      {
      return resultReg;
      }

   // Every remaining element type needs the second widening step.
   generateTrg1Src1Instruction(cg, TR::InstOpCode::vuaddlp8h, node, resultReg, resultReg);
   if (elementType == TR::Int64)
      {
      // Only Int64 gets the third widening step.
      generateTrg1Src1Instruction(cg, TR::InstOpCode::vuaddlp4s, node, resultReg, resultReg);
      }
   return resultReg;
   }

/**
 * @brief Evaluator for the vector population count (vpopcnt) operation
 *
 * Int8 elements are handled by the single vcnt16b instruction. Int16, Int32 and
 * Int64 elements are delegated to vpopcntEvaluatorHelper, which emits a
 * multi-instruction sequence. Any other element type triggers a fatal assertion.
 *
 * @param[in] node: vpopcnt node
 * @param[in] cg: CodeGenerator
 * @return vector register containing the result
 */
TR::Register*
OMR::ARM64::TreeEvaluator::vpopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Exactly one of op / evaluationHelper is set by the switch below;
   // inlineVectorUnaryOp uses the helper when it is non-NULL and the opcode otherwise.
   TR::InstOpCode::Mnemonic op = TR::InstOpCode::bad;
   unaryEvaluatorHelper evaluationHelper = NULL;
   switch (node->getDataType().getVectorElementType())
      {
      case TR::Int8:
         op = TR::InstOpCode::vcnt16b;
         break;
      case TR::Int16:
      case TR::Int32:
      case TR::Int64:
         evaluationHelper = vpopcntEvaluatorHelper;
         break;

      default:
         TR_ASSERT_FATAL_WITH_NODE(node, false, "unrecognized vector type %s", node->getDataType().toString());
         return NULL;
      }
   return inlineVectorUnaryOp(node, cg, op, evaluationHelper);
   }

/**
 * @brief Evaluator for the masked vector population count (vmpopcnt) operation
 *
 * Int8 elements are handled by the single vcnt16b instruction. Int16, Int32 and
 * Int64 elements are delegated to vpopcntEvaluatorHelper, which emits a
 * multi-instruction sequence. Any other element type triggers a fatal assertion.
 * Mask handling is performed by inlineVectorMaskedUnaryOp.
 *
 * @param[in] node: vmpopcnt node
 * @param[in] cg: CodeGenerator
 * @return vector register containing the result
 */
TR::Register*
OMR::ARM64::TreeEvaluator::vmpopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Exactly one of op / evaluationHelper is set by the switch below;
   // inlineVectorMaskedUnaryOp uses the helper when it is non-NULL and the opcode otherwise.
   TR::InstOpCode::Mnemonic op = TR::InstOpCode::bad;
   unaryEvaluatorHelper evaluationHelper = NULL;
   switch (node->getDataType().getVectorElementType())
      {
      case TR::Int8:
         op = TR::InstOpCode::vcnt16b;
         break;
      case TR::Int16:
      case TR::Int32:
      case TR::Int64:
         evaluationHelper = vpopcntEvaluatorHelper;
         break;

      default:
         TR_ASSERT_FATAL_WITH_NODE(node, false, "unrecognized vector type %s", node->getDataType().toString());
         return NULL;
      }
   return inlineVectorMaskedUnaryOp(node, cg, op, evaluationHelper);
   }

TR::Register*
Expand Down
23 changes: 23 additions & 0 deletions compiler/aarch64/codegen/OMRTreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,29 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator
*/
static TR::Register *inlineVectorMaskedBinaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, binaryEvaluatorHelper evaluatorHelper = NULL);

/**
 * @brief Signature of a helper function which generates the instruction stream for a vector unary operation
 *
 * @param[in] node: node
 * @param[in] resReg: the result register
 * @param[in] srcReg: the source register
 * @param[in] cg: CodeGenerator
 * @return a register; NOTE(review): presumably the result register — confirm against each helper
 */
typedef TR::Register *(*unaryEvaluatorHelper)(TR::Node *node, TR::Register *resReg, TR::Register *srcReg, TR::CodeGenerator *cg);
/**
* @brief Helper function for generating instruction sequence for unary operations
*
* If evaluatorHelper is not NULL, it is called to generate the instructions and op is not used.
* Otherwise a single instruction with opcode op is generated.
*
* @param[in] node: node
* @param[in] cg: CodeGenerator
* @param[in] op: unary opcode. May be TR::InstOpCode::bad only if evaluatorHelper is not NULL
* @param[in] evaluatorHelper: optional pointer to helper function which generates instruction stream for operation
* @return vector register containing the result
*/
static TR::Register *inlineVectorUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, unaryEvaluatorHelper evaluatorHelper = NULL);

/**
* @brief Helper function for generating instruction sequence for masked unary operations
*
* If evaluatorHelper is not NULL, it is called to generate the instructions for the operation and op is not used.
* Otherwise a single instruction with opcode op is generated.
*
* @param[in] node: node
* @param[in] cg: CodeGenerator
* @param[in] op: unary opcode. May be TR::InstOpCode::bad only if evaluatorHelper is not NULL
* @param[in] evaluatorHelper: optional pointer to helper function which generates instruction stream for operation
* @return vector register containing the result
*/
static TR::Register *inlineVectorMaskedUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, unaryEvaluatorHelper evaluatorHelper = NULL);

static TR::Register *f2iuEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *f2luEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *f2buEvaluator(TR::Node *node, TR::CodeGenerator *cg);
Expand Down
14 changes: 11 additions & 3 deletions compiler/aarch64/codegen/UnaryEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,22 @@ TR::Register *OMR::ARM64::TreeEvaluator::lnegEvaluator(TR::Node *node, TR::CodeG
return tempReg;
}

/**
 * @brief Helper function for generating instruction sequence for unary operations
 *
 * Evaluates the first child, allocates a vector register for the result and
 * generates the operation either through evaluatorHelper (when it is not NULL)
 * or as a single instruction with opcode op.
 *
 * @param[in] node: node
 * @param[in] cg: CodeGenerator
 * @param[in] op: unary opcode. May be TR::InstOpCode::bad only if evaluatorHelper is not NULL
 * @param[in] evaluatorHelper: optional pointer to helper function which generates instruction stream for operation
 * @return vector register containing the result
 */
TR::Register *OMR::ARM64::TreeEvaluator::inlineVectorUnaryOp(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, unaryEvaluatorHelper evaluatorHelper)
   {
   TR::Node *firstChild = node->getFirstChild();
   TR::Register *srcReg = cg->evaluate(firstChild);
   // The result always gets its own register, so a helper can never see resReg aliasing srcReg.
   TR::Register *resReg = cg->allocateRegister(TR_VRF);

   node->setRegister(resReg);
   TR_ASSERT_FATAL_WITH_NODE(node, (op != TR::InstOpCode::bad) || (evaluatorHelper != NULL), "If op is TR::InstOpCode::bad, evaluatorHelper must not be NULL");
   if (evaluatorHelper != NULL)
      {
      // The helper takes precedence over op and emits the whole sequence.
      (*evaluatorHelper)(node, resReg, srcReg, cg);
      }
   else
      {
      generateTrg1Src1Instruction(cg, op, node, resReg, srcReg);
      }
   cg->decReferenceCount(firstChild);
   return resReg;
   }
Expand Down

0 comments on commit 5949b86

Please sign in to comment.