Skip to content

[VPlan] Add ComputeAnyOfResult VPInstruction (NFC) #141932

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 47 additions & 16 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7209,15 +7209,25 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
}
}

// If \p R is a ComputeReductionResult when vectorizing the epilog loop,
// fix the reduction's scalar PHI node by adding the incoming value from the
// main vector loop.
static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) {
using namespace VPlanPatternMatch;
assert(RdxResult->getOpcode() == VPInstruction::ComputeFindLastIVResult &&
"RdxResult must be ComputeFindLastIVResult");
VPValue *StartVPV = RdxResult->getOperand(1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we need an assertion that checks whether RdxResult is a VPInstruction::ComputeAnyOfResult/ComputeFindLastIVResult?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks

match(StartVPV, m_Freeze(m_VPValue(StartVPV)));
return StartVPV->getLiveInIRValue();
}

// If \p R is a Compute{Reduction,AnyOf,FindLastIV}Result when vectorizing the
// epilog loop, fix the reduction's scalar PHI node by adding the incoming value
// from the main vector loop.
static void fixReductionScalarResumeWhenVectorizingEpilog(
VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock,
BasicBlock *BypassBlock) {
auto *EpiRedResult = dyn_cast<VPInstruction>(R);
if (!EpiRedResult ||
(EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
(EpiRedResult->getOpcode() != VPInstruction::ComputeAnyOfResult &&
EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult))
return;

Expand All @@ -7229,15 +7239,18 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
Value *StartV = EpiRedResult->getOperand(1)->getLiveInIRValue();
(void)StartV;
auto *Cmp = cast<ICmpInst>(MainResumeValue);
assert(Cmp->getPredicate() == CmpInst::ICMP_NE &&
"AnyOf expected to start with ICMP_NE");
assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue() &&
assert(Cmp->getOperand(1) == StartV &&
"AnyOf expected to start by comparing main resume value to original "
"start value");
MainResumeValue = Cmp->getOperand(0);
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
using namespace llvm::PatternMatch;
Value *Cmp, *OrigResumeV, *CmpOp;
bool IsExpectedPattern =
Expand All @@ -7246,10 +7259,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
m_Value(OrigResumeV))) &&
(match(Cmp, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV),
m_Value(CmpOp))) &&
(match(CmpOp,
m_Freeze(m_Specific(RdxDesc.getRecurrenceStartValue()))) ||
(CmpOp == RdxDesc.getRecurrenceStartValue() &&
isGuaranteedNotToBeUndefOrPoison(CmpOp))));
((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp))));
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
(void)IsExpectedPattern;
MainResumeValue = OrigResumeV;
Expand Down Expand Up @@ -9184,6 +9194,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
OrigExitingVPV->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) {
return isa<VPInstruction>(&U) &&
(cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeAnyOfResult ||
cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeReductionResult ||
cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeFindLastIVResult);
Expand Down Expand Up @@ -9236,6 +9248,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
FinalReductionResult =
Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
{PhiR, Start, NewExitingVPV}, ExitDL);
} else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
VPValue *Start = PhiR->getStartValue();
FinalReductionResult =
Builder.createNaryOp(VPInstruction::ComputeAnyOfResult,
{PhiR, Start, NewExitingVPV}, ExitDL);
} else {
VPIRFlags Flags = RecurrenceDescriptor::isFloatingPointRecurrenceKind(
RdxDesc.getRecurrenceKind())
Expand Down Expand Up @@ -9764,23 +9782,37 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
auto *RdxResult =
cast<VPInstruction>(*find_if(ReductionPhi->users(), [](VPUser *U) {
auto *VPI = dyn_cast<VPInstruction>(U);
return VPI &&
(VPI->getOpcode() == VPInstruction::ComputeAnyOfResult ||
VPI->getOpcode() == VPInstruction::ComputeReductionResult ||
VPI->getOpcode() == VPInstruction::ComputeFindLastIVResult);
}));
ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
->getIncomingValueForBlock(L->getLoopPreheader());
const RecurrenceDescriptor &RdxDesc =
ReductionPhi->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
Value *StartV = RdxResult->getOperand(1)->getLiveInIRValue();
assert(RdxDesc.getRecurrenceStartValue() == StartV &&
"start value from ComputeAnyOfResult must match");

// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
// start value; compare the final value from the main vector loop
// to the start value.
BasicBlock *PBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(PBB, PBB->getFirstNonPHIIt());
ResumeV =
Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue());
ResumeV = Builder.CreateICmpNE(ResumeV, StartV);
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
ToFrozen[RdxDesc.getRecurrenceStartValue()] =
cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);
Value *StartV = getStartValueFromReductionResult(RdxResult);
assert(RdxDesc.getRecurrenceStartValue() == StartV &&
"start value from ComputeFindLastIVResult must match");

ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);

// VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
// to the resume value. The resume value is adjusted to the sentinel
Expand All @@ -9790,8 +9822,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
// variable.
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
Value *Cmp = Builder.CreateICmpEQ(
ResumeV, ToFrozen[RdxDesc.getRecurrenceStartValue()]);
Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
ResumeV =
Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV);
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
BranchOnCount,
BranchOnCond,
Broadcast,
ComputeAnyOfResult,
ComputeFindLastIVResult,
ComputeReductionResult,
// Extracts the last lane from its operand if it is a vector, or the last
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
inferScalarType(R->getOperand(1)) &&
"different types inferred for different operands");
return IntegerType::get(Ctx, 1);
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindLastIVResult:
case VPInstruction::ComputeReductionResult: {
auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,12 @@ m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
{Op0, Op1, Op2});
}

/// Build a unary VPInstruction matcher for opcode Instruction::Freeze.
/// \p Op0 is the sub-pattern applied to the single operand; this simply
/// forwards to m_VPInstruction<Instruction::Freeze>.
template <typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, Instruction::Freeze>
m_Freeze(const Op0_t &Op0) {
return m_VPInstruction<Instruction::Freeze>(Op0);
}

template <typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, VPInstruction::Not>
m_Not(const Op0_t &Op0) {
Expand Down
30 changes: 21 additions & 9 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,20 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateVectorSplat(
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
}
case VPInstruction::ComputeAnyOfResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
Value *ReducedPartRdx = State.get(getOperand(2));
for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx)
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(
RecurKind::AnyOf),
State.get(getOperand(Idx)), ReducedPartRdx, "bin.rdx");
return createAnyOfReduction(Builder, ReducedPartRdx,
State.get(getOperand(1), VPLane(0)), OrigPhi);
}
Comment on lines +607 to +620
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it’s worth unifying ComputeAnyOfResult and ComputeFindLastIVResult so they can share a common recipe?
Or would you prefer to avoid passing RecurrenceKind into the recipe?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With VPInstruction we cannot easily add the recurrence kind to the recipe. I think it would be good to keep the separate opcodes, especially if we add the sentinel value in #142291.

case VPInstruction::ComputeFindLastIVResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
Expand Down Expand Up @@ -681,18 +695,11 @@ Value *VPInstruction::generate(VPTransformState &State) {

// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
if ((State.VF.isVector() ||
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
!PhiR->isInLoop()) {
if (State.VF.isVector() && !PhiR->isInLoop()) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
ReducedPartRdx =
createAnyOfReduction(Builder, ReducedPartRdx,
RdxDesc.getRecurrenceStartValue(), OrigPhi);
else
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);

// If the reduction can be performed in a smaller type, we need to extend
// the reduction to the wider type before we branch to the original loop.
Expand Down Expand Up @@ -830,6 +837,7 @@ bool VPInstruction::isVectorToScalar() const {
getOpcode() == VPInstruction::ExtractPenultimateElement ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::FirstActiveLane ||
getOpcode() == VPInstruction::ComputeAnyOfResult ||
getOpcode() == VPInstruction::ComputeFindLastIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
Expand Down Expand Up @@ -925,6 +933,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindLastIVResult:
return Op == getOperand(1);
};
Expand Down Expand Up @@ -1005,6 +1014,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractPenultimateElement:
O << "extract-penultimate-element";
break;
case VPInstruction::ComputeAnyOfResult:
O << "compute-anyof-result";
break;
case VPInstruction::ComputeFindLastIVResult:
O << "compute-find-last-iv-result";
break;
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,9 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// Add all VPValues for all parts to ComputeReductionResult which combines
// the parts to compute the final reduction value.
VPValue *Op1;
if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
if (match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeFindLastIVResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
Expand Down
Loading