-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Add ComputeAnyOfResult VPInstruction (NFC) #141932
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) ChangesAdd a dedicated opcode for any-of reduction, similar to #132689 and #132690. The patch also explictly adds the start value to not require RecurrenceDescriptor during execute. It also allows freezing the start value to make it poison-safe. Full diff: https://github.com/llvm/llvm-project/pull/141932.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 90e224ea8f37a..d777fc746b12b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7484,6 +7484,13 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
}
}
+static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) {
+ using namespace VPlanPatternMatch;
+ VPValue *StartVPV = RdxResult->getOperand(1);
+ match(StartVPV, m_Freeze(m_VPValue(StartVPV)));
+ return StartVPV->getLiveInIRValue();
+}
+
// If \p R is a ComputeReductionResult when vectorizing the epilog loop,
// fix the reduction's scalar PHI node by adding the incoming value from the
// main vector loop.
@@ -7492,7 +7499,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
BasicBlock *BypassBlock) {
auto *EpiRedResult = dyn_cast<VPInstruction>(R);
if (!EpiRedResult ||
- (EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
+ (EpiRedResult->getOpcode() != VPInstruction::ComputeAnyOfResult &&
+ EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult))
return;
@@ -7504,15 +7512,19 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
+ Value *StartV = EpiRedResult->getOperand(1)->getLiveInIRValue();
+ (void)StartV;
auto *Cmp = cast<ICmpInst>(MainResumeValue);
assert(Cmp->getPredicate() == CmpInst::ICMP_NE &&
"AnyOf expected to start with ICMP_NE");
- assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue() &&
+ assert(Cmp->getOperand(1) == StartV &&
"AnyOf expected to start by comparing main resume value to original "
"start value");
MainResumeValue = Cmp->getOperand(0);
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
+ Value *StartV = getStartValueFromReductionResult(EpiRedResult);
+ (void)StartV;
using namespace llvm::PatternMatch;
Value *Cmp, *OrigResumeV, *CmpOp;
bool IsExpectedPattern =
@@ -7521,10 +7533,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
m_Value(OrigResumeV))) &&
(match(Cmp, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV),
m_Value(CmpOp))) &&
- (match(CmpOp,
- m_Freeze(m_Specific(RdxDesc.getRecurrenceStartValue()))) ||
- (CmpOp == RdxDesc.getRecurrenceStartValue() &&
- isGuaranteedNotToBeUndefOrPoison(CmpOp))));
+ ((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp))));
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
(void)IsExpectedPattern;
MainResumeValue = OrigResumeV;
@@ -9466,7 +9475,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
OrigExitingVPV->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) {
return isa<VPInstruction>(&U) &&
(cast<VPInstruction>(&U)->getOpcode() ==
+ VPInstruction::ComputeAnyOfResult ||
+ cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeReductionResult ||
+
cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeFindLastIVResult);
});
@@ -9519,6 +9531,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
FinalReductionResult =
Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
{PhiR, Start, NewExitingVPV}, ExitDL);
+ } else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
+ VPValue *Start = PhiR->getStartValue();
+ FinalReductionResult =
+ Builder.createNaryOp(VPInstruction::ComputeAnyOfResult,
+ {PhiR, Start, NewExitingVPV}, ExitDL);
} else {
VPIRFlags Flags = RecurrenceDescriptor::isFloatingPointRecurrenceKind(
RdxDesc.getRecurrenceKind())
@@ -10050,23 +10068,36 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
+ auto *RdxResult =
+ cast<VPInstruction>(*find_if(ReductionPhi->users(), [](VPUser *U) {
+ auto *VPI = dyn_cast<VPInstruction>(U);
+ return VPI &&
+ (VPI->getOpcode() == VPInstruction::ComputeReductionResult ||
+ VPI->getOpcode() == VPInstruction::ComputeFindLastIVResult);
+ }));
ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
->getIncomingValueForBlock(L->getLoopPreheader());
const RecurrenceDescriptor &RdxDesc =
ReductionPhi->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
+ Value *StartV = RdxResult->getOperand(1)->getLiveInIRValue();
+ assert(RdxDesc.getRecurrenceStartValue() == StartV &&
+ "start value from ComputeAnyOfResult must match");
+
// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
// start value; compare the final value from the main vector loop
// to the start value.
BasicBlock *PBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(PBB, PBB->getFirstNonPHIIt());
- ResumeV =
- Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue());
+ ResumeV = Builder.CreateICmpNE(ResumeV, StartV);
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
- ToFrozen[RdxDesc.getRecurrenceStartValue()] =
- cast<PHINode>(ResumeV)->getIncomingValueForBlock(
- EPI.MainLoopIterationCountCheck);
+ Value *StartV = getStartValueFromReductionResult(RdxResult);
+ assert(RdxDesc.getRecurrenceStartValue() == StartV &&
+ "start value from ComputeFindLastIVResult must match");
+
+ ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
+ EPI.MainLoopIterationCountCheck);
// VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
// to the resume value. The resume value is adjusted to the sentinel
@@ -10076,8 +10107,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
// variable.
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
- Value *Cmp = Builder.CreateICmpEQ(
- ResumeV, ToFrozen[RdxDesc.getRecurrenceStartValue()]);
+ Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
ResumeV =
Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 13c0d84f5e219..af97477b9ae7f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -898,6 +898,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
BranchOnCount,
BranchOnCond,
Broadcast,
+ ComputeAnyOfResult,
ComputeFindLastIVResult,
ComputeReductionResult,
// Extracts the last lane from its operand if it is a vector, or the last
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index ac0f30cb4693c..ecb6045e7a79f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -87,6 +87,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
inferScalarType(R->getOperand(1)) &&
"different types inferred for different operands");
return IntegerType::get(Ctx, 1);
+ case VPInstruction::ComputeAnyOfResult:
+ return inferScalarType(R->getOperand(1));
case VPInstruction::ComputeFindLastIVResult:
case VPInstruction::ComputeReductionResult: {
auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index f2a7f16e19a79..dfd9fc3d4d719 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -318,6 +318,12 @@ m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
{Op0, Op1, Op2});
}
+template <typename Op0_t>
+inline UnaryVPInstruction_match<Op0_t, Instruction::Freeze>
+m_Freeze(const Op0_t &Op0) {
+ return m_VPInstruction<Instruction::Freeze>(Op0);
+}
+
template <typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, VPInstruction::Not>
m_Not(const Op0_t &Op0) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index d16c8b44891f5..9d92ed1630a32 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -604,6 +604,20 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateVectorSplat(
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
}
+ case VPInstruction::ComputeAnyOfResult: {
+ // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
+ // and will be removed by breaking up the recipe further.
+ auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
+ auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
+ Value *ReducedPartRdx = State.get(getOperand(2));
+ for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx)
+ ReducedPartRdx = Builder.CreateBinOp(
+ (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(
+ RecurKind::AnyOf),
+ State.get(getOperand(Idx)), ReducedPartRdx, "bin.rdx");
+ return createAnyOfReduction(Builder, ReducedPartRdx,
+ State.get(getOperand(1), VPLane(0)), OrigPhi);
+ }
case VPInstruction::ComputeFindLastIVResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
@@ -681,17 +695,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
- if ((State.VF.isVector() ||
- RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
- !PhiR->isInLoop()) {
+ if (State.VF.isVector() && !PhiR->isInLoop()) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
- if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
- ReducedPartRdx =
- createAnyOfReduction(Builder, ReducedPartRdx,
- RdxDesc.getRecurrenceStartValue(), OrigPhi);
- else
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
// If the reduction can be performed in a smaller type, we need to extend
@@ -841,6 +848,7 @@ bool VPInstruction::isVectorToScalar() const {
getOpcode() == VPInstruction::ExtractPenultimateElement ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::FirstActiveLane ||
+ getOpcode() == VPInstruction::ComputeAnyOfResult ||
getOpcode() == VPInstruction::ComputeFindLastIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
@@ -938,6 +946,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
+ case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindLastIVResult:
return Op == getOperand(1);
};
@@ -1021,6 +1030,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractPenultimateElement:
O << "extract-penultimate-element";
break;
+ case VPInstruction::ComputeAnyOfResult:
+ O << "compute-anyof-result";
+ break;
case VPInstruction::ComputeFindLastIVResult:
O << "compute-find-last-iv-result";
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index e1fb3d476c58d..335301a927ceb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -327,7 +327,9 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// Add all VPValues for all parts to ComputeReductionResult which combines
// the parts to compute the final reduction value.
VPValue *Op1;
- if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
+ if (match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
+ m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
+ match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeFindLastIVResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
1c7a325
to
b709387
Compare
Replace VPReductionPHIRecipe with RecurKind in VPReductionPHIRecipe, as all VPlan analyses and codegen only require the recurrence kind. This enables creating new VPReductionPHIRecipe directly in LV, without needing to construction a whole RecurrenceDescriptor object. Depends on llvm#141860 llvm#141932 llvm#142290 llvm#142291
case VPInstruction::ComputeAnyOfResult: | ||
return inferScalarType(R->getOperand(1)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fall through here?
case VPInstruction::ComputeAnyOfResult: | |
return inferScalarType(R->getOperand(1)); | |
case VPInstruction::ComputeAnyOfResult: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed for now, but it would probably be good to move away from directly checking the underlying phi type separately.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, need another patch to change it.
@@ -7485,6 +7485,13 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) { | |||
} | |||
} | |||
|
|||
static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) { | |||
using namespace VPlanPatternMatch; | |||
VPValue *StartVPV = RdxResult->getOperand(1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we need a assertion that check if RdxResult is a VPInstruction::ComputeAnyOfResult/ComputeFindLastIVResult?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added, thanks
case VPInstruction::ComputeAnyOfResult: { | ||
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary | ||
// and will be removed by breaking up the recipe further. | ||
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0)); | ||
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); | ||
Value *ReducedPartRdx = State.get(getOperand(2)); | ||
for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx) | ||
ReducedPartRdx = Builder.CreateBinOp( | ||
(Instruction::BinaryOps)RecurrenceDescriptor::getOpcode( | ||
RecurKind::AnyOf), | ||
State.get(getOperand(Idx)), ReducedPartRdx, "bin.rdx"); | ||
return createAnyOfReduction(Builder, ReducedPartRdx, | ||
State.get(getOperand(1), VPLane(0)), OrigPhi); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you think it’s worth unifying ComputeAnyOfResult
and ComputeFindLastIVResult
so they can share a common recipe?
Or would you prefer to avoid passing RecurrenceKind into the recipe?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With VPInstruction we cannot easily add the recurrence kind to the recipe. I think it would be good to keep the separate opcodes, especially if we add the sentinel value in #142291.
b709387
to
bb5d97f
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 on splitting this out, createAnyOfReduction and createSimpleReduction are two quite different functions so I think it makes sense to model them as separate opcodes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks.
Add a dedicated opcode for any-of reduction, similar to llvm/llvm-project#132689 and llvm/llvm-project#132690. The patch also explictly adds the start value to not require RecurrenceDescriptor during execute. It also allows freezing the start value to make it poison-safe. PR: llvm/llvm-project#141932
Replace VPReductionPHIRecipe with RecurKind in VPReductionPHIRecipe, as all VPlan analyses and codegen only require the recurrence kind. This enables creating new VPReductionPHIRecipe directly in LV, without needing to construction a whole RecurrenceDescriptor object. Depends on llvm#141860 llvm#141932 llvm#142290 llvm#142291
Replace VPReductionPHIRecipe with RecurKind in VPReductionPHIRecipe, as all VPlan analyses and codegen only require the recurrence kind. This enables creating new VPReductionPHIRecipe directly in LV, without needing to construction a whole RecurrenceDescriptor object. Depends on llvm#141860 llvm#141932 llvm#142290 llvm#142291
Replace VPReductionPHIRecipe with RecurKind in VPReductionPHIRecipe, as all VPlan analyses and codegen only require the recurrence kind. This enables creating new VPReductionPHIRecipe directly in LV, without needing to construction a whole RecurrenceDescriptor object. Depends on llvm#141860 llvm#141932 llvm#142290 llvm#142291
Add a dedicated opcode for any-of reduction, similar to #132689 and #132690.
The patch also explictly adds the start value to not require RecurrenceDescriptor during execute. It also allows freezing the start value to make it poison-safe.