-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[VPlan] Handle early exit before forming regions. (NFC) #138393
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d0d2c2e
76c470a
80a629c
56d576a
ce06761
2289a5e
4747678
b74e363
dd38677
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9383,7 +9383,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range, | |
VPlanTransforms::prepareForVectorization( | ||
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck, | ||
CM.foldTailByMasking(), OrigLoop, | ||
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction())); | ||
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), | ||
Legal->hasUncountableEarlyExit(), Range); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does passing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If it is false, it means there are no uncountable early exits and we require a scalar epilogue There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, thanks for clarifying! |
||
VPlanTransforms::createLoopRegions(*Plan); | ||
|
||
// Don't use getDecisionAndClampRange here, because we don't know the UF | ||
|
@@ -9584,12 +9585,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range, | |
R->setOperand(1, WideIV->getStepValue()); | ||
} | ||
|
||
if (auto *UncountableExitingBlock = | ||
Legal->getUncountableEarlyExitingBlock()) { | ||
VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan, | ||
OrigLoop, UncountableExitingBlock, RecipeBuilder, | ||
Range); | ||
} | ||
DenseMap<VPValue *, VPValue *> IVEndValues; | ||
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues); | ||
SetVector<VPIRInstruction *> ExitUsersToFix = | ||
|
@@ -9687,7 +9682,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { | |
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB); | ||
VPlanTransforms::prepareForVectorization( | ||
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop, | ||
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction())); | ||
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false, | ||
Range); | ||
VPlanTransforms::createLoopRegions(*Plan); | ||
|
||
for (ElementCount VF : Range) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -460,11 +460,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, | |
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); | ||
} | ||
|
||
void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy, | ||
PredicatedScalarEvolution &PSE, | ||
bool RequiresScalarEpilogueCheck, | ||
bool TailFolded, Loop *TheLoop, | ||
DebugLoc IVDL) { | ||
void VPlanTransforms::prepareForVectorization( | ||
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, | ||
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop, | ||
DebugLoc IVDL, bool HasUncountableEarlyExit, VFRange &Range) { | ||
VPDominatorTree VPDT; | ||
VPDT.recalculate(Plan); | ||
|
||
|
@@ -491,19 +490,33 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy, | |
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB), | ||
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL); | ||
|
||
// Disconnect all edges to exit blocks other than from the middle block. | ||
// TODO: VPlans with early exits should be explicitly converted to a form | ||
// exiting only via the latch here, including adjusting the exit condition, | ||
// instead of simply disconnecting the edges and adjusting the VPlan later. | ||
for (VPBlockBase *EB : Plan.getExitBlocks()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps worth replacing the old comment with a new one explaining what the loop is doing at a high level? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done thanks |
||
[[maybe_unused]] bool HandledUncountableEarlyExit = false; | ||
// Disconnect all early exits from the loop leaving it with a single exit from | ||
// the latch. Early exits that are countable are left for a scalar epilog. The | ||
// condition of uncountable early exits (currently at most one is supported) | ||
// is fused into the latch exit, and used to branch from middle block to the | ||
// early exit destination. | ||
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) { | ||
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) { | ||
if (Pred == MiddleVPBB) | ||
continue; | ||
if (HasUncountableEarlyExit) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Independent note: this condition disregards the actual Pred->EB edge itself. |
||
assert(!HandledUncountableEarlyExit && | ||
"can handle exactly one uncountable early exit"); | ||
handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan, | ||
cast<VPBasicBlock>(HeaderVPB), | ||
cast<VPBasicBlock>(LatchVPB), Range); | ||
HandledUncountableEarlyExit = true; | ||
} | ||
|
||
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent(); | ||
VPBlockUtils::disconnectBlocks(Pred, EB); | ||
} | ||
} | ||
|
||
assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) && | ||
"missed an uncountable exit that must be handled"); | ||
|
||
// Create SCEV and VPValue for the trip count. | ||
// We use the symbolic max backedge-taken-count, which works also when | ||
// vectorizing loops with uncountable early exits. | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -2461,63 +2461,56 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, | |||||
} | ||||||
|
||||||
void VPlanTransforms::handleUncountableEarlyExit( | ||||||
VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock, | ||||||
VPRecipeBuilder &RecipeBuilder, VFRange &Range) { | ||||||
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); | ||||||
auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting()); | ||||||
VPBuilder Builder(LatchVPBB->getTerminator()); | ||||||
auto *MiddleVPBB = Plan.getMiddleBlock(); | ||||||
VPValue *IsEarlyExitTaken = nullptr; | ||||||
|
||||||
// Process the uncountable exiting block. Update IsEarlyExitTaken, which | ||||||
// tracks if the uncountable early exit has been taken. Also split the middle | ||||||
// block and have it conditionally branch to the early exit block if | ||||||
// EarlyExitTaken. | ||||||
auto *EarlyExitingBranch = | ||||||
cast<BranchInst>(UncountableExitingBlock->getTerminator()); | ||||||
BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0); | ||||||
BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1); | ||||||
BasicBlock *EarlyExitIRBB = | ||||||
!OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc; | ||||||
VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB); | ||||||
|
||||||
VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask( | ||||||
OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); | ||||||
auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond); | ||||||
IsEarlyExitTaken = | ||||||
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond}); | ||||||
VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan, | ||||||
VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VFRange &Range) { | ||||||
using namespace llvm::VPlanPatternMatch; | ||||||
|
||||||
VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0]; | ||||||
if (!EarlyExitVPBB->getSinglePredecessor() && | ||||||
EarlyExitVPBB->getPredecessors()[1] == MiddleVPBB) { | ||||||
assert(EarlyExitVPBB->getNumPredecessors() == 2 && | ||||||
EarlyExitVPBB->getPredecessors()[0] == EarlyExitingVPBB && | ||||||
"unsupported early exit VPBB"); | ||||||
// Early exit operand should always be last phi operand. If EarlyExitVPBB | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it worth adding an assert that MiddleVPBB is actually the second predecessor? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done, thanks |
||||||
// has two predecessors and EarlyExitingVPBB is the first, swap the operands | ||||||
// of the phis. | ||||||
for (VPRecipeBase &R : EarlyExitVPBB->phis()) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is making sure the operand corresponding to the middle block is always first, right? I guess in future if we do want to support multiple early exits this will get a bit more complicated, but swapping operands works for now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep |
||||||
cast<VPIRPhi>(&R)->swapOperands(); | ||||||
} | ||||||
|
||||||
VPBuilder Builder(LatchVPBB->getTerminator()); | ||||||
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0]; | ||||||
assert( | ||||||
match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond(m_VPValue())) && | ||||||
"Terminator must be be BranchOnCond"); | ||||||
VPValue *CondOfEarlyExitingVPBB = | ||||||
EarlyExitingVPBB->getTerminator()->getOperand(0); | ||||||
auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB | ||||||
? CondOfEarlyExitingVPBB | ||||||
: Builder.createNot(CondOfEarlyExitingVPBB); | ||||||
|
||||||
// Split the middle block and have it conditionally branch to the early exit | ||||||
// block if CondToEarlyExit. | ||||||
VPValue *IsEarlyExitTaken = | ||||||
Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit}); | ||||||
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split"); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Independent:
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can do separately, thanks |
||||||
VPBasicBlock *VectorEarlyExitVPBB = | ||||||
Plan.createVPBasicBlock("vector.early.exit"); | ||||||
VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle); | ||||||
VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle); | ||||||
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB); | ||||||
NewMiddle->swapSuccessors(); | ||||||
|
||||||
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock); | ||||||
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB); | ||||||
|
||||||
// Update the exit phis in the early exit block. | ||||||
VPBuilder MiddleBuilder(NewMiddle); | ||||||
VPBuilder EarlyExitB(VectorEarlyExitVPBB); | ||||||
for (VPRecipeBase &R : VPEarlyExitBlock->phis()) { | ||||||
for (VPRecipeBase &R : EarlyExitVPBB->phis()) { | ||||||
auto *ExitIRI = cast<VPIRPhi>(&R); | ||||||
// Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has | ||||||
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this comment still valid given you've already swapped operands above? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, this just re-states the expected order, as explanation for setting early exit index below. |
||||||
// a single predecessor and 1 if it has two. | ||||||
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1; | ||||||
if (!VPEarlyExitBlock->getSinglePredecessor()) { | ||||||
// If VPEarlyExitBlock has two predecessors, they are already ordered such | ||||||
// that early exit is second (and latch exit is first), by construction. | ||||||
// But its underlying IRBB (EarlyExitIRBB) may have its predecessors | ||||||
// ordered the other way around, and it is the order of the latter which | ||||||
// corresponds to the order of operands of VPEarlyExitBlock's phi recipes. | ||||||
// Therefore, if early exit (UncountableExitingBlock) is the first | ||||||
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes, | ||||||
// thereby bringing them to match VPEarlyExitBlock's predecessor order, | ||||||
// with early exit being last (second). Otherwise they already match. | ||||||
if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) == | ||||||
UncountableExitingBlock) | ||||||
ExitIRI->swapOperands(); | ||||||
|
||||||
if (ExitIRI->getNumOperands() != 1) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not relevant for this patch, but the name There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be good to improve the naming, but it may be even better to create all required extracts up-front separately, which is something I am looking into |
||||||
// The first of two operands corresponds to the latch exit, via MiddleVPBB | ||||||
// predecessor. Extract its last lane. | ||||||
ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder); | ||||||
|
@@ -2533,7 +2526,7 @@ void VPlanTransforms::handleUncountableEarlyExit( | |||||
LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) { | ||||||
// Update the incoming value from the early exit. | ||||||
VPValue *FirstActiveLane = EarlyExitB.createNaryOp( | ||||||
VPInstruction::FirstActiveLane, {EarlyExitTakenCond}, nullptr, | ||||||
VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr, | ||||||
"first.active.lane"); | ||||||
IncomingFromEarlyExit = EarlyExitB.createNaryOp( | ||||||
Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane}, | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Independent) Should `prepareForVectorization` be renamed to a more informative name, perhaps `canonicalizeTopLoop`, as it takes care of canonicalizing header and latch blocks, introducing and connecting preheader, middle-block, scalar preheader, canonical IV recipes and trip-count value? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will do separately.