Skip to content

Commit 4f96480

Browse files
committed
[VPlan] Handle early exit before forming regions. (NFC)
Move early-exit handling up front to the original VPlan construction, before introducing regions. This builds on #137709, which adds exiting edges to the original VPlan instead of adding exit blocks later. This retains the exit conditions early and means we can handle early exits before forming regions, without relying on VPRecipeBuilder. Once we retain all exits initially, handling early exits before region construction ensures the regions are valid; otherwise we would leave edges exiting the region from somewhere other than the latch. Removing the reliance on VPRecipeBuilder removes the dependence on mapping IR BBs to VPBBs and unblocks predication as a VPlan transform: #128420. Depends on #137709.
1 parent 1284bc6 commit 4f96480

File tree

5 files changed

+94
-71
lines changed

5 files changed

+94
-71
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9383,7 +9383,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93839383
VPlanTransforms::prepareForVectorization(
93849384
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
93859385
CM.foldTailByMasking(), OrigLoop,
9386-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9386+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()),
9387+
Legal->hasUncountableEarlyExit(), Range);
93879388
VPlanTransforms::createLoopRegions(*Plan);
93889389

93899390
// Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9581,12 +9582,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95819582
R->setOperand(1, WideIV->getStepValue());
95829583
}
95839584

9584-
if (auto *UncountableExitingBlock =
9585-
Legal->getUncountableEarlyExitingBlock()) {
9586-
VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
9587-
OrigLoop, UncountableExitingBlock, RecipeBuilder,
9588-
Range);
9589-
}
95909585
DenseMap<VPValue *, VPValue *> IVEndValues;
95919586
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
95929587
SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9684,7 +9679,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
96849679
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
96859680
VPlanTransforms::prepareForVectorization(
96869681
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
9687-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9682+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
9683+
Range);
96889684
VPlanTransforms::createLoopRegions(*Plan);
96899685

96909686
for (ElementCount VF : Range)

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -469,11 +469,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
469469
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
470470
}
471471

472-
void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
473-
PredicatedScalarEvolution &PSE,
474-
bool RequiresScalarEpilogueCheck,
475-
bool TailFolded, Loop *TheLoop,
476-
DebugLoc IVDL) {
472+
void VPlanTransforms::prepareForVectorization(
473+
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
474+
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
475+
DebugLoc IVDL, bool HandleUncountableExit, VFRange &Range) {
477476
VPDominatorTree VPDT;
478477
VPDT.recalculate(Plan);
479478

@@ -499,16 +498,20 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
499498
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
500499
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
501500

502-
// Disconnect all edges to exit blocks other than from the middle block.
503-
// TODO: VPlans with early exits should be explicitly converted to a form only
504-
// exiting via the latch here, including adjusting the exit condition, instead
505-
// of simplify disconnecting the edges and adjusting the VPlan later.
506-
for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
507-
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
508-
if (Pred == MiddleVPBB)
509-
continue;
510-
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
511-
VPBlockUtils::disconnectBlocks(Pred, EB);
501+
if (HandleUncountableExit) {
502+
// Convert VPlans with early exits to a form only exiting via the latch
503+
// here, including adjusting the exit condition.
504+
handleUncountableEarlyExit(Plan, cast<VPBasicBlock>(HeaderVPB),
505+
cast<VPBasicBlock>(LatchVPB), Range);
506+
} else {
507+
// Disconnect all edges to exit blocks other than from the middle block.
508+
for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
509+
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
510+
if (Pred == MiddleVPBB)
511+
continue;
512+
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
513+
VPBlockUtils::disconnectBlocks(Pred, EB);
514+
}
512515
}
513516
}
514517

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 65 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2488,64 +2488,86 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
24882488
R->eraseFromParent();
24892489
}
24902490

2491-
void VPlanTransforms::handleUncountableEarlyExit(
2492-
VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock,
2493-
VPRecipeBuilder &RecipeBuilder, VFRange &Range) {
2494-
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
2495-
auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
2491+
void VPlanTransforms::handleUncountableEarlyExit(VPlan &Plan,
2492+
VPBasicBlock *HeaderVPBB,
2493+
VPBasicBlock *LatchVPBB,
2494+
VFRange &Range) {
2495+
// First find the uncountable early exiting block by looking at the
2496+
// predecessors of the exit blocks.
2497+
VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
2498+
VPBasicBlock *EarlyExitingVPBB = nullptr;
2499+
VPIRBasicBlock *EarlyExitVPBB = nullptr;
2500+
for (auto *EB : Plan.getExitBlocks()) {
2501+
for (VPBlockBase *Pred : EB->getPredecessors()) {
2502+
if (Pred != MiddleVPBB) {
2503+
EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
2504+
EarlyExitVPBB = EB;
2505+
break;
2506+
}
2507+
}
2508+
}
2509+
assert(EarlyExitVPBB && "Must have a early exiting block.");
2510+
assert(all_of(Plan.getExitBlocks(),
2511+
[EarlyExitingVPBB, MiddleVPBB](VPIRBasicBlock *EB) {
2512+
return all_of(
2513+
EB->getPredecessors(),
2514+
[EarlyExitingVPBB, MiddleVPBB](VPBlockBase *Pred) {
2515+
return Pred == EarlyExitingVPBB || Pred == MiddleVPBB;
2516+
});
2517+
}) &&
2518+
"All exit blocks must only have EarlyExitingVPBB or MiddleVPBB as "
2519+
"predecessors.");
2520+
24962521
VPBuilder Builder(LatchVPBB->getTerminator());
2497-
auto *MiddleVPBB = Plan.getMiddleBlock();
2498-
VPValue *IsEarlyExitTaken = nullptr;
2499-
2500-
// Process the uncountable exiting block. Update IsEarlyExitTaken, which
2501-
// tracks if the uncountable early exit has been taken. Also split the middle
2502-
// block and have it conditionally branch to the early exit block if
2503-
// EarlyExitTaken.
2504-
auto *EarlyExitingBranch =
2505-
cast<BranchInst>(UncountableExitingBlock->getTerminator());
2506-
BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
2507-
BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
2508-
BasicBlock *EarlyExitIRBB =
2509-
!OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
2510-
VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
2511-
2512-
VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
2513-
OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
2514-
auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
2515-
IsEarlyExitTaken =
2516-
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
2522+
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
2523+
VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0);
2524+
auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB
2525+
? EarlyExitCond
2526+
: Builder.createNot(EarlyExitCond);
2527+
2528+
if (!EarlyExitVPBB->getSinglePredecessor() &&
2529+
EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) {
2530+
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
2531+
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
2532+
// a single predecessor and 1 if it has two.
2533+
// If EarlyExitVPBB has two predecessors, they are already ordered such
2534+
// that early exit is second (and latch exit is first), by construction.
2535+
// But its underlying IRBB (EarlyExitIRBB) may have its predecessors
2536+
// ordered the other way around, and it is the order of the latter which
2537+
// corresponds to the order of operands of EarlyExitVPBB's phi recipes.
2538+
// Therefore, if early exit (UncountableExitingBlock) is the first
2539+
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
2540+
// thereby bringing them to match EarlyExitVPBB's predecessor order,
2541+
// with early exit being last (second). Otherwise they already match.
2542+
cast<VPIRPhi>(&R)->swapOperands();
2543+
}
2544+
}
25172545

2546+
EarlyExitingVPBB->getTerminator()->eraseFromParent();
2547+
VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
2548+
2549+
// Split the middle block and have it conditionally branch to the early exit
2550+
// block if EarlyExitTaken.
2551+
VPValue *IsEarlyExitTaken =
2552+
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
25182553
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
25192554
VPBasicBlock *VectorEarlyExitVPBB =
25202555
Plan.createVPBasicBlock("vector.early.exit");
2521-
VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
2556+
VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
25222557
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
25232558
NewMiddle->swapSuccessors();
25242559

2525-
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
2560+
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
25262561

25272562
// Update the exit phis in the early exit block.
25282563
VPBuilder MiddleBuilder(NewMiddle);
25292564
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
2530-
for (VPRecipeBase &R : VPEarlyExitBlock->phis()) {
2565+
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
25312566
auto *ExitIRI = cast<VPIRPhi>(&R);
2532-
// Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has
2567+
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
25332568
// a single predecessor and 1 if it has two.
25342569
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
2535-
if (!VPEarlyExitBlock->getSinglePredecessor()) {
2536-
// If VPEarlyExitBlock has two predecessors, they are already ordered such
2537-
// that early exit is second (and latch exit is first), by construction.
2538-
// But its underlying IRBB (EarlyExitIRBB) may have its predecessors
2539-
// ordered the other way around, and it is the order of the latter which
2540-
// corresponds to the order of operands of VPEarlyExitBlock's phi recipes.
2541-
// Therefore, if early exit (UncountableExitingBlock) is the first
2542-
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
2543-
// thereby bringing them to match VPEarlyExitBlock's predecessor order,
2544-
// with early exit being last (second). Otherwise they already match.
2545-
if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) ==
2546-
UncountableExitingBlock)
2547-
ExitIRI->swapOperands();
2548-
2570+
if (!EarlyExitVPBB->getSinglePredecessor()) {
25492571
// The first of two operands corresponds to the latch exit, via MiddleVPBB
25502572
// predecessor. Extract its last lane.
25512573
ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ struct VPlanTransforms {
6969
PredicatedScalarEvolution &PSE,
7070
bool RequiresScalarEpilogueCheck,
7171
bool TailFolded, Loop *TheLoop,
72-
DebugLoc IVDL);
72+
DebugLoc IVDL, bool HandleUncountableExit,
73+
VFRange &Range);
7374

7475
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
7576
/// flat CFG into a hierarchical CFG.
@@ -179,9 +180,8 @@ struct VPlanTransforms {
179180
/// exit conditions
180181
/// * splitting the original middle block to branch to the early exit block
181182
/// if taken.
182-
static void handleUncountableEarlyExit(VPlan &Plan, Loop *OrigLoop,
183-
BasicBlock *UncountableExitingBlock,
184-
VPRecipeBuilder &RecipeBuilder,
183+
static void handleUncountableEarlyExit(VPlan &Plan, VPBasicBlock *HeaderVPBB,
184+
VPBasicBlock *LatchVPBB,
185185
VFRange &Range);
186186

187187
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p

llvm/unittests/Transforms/Vectorize/VPlanTestBase.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define LLVM_UNITTESTS_TRANSFORMS_VECTORIZE_VPLANTESTBASE_H
1414

1515
#include "../lib/Transforms/Vectorize/VPlan.h"
16+
#include "../lib/Transforms/Vectorize/VPlanHelpers.h"
1617
#include "../lib/Transforms/Vectorize/VPlanTransforms.h"
1718
#include "llvm/Analysis/AssumptionCache.h"
1819
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -72,8 +73,9 @@ class VPlanTestIRBase : public testing::Test {
7273
PredicatedScalarEvolution PSE(*SE, *L);
7374
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
7475
auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
76+
VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2));
7577
VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
76-
PSE, true, false, L, {});
78+
PSE, true, false, L, {}, false, R);
7779
VPlanTransforms::createLoopRegions(*Plan);
7880
return Plan;
7981
}

0 commit comments

Comments
 (0)