@@ -1363,9 +1363,11 @@ class LoopVectorizationCostModel {
1363
1363
// If we might exit from anywhere but the latch, must run the exiting
1364
1364
// iteration in scalar form.
1365
1365
if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1366
- LLVM_DEBUG (
1367
- dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1368
- return true ;
1366
+ if (!Legal->canVectorizeMultiCond ()) {
1367
+ LLVM_DEBUG (
1368
+ dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1369
+ return true ;
1370
+ }
1369
1371
}
1370
1372
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue ()) {
1371
1373
LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: "
@@ -2544,8 +2546,17 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2544
2546
LoopVectorPreHeader = OrigLoop->getLoopPreheader ();
2545
2547
assert (LoopVectorPreHeader && " Invalid loop structure" );
2546
2548
LoopExitBlock = OrigLoop->getUniqueExitBlock (); // may be nullptr
2547
- assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ())) &&
2548
- " multiple exit loop without required epilogue?" );
2549
+ if (Legal->canVectorizeMultiCond ()) {
2550
+ BasicBlock *Latch = OrigLoop->getLoopLatch ();
2551
+ BasicBlock *TrueSucc =
2552
+ cast<BranchInst>(Latch->getTerminator ())->getSuccessor (0 );
2553
+ BasicBlock *FalseSucc =
2554
+ cast<BranchInst>(Latch->getTerminator ())->getSuccessor (1 );
2555
+ LoopExitBlock = OrigLoop->contains (TrueSucc) ? FalseSucc : TrueSucc;
2556
+ } else {
2557
+ assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ())) &&
2558
+ " multiple exit loop without required epilogue?" );
2559
+ }
2549
2560
2550
2561
LoopMiddleBlock =
2551
2562
SplitBlock (LoopVectorPreHeader, LoopVectorPreHeader->getTerminator (), DT,
@@ -2912,7 +2923,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
2912
2923
for (PHINode &PN : Exit->phis ())
2913
2924
PSE.getSE ()->forgetLcssaPhiWithNewPredecessor (OrigLoop, &PN);
2914
2925
2915
- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2926
+ if (Legal->canVectorizeMultiCond () ||
2927
+ Cost->requiresScalarEpilogue (VF.isVector ())) {
2916
2928
// No edge from the middle block to the unique exit block has been inserted
2917
2929
// and there is nothing to fix from vector loop; phis should have incoming
2918
2930
// from scalar loop only.
@@ -3557,7 +3569,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
3557
3569
TheLoop->getExitingBlocks (Exiting);
3558
3570
for (BasicBlock *E : Exiting) {
3559
3571
auto *Cmp = dyn_cast<Instruction>(E->getTerminator ()->getOperand (0 ));
3560
- if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse ())
3572
+ if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse () &&
3573
+ (TheLoop->getLoopLatch () == E || !Legal->canVectorizeMultiCond ()))
3561
3574
AddToWorklistIfAllowed (Cmp);
3562
3575
}
3563
3576
@@ -7522,7 +7535,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7522
7535
LLVM_DEBUG (BestVPlan.dump ());
7523
7536
7524
7537
// Perform the actual loop transformation.
7525
- VPTransformState State (BestVF, BestUF, LI, DT, ILV.Builder , &ILV, &BestVPlan);
7538
+ VPTransformState State (BestVF, BestUF, LI, DT, ILV.Builder , &ILV, &BestVPlan,
7539
+ OrigLoop);
7526
7540
7527
7541
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
7528
7542
// before making any changes to the CFG.
@@ -7583,12 +7597,15 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7583
7597
BestVPlan.execute (&State);
7584
7598
7585
7599
// 2.5 Collect reduction resume values.
7586
- auto *ExitVPBB =
7587
- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7588
- for (VPRecipeBase &R : *ExitVPBB) {
7589
- createAndCollectMergePhiForReduction (
7590
- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7591
- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7600
+ VPBasicBlock *ExitVPBB = nullptr ;
7601
+ if (BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ()) {
7602
+ ExitVPBB = cast<VPBasicBlock>(
7603
+ BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7604
+ for (VPRecipeBase &R : *ExitVPBB) {
7605
+ createAndCollectMergePhiForReduction (
7606
+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
7607
+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7608
+ }
7592
7609
}
7593
7610
7594
7611
// 2.6. Maintain Loop Hints
@@ -7614,6 +7631,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7614
7631
LoopVectorizeHints Hints (L, true , *ORE);
7615
7632
Hints.setAlreadyVectorized ();
7616
7633
}
7634
+
7617
7635
TargetTransformInfo::UnrollingPreferences UP;
7618
7636
TTI.getUnrollingPreferences (L, *PSE.getSE (), UP, ORE);
7619
7637
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7626,15 +7644,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7626
7644
ILV.printDebugTracesAtEnd ();
7627
7645
7628
7646
// 4. Adjust branch weight of the branch in the middle block.
7629
- auto *MiddleTerm =
7630
- cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7631
- if (MiddleTerm->isConditional () &&
7632
- hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7633
- // Assume that `Count % VectorTripCount` is equally distributed.
7634
- unsigned TripCount = BestVPlan.getUF () * State.VF .getKnownMinValue ();
7635
- assert (TripCount > 0 && " trip count should not be zero" );
7636
- const uint32_t Weights[] = {1 , TripCount - 1 };
7637
- setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7647
+ if (ExitVPBB) {
7648
+ auto *MiddleTerm =
7649
+ cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7650
+ if (MiddleTerm->isConditional () &&
7651
+ hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7652
+ // Assume that `Count % VectorTripCount` is equally distributed.
7653
+ unsigned TripCount = BestVPlan.getUF () * State.VF .getKnownMinValue ();
7654
+ assert (TripCount > 0 && " trip count should not be zero" );
7655
+ const uint32_t Weights[] = {1 , TripCount - 1 };
7656
+ setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7657
+ }
7638
7658
}
7639
7659
7640
7660
return State.ExpandedSCEVs ;
@@ -8019,7 +8039,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
8019
8039
// If source is an exiting block, we know the exit edge is dynamically dead
8020
8040
// in the vector loop, and thus we don't need to restrict the mask. Avoid
8021
8041
// adding uses of an otherwise potentially dead instruction.
8022
- if (OrigLoop->isLoopExiting (Src))
8042
+ if (!Legal-> canVectorizeMultiCond () && OrigLoop->isLoopExiting (Src))
8023
8043
return EdgeMaskCache[Edge] = SrcMask;
8024
8044
8025
8045
VPValue *EdgeMask = getVPValueOrAddLiveIn (BI->getCondition ());
@@ -8664,6 +8684,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8664
8684
static SetVector<VPIRInstruction *> collectUsersInExitBlock (
8665
8685
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8666
8686
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8687
+ if (!Plan.getVectorLoopRegion ()->getSingleSuccessor ())
8688
+ return {};
8667
8689
auto *MiddleVPBB =
8668
8690
cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
8669
8691
// No edge from the middle block to the unique exit block has been inserted
@@ -8751,6 +8773,8 @@ static void addLiveOutsForFirstOrderRecurrences(
8751
8773
// TODO: Should be replaced by
8752
8774
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8753
8775
// scalar region is modeled as well.
8776
+ if (!VectorRegion->getSingleSuccessor ())
8777
+ return ;
8754
8778
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor ());
8755
8779
VPBasicBlock *ScalarPHVPBB = nullptr ;
8756
8780
if (MiddleVPBB->getNumSuccessors () == 2 ) {
@@ -9037,6 +9061,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9037
9061
" VPBasicBlock" );
9038
9062
RecipeBuilder.fixHeaderPhis ();
9039
9063
9064
+ if (Legal->canVectorizeMultiCond ()) {
9065
+ VPlanTransforms::convertToMultiCond (*Plan, *PSE.getSE (), OrigLoop,
9066
+ RecipeBuilder);
9067
+ }
9068
+
9040
9069
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
9041
9070
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9042
9071
addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
@@ -9168,8 +9197,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9168
9197
using namespace VPlanPatternMatch ;
9169
9198
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion ();
9170
9199
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock ();
9171
- VPBasicBlock *MiddleVPBB =
9172
- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
9173
9200
for (VPRecipeBase &R : Header->phis ()) {
9174
9201
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9175
9202
if (!PhiR || !PhiR->isInLoop () || (MinVF.isScalar () && !PhiR->isOrdered ()))
@@ -9188,8 +9215,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9188
9215
for (VPUser *U : Cur->users ()) {
9189
9216
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
9190
9217
if (!UserRecipe->getParent ()->getEnclosingLoopRegion ()) {
9191
- assert (UserRecipe->getParent () == MiddleVPBB &&
9192
- " U must be either in the loop region or the middle block." );
9193
9218
continue ;
9194
9219
}
9195
9220
Worklist.insert (UserRecipe);
@@ -9294,6 +9319,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9294
9319
}
9295
9320
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock ();
9296
9321
Builder.setInsertPoint (&*LatchVPBB->begin ());
9322
+ if (!VectorLoopRegion->getSingleSuccessor ())
9323
+ return ;
9324
+ VPBasicBlock *MiddleVPBB =
9325
+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
9297
9326
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi ();
9298
9327
for (VPRecipeBase &R :
9299
9328
Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
0 commit comments