@@ -1387,9 +1387,11 @@ class LoopVectorizationCostModel {
1387
1387
// If we might exit from anywhere but the latch, must run the exiting
1388
1388
// iteration in scalar form.
1389
1389
if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1390
- LLVM_DEBUG (
1391
- dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1392
- return true ;
1390
+ if (!Legal->canVectorizeMultiCond ()) {
1391
+ LLVM_DEBUG (
1392
+ dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1393
+ return true ;
1394
+ }
1393
1395
}
1394
1396
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue ()) {
1395
1397
LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: "
@@ -2571,8 +2573,17 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2571
2573
LoopVectorPreHeader = OrigLoop->getLoopPreheader ();
2572
2574
assert (LoopVectorPreHeader && " Invalid loop structure" );
2573
2575
LoopExitBlock = OrigLoop->getUniqueExitBlock (); // may be nullptr
2574
- assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ())) &&
2575
- " multiple exit loop without required epilogue?" );
2576
+ if (Legal->canVectorizeMultiCond ()) {
2577
+ BasicBlock *Latch = OrigLoop->getLoopLatch ();
2578
+ BasicBlock *TrueSucc =
2579
+ cast<BranchInst>(Latch->getTerminator ())->getSuccessor (0 );
2580
+ BasicBlock *FalseSucc =
2581
+ cast<BranchInst>(Latch->getTerminator ())->getSuccessor (1 );
2582
+ LoopExitBlock = OrigLoop->contains (TrueSucc) ? FalseSucc : TrueSucc;
2583
+ } else {
2584
+ assert ((LoopExitBlock || Cost->requiresScalarEpilogue (VF.isVector ())) &&
2585
+ " multiple exit loop without required epilogue?" );
2586
+ }
2576
2587
2577
2588
LoopMiddleBlock =
2578
2589
SplitBlock (LoopVectorPreHeader, LoopVectorPreHeader->getTerminator (), DT,
@@ -2943,24 +2954,26 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
2943
2954
VPRegionBlock *VectorRegion = State.Plan ->getVectorLoopRegion ();
2944
2955
VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock ();
2945
2956
Loop *VectorLoop = LI->getLoopFor (State.CFG .VPBB2IRBB [LatchVPBB]);
2946
- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2947
- // No edge from the middle block to the unique exit block has been inserted
2948
- // and there is nothing to fix from vector loop; phis should have incoming
2949
- // from scalar loop only.
2950
- } else {
2951
- // TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
2952
- // the cost model.
2953
-
2954
- // If we inserted an edge from the middle block to the unique exit block,
2955
- // update uses outside the loop (phis) to account for the newly inserted
2956
- // edge.
2957
-
2958
- // Fix-up external users of the induction variables.
2959
- for (const auto &Entry : Legal->getInductionVars ())
2960
- fixupIVUsers (Entry.first , Entry.second ,
2961
- getOrCreateVectorTripCount (VectorLoop->getLoopPreheader ()),
2962
- IVEndValues[Entry.first ], LoopMiddleBlock,
2963
- VectorLoop->getHeader (), Plan, State);
2957
+ if (OrigLoop->getUniqueExitBlock ()) {
2958
+ if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2959
+ // No edge from the middle block to the unique exit block has been
2960
+ // inserted and there is nothing to fix from vector loop; phis should have
2961
+ // incoming from scalar loop only.
2962
+ } else {
2963
+ // TODO: Check VPLiveOuts to see if IV users need fixing instead of
2964
+ // checking the cost model.
2965
+
2966
+ // If we inserted an edge from the middle block to the unique exit block,
2967
+ // update uses outside the loop (phis) to account for the newly inserted
2968
+ // edge.
2969
+
2970
+ // Fix-up external users of the induction variables.
2971
+ for (const auto &Entry : Legal->getInductionVars ())
2972
+ fixupIVUsers (Entry.first , Entry.second ,
2973
+ getOrCreateVectorTripCount (VectorLoop->getLoopPreheader ()),
2974
+ IVEndValues[Entry.first ], LoopMiddleBlock,
2975
+ VectorLoop->getHeader (), Plan, State);
2976
+ }
2964
2977
}
2965
2978
2966
2979
// Fix live-out phis not already fixed earlier.
@@ -3584,7 +3597,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
3584
3597
TheLoop->getExitingBlocks (Exiting);
3585
3598
for (BasicBlock *E : Exiting) {
3586
3599
auto *Cmp = dyn_cast<Instruction>(E->getTerminator ()->getOperand (0 ));
3587
- if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse ())
3600
+ if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse () &&
3601
+ (TheLoop->getLoopLatch () == E || !Legal->canVectorizeMultiCond ()))
3588
3602
AddToWorklistIfAllowed (Cmp);
3589
3603
}
3590
3604
@@ -7515,7 +7529,8 @@ LoopVectorizationPlanner::executePlan(
7515
7529
LLVM_DEBUG (BestVPlan.dump ());
7516
7530
7517
7531
// Perform the actual loop transformation.
7518
- VPTransformState State (BestVF, BestUF, LI, DT, ILV.Builder , &ILV, &BestVPlan);
7532
+ VPTransformState State (BestVF, BestUF, LI, DT, ILV.Builder , &ILV, &BestVPlan,
7533
+ OrigLoop);
7519
7534
7520
7535
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
7521
7536
// before making any changes to the CFG.
@@ -7577,12 +7592,15 @@ LoopVectorizationPlanner::executePlan(
7577
7592
7578
7593
// 2.5 Collect reduction resume values.
7579
7594
DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
7580
- auto *ExitVPBB =
7581
- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7582
- for (VPRecipeBase &R : *ExitVPBB) {
7583
- createAndCollectMergePhiForReduction (
7584
- dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7585
- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7595
+ VPBasicBlock *ExitVPBB = nullptr ;
7596
+ if (BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ()) {
7597
+ ExitVPBB = cast<VPBasicBlock>(
7598
+ BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7599
+ for (VPRecipeBase &R : *ExitVPBB) {
7600
+ createAndCollectMergePhiForReduction (
7601
+ dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7602
+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7603
+ }
7586
7604
}
7587
7605
7588
7606
// 2.6. Maintain Loop Hints
@@ -7608,6 +7626,7 @@ LoopVectorizationPlanner::executePlan(
7608
7626
LoopVectorizeHints Hints (L, true , *ORE);
7609
7627
Hints.setAlreadyVectorized ();
7610
7628
}
7629
+
7611
7630
TargetTransformInfo::UnrollingPreferences UP;
7612
7631
TTI.getUnrollingPreferences (L, *PSE.getSE (), UP, ORE);
7613
7632
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7620,15 +7639,17 @@ LoopVectorizationPlanner::executePlan(
7620
7639
ILV.printDebugTracesAtEnd ();
7621
7640
7622
7641
// 4. Adjust branch weight of the branch in the middle block.
7623
- auto *MiddleTerm =
7624
- cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7625
- if (MiddleTerm->isConditional () &&
7626
- hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7627
- // Assume that `Count % VectorTripCount` is equally distributed.
7628
- unsigned TripCount = State.UF * State.VF .getKnownMinValue ();
7629
- assert (TripCount > 0 && " trip count should not be zero" );
7630
- const uint32_t Weights[] = {1 , TripCount - 1 };
7631
- setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7642
+ if (ExitVPBB) {
7643
+ auto *MiddleTerm =
7644
+ cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7645
+ if (MiddleTerm->isConditional () &&
7646
+ hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7647
+ // Assume that `Count % VectorTripCount` is equally distributed.
7648
+ unsigned TripCount = State.UF * State.VF .getKnownMinValue ();
7649
+ assert (TripCount > 0 && " trip count should not be zero" );
7650
+ const uint32_t Weights[] = {1 , TripCount - 1 };
7651
+ setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7652
+ }
7632
7653
}
7633
7654
7634
7655
return {State.ExpandedSCEVs , ReductionResumeValues};
@@ -8013,7 +8034,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
8013
8034
// If source is an exiting block, we know the exit edge is dynamically dead
8014
8035
// in the vector loop, and thus we don't need to restrict the mask. Avoid
8015
8036
// adding uses of an otherwise potentially dead instruction.
8016
- if (OrigLoop->isLoopExiting (Src))
8037
+ if (!Legal-> canVectorizeMultiCond () && OrigLoop->isLoopExiting (Src))
8017
8038
return EdgeMaskCache[Edge] = SrcMask;
8018
8039
8019
8040
VPValue *EdgeMask = getVPValueOrAddLiveIn (BI->getCondition ());
@@ -8630,6 +8651,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8630
8651
static SetVector<VPIRInstruction *> collectUsersInExitBlock (
8631
8652
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8632
8653
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8654
+ if (!Plan.getVectorLoopRegion ()->getSingleSuccessor ())
8655
+ return {};
8633
8656
auto *MiddleVPBB =
8634
8657
cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
8635
8658
// No edge from the middle block to the unique exit block has been inserted
@@ -8717,6 +8740,8 @@ static void addLiveOutsForFirstOrderRecurrences(
8717
8740
// TODO: Should be replaced by
8718
8741
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8719
8742
// scalar region is modeled as well.
8743
+ if (!VectorRegion->getSingleSuccessor ())
8744
+ return ;
8720
8745
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor ());
8721
8746
VPBasicBlock *ScalarPHVPBB = nullptr ;
8722
8747
if (MiddleVPBB->getNumSuccessors () == 2 ) {
@@ -8991,6 +9016,67 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8991
9016
" VPBasicBlock" );
8992
9017
RecipeBuilder.fixHeaderPhis ();
8993
9018
9019
+ SmallVector<BasicBlock *> Exiting;
9020
+ OrigLoop->getExitingBlocks (Exiting);
9021
+
9022
+ if (Legal->canVectorizeMultiCond ()) {
9023
+ auto *LatchVPBB =
9024
+ cast<VPBasicBlock>(Plan->getVectorLoopRegion ()->getExiting ());
9025
+ VPBuilder::InsertPointGuard Guard (Builder);
9026
+ Builder.setInsertPoint (LatchVPBB->getTerminator ());
9027
+ auto *MiddleVPBB =
9028
+ cast<VPBasicBlock>(Plan->getVectorLoopRegion ()->getSingleSuccessor ());
9029
+
9030
+ VPValue *EarlyExitTaken = nullptr ;
9031
+ SmallVector<VPValue *> ExitTaken;
9032
+ SmallVector<PHINode *> ExitPhis;
9033
+ SmallVector<Value *> ExitValues;
9034
+ BasicBlock *ExitBlock;
9035
+ for (BasicBlock *E : Exiting) {
9036
+ if (E == OrigLoop->getLoopLatch ()) {
9037
+ BasicBlock *TrueSucc =
9038
+ cast<BranchInst>(E->getTerminator ())->getSuccessor (0 );
9039
+ BasicBlock *FalseSucc =
9040
+ cast<BranchInst>(E->getTerminator ())->getSuccessor (1 );
9041
+ auto EB = !OrigLoop->contains (TrueSucc) ? TrueSucc : FalseSucc;
9042
+
9043
+ auto *VPExitBlock = new VPIRBasicBlock (EB);
9044
+ VPBasicBlock *ScalarPH = new VPBasicBlock (" scalar.ph" );
9045
+ VPBlockUtils::connectBlocks (MiddleVPBB, VPExitBlock);
9046
+ VPBlockUtils::connectBlocks (MiddleVPBB, ScalarPH);
9047
+ continue ;
9048
+ }
9049
+ BasicBlock *TrueSucc =
9050
+ cast<BranchInst>(E->getTerminator ())->getSuccessor (0 );
9051
+ BasicBlock *FalseSucc =
9052
+ cast<BranchInst>(E->getTerminator ())->getSuccessor (1 );
9053
+ VPValue *M = RecipeBuilder.getBlockInMask (
9054
+ OrigLoop->contains (TrueSucc) ? TrueSucc : FalseSucc);
9055
+
9056
+ auto *N = Builder.createNot (M);
9057
+ auto *EC = Builder.createNaryOp (VPInstruction::AnyOf, {N});
9058
+ ExitTaken.push_back (EC);
9059
+ if (EarlyExitTaken)
9060
+ EarlyExitTaken = Builder.createOr (EarlyExitTaken, EC);
9061
+ else
9062
+ EarlyExitTaken = EC;
9063
+ ExitBlock = !OrigLoop->contains (TrueSucc) ? TrueSucc : FalseSucc;
9064
+ }
9065
+
9066
+ auto *Term = dyn_cast<VPInstruction>(LatchVPBB->getTerminator ());
9067
+ auto *IsLatchExiting = Builder.createICmp (
9068
+ CmpInst::ICMP_EQ, Term->getOperand (0 ), Term->getOperand (1 ));
9069
+ Builder.createNaryOp (VPInstruction::BranchMultipleConds,
9070
+ {EarlyExitTaken, IsLatchExiting});
9071
+ Term->eraseFromParent ();
9072
+
9073
+ auto *EA = new VPIRBasicBlock (ExitBlock);
9074
+ VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion ();
9075
+ VPBlockUtils::disconnectBlocks (LoopRegion, MiddleVPBB);
9076
+ VPBlockUtils::connectBlocks (LoopRegion, EA);
9077
+ VPBlockUtils::connectBlocks (LoopRegion, MiddleVPBB);
9078
+ }
9079
+
8994
9080
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
8995
9081
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
8996
9082
addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
@@ -9062,6 +9148,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9062
9148
VPlanTransforms::addActiveLaneMask (*Plan, ForControlFlow,
9063
9149
WithoutRuntimeCheck);
9064
9150
}
9151
+
9065
9152
return Plan;
9066
9153
}
9067
9154
@@ -9286,6 +9373,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9286
9373
}
9287
9374
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock ();
9288
9375
Builder.setInsertPoint (&*LatchVPBB->begin ());
9376
+ if (!VectorLoopRegion->getSingleSuccessor ())
9377
+ return ;
9289
9378
VPBasicBlock *MiddleVPBB =
9290
9379
cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor ());
9291
9380
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi ();
0 commit comments