@@ -224,9 +224,10 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
224
224
225
225
/// Construct the state object used while executing a VPlan.
/// The initializer list stores VF, LI, Builder, ILV and Plan, builds CFG from
/// DT, and starts LVer out as nullptr.
/// In the '+' version of this hunk TypeAnalysis is constructed from the
/// caller-supplied CanonicalIVTy; the '-' version derived the same argument
/// from Plan->getCanonicalIV()->getScalarType().
/// NOTE(review): UF is accepted but is not referenced by the initializer list
/// shown here -- confirm whether it is still needed.
VPTransformState::VPTransformState (ElementCount VF, unsigned UF, LoopInfo *LI,
226
226
DominatorTree *DT, IRBuilderBase &Builder,
227
- InnerLoopVectorizer *ILV, VPlan *Plan)
227
+ InnerLoopVectorizer *ILV, VPlan *Plan,
228
+ Type *CanonicalIVTy)
228
229
: VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229
- LVer(nullptr ), TypeAnalysis(Plan-> getCanonicalIV ()->getScalarType() ) {}
230
+ LVer(nullptr ), TypeAnalysis(CanonicalIVTy ) {}
230
231
231
232
Value *VPTransformState::get (VPValue *Def, const VPLane &Lane) {
232
233
if (Def->isLiveIn ())
@@ -275,8 +276,8 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
275
276
// Place the code for broadcasting invariant variables in the new preheader.
276
277
IRBuilder<>::InsertPointGuard Guard (Builder);
277
278
if (SafeToHoist) {
278
- BasicBlock *LoopVectorPreHeader = CFG. VPBB2IRBB [cast<VPBasicBlock>(
279
- Plan-> getVectorLoopRegion ()-> getSinglePredecessor ())];
279
+ BasicBlock *LoopVectorPreHeader =
280
+ CFG. VPBB2IRBB [cast<VPBasicBlock>(Plan-> getEntry ())];
280
281
if (LoopVectorPreHeader)
281
282
Builder.SetInsertPoint (LoopVectorPreHeader->getTerminator ());
282
283
}
@@ -417,6 +418,12 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
417
418
PrevBB->getParent (), CFG.ExitBB );
418
419
LLVM_DEBUG (dbgs () << " LV: created " << NewBB->getName () << ' \n ' );
419
420
421
+ connectToPredecessors (NewBB, CFG);
422
+ return NewBB;
423
+ }
424
+
425
+ void VPBasicBlock::connectToPredecessors (BasicBlock *NewBB,
426
+ VPTransformState::CFGState &CFG) {
420
427
// Hook up the new basic block to its predecessors.
421
428
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
422
429
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
@@ -447,38 +454,14 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
447
454
}
448
455
CFG.DTU .applyUpdates ({{DominatorTree::Insert, PredBB, NewBB}});
449
456
}
450
- return NewBB;
451
457
}
452
-
453
458
/// Execute this VPIRBasicBlock against its wrapped IR basic block.
/// Asserts that the block has at most two hierarchical successors, points the
/// builder at the IR block's terminator, and runs executeRecipes() for that
/// IR block.
/// The '-' lines are the previous inline wiring: with a single successor the
/// unreachable terminator was replaced by a branch whose operand 0 was reset
/// to nullptr, and then each predecessor's branch had the matching successor
/// slot set to this block, with a DominatorTree::Insert update recorded.
/// The '+' line hands that wiring to connectToPredecessors().
void VPIRBasicBlock::execute (VPTransformState *State) {
454
459
assert (getHierarchicalSuccessors ().size () <= 2 &&
455
460
" VPIRBasicBlock can have at most two successors at the moment!" );
456
461
State->Builder .SetInsertPoint (getIRBasicBlock ()->getTerminator ());
457
462
executeRecipes (State, getIRBasicBlock ());
458
- if (getSingleSuccessor ()) {
459
- assert (isa<UnreachableInst>(getIRBasicBlock ()->getTerminator ()));
460
- auto *Br = State->Builder .CreateBr (getIRBasicBlock ());
461
- Br->setOperand (0 , nullptr );
462
- getIRBasicBlock ()->getTerminator ()->eraseFromParent ();
463
- }
464
-
465
- for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
466
- VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
467
- BasicBlock *PredBB = State->CFG .VPBB2IRBB [PredVPBB];
468
- assert (PredBB && " Predecessor basic-block not found building successor." );
469
- LLVM_DEBUG (dbgs () << " LV: draw edge from" << PredBB->getName () << ' \n ' );
470
463
471
- auto *PredBBTerminator = PredBB->getTerminator ();
472
- auto *TermBr = cast<BranchInst>(PredBBTerminator);
473
- // Set each forward successor here when it is created, excluding
474
- // backedges. A backward successor is set when the branch is created.
475
- const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors ();
476
- unsigned idx = PredVPSuccessors.front () == this ? 0 : 1 ;
477
- assert (!TermBr->getSuccessor (idx) &&
478
- " Trying to reset an existing successor block." );
479
- TermBr->setSuccessor (idx, IRBB);
480
- State->CFG .DTU .applyUpdates ({{DominatorTree::Insert, PredBB, IRBB}});
481
- }
464
// Replacement for the removed block above: connect the wrapped IR block to
// its predecessors through the shared helper.
// NOTE(review): the helper's body is only partly visible in this diff --
// confirm it also covers the single-successor terminator rewrite that the
// '-' lines performed.
+ connectToPredecessors (getIRBasicBlock (), State->CFG );
482
465
}
483
466
484
467
void VPBasicBlock::execute (VPTransformState *State) {
@@ -954,7 +937,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
954
937
955
938
IRBuilder<> Builder (State.CFG .PrevBB ->getTerminator ());
956
939
// FIXME: Model VF * UF computation completely in VPlan.
957
- assert (VFxUF.getNumUsers () && " VFxUF expected to always have users" );
958
940
unsigned UF = getUF ();
959
941
if (VF.getNumUsers ()) {
960
942
Value *RuntimeVF = getRuntimeVF (Builder, TCTy, State.VF );
@@ -1026,8 +1008,13 @@ void VPlan::execute(VPTransformState *State) {
1026
1008
// skeleton creation, so we can only create the VPIRBasicBlocks now during
1027
1009
// VPlan execution rather than earlier during VPlan construction.
1028
1010
BasicBlock *MiddleBB = State->CFG .ExitBB ;
1029
- VPBasicBlock *MiddleVPBB =
1030
- cast<VPBasicBlock>(getVectorLoopRegion ()->getSingleSuccessor ());
1011
+ VPBlockBase *Leaf = nullptr ;
1012
+ for (VPBlockBase *VPB : vp_depth_first_shallow (getEntry ()))
1013
+ if (VPB->getNumSuccessors () == 0 ) {
1014
+ Leaf = VPB;
1015
+ break ;
1016
+ }
1017
+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(Leaf->getSinglePredecessor ());
1031
1018
// Find the VPBB for the scalar preheader, relying on the current structure
1032
1019
// when creating the middle block and its successors: if there's a single
1033
1020
// predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1055,53 +1042,59 @@ void VPlan::execute(VPTransformState *State) {
1055
1042
for (VPBlockBase *Block : vp_depth_first_shallow (Entry))
1056
1043
Block->execute (State);
1057
1044
1058
- VPBasicBlock *LatchVPBB = getVectorLoopRegion ()->getExitingBasicBlock ();
1059
- BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
1060
-
1061
- // Fix the latch value of canonical, reduction and first-order recurrences
1062
- // phis in the vector loop.
1063
- VPBasicBlock *Header = getVectorLoopRegion ()->getEntryBasicBlock ();
1064
- for (VPRecipeBase &R : Header->phis ()) {
1065
- // Skip phi-like recipes that generate their backedege values themselves.
1066
- if (isa<VPWidenPHIRecipe>(&R))
1067
- continue ;
1068
-
1069
- if (isa<VPWidenPointerInductionRecipe>(&R) ||
1070
- isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1071
- PHINode *Phi = nullptr ;
1072
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1073
- Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
1074
- } else {
1075
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1076
- assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1077
- " recipe generating only scalars should have been replaced" );
1078
- auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
1079
- Phi = cast<PHINode>(GEP->getPointerOperand ());
1080
- }
1081
-
1082
- Phi->setIncomingBlock (1 , VectorLatchBB);
1045
+ if (auto *LoopRegion =
1046
+ dyn_cast<VPRegionBlock>(getEntry ()->getSingleSuccessor ())) {
1047
+ VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock ();
1048
+ BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
1049
+
1050
+ // Fix the latch value of canonical, reduction and first-order recurrences
1051
+ // phis in the vector loop.
1052
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
1053
+ for (VPRecipeBase &R : Header->phis ()) {
1054
+ // Skip phi-like recipes that generate their backedge values themselves.
1055
+ if (isa<VPWidenPHIRecipe>(&R))
1056
+ continue ;
1083
1057
1084
- // Move the last step to the end of the latch block. This ensures
1085
- // consistent placement of all induction updates.
1086
- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1087
- Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
1058
+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
1059
+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1060
+ PHINode *Phi = nullptr ;
1061
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1062
+ Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
1063
+ } else {
1064
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1065
+ assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1066
+ " recipe generating only scalars should have been replaced" );
1067
+ auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
1068
+ Phi = cast<PHINode>(GEP->getPointerOperand ());
1069
+ }
1070
+
1071
+ Phi->setIncomingBlock (1 , VectorLatchBB);
1072
+
1073
+ // Move the last step to the end of the latch block. This ensures
1074
+ // consistent placement of all induction updates.
1075
+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1076
+ Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
1077
+
1078
+ // Use the steps for the last part as backedge value for the induction.
1079
+ if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1080
+ Inc->setOperand (0 , State->get (IV->getLastUnrolledPartOperand ()));
1081
+ continue ;
1082
+ }
1088
1083
1089
- // Use the steps for the last part as backedge value for the induction.
1090
- if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1091
- Inc->setOperand (0 , State->get (IV->getLastUnrolledPartOperand ()));
1092
- continue ;
1084
+ // For canonical IV, first-order recurrences and in-order reduction phis,
1085
+ // only a single part is generated, which provides the last part from the
1086
+ // previous iteration. For non-ordered reductions all UF parts are
1087
+ // generated.
1088
+ auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1089
+ bool NeedsScalar =
1090
+ isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1091
+ (isa<VPReductionPHIRecipe>(PhiR) &&
1092
+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1093
+ Value *Phi = State->get (PhiR, NeedsScalar);
1094
+ Value *Val = State->get (PhiR->getBackedgeValue (), NeedsScalar);
1095
+ cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
1093
1096
}
1094
-
1095
- auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1096
- bool NeedsScalar =
1097
- isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1098
- (isa<VPReductionPHIRecipe>(PhiR) &&
1099
- cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1100
- Value *Phi = State->get (PhiR, NeedsScalar);
1101
- Value *Val = State->get (PhiR->getBackedgeValue (), NeedsScalar);
1102
- cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
1103
1097
}
1104
-
1105
1098
State->CFG .DTU .flush ();
1106
1099
assert (State->CFG .DTU .getDomTree ().verify (
1107
1100
DominatorTree::VerificationLevel::Fast) &&
0 commit comments