Skip to content

Commit dddcde5

Browse files
committed
[VPlan] Remove loop region in optimizeForVFAndUF.
1 parent aefe89b commit dddcde5

15 files changed

+339
-376
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 44 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2956,6 +2956,9 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29562956
for (const auto &KV : Plan.getLiveOuts())
29572957
KV.second->fixPhi(Plan, State);
29582958

2959+
if (!isa<VPRegionBlock>(State.Plan->getEntry()->getSingleSuccessor()))
2960+
return;
2961+
29592962
for (Instruction *PI : PredicatedInstructions)
29602963
sinkScalarOperands(&*PI);
29612964

@@ -7499,7 +7502,8 @@ LoopVectorizationPlanner::executePlan(
74997502
LLVM_DEBUG(BestVPlan.dump());
75007503

75017504
// Perform the actual loop transformation.
7502-
VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan);
7505+
VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan,
7506+
Legal->getWidestInductionType());
75037507

75047508
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
75057509
// before making any changes to the CFG.
@@ -7561,14 +7565,15 @@ LoopVectorizationPlanner::executePlan(
75617565

75627566
// 2.5 Collect reduction resume values.
75637567
DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
7564-
auto *ExitVPBB =
7565-
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7566-
if (IsEpilogueVectorization)
7568+
if (IsEpilogueVectorization) {
7569+
auto *ExitVPBB = cast<VPBasicBlock>(
7570+
BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
75677571
for (VPRecipeBase &R : *ExitVPBB) {
75687572
updateAndCollectMergePhiForReductionForEpilogueVectorization(
75697573
dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
75707574
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
75717575
}
7576+
}
75727577

75737578
// 2.6. Maintain Loop Hints
75747579
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -7579,24 +7584,26 @@ LoopVectorizationPlanner::executePlan(
75797584
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
75807585
LLVMLoopVectorizeFollowupVectorized});
75817586

7582-
VPBasicBlock *HeaderVPBB =
7583-
BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
7584-
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
7585-
if (VectorizedLoopID)
7586-
L->setLoopID(*VectorizedLoopID);
7587-
else {
7588-
// Keep all loop hints from the original loop on the vector loop (we'll
7589-
// replace the vectorizer-specific hints below).
7590-
if (MDNode *LID = OrigLoop->getLoopID())
7591-
L->setLoopID(LID);
7592-
7593-
LoopVectorizeHints Hints(L, true, *ORE);
7594-
Hints.setAlreadyVectorized();
7587+
if (auto *R =
7588+
dyn_cast<VPRegionBlock>(BestVPlan.getEntry()->getSingleSuccessor())) {
7589+
VPBasicBlock *HeaderVPBB = R->getEntryBasicBlock();
7590+
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
7591+
if (VectorizedLoopID)
7592+
L->setLoopID(*VectorizedLoopID);
7593+
else {
7594+
// Keep all loop hints from the original loop on the vector loop (we'll
7595+
// replace the vectorizer-specific hints below).
7596+
if (MDNode *LID = OrigLoop->getLoopID())
7597+
L->setLoopID(LID);
7598+
7599+
LoopVectorizeHints Hints(L, true, *ORE);
7600+
Hints.setAlreadyVectorized();
7601+
}
7602+
TargetTransformInfo::UnrollingPreferences UP;
7603+
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
7604+
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
7605+
addRuntimeUnrollDisableMetaData(L);
75957606
}
7596-
TargetTransformInfo::UnrollingPreferences UP;
7597-
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
7598-
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
7599-
addRuntimeUnrollDisableMetaData(L);
76007607

76017608
// 3. Fix the vectorized code: take care of header phi's, live-outs,
76027609
// predication, updating analyses.
@@ -7605,15 +7612,20 @@ LoopVectorizationPlanner::executePlan(
76057612
ILV.printDebugTracesAtEnd();
76067613

76077614
// 4. Adjust branch weight of the branch in the middle block.
7608-
auto *MiddleTerm =
7609-
cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7610-
if (MiddleTerm->isConditional() &&
7611-
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7612-
// Assume that `Count % VectorTripCount` is equally distributed.
7613-
unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
7614-
assert(TripCount > 0 && "trip count should not be zero");
7615-
const uint32_t Weights[] = {1, TripCount - 1};
7616-
setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7615+
if (auto *R =
7616+
dyn_cast<VPRegionBlock>(BestVPlan.getEntry()->getSingleSuccessor())) {
7617+
auto *ExitVPBB = cast<VPBasicBlock>(R->getSingleSuccessor());
7618+
7619+
auto *MiddleTerm =
7620+
cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7621+
if (MiddleTerm->isConditional() &&
7622+
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7623+
// Assume that `Count % VectorTripCount` is equally distributed.
7624+
unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
7625+
assert(TripCount > 0 && "trip count should not be zero");
7626+
const uint32_t Weights[] = {1, TripCount - 1};
7627+
setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7628+
}
76177629
}
76187630

76197631
return {State.ExpandedSCEVs, ReductionResumeValues};
@@ -9423,7 +9435,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
94239435
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
94249436
Kind, cast_if_present<BinaryOperator>(FPBinOp));
94259437
DerivedIV->setName("offset.idx");
9426-
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
9438+
assert((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&
9439+
"IV didn't need transforming?");
94279440

94289441
State.set(this, DerivedIV, VPLane(0));
94299442
}

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 68 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -224,9 +224,10 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
224224

225225
VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
226226
DominatorTree *DT, IRBuilderBase &Builder,
227-
InnerLoopVectorizer *ILV, VPlan *Plan)
227+
InnerLoopVectorizer *ILV, VPlan *Plan,
228+
Type *CanonicalIVTy)
228229
: VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229-
LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
230+
LVer(nullptr), TypeAnalysis(CanonicalIVTy) {}
230231

231232
Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
232233
if (Def->isLiveIn())
@@ -275,8 +276,8 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
275276
// Place the code for broadcasting invariant variables in the new preheader.
276277
IRBuilder<>::InsertPointGuard Guard(Builder);
277278
if (SafeToHoist) {
278-
BasicBlock *LoopVectorPreHeader = CFG.VPBB2IRBB[cast<VPBasicBlock>(
279-
Plan->getVectorLoopRegion()->getSinglePredecessor())];
279+
BasicBlock *LoopVectorPreHeader =
280+
CFG.VPBB2IRBB[cast<VPBasicBlock>(Plan->getEntry())];
280281
if (LoopVectorPreHeader)
281282
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
282283
}
@@ -417,6 +418,12 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
417418
PrevBB->getParent(), CFG.ExitBB);
418419
LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
419420

421+
connectToPredecessors(NewBB, CFG);
422+
return NewBB;
423+
}
424+
425+
void VPBasicBlock::connectToPredecessors(BasicBlock *NewBB,
426+
VPTransformState::CFGState &CFG) {
420427
// Hook up the new basic block to its predecessors.
421428
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
422429
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
@@ -447,38 +454,14 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
447454
}
448455
CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
449456
}
450-
return NewBB;
451457
}
452-
453458
void VPIRBasicBlock::execute(VPTransformState *State) {
454459
assert(getHierarchicalSuccessors().size() <= 2 &&
455460
"VPIRBasicBlock can have at most two successors at the moment!");
456461
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
457462
executeRecipes(State, getIRBasicBlock());
458-
if (getSingleSuccessor()) {
459-
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
460-
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
461-
Br->setOperand(0, nullptr);
462-
getIRBasicBlock()->getTerminator()->eraseFromParent();
463-
}
464-
465-
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
466-
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
467-
BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
468-
assert(PredBB && "Predecessor basic-block not found building successor.");
469-
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
470463

471-
auto *PredBBTerminator = PredBB->getTerminator();
472-
auto *TermBr = cast<BranchInst>(PredBBTerminator);
473-
// Set each forward successor here when it is created, excluding
474-
// backedges. A backward successor is set when the branch is created.
475-
const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
476-
unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
477-
assert(!TermBr->getSuccessor(idx) &&
478-
"Trying to reset an existing successor block.");
479-
TermBr->setSuccessor(idx, IRBB);
480-
State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}});
481-
}
464+
connectToPredecessors(getIRBasicBlock(), State->CFG);
482465
}
483466

484467
void VPBasicBlock::execute(VPTransformState *State) {
@@ -954,7 +937,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
954937

955938
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
956939
// FIXME: Model VF * UF computation completely in VPlan.
957-
assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
958940
unsigned UF = getUF();
959941
if (VF.getNumUsers()) {
960942
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
@@ -1026,8 +1008,13 @@ void VPlan::execute(VPTransformState *State) {
10261008
// skeleton creation, so we can only create the VPIRBasicBlocks now during
10271009
// VPlan execution rather than earlier during VPlan construction.
10281010
BasicBlock *MiddleBB = State->CFG.ExitBB;
1029-
VPBasicBlock *MiddleVPBB =
1030-
cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
1011+
VPBlockBase *Leaf = nullptr;
1012+
for (VPBlockBase *VPB : vp_depth_first_shallow(getEntry()))
1013+
if (VPB->getNumSuccessors() == 0) {
1014+
Leaf = VPB;
1015+
break;
1016+
}
1017+
VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(Leaf->getSinglePredecessor());
10311018
// Find the VPBB for the scalar preheader, relying on the current structure
10321019
// when creating the middle block and its successors: if there's a single
10331020
// predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1055,53 +1042,59 @@ void VPlan::execute(VPTransformState *State) {
10551042
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
10561043
Block->execute(State);
10571044

1058-
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
1059-
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1060-
1061-
// Fix the latch value of canonical, reduction and first-order recurrences
1062-
// phis in the vector loop.
1063-
VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
1064-
for (VPRecipeBase &R : Header->phis()) {
1065-
// Skip phi-like recipes that generate their backedge values themselves.
1066-
if (isa<VPWidenPHIRecipe>(&R))
1067-
continue;
1068-
1069-
if (isa<VPWidenPointerInductionRecipe>(&R) ||
1070-
isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1071-
PHINode *Phi = nullptr;
1072-
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1073-
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1074-
} else {
1075-
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1076-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1077-
"recipe generating only scalars should have been replaced");
1078-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1079-
Phi = cast<PHINode>(GEP->getPointerOperand());
1080-
}
1081-
1082-
Phi->setIncomingBlock(1, VectorLatchBB);
1045+
if (auto *LoopRegion =
1046+
dyn_cast<VPRegionBlock>(getEntry()->getSingleSuccessor())) {
1047+
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1048+
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1049+
1050+
// Fix the latch value of canonical, reduction and first-order recurrences
1051+
// phis in the vector loop.
1052+
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1053+
for (VPRecipeBase &R : Header->phis()) {
1054+
// Skip phi-like recipes that generate their backedge values themselves.
1055+
if (isa<VPWidenPHIRecipe>(&R))
1056+
continue;
10831057

1084-
// Move the last step to the end of the latch block. This ensures
1085-
// consistent placement of all induction updates.
1086-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1087-
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1058+
if (isa<VPWidenPointerInductionRecipe>(&R) ||
1059+
isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1060+
PHINode *Phi = nullptr;
1061+
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1062+
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1063+
} else {
1064+
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1065+
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1066+
"recipe generating only scalars should have been replaced");
1067+
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1068+
Phi = cast<PHINode>(GEP->getPointerOperand());
1069+
}
1070+
1071+
Phi->setIncomingBlock(1, VectorLatchBB);
1072+
1073+
// Move the last step to the end of the latch block. This ensures
1074+
// consistent placement of all induction updates.
1075+
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1076+
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1077+
1078+
// Use the steps for the last part as backedge value for the induction.
1079+
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1080+
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1081+
continue;
1082+
}
10881083

1089-
// Use the steps for the last part as backedge value for the induction.
1090-
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1091-
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1092-
continue;
1084+
// For canonical IV, first-order recurrences and in-order reduction phis,
1085+
// only a single part is generated, which provides the last part from the
1086+
// previous iteration. For non-ordered reductions all UF parts are
1087+
// generated.
1088+
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1089+
bool NeedsScalar =
1090+
isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1091+
(isa<VPReductionPHIRecipe>(PhiR) &&
1092+
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1093+
Value *Phi = State->get(PhiR, NeedsScalar);
1094+
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1095+
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10931096
}
1094-
1095-
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1096-
bool NeedsScalar =
1097-
isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1098-
(isa<VPReductionPHIRecipe>(PhiR) &&
1099-
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1100-
Value *Phi = State->get(PhiR, NeedsScalar);
1101-
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1102-
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
11031097
}
1104-
11051098
State->CFG.DTU.flush();
11061099
assert(State->CFG.DTU.getDomTree().verify(
11071100
DominatorTree::VerificationLevel::Fast) &&

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ class VPLane {
236236
struct VPTransformState {
237237
VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
238238
DominatorTree *DT, IRBuilderBase &Builder,
239-
InnerLoopVectorizer *ILV, VPlan *Plan);
239+
InnerLoopVectorizer *ILV, VPlan *Plan, Type *CanonicalIVTy);
240240

241241
/// The chosen Vectorization Factor of the loop being vectorized.
242242
ElementCount VF;
@@ -3237,6 +3237,8 @@ class VPBasicBlock : public VPBlockBase {
32373237
protected:
32383238
/// Execute the recipes in the IR basic block \p BB.
32393239
void executeRecipes(VPTransformState *State, BasicBlock *BB);
3240+
void connectToPredecessors(BasicBlock *NewBB,
3241+
VPTransformState::CFGState &CFG);
32403242

32413243
private:
32423244
/// Create an IR BasicBlock to hold the output instructions generated by this
@@ -3354,6 +3356,7 @@ class VPRegionBlock : public VPBlockBase {
33543356
assert(!isReplicator() && "should only get pre-header of loop regions");
33553357
return getSinglePredecessor()->getExitingBasicBlock();
33563358
}
3359+
void clearEntry() { Entry = nullptr; }
33573360

33583361
/// An indicator whether this region is to generate multiple replicated
33593362
/// instances of output IR corresponding to its VPBlockBases.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,9 @@ bool VPRecipeBase::mayHaveSideEffects() const {
199199

200200
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
201201
VPValue *ExitValue = getOperand(0);
202-
VPBasicBlock *MiddleVPBB =
203-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
202+
auto *Region = dyn_cast<VPRegionBlock>(Plan.getEntry()->getSingleSuccessor());
203+
VPBasicBlock *MiddleVPBB = dyn_cast_or_null<VPBasicBlock>(
204+
Region ? Region->getSingleSuccessor() : nullptr);
204205
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
205206
auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
206207
// Values leaving the vector loop reach live out phi's in the exiting block
@@ -2066,7 +2067,9 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
20662067
// Replace the temporary unreachable terminator with a new conditional branch,
20672068
// whose two destinations will be set later when they are created.
20682069
auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2069-
assert(isa<UnreachableInst>(CurrentTerminator) &&
2070+
assert((isa<UnreachableInst>(CurrentTerminator) ||
2071+
(isa<BranchInst>(CurrentTerminator) &&
2072+
!CurrentTerminator->getOperand(0))) &&
20702073
"Expected to replace unreachable terminator with conditional branch.");
20712074
auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
20722075
CondBr->setSuccessor(0, nullptr);

0 commit comments

Comments
 (0)