-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Hook IR blocks into VPlan during skeleton creation (NFC) #114292
Conversation
@llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) ChangesAs a first step to move towards modeling the full skeleton in VPlan, start by wrapping IR blocks created during legacy skeleton creation in VPIRBasicBlocks and hook them into the VPlan. This means the skeleton CFG is represented in VPlan, just before execute. This allows moving parts of skeleton creation into recipes in the VPBBs gradually. Note that this allows retiring some manual DT updates, as this will be handled automatically during VPlan execution. Patch is 30.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114292.diff 11 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3d638e52328b57..02c736642de523 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
return VectorTripCount;
}
+static void connectScalarPreheaderInVPlan(VPlan &Plan) {
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader();
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+ VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH);
+ VPBlockUtils::connectBlocks(PredVPB, ScalarPH);
+ VPBlockUtils::connectBlocks(PredVPB, VectorPH);
+}
+
+static void connectCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader();
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+ VPBlockUtils::disconnectBlocks(PredVPB, VectorPH);
+ VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB);
+ VPBlockUtils::connectBlocks(PredVPB, CheckVPIRBB);
+ VPBlockUtils::connectBlocks(CheckVPIRBB, ScalarPH);
+ VPBlockUtils::connectBlocks(CheckVPIRBB, VectorPH);
+}
+
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
Value *Count = getTripCount();
// Reuse existing vector loop preheader for TC checks.
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
"TC check is expected to dominate Bypass");
// Update dominator for Bypass & LoopExit (if needed).
- DT->changeImmediateDominator(Bypass, TCCheckBlock);
BranchInst &BI =
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
LoopBypassBlocks.push_back(TCCheckBlock);
+
+ connectScalarPreheaderInVPlan(Plan);
}
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
"Should already be a bypass block due to iteration count check");
LoopBypassBlocks.push_back(SCEVCheckBlock);
AddedSafetyChecks = true;
+
+ connectCheckBlockInVPlan(Plan, SCEVCheckBlock);
return SCEVCheckBlock;
}
@@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
AddedSafetyChecks = true;
+ connectCheckBlockInVPlan(Plan, MemCheckBlock);
return MemCheckBlock;
}
@@ -7649,10 +7673,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
// before making any changes to the CFG.
- if (!BestVPlan.getPreheader()->empty()) {
+ if (!BestVPlan.getEntry()->empty()) {
State.CFG.PrevBB = OrigLoop->getLoopPreheader();
State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator());
- BestVPlan.getPreheader()->execute(&State);
+ BestVPlan.getEntry()->execute(&State);
}
if (!ILV.getTripCount())
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7861,8 +7885,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
DT->getNode(Bypass)->getIDom()) &&
"TC check is expected to dominate Bypass");
- // Update dominator for Bypass.
- DT->changeImmediateDominator(Bypass, TCCheckBlock);
LoopBypassBlocks.push_back(TCCheckBlock);
// Save the trip count so we don't have to regenerate it in the
@@ -7877,6 +7899,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+ if (PredVPB->getNumSuccessors() == 1)
+ connectScalarPreheaderInVPlan(Plan);
+ else
+ connectCheckBlockInVPlan(Plan, TCCheckBlock);
return TCCheckBlock;
}
@@ -7907,32 +7935,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopVectorPreHeader);
- DT->changeImmediateDominator(LoopVectorPreHeader,
- EPI.MainLoopIterationCountCheck);
-
EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
if (EPI.SCEVSafetyCheck)
EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
- if (EPI.MemSafetyCheck)
+ if (EPI.MemSafetyCheck) {
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
-
- DT->changeImmediateDominator(
- VecEpilogueIterationCountCheck,
- VecEpilogueIterationCountCheck->getSinglePredecessor());
+ }
DT->changeImmediateDominator(LoopScalarPreHeader,
EPI.EpilogueIterationCountCheck);
- if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
- // If there is an epilogue which must run, there's no edge from the
- // middle block to exit blocks and thus no need to update the immediate
- // dominator of the exit blocks.
- DT->changeImmediateDominator(LoopExitBlock,
- EPI.EpilogueIterationCountCheck);
-
// Keep track of bypass blocks, as they feed start values to the induction and
// reduction phis in the scalar loop preheader.
if (EPI.SCEVSafetyCheck)
@@ -8035,6 +8050,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
}
ReplaceInstWithInst(Insert->getTerminator(), &BI);
LoopBypassBlocks.push_back(Insert);
+
+ // A new entry block has been created for the epilogue VPlan. Hook it in.
+ VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert);
+ VPBasicBlock *OldEntry = Plan.getEntry();
+ VPBlockUtils::reassociateBlocks(OldEntry, NewEntry);
+ Plan.setEntry(NewEntry);
+ for (auto &R : make_early_inc_range(*NewEntry)) {
+ auto *VPIR = dyn_cast<VPIRInstruction>(&R);
+ if (!VPIR || !isa<PHINode>(VPIR->getInstruction()))
+ break;
+ VPIR->eraseFromParent();
+ }
+
+ connectScalarPreheaderInVPlan(Plan);
return Insert;
}
@@ -10270,7 +10299,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// should be removed once induction resume value creation is done
// directly in VPlan.
EpilogILV.setTripCount(MainILV.getTripCount());
- for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) {
+ for (auto &R : make_early_inc_range(*BestEpiPlan.getEntry())) {
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
if (!ExpandR)
continue;
@@ -10330,8 +10359,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
}
- assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
- "DT not preserved correctly");
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
DT, true, &ExpandedSCEVs);
++LoopsEpilogueVectorized;
@@ -10359,6 +10386,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
checkMixedPrecision(L, ORE);
}
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
+ "DT not preserved correctly");
+
std::optional<MDNode *> RemainderLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupEpilogue});
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0484543d2d0398..d93296f2e7aa84 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -173,9 +173,8 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
}
void VPBlockBase::setPlan(VPlan *ParentPlan) {
- assert(
- (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
- "Can only set plan on its entry or preheader block.");
+ assert(ParentPlan->getEntry() == this &&
+ "Can only set plan on its entry or preheader block.");
Plan = ParentPlan;
}
@@ -455,13 +454,11 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
"VPIRBasicBlock can have at most two successors at the moment!");
State->Builder.SetInsertPoint(IRBB->getTerminator());
executeRecipes(State, IRBB);
- if (getSingleSuccessor()) {
- assert(isa<UnreachableInst>(IRBB->getTerminator()));
+ if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
auto *Br = State->Builder.CreateBr(IRBB);
Br->setOperand(0, nullptr);
IRBB->getTerminator()->eraseFromParent();
}
-
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
@@ -474,7 +471,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
// backedges. A backward successor is set when the branch is created.
const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
- assert(!TermBr->getSuccessor(idx) &&
+ assert((!TermBr->getSuccessor(idx) || TermBr->getSuccessor(idx) == IRBB) &&
"Trying to reset an existing successor block.");
TermBr->setSuccessor(idx, IRBB);
State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}});
@@ -853,9 +850,6 @@ VPlan::~VPlan() {
Block->dropAllReferences(&DummyValue);
VPBlockBase::deleteCFG(Entry);
-
- Preheader->dropAllReferences(&DummyValue);
- delete Preheader;
}
for (VPValue *VPV : VPLiveInsToFree)
delete VPV;
@@ -878,7 +872,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPIRBasicBlock *Entry =
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
- auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
+ VPBlockUtils::connectBlocks(Entry, VecPreheader);
+ auto Plan = std::make_unique<VPlan>(Entry);
// Create SCEV and VPValue for the trip count.
@@ -1020,8 +1015,9 @@ void VPlan::execute(VPTransformState *State) {
BasicBlock *VectorPreHeader = State->CFG.PrevBB;
State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
- // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
- cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
+ replaceVPBBWithIRVPBB(
+ cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor()),
+ VectorPreHeader);
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
@@ -1055,8 +1051,10 @@ void VPlan::execute(VPTransformState *State) {
MiddleBB->getTerminator()->eraseFromParent();
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Entry);
// Generate code in the loop pre-header and body.
- for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
+ for (VPBlockBase *Block : make_range(RPOT.begin(), RPOT.end()))
Block->execute(State);
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
@@ -1107,9 +1105,6 @@ void VPlan::execute(VPTransformState *State) {
}
State->CFG.DTU.flush();
- assert(State->CFG.DTU.getDomTree().verify(
- DominatorTree::VerificationLevel::Fast) &&
- "DT not preserved correctly");
}
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
@@ -1162,12 +1157,10 @@ void VPlan::print(raw_ostream &O) const {
printLiveIns(O);
- if (!getPreheader()->empty()) {
- O << "\n";
- getPreheader()->print(O, "", SlotTracker);
- }
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
+ RPOT(getEntry());
- for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
+ for (const VPBlockBase *Block : RPOT) {
O << '\n';
Block->print(O, "", SlotTracker);
}
@@ -1204,6 +1197,20 @@ std::string VPlan::getName() const {
return Out;
}
+VPRegionBlock *VPlan::getVectorLoopRegion() {
+ for (VPBlockBase *B : vp_depth_first_shallow(getEntry()))
+ if (auto *R = dyn_cast<VPRegionBlock>(B))
+ return R;
+ return nullptr;
+}
+
+const VPRegionBlock *VPlan::getVectorLoopRegion() const {
+ for (const VPBlockBase *B : vp_depth_first_shallow(getEntry()))
+ if (auto *R = dyn_cast<VPRegionBlock>(B))
+ return R;
+ return nullptr;
+}
+
LLVM_DUMP_METHOD
void VPlan::printDOT(raw_ostream &O) const {
VPlanPrinter Printer(O, *this);
@@ -1259,11 +1266,10 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
VPlan *VPlan::duplicate() {
// Clone blocks.
- VPBasicBlock *NewPreheader = Preheader->clone();
const auto &[NewEntry, __] = cloneFrom(Entry);
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
- auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
+ auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry));
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
for (VPValue *OldLiveIn : VPLiveInsToFree) {
Old2NewVPValues[OldLiveIn] =
@@ -1283,7 +1289,6 @@ VPlan *VPlan::duplicate() {
// else NewTripCount will be created and inserted into Old2NewVPValues when
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
- remapOperands(Preheader, NewPreheader, Old2NewVPValues);
remapOperands(Entry, NewEntry, Old2NewVPValues);
// Clone live-outs.
@@ -1301,6 +1306,19 @@ VPlan *VPlan::duplicate() {
return NewPlan;
}
+VPBasicBlock *VPlan::getScalarPreheader() {
+ auto *MiddleVPBB =
+ cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ if (MiddleVPBB->getNumSuccessors() == 2) {
+ // Order is strict: first is the exit block, second is the scalar preheader.
+ return cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
+ }
+ if (auto *IRVPBB = dyn_cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor()))
+ return IRVPBB;
+
+ return nullptr;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
@@ -1339,8 +1357,6 @@ void VPlanPrinter::dump() {
OS << "edge [fontname=Courier, fontsize=30]\n";
OS << "compound=true\n";
- dumpBlock(Plan.getPreheader());
-
for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
dumpBlock(Block);
@@ -1601,7 +1617,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
assignName(Plan.BackedgeTakenCount);
for (VPValue *LI : Plan.VPLiveInsToFree)
assignName(LI);
- assignNames(Plan.getPreheader());
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0e0c64f6df9cba..f0193fdbed2bb2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3671,11 +3671,6 @@ class VPlan {
/// preheader of the vector loop.
VPBasicBlock *Entry;
- /// VPBasicBlock corresponding to the original preheader. Used to place
- /// VPExpandSCEV recipes for expressions used during skeleton creation and the
- /// rest of VPlan execution.
- VPBasicBlock *Preheader;
-
/// Holds the VFs applicable to this VPlan.
SmallSetVector<ElementCount, 2> VFs;
@@ -3722,30 +3717,19 @@ class VPlan {
DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
public:
- /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
- /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
- /// be disconnected, as the bypass blocks between them are not yet modeled in
- /// VPlan.
- VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
- : VPlan(Preheader, Entry) {
- TripCount = TC;
- }
+ /// Construct a VPlan with \p Entry entering the plan and trip count \p TC.
+ VPlan(VPBasicBlock *Entry, VPValue *TC) : VPlan(Entry) { TripCount = TC; }
- /// Construct a VPlan with original preheader \p Preheader and \p Entry to
- /// the plan. At the moment, \p Preheader and \p Entry need to be
- /// disconnected, as the bypass blocks between them are not yet modeled in
- /// VPlan.
- VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
- : Entry(Entry), Preheader(Preheader) {
- Entry->setPlan(this);
- Preheader->setPlan(this);
- assert(Preheader->getNumSuccessors() == 0 &&
- Preheader->getNumPredecessors() == 0 &&
- "preheader must be disconnected");
- }
+ /// Construct a VPlan with \p Entry to the plan.
+ VPlan(VPBasicBlock *Entry) : Entry(Entry) { Entry->setPlan(this); }
~VPlan();
+ void setEntry(VPBasicBlock *VPBB) {
+ Entry = VPBB;
+ VPBB->setPlan(this);
+ }
+
/// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
/// original scalar pre-header ) which contains SCEV expansions that need
/// to happen before the CFG is modified; a VPBasicBlock for the vector
@@ -3878,12 +3862,9 @@ class VPlan {
#endif
/// Returns the VPRegionBlock of the vector loop.
- VPRegionBlock *getVectorLoopRegion() {
- return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
- }
- const VPRegionBlock *getVectorLoopRegion() const {
- return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
- }
+ VPRegionBlock *getVectorLoopRegion();
+
+ const VPRegionBlock *getVectorLoopRegion() const;
/// Returns the preheader of the vector loop region.
VPBasicBlock *getVectorPreheader() {
@@ -3915,13 +3896,11 @@ class VPlan {
SCEVToExpansion[S] = V;
}
- /// \return The block corresponding to the original preheader.
- VPBasicBlock *getPreheader() { return Preheader; }
- const VPBasicBlock *getPreheader() const { return Preheader; }
-
/// Clone the current VPlan, update all VPValues of the new VPlan and cloned
/// recipes to refer to the clones, and return it.
VPlan *duplicate();
+
+ VPBasicBlock *getScalarPreheader();
};
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index de7023167df899..3921c6e24ff413 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -669,11 +669,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
BasicBlock *VPlanPred =
State.CFG
- .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
+ .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
- assert(OtherPred != VPlanPred &&
- "VPlan predecessors should not be connected yet");
+ if (OtherPred == VPlanPred)
+ continue;
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
}
return NewPhi;
@@ -3263,13 +3263,17 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "cannot be used in per-lane");
+ if (State.ExpandedSCEVs.contains(Expr)) {
+ State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);
+ return;
+ }
+
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
SCEVExpander Exp(SE, DL, "induction");
-
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
&*State.Builder.GetInsertPoint());
- assert(!State.ExpandedSCEVs.contains(Expr) &&
- "Same SCEV expa...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesAs a first step to move towards modeling the full skeleton in VPlan, start by wrapping IR blocks created during legacy skeleton creation in VPIRBasicBlocks and hook them into the VPlan. This means the skeleton CFG is represented in VPlan, just before execute. This allows moving parts of skeleton creation into recipes in the VPBBs gradually. Note that this allows retiring some manual DT updates, as this will be handled automatically during VPlan execution. Patch is 30.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114292.diff 11 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3d638e52328b57..02c736642de523 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
return VectorTripCount;
}
+static void connectScalarPreheaderInVPlan(VPlan &Plan) {
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader();
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+ VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH);
+ VPBlockUtils::connectBlocks(PredVPB, ScalarPH);
+ VPBlockUtils::connectBlocks(PredVPB, VectorPH);
+}
+
+static void connectCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader();
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+ VPBlockUtils::disconnectBlocks(PredVPB, VectorPH);
+ VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB);
+ VPBlockUtils::connectBlocks(PredVPB, CheckVPIRBB);
+ VPBlockUtils::connectBlocks(CheckVPIRBB, ScalarPH);
+ VPBlockUtils::connectBlocks(CheckVPIRBB, VectorPH);
+}
+
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
Value *Count = getTripCount();
// Reuse existing vector loop preheader for TC checks.
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
"TC check is expected to dominate Bypass");
// Update dominator for Bypass & LoopExit (if needed).
- DT->changeImmediateDominator(Bypass, TCCheckBlock);
BranchInst &BI =
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
LoopBypassBlocks.push_back(TCCheckBlock);
+
+ connectScalarPreheaderInVPlan(Plan);
}
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
"Should already be a bypass block due to iteration count check");
LoopBypassBlocks.push_back(SCEVCheckBlock);
AddedSafetyChecks = true;
+
+ connectCheckBlockInVPlan(Plan, SCEVCheckBlock);
return SCEVCheckBlock;
}
@@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
AddedSafetyChecks = true;
+ connectCheckBlockInVPlan(Plan, MemCheckBlock);
return MemCheckBlock;
}
@@ -7649,10 +7673,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
// before making any changes to the CFG.
- if (!BestVPlan.getPreheader()->empty()) {
+ if (!BestVPlan.getEntry()->empty()) {
State.CFG.PrevBB = OrigLoop->getLoopPreheader();
State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator());
- BestVPlan.getPreheader()->execute(&State);
+ BestVPlan.getEntry()->execute(&State);
}
if (!ILV.getTripCount())
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7861,8 +7885,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
DT->getNode(Bypass)->getIDom()) &&
"TC check is expected to dominate Bypass");
- // Update dominator for Bypass.
- DT->changeImmediateDominator(Bypass, TCCheckBlock);
LoopBypassBlocks.push_back(TCCheckBlock);
// Save the trip count so we don't have to regenerate it in the
@@ -7877,6 +7899,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+ if (PredVPB->getNumSuccessors() == 1)
+ connectScalarPreheaderInVPlan(Plan);
+ else
+ connectCheckBlockInVPlan(Plan, TCCheckBlock);
return TCCheckBlock;
}
@@ -7907,32 +7935,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopVectorPreHeader);
- DT->changeImmediateDominator(LoopVectorPreHeader,
- EPI.MainLoopIterationCountCheck);
-
EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
if (EPI.SCEVSafetyCheck)
EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
- if (EPI.MemSafetyCheck)
+ if (EPI.MemSafetyCheck) {
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
-
- DT->changeImmediateDominator(
- VecEpilogueIterationCountCheck,
- VecEpilogueIterationCountCheck->getSinglePredecessor());
+ }
DT->changeImmediateDominator(LoopScalarPreHeader,
EPI.EpilogueIterationCountCheck);
- if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
- // If there is an epilogue which must run, there's no edge from the
- // middle block to exit blocks and thus no need to update the immediate
- // dominator of the exit blocks.
- DT->changeImmediateDominator(LoopExitBlock,
- EPI.EpilogueIterationCountCheck);
-
// Keep track of bypass blocks, as they feed start values to the induction and
// reduction phis in the scalar loop preheader.
if (EPI.SCEVSafetyCheck)
@@ -8035,6 +8050,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
}
ReplaceInstWithInst(Insert->getTerminator(), &BI);
LoopBypassBlocks.push_back(Insert);
+
+ // A new entry block has been created for the epilogue VPlan. Hook it in.
+ VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert);
+ VPBasicBlock *OldEntry = Plan.getEntry();
+ VPBlockUtils::reassociateBlocks(OldEntry, NewEntry);
+ Plan.setEntry(NewEntry);
+ for (auto &R : make_early_inc_range(*NewEntry)) {
+ auto *VPIR = dyn_cast<VPIRInstruction>(&R);
+ if (!VPIR || !isa<PHINode>(VPIR->getInstruction()))
+ break;
+ VPIR->eraseFromParent();
+ }
+
+ connectScalarPreheaderInVPlan(Plan);
return Insert;
}
@@ -10270,7 +10299,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// should be removed once induction resume value creation is done
// directly in VPlan.
EpilogILV.setTripCount(MainILV.getTripCount());
- for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) {
+ for (auto &R : make_early_inc_range(*BestEpiPlan.getEntry())) {
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
if (!ExpandR)
continue;
@@ -10330,8 +10359,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
}
- assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
- "DT not preserved correctly");
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
DT, true, &ExpandedSCEVs);
++LoopsEpilogueVectorized;
@@ -10359,6 +10386,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
checkMixedPrecision(L, ORE);
}
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
+ "DT not preserved correctly");
+
std::optional<MDNode *> RemainderLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupEpilogue});
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0484543d2d0398..d93296f2e7aa84 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -173,9 +173,8 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
}
void VPBlockBase::setPlan(VPlan *ParentPlan) {
- assert(
- (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
- "Can only set plan on its entry or preheader block.");
+ assert(ParentPlan->getEntry() == this &&
+ "Can only set plan on its entry or preheader block.");
Plan = ParentPlan;
}
@@ -455,13 +454,11 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
"VPIRBasicBlock can have at most two successors at the moment!");
State->Builder.SetInsertPoint(IRBB->getTerminator());
executeRecipes(State, IRBB);
- if (getSingleSuccessor()) {
- assert(isa<UnreachableInst>(IRBB->getTerminator()));
+ if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
auto *Br = State->Builder.CreateBr(IRBB);
Br->setOperand(0, nullptr);
IRBB->getTerminator()->eraseFromParent();
}
-
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
@@ -474,7 +471,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
// backedges. A backward successor is set when the branch is created.
const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
- assert(!TermBr->getSuccessor(idx) &&
+ assert((!TermBr->getSuccessor(idx) || TermBr->getSuccessor(idx) == IRBB) &&
"Trying to reset an existing successor block.");
TermBr->setSuccessor(idx, IRBB);
State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}});
@@ -853,9 +850,6 @@ VPlan::~VPlan() {
Block->dropAllReferences(&DummyValue);
VPBlockBase::deleteCFG(Entry);
-
- Preheader->dropAllReferences(&DummyValue);
- delete Preheader;
}
for (VPValue *VPV : VPLiveInsToFree)
delete VPV;
@@ -878,7 +872,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPIRBasicBlock *Entry =
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
- auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
+ VPBlockUtils::connectBlocks(Entry, VecPreheader);
+ auto Plan = std::make_unique<VPlan>(Entry);
// Create SCEV and VPValue for the trip count.
@@ -1020,8 +1015,9 @@ void VPlan::execute(VPTransformState *State) {
BasicBlock *VectorPreHeader = State->CFG.PrevBB;
State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
- // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
- cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
+ replaceVPBBWithIRVPBB(
+ cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor()),
+ VectorPreHeader);
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
@@ -1055,8 +1051,10 @@ void VPlan::execute(VPTransformState *State) {
MiddleBB->getTerminator()->eraseFromParent();
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Entry);
// Generate code in the loop pre-header and body.
- for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
+ for (VPBlockBase *Block : make_range(RPOT.begin(), RPOT.end()))
Block->execute(State);
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
@@ -1107,9 +1105,6 @@ void VPlan::execute(VPTransformState *State) {
}
State->CFG.DTU.flush();
- assert(State->CFG.DTU.getDomTree().verify(
- DominatorTree::VerificationLevel::Fast) &&
- "DT not preserved correctly");
}
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
@@ -1162,12 +1157,10 @@ void VPlan::print(raw_ostream &O) const {
printLiveIns(O);
- if (!getPreheader()->empty()) {
- O << "\n";
- getPreheader()->print(O, "", SlotTracker);
- }
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
+ RPOT(getEntry());
- for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
+ for (const VPBlockBase *Block : RPOT) {
O << '\n';
Block->print(O, "", SlotTracker);
}
@@ -1204,6 +1197,20 @@ std::string VPlan::getName() const {
return Out;
}
+VPRegionBlock *VPlan::getVectorLoopRegion() {
+ for (VPBlockBase *B : vp_depth_first_shallow(getEntry()))
+ if (auto *R = dyn_cast<VPRegionBlock>(B))
+ return R;
+ return nullptr;
+}
+
+const VPRegionBlock *VPlan::getVectorLoopRegion() const {
+ for (const VPBlockBase *B : vp_depth_first_shallow(getEntry()))
+ if (auto *R = dyn_cast<VPRegionBlock>(B))
+ return R;
+ return nullptr;
+}
+
LLVM_DUMP_METHOD
void VPlan::printDOT(raw_ostream &O) const {
VPlanPrinter Printer(O, *this);
@@ -1259,11 +1266,10 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
VPlan *VPlan::duplicate() {
// Clone blocks.
- VPBasicBlock *NewPreheader = Preheader->clone();
const auto &[NewEntry, __] = cloneFrom(Entry);
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
- auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
+ auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry));
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
for (VPValue *OldLiveIn : VPLiveInsToFree) {
Old2NewVPValues[OldLiveIn] =
@@ -1283,7 +1289,6 @@ VPlan *VPlan::duplicate() {
// else NewTripCount will be created and inserted into Old2NewVPValues when
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
- remapOperands(Preheader, NewPreheader, Old2NewVPValues);
remapOperands(Entry, NewEntry, Old2NewVPValues);
// Clone live-outs.
@@ -1301,6 +1306,19 @@ VPlan *VPlan::duplicate() {
return NewPlan;
}
+VPBasicBlock *VPlan::getScalarPreheader() {
+ auto *MiddleVPBB =
+ cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ if (MiddleVPBB->getNumSuccessors() == 2) {
+ // Order is strict: first is the exit block, second is the scalar preheader.
+ return cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
+ }
+ if (auto *IRVPBB = dyn_cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor()))
+ return IRVPBB;
+
+ return nullptr;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
@@ -1339,8 +1357,6 @@ void VPlanPrinter::dump() {
OS << "edge [fontname=Courier, fontsize=30]\n";
OS << "compound=true\n";
- dumpBlock(Plan.getPreheader());
-
for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
dumpBlock(Block);
@@ -1601,7 +1617,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
assignName(Plan.BackedgeTakenCount);
for (VPValue *LI : Plan.VPLiveInsToFree)
assignName(LI);
- assignNames(Plan.getPreheader());
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0e0c64f6df9cba..f0193fdbed2bb2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3671,11 +3671,6 @@ class VPlan {
/// preheader of the vector loop.
VPBasicBlock *Entry;
- /// VPBasicBlock corresponding to the original preheader. Used to place
- /// VPExpandSCEV recipes for expressions used during skeleton creation and the
- /// rest of VPlan execution.
- VPBasicBlock *Preheader;
-
/// Holds the VFs applicable to this VPlan.
SmallSetVector<ElementCount, 2> VFs;
@@ -3722,30 +3717,19 @@ class VPlan {
DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
public:
- /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
- /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
- /// be disconnected, as the bypass blocks between them are not yet modeled in
- /// VPlan.
- VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
- : VPlan(Preheader, Entry) {
- TripCount = TC;
- }
+ /// Construct a VPlan with \p Entry entering the plan and trip count \p TC.
+ VPlan(VPBasicBlock *Entry, VPValue *TC) : VPlan(Entry) { TripCount = TC; }
- /// Construct a VPlan with original preheader \p Preheader and \p Entry to
- /// the plan. At the moment, \p Preheader and \p Entry need to be
- /// disconnected, as the bypass blocks between them are not yet modeled in
- /// VPlan.
- VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
- : Entry(Entry), Preheader(Preheader) {
- Entry->setPlan(this);
- Preheader->setPlan(this);
- assert(Preheader->getNumSuccessors() == 0 &&
- Preheader->getNumPredecessors() == 0 &&
- "preheader must be disconnected");
- }
+ /// Construct a VPlan with \p Entry to the plan.
+ VPlan(VPBasicBlock *Entry) : Entry(Entry) { Entry->setPlan(this); }
~VPlan();
+ void setEntry(VPBasicBlock *VPBB) {
+ Entry = VPBB;
+ VPBB->setPlan(this);
+ }
+
/// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
/// original scalar pre-header ) which contains SCEV expansions that need
/// to happen before the CFG is modified; a VPBasicBlock for the vector
@@ -3878,12 +3862,9 @@ class VPlan {
#endif
/// Returns the VPRegionBlock of the vector loop.
- VPRegionBlock *getVectorLoopRegion() {
- return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
- }
- const VPRegionBlock *getVectorLoopRegion() const {
- return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
- }
+ VPRegionBlock *getVectorLoopRegion();
+
+ const VPRegionBlock *getVectorLoopRegion() const;
/// Returns the preheader of the vector loop region.
VPBasicBlock *getVectorPreheader() {
@@ -3915,13 +3896,11 @@ class VPlan {
SCEVToExpansion[S] = V;
}
- /// \return The block corresponding to the original preheader.
- VPBasicBlock *getPreheader() { return Preheader; }
- const VPBasicBlock *getPreheader() const { return Preheader; }
-
/// Clone the current VPlan, update all VPValues of the new VPlan and cloned
/// recipes to refer to the clones, and return it.
VPlan *duplicate();
+
+ VPBasicBlock *getScalarPreheader();
};
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index de7023167df899..3921c6e24ff413 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -669,11 +669,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
BasicBlock *VPlanPred =
State.CFG
- .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
+ .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
- assert(OtherPred != VPlanPred &&
- "VPlan predecessors should not be connected yet");
+ if (OtherPred == VPlanPred)
+ continue;
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
}
return NewPhi;
@@ -3263,13 +3263,17 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "cannot be used in per-lane");
+ if (State.ExpandedSCEVs.contains(Expr)) {
+ State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);
+ return;
+ }
+
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
SCEVExpander Exp(SE, DL, "induction");
-
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
&*State.Builder.GetInsertPoint());
- assert(!State.ExpandedSCEVs.contains(Expr) &&
- "Same SCEV expa...
[truncated]
|
As a first step to move towards modeling the full skeleton in VPlan, start by wrapping IR blocks created during legacy skeleton creation in VPIRBasicBlocks and hook them into the VPlan. This means the skeleton CFG is represented in VPlan, just before execute. This allows moving parts of skeleton creation into recipes in the VPBBs gradually. Note that this allows retiring some manual DT updates, as this will be handled automatically during VPlan execution.
48f79e0
to
1b89761
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
First set of comments and/or potential follow-ups.
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor(); | ||
VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor(); | |
VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH); | |
VPBlockBase *Entry = Plan->getEntry(); | |
VPBlockUtils::disconnectBlocks(Entry, VectorPH); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { | |||
return VectorTripCount; | |||
} | |||
|
|||
static void connectScalarPreheaderInVPlan(VPlan &Plan) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Placed here rather than in VPlan.h/VPBlockUtils expecting it to be a temporary local scaffold? Worth documenting?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, also added a comment, thanks!
VPBlockBase *ScalarPH = Plan.getScalarPreheader(); | ||
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor(); | ||
VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH); | ||
VPBlockUtils::connectBlocks(PredVPB, ScalarPH); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A simple
VPBlockUtils::connectBlocks(PredVPB, ScalarPH); | |
VPBlockUtils::connectBlocks(Plan.getEntry(), Plan.getScalarPreHeader()); |
alone does not suffice, because the scalar preheader should be the first successor, followed by the existing vector preheader.
Worth noting that we're disconnecting and reconnecting in order to set the right successor order.
Perhaps worth adding to connectBlocks()
an option to indicate successor index (if not last)? And/or predecessor index(?) This would allow swapping entries or sorting vectors in place instead of insert sorting. This may be more general and efficient than connectScalarPreheaderInVPlan()
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks! The new API is a bit more difficult to get right, but more efficient.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added in 95eeae1
static void connectCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) { | ||
VPBlockBase *ScalarPH = Plan.getScalarPreheader(); | ||
VPBlockBase *VectorPH = Plan.getVectorPreheader(); | ||
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor(); | |
VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
VPBlockBase *VectorPH = Plan.getVectorPreheader(); | ||
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor(); | ||
if (PredVPB->getNumSuccessors() == 1) | ||
connectScalarPreheaderInVPlan(Plan); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth connecting scalar preheader from the outset rather than here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are a few places that assume the scalar PH has a single predecessor, which would need to be updated. Could pull this in here or adjust as follow-up?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Follow-up would be fine.
VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB); | ||
VPBlockUtils::connectBlocks(PredVPB, CheckVPIRBB); | ||
VPBlockUtils::connectBlocks(CheckVPIRBB, ScalarPH); | ||
VPBlockUtils::connectBlocks(CheckVPIRBB, VectorPH); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps worth having an insertBlockOnEdge()
function that takes block B and edge A->C, and produces A->B->C.
Followed by adding the scalar PH as another (the first) successor of B.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated for now to use the new connect variant but left the everything else as is for now. Will check if there are other places where the new helper could be used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added in ccb40b0
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { | |||
"TC check is expected to dominate Bypass"); | |||
|
|||
// Update dominator for Bypass & LoopExit (if needed). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment above should go with the line below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed, thanks!
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { | |||
"TC check is expected to dominate Bypass"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This assert was probably added along with the lines below, but should remain, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this is to check the DT updates from SplitBlock I think
BranchInst &BI = | ||
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters); | ||
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) | ||
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); | ||
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||
LoopBypassBlocks.push_back(TCCheckBlock); | ||
|
||
connectScalarPreheaderInVPlan(Plan); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth wrapping LoopVectorPreHeader in a VPIRBB via connectCheckBlockInVPlan() as well, perhaps as a TODO?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a TODO for now, thanks!
BranchInst &BI = | ||
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters); | ||
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) | ||
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); | ||
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||
LoopBypassBlocks.push_back(TCCheckBlock); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Scalar preheader can be connected in VPlan from the outset, rather than here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are a few places that assume the scalar PH has a single predecessor, which would need to be updated. Could pull this in here or adjust as follow-up?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The single predecessor of Scalar PH in VPlan being middle_block, right?
I.e., scalar loop is initially connected as leftover/remainder loop only, and here connected also as alternative/bypass loop - to handle trip counts too small for the vector loop, and potentially other unvectorized cases.
Perhaps connectScalarAsBypassLoopInVPlan()
is more accurate than connectScalarPreheaderInVPlan()
, given that scalar PH is already connected in VPlan?
Applying this additional connection earlier, is fine as a later follow-up, perhaps w/ a TODO.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, renamed and added TODO to connectScalarAsBypassLoopInVPlan
Add extra arguments to connectBlocks which allow selecting which existing predecessor/successor to update. This avoids having to disconnect blocks first unnecessarily. Suggested in #114292.
Add a new helper to insert a new VPBlockBase on an edge between 2 blocks. Suggested in llvm#114292 and also useful for some existing code.
Add a new helper to insert a new VPBlockBase on an edge between 2 blocks. Suggested in #114292 and also useful for some existing code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The latest version now also updated the VPlan printing tests and moved final printing to VPlan::execute to also capture the updated skeleton
✅ With the latest revision this PR passed the C/C++ code formatter. |
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF | ||
<< ", UF=" << BestUF << '\n'); | ||
BestVPlan.setName("Final VPlan"); | ||
LLVM_DEBUG(BestVPlan.dump()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved later to VPlan::execute(), after is replaces VPBBtoVPIRBB.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, can also split off once we are happy
BranchInst &BI = | ||
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters); | ||
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) | ||
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); | ||
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||
LoopBypassBlocks.push_back(TCCheckBlock); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The single predecessor of Scalar PH in VPlan being middle_block, right?
I.e., scalar loop is initially connected as leftover/remainder loop only, and here connected also as alternative/bypass loop - to handle trip counts too small for the vector loop, and potentially other unvectorized cases.
Perhaps connectScalarAsBypassLoopInVPlan()
is more accurate than connectScalarPreheaderInVPlan()
, given that scalar PH is already connected in VPlan?
Applying this additional connection earlier, is fine as a later follow-up, perhaps w/ a TODO.
VPBlockBase *ScalarPH = Plan.getScalarPreheader(); | ||
VPBlockBase *PredVPB = Plan.getEntry(); | ||
VPBlockUtils::connectBlocks(PredVPB, ScalarPH, -1, 0); | ||
VPBlockUtils::connectBlocks(PredVPB, VectorPH, 0, -1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPlan starts with createInitialVPlan() producing a disconnected Entry and a predecessor-less vector_PH --> vector_region (with Header --> Latch) --> middle_block --> scalar_PH, plus an optional middle-block --> exit.
Would it be better to connect Entry-->vector_PH at the outset (ah, disregard - this indeed takes place below...), and here connect Entry-->scalar_PH followed by swapSuccessors(Entry)
? The use of non-negative indices in connectBlocks()
may be confusing, as it not only connects the two blocks but also disconnects them from existing successor and/or predecessor. I.e., replacePredecessor()
or replaceSuccessor()
may be more appropriate.
Furthermore, TCCheckBlock could then be inserted on this Entry-->vector_PH edge, so that connectScalarPreheaderInVPlan() assimilates introduceCheckBlockInVPlan().
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to connect + swap, thanks.
Furthermore, TCCheckBlock could then be inserted on this Entry-->vector_PH edge, so that connectScalarPreheaderInVPlan() assimilates introduceCheckBlockInVPlan().
Not sure how exactly, left as is for now.
static void connectScalarPreheaderInVPlan(VPlan &Plan) { | ||
VPBlockBase *VectorPH = Plan.getVectorPreheader(); | ||
VPBlockBase *ScalarPH = Plan.getScalarPreheader(); | ||
VPBlockBase *PredVPB = Plan.getEntry(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPBlockBase *PredVPB = Plan.getEntry(); | |
VPBlockBase *Entry = Plan.getEntry(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks
static void introduceCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) { | ||
VPBlockBase *ScalarPH = Plan.getScalarPreheader(); | ||
VPBlockBase *VectorPH = Plan.getVectorPreheader(); | ||
VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ScalarPH is expected to be the other successor of PreVectorPH, so could be asserted (or another way to retrieve ScalarPH), although more general w/o this assert?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added an assert for now, than
if (isa<VPIRInstruction>(&R) ^ isa<VPIRBasicBlock>(VPBB)) { | ||
errs() << "VPIRInstructions "; | ||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
R.dump(); | ||
errs() << " "; | ||
#endif | ||
errs() << "not in a VPIRBasicBlock!\n"; | ||
return false; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPIRInstructions still expected in VPIRBB's only?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We now can have regular recipes in VPIRBBs ( e.g expand-scev), adjust the check.
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF | ||
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF | ||
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count | ||
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.body.preheader>: | ||
; CHECK-NEXT: IR %0 = zext i32 %n to i64 | ||
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: vector.ph: | ||
; CHECK-NEXT: Successor(s): vector loop | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: <x1> vector loop: { | ||
; CHECK-NEXT: vector.body: | ||
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION | ||
; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> | ||
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1> | ||
; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> | ||
; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> | ||
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> | ||
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]> | ||
; CHECK-NEXT: WIDEN ir<%13> = load vp<[[VEC_PTR]]> | ||
; CHECK-NEXT: WIDEN ir<%add9> = add ir<%13>, ir<1> | ||
; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> | ||
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]> | ||
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9> | ||
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> | ||
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> | ||
; CHECK-NEXT: No successors | ||
; CHECK-NEXT: } | ||
; CHECK-NEXT: Successor(s): middle.block | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: middle.block: | ||
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]> | ||
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> | ||
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.cond.cleanup.loopexit>: | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: scalar.ph: | ||
; CHECK-NEXT: Successor(s): ir-bb<for.body> | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.body>: | ||
; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] | ||
; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] | ||
; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 | ||
; CHECK-NEXT: No successors | ||
; CHECK-NEXT: } | ||
; CHECK: LV: Loop does not require scalar epilogue |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gone?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why/Should these checks be removed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That was an accident during a merge, updated, thanks!
; IF-EVL-NEXT: IR %4 = call i64 @llvm.vscale.i64() | ||
; IF-EVL-NEXT: IR %5 = mul i64 %4, 4 | ||
; IF-EVL-NEXT: IR %6 = sub i64 %5, 1 | ||
; IF-EVL-NEXT: IR %n.rnd.up = add i64 %N, %6 | ||
; IF-EVL-NEXT: IR %n.mod.vf = urem i64 %n.rnd.up, %5 | ||
; IF-EVL-NEXT: IR %n.vec = sub i64 %n.rnd.up, %n.mod.vf | ||
; IF-EVL-NEXT: IR %7 = call i64 @llvm.vscale.i64() | ||
; IF-EVL-NEXT: IR %8 = mul i64 %7, 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we are now matching the VPIRBB
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<exit>: | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: scalar.ph: | ||
; CHECK-NEXT: ir-bb<scalar.ph>: | ||
; CHECK-NEXT: IR %bc.resume.val = phi ptr [ %ind.end, %middle.block ], [ %start, %entry ] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We now check the VPIRBB instead of the regular VPBB
VPBlockUtils::connectBlocks(VPPH, R1); | ||
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); | ||
VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); | ||
VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: original constructor can be updated to connect its VPPH to VPBB1, temporarily, to reduce test diff, and then clean it up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
This moves printing of the final VPlan to ::execute. This ensures the final VPlan is printed, including recipes that get introduced by late, lowering transforms and skeleton construction. Split off from #114292, to simplify the diff.
This split off changes for more complex CFGs in VPlan from both llvm#114292 llvm#112138 This simplifies their respective diffs.
This moves printing of the final VPlan to ::execute. This ensures the final VPlan is printed, including recipes that get introduced by late, lowering transforms and skeleton construction. Split off from llvm#114292, to simplify the diff.
This prepares for more complex CFGs in VPlan, as in llvm#114292 llvm#112138
Made a minor update after #112138: |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/2/builds/13075 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/10584 Here is the relevant piece of the build log for the reference
|
The preheader is now the entry block, connected to the vector.ph. Clean up after #114292.
This was originally done to reduce the diff for the change. Remove it and update the remaining tests. NFC modulo reordering of incoming values. Clean up after #114292.
…n::generate. This was originally done to reduce the diff for the change. Remove it and update the remaining tests. NFC modulo reordering of incoming values. Clean up after llvm/llvm-project#114292.
As a first step to move towards modeling the full skeleton in VPlan, start by wrapping IR blocks created during legacy skeleton creation in VPIRBasicBlocks and hook them into the VPlan. This means the skeleton CFG is represented in VPlan, just before execute. This allows moving parts of skeleton creation into recipes in the VPBBs gradually.
Note that this allows retiring some manual DT updates, as this will be handled automatically during VPlan execution.