Skip to content

[VPlan] Use DomTreeUpdater to automatically update DT for vector loop. #92525

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7533,8 +7533,9 @@ LoopVectorizationPlanner::executePlan(
LLVM_DEBUG(BestVPlan.dump());

// Perform the actual loop transformation.
VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan,
OrigLoop->getHeader()->getContext());
VPTransformState State(BestVF, BestUF, LI,
EnableVPlanNativePath ? nullptr : DT, ILV.Builder,
&ILV, &BestVPlan, OrigLoop->getHeader()->getContext());

// 0. Generate SCEV-dependent code into the preheader, including TripCount,
// before making any changes to the CFG.
Expand Down Expand Up @@ -10402,6 +10403,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<ScalarEvolutionAnalysis>();
}

PA.preserve<LoopAnalysis>();

if (Result.MadeCFGChange) {
Expand Down
59 changes: 13 additions & 46 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
Expand Down Expand Up @@ -218,7 +219,7 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
DominatorTree *DT, IRBuilderBase &Builder,
InnerLoopVectorizer *ILV, VPlan *Plan,
LLVMContext &Ctx)
: VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
: VF(VF), UF(UF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
LVer(nullptr),
TypeAnalysis(Plan->getCanonicalIV()->getScalarType(), Ctx) {}

Expand Down Expand Up @@ -436,6 +437,7 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
"Trying to reset an existing successor block.");
TermBr->setSuccessor(idx, NewBB);
}
CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
}
return NewBB;
}
Expand Down Expand Up @@ -467,6 +469,7 @@ void VPBasicBlock::execute(VPTransformState *State) {
// The Exit block of a loop is always set to be successor 0 of the Exiting
// block.
cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
State->CFG.DTU.applyUpdates({{DominatorTree::Insert, ExitingBB, NewBB}});
} else if (PrevVPBB && /* A */
!((SingleHPred = getSingleHierarchicalPredecessor()) &&
SingleHPred->getExitingBasicBlock() == PrevVPBB &&
Expand Down Expand Up @@ -829,6 +832,11 @@ void VPlan::execute(VPTransformState *State) {
BasicBlock *VectorPreHeader = State->CFG.PrevBB;
State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());

// Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ExitBB is (still, the single) successor of PrevBB=VectorPreHeader, should they first be disconnected before updating DTU with their edge's deletion?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, updated to remove the edge before


// Generate code in the loop pre-header and body.
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
Block->execute(State);
Expand Down Expand Up @@ -891,13 +899,10 @@ void VPlan::execute(VPTransformState *State) {
}
}

// We do not attempt to preserve DT for outer loop vectorization currently.
if (!EnableVPlanNativePath) {
BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
State->CFG.ExitBB);
}
State->CFG.DTU.flush();
// DT is currently updated for non-native path only.
assert(EnableVPlanNativePath || State->CFG.DTU.getDomTree().verify(
DominatorTree::VerificationLevel::Fast));
Comment on lines +904 to +905
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
assert(EnableVPlanNativePath || State->CFG.DTU.getDomTree().verify(
DominatorTree::VerificationLevel::Fast));
// DT is currently updated for non-native path only.
assert(EnableVPlanNativePath || State->CFG.DTU.getDomTree().verify(
DominatorTree::VerificationLevel::Fast));

(retaining the comment)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added back, thanks!

}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down Expand Up @@ -995,44 +1000,6 @@ void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
}

void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
BasicBlock *LoopLatchBB,
BasicBlock *LoopExitBB) {
// The vector body may be more than a single basic-block by this point.
// Update the dominator tree information inside the vector body by propagating
// it from header to latch, expecting only triangular control-flow, if any.
BasicBlock *PostDomSucc = nullptr;
for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
// Get the list of successors of this block.
std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
assert(Succs.size() <= 2 &&
"Basic block in vector loop has more than 2 successors.");
PostDomSucc = Succs[0];
if (Succs.size() == 1) {
assert(PostDomSucc->getSinglePredecessor() &&
"PostDom successor has more than one predecessor.");
DT->addNewBlock(PostDomSucc, BB);
continue;
}
BasicBlock *InterimSucc = Succs[1];
if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
PostDomSucc = Succs[1];
InterimSucc = Succs[0];
}
assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
"One successor of a basic block does not lead to the other.");
assert(InterimSucc->getSinglePredecessor() &&
"Interim successor has more than one predecessor.");
assert(PostDomSucc->hasNPredecessors(2) &&
"PostDom successor has more than two predecessors.");
DT->addNewBlock(InterimSucc, BB);
DT->addNewBlock(PostDomSucc, BB);
}
// Latch block is a new dominator for the loop exit.
DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
}

static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
// Update the operands of all cloned recipes starting at NewEntry. This
Expand Down
17 changes: 6 additions & 11 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
Expand Down Expand Up @@ -372,7 +373,11 @@ struct VPTransformState {
/// of replication, maps the BasicBlock of the last replica created.
SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

CFGState() = default;
/// Updater for the DominatorTree.
DomTreeUpdater DTU;

CFGState(DominatorTree *DT)
: DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}

/// Returns the BasicBlock* mapped to the pre-header of the loop region
/// containing \p R.
Expand All @@ -382,9 +387,6 @@ struct VPTransformState {
/// Hold a pointer to LoopInfo to register new basic blocks in the loop.
LoopInfo *LI;

/// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
DominatorTree *DT;

/// Hold a reference to the IRBuilder used to generate output IR code.
IRBuilderBase &Builder;

Expand Down Expand Up @@ -3289,13 +3291,6 @@ class VPlan {
/// Clone the current VPlan, update all VPValues of the new VPlan and cloned
/// recipes to refer to the clones, and return it.
VPlan *duplicate();

private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
BasicBlock *LoopLatchBB,
BasicBlock *LoopExitBB);
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down
Loading