Skip to content

Commit fcfde33

Browse files
committed
[VPlan] Move predication to VPlanTransform (NFC) (WIP).
This patch moves the logic to predicate and linearize a VPlan to a dedicated VPlan transform. The main logic to perform predication is ready for review, although there are a few things to note that should be improved, either directly in the PR or in the future: * Edge and block masks are cached in VPRecipeBuilder so that they can be accessed during recipe construction. A better alternative may be to add mask operands to all VPInstructions that need them and use those during recipe construction. * Caching the masks in a map also means that the map needs updating each time a new recipe replaces a VPInstruction; this would also be handled by adding mask operands. This is currently still WIP because early-exit loop handling does not work yet: the exit conditions are not available in the initial VPlans. This will be fixed with #128419 and follow-ups. All tests except those for early-exit loops are passing.
1 parent 923ea72 commit fcfde33

File tree

7 files changed

+379
-328
lines changed

7 files changed

+379
-328
lines changed

llvm/lib/Transforms/Vectorize/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ add_llvm_component_library(LLVMVectorize
2424
VPlan.cpp
2525
VPlanAnalysis.cpp
2626
VPlanConstruction.cpp
27+
VPlanPredicator.cpp
2728
VPlanRecipes.cpp
2829
VPlanSLP.cpp
2930
VPlanTransforms.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 37 additions & 277 deletions
Large diffs are not rendered by default.

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 14 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,7 @@ class VPRecipeBuilder {
6868

6969
VPBuilder &Builder;
7070

71-
/// When we if-convert we need to create edge masks. We have to cache values
72-
/// so that we don't end up with exponential recursion/IR. Note that
73-
/// if-conversion currently takes place during VPlan-construction, so these
74-
/// caches are only used at that stage.
75-
using EdgeMaskCacheTy =
76-
DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>;
77-
using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>;
78-
EdgeMaskCacheTy EdgeMaskCache;
79-
BlockMaskCacheTy BlockMaskCache;
71+
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache;
8072

8173
// VPlan construction support: Hold a mapping from ingredients to
8274
// their recipe.
@@ -118,11 +110,6 @@ class VPRecipeBuilder {
118110
tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
119111
VFRange &Range);
120112

121-
/// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently
122-
/// all such phi nodes are turned into a sequence of select instructions as
123-
/// the vectorizer currently performs full if-conversion.
124-
VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands);
125-
126113
/// Handle call instructions. If \p CI can be widened for \p Range.Start,
127114
/// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
128115
/// decreased to ensure same decision from \p Range.Start to \p Range.End.
@@ -160,9 +147,11 @@ class VPRecipeBuilder {
160147
LoopVectorizationLegality *Legal,
161148
LoopVectorizationCostModel &CM,
162149
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
150+
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
163151
LoopVersioning *LVer)
164152
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
165-
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}
153+
CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache),
154+
LVer(LVer) {}
166155

167156
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
168157
auto It = ScaledReductionMap.find(ExitInst);
@@ -193,27 +182,10 @@ class VPRecipeBuilder {
193182
Ingredient2Recipe[I] = R;
194183
}
195184

196-
/// Create the mask for the vector loop header block.
197-
void createHeaderMask();
198-
199-
/// A helper function that computes the predicate of the block BB, assuming
200-
/// that the header block of the loop is set to True or the loop mask when
201-
/// tail folding.
202-
void createBlockInMask(BasicBlock *BB);
203-
204185
/// Returns the *entry* mask for the block \p BB.
205-
VPValue *getBlockInMask(BasicBlock *BB) const;
206-
207-
/// Create an edge mask for every destination of cases and/or default.
208-
void createSwitchEdgeMasks(SwitchInst *SI);
209-
210-
/// A helper function that computes the predicate of the edge between SRC
211-
/// and DST.
212-
VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
213-
214-
/// A helper that returns the previously computed predicate of the edge
215-
/// between SRC and DST.
216-
VPValue *getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const;
186+
/// Looks up \p BB in BlockMaskCache and returns the cached mask value.
/// NOTE(review): presumably yields a null VPValue * when no mask was cached
/// for \p BB (DenseMap::lookup default) -- confirm callers handle that.
VPValue *getBlockInMask(VPBasicBlock *BB) const {
187+
return BlockMaskCache.lookup(BB);
188+
}
217189

218190
/// Return the recipe created for given ingredient.
219191
VPRecipeBase *getRecipe(Instruction *I) {
@@ -238,6 +210,13 @@ class VPRecipeBuilder {
238210
}
239211
return Plan.getOrAddLiveIn(V);
240212
}
213+
214+
/// Redirect every BlockMaskCache entry whose cached mask is \p Old so that it
/// refers to \p New instead. Entries holding any other value are untouched.
/// Walks the whole map, since several blocks may share the same mask value.
void updateBlockMaskCache(VPValue *Old, VPValue *New) {
215+
// V is bound by reference, so assigning to it rewrites the map entry in place.
for (auto &[_, V] : BlockMaskCache) {
216+
if (V == Old)
217+
V = New;
218+
}
219+
}
241220
};
242221
} // end namespace llvm
243222

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,7 @@ class PlainCFGBuilder {
6666
: TheLoop(Lp), LI(LI), Plan(std::make_unique<VPlan>(Lp)) {}
6767

6868
/// Build plain CFG for TheLoop and connects it to Plan's entry.
69-
std::unique_ptr<VPlan>
70-
buildPlainCFG(DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB);
69+
std::unique_ptr<VPlan> buildPlainCFG();
7170
};
7271
} // anonymous namespace
7372

@@ -242,8 +241,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
242241
}
243242

244243
// Main interface to build the plain CFG.
245-
std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
246-
DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
244+
std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
247245
VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
248246
BB2VPBB[Entry->getIRBasicBlock()] = Entry;
249247
for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
@@ -334,18 +332,14 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
334332
}
335333
}
336334

337-
for (const auto &[IRBB, VPB] : BB2VPBB)
338-
VPB2IRBB[VPB] = IRBB;
339-
340335
LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
341336
return std::move(Plan);
342337
}
343338

344-
std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(
345-
Loop *TheLoop, LoopInfo &LI,
346-
DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
339+
std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(Loop *TheLoop,
340+
LoopInfo &LI) {
347341
PlainCFGBuilder Builder(TheLoop, &LI);
348-
return Builder.buildPlainCFG(VPB2IRBB);
342+
return Builder.buildPlainCFG();
349343
}
350344

351345
/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it

0 commit comments

Comments
 (0)