Skip to content

Commit 7f61860

Browse files
committed
[VPlan] Move predication to VPlanTransform (NFC) (WIP).
This patch moves the logic to predicate and linearize a VPlan to a dedicated VPlan transform. The main logic to perform predication is ready for review, although there are a few things to note that should be improved, either directly in the PR or in the future: * Edge and block masks are cached in VPRecipeBuilder, so they can be accessed during recipe construction. A better alternative may be to add mask operands to all VPInstructions that need them and use those during recipe construction. * The mask caching in a map also means that this map needs updating each time a new recipe replaces a VPInstruction; this would also be handled by adding mask operands. Currently this is still WIP because early-exit loop handling does not work, as the exit conditions are not available in the initial VPlans. This will be fixed with #128419 and follow-ups. All tests except those for early-exit loops are passing.
1 parent eb04741 commit 7f61860

File tree

8 files changed

+353
-313
lines changed

8 files changed

+353
-313
lines changed

llvm/lib/Transforms/Vectorize/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ add_llvm_component_library(LLVMVectorize
2424
VPlan.cpp
2525
VPlanAnalysis.cpp
2626
VPlanConstruction.cpp
27+
VPlanPredicator.cpp
2728
VPlanRecipes.cpp
2829
VPlanSLP.cpp
2930
VPlanTransforms.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 60 additions & 264 deletions
Large diffs are not rendered by default.

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 18 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,14 @@ class VPRecipeBuilder {
7373
/// if-conversion currently takes place during VPlan-construction, so these
7474
/// caches are only used at that stage.
7575
using EdgeMaskCacheTy =
76-
DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>;
77-
using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>;
76+
DenseMap<std::pair<VPBasicBlock *, VPBasicBlock *>, VPValue *>;
77+
using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
7878
EdgeMaskCacheTy EdgeMaskCache;
79+
80+
public:
7981
BlockMaskCacheTy BlockMaskCache;
8082

83+
private:
8184
// VPlan construction support: Hold a mapping from ingredients to
8285
// their recipe.
8386
DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
@@ -114,11 +117,6 @@ class VPRecipeBuilder {
114117
tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
115118
VFRange &Range);
116119

117-
/// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently
118-
/// all such phi nodes are turned into a sequence of select instructions as
119-
/// the vectorizer currently performs full if-conversion.
120-
VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands);
121-
122120
/// Handle call instructions. If \p CI can be widened for \p Range.Start,
123121
/// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
124122
/// decreased to ensure same decision from \p Range.Start to \p Range.End.
@@ -188,27 +186,20 @@ class VPRecipeBuilder {
188186
Ingredient2Recipe[I] = R;
189187
}
190188

191-
/// Create the mask for the vector loop header block.
192-
void createHeaderMask();
193-
194-
/// A helper function that computes the predicate of the block BB, assuming
195-
/// that the header block of the loop is set to True or the loop mask when
196-
/// tail folding.
197-
void createBlockInMask(BasicBlock *BB);
198-
189+
void setBlockInMask(VPBasicBlock *BB, VPValue *Mask) {
190+
assert(!BlockMaskCache.contains(BB) && "Mask already set");
191+
BlockMaskCache[BB] = Mask;
192+
}
199193
/// Returns the *entry* mask for the block \p BB.
200-
VPValue *getBlockInMask(BasicBlock *BB) const;
201-
202-
/// Create an edge mask for every destination of cases and/or default.
203-
void createSwitchEdgeMasks(SwitchInst *SI);
204-
205-
/// A helper function that computes the predicate of the edge between SRC
206-
/// and DST.
207-
VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
208-
209-
/// A helper that returns the previously computed predicate of the edge
210-
/// between SRC and DST.
211-
VPValue *getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const;
194+
VPValue *getBlockInMask(VPBasicBlock *BB) const {
195+
return BlockMaskCache.lookup(BB);
196+
}
197+
void setEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst, VPValue *Mask) {
198+
EdgeMaskCache[{Src, Dst}] = Mask;
199+
}
200+
VPValue *getEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) const {
201+
return EdgeMaskCache.lookup({Src, Dst});
202+
}
212203

213204
/// Return the recipe created for given ingredient.
214205
VPRecipeBase *getRecipe(Instruction *I) {

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ class PlainCFGBuilder {
6565
: TheLoop(Lp), LI(LI), Plan(std::make_unique<VPlan>(Lp)) {}
6666

6767
/// Build plain CFG for TheLoop and connects it to Plan's entry.
68-
std::unique_ptr<VPlan>
69-
buildPlainCFG(DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB);
68+
std::unique_ptr<VPlan> buildPlainCFG();
7069
};
7170
} // anonymous namespace
7271

@@ -245,19 +244,24 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
245244
for (Value *Op : Inst->operands())
246245
VPOperands.push_back(getOrCreateVPOperand(Op));
247246

248-
// Build VPInstruction for any arbitrary Instruction without specific
249-
// representation in VPlan.
250-
NewR = cast<VPInstruction>(
251-
VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
247+
if (auto *ICmp = dyn_cast<ICmpInst>(Inst)) {
248+
NewR = cast<VPInstruction>(VPIRBuilder.createICmp(
249+
ICmp->getPredicate(), VPOperands[0], VPOperands[1]));
250+
NewR->setUnderlyingValue(ICmp);
251+
} else {
252+
// Build VPInstruction for any arbitrary Instruction without specific
253+
// representation in VPlan.
254+
NewR = cast<VPInstruction>(
255+
VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
256+
}
252257
}
253258

254259
IRDef2VPValue[Inst] = NewR;
255260
}
256261
}
257262

258263
// Main interface to build the plain CFG.
259-
std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
260-
DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
264+
std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
261265
VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
262266
BB2VPBB[Entry->getIRBasicBlock()] = Entry;
263267

@@ -346,18 +350,14 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
346350
}
347351
}
348352

349-
for (const auto &[IRBB, VPB] : BB2VPBB)
350-
VPB2IRBB[VPB] = IRBB;
351-
352353
LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
353354
return std::move(Plan);
354355
}
355356

356-
std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(
357-
Loop *TheLoop, LoopInfo &LI,
358-
DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
357+
std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(Loop *TheLoop,
358+
LoopInfo &LI) {
359359
PlainCFGBuilder Builder(TheLoop, &LI);
360-
return Builder.buildPlainCFG(VPB2IRBB);
360+
return Builder.buildPlainCFG();
361361
}
362362

363363
/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
//===-- VPlanPredicator.cpp - VPlan predicator ----------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file implements predication for VPlans.
11+
///
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "VPRecipeBuilder.h"
15+
#include "VPlan.h"
16+
#include "VPlanCFG.h"
17+
#include "VPlanTransforms.h"
18+
#include "VPlanUtils.h"
19+
#include "llvm/ADT/PostOrderIterator.h"
20+
21+
using namespace llvm;
22+
23+
struct VPPredicator {
24+
/// When we if-convert we need to create edge masks. We have to cache values
25+
/// so that we don't end up with exponential recursion/IR. Note that
26+
/// if-conversion currently takes place during VPlan-construction, so these
27+
/// caches are only used at that stage.
28+
using EdgeMaskCacheTy =
29+
DenseMap<std::pair<VPBasicBlock *, VPBasicBlock *>, VPValue *>;
30+
using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
31+
32+
VPPredicator(VPRecipeBuilder &RecipeBuilder) : RecipeBuilder(RecipeBuilder) {}
33+
34+
VPRecipeBuilder &RecipeBuilder;
35+
36+
VPBuilder Builder;
37+
VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
38+
assert(is_contained(Dst->getPredecessors(), Src) && "Invalid edge");
39+
40+
// Look for cached value.
41+
VPValue *EdgeMask = RecipeBuilder.getEdgeMask(Src, Dst);
42+
if (EdgeMask)
43+
return EdgeMask;
44+
45+
VPValue *SrcMask = RecipeBuilder.getBlockInMask(Src);
46+
47+
// The terminator has to be a branch inst!
48+
if (Src->empty() || Src->getNumSuccessors() == 1) {
49+
RecipeBuilder.setEdgeMask(Src, Dst, SrcMask);
50+
return SrcMask;
51+
}
52+
53+
auto *Term = cast<VPInstruction>(Src->getTerminator());
54+
if (Term->getOpcode() == Instruction::Switch) {
55+
createSwitchEdgeMasks(Term);
56+
return RecipeBuilder.getEdgeMask(Src, Dst);
57+
}
58+
59+
auto *BI = cast<VPInstruction>(Src->getTerminator());
60+
assert(BI->getOpcode() == VPInstruction::BranchOnCond);
61+
if (Src->getSuccessors()[0] == Src->getSuccessors()[1]) {
62+
RecipeBuilder.setEdgeMask(Src, Dst, SrcMask);
63+
return SrcMask;
64+
}
65+
66+
EdgeMask = BI->getOperand(0);
67+
assert(EdgeMask && "No Edge Mask found for condition");
68+
69+
if (Src->getSuccessors()[0] != Dst)
70+
EdgeMask = Builder.createNot(EdgeMask, BI->getDebugLoc());
71+
72+
if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
73+
// The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
74+
// is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
75+
// instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
76+
EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask, BI->getDebugLoc());
77+
}
78+
79+
RecipeBuilder.setEdgeMask(Src, Dst, EdgeMask);
80+
return EdgeMask;
81+
}
82+
83+
VPValue *createBlockInMask(VPBasicBlock *VPBB) {
84+
Builder.setInsertPoint(VPBB, VPBB->begin());
85+
// All-one mask is modelled as no-mask following the convention for masked
86+
// load/store/gather/scatter. Initialize BlockMask to no-mask.
87+
VPValue *BlockMask = nullptr;
88+
// This is the block mask. We OR all unique incoming edges.
89+
for (auto *Predecessor : SetVector<VPBlockBase *>(
90+
VPBB->getPredecessors().begin(), VPBB->getPredecessors().end())) {
91+
VPValue *EdgeMask = createEdgeMask(cast<VPBasicBlock>(Predecessor), VPBB);
92+
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
93+
// too.
94+
RecipeBuilder.setBlockInMask(VPBB, EdgeMask);
95+
return EdgeMask;
96+
}
97+
98+
if (!BlockMask) { // BlockMask has its initialized nullptr value.
99+
BlockMask = EdgeMask;
100+
continue;
101+
}
102+
103+
BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
104+
}
105+
106+
RecipeBuilder.setBlockInMask(VPBB, BlockMask);
107+
return BlockMask;
108+
}
109+
110+
void createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
111+
if (!FoldTail) {
112+
RecipeBuilder.setBlockInMask(HeaderVPBB, nullptr);
113+
return;
114+
}
115+
116+
// Introduce the early-exit compare IV <= BTC to form header block mask.
117+
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
118+
// constructing the desired canonical IV in the header block as its first
119+
// non-phi instructions.
120+
121+
auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
122+
auto &Plan = *HeaderVPBB->getPlan();
123+
auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
124+
HeaderVPBB->insert(IV, NewInsertionPoint);
125+
126+
VPBuilder::InsertPointGuard Guard(Builder);
127+
Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
128+
VPValue *BlockMask = nullptr;
129+
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
130+
BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
131+
RecipeBuilder.setBlockInMask(HeaderVPBB, BlockMask);
132+
}
133+
134+
void createSwitchEdgeMasks(VPInstruction *SI) {
135+
VPBasicBlock *Src = SI->getParent();
136+
137+
// Create masks where the terminator in Src is a switch. We create mask for
138+
// all edges at the same time. This is more efficient, as we can create and
139+
// collect compares for all cases once.
140+
VPValue *Cond = SI->getOperand(0);
141+
VPBasicBlock *DefaultDst = cast<VPBasicBlock>(Src->getSuccessors()[0]);
142+
MapVector<VPBasicBlock *, SmallVector<VPValue *>> Dst2Compares;
143+
for (const auto &[Idx, Succ] :
144+
enumerate(ArrayRef(Src->getSuccessors()).drop_front())) {
145+
VPBasicBlock *Dst = cast<VPBasicBlock>(Succ);
146+
// assert(!EdgeMaskCache.contains({Src, Dst}) && "Edge masks already
147+
// created");
148+
// Cases whose destination is the same as default are redundant and can
149+
// be ignored - they will get there anyhow.
150+
if (Dst == DefaultDst)
151+
continue;
152+
auto &Compares = Dst2Compares[Dst];
153+
VPValue *V = SI->getOperand(Idx + 1);
154+
Compares.push_back(Builder.createICmp(CmpInst::ICMP_EQ, Cond, V));
155+
}
156+
157+
// We need to handle 2 separate cases below for all entries in Dst2Compares,
158+
// which excludes destinations matching the default destination.
159+
VPValue *SrcMask = RecipeBuilder.getBlockInMask(Src);
160+
VPValue *DefaultMask = nullptr;
161+
for (const auto &[Dst, Conds] : Dst2Compares) {
162+
// 1. Dst is not the default destination. Dst is reached if any of the
163+
// cases with destination == Dst are taken. Join the conditions for each
164+
// case whose destination == Dst using an OR.
165+
VPValue *Mask = Conds[0];
166+
for (VPValue *V : ArrayRef<VPValue *>(Conds).drop_front())
167+
Mask = Builder.createOr(Mask, V);
168+
if (SrcMask)
169+
Mask = Builder.createLogicalAnd(SrcMask, Mask);
170+
RecipeBuilder.setEdgeMask(Src, Dst, Mask);
171+
172+
// 2. Create the mask for the default destination, which is reached if
173+
// none of the cases with destination != default destination are taken.
174+
// Join the conditions for each case where the destination is != Dst using
175+
// an OR and negate it.
176+
DefaultMask = DefaultMask ? Builder.createOr(DefaultMask, Mask) : Mask;
177+
}
178+
179+
if (DefaultMask) {
180+
DefaultMask = Builder.createNot(DefaultMask);
181+
if (SrcMask)
182+
DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask);
183+
}
184+
RecipeBuilder.setEdgeMask(Src, DefaultDst, DefaultMask);
185+
}
186+
};
187+
188+
/// Creates block-in and edge masks for all VPBasicBlocks reachable from the
/// header of the vector loop region of \p Plan and replaces every
/// VPWidenPHIRecipe in a non-header block with a VPBlendRecipe whose operands
/// are the incoming values paired with their edge masks. The header mask
/// depends on \p FoldTail. Masks and the new blend recipes are recorded in
/// \p RecipeBuilder.
void VPlanTransforms::predicateAndLinearize(VPlan &Plan, bool FoldTail,
                                            VPRecipeBuilder &RecipeBuilder) {
  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();

  // Reverse post-order guarantees that a block is visited only after all of
  // its predecessors, so their masks are available when needed.
  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
      Header);
  VPPredicator Predicator(RecipeBuilder);
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    // The header only needs its (possibly tail-folding) mask.
    if (VPBB == Header) {
      Predicator.createHeaderMask(Header, FoldTail);
      continue;
    }

    // Collect the phis up front; the mask computation is inserted at the start
    // of the block and the phis are erased further down.
    SmallVector<VPWidenPHIRecipe *> WidenPhis;
    for (VPRecipeBase &PhiR : VPBB->phis())
      WidenPhis.push_back(cast<VPWidenPHIRecipe>(&PhiR));

    Predicator.createBlockInMask(VPBB);

    // All phis in non-header blocks become blends (selects), so insertion
    // order is not a concern here. The resulting predication tree may contain
    // duplicates; later optimizations are expected to clean those up.
    for (VPWidenPHIRecipe *WidenPhi : WidenPhis) {
      PHINode *IRPhi = cast<PHINode>(WidenPhi->getUnderlyingValue());

      // Map incoming IR BasicBlocks to incoming VPValues and VPBasicBlocks,
      // for lookup below.
      // TODO: Add operands and masks in order from the VPlan predecessors.
      DenseMap<BasicBlock *, VPValue *> ValueForIRPred;
      DenseMap<BasicBlock *, VPBasicBlock *> VPBBForIRPred;
      unsigned PredIdx = 0;
      for (BasicBlock *IRPred : predecessors(IRPhi->getParent())) {
        ValueForIRPred[IRPred] = WidenPhi->getOperand(PredIdx);
        VPBBForIRPred[IRPred] =
            cast<VPBasicBlock>(VPBB->getPredecessors()[PredIdx]);
        ++PredIdx;
      }

      // Build the (value, mask) operand list in the order of the IR phi's
      // incoming blocks.
      SmallVector<VPValue *, 2> BlendOperands;
      for (const auto &[In, IRPred] : enumerate(IRPhi->blocks())) {
        BlendOperands.push_back(ValueForIRPred.lookup(IRPred));
        VPValue *Mask =
            RecipeBuilder.getEdgeMask(VPBBForIRPred.lookup(IRPred), VPBB);
        if (Mask) {
          BlendOperands.push_back(Mask);
          continue;
        }
        // An all-one edge mask is only expected for the first incoming value,
        // with all incoming values being equal; a single operand suffices.
        assert(In == 0 && "Both null and non-null edge masks found");
        assert(all_equal(WidenPhi->operands()) &&
               "Distinct incoming values with one having a full mask");
        break;
      }

      auto *Blend = new VPBlendRecipe(IRPhi, BlendOperands);
      Blend->insertBefore(WidenPhi);
      WidenPhi->replaceAllUsesWith(Blend);
      WidenPhi->eraseFromParent();
      RecipeBuilder.setRecipe(IRPhi, Blend);
    }
  }
}

0 commit comments

Comments
 (0)