Skip to content

Commit

Permalink
[VPlan] Introduce scalar loop header in plan, remove VPLiveOut.
Browse files Browse the repository at this point in the history
Update VPlan to include the scalar loop header. This allows retiring
VPLiveOut, as the remaining live-outs can now be handled by adding
operands to the wrapped phis in the scalar loop header.

Note that the current version models only the scalar loop header: no other
scalar loop blocks are included, and the header is not wrapped in a region
block. Both can be added either in this PR or in follow-ups as needed.
  • Loading branch information
fhahn committed Sep 26, 2024
1 parent fbec1c2 commit d0e7dae
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 132 deletions.
13 changes: 8 additions & 5 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2952,10 +2952,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
}

// Fix live-out phis not already fixed earlier.
for (const auto &KV : Plan.getLiveOuts())
KV.second->fixPhi(Plan, State);

for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);

Expand Down Expand Up @@ -8818,7 +8814,14 @@ static void addLiveOutsForFirstOrderRecurrences(
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
"scalar.recur.init");
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
for (VPRecipeBase &R :
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
auto *IRI = cast<VPIRInstruction>(&R);
if (&IRI->getInstruction() == FORPhi) {
IRI->addOperand(ResumePhiRecipe);
break;
}
}

// Now update VPIRInstructions modeling LCSSA phis in the exit block.
// Extract the penultimate value of the recurrence and use it as operand for
Expand Down
38 changes: 15 additions & 23 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
executeRecipes(State, getIRBasicBlock());
if (getSingleSuccessor()) {
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
Br->setOperand(0, nullptr);
getIRBasicBlock()->getTerminator()->eraseFromParent();
auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
getIRBasicBlock()->getSingleSuccessor()) {
cast<BranchInst>(getIRBasicBlock()->getTerminator())
->setOperand(0, nullptr);
} else {
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
Br->setOperand(0, nullptr);
getIRBasicBlock()->getTerminator()->eraseFromParent();
}
}

for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
Expand Down Expand Up @@ -843,10 +850,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
#endif

VPlan::~VPlan() {
for (auto &KV : LiveOuts)
delete KV.second;
LiveOuts.clear();

if (Entry) {
VPValue DummyValue;
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
Expand Down Expand Up @@ -901,6 +904,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);

VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
VPBasicBlock *ScalarHeader = createVPIRBasicBlockFor(TheLoop->getHeader());
VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
if (!RequiresScalarEpilogueCheck) {
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
return Plan;
Expand Down Expand Up @@ -1050,6 +1055,8 @@ void VPlan::execute(VPTransformState *State) {
BrInst->insertBefore(MiddleBB->getTerminator());
MiddleBB->getTerminator()->eraseFromParent();
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});

// Generate code in the loop pre-header and body.
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
Expand Down Expand Up @@ -1168,12 +1175,6 @@ void VPlan::print(raw_ostream &O) const {
Block->print(O, "", SlotTracker);
}

if (!LiveOuts.empty())
O << "\n";
for (const auto &KV : LiveOuts) {
KV.second->print(O, SlotTracker);
}

O << "}\n";
}

Expand Down Expand Up @@ -1210,11 +1211,6 @@ LLVM_DUMP_METHOD
void VPlan::dump() const { print(dbgs()); }
#endif

/// Register \p V as the value that flows out of the plan into the IR phi
/// \p PN. A fresh VPLiveOut wrapping the pair is allocated and stored in the
/// plan's LiveOuts map, keyed by \p PN. Each phi may be registered at most
/// once; a second registration trips the assertion.
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
  assert(LiveOuts.find(PN) == LiveOuts.end() &&
         "an exit value for PN already exists");
  VPLiveOut *NewLiveOut = new VPLiveOut(PN, V);
  LiveOuts.insert({PN, NewLiveOut});
}

static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
// Update the operands of all cloned recipes starting at NewEntry. This
Expand Down Expand Up @@ -1282,10 +1278,6 @@ VPlan *VPlan::duplicate() {
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
remapOperands(Entry, NewEntry, Old2NewVPValues);

// Clone live-outs.
for (const auto &[_, LO] : LiveOuts)
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);

// Initialize remaining fields of cloned VPlan.
NewPlan->VFs = VFs;
NewPlan->UFs = UFs;
Expand Down
53 changes: 0 additions & 53 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -655,48 +655,6 @@ class VPBlockBase {
virtual VPBlockBase *clone() = 0;
};

/// Models a use of a VPValue by an IR phi node that lives outside the VPlan.
/// The operand of this user has to be added to the associated phi. Which block
/// the value arrives from depends on where the operand is defined: if it is
/// produced by a recipe outside a region, that recipe's parent block is used;
/// in every other case the middle block is used.
class VPLiveOut : public VPUser {
  /// The IR phi node that receives the live-out value.
  PHINode *Phi;

public:
  /// Create a live-out user feeding \p Op into \p Phi.
  VPLiveOut(PHINode *Phi, VPValue *Op)
      : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}

  /// LLVM-style RTTI hook: \p U is a VPLiveOut iff it carries the LiveOut ID.
  static bool classof(const VPUser *U) {
    return VPUser::VPUserID::LiveOut == U->getVPUserID();
  }

  /// Returns the wrapped IR phi node.
  PHINode *getPhi() const { return Phi; }

  /// Fix the wrapped phi node. For phis in the exit block this adds an
  /// incoming value from the vector loop via the middle block (values from the
  /// scalar loop already reach those phis); for phis in the scalar header this
  /// updates the value coming from the scalar preheader.
  void fixPhi(VPlan &Plan, VPTransformState &State);

  /// A live-out always consumes the scalar form of its operand, so this
  /// returns true for any \p Op that is actually an operand of this user.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// During the transition to unrolling directly on VPlan, live-outs are
  /// treated as only using the first part.
  /// TODO: Remove after unroller transition.
  bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPLiveOut to \p O.
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};

/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
Expand Down Expand Up @@ -3446,11 +3404,6 @@ class VPlan {
/// definitions are VPValues that hold a pointer to their underlying IR.
SmallVector<VPValue *, 16> VPLiveInsToFree;

/// Values used outside the plan. It contains live-outs that need fixing. Any
/// live-out that is fixed outside VPlan needs to be removed. The remaining
/// live-outs are fixed via VPLiveOut::fixPhi.
MapVector<PHINode *, VPLiveOut *> LiveOuts;

/// Mapping from SCEVs to the VPValues representing their expansions.
/// NOTE: This mapping is temporary and will be removed once all users have
/// been modeled in VPlan directly.
Expand Down Expand Up @@ -3630,12 +3583,6 @@ class VPlan {
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
}

/// Record \p V as the value leaving the plan through the IR phi \p PN.
void addLiveOut(PHINode *PN, VPValue *V);

/// Returns a read-only view of the phi -> live-out map of this plan.
const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
return LiveOuts;
}

VPValue *getSCEVExpansion(const SCEV *S) const {
return SCEVToExpansion.lookup(S);
}
Expand Down
34 changes: 4 additions & 30 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,35 +197,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
}
}

/// Wire the IR value generated for this live-out's operand into the wrapped
/// phi. If the phi already has an entry for the chosen predecessor block, that
/// entry is overwritten; otherwise a new incoming (value, block) pair is added.
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  VPValue *LiveOutVal = getOperand(0);
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());

  // Values leaving the vector loop reach the live-out phi via the middle
  // block, which is therefore the default predecessor. Only a value defined by
  // a recipe whose block is not nested in a loop region arrives directly from
  // its defining block.
  VPBasicBlock *IncomingVPBB = MiddleVPBB;
  if (VPRecipeBase *DefRecipe = LiveOutVal->getDefiningRecipe())
    if (auto *DefVPBB = DefRecipe->getParent())
      if (!DefVPBB->getEnclosingLoopRegion())
        IncomingVPBB = DefVPBB;

  BasicBlock *IncomingBB = State.CFG.VPBB2IRBB[IncomingVPBB];
  Value *IncomingVal = State.get(LiveOutVal, VPLane(0));

  // No entry for this predecessor yet: append a new incoming pair.
  if (Phi->getBasicBlockIndex(IncomingBB) == -1) {
    Phi->addIncoming(IncomingVal, IncomingBB);
    return;
  }
  // Otherwise replace the value already recorded for the predecessor.
  Phi->setIncomingValueForBlock(IncomingBB, IncomingVal);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Emit one line of the form "Live-out <phi> = <operand>" to \p O, printing
/// the wrapped IR phi and the single VPValue operand as operands.
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  VPValue *ExitValue = getOperand(0);

  O << "Live-out ";
  Phi->printAsOperand(O);
  O << " = ";
  ExitValue->printAsOperand(O, SlotTracker);
  O << "\n";
}
#endif

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
Expand Down Expand Up @@ -858,7 +829,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
Value *V = State.get(ExitValue, VPLane(Lane));
auto *Phi = cast<PHINode>(&I);
Phi->addIncoming(V, PredBB);
if (Phi->getBasicBlockIndex(PredBB) == -1)
Phi->addIncoming(V, PredBB);
else
Phi->setIncomingValueForBlock(PredBB, V);
}

// Advance the insert point after the wrapped IR instruction. This allows
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
// Don't fold the exit block of the Plan into its single predecessor for
// now.
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
if (!VPBB->getParent())
continue;
auto *PredVPBB =
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
Expand Down
13 changes: 7 additions & 6 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
return;

if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
VPValue *Op0, *Op1;
if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
m_VPValue(Op1)))) {
addUniformForAllParts(VPI);
return;
}

if (vputils::onlyFirstPartUsed(VPI)) {
addUniformForAllParts(VPI);
return;
Expand Down Expand Up @@ -449,11 +456,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
Part++;
}

// Remap the operand of live-outs to the last part.
for (const auto &[_, LO] : Plan.getLiveOuts()) {
VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1);
LO->setOperand(0, In);
}

VPlanTransforms::removeDeadRecipes(Plan);
}
14 changes: 0 additions & 14 deletions llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,14 +244,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
return false;
}

VPBlockBase *MiddleBB =
IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor();
if (IRBB != IRBB->getPlan()->getPreheader() &&
IRBB->getSinglePredecessor() != MiddleBB) {
errs() << "VPIRBasicBlock can only be used as pre-header or a successor of "
"middle-block at the moment!\n";
return false;
}
return true;
}

Expand Down Expand Up @@ -416,12 +408,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
return false;
}

for (const auto &KV : Plan.getLiveOuts())
if (KV.second->getNumOperands() != 1) {
errs() << "live outs must have a single operand\n";
return false;
}

return true;
}

Expand Down
18 changes: 18 additions & 0 deletions llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1
; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16
; CHECK-NEXT: IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
; CHECK-NEXT: IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
; CHECK-NEXT: IR store i32 %l, ptr %gep.dst, align 16
; CHECK-NEXT: IR %ec = icmp eq i32 %iv.next, 0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down Expand Up @@ -1156,6 +1167,13 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: Successor(s): ir-bb<loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop.header>:
; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1
; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1
; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down

0 comments on commit d0e7dae

Please sign in to comment.