Skip to content

Commit

Permalink
[VPlan] Update final exit value via VPlan.
Browse files Browse the repository at this point in the history
Model updating IV users directly in VPlan, replace fixupIVUsers.

Depends on llvm#110004,
llvm#109975 and
llvm#112145.
  • Loading branch information
fhahn committed Oct 13, 2024
1 parent 09dcbf7 commit 009b77f
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 118 deletions.
159 changes: 52 additions & 107 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -779,10 +779,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
void printDebugTracesAtStart() override;
void printDebugTracesAtEnd() override;

void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
Value *VectorTripCount, BasicBlock *MiddleBlock,
VPlan &Plan, VPTransformState &State) override {};
};

// A specialized derived class of inner loop vectorizer that performs
Expand Down Expand Up @@ -2697,87 +2693,6 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
return {LoopVectorPreHeader, nullptr};
}

// Fix up external users of the induction variable. At this point, we are
// in LCSSA form, with all external PHIs that use the IV having one input value,
// coming from the remainder loop. We need those PHIs to also have a correct
// value for the IV when arriving directly from the middle block.
void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
const InductionDescriptor &II,
Value *VectorTripCount,
BasicBlock *MiddleBlock, VPlan &Plan,
VPTransformState &State) {
// There are two kinds of external IV usages - those that use the value
// computed in the last iteration (the PHI) and those that use the penultimate
// value (the value that feeds into the phi from the loop latch).
// We allow both, but they, obviously, have different values.

assert(OrigLoop->getUniqueExitBlock() && "Expected a single exit block");

DenseMap<Value *, Value *> MissingVals;

Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
OrigLoop->getLoopPreheader()))
->getIncomingValueForBlock(MiddleBlock);

// An external user of the last iteration's value should see the value that
// the remainder loop uses to initialize its own IV.
Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
for (User *U : PostInc->users()) {
Instruction *UI = cast<Instruction>(U);
if (!OrigLoop->contains(UI)) {
assert(isa<PHINode>(UI) && "Expected LCSSA form");
MissingVals[UI] = EndValue;
}
}

// An external user of the penultimate value need to see EndValue - Step.
// The simplest way to get this is to recompute it from the constituent SCEVs,
// that is Start + (Step * (CRD - 1)).
for (User *U : OrigPhi->users()) {
auto *UI = cast<Instruction>(U);
if (!OrigLoop->contains(UI)) {
assert(isa<PHINode>(UI) && "Expected LCSSA form");
IRBuilder<> B(MiddleBlock->getTerminator());

// Fast-math-flags propagate from the original induction instruction.
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());

VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
assert(StepVPV && "step must have been expanded during VPlan execution");
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
: State.get(StepVPV, VPLane(0));
Value *Escape = nullptr;
if (EndValue->getType()->isIntegerTy())
Escape = B.CreateSub(EndValue, Step);
else if (EndValue->getType()->isPointerTy())
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
else if (EndValue->getType()->isFloatingPointTy()) {
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
Instruction::FAdd
? Instruction::FSub
: Instruction::FAdd,
EndValue, Step);
} else {
llvm_unreachable("all possible induction types must be handled");
}
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
}
}

for (auto &I : MissingVals) {
PHINode *PHI = cast<PHINode>(I.first);
// One corner case we have to handle is two IVs "chasing" each-other,
// that is %IV2 = phi [...], [ %IV1, %latch ]
// In this case, if IV1 has an external use, we need to avoid adding both
// "last value of IV1" and "penultimate value of IV2". So, verify that we
// don't already have an incoming value for the middle block.
if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
PHI->addIncoming(I.second, MiddleBlock);
}
}

namespace {

struct CSEDenseMapInfo {
Expand Down Expand Up @@ -2907,25 +2822,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
for (PHINode &PN : Exit->phis())
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);

if (Cost->requiresScalarEpilogue(VF.isVector())) {
// No edge from the middle block to the unique exit block has been inserted
// and there is nothing to fix from vector loop; phis should have incoming
// from scalar loop only.
} else {
// TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
// the cost model.

// If we inserted an edge from the middle block to the unique exit block,
// update uses outside the loop (phis) to account for the newly inserted
// edge.

// Fix-up external users of the induction variables.
for (const auto &Entry : Legal->getInductionVars())
fixupIVUsers(Entry.first, Entry.second,
getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, Plan,
State);
}

for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);

Expand Down Expand Up @@ -8821,7 +8717,7 @@ addUsersInExitBlock(VPlan &Plan,
}
}

static void addResumeValuesForInductions(VPlan &Plan) {
static void addResumeValuesForInductions(VPlan &Plan, Loop *OrigLoop) {
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();

Expand Down Expand Up @@ -8870,9 +8766,11 @@ static void addResumeValuesForInductions(VPlan &Plan) {
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());

VPBasicBlock *ScalarPHVPBB = nullptr;
VPBasicBlock *ExitVPBB = nullptr;
if (MiddleVPBB->getNumSuccessors() == 2) {
// Order is strict: first is the exit block, second is the scalar
// preheader.
ExitVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[0]);
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
} else {
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
Expand All @@ -8886,6 +8784,53 @@ static void addResumeValuesForInductions(VPlan &Plan) {
auto *ScalarLoopHeader =
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);

if (ExitVPBB) {

Value *PostInc =
OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
for (auto &R : *ExitVPBB) {
auto *VPIRInst = cast<VPIRInstruction>(&R);
auto *IRI = &VPIRInst->getInstruction();
if (!isa<PHINode>(IRI))
break;
// Skip phi nodes already updated. This can be the case if 2 induction
// phis chase each other.
if (VPIRInst->getNumOperands() == 1)
continue;
if (any_of(IRI->operands(),
[PostInc](Value *Op) { return Op == PostInc; })) {
VPIRInst->addOperand(EndValue);
continue;
}

if (any_of(IRI->operands(),
[OrigPhi](Value *Op) { return Op == OrigPhi; })) {
VPBuilder B(MiddleVPBB->getTerminator());
VPValue *Escape = nullptr;
if (ScalarTy->isIntegerTy())
Escape = B.createNaryOp(Instruction::Sub, {EndValue, Step});
else if (ScalarTy->isPointerTy())
Escape = B.createPtrAdd(
EndValue,
B.createNaryOp(
Instruction::Xor,
{Step, Plan.getOrAddLiveIn(ConstantInt::get(
Step->getLiveInIRValue()->getType(), -1))}));
else if (ScalarTy->isFloatingPointTy()) {
Escape = B.createNaryOp(
ID->getInductionBinOp()->getOpcode() == Instruction::FAdd
? Instruction::FSub
: Instruction::FAdd,
{EndValue, Step},
{ID->getInductionBinOp()->getFastMathFlags()});
} else {
llvm_unreachable("all possible induction types must be handled");
}
VPIRInst->addOperand(Escape);
}
}
}
}
}

Expand Down Expand Up @@ -9199,7 +9144,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
addUsersInExitBlock(*Plan, ExitUsersToFix);
addResumeValuesForInductions(*Plan);
addResumeValuesForInductions(*Plan, OrigLoop);

// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
Expand Down Expand Up @@ -9305,7 +9250,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
bool HasNUW = true;
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
DebugLoc());
addResumeValuesForInductions(*Plan);
addResumeValuesForInductions(*Plan, OrigLoop);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
}
Expand Down
24 changes: 14 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,16 +319,20 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
LastLane = 0;
}

auto *LastInst = cast<Instruction>(get(Def, LastLane));
// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
// will directly follow the scalar definitions.
auto *LastDef = get(Def, LastLane);
auto OldIP = Builder.saveIP();
auto NewIP =
isa<PHINode>(LastInst)
? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
: std::next(BasicBlock::iterator(LastInst));
Builder.SetInsertPoint(&*NewIP);
if (auto *LastInst = dyn_cast<Instruction>(LastDef)) {
// TODO: Remove once VPDerivedIVReicpe can be simplified, which requires
// vector trip count being modeled in VPlan.
// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
// will directly follow the scalar definitions.
auto NewIP =
isa<PHINode>(LastInst)
? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
: std::next(BasicBlock::iterator(LastInst));
Builder.SetInsertPoint(&*NewIP);
}

// However, if we are vectorizing, we need to construct the vector values.
// If the value is known to be uniform after vectorization, we can just
Expand All @@ -343,7 +347,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
} else {
// Initialize packing with insertelements to start from undef.
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
Value *Undef = PoisonValue::get(VectorType::get(LastDef->getType(), VF));
set(Def, Undef);
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
packScalarIntoVectorValue(Def, Lane);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,7 @@ void VPIRInstruction::execute(VPTransformState &State) {
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
// Set insertion point in PredBB in case an extract needs to be generated.
// TODO: Model extracts explicitly.
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
State.Builder.SetInsertPoint(&*PredBB->getTerminator());
Value *V = State.get(ExitValue, VPLane(Lane));
auto *Phi = cast<PHINode>(&I);
if (Phi->getBasicBlockIndex(PredBB) == -1)
Expand Down

0 comments on commit 009b77f

Please sign in to comment.