-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[VPlan] Simplify branch to scalar ph in VPlan transform. (NFC) #140409
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Update initial construction to connect the Plan's entry to the scalar preheader during initial construction. This moves a small part of the skeleton creation out of ILV and will also enable replacing VPInstruction::ResumePhi with regular VPPhi recipes. Resume phis need 2 incoming values to start with, the second being the bypass value from the scalar ph (and used to replicate the incoming value for other bypass blocks). Adding the extra edge ensures we incoming values for resume phis match the incoming blocks. Still needs various VPlan printing tests to be updated. vplan-printing.ll shows the changes.
Use regular VPPhi instead of a separate opcode for resume phis. This removes an unneeded specialized opcode and unifies the code (verification, printing, updating when CFG is changed). Depends on llvm#140132.
Simplify branch on false, starting with the branch from the middle block to the scalar preheader. Depends on llvm#140405.
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-powerpc Author: Florian Hahn (fhahn) ChangesSimplify branch on false, starting with the branch from the middle block Depends on #140405. Patch is 101.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140409.diff 32 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 148b187f4f25d..6c3a2ce941c54 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -490,7 +490,7 @@ class InnerLoopVectorizer {
MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI),
RTChecks(RTChecks), Plan(Plan),
- VectorPHVPB(Plan.getEntry()->getSingleSuccessor()) {}
+ VectorPHVPB(Plan.getEntry()->getSuccessors()[1]) {}
virtual ~InnerLoopVectorizer() = default;
@@ -2365,22 +2365,19 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
VPBlockBase *PreVectorPH = VectorPHVPB->getSinglePredecessor();
- if (PreVectorPH->getNumSuccessors() != 1) {
- assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors");
- assert(PreVectorPH->getSuccessors()[0] == ScalarPH &&
- "Unexpected successor");
- VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB);
- VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB);
- PreVectorPH = CheckVPIRBB;
- }
+ assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors");
+ assert(PreVectorPH->getSuccessors()[0] == ScalarPH && "Unexpected successor");
+ VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB);
+ VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB);
+ PreVectorPH = CheckVPIRBB;
VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH);
PreVectorPH->swapSuccessors();
// We just connected a new block to the scalar preheader. Update all
// ResumePhis by adding an incoming value for it, replicating the last value.
for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
- auto *ResumePhi = dyn_cast<VPInstruction>(&R);
- if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
+ auto *ResumePhi = cast<VPPhi>(&R);
+ if (ResumePhi->getNumIncoming() == ScalarPH->getNumPredecessors())
continue;
ResumePhi->addOperand(
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
@@ -2463,9 +2460,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
LoopBypassBlocks.push_back(TCCheckBlock);
-
- // TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here.
- introduceCheckBlockInVPlan(TCCheckBlock);
}
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2530,7 +2524,8 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
for (auto &R : make_early_inc_range(*VPBB)) {
- assert(!R.isPhi() && "Tried to move phi recipe to end of block");
+ assert((IRVPBB->empty() || IRVPBB->back().isPhi() || !R.isPhi()) &&
+ "Tried to move phi recipe to end of block");
R.moveBefore(*IRVPBB, IRVPBB->end());
}
@@ -7768,10 +7763,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
// created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
// over the incoming values correctly.
using namespace VPlanPatternMatch;
- auto IsResumePhi = [](VPUser *U) {
- auto *VPI = dyn_cast<VPInstruction>(U);
- return VPI && VPI->getOpcode() == VPInstruction::ResumePhi;
- };
+ auto IsResumePhi = [](VPUser *U) { return isa<VPPhi>(U); };
assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 &&
"ResumePhi must have a single user");
auto *EpiResumePhiVPI =
@@ -7837,7 +7829,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 1. Set up the skeleton for vectorization, including vector pre-header and
// middle block. The vector loop is created during VPlan execution.
- VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor());
+ VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSuccessors()[1]);
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
if (VectorizingEpilogue)
VPlanTransforms::removeDeadRecipes(BestVPlan);
@@ -8070,7 +8062,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
- introduceCheckBlockInVPlan(TCCheckBlock);
+ if (!ForEpilogue)
+ introduceCheckBlockInVPlan(TCCheckBlock);
return TCCheckBlock;
}
@@ -8200,7 +8193,6 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
Plan.setEntry(NewEntry);
// OldEntry is now dead and will be cleaned up when the plan gets destroyed.
- introduceCheckBlockInVPlan(Insert);
return Insert;
}
@@ -9146,9 +9138,8 @@ static VPInstruction *addResumePhiRecipeForInduction(
WideIV->getDebugLoc());
}
- auto *ResumePhiRecipe =
- ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
- WideIV->getDebugLoc(), "bc.resume.val");
+ auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi(
+ {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
return ResumePhiRecipe;
}
@@ -9160,7 +9151,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
DenseMap<VPValue *, VPValue *> &IVEndValues) {
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
auto *ScalarPH = Plan.getScalarPreheader();
- auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
+ auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
VPBuilder VectorPHBuilder(
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
@@ -9177,8 +9168,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
&Plan.getVectorTripCount())) {
- assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi &&
- "Expected a ResumePhi");
+ assert(isa<VPPhi>(ResumePhi) && "Expected a phi");
IVEndValues[WideIVR] = ResumePhi->getOperand(0);
ScalarPhiIRI->addOperand(ResumePhi);
continue;
@@ -9203,8 +9193,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
"vector.recur.extract");
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
- auto *ResumePhiR = ScalarPHBuilder.createNaryOp(
- VPInstruction::ResumePhi,
+ auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
ScalarPhiIRI->addOperand(ResumePhiR);
}
@@ -10406,9 +10395,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
VPI->setOperand(1, Freeze);
if (UpdateResumePhis)
OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) {
- return Freeze != &U && isa<VPInstruction>(&U) &&
- cast<VPInstruction>(&U)->getOpcode() ==
- VPInstruction::ResumePhi;
+ return Freeze != &U && isa<VPPhi>(&U);
});
}
};
@@ -10421,13 +10408,12 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
// scalar (which will become vector) epilogue loop we are done. Otherwise
// create it below.
if (any_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) {
- return match(&R, m_VPInstruction<VPInstruction::ResumePhi>(
- m_Specific(VectorTC), m_SpecificInt(0)));
+ return match(&R, m_VPInstruction<Instruction::PHI>(m_Specific(VectorTC),
+ m_SpecificInt(0)));
}))
return;
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
- ScalarPHBuilder.createNaryOp(
- VPInstruction::ResumePhi,
+ ScalarPHBuilder.createScalarPhi(
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
"vec.epilog.resume.val");
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e634de1e17c69..ebbffef2a4afe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -887,11 +887,6 @@ class VPInstruction : public VPRecipeWithIRFlags,
SLPStore,
ActiveLaneMask,
ExplicitVectorLength,
- /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
- /// The first operand is the incoming value from the predecessor in VPlan,
- /// the second operand is the incoming value for all other predecessors
- /// (which are currently not modeled in VPlan).
- ResumePhi,
CalculateTripCountMinusVF,
// Increment the canonical IV separately for each unrolled part.
CanonicalIVIncrementForPart,
@@ -1160,6 +1155,8 @@ class VPPhiAccessors {
return getAsRecipe()->getNumOperands();
}
+ void removeIncomingValue(VPBlockBase *VPB) const;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
@@ -1170,11 +1167,15 @@ struct VPPhi : public VPInstruction, public VPPhiAccessors {
VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
: VPInstruction(Instruction::PHI, Operands, DL, Name) {}
- static inline bool classof(const VPRecipeBase *U) {
+ static inline bool classof(const VPUser *U) {
auto *R = dyn_cast<VPInstruction>(U);
return R && R->getOpcode() == Instruction::PHI;
}
+ VPPhi *clone() override {
+ return new VPPhi(operands(), getDebugLoc(), getName());
+ }
+
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3737,6 +3738,15 @@ VPPhiAccessors::getIncomingBlock(unsigned Idx) const {
return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
}
+inline void VPPhiAccessors::removeIncomingValue(VPBlockBase *VPB) const {
+ VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
+ const VPBasicBlock *Parent = R->getParent();
+ assert(R->getNumOperands() == Parent->getNumPredecessors());
+ auto I = find(Parent->getPredecessors(), VPB);
+ R->getOperand(I - Parent->getPredecessors().begin())->removeUser(*R);
+ R->removeOperand(I - Parent->getPredecessors().begin());
+}
+
/// A special type of VPBasicBlock that wraps an existing IR basic block.
/// Recipes of the block get added before the first non-phi instruction in the
/// wrapped block.
@@ -4246,7 +4256,8 @@ class VPlan {
/// that this relies on unneeded branches to the scalar tail loop being
/// removed.
bool hasScalarTail() const {
- return getScalarPreheader()->getNumPredecessors() != 0;
+ return getScalarPreheader()->getNumPredecessors() != 0 &&
+ getScalarPreheader()->getSinglePredecessor() != getEntry();
}
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index ac0f30cb4693c..926490bfad7d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -101,7 +101,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return inferScalarType(R->getOperand(0));
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::Not:
- case VPInstruction::ResumePhi:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::AnyOf:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 287bc93ce496a..204af8f12311b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -507,8 +507,12 @@ void VPlanTransforms::prepareForVectorization(
cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), Range);
HandledUncountableEarlyExit = true;
+ } else {
+ for (VPRecipeBase &R : cast<VPIRBasicBlock>(EB)->phis()) {
+ if (auto *PhiR = dyn_cast<VPIRPhi>(&R))
+ PhiR->removeIncomingValue(Pred);
+ }
}
-
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
}
@@ -541,23 +545,13 @@ void VPlanTransforms::prepareForVectorization(
// Thus if tail is to be folded, we know we don't need to run the
// remainder and we can set the condition to true.
// 3) Otherwise, construct a runtime check.
+ VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
+ // Also connect the entry block to the scalar preheader.
+ VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
+ Plan.getEntry()->swapSuccessors();
- if (!RequiresScalarEpilogueCheck) {
- if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
- VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
- VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
- // The exit blocks are unreachable, remove their recipes to make sure no
- // users remain that may pessimize transforms.
- for (auto *EB : Plan.getExitBlocks()) {
- for (VPRecipeBase &R : make_early_inc_range(*EB))
- R.eraseFromParent();
- }
+ if (MiddleVPBB->getNumSuccessors() != 2)
return;
- }
-
- // The connection order corresponds to the operands of the conditional branch,
- // with the middle block already connected to the exit block.
- VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
// Here we use the same DebugLoc as the scalar loop latch terminator instead
@@ -565,13 +559,17 @@ void VPlanTransforms::prepareForVectorization(
// different line numbers and we want to avoid awkward line stepping while
// debugging. Eg. if the compare has got a line number inside the loop.
VPBuilder Builder(MiddleVPBB);
- VPValue *Cmp =
- TailFolded
- ? Plan.getOrAddLiveIn(ConstantInt::getTrue(
- IntegerType::getInt1Ty(TripCount->getType()->getContext())))
- : Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
- &Plan.getVectorTripCount(),
- ScalarLatchTerm->getDebugLoc(), "cmp.n");
+ VPValue *Cmp;
+ if (TailFolded)
+ Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
+ IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ else if (!RequiresScalarEpilogueCheck)
+ Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
+ IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ else
+ Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
+ &Plan.getVectorTripCount(),
+ ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3fa6a21b80b17..f4beb5b6d11e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -754,17 +754,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Addend = State.get(getOperand(1), VPLane(0));
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
}
- case VPInstruction::ResumePhi: {
- auto *NewPhi =
- Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
- for (const auto &[IncVPV, PredVPBB] :
- zip(operands(), getParent()->getPredecessors())) {
- Value *IncV = State.get(IncVPV, /* IsScalar */ true);
- BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(cast<VPBasicBlock>(PredVPBB));
- NewPhi->addIncoming(IncV, PredBB);
- }
- return NewPhi;
- }
case VPInstruction::AnyOf: {
Value *A = State.get(getOperand(0));
return Builder.CreateOrReduce(A);
@@ -863,8 +852,7 @@ bool VPInstruction::isVectorToScalar() const {
}
bool VPInstruction::isSingleScalar() const {
- return getOpcode() == VPInstruction::ResumePhi ||
- getOpcode() == Instruction::PHI;
+ return getOpcode() == Instruction::PHI;
}
#if !defined(NDEBUG)
@@ -963,7 +951,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnCond:
- case VPInstruction::ResumePhi:
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
@@ -1020,9 +1007,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ActiveLaneMask:
O << "active lane mask";
break;
- case VPInstruction::ResumePhi:
- O << "resume-phi";
- break;
case VPInstruction::ExplicitVectorLength:
O << "EXPLICIT-VECTOR-LENGTH";
break;
@@ -1130,15 +1114,16 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
void VPPhi::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
- assert(getParent() ==
- getParent()->getPlan()->getVectorLoopRegion()->getEntry() &&
- "VPInstructions with PHI opcodes must be used for header phis only "
- "at the moment");
- BasicBlock *VectorPH = State.CFG.VPBB2IRBB.at(getIncomingBlock(0));
- Value *Start = State.get(getIncomingValue(0), VPLane(0));
- PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, getName());
- Phi->addIncoming(Start, VectorPH);
- State.set(this, Phi, VPLane(0));
+ PHINode *NewPhi = State.Builder.CreatePHI(
+ State.TypeAnalysis.inferScalarType(this), 2, getName());
+ unsigned NumIncoming =
+ getParent()->getNumPredecessors() == 0 ? 1 : getNumIncoming();
+ for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+ Value *IncV = State.get(getIncomingValue(Idx), /* IsScalar */ true);
+ BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(Idx));
+ NewPhi->addIncoming(IncV, PredBB);
+ }
+ State.set(this, NewPhi, VPLane(0));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f1c466e3208be..e1c0e505d7c90 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1732,11 +1732,21 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
- if (VPBB->getNumSuccessors() != 2 ||
- !match(&VPBB->back(), m_BranchOnCond(m_True())))
+ VPValue *Cond;
+ if (VPBB->getNumSuccessors() != 2 || VPBB->empty() ||
+ !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
continue;
- VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
+ unsigned RemovedIdx;
+ if (match(Cond, m_True()))
+ RemovedIdx = 1;
+ else if (match(Cond, m_False()))
+ RemovedIdx = 0;
+ else
+ continue;
+
+ VPBasicBlock *RemovedSucc =
+ cast<VPBasicBlock>(VPBB->getSuccessors()[RemovedIdx]);
const auto &Preds = RemovedSucc->getPredecessors();
assert(count(Preds, VPBB) == 1 &&
"There must be a single edge between VPBB and its successor");
@@ -1745,12 +1755,14 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
// Values coming from VPBB into ResumePhi recipes of RemoveSucc are removed
// from these recipes.
for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
-...
[truncated]
|
@llvm/pr-subscribers-backend-risc-v Author: Florian Hahn (fhahn) ChangesSimplify branch on false, starting with the branch from the middle block Depends on #140405. Patch is 101.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140409.diff 32 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 148b187f4f25d..6c3a2ce941c54 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -490,7 +490,7 @@ class InnerLoopVectorizer {
MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI),
RTChecks(RTChecks), Plan(Plan),
- VectorPHVPB(Plan.getEntry()->getSingleSuccessor()) {}
+ VectorPHVPB(Plan.getEntry()->getSuccessors()[1]) {}
virtual ~InnerLoopVectorizer() = default;
@@ -2365,22 +2365,19 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
VPBlockBase *PreVectorPH = VectorPHVPB->getSinglePredecessor();
- if (PreVectorPH->getNumSuccessors() != 1) {
- assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors");
- assert(PreVectorPH->getSuccessors()[0] == ScalarPH &&
- "Unexpected successor");
- VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB);
- VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB);
- PreVectorPH = CheckVPIRBB;
- }
+ assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors");
+ assert(PreVectorPH->getSuccessors()[0] == ScalarPH && "Unexpected successor");
+ VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB);
+ VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB);
+ PreVectorPH = CheckVPIRBB;
VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH);
PreVectorPH->swapSuccessors();
// We just connected a new block to the scalar preheader. Update all
// ResumePhis by adding an incoming value for it, replicating the last value.
for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
- auto *ResumePhi = dyn_cast<VPInstruction>(&R);
- if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
+ auto *ResumePhi = cast<VPPhi>(&R);
+ if (ResumePhi->getNumIncoming() == ScalarPH->getNumPredecessors())
continue;
ResumePhi->addOperand(
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
@@ -2463,9 +2460,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
LoopBypassBlocks.push_back(TCCheckBlock);
-
- // TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here.
- introduceCheckBlockInVPlan(TCCheckBlock);
}
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2530,7 +2524,8 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
for (auto &R : make_early_inc_range(*VPBB)) {
- assert(!R.isPhi() && "Tried to move phi recipe to end of block");
+ assert((IRVPBB->empty() || IRVPBB->back().isPhi() || !R.isPhi()) &&
+ "Tried to move phi recipe to end of block");
R.moveBefore(*IRVPBB, IRVPBB->end());
}
@@ -7768,10 +7763,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
// created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
// over the incoming values correctly.
using namespace VPlanPatternMatch;
- auto IsResumePhi = [](VPUser *U) {
- auto *VPI = dyn_cast<VPInstruction>(U);
- return VPI && VPI->getOpcode() == VPInstruction::ResumePhi;
- };
+ auto IsResumePhi = [](VPUser *U) { return isa<VPPhi>(U); };
assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 &&
"ResumePhi must have a single user");
auto *EpiResumePhiVPI =
@@ -7837,7 +7829,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 1. Set up the skeleton for vectorization, including vector pre-header and
// middle block. The vector loop is created during VPlan execution.
- VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor());
+ VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSuccessors()[1]);
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
if (VectorizingEpilogue)
VPlanTransforms::removeDeadRecipes(BestVPlan);
@@ -8070,7 +8062,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
- introduceCheckBlockInVPlan(TCCheckBlock);
+ if (!ForEpilogue)
+ introduceCheckBlockInVPlan(TCCheckBlock);
return TCCheckBlock;
}
@@ -8200,7 +8193,6 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
Plan.setEntry(NewEntry);
// OldEntry is now dead and will be cleaned up when the plan gets destroyed.
- introduceCheckBlockInVPlan(Insert);
return Insert;
}
@@ -9146,9 +9138,8 @@ static VPInstruction *addResumePhiRecipeForInduction(
WideIV->getDebugLoc());
}
- auto *ResumePhiRecipe =
- ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
- WideIV->getDebugLoc(), "bc.resume.val");
+ auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi(
+ {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
return ResumePhiRecipe;
}
@@ -9160,7 +9151,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
DenseMap<VPValue *, VPValue *> &IVEndValues) {
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
auto *ScalarPH = Plan.getScalarPreheader();
- auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
+ auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
VPBuilder VectorPHBuilder(
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
@@ -9177,8 +9168,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
&Plan.getVectorTripCount())) {
- assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi &&
- "Expected a ResumePhi");
+ assert(isa<VPPhi>(ResumePhi) && "Expected a phi");
IVEndValues[WideIVR] = ResumePhi->getOperand(0);
ScalarPhiIRI->addOperand(ResumePhi);
continue;
@@ -9203,8 +9193,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
"vector.recur.extract");
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
- auto *ResumePhiR = ScalarPHBuilder.createNaryOp(
- VPInstruction::ResumePhi,
+ auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
ScalarPhiIRI->addOperand(ResumePhiR);
}
@@ -10406,9 +10395,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
VPI->setOperand(1, Freeze);
if (UpdateResumePhis)
OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) {
- return Freeze != &U && isa<VPInstruction>(&U) &&
- cast<VPInstruction>(&U)->getOpcode() ==
- VPInstruction::ResumePhi;
+ return Freeze != &U && isa<VPPhi>(&U);
});
}
};
@@ -10421,13 +10408,12 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
// scalar (which will become vector) epilogue loop we are done. Otherwise
// create it below.
if (any_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) {
- return match(&R, m_VPInstruction<VPInstruction::ResumePhi>(
- m_Specific(VectorTC), m_SpecificInt(0)));
+ return match(&R, m_VPInstruction<Instruction::PHI>(m_Specific(VectorTC),
+ m_SpecificInt(0)));
}))
return;
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
- ScalarPHBuilder.createNaryOp(
- VPInstruction::ResumePhi,
+ ScalarPHBuilder.createScalarPhi(
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
"vec.epilog.resume.val");
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e634de1e17c69..ebbffef2a4afe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -887,11 +887,6 @@ class VPInstruction : public VPRecipeWithIRFlags,
SLPStore,
ActiveLaneMask,
ExplicitVectorLength,
- /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
- /// The first operand is the incoming value from the predecessor in VPlan,
- /// the second operand is the incoming value for all other predecessors
- /// (which are currently not modeled in VPlan).
- ResumePhi,
CalculateTripCountMinusVF,
// Increment the canonical IV separately for each unrolled part.
CanonicalIVIncrementForPart,
@@ -1160,6 +1155,8 @@ class VPPhiAccessors {
return getAsRecipe()->getNumOperands();
}
+ void removeIncomingValue(VPBlockBase *VPB) const;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
@@ -1170,11 +1167,15 @@ struct VPPhi : public VPInstruction, public VPPhiAccessors {
VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
: VPInstruction(Instruction::PHI, Operands, DL, Name) {}
- static inline bool classof(const VPRecipeBase *U) {
+ static inline bool classof(const VPUser *U) {
auto *R = dyn_cast<VPInstruction>(U);
return R && R->getOpcode() == Instruction::PHI;
}
+ VPPhi *clone() override {
+ return new VPPhi(operands(), getDebugLoc(), getName());
+ }
+
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3737,6 +3738,15 @@ VPPhiAccessors::getIncomingBlock(unsigned Idx) const {
return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
}
+inline void VPPhiAccessors::removeIncomingValue(VPBlockBase *VPB) const {
+ VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
+ const VPBasicBlock *Parent = R->getParent();
+ assert(R->getNumOperands() == Parent->getNumPredecessors());
+ auto I = find(Parent->getPredecessors(), VPB);
+ R->getOperand(I - Parent->getPredecessors().begin())->removeUser(*R);
+ R->removeOperand(I - Parent->getPredecessors().begin());
+}
+
/// A special type of VPBasicBlock that wraps an existing IR basic block.
/// Recipes of the block get added before the first non-phi instruction in the
/// wrapped block.
@@ -4246,7 +4256,8 @@ class VPlan {
/// that this relies on unneeded branches to the scalar tail loop being
/// removed.
bool hasScalarTail() const {
- return getScalarPreheader()->getNumPredecessors() != 0;
+ return getScalarPreheader()->getNumPredecessors() != 0 &&
+ getScalarPreheader()->getSinglePredecessor() != getEntry();
}
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index ac0f30cb4693c..926490bfad7d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -101,7 +101,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return inferScalarType(R->getOperand(0));
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::Not:
- case VPInstruction::ResumePhi:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::AnyOf:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 287bc93ce496a..204af8f12311b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -507,8 +507,12 @@ void VPlanTransforms::prepareForVectorization(
cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), Range);
HandledUncountableEarlyExit = true;
+ } else {
+ for (VPRecipeBase &R : cast<VPIRBasicBlock>(EB)->phis()) {
+ if (auto *PhiR = dyn_cast<VPIRPhi>(&R))
+ PhiR->removeIncomingValue(Pred);
+ }
}
-
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
}
@@ -541,23 +545,13 @@ void VPlanTransforms::prepareForVectorization(
// Thus if tail is to be folded, we know we don't need to run the
// remainder and we can set the condition to true.
// 3) Otherwise, construct a runtime check.
+ VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
+ // Also connect the entry block to the scalar preheader.
+ VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
+ Plan.getEntry()->swapSuccessors();
- if (!RequiresScalarEpilogueCheck) {
- if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
- VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
- VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
- // The exit blocks are unreachable, remove their recipes to make sure no
- // users remain that may pessimize transforms.
- for (auto *EB : Plan.getExitBlocks()) {
- for (VPRecipeBase &R : make_early_inc_range(*EB))
- R.eraseFromParent();
- }
+ if (MiddleVPBB->getNumSuccessors() != 2)
return;
- }
-
- // The connection order corresponds to the operands of the conditional branch,
- // with the middle block already connected to the exit block.
- VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
// Here we use the same DebugLoc as the scalar loop latch terminator instead
@@ -565,13 +559,17 @@ void VPlanTransforms::prepareForVectorization(
// different line numbers and we want to avoid awkward line stepping while
// debugging. Eg. if the compare has got a line number inside the loop.
VPBuilder Builder(MiddleVPBB);
- VPValue *Cmp =
- TailFolded
- ? Plan.getOrAddLiveIn(ConstantInt::getTrue(
- IntegerType::getInt1Ty(TripCount->getType()->getContext())))
- : Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
- &Plan.getVectorTripCount(),
- ScalarLatchTerm->getDebugLoc(), "cmp.n");
+ VPValue *Cmp;
+ if (TailFolded)
+ Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
+ IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ else if (!RequiresScalarEpilogueCheck)
+ Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
+ IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ else
+ Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
+ &Plan.getVectorTripCount(),
+ ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3fa6a21b80b17..f4beb5b6d11e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -754,17 +754,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Addend = State.get(getOperand(1), VPLane(0));
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
}
- case VPInstruction::ResumePhi: {
- auto *NewPhi =
- Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
- for (const auto &[IncVPV, PredVPBB] :
- zip(operands(), getParent()->getPredecessors())) {
- Value *IncV = State.get(IncVPV, /* IsScalar */ true);
- BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(cast<VPBasicBlock>(PredVPBB));
- NewPhi->addIncoming(IncV, PredBB);
- }
- return NewPhi;
- }
case VPInstruction::AnyOf: {
Value *A = State.get(getOperand(0));
return Builder.CreateOrReduce(A);
@@ -863,8 +852,7 @@ bool VPInstruction::isVectorToScalar() const {
}
bool VPInstruction::isSingleScalar() const {
- return getOpcode() == VPInstruction::ResumePhi ||
- getOpcode() == Instruction::PHI;
+ return getOpcode() == Instruction::PHI;
}
#if !defined(NDEBUG)
@@ -963,7 +951,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnCond:
- case VPInstruction::ResumePhi:
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
@@ -1020,9 +1007,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ActiveLaneMask:
O << "active lane mask";
break;
- case VPInstruction::ResumePhi:
- O << "resume-phi";
- break;
case VPInstruction::ExplicitVectorLength:
O << "EXPLICIT-VECTOR-LENGTH";
break;
@@ -1130,15 +1114,16 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
void VPPhi::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
- assert(getParent() ==
- getParent()->getPlan()->getVectorLoopRegion()->getEntry() &&
- "VPInstructions with PHI opcodes must be used for header phis only "
- "at the moment");
- BasicBlock *VectorPH = State.CFG.VPBB2IRBB.at(getIncomingBlock(0));
- Value *Start = State.get(getIncomingValue(0), VPLane(0));
- PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, getName());
- Phi->addIncoming(Start, VectorPH);
- State.set(this, Phi, VPLane(0));
+ PHINode *NewPhi = State.Builder.CreatePHI(
+ State.TypeAnalysis.inferScalarType(this), 2, getName());
+ unsigned NumIncoming =
+ getParent()->getNumPredecessors() == 0 ? 1 : getNumIncoming();
+ for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+ Value *IncV = State.get(getIncomingValue(Idx), /* IsScalar */ true);
+ BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(Idx));
+ NewPhi->addIncoming(IncV, PredBB);
+ }
+ State.set(this, NewPhi, VPLane(0));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f1c466e3208be..e1c0e505d7c90 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1732,11 +1732,21 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
- if (VPBB->getNumSuccessors() != 2 ||
- !match(&VPBB->back(), m_BranchOnCond(m_True())))
+ VPValue *Cond;
+ if (VPBB->getNumSuccessors() != 2 || VPBB->empty() ||
+ !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
continue;
- VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
+ unsigned RemovedIdx;
+ if (match(Cond, m_True()))
+ RemovedIdx = 1;
+ else if (match(Cond, m_False()))
+ RemovedIdx = 0;
+ else
+ continue;
+
+ VPBasicBlock *RemovedSucc =
+ cast<VPBasicBlock>(VPBB->getSuccessors()[RemovedIdx]);
const auto &Preds = RemovedSucc->getPredecessors();
assert(count(Preds, VPBB) == 1 &&
"There must be a single edge between VPBB and its successor");
@@ -1745,12 +1755,14 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
// Values coming from VPBB into ResumePhi recipes of RemoveSucc are removed
// from these recipes.
for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
-...
[truncated]
|
Simplify branch on false, starting with the branch from the middle block
to the scalar preheader. Initially this helps simplifying the initial VPlan construction.
Depends on #140405.