-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Remove loop region in optimizeForVFAndUF. #108378
Changes from all commits
d3614bc
5f8fabe
c68ddd1
f0421c6
4a0eb12
9499aaa
a4843b5
f5d2bc6
1222e23
706b681
71436fc
38cbdf6
cc43362
e758945
d4e8c7e
1d4b2e6
98529f3
0c76e9d
dd45cad
e72a71f
407dbc1
af48fcc
f51412a
f2b5e53
1f4febc
3d412a1
56a4181
88fcf60
303ce93
3f016cb
430c369
f9db2d0
cabc591
057f2e9
60a046a
fe2c3a5
26c94b1
92c0ccc
05b2e4e
df67f2e
e17003f
f180edb
71ff80a
911c50a
4f74827
d17571d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2394,12 +2394,12 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr, | |
// End if-block. | ||
VPRegionBlock *Parent = RepRecipe->getParent()->getParent(); | ||
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false; | ||
assert((Parent || all_of(RepRecipe->operands(), | ||
[](VPValue *Op) { | ||
return Op->isDefinedOutsideLoopRegions(); | ||
})) && | ||
"Expected a recipe is either within a region or all of its operands " | ||
"are defined outside the vectorized region."); | ||
assert( | ||
(Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() || | ||
all_of(RepRecipe->operands(), | ||
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) && | ||
"Expected a recipe is either within a region or all of its operands " | ||
"are defined outside the vectorized region."); | ||
if (IfPredicateInstr) | ||
PredicatedInstructions.push_back(Cloned); | ||
} | ||
|
@@ -3012,6 +3012,11 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { | |
getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State); | ||
} | ||
|
||
// Don't apply optimizations below when no vector region remains, as they all | ||
// require a vector loop at the moment. | ||
if (!State.Plan->getVectorLoopRegion()) | ||
return; | ||
|
||
Comment on lines
+3017
to
+3019
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Comment why this is placed here, i.e., why everything above should work even if the vector loop region was removed, and everything below should not. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added, thanks. |
||
for (Instruction *PI : PredicatedInstructions) | ||
sinkScalarOperands(&*PI); | ||
|
||
|
@@ -7744,6 +7749,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |
|
||
// 1. Set up the skeleton for vectorization, including vector pre-header and | ||
// middle block. The vector loop is created during VPlan execution. | ||
VPBasicBlock *VectorPH = | ||
cast<VPBasicBlock>(BestVPlan.getEntry()->getSingleSuccessor()); | ||
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton( | ||
ExpandedSCEVs ? *ExpandedSCEVs : State.ExpandedSCEVs); | ||
if (VectorizingEpilogue) | ||
|
@@ -7781,7 +7788,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |
BestVPlan.prepareToExecute( | ||
ILV.getTripCount(), | ||
ILV.getOrCreateVectorTripCount(ILV.LoopVectorPreHeader), State); | ||
replaceVPBBWithIRVPBB(BestVPlan.getVectorPreheader(), State.CFG.PrevBB); | ||
replaceVPBBWithIRVPBB(VectorPH, State.CFG.PrevBB); | ||
|
||
BestVPlan.execute(&State); | ||
|
||
|
@@ -7807,30 +7814,31 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |
// 2.6. Maintain Loop Hints | ||
// Keep all loop hints from the original loop on the vector loop (we'll | ||
// replace the vectorizer-specific hints below). | ||
MDNode *OrigLoopID = OrigLoop->getLoopID(); | ||
if (auto *LoopRegion = BestVPlan.getVectorLoopRegion()) { | ||
MDNode *OrigLoopID = OrigLoop->getLoopID(); | ||
|
||
std::optional<MDNode *> VectorizedLoopID = | ||
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, | ||
LLVMLoopVectorizeFollowupVectorized}); | ||
|
||
VPBasicBlock *HeaderVPBB = | ||
BestVPlan.getVectorLoopRegion()->getEntryBasicBlock(); | ||
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]); | ||
if (VectorizedLoopID) | ||
L->setLoopID(*VectorizedLoopID); | ||
else { | ||
// Keep all loop hints from the original loop on the vector loop (we'll | ||
// replace the vectorizer-specific hints below). | ||
if (MDNode *LID = OrigLoop->getLoopID()) | ||
L->setLoopID(LID); | ||
|
||
LoopVectorizeHints Hints(L, true, *ORE); | ||
Hints.setAlreadyVectorized(); | ||
std::optional<MDNode *> VectorizedLoopID = | ||
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, | ||
LLVMLoopVectorizeFollowupVectorized}); | ||
|
||
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); | ||
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]); | ||
if (VectorizedLoopID) { | ||
L->setLoopID(*VectorizedLoopID); | ||
} else { | ||
// Keep all loop hints from the original loop on the vector loop (we'll | ||
// replace the vectorizer-specific hints below). | ||
if (MDNode *LID = OrigLoop->getLoopID()) | ||
L->setLoopID(LID); | ||
|
||
LoopVectorizeHints Hints(L, true, *ORE); | ||
Hints.setAlreadyVectorized(); | ||
} | ||
TargetTransformInfo::UnrollingPreferences UP; | ||
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE); | ||
if (!UP.UnrollVectorizedLoop || VectorizingEpilogue) | ||
addRuntimeUnrollDisableMetaData(L); | ||
} | ||
TargetTransformInfo::UnrollingPreferences UP; | ||
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE); | ||
if (!UP.UnrollVectorizedLoop || VectorizingEpilogue) | ||
addRuntimeUnrollDisableMetaData(L); | ||
|
||
// 3. Fix the vectorized code: take care of header phi's, live-outs, | ||
// predication, updating analyses. | ||
|
@@ -7839,15 +7847,18 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |
ILV.printDebugTracesAtEnd(); | ||
|
||
// 4. Adjust branch weight of the branch in the middle block. | ||
auto *MiddleTerm = | ||
cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator()); | ||
if (MiddleTerm->isConditional() && | ||
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { | ||
// Assume that `Count % VectorTripCount` is equally distributed. | ||
unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue(); | ||
assert(TripCount > 0 && "trip count should not be zero"); | ||
const uint32_t Weights[] = {1, TripCount - 1}; | ||
setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); | ||
if (BestVPlan.getVectorLoopRegion()) { | ||
auto *MiddleVPBB = BestVPlan.getMiddleBlock(); | ||
auto *MiddleTerm = | ||
cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator()); | ||
if (MiddleTerm->isConditional() && | ||
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { | ||
// Assume that `Count % VectorTripCount` is equally distributed. | ||
unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue(); | ||
assert(TripCount > 0 && "trip count should not be zero"); | ||
const uint32_t Weights[] = {1, TripCount - 1}; | ||
setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); | ||
} | ||
} | ||
|
||
return State.ExpandedSCEVs; | ||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -555,7 +555,9 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { | |||||||
template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) { | ||||||||
if (P && P->isReplicator()) { | ||||||||
P = P->getParent(); | ||||||||
assert(!cast<VPRegionBlock>(P)->isReplicator() && | ||||||||
// Multiple loop regions can be nested, but replicate regions can only be | ||||||||
// nested inside a loop region or must be outside any other region. | ||||||||
assert((!P || !cast<VPRegionBlock>(P)->isReplicator()) && | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
? Worth noting that a replicate region may either be nested within a loop region or, if that loop region has been removed, sit outside any region, but it must not be nested within another replicate region. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added a comment but left the check as is for now. |
||||||||
"unexpected nested replicate regions"); | ||||||||
} | ||||||||
return P; | ||||||||
|
@@ -934,7 +936,8 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, | |||||||
|
||||||||
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); | ||||||||
// FIXME: Model VF * UF computation completely in VPlan. | ||||||||
assert(VFxUF.getNumUsers() && "VFxUF expected to always have users"); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this related? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We may remove the users (the canonical IV increment). Updated the assertion to account for that. |
||||||||
assert((!getVectorLoopRegion() || VFxUF.getNumUsers()) && | ||||||||
"VFxUF expected to always have users"); | ||||||||
unsigned UF = getUF(); | ||||||||
if (VF.getNumUsers()) { | ||||||||
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF); | ||||||||
|
@@ -988,12 +991,18 @@ void VPlan::execute(VPTransformState *State) { | |||||||
for (VPBlockBase *Block : RPOT) | ||||||||
Block->execute(State); | ||||||||
|
||||||||
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); | ||||||||
State->CFG.DTU.flush(); | ||||||||
|
||||||||
auto *LoopRegion = getVectorLoopRegion(); | ||||||||
if (!LoopRegion) | ||||||||
return; | ||||||||
|
||||||||
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock(); | ||||||||
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; | ||||||||
|
||||||||
// Fix the latch value of canonical, reduction and first-order recurrences | ||||||||
// phis in the vector loop. | ||||||||
VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); | ||||||||
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock(); | ||||||||
for (VPRecipeBase &R : Header->phis()) { | ||||||||
// Skip phi-like recipes that generate their backedege values themselves. | ||||||||
if (isa<VPWidenPHIRecipe>(&R)) | ||||||||
|
@@ -1032,8 +1041,6 @@ void VPlan::execute(VPTransformState *State) { | |||||||
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar); | ||||||||
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB); | ||||||||
} | ||||||||
|
||||||||
State->CFG.DTU.flush(); | ||||||||
} | ||||||||
|
||||||||
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { | ||||||||
|
@@ -1046,14 +1053,14 @@ VPRegionBlock *VPlan::getVectorLoopRegion() { | |||||||
// TODO: Cache if possible. | ||||||||
for (VPBlockBase *B : vp_depth_first_shallow(getEntry())) | ||||||||
if (auto *R = dyn_cast<VPRegionBlock>(B)) | ||||||||
return R; | ||||||||
return R->isReplicator() ? nullptr : R; | ||||||||
return nullptr; | ||||||||
} | ||||||||
|
||||||||
const VPRegionBlock *VPlan::getVectorLoopRegion() const { | ||||||||
for (const VPBlockBase *B : vp_depth_first_shallow(getEntry())) | ||||||||
if (auto *R = dyn_cast<VPRegionBlock>(B)) | ||||||||
return R; | ||||||||
return R->isReplicator() ? nullptr : R; | ||||||||
return nullptr; | ||||||||
} | ||||||||
|
||||||||
|
@@ -1399,11 +1406,17 @@ void VPlanIngredient::print(raw_ostream &O) const { | |||||||
|
||||||||
#endif | ||||||||
|
||||||||
bool VPValue::isDefinedOutsideLoopRegions() const { | ||||||||
return !hasDefiningRecipe() || | ||||||||
!getDefiningRecipe()->getParent()->getEnclosingLoopRegion(); | ||||||||
/// Returns true if there is a vector loop region and \p VPV is defined in a | ||||||||
/// loop region. | ||||||||
static bool isDefinedInsideLoopRegions(const VPValue *VPV) { | ||||||||
const VPRecipeBase *DefR = VPV->getDefiningRecipe(); | ||||||||
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() || | ||||||||
DefR->getParent()->getEnclosingLoopRegion()); | ||||||||
} | ||||||||
|
||||||||
bool VPValue::isDefinedOutsideLoopRegions() const { | ||||||||
return !isDefinedInsideLoopRegions(this); | ||||||||
} | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. post-commit nit:
Suggested change
|
||||||||
void VPValue::replaceAllUsesWith(VPValue *New) { | ||||||||
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; }); | ||||||||
} | ||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -794,12 +794,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | |
return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1)); | ||
} | ||
|
||
/// Try to simplify the recipes in \p Plan | ||
static void simplifyRecipes(VPlan &Plan) { | ||
/// Try to simplify the recipes in \p Plan. Use \p CanonicalIVTy as type for all | ||
/// un-typed live-ins in VPTypeAnalysis. | ||
static void simplifyRecipes(VPlan &Plan, Type *CanonicalIVTy) { | ||
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( | ||
Plan.getEntry()); | ||
Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType(); | ||
VPTypeAnalysis TypeInfo(CanonicalIVType); | ||
VPTypeAnalysis TypeInfo(CanonicalIVTy); | ||
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { | ||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { | ||
simplifyRecipe(R, TypeInfo); | ||
|
@@ -812,8 +812,8 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, | |
PredicatedScalarEvolution &PSE) { | ||
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan"); | ||
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan"); | ||
VPBasicBlock *ExitingVPBB = | ||
Plan.getVectorLoopRegion()->getExitingBasicBlock(); | ||
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); | ||
VPBasicBlock *ExitingVPBB = VectorRegion->getExitingBasicBlock(); | ||
auto *Term = &ExitingVPBB->back(); | ||
// Try to simplify the branch condition if TC <= VF * UF when preparing to | ||
// execute the plan for the main vector loop. We only do this if the | ||
|
@@ -837,14 +837,42 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, | |
!SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C)) | ||
return; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Say something about what is about to happen now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks |
||
LLVMContext &Ctx = SE.getContext(); | ||
auto *BOC = new VPInstruction( | ||
VPInstruction::BranchOnCond, | ||
{Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}, Term->getDebugLoc()); | ||
// The vector loop region only executes once. If possible, completely remove | ||
// the region, otherwise replace the terminator controlling the latch with | ||
// (BranchOnCond true). | ||
auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry()); | ||
auto *CanIVTy = Plan.getCanonicalIV()->getScalarType(); | ||
if (all_of( | ||
Header->phis(), | ||
IsaPred<VPCanonicalIVPHIRecipe, VPFirstOrderRecurrencePHIRecipe>)) { | ||
for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) { | ||
auto *HeaderPhiR = cast<VPHeaderPHIRecipe>(&HeaderR); | ||
HeaderPhiR->replaceAllUsesWith(HeaderPhiR->getStartValue()); | ||
HeaderPhiR->eraseFromParent(); | ||
} | ||
|
||
Term->eraseFromParent(); | ||
ExitingVPBB->appendRecipe(BOC); | ||
VPBlockBase *Preheader = VectorRegion->getSinglePredecessor(); | ||
VPBlockBase *Exit = VectorRegion->getSingleSuccessor(); | ||
VPBlockUtils::disconnectBlocks(Preheader, VectorRegion); | ||
VPBlockUtils::disconnectBlocks(VectorRegion, Exit); | ||
|
||
for (VPBlockBase *B : vp_depth_first_shallow(VectorRegion->getEntry())) | ||
B->setParent(nullptr); | ||
|
||
VPBlockUtils::connectBlocks(Preheader, Header); | ||
VPBlockUtils::connectBlocks(ExitingVPBB, Exit); | ||
simplifyRecipes(Plan, CanIVTy); | ||
} else { | ||
// The vector region contains header phis for which we cannot remove the | ||
// loop region yet. | ||
LLVMContext &Ctx = SE.getContext(); | ||
auto *BOC = new VPInstruction( | ||
VPInstruction::BranchOnCond, | ||
{Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}, Term->getDebugLoc()); | ||
ExitingVPBB->appendRecipe(BOC); | ||
} | ||
|
||
Term->eraseFromParent(); | ||
VPlanTransforms::removeDeadRecipes(Plan); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth committing this replacement of recursivelyDeleteDeadRecipes() separately? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks |
||
|
||
Plan.setVF(BestVF); | ||
|
@@ -1258,10 +1286,10 @@ void VPlanTransforms::optimize(VPlan &Plan) { | |
removeRedundantCanonicalIVs(Plan); | ||
removeRedundantInductionCasts(Plan); | ||
|
||
simplifyRecipes(Plan); | ||
simplifyRecipes(Plan, Plan.getCanonicalIV()->getScalarType()); | ||
legalizeAndOptimizeInductions(Plan); | ||
removeRedundantExpandSCEVRecipes(Plan); | ||
simplifyRecipes(Plan); | ||
simplifyRecipes(Plan, Plan.getCanonicalIV()->getScalarType()); | ||
removeDeadRecipes(Plan); | ||
|
||
createAndOptimizeReplicateRegions(Plan); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,15 +80,13 @@ define void @powi_call(ptr %P) { | |
; CHECK: [[VECTOR_PH]]: | ||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] | ||
; CHECK: [[VECTOR_BODY]]: | ||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] | ||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 | ||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[TMP0]] | ||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 0 | ||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0 | ||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 | ||
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[WIDE_LOAD]], i32 3) | ||
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TMP2]], align 8 | ||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 | ||
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better (continue to) reuse TMP2 instead of replicating it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is a fall-out from not running non-VPlan-based simple CSE in fixVectorizedLoop |
||
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TMP4]], align 8 | ||
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potential for merging original latch block with middle block? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep |
||
; CHECK: [[MIDDLE_BLOCK]]: | ||
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] | ||
; CHECK: [[SCALAR_PH]]: | ||
|
@@ -102,7 +100,7 @@ define void @powi_call(ptr %P) { | |
; CHECK-NEXT: store double [[POWI]], ptr [[GEP]], align 8 | ||
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 | ||
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 | ||
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] | ||
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] | ||
; CHECK: [[EXIT]]: | ||
; CHECK-NEXT: ret void | ||
; | ||
|
@@ -233,6 +231,5 @@ declare i64 @llvm.fshl.i64(i64, i64, i64) | |
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} | ||
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} | ||
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} | ||
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} | ||
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} | ||
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]} | ||
;. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're actually interested here in asking
if (RepRecipe->getParent()->getEnclosingReplicateRegion())
? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, but such a helper needs to be added. Will check if there are other users that could benefit, thanks.