-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[VPlan] Add VPIRInstruction, use for exit block live-outs. #100735
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
2f441bb
b7542dc
540b9c5
0284675
82b1669
de86741
66dfc24
9a40ed0
958f130
aab1463
c8a1add
3dc696b
f66d27e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -8592,7 +8592,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, | |||||
// are modeled in VPlan. Some exiting values are not modeled explicitly yet and | ||||||
// won't be included. Those are un-truncated VPWidenIntOrFpInductionRecipe, | ||||||
// VPWidenPointerInductionRecipe and induction increments. | ||||||
static MapVector<PHINode *, VPValue *> collectUsersInExitBlock( | ||||||
static MapVector<VPIRInstruction *, VPValue *> collectUsersInExitBlock( | ||||||
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, | ||||||
const MapVector<PHINode *, InductionDescriptor> &Inductions) { | ||||||
auto MiddleVPBB = | ||||||
|
@@ -8602,13 +8602,17 @@ static MapVector<PHINode *, VPValue *> collectUsersInExitBlock( | |||||
// from scalar loop only. | ||||||
if (MiddleVPBB->getNumSuccessors() != 2) | ||||||
return {}; | ||||||
MapVector<PHINode *, VPValue *> ExitingValuesToFix; | ||||||
BasicBlock *ExitBB = | ||||||
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); | ||||||
MapVector<VPIRInstruction *, VPValue *> ExitingValuesToFix; | ||||||
VPBasicBlock *ExitVPBB = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0]); | ||||||
BasicBlock *ExitingBB = OrigLoop->getExitingBlock(); | ||||||
for (PHINode &ExitPhi : ExitBB->phis()) { | ||||||
Value *IncomingValue = | ||||||
ExitPhi.getIncomingValueForBlock(ExitingBB); | ||||||
for (VPRecipeBase &R : *ExitVPBB) { | ||||||
auto *IR = dyn_cast<VPIRInstruction>(&R); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
or something else more accurate than IR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||||||
if (!IR) | ||||||
continue; | ||||||
auto *ExitPhi = dyn_cast<PHINode>(&IR->getInstruction()); | ||||||
if (!ExitPhi) | ||||||
break; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Breaking because all remaining recipes are expected to be non phi's? Should the lcssa VPIRI's be considered phi recipes (as do loop header phi recipes) to help ensure they appear before non-phi recipes (VPIRI's) in the VP(IR)BB? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes.
We could but this would pull in more complexity in this patch without much benefits initially. Perhaps better done once a more concrete need arises/as follow up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A separate VPIRPhi subclass of VIPIRInstruction dedicated to phi's would arguably model the distinction between the two more accurately, and help simplify their documentation, operand behavior, execute(), keeping phi's together, iterating over them, etc., but this can be considered as a follow-up. |
||||||
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); | ||||||
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan); | ||||||
// Exit values for inductions are computed and updated outside of VPlan and | ||||||
// independent of induction recipes. | ||||||
|
@@ -8623,16 +8627,15 @@ static MapVector<PHINode *, VPValue *> collectUsersInExitBlock( | |||||
return P && Inductions.contains(P); | ||||||
}))) | ||||||
continue; | ||||||
ExitingValuesToFix.insert({&ExitPhi, V}); | ||||||
ExitingValuesToFix.insert({IR, V}); | ||||||
} | ||||||
return ExitingValuesToFix; | ||||||
} | ||||||
|
||||||
// Add exit values to \p Plan. Extracts and VPLiveOuts are added for each entry | ||||||
// in \p ExitingValuesToFix. | ||||||
static void | ||||||
addUsersInExitBlock(VPlan &Plan, | ||||||
MapVector<PHINode *, VPValue *> &ExitingValuesToFix) { | ||||||
static void addUsersInExitBlock( | ||||||
VPlan &Plan, MapVector<VPIRInstruction *, VPValue *> &ExitingValuesToFix) { | ||||||
if (ExitingValuesToFix.empty()) | ||||||
return; | ||||||
|
||||||
|
@@ -8650,12 +8653,12 @@ addUsersInExitBlock(VPlan &Plan, | |||||
} | ||||||
|
||||||
// Introduce VPUsers modeling the exit values. | ||||||
for (const auto &[ExitPhi, V] : ExitingValuesToFix) { | ||||||
for (const auto &[IR, V] : ExitingValuesToFix) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||||||
VPValue *Ext = B.createNaryOp( | ||||||
VPInstruction::ExtractFromEnd, | ||||||
{V, Plan.getOrAddLiveIn(ConstantInt::get( | ||||||
IntegerType::get(ExitBB->getContext(), 32), 1))}); | ||||||
Plan.addLiveOut(ExitPhi, Ext); | ||||||
IR->addOperand(Ext); | ||||||
} | ||||||
} | ||||||
|
||||||
|
@@ -8668,7 +8671,7 @@ addUsersInExitBlock(VPlan &Plan, | |||||
/// 2. Feed the penultimate value of recurrences to their LCSSA phi users in | ||||||
/// the original exit block using a VPLiveOut. | ||||||
static void addLiveOutsForFirstOrderRecurrences( | ||||||
VPlan &Plan, MapVector<PHINode *, VPValue *> &ExitingValuesToFix) { | ||||||
VPlan &Plan, MapVector<VPIRInstruction *, VPValue *> &ExitingValuesToFix) { | ||||||
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); | ||||||
|
||||||
// Start by finding out if middle block branches to scalar preheader, which is | ||||||
|
@@ -8802,17 +8805,14 @@ static void addLiveOutsForFirstOrderRecurrences( | |||||
// No edge from the middle block to the unique exit block has been inserted | ||||||
// and there is nothing to fix from vector loop; phis should have incoming | ||||||
// from scalar loop only. | ||||||
if (ExitingValuesToFix.empty()) | ||||||
continue; | ||||||
for (User *U : FORPhi->users()) { | ||||||
auto *UI = cast<Instruction>(U); | ||||||
if (UI->getParent() != ExitBB) | ||||||
for (const auto &[IR, V] : ExitingValuesToFix) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||||||
if (V != FOR) | ||||||
continue; | ||||||
VPValue *Ext = MiddleBuilder.createNaryOp( | ||||||
VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {}, | ||||||
"vector.recur.extract.for.phi"); | ||||||
Plan.addLiveOut(cast<PHINode>(UI), Ext); | ||||||
ExitingValuesToFix.erase(cast<PHINode>(UI)); | ||||||
IR->addOperand(Ext); | ||||||
ExitingValuesToFix.erase(IR); | ||||||
} | ||||||
} | ||||||
} | ||||||
|
@@ -8974,8 +8974,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |||||
"VPBasicBlock"); | ||||||
RecipeBuilder.fixHeaderPhis(); | ||||||
|
||||||
MapVector<PHINode *, VPValue *> ExitingValuesToFix = collectUsersInExitBlock( | ||||||
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); | ||||||
MapVector<VPIRInstruction *, VPValue *> ExitingValuesToFix = | ||||||
collectUsersInExitBlock(OrigLoop, RecipeBuilder, *Plan, | ||||||
Legal->getInductionVars()); | ||||||
|
||||||
addLiveOutsForFirstOrderRecurrences(*Plan, ExitingValuesToFix); | ||||||
addUsersInExitBlock(*Plan, ExitingValuesToFix); | ||||||
|
@@ -10121,7 +10122,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |||||
// directly in VPlan. | ||||||
EpilogILV.setTripCount(MainILV.getTripCount()); | ||||||
for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) { | ||||||
auto *ExpandR = cast<VPExpandSCEVRecipe>(&R); | ||||||
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R); | ||||||
if (!ExpandR) | ||||||
continue; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just noting: the (pre)preheader VPIRBB may now have VPIRI's (that need to be skipped here) in addition to VPExpandSCEVRecipe. |
||||||
auto *ExpandedVal = BestEpiPlan.getOrAddLiveIn( | ||||||
ExpandedSCEVs.find(ExpandR->getSCEV())->second); | ||||||
ExpandR->replaceAllUsesWith(ExpandedVal); | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -860,10 +860,18 @@ VPlan::~VPlan() { | |
delete BackedgeTakenCount; | ||
} | ||
|
||
static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) { | ||
auto *VPIRBB = new VPIRBasicBlock(BB); | ||
for (Instruction &I : | ||
make_range(BB->begin(), BB->getTerminator()->getIterator())) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would a simple There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is intentional to skip the terminators and it allows to insert new recipes at the end of the VPIRBB. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is ok, adding some further thoughts: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, only partially populating would probably be a bit more logic to skip undesired parts. Not adding the branches also have the benefit that we don't remove the original terminator VPIRI from the middle block when adding the VPlan-modeled branch recipes. |
||
VPIRBB->appendRecipe(new VPIRInstruction(I)); | ||
return VPIRBB; | ||
} | ||
|
||
VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, | ||
bool RequiresScalarEpilogueCheck, | ||
bool TailFolded, Loop *TheLoop) { | ||
VPIRBasicBlock *Entry = new VPIRBasicBlock(TheLoop->getLoopPreheader()); | ||
VPIRBasicBlock *Entry = createVPIRBasicBlockFor(TheLoop->getLoopPreheader()); | ||
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); | ||
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader); | ||
Plan->TripCount = | ||
|
@@ -895,7 +903,7 @@ VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, | |
// we unconditionally branch to the scalar preheader. Do nothing. | ||
// 3) Otherwise, construct a runtime check. | ||
BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); | ||
auto *VPExitBlock = new VPIRBasicBlock(IRExitBlock); | ||
auto *VPExitBlock = createVPIRBasicBlockFor(IRExitBlock); | ||
// The connection order corresponds to the operands of the conditional branch. | ||
VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
|
@@ -962,7 +970,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, | |
/// predecessor, which is rewired to the new VPIRBasicBlock. All successors of | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (independent) All recipes of VPBB are moved to the beginning of the newly created VPIRBB, followed by VPIRI's wrapping IRBB's instructions. IRBB must be free of phi's? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth clarifying and asserting (independently). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done in f66509b. IIUC the recipes from VPBB are moved after the existing VPIRInstructions in IRMiddleVPB |
||
/// VPBB, if any, are rewired to the new VPIRBasicBlock. | ||
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { | ||
VPIRBasicBlock *IRMiddleVPBB = new VPIRBasicBlock(IRBB); | ||
VPIRBasicBlock *IRMiddleVPBB = createVPIRBasicBlockFor(IRBB); | ||
for (auto &R : make_early_inc_range(*VPBB)) | ||
R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->end()); | ||
VPBlockBase *PredVPBB = VPBB->getSinglePredecessor(); | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -936,6 +936,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { | |||||||||||||
case VPRecipeBase::VPScalarCastSC: | ||||||||||||||
return true; | ||||||||||||||
case VPRecipeBase::VPInterleaveSC: | ||||||||||||||
case VPRecipeBase::VPIRInstructionSC: | ||||||||||||||
case VPRecipeBase::VPBranchOnMaskSC: | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
trying to keep these in order. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||||||||||||||
case VPRecipeBase::VPWidenLoadEVLSC: | ||||||||||||||
case VPRecipeBase::VPWidenLoadSC: | ||||||||||||||
|
@@ -1403,6 +1404,45 @@ class VPInstruction : public VPRecipeWithIRFlags { | |||||||||||||
bool isSingleScalar() const; | ||||||||||||||
}; | ||||||||||||||
|
||||||||||||||
/// A recipe to wrap on original IR instruction not to be modified during | ||||||||||||||
/// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed, | ||||||||||||||
/// and it is used to add a new incoming value for the single predecessor VPBB. | ||||||||||||||
/// Expect PHIs, VPIRInstructions cannot have any operands. | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would a separate (sub)recipe for phi's be better? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could do if preferred, but it would pull in additional complexity for relatively small gains? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could be, OTOH may simplify rather than complicate, worth evaluating, possibly as a follow-up. |
||||||||||||||
class VPIRInstruction : public VPRecipeBase { | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could be a VPSingleDefRecipe, as IR instructions define at most one value, but this currently serves only as a placeholder to advance the insert position, and as a live-out use/operand. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's at the moment intentionally not a VPSingleDefRecipe, as VPIRIntructions at the moment don't need their def-use chains modeled in VPlan. Making them VPSingleDefRecipe without also modeling VPIRInstruction's operands as VPValues would probably be q bit surprising as the def-use chains aren't connected. Might be good as follow-up, however we would still need to decide how to deal with exit phis that have their original operand coming from the scalar loop, for which is not yet modeled in VPlan. |
||||||||||||||
Instruction &I; | ||||||||||||||
|
||||||||||||||
public: | ||||||||||||||
VPIRInstruction(Instruction &I) | ||||||||||||||
: VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {} | ||||||||||||||
|
||||||||||||||
~VPIRInstruction() override = default; | ||||||||||||||
|
||||||||||||||
VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC) | ||||||||||||||
|
||||||||||||||
VPIRInstruction *clone() override { | ||||||||||||||
auto *R = new VPIRInstruction(I); | ||||||||||||||
for (auto *Op : operands()) | ||||||||||||||
R->addOperand(Op); | ||||||||||||||
return R; | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
void execute(VPTransformState &State) override; | ||||||||||||||
|
||||||||||||||
Instruction &getInstruction() { return I; } | ||||||||||||||
|
||||||||||||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||||||||||||||
/// Print the recipe. | ||||||||||||||
void print(raw_ostream &O, const Twine &Indent, | ||||||||||||||
VPSlotTracker &SlotTracker) const override; | ||||||||||||||
#endif | ||||||||||||||
|
||||||||||||||
bool usesScalars(const VPValue *Op) const override { | ||||||||||||||
assert(is_contained(operands(), Op) && | ||||||||||||||
"Op must be an operand of the recipe"); | ||||||||||||||
return true; | ||||||||||||||
} | ||||||||||||||
}; | ||||||||||||||
|
||||||||||||||
/// VPWidenRecipe is a recipe for producing a widened instruction using the | ||||||||||||||
/// opcode and operands of the recipe. This recipe covers most of the | ||||||||||||||
/// traditional vectorization cases where each recipe transforms into a | ||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -856,6 +856,43 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |
} | ||
#endif | ||
|
||
void VPIRInstruction::execute(VPTransformState &State) { | ||
assert(isa<VPIRBasicBlock>(getParent()) && | ||
"VPIRInstructions can only be placed in VPIRBasicBlocks"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Independent: assert also applies to SCEV expand recipe? Should be checked by validation instead? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will add to verifier seprarately, thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved assert to VPlanVerifier. Will add for SCEVExpandRecipe separately. |
||
|
||
assert((isa<PHINode>(&I) || getNumOperands() == 0) && | ||
"Only PHINodes can have extra operands"); | ||
if (getNumOperands() == 1) { | ||
VPValue *ExitValue = getOperand(0); | ||
auto Lane = vputils::isUniformAfterVectorization(ExitValue) | ||
? VPLane::getFirstLane() | ||
: VPLane::getLastLaneForVF(State.VF); | ||
auto *PredVPBB = cast<VPBasicBlock>(getParent()->getSinglePredecessor()); | ||
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; | ||
// Set insertion point in PredBB in case an extract needs to be generated. | ||
// TODO: Model extracts explicitly. | ||
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); | ||
Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane)); | ||
auto *Phi = cast<PHINode>(&I); | ||
Phi->addIncoming(V, PredBB); | ||
} | ||
|
||
State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator())); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this simplify VPIRBasicBlock::execute(), where this setInsertPoint() replaces the one there. Independent: can VPBasicBlock::execute() be simplified, given VPIRBasicBlocks: "A. the first VPBB reuses the loop pre-header BB" - should this reuse be modelled by fusing into a VPIRBasicBlock? Further cases should be simplified once replicate regions are unfolded as a VPlan2VPlan transformation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Unfortunately not yet; We still need to set the default insert point for empty VPIRBasicBlocks and also some IR instructions may be created in VPIRBasicBlock (the pre-preheader) only during skeleton creation, which will be missing from the VPIRBB. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth explaining regarding setting the insertion point, that VPIRBBs and VPIRIs facilitate introducing recipes among existing instructions in existing BBs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment, thanks! |
||
} | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const { | ||
O << Indent << "IR " << I; | ||
|
||
if (getNumOperands() != 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This, e.g., must be true for VPIRPhi's and false for other VPIRI's. |
||
O << " (extra operands: "; | ||
printOperands(O, SlotTracker); | ||
O << ")"; | ||
} | ||
} | ||
#endif | ||
|
||
void VPWidenCallRecipe::execute(VPTransformState &State) { | ||
assert(State.VF.isVector() && "not widening"); | ||
Function *CalledScalarFn = getCalledScalarFunction(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -339,6 +339,7 @@ class VPDef { | |
VPBranchOnMaskSC, | ||
VPDerivedIVSC, | ||
VPExpandSCEVSC, | ||
VPIRInstructionSC, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Recipes that represent phi's need to appear below, for isPhi() to work. Should VPIRInstruction be split into There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Below is just for header phis, so I think it wouldn't apply of VPIRPhi at least for now. I've left it for now as is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's also for PredInstPhi, a non-loop-header if-then hammock phi of a replicate region. It should be for all phi's planned to be generated - to help keep them together at the start of their basic block. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah yes, we have a header phi ids are grouped together separately. |
||
VPInstructionSC, | ||
VPInterleaveSC, | ||
VPReductionEVLSC, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,6 +58,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur | |
; CHECK-NEXT: vp<%2> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.body.preheader>: | ||
; CHECK-NEXT: IR %0 = zext i32 %n to i64 | ||
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) | ||
Comment on lines
+61
to
62
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just noting: VPIRBB with both VPIRI and non-VPIRI recipes. |
||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
|
@@ -141,6 +142,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur | |
; CHECK-NEXT: vp<%2> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.body.preheader>: | ||
; CHECK-NEXT: IR %0 = zext i32 %n to i64 | ||
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
|
@@ -260,6 +262,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur | |
; CHECK-NEXT: vp<%2> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.body.preheader>: | ||
; CHECK-NEXT: IR %0 = zext i32 %n to i64 | ||
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
|
@@ -343,6 +346,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur | |
; CHECK-NEXT: vp<%2> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.body.preheader>: | ||
; CHECK-NEXT: IR %0 = zext i32 %n to i64 | ||
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,12 +60,11 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { | |
; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb<for.end>, scalar.ph | ||
; IF-EVL-INLOOP-EMPTY: | ||
; IF-EVL-INLOOP-NEXT: ir-bb<for.end>: | ||
; IF-EVL-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this capture the RDX_EX operand as the live out did below? Good to see the live-out being placed in the context of exit bb. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||
; IF-EVL-INLOOP-NEXT: No successors | ||
; IF-EVL-INLOOP-EMPTY: | ||
; IF-EVL-INLOOP-NEXT: scalar.ph: | ||
; IF-EVL-INLOOP-NEXT: No successors | ||
; IF-EVL-INLOOP-EMPTY: | ||
; IF-EVL-INLOOP-NEXT: Live-out i32 %add.lcssa = vp<[[RDX_EX]]> | ||
; IF-EVL-INLOOP-NEXT: } | ||
; | ||
|
||
|
@@ -100,12 +99,11 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { | |
; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb<for.end>, scalar.ph | ||
; NO-VP-OUTLOOP-EMPTY: | ||
; NO-VP-OUTLOOP-NEXT: ir-bb<for.end>: | ||
; NO-VP-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] | ||
; NO-VP-OUTLOOP-NEXT: No successors | ||
; NO-VP-OUTLOOP-EMPTY: | ||
; NO-VP-OUTLOOP-NEXT: scalar.ph: | ||
; NO-VP-OUTLOOP-NEXT: No successors | ||
; NO-VP-OUTLOOP-EMPTY: | ||
; NO-VP-OUTLOOP-NEXT: Live-out i32 %add.lcssa = vp<[[RDX_EX]]> | ||
; NO-VP-OUTLOOP-NEXT: } | ||
; | ||
|
||
|
@@ -140,12 +138,11 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { | |
; NO-VP-INLOOP-NEXT: Successor(s): ir-bb<for.end>, scalar.ph | ||
; NO-VP-INLOOP-EMPTY: | ||
; NO-VP-INLOOP-NEXT: ir-bb<for.end>: | ||
; NO-VP-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] | ||
; NO-VP-INLOOP-NEXT: No successors | ||
; NO-VP-INLOOP-EMPTY: | ||
; NO-VP-INLOOP-NEXT: scalar.ph: | ||
; NO-VP-INLOOP-NEXT: No successors | ||
; NO-VP-INLOOP-EMPTY: | ||
; NO-VP-INLOOP-NEXT: Live-out i32 %add.lcssa = vp<[[RDX_EX]]> | ||
; NO-VP-INLOOP-NEXT: } | ||
; | ||
entry: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,11 +14,12 @@ define i8 @widget(ptr %arr, i8 %t9) { | |
; CHECK-NEXT: br i1 [[C]], label [[FOR_PREHEADER:%.*]], label [[BB6]] | ||
; CHECK: for.preheader: | ||
; CHECK-NEXT: [[T1_0_LCSSA:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ] | ||
; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64 | ||
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[ARR1]] to i32 | ||
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[TMP0]] | ||
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[T1_0_LCSSA2]] to i32 | ||
; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64 | ||
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[T1_0_LCSSA3]] to i32 | ||
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]] | ||
; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This ptrtoint seems to now be replicated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think due to insert points now being set slightly differently, preventing SCEV expander to re-use it in this particular case There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Anything worth being (more) careful about when setting insertion points, to facilitate such foldings? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure as this should be cleaned up be later folding passes. |
||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4 | ||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] | ||
; CHECK: vector.scevcheck: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth updating comment above that talks about Exit/phi's.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: can assign a name for this type of map, to clarify and simplify passing it as a parameter below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adjusted to use
SetVector<VPIRInstruction*>
, adding the exiting values as operand early, which should simplify the passed arguments enough hopefully