Skip to content

Commit

Permalink
!fixup address latest comments, thanks!
Browse files Browse the repository at this point in the history
  • Loading branch information
fhahn committed Dec 4, 2024
1 parent 93f3304 commit ce214f5
Show file tree
Hide file tree
Showing 26 changed files with 700 additions and 487 deletions.
107 changes: 53 additions & 54 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,14 +519,14 @@ class InnerLoopVectorizer {
/// the induction resume value, and the value for the bypass block, if needed.
/// \p Step is the SCEV-expanded induction step to use. In cases where the
/// loop skeleton is more complicated (i.e., epilogue vectorization) and the
/// resume values can come from an additional bypass block, the \p
/// AdditionalBypass pair provides this additional bypass block along with the
/// resume value coming from it.
void createInductionResumeVPValue(
VPIRInstruction *InductionPhiIRI, const InductionDescriptor &ID,
Value *Step, ArrayRef<BasicBlock *> BypassBlocks,
VPBuilder &ScalarPHBuilder,
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
/// resume values can come from an additional bypass block, \p
/// AdditionalBypassValue provides the end value on the edge from bypass to
/// this loop.
void createInductionResumeVPValue(VPIRInstruction *InductionPhiIRI,
const InductionDescriptor &ID, Value *Step,
ArrayRef<BasicBlock *> BypassBlocks,
VPBuilder &ScalarPHBuilder,
Value *AdditionalBypassValue = nullptr);

/// Returns the original loop trip count.
Value *getTripCount() const { return TripCount; }
Expand All @@ -539,12 +539,14 @@ class InnerLoopVectorizer {
/// Retrieve the bypass value associated with an original induction header
/// phi.
Value *getInductionAdditionalBypassValue(PHINode *OrigPhi) const {
return Induction2AdditionalBypass.at(OrigPhi).second;
return Induction2AdditionalBypassValue.at(OrigPhi);
}

/// Return the additional bypass block.
BasicBlock *getInductionAdditionalBypassBlock() const {
return Induction2AdditionalBypass.begin()->second.first;
BasicBlock *getAdditionalBypassBlock() const {
assert(AdditionalBypassBlock &&
"Trying to access AdditionalBypassBlock but it has not been set");
return AdditionalBypassBlock;
}

protected:
Expand Down Expand Up @@ -584,11 +586,10 @@ class InnerLoopVectorizer {
/// in the scalar epilogue, from where the vectorized loop left off.
/// In cases where the loop skeleton is more complicated (eg. epilogue
/// vectorization) and the resume values can come from an additional bypass
/// block, the \p AdditionalBypass pair provides information about the bypass
/// block and the end value on the edge from bypass to this loop.
void createInductionResumeVPValues(
const SCEV2ValueTy &ExpandedSCEVs,
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
/// block, the \p AdditionalBypassValue provides the end value on the edge
/// from bypass to this loop.
void createInductionResumeVPValues(const SCEV2ValueTy &ExpandedSCEVs,
Value *AdditionalBypassValue = nullptr);

/// Allow subclasses to override and print debug traces before/after vplan
/// execution, when trace information is requested.
Expand Down Expand Up @@ -678,11 +679,15 @@ class InnerLoopVectorizer {
/// for cleaning the checks, if vectorization turns out unprofitable.
GeneratedRTChecks &RTChecks;

/// Mapping of induction phis to their bypass values and bypass blocks. They
/// The additional bypass block which conditionally skips over the epilogue
/// loop after executing the main loop. Needed to resume inductions and
/// reductions during epilogue vectorization.
BasicBlock *AdditionalBypassBlock = nullptr;

/// Mapping of induction phis to their additional bypass values. They
/// need to be added as operands to phi nodes in the scalar loop preheader
/// after the epilogue skeleton has been created.
DenseMap<PHINode *, std::pair<BasicBlock *, Value *>>
Induction2AdditionalBypass;
DenseMap<PHINode *, Value *> Induction2AdditionalBypassValue;

VPlan &Plan;
};
Expand Down Expand Up @@ -2603,14 +2608,14 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
void InnerLoopVectorizer::createInductionResumeVPValue(
VPIRInstruction *InductionPhiRI, const InductionDescriptor &II, Value *Step,
ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
std::pair<BasicBlock *, Value *> AdditionalBypass) {
Value *AdditionalBypassValue) {
auto *OrigPhi = cast<PHINode>(&InductionPhiRI->getInstruction());
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
assert(VectorTripCount && "Expected valid arguments");

Instruction *OldInduction = Legal->getPrimaryInduction();
Value *EndValue = nullptr;
Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
Value *EndValueFromAdditionalBypass = AdditionalBypassValue;
if (OrigPhi == OldInduction) {
// We know what the end value is.
EndValue = VectorTripCount;
Expand All @@ -2626,11 +2631,11 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
EndValue->setName("ind.end");

// Compute the end value for the additional bypass (if applicable).
if (AdditionalBypass.first) {
B.SetInsertPoint(AdditionalBypass.first,
AdditionalBypass.first->getFirstInsertionPt());
if (AdditionalBypassValue) {
B.SetInsertPoint(getAdditionalBypassBlock(),
getAdditionalBypassBlock()->getFirstInsertionPt());
EndValueFromAdditionalBypass =
emitTransformedIndex(B, AdditionalBypass.second, II.getStartValue(),
emitTransformedIndex(B, AdditionalBypassValue, II.getStartValue(),
Step, II.getKind(), II.getInductionBinOp());
EndValueFromAdditionalBypass->setName("ind.end");
}
Expand All @@ -2644,14 +2649,13 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
"InductionPhiRI should not have any operands");
InductionPhiRI->addOperand(ResumePhiRecipe);

if (AdditionalBypass.first) {
if (AdditionalBypassValue) {
// Store the bypass value here, as it needs to be added as operand to its
// scalar preheader phi node after the epilogue skeleton has been created.
// TODO: Directly add as extra operand to the VPResumePHI recipe.
assert(!Induction2AdditionalBypass.contains(OrigPhi) &&
assert(!Induction2AdditionalBypassValue.contains(OrigPhi) &&
"entry for OrigPhi already exits");
Induction2AdditionalBypass[OrigPhi] = {AdditionalBypass.first,
EndValueFromAdditionalBypass};
Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass;
}
}

Expand All @@ -2670,19 +2674,13 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
}

void InnerLoopVectorizer::createInductionResumeVPValues(
const SCEV2ValueTy &ExpandedSCEVs,
std::pair<BasicBlock *, Value *> AdditionalBypass) {
assert(((AdditionalBypass.first && AdditionalBypass.second) ||
(!AdditionalBypass.first && !AdditionalBypass.second)) &&
"Inconsistent information about additional bypass.");
const SCEV2ValueTy &ExpandedSCEVs, Value *AdditionalBypassValue) {
// We are going to resume the execution of the scalar loop.
// Go over all of the induction variable PHIs of the scalar loop header and
// fix their starting values, which depend on the counter of the last
// iteration of the vectorized loop. The starting values of PHI nodes depend
// on the counter of the last iteration in the vectorized loop. If we come
// from one of the LoopBypassBlocks then we need to start from the original
// start value. If we come from the AdditionalBypass then we need to start
// from its value.
// iteration of the vectorized loop. If we come from one of the
// LoopBypassBlocks then we need to start from the original start value. If we
// come from the AdditionalBypass then we need to start from its value.
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin());
for (VPRecipeBase &R : *Plan.getScalarHeader()) {
Expand All @@ -2695,7 +2693,7 @@ void InnerLoopVectorizer::createInductionResumeVPValues(
const InductionDescriptor &II = Legal->getInductionVars().find(Phi)->second;
createInductionResumeVPValue(PhiR, II, getExpandedStep(II, ExpandedSCEVs),
LoopBypassBlocks, ScalarPHBuilder,
AdditionalBypass);
AdditionalBypassValue);
}
}

Expand Down Expand Up @@ -7744,7 +7742,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 2.5 When vectorizing the epilogue, fix reduction and induction resume
// values from the additional bypass block.
if (VectorizingEpilogue) {
BasicBlock *BypassBlock = ILV.getInductionAdditionalBypassBlock();
BasicBlock *BypassBlock = ILV.getAdditionalBypassBlock();
for (VPRecipeBase &R : *ExitVPBB) {
fixReductionScalarResumeWhenVectorizingEpilog(
&R, State, State.CFG.VPBB2IRBB[ExitVPBB], BypassBlock);
Expand Down Expand Up @@ -7941,6 +7939,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
nullptr, "vec.epilog.iter.check", true);
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
VecEpilogueIterationCountCheck);
AdditionalBypassBlock = VecEpilogueIterationCountCheck;

// Adjust the control flow taking the state info from the main loop
// vectorization into account.
Expand Down Expand Up @@ -8017,12 +8016,13 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
// preheader.
PHINode *EPResumeVal = nullptr;
Type *IdxTy = Legal->getWidestInductionType();
Value *TC = EPI.VectorTripCount;
Constant *Init = ConstantInt::get(IdxTy, 0);

for (PHINode &P : LoopVectorPreHeader->phis()) {
if (P.getType() == IdxTy &&
P.getIncomingValueForBlock(VecEpilogueIterationCountCheck) ==
EPI.VectorTripCount &&
P.getIncomingValueForBlock(EPI.MainLoopIterationCountCheck) ==
ConstantInt::get(IdxTy, 0)) {
P.getIncomingValueForBlock(VecEpilogueIterationCountCheck) == TC &&
P.getIncomingValueForBlock(EPI.MainLoopIterationCountCheck) == Init) {
EPResumeVal = &P;
EPResumeVal->setName("vec.epilog.resume.val");
break;
Expand All @@ -8031,22 +8031,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
if (!EPResumeVal) {
EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val");
EPResumeVal->insertBefore(LoopVectorPreHeader->getFirstNonPHIIt());
EPResumeVal->addIncoming(EPI.VectorTripCount,
VecEpilogueIterationCountCheck);
EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0),
EPI.MainLoopIterationCountCheck);
EPResumeVal->addIncoming(TC, VecEpilogueIterationCountCheck);
EPResumeVal->addIncoming(Init, EPI.MainLoopIterationCountCheck);
}

// Generate induction resume values. These variables save the new starting
// indexes for the scalar loop. They are used to test if there are any tail
// iterations left once the vector loop has completed.
// Note that when the vectorized epilogue is skipped due to iteration count
// check, then the resume value for the induction variable comes from
// the trip count of the main vector loop, hence passing the AdditionalBypass
// argument.
createInductionResumeVPValues(ExpandedSCEVs,
{VecEpilogueIterationCountCheck,
EPI.VectorTripCount} /* AdditionalBypass */);
// the trip count of the main vector loop, hence passing the
// AdditionalBypassValue argument.
createInductionResumeVPValues(
ExpandedSCEVs, EPI.VectorTripCount /* AdditionalBypassValue */);

return {LoopVectorPreHeader, EPResumeVal};
}
Expand Down Expand Up @@ -10358,6 +10355,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
IndPhi = WidenInd->getPHINode();
}
// Hook up to the PHINode generated by a ResumePhi recipe of main
// loop VPlan, which feeds the scalar loop.
ResumeV = IndPhi->getIncomingValueForBlock(L->getLoopPreheader());
}
assert(ResumeV && "Must have a resume value");
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
case VPInstruction::PtrAdd:
case VPInstruction::ResumePhi:
return false;
default:
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
; CHECK: [[VEC_EPILOG_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[START]], 4
Expand All @@ -120,12 +120,12 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[START]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N11]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i64 [ [[IND_END1]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END1]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL14]], %[[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL7]], %[[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1
; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1
Expand Down
Loading

0 comments on commit ce214f5

Please sign in to comment.