Skip to content

Commit 7e99893

Browse files
authored
[VPlan] Materialize Build(Struct)Vectors for VPReplicateRecipes. (NFCI) (llvm#151487)
Materialize Build(Struct)Vectors explicitly for VPReplicateRecipes, to serve their users requiring a vector, instead of doing so when unrolling by VF. Now we only need to implicitly build vectors in VPTransformState::get for VPInstructions. Once they are also unrolled by VF we can remove the code-path altogether. PR: llvm#151487
1 parent f5a648f commit 7e99893

File tree

6 files changed

+101
-18
lines changed

6 files changed

+101
-18
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7254,8 +7254,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72547254
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
72557255
// cost model is complete for better cost estimates.
72567256
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF);
7257-
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
7257+
VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
72587258
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
7259+
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
72597260
bool HasBranchWeights =
72607261
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator());
72617262
if (HasBranchWeights) {

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
355355
set(Def, VectorValue);
356356
} else {
357357
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
358+
assert(isa<VPInstruction>(Def) &&
359+
"Explicit BuildVector recipes must have"
360+
"handled packing for non-VPInstructions.");
358361
// Initialize packing with insertelements to start from poison.
359362
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
360363
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
460460
case Instruction::Load:
461461
case VPInstruction::AnyOf:
462462
case VPInstruction::BranchOnCond:
463+
case VPInstruction::BuildStructVector:
464+
case VPInstruction::BuildVector:
463465
case VPInstruction::CalculateTripCountMinusVF:
464466
case VPInstruction::CanonicalIVIncrementForPart:
465467
case VPInstruction::ExplicitVectorLength:

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3282,6 +3282,52 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
32823282
BTC->replaceAllUsesWith(TCMO);
32833283
}
32843284

3285+
/// Insert an explicit Build(Struct)Vector VPInstruction directly after every
/// VPReplicateRecipe that is not single-scalar and that has at least one user
/// which either does not use the recipe's scalars or whose parent region is
/// not the vector loop region. Exactly those users are redirected to the new
/// VPInstruction. Returns early, changing nothing, when \p Plan only has a
/// scalar VF.
void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
3286+
// With a scalar-only VF there is nothing to pack into vectors.
if (Plan.hasScalarVFOnly())
3287+
return;
3288+
3289+
VPTypeAnalysis TypeInfo(Plan);
3290+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
3291+
// Two block ranges are visited: the VPBasicBlocks found by a shallow
// depth-first walk from the plan entry, and those found by a shallow
// depth-first walk from the entry of the vector loop region.
// NOTE(review): "shallow" presumably means nested regions are not entered,
// which matches the "excluding ones in replicate regions" remark below.
auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
3292+
vp_depth_first_shallow(Plan.getEntry()));
3293+
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
3294+
vp_depth_first_shallow(LoopRegion->getEntry()));
3295+
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
3296+
// excluding ones in replicate regions. Those are not materialized explicitly
3297+
// yet. Those vector users are still handled in VPReplicateRegion::execute(),
3298+
// via shouldPack().
3299+
// TODO: materialize build vectors for replicating recipes in replicating
3300+
// regions.
3301+
// TODO: materialize build vectors for VPInstructions.
3302+
for (VPBasicBlock *VPBB :
3303+
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
3304+
// Early-increment iteration: a new recipe is inserted right after the
// current one inside the loop body.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
3305+
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
3306+
// Predicate over users of RepR: true if the user does not use RepR's
// scalars, or if the region containing the user is not LoopRegion.
// The lambda captures RepR before it is null-checked; it is only invoked
// after the `!RepR` test below has short-circuited for null.
auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
3307+
VPRegionBlock *ParentRegion =
3308+
cast<VPRecipeBase>(U)->getParent()->getParent();
3309+
return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
3310+
};
3311+
// Skip non-replicate recipes, single-scalar replicate recipes, and
// replicate recipes with no user matching the predicate.
if (!RepR || RepR->isSingleScalar() ||
3312+
none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
3313+
continue;
3314+
3315+
// Struct-typed results are packed with BuildStructVector, all other
// types with BuildVector.
Type *ScalarTy = TypeInfo.inferScalarType(RepR);
3316+
unsigned Opcode = ScalarTy->isStructTy()
3317+
? VPInstruction::BuildStructVector
3318+
: VPInstruction::BuildVector;
3319+
// The build vector initially takes RepR as its single operand.
auto *BuildVector = new VPInstruction(Opcode, {RepR});
3320+
BuildVector->insertAfter(RepR);
3321+
3322+
// Redirect only the users matching the predicate. BuildVector itself is
// excluded so that it keeps RepR as its operand.
RepR->replaceUsesWithIf(
3323+
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
3324+
VPUser &U, unsigned) {
3325+
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
3326+
});
3327+
}
3328+
}
3329+
}
3330+
32853331
void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
32863332
VPBasicBlock *VectorPHVPBB,
32873333
bool TailByMasking,

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,10 @@ struct VPlanTransforms {
274274
static void materializeBackedgeTakenCount(VPlan &Plan,
275275
VPBasicBlock *VectorPH);
276276

277+
/// Add explicit Build[Struct]Vector recipes that combine multiple scalar
278+
/// values into single vectors.
279+
static void materializeBuildVectors(VPlan &Plan);
280+
277281
/// Materialize VF and VFxUF to be computed explicitly using VPInstructions.
278282
static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
279283
ElementCount VF);

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -464,10 +464,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
464464
VPlanTransforms::removeDeadRecipes(Plan);
465465
}
466466

467-
/// Create a single-scalar clone of \p RepR for lane \p Lane.
468-
static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
469-
Type *IdxTy, VPReplicateRecipe *RepR,
470-
VPLane Lane) {
467+
/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
468+
/// Def2LaneDefs to look up scalar definitions for operands of \p RepR.
469+
static VPReplicateRecipe *
470+
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
471+
VPReplicateRecipe *RepR, VPLane Lane,
472+
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
471473
// Collect the operands at Lane, creating extracts as needed.
472474
SmallVector<VPValue *> NewOps;
473475
for (VPValue *Op : RepR->operands()) {
@@ -480,6 +482,14 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
480482
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
481483
continue;
482484
}
485+
// If Op is a definition that has been unrolled, directly use the clone for
486+
// the corresponding lane.
487+
auto LaneDefs = Def2LaneDefs.find(Op);
488+
if (LaneDefs != Def2LaneDefs.end()) {
489+
NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
490+
continue;
491+
}
492+
483493
// Look through buildvector to avoid unnecessary extracts.
484494
if (match(Op, m_BuildVector())) {
485495
NewOps.push_back(
@@ -512,6 +522,13 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
512522
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
513523
auto VPBBsToUnroll =
514524
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
525+
// A mapping of current VPValue definitions to collections of new VPValues
526+
// defined per lane. Serves to hook-up potential users of current VPValue
527+
// definition that are replicated-per-VF later.
528+
DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
529+
// The removal of current recipes being replaced by new ones needs to be
530+
// delayed until Def2LaneDefs is no longer in use.
531+
SmallVector<VPRecipeBase *> ToRemove;
515532
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
516533
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
517534
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
@@ -523,36 +540,46 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
523540
if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
524541
vputils::isSingleScalar(RepR->getOperand(1))) {
525542
// Stores to invariant addresses need to store the last lane only.
526-
cloneForLane(Plan, Builder, IdxTy, RepR,
527-
VPLane::getLastLaneForVF(VF));
543+
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
544+
Def2LaneDefs);
528545
} else {
529546
// Create single-scalar version of RepR for all lanes.
530547
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
531-
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
548+
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
532549
}
533550
RepR->eraseFromParent();
534551
continue;
535552
}
536553
/// Create single-scalar version of RepR for all lanes.
537554
SmallVector<VPValue *> LaneDefs;
538555
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
539-
LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));
556+
LaneDefs.push_back(
557+
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
540558

559+
Def2LaneDefs[RepR] = LaneDefs;
541560
/// Users that only demand the first lane can use the definition for lane
542561
/// 0.
543562
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
544563
return U.onlyFirstLaneUsed(RepR);
545564
});
546565

547-
// If needed, create a Build(Struct)Vector recipe to insert the scalar
548-
// lane values into a vector.
549-
Type *ResTy = RepR->getUnderlyingInstr()->getType();
550-
VPValue *VecRes = Builder.createNaryOp(
551-
ResTy->isStructTy() ? VPInstruction::BuildStructVector
552-
: VPInstruction::BuildVector,
553-
LaneDefs);
554-
RepR->replaceAllUsesWith(VecRes);
555-
RepR->eraseFromParent();
566+
// Update each build vector user that currently has RepR as its only
567+
// operand, to have all LaneDefs as its operands.
568+
for (VPUser *U : to_vector(RepR->users())) {
569+
auto *VPI = dyn_cast<VPInstruction>(U);
570+
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
571+
VPI->getOpcode() != VPInstruction::BuildStructVector))
572+
continue;
573+
assert(VPI->getNumOperands() == 1 &&
574+
"Build(Struct)Vector must have a single operand before "
575+
"replicating by VF");
576+
VPI->setOperand(0, LaneDefs[0]);
577+
for (VPValue *LaneDef : drop_begin(LaneDefs))
578+
VPI->addOperand(LaneDef);
579+
}
580+
ToRemove.push_back(RepR);
556581
}
557582
}
583+
for (auto *R : reverse(ToRemove))
584+
R->eraseFromParent();
558585
}

0 commit comments

Comments
 (0)