@@ -464,10 +464,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
464
464
VPlanTransforms::removeDeadRecipes (Plan);
465
465
}
466
466
467
- // / Create a single-scalar clone of \p RepR for lane \p Lane.
468
- static VPReplicateRecipe *cloneForLane (VPlan &Plan, VPBuilder &Builder,
469
- Type *IdxTy, VPReplicateRecipe *RepR,
470
- VPLane Lane) {
467
+ // / Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
468
+ // / Def2LaneDefs to look up scalar definitions for operands of \p RepR.
469
+ static VPReplicateRecipe *
470
+ cloneForLane (VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
471
+ VPReplicateRecipe *RepR, VPLane Lane,
472
+ const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
471
473
// Collect the operands at Lane, creating extracts as needed.
472
474
SmallVector<VPValue *> NewOps;
473
475
for (VPValue *Op : RepR->operands ()) {
@@ -480,6 +482,14 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
480
482
Builder.createNaryOp (VPInstruction::ExtractLastElement, {Op}));
481
483
continue ;
482
484
}
485
+ // If Op is a definition that has been unrolled, directly use the clone for
486
+ // the corresponding lane.
487
+ auto LaneDefs = Def2LaneDefs.find (Op);
488
+ if (LaneDefs != Def2LaneDefs.end ()) {
489
+ NewOps.push_back (LaneDefs->second [Lane.getKnownLane ()]);
490
+ continue ;
491
+ }
492
+
483
493
// Look through buildvector to avoid unnecessary extracts.
484
494
if (match (Op, m_BuildVector ())) {
485
495
NewOps.push_back (
@@ -512,6 +522,13 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
512
522
vp_depth_first_shallow (Plan.getVectorLoopRegion ()->getEntry ()));
513
523
auto VPBBsToUnroll =
514
524
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
525
+ // A mapping of current VPValue definitions to collections of new VPValues
526
+ // defined per lane. Serves to hook up potential users of current VPValue
527
+ // definition that are replicated-per-VF later.
528
+ DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
529
+ // The removal of current recipes being replaced by new ones needs to be
530
+ // delayed until Def2LaneDefs is no longer in use.
531
+ SmallVector<VPRecipeBase *> ToRemove;
515
532
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
516
533
for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
517
534
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
@@ -523,36 +540,46 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
523
540
if (isa<StoreInst>(RepR->getUnderlyingInstr ()) &&
524
541
vputils::isSingleScalar (RepR->getOperand (1 ))) {
525
542
// Stores to invariant addresses need to store the last lane only.
526
- cloneForLane (Plan, Builder, IdxTy, RepR,
527
- VPLane::getLastLaneForVF (VF) );
543
+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF (VF),
544
+ Def2LaneDefs );
528
545
} else {
529
546
// Create single-scalar version of RepR for all lanes.
530
547
for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
531
- cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I));
548
+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I), Def2LaneDefs );
532
549
}
533
550
RepR->eraseFromParent ();
534
551
continue ;
535
552
}
536
553
// / Create single-scalar version of RepR for all lanes.
537
554
SmallVector<VPValue *> LaneDefs;
538
555
for (unsigned I = 0 ; I != VF.getKnownMinValue (); ++I)
539
- LaneDefs.push_back (cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I)));
556
+ LaneDefs.push_back (
557
+ cloneForLane (Plan, Builder, IdxTy, RepR, VPLane (I), Def2LaneDefs));
540
558
559
+ Def2LaneDefs[RepR] = LaneDefs;
541
560
// / Users that only demand the first lane can use the definition for lane
542
561
// / 0.
543
562
RepR->replaceUsesWithIf (LaneDefs[0 ], [RepR](VPUser &U, unsigned ) {
544
563
return U.onlyFirstLaneUsed (RepR);
545
564
});
546
565
547
- // If needed, create a Build(Struct)Vector recipe to insert the scalar
548
- // lane values into a vector.
549
- Type *ResTy = RepR->getUnderlyingInstr ()->getType ();
550
- VPValue *VecRes = Builder.createNaryOp (
551
- ResTy->isStructTy () ? VPInstruction::BuildStructVector
552
- : VPInstruction::BuildVector,
553
- LaneDefs);
554
- RepR->replaceAllUsesWith (VecRes);
555
- RepR->eraseFromParent ();
566
+ // Update each build vector user that currently has RepR as its only
567
+ // operand, to have all LaneDefs as its operands.
568
+ for (VPUser *U : to_vector (RepR->users ())) {
569
+ auto *VPI = dyn_cast<VPInstruction>(U);
570
+ if (!VPI || (VPI->getOpcode () != VPInstruction::BuildVector &&
571
+ VPI->getOpcode () != VPInstruction::BuildStructVector))
572
+ continue ;
573
+ assert (VPI->getNumOperands () == 1 &&
574
+ " Build(Struct)Vector must have a single operand before "
575
+ " replicating by VF" );
576
+ VPI->setOperand (0 , LaneDefs[0 ]);
577
+ for (VPValue *LaneDef : drop_begin (LaneDefs))
578
+ VPI->addOperand (LaneDef);
579
+ }
580
+ ToRemove.push_back (RepR);
556
581
}
557
582
}
583
+ for (auto *R : reverse (ToRemove))
584
+ R->eraseFromParent ();
558
585
}
0 commit comments