@@ -9298,6 +9298,7 @@ static void addExitUsersForFirstOrderRecurrences(
9298
9298
VPlanPtr
9299
9299
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes (VFRange &Range) {
9300
9300
9301
+ using namespace llvm ::VPlanPatternMatch;
9301
9302
SmallPtrSet<const InterleaveGroup<Instruction> *, 1 > InterleaveGroups;
9302
9303
9303
9304
// ---------------------------------------------------------------------------
@@ -9321,6 +9322,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9321
9322
PSE, RequiresScalarEpilogueCheck,
9322
9323
CM.foldTailByMasking (), OrigLoop);
9323
9324
9325
+ // Build hierarchical CFG.
9326
+ VPlanHCFGBuilder HCFGBuilder (OrigLoop, LI, *Plan);
9327
+ HCFGBuilder.buildHierarchicalCFG ();
9328
+
9324
9329
// Don't use getDecisionAndClampRange here, because we don't know the UF
9325
9330
// so this function is better to be conservative, rather than to split
9326
9331
// it up into different VPlans.
@@ -9371,12 +9376,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9371
9376
// Construct recipes for the instructions in the loop
9372
9377
// ---------------------------------------------------------------------------
9373
9378
9374
- // Scan the body of the loop in a topological order to visit each basic block
9375
- // after having visited its predecessor basic blocks.
9376
- LoopBlocksDFS DFS (OrigLoop);
9377
- DFS.perform (LI);
9378
-
9379
- VPBasicBlock *HeaderVPBB = Plan->getVectorLoopRegion ()->getEntryBasicBlock ();
9379
+ VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion ();
9380
+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock ();
9380
9381
VPBasicBlock *VPBB = HeaderVPBB;
9381
9382
BasicBlock *HeaderBB = OrigLoop->getHeader ();
9382
9383
bool NeedsMasks =
@@ -9389,26 +9390,70 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9389
9390
RecipeBuilder.collectScaledReductions (Range);
9390
9391
9391
9392
auto *MiddleVPBB = Plan->getMiddleBlock ();
9393
+
9394
+ // Scan the body of the loop in a topological order to visit each basic block
9395
+ // after having visited its predecessor basic blocks.
9396
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT (
9397
+ HeaderVPBB);
9398
+
9392
9399
VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi ();
9393
- for (BasicBlock *BB : make_range (DFS.beginRPO (), DFS.endRPO ())) {
9394
- // Relevant instructions from basic block BB will be grouped into VPRecipe
9395
- // ingredients and fill a new VPBasicBlock.
9396
- if (VPBB != HeaderVPBB)
9397
- VPBB->setName (BB->getName ());
9398
- Builder.setInsertPoint (VPBB);
9400
+ VPBlockBase *PrevVPBB = nullptr ;
9401
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
9402
+ // Handle VPBBs down to the latch.
9403
+ if (VPBB == LoopRegion->getExiting ()) {
9404
+ assert (!HCFGBuilder.getIRBBForVPB (VPBB) &&
9405
+ " the latch block shouldn't have a corresponding IRBB" );
9406
+ VPBlockUtils::connectBlocks (PrevVPBB, VPBB);
9407
+ break ;
9408
+ }
9399
9409
9400
- if (VPBB == HeaderVPBB)
9410
+ // Create mask based on the IR BB corresponding to VPBB.
9411
+ // TODO: Predicate directly based on VPlan.
9412
+ Builder.setInsertPoint (VPBB, VPBB->begin ());
9413
+ if (VPBB == HeaderVPBB) {
9414
+ Builder.setInsertPoint (VPBB, VPBB->getFirstNonPhi ());
9401
9415
RecipeBuilder.createHeaderMask ();
9402
- else if (NeedsMasks)
9403
- RecipeBuilder.createBlockInMask (BB);
9416
+ } else if (NeedsMasks) {
9417
+ // FIXME: At the moment, masks need to be placed at the beginning of the
9418
+ // block, as blends introduced for phi nodes need to use them. The created
9419
+ // blends should be sunk after the mask recipes.
9420
+ RecipeBuilder.createBlockInMask (HCFGBuilder.getIRBBForVPB (VPBB));
9421
+ }
9422
+
9423
+ // Convert input VPInstructions to widened recipes.
9424
+ for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
9425
+ auto *SingleDef = cast<VPSingleDefRecipe>(&R);
9426
+ auto *UnderlyingValue = SingleDef->getUnderlyingValue ();
9427
+ // Skip recipes that do not need transforming, including canonical IV,
9428
+ // wide canonical IV and VPInstructions without underlying values. The
9429
+ // latter are added above for masking.
9430
+ // FIXME: Migrate code relying on the underlying instruction from VPlan0
9431
+ // to construct recipes below to not use the underlying instruction.
9432
+ if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe>(&R) ||
9433
+ (isa<VPInstruction>(&R) && !UnderlyingValue))
9434
+ continue ;
9404
9435
9405
- // Introduce each ingredient into VPlan.
9406
- // TODO: Model and preserve debug intrinsics in VPlan.
9407
- for (Instruction &I : drop_end (BB->instructionsWithoutDebug (false ))) {
9408
- Instruction *Instr = &I;
9436
+ // FIXME: VPlan0, which models a copy of the original scalar loop, should
9437
+ // not use VPWidenPHIRecipe to model the phis.
9438
+ assert ((isa<VPWidenPHIRecipe>(&R) || isa<VPInstruction>(&R)) &&
9439
+ UnderlyingValue && " unsupported recipe" );
9440
+
9441
+ if (isa<VPInstruction>(&R) &&
9442
+ (cast<VPInstruction>(&R)->getOpcode () ==
9443
+ VPInstruction::BranchOnCond ||
9444
+ (cast<VPInstruction>(&R)->getOpcode () == Instruction::Switch))) {
9445
+ R.eraseFromParent ();
9446
+ break ;
9447
+ }
9448
+
9449
+ // TODO: Gradually replace uses of underlying instruction by analyses on
9450
+ // VPlan.
9451
+ Instruction *Instr = cast<Instruction>(UnderlyingValue);
9452
+ Builder.setInsertPoint (SingleDef);
9409
9453
SmallVector<VPValue *, 4 > Operands;
9410
9454
auto *Phi = dyn_cast<PHINode>(Instr);
9411
9455
if (Phi && Phi->getParent () == HeaderBB) {
9456
+ // The backedge value will be added in fixHeaderPhis later.
9412
9457
Operands.push_back (Plan->getOrAddLiveIn (
9413
9458
Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ())));
9414
9459
} else {
@@ -9420,15 +9465,16 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9420
9465
// in the exit block, a uniform store recipe will be created for the final
9421
9466
// invariant store of the reduction.
9422
9467
StoreInst *SI;
9423
- if ((SI = dyn_cast<StoreInst>(&I )) &&
9468
+ if ((SI = dyn_cast<StoreInst>(Instr )) &&
9424
9469
Legal->isInvariantAddressOfReduction (SI->getPointerOperand ())) {
9425
9470
// Only create recipe for the final invariant store of the reduction.
9426
- if (!Legal->isInvariantStoreOfReduction (SI))
9427
- continue ;
9428
- auto *Recipe = new VPReplicateRecipe (
9429
- SI, make_range (Operands.begin (), Operands.end ()),
9430
- true /* IsUniform */ );
9431
- Recipe->insertBefore (*MiddleVPBB, MBIP);
9471
+ if (Legal->isInvariantStoreOfReduction (SI)) {
9472
+ auto *Recipe = new VPReplicateRecipe (
9473
+ SI, make_range (Operands.begin (), Operands.end ()),
9474
+ true /* IsUniform */ );
9475
+ Recipe->insertBefore (*MiddleVPBB, MBIP);
9476
+ }
9477
+ R.eraseFromParent ();
9432
9478
continue ;
9433
9479
}
9434
9480
@@ -9438,25 +9484,29 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9438
9484
Recipe = RecipeBuilder.handleReplication (Instr, Operands, Range);
9439
9485
9440
9486
RecipeBuilder.setRecipe (Instr, Recipe);
9441
- if (isa<VPHeaderPHIRecipe>(Recipe)) {
9442
- // VPHeaderPHIRecipes must be kept in the phi section of HeaderVPBB. In
9443
- // the following cases, VPHeaderPHIRecipes may be created after non-phi
9444
- // recipes and need to be moved to the phi section of HeaderVPBB:
9445
- // * tail-folding (non-phi recipes computing the header mask are
9446
- // introduced earlier than regular header phi recipes, and should appear
9447
- // after them)
9448
- // * Optimizing truncates to VPWidenIntOrFpInductionRecipe.
9449
-
9450
- assert ((HeaderVPBB->getFirstNonPhi () == VPBB->end () ||
9451
- CM.foldTailByMasking () || isa<TruncInst>(Instr)) &&
9452
- " unexpected recipe needs moving" );
9487
+ if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) && isa<TruncInst>(Instr)) {
9488
+ // Optimized a truncate to VPWidenIntOrFpInductionRecipe. It needs to be
9489
+ // moved to the phi section in the header.
9453
9490
Recipe->insertBefore (*HeaderVPBB, HeaderVPBB->getFirstNonPhi ());
9454
- } else
9455
- VPBB->appendRecipe (Recipe);
9456
- }
9457
-
9458
- VPBlockUtils::insertBlockAfter (Plan->createVPBasicBlock (" " ), VPBB);
9459
- VPBB = cast<VPBasicBlock>(VPBB->getSingleSuccessor ());
9491
+ } else {
9492
+ Builder.insert (Recipe);
9493
+ }
9494
+ if (Recipe->getNumDefinedValues () == 1 )
9495
+ SingleDef->replaceAllUsesWith (Recipe->getVPSingleValue ());
9496
+ else
9497
+ assert (Recipe->getNumDefinedValues () == 0 &&
9498
+ " Unexpected multidef recipe" );
9499
+ R.eraseFromParent ();
9500
+ }
9501
+
9502
+ // Flatten the CFG in the loop. Masks for blocks have already been generated
9503
+ // and added to recipes as needed. To do so, first disconnect VPBB from its
9504
+ // successors. Then connect VPBB to the previously visited VPBB.
9505
+ for (auto *Succ : to_vector (VPBB->getSuccessors ()))
9506
+ VPBlockUtils::disconnectBlocks (VPBB, Succ);
9507
+ if (PrevVPBB)
9508
+ VPBlockUtils::connectBlocks (PrevVPBB, VPBB);
9509
+ PrevVPBB = VPBB;
9460
9510
}
9461
9511
9462
9512
// After here, VPBB should not be used.
0 commit comments