Skip to content

Commit e32148a

Browse files
JIT: Add loop-aware RPO, and use as LSRA's block sequence (#108086)
Part of #107749, and follow-up to #107927. When computing an RPO of the flow graph, ensuring that the entirety of a loop body is visited before any of the loop's successors has the benefit of keeping the loop body compact in the traversal. This is certainly ideal when computing an initial block layout, and may be preferable for register allocation, too. Thus, this change formalizes loop-aware RPO creation as part of the flowgraph API surface, and uses it for LSRA's block sequence.
1 parent 9696817 commit e32148a

File tree

3 files changed

+95
-3
lines changed

3 files changed

+95
-3
lines changed

src/coreclr/jit/compiler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6286,6 +6286,9 @@ class Compiler
62866286
FlowGraphDfsTree* fgComputeDfs();
62876287
void fgInvalidateDfsTree();
62886288

6289+
// Visits the blocks of 'dfsTree' in loop-aware reverse post-order, invoking 'func' on each block (defined in compiler.hpp).
template <typename TFunc>
6290+
void fgVisitBlocksInLoopAwareRPO(FlowGraphDfsTree* dfsTree, FlowGraphNaturalLoops* loops, TFunc func);
6291+
62896292
void fgRemoveReturnBlock(BasicBlock* block);
62906293

62916294
void fgConvertBBToThrowBB(BasicBlock* block);

src/coreclr/jit/compiler.hpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4974,6 +4974,75 @@ unsigned Compiler::fgRunDfs(VisitPreorder visitPreorder, VisitPostorder visitPos
49744974
return preOrderIndex;
49754975
}
49764976

4977+
//------------------------------------------------------------------------
4978+
// fgVisitBlocksInLoopAwareRPO: Visit the blocks in 'dfsTree' in reverse post-order,
4979+
// but ensure loop bodies are visited before loop successors.
4980+
//
4981+
// Type parameters:
4982+
// TFunc - Callback functor type
4983+
//
4984+
// Parameters:
4985+
// dfsTree - The DFS tree of the flow graph
4986+
// loops - A collection of the loops in the flow graph
4987+
// func - Callback functor that operates on a BasicBlock*
4988+
//
4989+
// Returns:
4990+
// Nothing. Blocks are passed to 'func' in reverse post-order, with loop bodies kept compact.
4991+
//
4992+
template <typename TFunc>
4993+
void Compiler::fgVisitBlocksInLoopAwareRPO(FlowGraphDfsTree* dfsTree, FlowGraphNaturalLoops* loops, TFunc func)
4994+
{
4995+
assert(dfsTree != nullptr);
4996+
assert(loops != nullptr);
4997+
4998+
// We will start by visiting blocks in reverse post-order.
4999+
// If we encounter the header of a loop, we will visit the loop's remaining blocks next
5000+
// to keep the loop body compact in the visitation order.
5001+
// We have to do this recursively to handle nested loops.
5002+
// Since the presence of loops implies we will try to visit some blocks more than once,
5003+
// we need to track visited blocks.
5004+
struct LoopAwareVisitor
5005+
{
5006+
BitVecTraits traits;
5007+
BitVec visitedBlocks; // Set of already-visited blocks, keyed by bbPostorderNum
5008+
FlowGraphNaturalLoops* loops;
5009+
TFunc func;
5010+
5011+
LoopAwareVisitor(FlowGraphDfsTree* dfsTree, FlowGraphNaturalLoops* loops, TFunc func)
5012+
: traits(dfsTree->PostOrderTraits())
5013+
, visitedBlocks(BitVecOps::MakeEmpty(&traits))
5014+
, loops(loops)
5015+
, func(func)
5016+
{
5017+
}
5018+
5019+
void VisitBlock(BasicBlock* block)
5020+
{
5021+
// Only act on the first visit of 'block'; later attempts fall through without calling 'func'.
if (BitVecOps::TryAddElemD(&traits, visitedBlocks, block->bbPostorderNum))
5022+
{
5023+
func(block); // Report the block before descending into any loop it heads
5024+
5025+
FlowGraphNaturalLoop* const loop = loops->GetLoopByHeader(block);
5026+
if (loop != nullptr)
5027+
{
5028+
// 'block' heads a loop: visit the loop's blocks now, recursing so nested loops stay compact too.
// NOTE(review): the lambda parameter below shadows the enclosing 'block' parameter.
loop->VisitLoopBlocksReversePostOrder([&](BasicBlock* block) {
5029+
VisitBlock(block);
5030+
return BasicBlockVisit::Continue;
5031+
});
5032+
}
5033+
}
5034+
}
5035+
};
5036+
5037+
LoopAwareVisitor visitor(dfsTree, loops, func);
5038+
5039+
// Walk the post-order array from its last entry to its first, i.e. in reverse post-order.
for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--)
5040+
{
5041+
BasicBlock* const block = dfsTree->GetPostOrder(i - 1);
5042+
visitor.VisitBlock(block);
5043+
}
5044+
}
5045+
49775046
//------------------------------------------------------------------------------
49785047
// FlowGraphNaturalLoop::VisitLoopBlocksReversePostOrder: Visit all of the
49795048
// loop's blocks in reverse post order.

src/coreclr/jit/lsra.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -953,9 +953,29 @@ void LinearScan::setBlockSequence()
953953

954954
assert((blockSequence == nullptr) && (bbSeqCount == 0));
955955
FlowGraphDfsTree* const dfsTree = compiler->fgComputeDfs</* useProfile */ true>();
956-
blockSequence = dfsTree->GetPostOrder();
957-
bbNumMaxBeforeResolution = compiler->fgBBNumMax;
958-
blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1];
956+
957+
if (compiler->opts.OptimizationEnabled() && dfsTree->HasCycle())
958+
{
959+
// Ensure loop bodies are compact in the visitation order
960+
FlowGraphNaturalLoops* const loops = FlowGraphNaturalLoops::Find(dfsTree);
961+
blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
962+
unsigned index = dfsTree->GetPostOrderCount();
963+
964+
auto addToSequence = [this, &index](BasicBlock* block) {
965+
assert(index != 0);
966+
blockSequence[--index] = block;
967+
};
968+
969+
compiler->fgVisitBlocksInLoopAwareRPO(dfsTree, loops, addToSequence);
970+
}
971+
else
972+
{
973+
// TODO: Just use lexical block order in MinOpts
974+
blockSequence = dfsTree->GetPostOrder();
975+
}
976+
977+
bbNumMaxBeforeResolution = compiler->fgBBNumMax;
978+
blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1];
959979

960980
// Flip the DFS traversal to get the reverse post-order traversal
961981
// (this is the order in which blocks will be allocated)

0 commit comments

Comments
 (0)