Skip to content

Commit 230fea1

Browse files
JIT: Use linear block order for MinOpts in LSRA (#108147)
Follow-up from #108086 (comment). Since LSRA shouldn't create cross-block live registers in MinOpts, LSRA's block ordering shouldn't matter, so just use the lexical order we have on-hand.
1 parent 00b4fa7 commit 230fea1

File tree

2 files changed

+47
-41
lines changed

2 files changed

+47
-41
lines changed

src/coreclr/jit/lsra.cpp

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -956,7 +956,7 @@ LinearScan::LinearScan(Compiler* theCompiler)
956956
// Notes:
957957
// On return, the blockSequence array contains the blocks in reverse post-order.
958958
// This method clears the bbVisitedSet on LinearScan, and when it returns the set
959-
// contains all the bbNums for the block.
959+
// contains all the bbPostorderNums for the block.
960960
//
961961
void LinearScan::setBlockSequence()
962962
{
@@ -967,30 +967,33 @@ void LinearScan::setBlockSequence()
967967
bbVisitedSet = BitVecOps::MakeEmpty(traits);
968968

969969
assert((blockSequence == nullptr) && (bbSeqCount == 0));
970-
971-
compiler->m_dfsTree = compiler->fgComputeDfs</* useProfile */ true>();
972-
FlowGraphDfsTree* const dfsTree = compiler->m_dfsTree;
973-
blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
970+
blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
974971

975972
if (compiler->opts.OptimizationEnabled())
976973
{
977-
// Ensure loop bodies are compact in the visitation order.
978-
compiler->m_loops = FlowGraphNaturalLoops::Find(dfsTree);
974+
// If optimizations are enabled, allocate blocks in reverse post-order.
975+
// This ensures each block's predecessors are visited first.
976+
// Also, ensure loop bodies are compact in the visitation order.
977+
compiler->m_dfsTree = compiler->fgComputeDfs</* useProfile */ true>();
978+
compiler->m_loops = FlowGraphNaturalLoops::Find(compiler->m_dfsTree);
979979
FlowGraphNaturalLoops* const loops = compiler->m_loops;
980-
unsigned index = 0;
981980

982-
auto addToSequence = [this, &index](BasicBlock* block) {
983-
blockSequence[index++] = block;
981+
auto addToSequence = [this](BasicBlock* block) {
982+
blockSequence[bbSeqCount++] = block;
984983
};
985984

986-
compiler->fgVisitBlocksInLoopAwareRPO(dfsTree, loops, addToSequence);
985+
compiler->fgVisitBlocksInLoopAwareRPO(compiler->m_dfsTree, loops, addToSequence);
987986
}
988987
else
989988
{
990-
// TODO: Just use lexical block order in MinOpts
991-
for (unsigned i = 0; i < dfsTree->GetPostOrderCount(); i++)
989+
// If we aren't optimizing, we won't have any cross-block live registers,
990+
// so the order of blocks allocated shouldn't matter.
991+
// Just use the linear order.
992+
for (BasicBlock* const block : compiler->Blocks())
992993
{
993-
blockSequence[i] = dfsTree->GetPostOrder(dfsTree->GetPostOrderCount() - i - 1);
994+
// Give this block a unique post-order number that can be used as a key into bbVisitedSet
995+
block->bbPostorderNum = bbSeqCount;
996+
blockSequence[bbSeqCount++] = block;
994997
}
995998
}
996999

@@ -1094,30 +1097,29 @@ void LinearScan::setBlockSequence()
10941097
};
10951098

10961099
JITDUMP("Start LSRA Block Sequence: \n");
1097-
for (unsigned i = 0; i < dfsTree->GetPostOrderCount(); i++)
1100+
for (unsigned i = 0; i < bbSeqCount; i++)
10981101
{
10991102
visitBlock(blockSequence[i]);
11001103
}
11011104

1102-
// If the DFS didn't visit any blocks, add them to the end of blockSequence
1103-
if (dfsTree->GetPostOrderCount() < compiler->fgBBcount)
1105+
// If any blocks remain unvisited, add them to the end of blockSequence.
1106+
// Unvisited blocks are more likely to be at the back of the list, so iterate backwards.
1107+
for (BasicBlock* block = compiler->fgLastBB; bbSeqCount < compiler->fgBBcount; block = block->Prev())
11041108
{
1105-
// Unvisited blocks are more likely to be at the back of the list, so iterate backwards
1106-
unsigned i = dfsTree->GetPostOrderCount();
1107-
for (BasicBlock* block = compiler->fgLastBB; i < compiler->fgBBcount; block = block->Prev())
1109+
assert(compiler->opts.OptimizationEnabled());
1110+
assert(block != nullptr);
1111+
assert(compiler->m_dfsTree != nullptr);
1112+
1113+
if (!compiler->m_dfsTree->Contains(block))
11081114
{
1109-
assert(block != nullptr);
1110-
if (!dfsTree->Contains(block))
1111-
{
1112-
// Give this block a unique post-order number that can be used as a key into bbVisitedSet
1113-
block->bbPostorderNum = i;
1114-
visitBlock(block);
1115-
blockSequence[i++] = block;
1116-
}
1115+
// Give this block a unique post-order number that can be used as a key into bbVisitedSet
1116+
block->bbPostorderNum = bbSeqCount;
1117+
visitBlock(block);
1118+
blockSequence[bbSeqCount++] = block;
11171119
}
11181120
}
11191121

1120-
bbSeqCount = compiler->fgBBcount;
1122+
assert(bbSeqCount == compiler->fgBBcount);
11211123
blockSequencingDone = true;
11221124

11231125
#ifdef DEBUG

src/coreclr/jit/lsrabuild.cpp

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2326,32 +2326,31 @@ void LinearScan::buildIntervals()
23262326
numPlacedArgLocals = 0;
23272327
placedArgRegs = RBM_NONE;
23282328

2329-
BasicBlock* predBlock = nullptr;
23302329
BasicBlock* prevBlock = nullptr;
23312330

23322331
// Initialize currentLiveVars to the empty set. We will set it to the current
23332332
// live-in at the entry to each block (this will include the incoming args on
23342333
// the first block).
23352334
VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
2335+
const bool floatingPointUsed = compiler->compFloatingPointUsed;
23362336

23372337
for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
23382338
{
23392339
JITDUMP("\nNEW BLOCK " FMT_BB "\n", block->bbNum);
23402340
compiler->compCurBB = block;
23412341

2342-
bool predBlockIsAllocated = false;
2343-
predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
2344-
if (predBlock != nullptr)
2345-
{
2346-
JITDUMP("\n\nSetting " FMT_BB " as the predecessor for determining incoming variable registers of " FMT_BB
2347-
"\n",
2348-
predBlock->bbNum, block->bbNum);
2349-
assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
2350-
blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
2351-
}
2352-
23532342
if (localVarsEnregistered)
23542343
{
2344+
bool predBlockIsAllocated = false;
2345+
BasicBlock* const predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
2346+
if (predBlock != nullptr)
2347+
{
2348+
JITDUMP("\n\nSetting " FMT_BB
2349+
" as the predecessor for determining incoming variable registers of " FMT_BB "\n",
2350+
predBlock->bbNum, block->bbNum);
2351+
assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
2352+
blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
2353+
}
23552354
VarSetOps::AssignNoCopy(compiler, currentLiveVars,
23562355
VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn));
23572356

@@ -2411,6 +2410,11 @@ void LinearScan::buildIntervals()
24112410
}
24122411
}
24132412
}
2413+
else
2414+
{
2415+
// If state isn't live across blocks, then reset any global Compiler state.
2416+
compiler->compFloatingPointUsed = floatingPointUsed;
2417+
}
24142418

24152419
// Add a dummy RefPosition to mark the block boundary.
24162420
// Note that we do this AFTER adding the exposed uses above, because the

0 commit comments

Comments (0)