Skip to content

Commit 2098981

Browse files
authored
JIT: very simple cloning heuristic (#108771)
Avoid cloning large loops. We compute the loop size by counting the tree nodes of all statements in all blocks of the loop. If this size reaches or exceeds a threshold (JitCloneLoopsSizeLimit, 400 by default), we inhibit cloning. The threshold value was chosen based on the distribution of unrestricted cloned loop sizes in the benchmark run_pgo collection.
1 parent 6d23f64 commit 2098981

File tree

4 files changed

+112
-6
lines changed

4 files changed

+112
-6
lines changed

src/coreclr/jit/compiler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7091,6 +7091,7 @@ class Compiler
70917091
bool optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit);
70927092

70937093
PhaseStatus optCloneLoops();
7094+
bool optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context);
70947095
void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context);
70957096
PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info)
70967097
bool optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR);

src/coreclr/jit/jitconfigvalues.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,10 @@ CONFIG_INTEGER(JitBreakMorphTree, W("JitBreakMorphTree"), 0xffffffff)
5959
CONFIG_INTEGER(JitBreakOnBadCode, W("JitBreakOnBadCode"), 0)
6060
CONFIG_INTEGER(JitBreakOnMinOpts, W("JITBreakOnMinOpts"), 0) // Halt if jit switches to MinOpts
6161
CONFIG_INTEGER(JitCloneLoops, W("JitCloneLoops"), 1) // If 0, don't clone. Otherwise clone loops for optimizations.
62-
CONFIG_INTEGER(JitCloneLoopsWithGdvTests, W("JitCloneLoopsWithGdvTests"), 1) // If 0, don't clone loops based on
63-
// invariant type/method address tests
62+
CONFIG_INTEGER(JitCloneLoopsWithGdvTests, W("JitCloneLoopsWithGdvTests"), 1) // If 0, don't clone loops based on
63+
// invariant type/method address tests
64+
RELEASE_CONFIG_INTEGER(JitCloneLoopsSizeLimit, W("JitCloneLoopsSizeLimit"), 400) // limit cloning to loops with less
65+
// than this many tree nodes
6466
CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In debug builds log places where loop cloning
6567
// optimizations are performed on the fast path.
6668
CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xdd) // In debug builds, initialize the memory allocated by the nra

src/coreclr/jit/loopcloning.cpp

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1773,6 +1773,94 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop,
17731773
}
17741774
}
17751775

1776+
//------------------------------------------------------------------------
1777+
// optShouldCloneLoop: Decide if a loop that can be cloned should be cloned.
1778+
//
1779+
// Arguments:
1780+
// loop - the current loop for which the optimizations are performed.
1781+
// context - data structure where all loop cloning info is kept.
1782+
//
1783+
// Returns:
1784+
// true if expected performance gain from cloning is worth the potential
1785+
// size increase.
1786+
//
1787+
// Remarks:
1788+
// This is a simple-minded heuristic meant to avoid "runaway" cloning
1789+
// where large loops are cloned.
1790+
//
1791+
// We estimate the size cost of cloning by summing up the number of
1792+
// tree nodes in all statements in all blocks in the loop.
1793+
//
1794+
// This value is compared to a hard-coded threshold, and if bigger,
1795+
// then the method returns false.
1796+
//
1797+
bool Compiler::optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context)
1798+
{
1799+
// Compute loop size
1800+
//
1801+
unsigned loopSize = 0;
1802+
1803+
// For now we use a very simplistic model where each tree node
1804+
// has the same code size.
1805+
//
1806+
// CostSz is not available until later.
1807+
//
1808+
struct TreeCostWalker : GenTreeVisitor<TreeCostWalker>
1809+
{
1810+
enum
1811+
{
1812+
DoPreOrder = true,
1813+
};
1814+
1815+
unsigned m_nodeCount;
1816+
1817+
TreeCostWalker(Compiler* comp)
1818+
: GenTreeVisitor(comp)
1819+
, m_nodeCount(0)
1820+
{
1821+
}
1822+
1823+
fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
1824+
{
1825+
m_nodeCount++;
1826+
return WALK_CONTINUE;
1827+
}
1828+
1829+
void Reset()
1830+
{
1831+
m_nodeCount = 0;
1832+
}
1833+
unsigned Cost()
1834+
{
1835+
return m_nodeCount;
1836+
}
1837+
};
1838+
1839+
TreeCostWalker costWalker(this);
1840+
1841+
loop->VisitLoopBlocks([&](BasicBlock* block) {
1842+
weight_t normalizedWeight = block->getBBWeight(this);
1843+
for (Statement* const stmt : block->Statements())
1844+
{
1845+
costWalker.Reset();
1846+
costWalker.WalkTree(stmt->GetRootNodePointer(), nullptr);
1847+
loopSize += costWalker.Cost();
1848+
}
1849+
return BasicBlockVisit::Continue;
1850+
});
1851+
1852+
int const sizeLimit = JitConfig.JitCloneLoopsSizeLimit();
1853+
1854+
if ((sizeLimit >= 0) && (loopSize >= (unsigned)sizeLimit))
1855+
{
1856+
JITDUMP("Loop cloning: rejecting loop " FMT_LP " of size %u, size limit %d\n", loop->GetIndex(), loopSize,
1857+
sizeLimit);
1858+
return false;
1859+
}
1860+
1861+
return true;
1862+
}
1863+
17761864
//----------------------------------------------------------------------------
17771865
// optIsLoopClonable: Determine whether this loop can be cloned.
17781866
//
@@ -2563,7 +2651,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop
25632651

25642652
assert(compCurBB->lastStmt() == info->stmt);
25652653
info->context->EnsureLoopOptInfo(info->loop->GetIndex())
2566-
->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(info->stmt, indir, lclNum, clsHnd));
2654+
->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(compCurBB, info->stmt, indir, lclNum, clsHnd));
25672655
}
25682656
}
25692657
else if (optIsHandleOrIndirOfHandle(relopOp2, GTF_ICON_FTN_ADDR))
@@ -2644,7 +2732,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop
26442732
assert(iconHandle->IsIconHandle(GTF_ICON_FTN_ADDR));
26452733
assert(compCurBB->lastStmt() == info->stmt);
26462734
LcMethodAddrTestOptInfo* optInfo = new (this, CMK_LoopOpt)
2647-
LcMethodAddrTestOptInfo(info->stmt, indir, lclNum, (void*)iconHandle->IconValue(),
2735+
LcMethodAddrTestOptInfo(compCurBB, info->stmt, indir, lclNum, (void*)iconHandle->IconValue(),
26482736
relopOp2 != iconHandle DEBUG_ARG(
26492737
(CORINFO_METHOD_HANDLE)iconHandle->gtTargetHandle));
26502738
info->context->EnsureLoopOptInfo(info->loop->GetIndex())->Push(optInfo);
@@ -2944,6 +3032,10 @@ PhaseStatus Compiler::optCloneLoops()
29443032
// No need to clone.
29453033
context.CancelLoopOptInfo(loop->GetIndex());
29463034
}
3035+
else if (!optShouldCloneLoop(loop, &context))
3036+
{
3037+
context.CancelLoopOptInfo(loop->GetIndex());
3038+
}
29473039
}
29483040
}
29493041

src/coreclr/jit/loopcloning.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,8 @@ struct LcJaggedArrayOptInfo : public LcOptInfo
321321
//
322322
struct LcTypeTestOptInfo : public LcOptInfo
323323
{
324+
// block where statement occurs
325+
BasicBlock* block;
324326
// statement where the opportunity occurs
325327
Statement* stmt;
326328
// indir for the method table
@@ -330,8 +332,13 @@ struct LcTypeTestOptInfo : public LcOptInfo
330332
// handle being tested for
331333
CORINFO_CLASS_HANDLE clsHnd;
332334

333-
LcTypeTestOptInfo(Statement* stmt, GenTreeIndir* methodTableIndir, unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd)
335+
LcTypeTestOptInfo(BasicBlock* block,
336+
Statement* stmt,
337+
GenTreeIndir* methodTableIndir,
338+
unsigned lclNum,
339+
CORINFO_CLASS_HANDLE clsHnd)
334340
: LcOptInfo(LcTypeTest)
341+
, block(block)
335342
, stmt(stmt)
336343
, methodTableIndir(methodTableIndir)
337344
, lclNum(lclNum)
@@ -342,6 +349,8 @@ struct LcTypeTestOptInfo : public LcOptInfo
342349

343350
struct LcMethodAddrTestOptInfo : public LcOptInfo
344351
{
352+
// block where statement occurs
353+
BasicBlock* block;
345354
// statement where the opportunity occurs
346355
Statement* stmt;
347356
// indir on the delegate
@@ -355,12 +364,14 @@ struct LcMethodAddrTestOptInfo : public LcOptInfo
355364
CORINFO_METHOD_HANDLE targetMethHnd;
356365
#endif
357366

358-
LcMethodAddrTestOptInfo(Statement* stmt,
367+
LcMethodAddrTestOptInfo(BasicBlock* block,
368+
Statement* stmt,
359369
GenTreeIndir* delegateAddressIndir,
360370
unsigned delegateLclNum,
361371
void* methAddr,
362372
bool isSlot DEBUG_ARG(CORINFO_METHOD_HANDLE targetMethHnd))
363373
: LcOptInfo(LcMethodAddrTest)
374+
, block(block)
364375
, stmt(stmt)
365376
, delegateAddressIndir(delegateAddressIndir)
366377
, delegateLclNum(delegateLclNum)

0 commit comments

Comments
 (0)