Skip to content

Commit

Permalink
JIT: very simple cloning heuristic (#108771)
Browse files Browse the repository at this point in the history
Avoid cloning large loops.

We compute loop size by counting tree nodes of all statements of all
blocks in the loop. If this is over a threshold, we inhibit cloning.

Threshold value was chosen based on distribution of unrestricted cloned
loop sizes in the benchmark run_pgo collection.
  • Loading branch information
AndyAyersMS authored Oct 14, 2024
1 parent 6d23f64 commit 2098981
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 6 deletions.
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7091,6 +7091,7 @@ class Compiler
bool optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit);

PhaseStatus optCloneLoops();
bool optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context);
void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context);
PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info)
bool optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR);
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,10 @@ CONFIG_INTEGER(JitBreakMorphTree, W("JitBreakMorphTree"), 0xffffffff)
CONFIG_INTEGER(JitBreakOnBadCode, W("JitBreakOnBadCode"), 0)
CONFIG_INTEGER(JitBreakOnMinOpts, W("JITBreakOnMinOpts"), 0) // Halt if jit switches to MinOpts
CONFIG_INTEGER(JitCloneLoops, W("JitCloneLoops"), 1) // If 0, don't clone. Otherwise clone loops for optimizations.
CONFIG_INTEGER(JitCloneLoopsWithGdvTests, W("JitCloneLoopsWithGdvTests"), 1) // If 0, don't clone loops based on
// invariant type/method address tests
CONFIG_INTEGER(JitCloneLoopsWithGdvTests, W("JitCloneLoopsWithGdvTests"), 1) // If 0, don't clone loops based on
// invariant type/method address tests
RELEASE_CONFIG_INTEGER(JitCloneLoopsSizeLimit, W("JitCloneLoopsSizeLimit"), 400) // limit cloning to loops with less
// than this many tree nodes
CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In debug builds log places where loop cloning
// optimizations are performed on the fast path.
CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xdd) // In debug builds, initialize the memory allocated by the nra
Expand Down
96 changes: 94 additions & 2 deletions src/coreclr/jit/loopcloning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1773,6 +1773,94 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop,
}
}

//------------------------------------------------------------------------
// optShouldCloneLoop: Decide if a loop that can be cloned should be cloned.
//
// Arguments:
// loop - the current loop for which the optimizations are performed.
// context - data structure where all loop cloning info is kept.
//
// Returns:
// true if expected performance gain from cloning is worth the potential
// size increase.
//
// Remarks:
// This is a simple-minded heuristic meant to avoid "runaway" cloning
// where large loops are cloned.
//
// We estimate the size cost of cloning by summing up the number of
// tree nodes in all statements in all blocks in the loop.
//
// This value is compared to a hard-coded threshold, and if bigger,
// then the method returns false.
//
bool Compiler::optShouldCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context)
{
// Compute loop size
//
unsigned loopSize = 0;

// For now we use a very simplistic model where each tree node
// has the same code size.
//
// CostSz is not available until later.
//
struct TreeCostWalker : GenTreeVisitor<TreeCostWalker>
{
enum
{
DoPreOrder = true,
};

unsigned m_nodeCount;

TreeCostWalker(Compiler* comp)
: GenTreeVisitor(comp)
, m_nodeCount(0)
{
}

fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
{
m_nodeCount++;
return WALK_CONTINUE;
}

void Reset()
{
m_nodeCount = 0;
}
unsigned Cost()
{
return m_nodeCount;
}
};

TreeCostWalker costWalker(this);

loop->VisitLoopBlocks([&](BasicBlock* block) {
weight_t normalizedWeight = block->getBBWeight(this);
for (Statement* const stmt : block->Statements())
{
costWalker.Reset();
costWalker.WalkTree(stmt->GetRootNodePointer(), nullptr);
loopSize += costWalker.Cost();
}
return BasicBlockVisit::Continue;
});

int const sizeLimit = JitConfig.JitCloneLoopsSizeLimit();

if ((sizeLimit >= 0) && (loopSize >= (unsigned)sizeLimit))
{
JITDUMP("Loop cloning: rejecting loop " FMT_LP " of size %u, size limit %d\n", loop->GetIndex(), loopSize,
sizeLimit);
return false;
}

return true;
}

//----------------------------------------------------------------------------
// optIsLoopClonable: Determine whether this loop can be cloned.
//
Expand Down Expand Up @@ -2563,7 +2651,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop

assert(compCurBB->lastStmt() == info->stmt);
info->context->EnsureLoopOptInfo(info->loop->GetIndex())
->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(info->stmt, indir, lclNum, clsHnd));
->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(compCurBB, info->stmt, indir, lclNum, clsHnd));
}
}
else if (optIsHandleOrIndirOfHandle(relopOp2, GTF_ICON_FTN_ADDR))
Expand Down Expand Up @@ -2644,7 +2732,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop
assert(iconHandle->IsIconHandle(GTF_ICON_FTN_ADDR));
assert(compCurBB->lastStmt() == info->stmt);
LcMethodAddrTestOptInfo* optInfo = new (this, CMK_LoopOpt)
LcMethodAddrTestOptInfo(info->stmt, indir, lclNum, (void*)iconHandle->IconValue(),
LcMethodAddrTestOptInfo(compCurBB, info->stmt, indir, lclNum, (void*)iconHandle->IconValue(),
relopOp2 != iconHandle DEBUG_ARG(
(CORINFO_METHOD_HANDLE)iconHandle->gtTargetHandle));
info->context->EnsureLoopOptInfo(info->loop->GetIndex())->Push(optInfo);
Expand Down Expand Up @@ -2944,6 +3032,10 @@ PhaseStatus Compiler::optCloneLoops()
// No need to clone.
context.CancelLoopOptInfo(loop->GetIndex());
}
else if (!optShouldCloneLoop(loop, &context))
{
context.CancelLoopOptInfo(loop->GetIndex());
}
}
}

Expand Down
15 changes: 13 additions & 2 deletions src/coreclr/jit/loopcloning.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@ struct LcJaggedArrayOptInfo : public LcOptInfo
//
struct LcTypeTestOptInfo : public LcOptInfo
{
// block where statement occurs
BasicBlock* block;
// statement where the opportunity occurs
Statement* stmt;
// indir for the method table
Expand All @@ -330,8 +332,13 @@ struct LcTypeTestOptInfo : public LcOptInfo
// handle being tested for
CORINFO_CLASS_HANDLE clsHnd;

LcTypeTestOptInfo(Statement* stmt, GenTreeIndir* methodTableIndir, unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd)
LcTypeTestOptInfo(BasicBlock* block,
Statement* stmt,
GenTreeIndir* methodTableIndir,
unsigned lclNum,
CORINFO_CLASS_HANDLE clsHnd)
: LcOptInfo(LcTypeTest)
, block(block)
, stmt(stmt)
, methodTableIndir(methodTableIndir)
, lclNum(lclNum)
Expand All @@ -342,6 +349,8 @@ struct LcTypeTestOptInfo : public LcOptInfo

struct LcMethodAddrTestOptInfo : public LcOptInfo
{
// block where statement occurs
BasicBlock* block;
// statement where the opportunity occurs
Statement* stmt;
// indir on the delegate
Expand All @@ -355,12 +364,14 @@ struct LcMethodAddrTestOptInfo : public LcOptInfo
CORINFO_METHOD_HANDLE targetMethHnd;
#endif

LcMethodAddrTestOptInfo(Statement* stmt,
LcMethodAddrTestOptInfo(BasicBlock* block,
Statement* stmt,
GenTreeIndir* delegateAddressIndir,
unsigned delegateLclNum,
void* methAddr,
bool isSlot DEBUG_ARG(CORINFO_METHOD_HANDLE targetMethHnd))
: LcOptInfo(LcMethodAddrTest)
, block(block)
, stmt(stmt)
, delegateAddressIndir(delegateAddressIndir)
, delegateLclNum(delegateLclNum)
Expand Down

0 comments on commit 2098981

Please sign in to comment.