Skip to content

Commit

Permalink
JIT: Enable inlining for late devirtualization (dotnet#110827)
Browse files Browse the repository at this point in the history
* Enable inlining for late devirt

* Pass correct IL offset

* Only creates RET_EXPR if the node is not top-level or not returning void

* Do not try inlining if BBF_INTERNAL is set

* Ensure the type matches

* Set inliner context correctly

* Address review feedbacks

* Fix non inline candidate marking

* Handle calls with retbuf

* Handle BBF_INTERNAL

* Get real type from local

* Always set IL offset

* Add comments

* Oops

* Use gtReturnType instead

* Remove unused ilOffset

* Use genActualType

* Remove unnecessary spillTemp

* Handle nested call correctly

* Don't promote compCurStmt

* Remove unused ilOffset

* Handle BBF_INTERNAL

* Use correct return type

* Use bbInCatchHandlerBBRange and bbInFilterBBRange

* Cleanup fncRetType

* Add a runtime check to prevent accidental execution order change

* Format jit

* Revert some changes

* Remove unused local

* Check whether a call can be spilled without side effect

* Get rid of BAD_VAR_NUM

* Add comments for CanSpillCallWithoutSideEffect

* Use ancestors to estimate whether a call can be spilled or not

* Reset m_ancestors before walking

* Nit

* Fix assertion

* Limit to GT_STORE_LCL_VAR only

* Oops

* Inline the check

* Remove leftovers

* Hoist the check

* Make sure the store parent is the statement root

Co-authored-by: Andy Ayers <andya@microsoft.com>

* Format JIT

* Check side effects before trying inlining

* Fix

* Nit

* Make lvHasLdAddrOp check optional

* Rename to early

* Split effects if necessary

* Use gtSplitTree

* Teach gtSplitTree to support early use

* Cleanup

* ClearInlineInfo is not needed

---------

Co-authored-by: Andy Ayers <andya@microsoft.com>
  • Loading branch information
hez2010 and AndyAyersMS authored Jan 29, 2025
1 parent 21ab780 commit a9d67ec
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 20 deletions.
4 changes: 3 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3790,7 +3790,7 @@ class Compiler
bool ignoreRoot = false);

bool gtSplitTree(
BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse);
BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse, bool early = false);

bool gtStoreDefinesField(
LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize);
Expand Down Expand Up @@ -5114,6 +5114,8 @@ class Compiler
SpillCliqueSucc
};

friend class SubstitutePlaceholdersAndDevirtualizeWalker;

// Abstract class for receiving a callback while walking a spill clique
class SpillCliqueWalker
{
Expand Down
97 changes: 84 additions & 13 deletions src/coreclr/jit/fginline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ bool Compiler::TypeInstantiationComplexityExceeds(CORINFO_CLASS_HANDLE handle, i

class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<SubstitutePlaceholdersAndDevirtualizeWalker>
{
bool m_madeChanges = false;
bool m_madeChanges = false;
Statement* m_curStmt = nullptr;
Statement* m_firstNewStmt = nullptr;

public:
enum
Expand All @@ -219,11 +221,29 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
{
}

bool MadeChanges()
bool MadeChanges() const
{
return m_madeChanges;
}

// ------------------------------------------------------------------------
// WalkStatement: Walk the tree of a statement, and return the first newly
// added statement if any, otherwise return the original statement.
//
// Arguments:
// stmt - the statement to walk.
//
// Return Value:
// The first newly added statement if any, or the original statement.
//
Statement* WalkStatement(Statement* stmt)
{
m_curStmt = stmt;
m_firstNewStmt = nullptr;
WalkTree(m_curStmt->GetRootNodePointer(), nullptr);
return m_firstNewStmt == nullptr ? m_curStmt : m_firstNewStmt;
}

fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
{
GenTree* tree = *use;
Expand Down Expand Up @@ -586,8 +606,59 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
}

CORINFO_CONTEXT_HANDLE contextInput = context;
context = nullptr;
m_compiler->impDevirtualizeCall(call, nullptr, &method, &methodFlags, &contextInput, &context,
isLateDevirtualization, explicitTailCall);

if (!call->IsVirtual())
{
assert(context != nullptr);
CORINFO_CALL_INFO callInfo = {};
callInfo.hMethod = method;
callInfo.methodFlags = methodFlags;
m_compiler->impMarkInlineCandidate(call, context, false, &callInfo);

if (call->IsInlineCandidate())
{
Statement* newStmt = nullptr;
GenTree** callUse = nullptr;
if (m_compiler->gtSplitTree(m_compiler->compCurBB, m_curStmt, call, &newStmt, &callUse, true))
{
if (m_firstNewStmt == nullptr)
{
m_firstNewStmt = newStmt;
}
}

// If the call is the root expression in a statement, and it returns void,
// we can inline it directly without creating a RET_EXPR.
if (parent != nullptr || call->gtReturnType != TYP_VOID)
{
Statement* stmt = m_compiler->gtNewStmt(call);
m_compiler->fgInsertStmtBefore(m_compiler->compCurBB, m_curStmt, stmt);
if (m_firstNewStmt == nullptr)
{
m_firstNewStmt = stmt;
}

GenTreeRetExpr* retExpr =
m_compiler->gtNewInlineCandidateReturnExpr(call->AsCall(),
genActualType(call->TypeGet()));
call->GetSingleInlineCandidateInfo()->retExpr = retExpr;

JITDUMP("Creating new RET_EXPR for [%06u]:\n", call->gtTreeID);
DISPTREE(retExpr);

*pTree = retExpr;
}

call->GetSingleInlineCandidateInfo()->exactContextHandle = context;
INDEBUG(call->GetSingleInlineCandidateInfo()->inlinersContext = call->gtInlineContext);

JITDUMP("New inline candidate due to late devirtualization:\n");
DISPTREE(call);
}
}
m_madeChanges = true;
}
}
Expand Down Expand Up @@ -730,17 +801,10 @@ PhaseStatus Compiler::fgInline()
do
{
// Make the current basic block address available globally
compCurBB = block;

for (Statement* const stmt : block->Statements())
compCurBB = block;
Statement* stmt = block->firstStmt();
while (stmt != nullptr)
{

#if defined(DEBUG)
// In debug builds we want the inline tree to show all failed
// inlines. Some inlines may fail very early and never make it to
// candidate stage. So scan the tree looking for those early failures.
fgWalkTreePre(stmt->GetRootNodePointer(), fgFindNonInlineCandidate, stmt);
#endif
// See if we need to replace some return value place holders.
// Also, see if this replacement enables further devirtualization.
//
Expand All @@ -755,7 +819,7 @@ PhaseStatus Compiler::fgInline()
// possible further optimization, as the (now complete) GT_RET_EXPR
// replacement may have enabled optimizations by providing more
// specific types for trees or variables.
walker.WalkTree(stmt->GetRootNodePointer(), nullptr);
stmt = walker.WalkStatement(stmt);

GenTree* expr = stmt->GetRootNode();

Expand Down Expand Up @@ -805,6 +869,13 @@ PhaseStatus Compiler::fgInline()
madeChanges = true;
stmt->SetRootNode(expr->AsOp()->gtOp1);
}

#if defined(DEBUG)
// In debug builds we want the inline tree to show all failed
// inlines.
fgWalkTreePre(stmt->GetRootNodePointer(), fgFindNonInlineCandidate, stmt);
#endif
stmt = stmt->GetNextStmt();
}

block = block->Next();
Expand Down
19 changes: 14 additions & 5 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17005,6 +17005,8 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S
// firstNewStmt - [out] The first new statement that was introduced.
// [firstNewStmt..stmt) are the statements added by this function.
// splitNodeUse - The use of the tree to split at.
// early - The run is in the early phase where we still don't have valid
// GTF_GLOB_REF yet.
//
// Notes:
// This method turns all non-invariant nodes that would be executed before
Expand All @@ -17025,14 +17027,19 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S
// Returns:
// True if any changes were made; false if nothing needed to be done to split the tree.
//
bool Compiler::gtSplitTree(
BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse)
bool Compiler::gtSplitTree(BasicBlock* block,
Statement* stmt,
GenTree* splitPoint,
Statement** firstNewStmt,
GenTree*** splitNodeUse,
bool early)
{
class Splitter final : public GenTreeVisitor<Splitter>
{
BasicBlock* m_bb;
Statement* m_splitStmt;
GenTree* m_splitNode;
bool m_early;

struct UseInfo
{
Expand All @@ -17049,11 +17056,12 @@ bool Compiler::gtSplitTree(
UseExecutionOrder = true
};

Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode)
Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode, bool early)
: GenTreeVisitor(compiler)
, m_bb(bb)
, m_splitStmt(stmt)
, m_splitNode(splitNode)
, m_early(early)
, m_useStack(compiler->getAllocator(CMK_ArrayStack))
{
}
Expand Down Expand Up @@ -17195,7 +17203,8 @@ bool Compiler::gtSplitTree(
return;
}

if ((*use)->OperIs(GT_LCL_VAR) && !m_compiler->lvaGetDesc((*use)->AsLclVarCommon())->IsAddressExposed())
if ((*use)->OperIs(GT_LCL_VAR) && !m_compiler->lvaGetDesc((*use)->AsLclVarCommon())->IsAddressExposed() &&
!(m_early && m_compiler->lvaGetDesc((*use)->AsLclVarCommon())->lvHasLdAddrOp))
{
// The splitting we do here should always guarantee that we
// only introduce locals for the tree edges that overlap the
Expand Down Expand Up @@ -17278,7 +17287,7 @@ bool Compiler::gtSplitTree(
}
};

Splitter splitter(this, block, stmt, splitPoint);
Splitter splitter(this, block, stmt, splitPoint, early);
splitter.WalkTree(stmt->GetRootNodePointer(), nullptr);
*firstNewStmt = splitter.FirstStatement;
*splitNodeUse = splitter.SplitNodeUse;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG &&
(sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG);

call = gtNewIndCallNode(stubAddr, callRetTyp);
call = gtNewIndCallNode(stubAddr, callRetTyp, di);

call->gtFlags |= GTF_EXCEPT | (stubAddr->gtFlags & GTF_GLOB_EFFECT);
call->gtFlags |= GTF_CALL_VIRT_STUB;
Expand Down

0 comments on commit a9d67ec

Please sign in to comment.