Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

JIT: refactor how we do late devirtualization #20553

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5315,7 +5315,8 @@ class Compiler
void fgAttachStructInlineeToAsg(GenTree* tree, GenTree* child, CORINFO_CLASS_HANDLE retClsHnd);
#endif // FEATURE_MULTIREG_RET

static fgWalkPreFn fgUpdateInlineReturnExpressionPlaceHolder;
static fgWalkPreFn fgUpdateInlineReturnExpressionPlaceHolder;
static fgWalkPostFn fgLateDevirtualization;

#ifdef DEBUG
static fgWalkPreFn fgDebugCheckInlineCandidates;
Expand Down
203 changes: 128 additions & 75 deletions src/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21831,7 +21831,20 @@ void Compiler::fgInline()

// See if we need to replace the return value place holder.
// Also, see if this update enables further devirtualization.
fgWalkTreePre(&stmt->gtStmtExpr, fgUpdateInlineReturnExpressionPlaceHolder, (void*)this);
//
// Note we have both preorder and postorder callbacks here.
//
// The preorder callback is responsible for replacing GT_RET_EXPRs
// with the appropriate expansion (call or inline result).
// Replacement may introduce subtrees with GT_RET_EXPR and so
// we rely on the preorder to recursively process those as well.
//
// On the way back up, the postorder callback then re-examines nodes for
// possible further optimization, as the (now complete) GT_RET_EXPR
// replacement may have enabled optimizations by providing more
// specific types for trees or variables.
fgWalkTree(&stmt->gtStmtExpr, fgUpdateInlineReturnExpressionPlaceHolder, fgLateDevirtualization,
(void*)this);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice comment.


// See if stmt is of the form GT_COMMA(call, nop)
// If yes, we can get rid of GT_COMMA.
Expand Down Expand Up @@ -22122,25 +22135,29 @@ void Compiler::fgAttachStructInlineeToAsg(GenTree* tree, GenTree* child, CORINFO
// the inlinee return expression if the inline is successful (see
// tail end of fgInsertInlineeBlocks for the update of iciCall).
//
// If the parent of the GT_RET_EXPR is a virtual call,
// devirtualization is attempted. This should only succeed in the
// successful inline case, when the inlinee's return value
// expression provides a better type than the return type of the
// method. Note for failed inlines, the devirtualizer can only go
// by the return type, and any devirtualization that type enabled
// would have already happened during importation.
//
// If the return type is a struct type and we're on a platform
// where structs can be returned in multiple registers, ensure the
// call has a suitable parent.

Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTree** pTree, fgWalkData* data)
{
GenTree* tree = *pTree;
// All the operations here and in the corresponding postorder
// callback (fgLateDevirtualization) are triggered by GT_CALL or
// GT_RET_EXPR trees, and these (should) have the call side
// effect flag.
//
// So bail out for any trees that don't have this flag.
GenTree* tree = *pTree;

if ((tree->gtFlags & GTF_CALL) == 0)
{
return WALK_SKIP_SUBTREES;
}

Compiler* comp = data->compiler;
CORINFO_CLASS_HANDLE retClsHnd = NO_CLASS_HANDLE;

if (tree->gtOper == GT_RET_EXPR)
if (tree->OperGet() == GT_RET_EXPR)
{
// We are going to copy the tree from the inlinee,
// so record the handle now.
Expand All @@ -22150,71 +22167,33 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTr
retClsHnd = tree->gtRetExpr.gtRetClsHnd;
}

do
{
// Obtained the expanded inline candidate
GenTree* inlineCandidate = tree->gtRetExpr.gtInlineCandidate;
// Skip through chains of GT_RET_EXPRs (say from nested inlines)
// to the actual tree to use.
GenTree* inlineCandidate = tree->gtRetExprVal();

#ifdef DEBUG
if (comp->verbose)
{
printf("\nReplacing the return expression placeholder ");
printTreeID(tree);
printf(" with ");
printTreeID(inlineCandidate);
printf("\n");
// Dump out the old return expression placeholder it will be overwritten by the ReplaceWith below
comp->gtDispTree(tree);
}
if (comp->verbose)
{
printf("\nReplacing the return expression placeholder ");
printTreeID(tree);
printf(" with ");
printTreeID(inlineCandidate);
printf("\n");
// Dump out the old return expression placeholder it will be overwritten by the ReplaceWith below
comp->gtDispTree(tree);
}
#endif // DEBUG

tree->ReplaceWith(inlineCandidate, comp);
tree->ReplaceWith(inlineCandidate, comp);

#ifdef DEBUG
if (comp->verbose)
{
printf("\nInserting the inline return expression\n");
comp->gtDispTree(tree);
printf("\n");
}
#endif // DEBUG
} while (tree->gtOper == GT_RET_EXPR);

// Now see if this return value expression feeds the 'this'
// object at a virtual call site.
//
// Note for void returns where the inline failed, the
// GT_RET_EXPR may be top-level.
//
// May miss cases where there are intermediaries between call
// and this, eg commas.
GenTree* parentTree = data->parent;

if ((parentTree != nullptr) && (parentTree->gtOper == GT_CALL))
if (comp->verbose)
{
GenTreeCall* call = parentTree->AsCall();
bool tryLateDevirt = call->IsVirtual() && (call->gtCallObjp == tree) && (call->gtCallType == CT_USER_FUNC);

#ifdef DEBUG
tryLateDevirt = tryLateDevirt && (JitConfig.JitEnableLateDevirtualization() == 1);
#endif // DEBUG

if (tryLateDevirt)
{
#ifdef DEBUG
if (comp->verbose)
{
printf("**** Late devirt opportunity\n");
comp->gtDispTree(call);
}
#endif // DEBUG

CORINFO_METHOD_HANDLE method = call->gtCallMethHnd;
unsigned methodFlags = 0;
CORINFO_CONTEXT_HANDLE context = nullptr;
comp->impDevirtualizeCall(call, &method, &methodFlags, &context, nullptr);
}
printf("\nInserting the inline return expression\n");
comp->gtDispTree(tree);
printf("\n");
}
#endif // DEBUG
}

// If an inline was rejected and the call returns a struct, we may
Expand Down Expand Up @@ -22333,16 +22312,17 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTr
// leaving an assert in here. This can be fixed by looking ahead
// when we visit GT_ASG similar to fgAttachStructInlineeToAsg.
//
if ((tree->gtOper == GT_ASG) && (tree->gtOp.gtOp2->gtOper == GT_COMMA))
if (tree->OperGet() == GT_ASG)
{
GenTree* comma;
for (comma = tree->gtOp.gtOp2; comma->gtOper == GT_COMMA; comma = comma->gtOp.gtOp2)
GenTree* value = tree->gtOp.gtOp2;

if (value->OperGet() == GT_COMMA)
{
// empty
}
GenTree* effectiveValue = value->gtEffectiveVal(/*commaOnly*/ true);

noway_assert(!varTypeIsStruct(comma) || comma->gtOper != GT_RET_EXPR ||
!comp->IsMultiRegReturnedType(comma->gtRetExpr.gtRetClsHnd));
noway_assert(!varTypeIsStruct(effectiveValue) || (effectiveValue->OperGet() != GT_RET_EXPR) ||
!comp->IsMultiRegReturnedType(effectiveValue->gtRetExpr.gtRetClsHnd));
}
}

#endif // defined(DEBUG)
Expand All @@ -22351,6 +22331,79 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTr
return WALK_CONTINUE;
}

//------------------------------------------------------------------------
// fgLateDevirtualization: re-examine calls after inlining to see if we
// can do more devirtualization
//
// Arguments:
// pTree -- pointer to tree to examine for updates
// data -- context data for the tree walk
//
// Returns:
// fgWalkResult indicating the walk should continue; that
// is we wish to fully explore the tree.
//
// Notes:
// We used to check this opportunistically in the preorder callback for
// calls where the `obj` was fed by a return, but we now re-examine
// all calls.
//
// Late devirtualization (and eventually, perhaps, other type-driven
// opts like cast optimization) can happen now because inlining or other
// optimizations may have provided more accurate types than we saw when
// first importing the trees.
//
// It would be nice to screen candidate sites based on the likelihood
// that something has changed. Otherwise we'll waste some time retrying
// an optimization that will just fail again.

Compiler::fgWalkResult Compiler::fgLateDevirtualization(GenTree** pTree, fgWalkData* data)
{
GenTree* tree = *pTree;
GenTree* parent = data->parent;
Compiler* comp = data->compiler;

// In some (rare) cases the parent node of tree will be smashed to a NOP during
// the preorder by fgAttachStructToInlineeArg.
//
// jit\Methodical\VT\callconv\_il_reljumper3 for x64 linux
//
// If so, just bail out here.
if ((parent != nullptr) && parent->OperGet() == GT_NOP)
{
assert(tree == nullptr);
return WALK_CONTINUE;
}

if (tree->OperGet() == GT_CALL)
{
GenTreeCall* call = tree->AsCall();
bool tryLateDevirt = call->IsVirtual() && (call->gtCallType == CT_USER_FUNC);

#ifdef DEBUG
tryLateDevirt = tryLateDevirt && (JitConfig.JitEnableLateDevirtualization() == 1);
#endif // DEBUG

if (tryLateDevirt)
{
#ifdef DEBUG
if (comp->verbose)
{
printf("**** Late devirt opportunity\n");
comp->gtDispTree(call);
}
#endif // DEBUG

CORINFO_METHOD_HANDLE method = call->gtCallMethHnd;
unsigned methodFlags = 0;
CORINFO_CONTEXT_HANDLE context = nullptr;
comp->impDevirtualizeCall(call, &method, &methodFlags, &context, nullptr);
}
}

return WALK_CONTINUE;
}

#ifdef DEBUG

/*****************************************************************************
Expand Down
6 changes: 6 additions & 0 deletions src/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -3833,6 +3833,11 @@ struct GenTreeCmpXchg : public GenTree
// There's no reason to do a compare-exchange on a local location, so we'll assume that all of these
// have global effects.
gtFlags |= (GTF_GLOB_REF | GTF_ASG);

// Merge in flags from operands
gtFlags |= gtOpLocation->gtFlags & GTF_ALL_EFFECT;
gtFlags |= gtOpValue->gtFlags & GTF_ALL_EFFECT;
gtFlags |= gtOpComparand->gtFlags & GTF_ALL_EFFECT;
}
#if DEBUGGABLE_GENTREE
GenTreeCmpXchg() : GenTree()
Expand Down Expand Up @@ -4367,6 +4372,7 @@ struct GenTreeArrElem : public GenTree
for (unsigned char i = 0; i < rank; i++)
{
gtArrInds[i] = inds[i];
gtFlags |= (inds[i]->gtFlags & GTF_ALL_EFFECT);
}
gtFlags |= GTF_EXCEPT;
}
Expand Down
7 changes: 1 addition & 6 deletions src/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19458,13 +19458,8 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call,
// See if we have special knowlege that can get us a type or a better type.
if ((objClass == nullptr) || !isExact)
{
actualThisObj = thisObj;

// Walk back through any return expression placeholders
while (actualThisObj->OperGet() == GT_RET_EXPR)
{
actualThisObj = actualThisObj->gtRetExpr.gtInlineCandidate;
}
actualThisObj = thisObj->gtRetExprVal();

// See if we landed on a call to a special intrinsic method
if (actualThisObj->IsCall())
Expand Down