Skip to content

Commit 7532f08

Browse files
committed
JIT: optimize type casts
Implement the jit interface compareTypesForCast method to handle casts from known types to known types, and from shared types to certain interface types. Call this method in the jit for castclass and isinst, using `gtGetClassHandle` to obtain the from type. Optimize successful casts and unsuccessful isinsts when the from type is known exactly. Undo part of the type-equality based optimization/workaround in the AsyncMethodBuilder code that was introduced in dotnet#14178 in favor of interface checks. There is more that can be done here before this issue is entirely closed, and I will look at this subsequently. This optimization allows the jit to remove boxes that are used solely to feed type casts, and so closes #12877.
1 parent 4cf4202 commit 7532f08

File tree

4 files changed

+262
-103
lines changed

4 files changed

+262
-103
lines changed

src/jit/compiler.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3156,6 +3156,8 @@ class Compiler
31563156
CORINFO_RESOLVED_TOKEN* pResolvedToken,
31573157
bool isCastClass);
31583158

3159+
GenTree* impOptimizeCastClassOrIsInst(GenTree* op1, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool isCastClass);
3160+
31593161
bool VarTypeIsMultiByteAndCanEnreg(var_types type,
31603162
CORINFO_CLASS_HANDLE typeClass,
31613163
unsigned* typeSize,

src/jit/importer.cpp

Lines changed: 173 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -9762,6 +9762,99 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr
97629762
return type;
97639763
}
97649764

9765+
//------------------------------------------------------------------------
9766+
// impOptimizeCastClassOrIsInst: attempt to resolve a cast when jitting
9767+
//
9768+
// Arguments:
9769+
// op1 -- value to cast
9770+
// pResolvedToken -- resolved token for type to cast to
9771+
// isCastClass -- true if this is a castclass, false if isinst
9772+
//
9773+
// Return Value:
9774+
// tree representing optimized cast, or null if no optimization possible
9775+
9776+
GenTree* Compiler::impOptimizeCastClassOrIsInst(GenTree* op1, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool isCastClass)
9777+
{
9778+
assert(op1->TypeGet() == TYP_REF);
9779+
9780+
// Don't optimize for minopts or debug codegen.
9781+
if (opts.compDbgCode || opts.MinOpts())
9782+
{
9783+
return nullptr;
9784+
}
9785+
9786+
// See what we know about the type of the object being cast.
9787+
bool isExact = false;
9788+
bool isNonNull = false;
9789+
CORINFO_CLASS_HANDLE fromClass = gtGetClassHandle(op1, &isExact, &isNonNull);
9790+
GenTree* optResult = nullptr;
9791+
9792+
if (fromClass != nullptr)
9793+
{
9794+
CORINFO_CLASS_HANDLE toClass = pResolvedToken->hClass;
9795+
JITDUMP("\nConsidering optimization of %s from %s%p (%s) to %p (%s)\n", isCastClass ? "castclass" : "isinst",
9796+
isExact ? "exact " : "", fromClass, info.compCompHnd->getClassName(fromClass), toClass,
9797+
info.compCompHnd->getClassName(toClass));
9798+
9799+
// Perhaps we know if the cast will succeed or fail.
9800+
TypeCompareState castResult = info.compCompHnd->compareTypesForCast(fromClass, toClass);
9801+
9802+
if (castResult == TypeCompareState::Must)
9803+
{
9804+
// Cast will succeed, result is simply op1.
9805+
JITDUMP("Cast will succeed, optimizing to simply return input\n");
9806+
return op1;
9807+
}
9808+
else if (castResult == TypeCompareState::MustNot)
9809+
{
9810+
// See if we can sharpen exactness by looking for final classes
9811+
if (!isExact)
9812+
{
9813+
DWORD flags = info.compCompHnd->getClassAttribs(fromClass);
9814+
DWORD flagsMask = CORINFO_FLG_FINAL | CORINFO_FLG_MARSHAL_BYREF | CORINFO_FLG_CONTEXTFUL |
9815+
CORINFO_FLG_VARIANCE | CORINFO_FLG_ARRAY;
9816+
isExact = ((flags & flagsMask) == CORINFO_FLG_FINAL);
9817+
}
9818+
9819+
// Cast to exact type will fail. Handle case where we have
9820+
// an exact type (that is, fromClass is not a subtype)
9821+
// and we're not going to throw on failure.
9822+
if (isExact && !isCastClass)
9823+
{
9824+
JITDUMP("Cast will fail, optimizing to return null\n");
9825+
GenTree* result = gtNewIconNode(0, TYP_REF);
9826+
9827+
// If the cast was fed by a box, we can remove that too.
9828+
if (op1->IsBoxedValue())
9829+
{
9830+
JITDUMP("Also removing upstream box\n");
9831+
gtTryRemoveBoxUpstreamEffects(op1);
9832+
}
9833+
9834+
return result;
9835+
}
9836+
else if (isExact)
9837+
{
9838+
JITDUMP("Not optimizing failing castclass (yet)\n");
9839+
}
9840+
else
9841+
{
9842+
JITDUMP("Can't optimize since fromClass is inexact\n");
9843+
}
9844+
}
9845+
else
9846+
{
9847+
JITDUMP("Result of cast unknown, must generate runtime test\n");
9848+
}
9849+
}
9850+
else
9851+
{
9852+
JITDUMP("\nCan't optimize since fromClass is unknown\n");
9853+
}
9854+
9855+
return nullptr;
9856+
}
9857+
97659858
//------------------------------------------------------------------------
97669859
// impCastClassOrIsInstToTree: build and import castclass/isinst
97679860
//
@@ -10203,6 +10296,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
1020310296
var_types lclTyp, ovflType = TYP_UNKNOWN;
1020410297
GenTreePtr op1 = DUMMY_INIT(NULL);
1020510298
GenTreePtr op2 = DUMMY_INIT(NULL);
10299+
GenTree* optTree = nullptr;
1020610300
GenTreeArgList* args = nullptr; // What good do these "DUMMY_INIT"s do?
1020710301
GenTreePtr newObjThisPtr = DUMMY_INIT(NULL);
1020810302
bool uns = DUMMY_INIT(false);
@@ -14262,43 +14356,54 @@ void Compiler::impImportBlockCode(BasicBlock* block)
1426214356

1426314357
op1 = impPopStack().val;
1426414358

14265-
#ifdef FEATURE_READYTORUN_COMPILER
14266-
if (opts.IsReadyToRun())
14359+
optTree = impOptimizeCastClassOrIsInst(op1, &resolvedToken, false);
14360+
14361+
if (optTree != nullptr)
14362+
{
14363+
impPushOnStack(optTree, tiRetVal);
14364+
}
14365+
else
1426714366
{
14268-
GenTreeCall* opLookup =
14269-
impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_ISINSTANCEOF, TYP_REF,
14270-
gtNewArgList(op1));
14271-
usingReadyToRunHelper = (opLookup != nullptr);
14272-
op1 = (usingReadyToRunHelper ? opLookup : op1);
1427314367

14274-
if (!usingReadyToRunHelper)
14368+
#ifdef FEATURE_READYTORUN_COMPILER
14369+
if (opts.IsReadyToRun())
1427514370
{
14276-
// TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
14277-
// and the isinstanceof_any call with a single call to a dynamic R2R cell that will:
14278-
// 1) Load the context
14279-
// 2) Perform the generic dictionary lookup and caching, and generate the appropriate stub
14280-
// 3) Perform the 'is instance' check on the input object
14281-
// Reason: performance (today, we'll always use the slow helper for the R2R generics case)
14371+
GenTreeCall* opLookup =
14372+
impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_ISINSTANCEOF, TYP_REF,
14373+
gtNewArgList(op1));
14374+
usingReadyToRunHelper = (opLookup != nullptr);
14375+
op1 = (usingReadyToRunHelper ? opLookup : op1);
1428214376

14283-
op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE);
14284-
if (op2 == nullptr)
14285-
{ // compDonotInline()
14286-
return;
14377+
if (!usingReadyToRunHelper)
14378+
{
14379+
// TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
14380+
// and the isinstanceof_any call with a single call to a dynamic R2R cell that will:
14381+
// 1) Load the context
14382+
// 2) Perform the generic dictionary lookup and caching, and generate the appropriate
14383+
// stub
14384+
// 3) Perform the 'is instance' check on the input object
14385+
// Reason: performance (today, we'll always use the slow helper for the R2R generics case)
14386+
14387+
op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE);
14388+
if (op2 == nullptr)
14389+
{ // compDonotInline()
14390+
return;
14391+
}
1428714392
}
1428814393
}
14289-
}
1429014394

14291-
if (!usingReadyToRunHelper)
14395+
if (!usingReadyToRunHelper)
1429214396
#endif
14293-
{
14294-
op1 = impCastClassOrIsInstToTree(op1, op2, &resolvedToken, false);
14295-
}
14296-
if (compDonotInline())
14297-
{
14298-
return;
14299-
}
14397+
{
14398+
op1 = impCastClassOrIsInstToTree(op1, op2, &resolvedToken, false);
14399+
}
14400+
if (compDonotInline())
14401+
{
14402+
return;
14403+
}
1430014404

14301-
impPushOnStack(op1, tiRetVal);
14405+
impPushOnStack(op1, tiRetVal);
14406+
}
1430214407

1430314408
break;
1430414409

@@ -14796,43 +14901,55 @@ void Compiler::impImportBlockCode(BasicBlock* block)
1479614901
// and op2 to contain code that creates the type handle corresponding to typeRef
1479714902
CASTCLASS:
1479814903

14799-
#ifdef FEATURE_READYTORUN_COMPILER
14800-
if (opts.IsReadyToRun())
14904+
optTree = impOptimizeCastClassOrIsInst(op1, &resolvedToken, true);
14905+
14906+
if (optTree != nullptr)
14907+
{
14908+
impPushOnStack(optTree, tiRetVal);
14909+
}
14910+
else
1480114911
{
14802-
GenTreeCall* opLookup = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_CHKCAST,
14803-
TYP_REF, gtNewArgList(op1));
14804-
usingReadyToRunHelper = (opLookup != nullptr);
14805-
op1 = (usingReadyToRunHelper ? opLookup : op1);
1480614912

14807-
if (!usingReadyToRunHelper)
14913+
#ifdef FEATURE_READYTORUN_COMPILER
14914+
if (opts.IsReadyToRun())
1480814915
{
14809-
// TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
14810-
// and the chkcastany call with a single call to a dynamic R2R cell that will:
14811-
// 1) Load the context
14812-
// 2) Perform the generic dictionary lookup and caching, and generate the appropriate stub
14813-
// 3) Check the object on the stack for the type-cast
14814-
// Reason: performance (today, we'll always use the slow helper for the R2R generics case)
14916+
GenTreeCall* opLookup =
14917+
impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_CHKCAST, TYP_REF,
14918+
gtNewArgList(op1));
14919+
usingReadyToRunHelper = (opLookup != nullptr);
14920+
op1 = (usingReadyToRunHelper ? opLookup : op1);
1481514921

14816-
op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE);
14817-
if (op2 == nullptr)
14818-
{ // compDonotInline()
14819-
return;
14922+
if (!usingReadyToRunHelper)
14923+
{
14924+
// TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
14925+
// and the chkcastany call with a single call to a dynamic R2R cell that will:
14926+
// 1) Load the context
14927+
// 2) Perform the generic dictionary lookup and caching, and generate the appropriate
14928+
// stub
14929+
// 3) Check the object on the stack for the type-cast
14930+
// Reason: performance (today, we'll always use the slow helper for the R2R generics case)
14931+
14932+
op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE);
14933+
if (op2 == nullptr)
14934+
{ // compDonotInline()
14935+
return;
14936+
}
1482014937
}
1482114938
}
14822-
}
1482314939

14824-
if (!usingReadyToRunHelper)
14940+
if (!usingReadyToRunHelper)
1482514941
#endif
14826-
{
14827-
op1 = impCastClassOrIsInstToTree(op1, op2, &resolvedToken, true);
14828-
}
14829-
if (compDonotInline())
14830-
{
14831-
return;
14832-
}
14942+
{
14943+
op1 = impCastClassOrIsInstToTree(op1, op2, &resolvedToken, true);
14944+
}
14945+
if (compDonotInline())
14946+
{
14947+
return;
14948+
}
1483314949

14834-
/* Push the result back on the stack */
14835-
impPushOnStack(op1, tiRetVal);
14950+
/* Push the result back on the stack */
14951+
impPushOnStack(op1, tiRetVal);
14952+
}
1483614953
break;
1483714954

1483814955
case CEE_THROW:

src/mscorlib/src/System/Runtime/CompilerServices/AsyncMethodBuilder.cs

Lines changed: 4 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -387,33 +387,14 @@ public void AwaitUnsafeOnCompleted<TAwaiter, TStateMachine>(
387387
{
388388
IAsyncStateMachineBox box = GetStateMachineBox(ref stateMachine);
389389

390-
// TODO https://github.com/dotnet/coreclr/issues/12877:
391-
// Once the JIT is able to recognize "awaiter is ITaskAwaiter" and "awaiter is IConfiguredTaskAwaiter",
392-
// use those in order to a) consolidate a lot of this code, and b) handle all Task/Task<T> and not just
393-
// the few types special-cased here. For now, handle common {Configured}TaskAwaiter. Having the types
394-
// explicitly listed here allows the JIT to generate the best code for them; otherwise we'll fall through
395-
// to the later workaround.
396-
if (typeof(TAwaiter) == typeof(TaskAwaiter) ||
397-
typeof(TAwaiter) == typeof(TaskAwaiter<object>) ||
398-
typeof(TAwaiter) == typeof(TaskAwaiter<string>) ||
399-
typeof(TAwaiter) == typeof(TaskAwaiter<byte[]>) ||
400-
typeof(TAwaiter) == typeof(TaskAwaiter<bool>) ||
401-
typeof(TAwaiter) == typeof(TaskAwaiter<byte>) ||
402-
typeof(TAwaiter) == typeof(TaskAwaiter<int>) ||
403-
typeof(TAwaiter) == typeof(TaskAwaiter<long>))
390+
// The null tests here ensure that the jit can optimize away the interface
391+
// tests when TAwaiter is a ref type.
392+
if ((null != (object)default(TAwaiter)) && (awaiter is ITaskAwaiter))
404393
{
405394
ref TaskAwaiter ta = ref Unsafe.As<TAwaiter, TaskAwaiter>(ref awaiter); // relies on TaskAwaiter/TaskAwaiter<T> having the same layout
406395
TaskAwaiter.UnsafeOnCompletedInternal(ta.m_task, box, continueOnCapturedContext: true);
407396
}
408-
else if (
409-
typeof(TAwaiter) == typeof(ConfiguredTaskAwaitable.ConfiguredTaskAwaiter) ||
410-
typeof(TAwaiter) == typeof(ConfiguredTaskAwaitable<object>.ConfiguredTaskAwaiter) ||
411-
typeof(TAwaiter) == typeof(ConfiguredTaskAwaitable<string>.ConfiguredTaskAwaiter) ||
412-
typeof(TAwaiter) == typeof(ConfiguredTaskAwaitable<byte[]>.ConfiguredTaskAwaiter) ||
413-
typeof(TAwaiter) == typeof(ConfiguredTaskAwaitable<bool>.ConfiguredTaskAwaiter) ||
414-
typeof(TAwaiter) == typeof(ConfiguredTaskAwaitable<byte>.ConfiguredTaskAwaiter) ||
415-
typeof(TAwaiter) == typeof(ConfiguredTaskAwaitable<int>.ConfiguredTaskAwaiter) ||
416-
typeof(TAwaiter) == typeof(ConfiguredTaskAwaitable<long>.ConfiguredTaskAwaiter))
397+
else if ((null != (object)default(TAwaiter)) && (awaiter is IConfiguredTaskAwaiter))
417398
{
418399
ref ConfiguredTaskAwaitable.ConfiguredTaskAwaiter ta = ref Unsafe.As<TAwaiter, ConfiguredTaskAwaitable.ConfiguredTaskAwaiter>(ref awaiter);
419400
TaskAwaiter.UnsafeOnCompletedInternal(ta.m_task, box, ta.m_continueOnCapturedContext);
@@ -450,21 +431,6 @@ public void AwaitUnsafeOnCompleted<TAwaiter, TStateMachine>(
450431
TaskAwaiter.UnsafeOnCompletedInternal(vta.AsTask(), box, vta._continueOnCapturedContext);
451432
}
452433

453-
// To catch all Task/Task<T> awaits, do the currently more expensive interface checks.
454-
// Eventually these and the above Task/Task<T> checks should be replaced by "is" checks,
455-
// once that's recognized and optimized by the JIT. We do these after all of the hardcoded
456-
// checks above so that they don't incur the costs of these checks.
457-
else if (InterfaceIsCheckWorkaround<TAwaiter>.IsITaskAwaiter)
458-
{
459-
ref TaskAwaiter ta = ref Unsafe.As<TAwaiter, TaskAwaiter>(ref awaiter);
460-
TaskAwaiter.UnsafeOnCompletedInternal(ta.m_task, box, continueOnCapturedContext: true);
461-
}
462-
else if (InterfaceIsCheckWorkaround<TAwaiter>.IsIConfiguredTaskAwaiter)
463-
{
464-
ref ConfiguredTaskAwaitable.ConfiguredTaskAwaiter ta = ref Unsafe.As<TAwaiter, ConfiguredTaskAwaitable.ConfiguredTaskAwaiter>(ref awaiter);
465-
TaskAwaiter.UnsafeOnCompletedInternal(ta.m_task, box, ta.m_continueOnCapturedContext);
466-
}
467-
468434
// The awaiter isn't specially known. Fall back to doing a normal await.
469435
else
470436
{
@@ -922,13 +888,6 @@ internal static Task<TResult> CreateCacheableTask<TResult>(TResult result) =>
922888
new Task<TResult>(false, result, (TaskCreationOptions)InternalTaskOptions.DoNotDispose, default(CancellationToken));
923889
}
924890

925-
/// <summary>Temporary workaround for https://github.com/dotnet/coreclr/issues/12877.</summary>
926-
internal static class InterfaceIsCheckWorkaround<TAwaiter>
927-
{
928-
internal static readonly bool IsITaskAwaiter = typeof(TAwaiter).GetInterface("ITaskAwaiter") != null;
929-
internal static readonly bool IsIConfiguredTaskAwaiter = typeof(TAwaiter).GetInterface("IConfiguredTaskAwaiter") != null;
930-
}
931-
932891
/// <summary>
933892
/// An interface implemented by all <see cref="AsyncStateMachineBox{TStateMachine, TResult}"/> instances, regardless of generics.
934893
/// </summary>

0 commit comments

Comments
 (0)