Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable inlining P/Invokes into try blocks with no catch or filter clauses V2 #73661

Merged
merged 6 commits into from
Aug 23, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/coreclr/inc/corinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -3244,4 +3244,9 @@ class ICorDynamicInfo : public ICorStaticInfo
//
#define IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL 0x14

/**********************************************************************************/
#ifdef TARGET_64BIT
#define USE_PER_FRAME_PINVOKE_INIT
#endif

#endif // _COR_INFO_H_
50 changes: 26 additions & 24 deletions src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8428,41 +8428,43 @@ bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block)
return true;
}

#ifdef TARGET_64BIT
// On 64-bit platforms, we disable pinvoke inlining inside of try regions.
// Note that this could be needed on other architectures too, but we
// haven't done enough investigation to know for sure at this point.
//
// Here is the comment from JIT64 explaining why:
// [VSWhidbey: 611015] - because the jitted code links in the
// Frame (instead of the stub) we rely on the Frame not being
// 'active' until inside the stub. This normally happens by the
// stub setting the return address pointer in the Frame object
// inside the stub. On a normal return, the return address
// pointer is zeroed out so the Frame can be safely re-used, but
// if an exception occurs, nobody zeros out the return address
// pointer. Thus if we re-used the Frame object, it would go
// 'active' as soon as we link it into the Frame chain.
//
// Technically we only need to disable PInvoke inlining if we're
// in a handler or if we're in a try body with a catch or
// filter/except where other non-handler code in this method
// might run and try to re-use the dirty Frame object.
//
// A desktop test case where this seems to matter is
// jit\jit64\ebvts\mcpp\sources2\ijw\__clrcall\vector_ctor_dtor.02\deldtor_clr.exe
#ifdef USE_PER_FRAME_PINVOKE_INIT
// For platforms that use per-P/Invoke InlinedCallFrame initialization,
// we can't inline P/Invokes inside of try blocks where we can resume execution in the same function.
// The runtime can correctly unwind out of an InlinedCallFrame and out of managed code. However,
// it cannot correctly unwind out of an InlinedCallFrame and stop at that frame without also unwinding
// at least one managed frame. In particular, the runtime struggles to restore non-volatile registers
// from the top-most unmanaged call before the InlinedCallFrame. As a result, the runtime does not support
// re-entering the same method frame as the InlinedCallFrame after an exception in unmanaged code.
if (block->hasTryIndex())
{
// This does not apply to the raw pinvoke call that is inside the pinvoke
// ILStub. In this case, we have to inline the raw pinvoke call into the stub,
// otherwise we would end up with a stub that recursively calls itself, and end
// up with a stack overflow.
// This works correctly because the runtime never emits a catch block in a managed-to-native
// IL stub. If the runtime ever emits a catch block into a managed-to-native stub when using
// P/Invoke helpers, this condition will need to be revisited.
if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB) && opts.ShouldUsePInvokeHelpers())
{
return true;
}

return false;
// Check if this block's try block or any containing try blocks have catch handlers.
// If any of the containing try blocks have catch handlers,
// we cannot inline a P/Invoke for reasons above. If the handler is a fault or finally handler,
// we can inline a P/Invoke into this block in the try since the code will not resume execution
// in the same method after throwing an exception if only fault or finally handlers are executed.
for (unsigned int ehIndex = block->getTryIndex(); ehIndex != EHblkDsc::NO_ENCLOSING_INDEX;
ehIndex = ehGetEnclosingTryIndex(ehIndex))
{
if (ehGetDsc(ehIndex)->HasCatchHandler())
{
return false;
}
}

return true;
}
#endif // TARGET_64BIT

Expand Down
21 changes: 13 additions & 8 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4281,6 +4281,7 @@ GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action)
// Return Value:
// none
//
// See the usages for USE_PER_FRAME_PINVOKE_INIT for more information.
void Lowering::InsertPInvokeMethodProlog()
{
noway_assert(comp->info.compUnmanagedCallCountWithGCTransition);
Expand Down Expand Up @@ -4377,13 +4378,16 @@ void Lowering::InsertPInvokeMethodProlog()
// --------------------------------------------------------
// On 32-bit targets, CORINFO_HELP_INIT_PINVOKE_FRAME initializes the PInvoke frame and then pushes it onto
// the current thread's Frame stack. On 64-bit targets, it only initializes the PInvoke frame.
// As a result, don't push the frame onto the frame stack here for any 64-bit targets
CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef TARGET_64BIT
#ifdef USE_PER_FRAME_PINVOKE_INIT
// For IL stubs, we push the frame once even when we're doing per-pinvoke init.
if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
#endif // USE_PER_FRAME_PINVOKE_INIT
{
// Push a frame - if we are NOT in an IL stub, this is done right before the call
// The init routine sets InlinedCallFrame's m_pNext, so we just set the thead's top-of-stack
// Push a frame. The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack
GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
ContainCheckStoreIndir(frameUpd->AsStoreInd());
Expand Down Expand Up @@ -4443,9 +4447,10 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTree*
// this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call.
CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef TARGET_64BIT
#ifdef USE_PER_FRAME_PINVOKE_INIT
// For IL stubs, we push the frame once even when we're doing per-pinvoke init
if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
#endif // TARGET_64BIT
#endif // USE_PER_FRAME_PINVOKE_INIT
{
GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame);
returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
Expand Down Expand Up @@ -4601,7 +4606,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
// contains PInvokes; on 64-bit targets this is necessary in non-stubs.
CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef TARGET_64BIT
#ifdef USE_PER_FRAME_PINVOKE_INIT
if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
{
// Set the TCB's frame to be the one we just created.
Expand All @@ -4613,7 +4618,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd));
ContainCheckStoreIndir(frameUpd->AsStoreInd());
}
#endif // TARGET_64BIT
#endif // USE_PER_FRAME_PINVOKE_INIT

// IMPORTANT **** This instruction must be the last real instruction ****
// It changes the thread's state to Preemptive mode
Expand Down Expand Up @@ -4679,7 +4684,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
// this happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive.
CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef TARGET_64BIT
#ifdef USE_PER_FRAME_PINVOKE_INIT
if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
{
tree = CreateFrameLinkUpdate(PopFrame);
Expand All @@ -4703,7 +4708,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)

BlockRange().InsertBefore(insertionPoint, constantZero, storeCallSiteTracker);
ContainCheckStoreLoc(storeCallSiteTracker);
#endif // TARGET_64BIT
#endif // USE_PER_FRAME_PINVOKE_INIT
}

//------------------------------------------------------------------------
Expand Down
43 changes: 31 additions & 12 deletions src/coreclr/vm/exceptionhandling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "virtualcallstub.h"
#include "utilcode.h"
#include "interoplibinterface.h"
#include "corinfo.h"

#if defined(TARGET_X86)
#define USE_CURRENT_CONTEXT_IN_FILTER
Expand Down Expand Up @@ -1776,8 +1777,10 @@ CLRUnwindStatus ExceptionTracker::ProcessOSExceptionNotification(
// InlinedCallFrames (ICF) are allocated, initialized and linked to the Frame chain
// by the code generated by the JIT for a method containing a PInvoke.
//
// JIT generates code that links in the ICF at the start of the method and unlinks it towards
// the method end. Thus, ICF is present on the Frame chain at any given point so long as the
// On platforms where USE_PER_FRAME_PINVOKE_INIT is not defined,
// the JIT generates code that links in the ICF
// at the start of the method and unlinks it towards the method end.
// Thus, ICF is present on the Frame chain at any given point so long as the
// method containing the PInvoke is on the stack.
//
// Now, if the method containing ICF catches an exception, we will reset the Frame chain
Expand Down Expand Up @@ -1815,13 +1818,16 @@ CLRUnwindStatus ExceptionTracker::ProcessOSExceptionNotification(
// below the callerSP for which we will invoke ExceptionUnwind.
//
// Thus, ICF::ExceptionUnwind should not do anything significant. If any of these assumptions
// break, then the next best thing will be to make the JIT link/unlink the frame dynamically.
// break, then the next best thing will be to make the JIT link/unlink the frame dynamically
//
// If the current method executing is from precompiled ReadyToRun code, then the above is no longer
// applicable because each PInvoke is wrapped by calls to the JIT_PInvokeBegin and JIT_PInvokeEnd
// helpers, which push and pop the ICF to the current thread. Unlike jitted code, the ICF is not
// linked during the method prolog, and unlinked at the epilog (it looks more like the X64 case).
// If the current method executing is from precompiled ReadyToRun code, each PInvoke is wrapped
// by calls to the JIT_PInvokeBegin and JIT_PInvokeEnd helpers,
// which push and pop the ICF to the current thread. The ICF is not
// linked during the method prolog, and unlinked at the epilog.
// In that case, we need to unlink the ICF during unwinding here.
// On platforms where USE_PER_FRAME_PINVOKE_INIT is defined, the JIT generates code that links in
// the ICF immediately before and after a PInvoke in non-IL-stubs, like ReadyToRun.
// See the usages for USE_PER_FRAME_PINVOKE_INIT for more information.

if (fTargetUnwind && (pFrame->GetVTablePtr() == InlinedCallFrame::GetMethodFrameVPtr()))
{
Expand All @@ -1830,8 +1836,10 @@ CLRUnwindStatus ExceptionTracker::ProcessOSExceptionNotification(
//
// 1) ICF address is higher than the current frame's SP (which we get from DispatcherContext), AND
// 2) ICF address is below callerSP.
if ((GetSP(pDispatcherContext->ContextRecord) < (TADDR)pICF) &&
((UINT_PTR)pICF < uCallerSP))
// 3) ICF is active.
// -
if ((GetSP(pDispatcherContext->ContextRecord) < (TADDR)pICF)
&& ((UINT_PTR)pICF < uCallerSP))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we don't link the ICF frame for the whole stub on any platform in any case anymore?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still link it for the whole stub on 32-bit platforms. This condition didn't change between main and this PR (the V1 version of the change added the IsActiveCall check, but the V2 version doesn't include it).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll get rid of the diff here since I'm not changing the logic any more

jkoritzinsky marked this conversation as resolved.
Show resolved Hide resolved
{
pICFForUnwindTarget = pFrame;

Expand All @@ -1840,9 +1848,20 @@ CLRUnwindStatus ExceptionTracker::ProcessOSExceptionNotification(
// to the JIT_PInvokeBegin and JIT_PInvokeEnd helpers, which push and pop the ICF on the thread. The
// ICF is not linked at the method prolog and unlined at the epilog when running R2R code. Since the
// JIT_PInvokeEnd helper will be skipped, we need to unlink the ICF here. If the executing method
// has another pinovoke, it will re-link the ICF again when the JIT_PInvokeBegin helper is called

if (ExecutionManager::IsReadyToRunCode(((InlinedCallFrame*)pFrame)->m_pCallerReturnAddress))
// has another pinvoke, it will re-link the ICF again when the JIT_PInvokeBegin helper is called.

TADDR returnAddress = ((InlinedCallFrame*)pFrame)->m_pCallerReturnAddress;
#ifdef USE_PER_FRAME_PINVOKE_INIT
// If we're setting up the frame for each P/Invoke for the given platform,
// then we do this for all P/Invokes except ones in IL stubs.
// IL stubs link the frame in for the whole stub, so if an exception is thrown during marshalling,
// the ICF will be on the frame chain and inactive.
if (returnAddress != NULL && !ExecutionManager::GetCodeMethodDesc(returnAddress)->IsILStub())
#else
// If we aren't setting up the frame for each P/Invoke (instead setting up once per method),
// then ReadyToRun code is the only code using the per-P/Invoke logic.
if (ExecutionManager::IsReadyToRunCode(returnAddress))
#endif
{
pICFForUnwindTarget = pICFForUnwindTarget->Next();
}
Expand Down
21 changes: 18 additions & 3 deletions src/coreclr/vm/i386/excepx86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "eeconfig.h"
#include "vars.hpp"
#include "generics.h"
#include "corinfo.h"

#include "asmconstants.h"
#include "virtualcallstub.h"
Expand Down Expand Up @@ -2971,6 +2972,8 @@ void ResumeAtJitEH(CrawlFrame* pCf,
// InlinedCallFrame somewhere up the call chain that is not related to the current exception
// handling.

// See the usages for USE_PER_FRAME_PINVOKE_INIT for more information.

#ifdef DEBUG
TADDR handlerFrameSP = pCf->GetRegisterSet()->SP;
#endif // DEBUG
Expand All @@ -2982,10 +2985,22 @@ void ResumeAtJitEH(CrawlFrame* pCf,
NULL /* StackwalkCacheUnwindInfo* */);
_ASSERTE(unwindSuccess);

if (((TADDR)pThread->m_pFrame < pCf->GetRegisterSet()->SP) && ExecutionManager::IsReadyToRunCode(((InlinedCallFrame*)pThread->m_pFrame)->m_pCallerReturnAddress))
if (((TADDR)pThread->m_pFrame < pCf->GetRegisterSet()->SP))
{
_ASSERTE((TADDR)pThread->m_pFrame >= handlerFrameSP);
pThread->m_pFrame->Pop(pThread);
TADDR returnAddress = ((InlinedCallFrame*)pThread->m_pFrame)->m_pCallerReturnAddress;
#ifdef USE_PER_FRAME_PINVOKE_INIT
// If we're setting up the frame for each P/Invoke for the given platform,
// then we do this for all P/Invokes except ones in IL stubs.
if (returnAddress != NULL && !ExecutionManager::GetCodeMethodDesc(returnAddress)->IsILStub())
#else
// If we aren't setting up the frame for each P/Invoke (instead setting up once per method),
// then ReadyToRun code is the only code using the per-P/Invoke logic.
if (ExecutionManager::IsReadyToRunCode(returnAddress))
#endif
{
_ASSERTE((TADDR)pThread->m_pFrame >= handlerFrameSP);
pThread->m_pFrame->Pop(pThread);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,38 @@ public static void ThrowNativeExceptionAndCatchInFrameWithFinally()

Assert.True(caughtException);
}

[Fact]
[PlatformSpecific(TestPlatforms.Windows)]
[SkipOnMono("Exception interop not supported on Mono.")]
public static void ThrowNativeExceptionInFrameWithFinallyCatchInOuterFrame()
{
bool caughtException = false;
try
{
ThrowInFrameWithFinally();
}
catch
{
caughtException = true;
}

Assert.True(caughtException);

[MethodImpl(MethodImplOptions.NoInlining)]
static void ThrowInFrameWithFinally()
{
try
{
ThrowException();
}
finally
{
// Try calling another P/Invoke in the finally block before the catch
// to make sure we have everything set up
// to recover from the exceptional control flow.
NativeFunction();
}
}
}
}
3 changes: 0 additions & 3 deletions src/tests/issues.targets
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,6 @@
<ExcludeList Include="$(XunitTestBinBase)/GC/API/GC/GetAllocatedBytesForCurrentThread/*">
<Issue>https://github.com/dotnet/runtime/issues/</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/GC/API/GC/GetGCMemoryInfo/**">
<Issue>https://github.com/dotnet/runtime/issues/73247</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/GC/Features/HeapExpansion/bestfit-finalize/*">
<Issue>times out</Issue>
</ExcludeList>
Expand Down