Skip to content

Commit 21f23f9

Browse files
authored
[NativeAOT/x86] Load RtlRestoreContext dynamically and add fallback for old OSes (#99813)
* Load RtlRestoreContext dynamically and add fallback for old OSes * Port the RtlRestoreContext SEH fallback logic from CoreCLR * Remove unnecessary change * Remove ARM32 mention from a comment * Simplify implementations of GetCurrentSEHRecord, SetCurrentSEHRecord, and PopSEHRecords. Use the same implementation logic in CoreCLR and NativeAOT * Update comments
1 parent 0935105 commit 21f23f9

File tree

6 files changed

+215
-60
lines changed

6 files changed

+215
-60
lines changed

src/coreclr/nativeaot/Runtime/thread.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,6 +1107,44 @@ void Thread::SetActivationPending(bool isPending)
11071107
}
11081108
}
11091109

1110+
#ifdef TARGET_X86
1111+
1112+
void Thread::SetPendingRedirect(PCODE eip)
1113+
{
1114+
m_LastRedirectIP = eip;
1115+
m_SpinCount = 0;
1116+
}
1117+
1118+
bool Thread::CheckPendingRedirect(PCODE eip)
1119+
{
1120+
if (eip == m_LastRedirectIP)
1121+
{
1122+
// We need to test for an infinite loop in assembly, as this will break the heuristic we
1123+
// are using.
1124+
const BYTE short_jmp = 0xeb; // Machine code for a short jump.
1125+
const BYTE self = 0xfe; // -2. Short jumps are calculated as [ip]+2+[second_byte].
1126+
1127+
// If we find that we are in an infinite loop, we'll set the last redirected IP to 0 so that we will
1128+
// redirect the next time we attempt it. Delaying one iteration allows us to narrow the window of
1129+
// the race we are working around in this corner case.
1130+
BYTE *ip = (BYTE *)m_LastRedirectIP;
1131+
if (ip[0] == short_jmp && ip[1] == self)
1132+
m_LastRedirectIP = 0;
1133+
1134+
// We set a hard limit of 5 times we will spin on this to avoid any tricky race which we have not
1135+
// accounted for.
1136+
m_SpinCount++;
1137+
if (m_SpinCount >= 5)
1138+
m_LastRedirectIP = 0;
1139+
1140+
return true;
1141+
}
1142+
1143+
return false;
1144+
}
1145+
1146+
#endif // TARGET_X86
1147+
11101148
#endif // !DACCESS_COMPILE
11111149

11121150
void Thread::ValidateExInfoStack()

src/coreclr/nativeaot/Runtime/thread.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ struct ThreadBuffer
9797
uintptr_t m_uHijackedReturnValueFlags;
9898
PTR_ExInfo m_pExInfoStackHead;
9999
Object* m_threadAbortException; // ThreadAbortException instance -set only during thread abort
100+
#ifdef TARGET_X86
101+
PCODE m_LastRedirectIP;
102+
uint64_t m_SpinCount;
103+
#endif
100104
Object* m_pThreadLocalStatics;
101105
InlinedThreadStaticRoot* m_pInlinedThreadLocalStatics;
102106
GCFrameRegistration* m_pGCFrameRegistrations;
@@ -317,6 +321,11 @@ class Thread : private ThreadBuffer
317321

318322
bool IsActivationPending();
319323
void SetActivationPending(bool isPending);
324+
325+
#ifdef TARGET_X86
326+
void SetPendingRedirect(PCODE eip);
327+
bool CheckPendingRedirect(PCODE eip);
328+
#endif
320329
};
321330

322331
#ifndef __GCENV_BASE_INCLUDED__

src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp

Lines changed: 151 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "gcconfig.h"
2929

3030
#include "thread.h"
31+
#include "threadstore.h"
3132

3233
#define REDHAWK_PALEXPORT extern "C"
3334
#define REDHAWK_PALAPI __stdcall
@@ -322,10 +323,120 @@ REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTR
322323
return CreateEventW(pEventAttributes, manualReset, initialState, pName);
323324
}
324325

326+
#ifdef TARGET_X86
327+
328+
#define EXCEPTION_HIJACK 0xe0434f4e // 0xe0000000 | 'COM'+1
329+
330+
PEXCEPTION_REGISTRATION_RECORD GetCurrentSEHRecord()
331+
{
332+
return (PEXCEPTION_REGISTRATION_RECORD)__readfsdword(0);
333+
}
334+
335+
VOID SetCurrentSEHRecord(EXCEPTION_REGISTRATION_RECORD *pSEH)
336+
{
337+
__writefsdword(0, (DWORD)pSEH);
338+
}
339+
340+
VOID PopSEHRecords(LPVOID pTargetSP)
341+
{
342+
PEXCEPTION_REGISTRATION_RECORD currentContext = GetCurrentSEHRecord();
343+
// The last record in the chain is EXCEPTION_CHAIN_END which is defined as the maximum
344+
// pointer value so it cannot satisfy the loop condition.
345+
while (currentContext < pTargetSP)
346+
{
347+
currentContext = currentContext->Next;
348+
}
349+
SetCurrentSEHRecord(currentContext);
350+
}
351+
352+
// This will check who caused the exception. If it was caused by the redirect function,
353+
// the reason is to resume the thread back at the point it was redirected in the first
354+
// place. If the exception was not caused by the function, then it was caused by the call
355+
// out to the I[GC|Debugger]ThreadControl client and we need to determine if it's an
356+
// exception that we can just eat and let the runtime resume the thread, or if it's an
357+
// uncatchable exception that we need to pass on to the runtime.
358+
int RtlRestoreContextFallbackExceptionFilter(PEXCEPTION_POINTERS pExcepPtrs, CONTEXT *pCtx, Thread *pThread)
359+
{
360+
if (pExcepPtrs->ExceptionRecord->ExceptionCode == STATUS_STACK_OVERFLOW)
361+
{
362+
return EXCEPTION_CONTINUE_SEARCH;
363+
}
364+
365+
// Any exception other than a stack overflow is expected to be the hijack exception
366+
_ASSERTE(pExcepPtrs->ExceptionRecord->ExceptionCode == EXCEPTION_HIJACK);
367+
368+
// Copy everything in the saved context record into the EH context.
369+
// Historically the EH context has enough space for every enabled context feature.
370+
// That may not hold for the future features beyond AVX, but this codepath is
371+
// supposed to be used only on OSes that do not have RtlRestoreContext.
372+
CONTEXT* pTarget = pExcepPtrs->ContextRecord;
373+
if (!CopyContext(pTarget, pCtx->ContextFlags, pCtx))
374+
{
375+
PalPrintFatalError("Could not set context record.\n");
376+
RhFailFast();
377+
}
378+
379+
DWORD espValue = pCtx->Esp;
380+
381+
// NOTE: Ugly, ugly workaround.
382+
// We need to resume the thread into the managed code where it was redirected,
383+
// and the corresponding ESP is below the current one. But C++ expects that
384+
// on an EXCEPTION_CONTINUE_EXECUTION the ESP will be above where it has
385+
// installed the SEH handler. To solve this, we need to remove all handlers
386+
// that reside above the resumed ESP, but we must leave the OS-installed
387+
// handler at the top, so we grab the top SEH handler, call
388+
// PopSEHRecords which will remove all SEH handlers above the target ESP and
389+
// then link the OS handler back in with SetCurrentSEHRecord.
390+
391+
// Get the special OS handler and save it until PopSEHRecords is done
392+
EXCEPTION_REGISTRATION_RECORD *pCurSEH = GetCurrentSEHRecord();
393+
394+
// Unlink all records above the target resume ESP
395+
PopSEHRecords((LPVOID)(size_t)espValue);
396+
397+
// Link the special OS handler back in to the top
398+
pCurSEH->Next = GetCurrentSEHRecord();
399+
400+
// Register the special OS handler as the top handler with the OS
401+
SetCurrentSEHRecord(pCurSEH);
402+
403+
// Resume execution at point where thread was originally redirected
404+
return EXCEPTION_CONTINUE_EXECUTION;
405+
}
406+
407+
EXTERN_C VOID __cdecl RtlRestoreContextFallback(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord)
408+
{
409+
Thread *pThread = ThreadStore::GetCurrentThread();
410+
411+
// A counter to avoid a nasty case where an
412+
// up-stack filter throws another exception
413+
// causing our filter to be run again for
414+
// some unrelated exception.
415+
int filter_count = 0;
416+
417+
__try
418+
{
419+
// Save the instruction pointer where we redirected last. This does not race with the check
420+
// against this variable because the GC will not attempt to redirect the thread until the
421+
// instruction pointer of this thread is back in managed code.
422+
pThread->SetPendingRedirect(ContextRecord->Eip);
423+
RaiseException(EXCEPTION_HIJACK, 0, 0, NULL);
424+
}
425+
__except (++filter_count == 1
426+
? RtlRestoreContextFallbackExceptionFilter(GetExceptionInformation(), ContextRecord, pThread)
427+
: EXCEPTION_CONTINUE_SEARCH)
428+
{
429+
_ASSERTE(!"Reached body of __except in RtlRestoreContextFallback");
430+
}
431+
}
432+
433+
#endif // TARGET_X86
434+
325435
typedef BOOL(WINAPI* PINITIALIZECONTEXT2)(PVOID Buffer, DWORD ContextFlags, PCONTEXT* Context, PDWORD ContextLength, ULONG64 XStateCompactionMask);
326436
PINITIALIZECONTEXT2 pfnInitializeContext2 = NULL;
327437

328438
#ifdef TARGET_X86
439+
EXTERN_C VOID __cdecl RtlRestoreContextFallback(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord);
329440
typedef VOID(__cdecl* PRTLRESTORECONTEXT)(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord);
330441
PRTLRESTORECONTEXT pfnRtlRestoreContext = NULL;
331442

@@ -356,6 +467,11 @@ REDHAWK_PALEXPORT CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextB
356467
{
357468
HMODULE hm = GetModuleHandleW(_T("ntdll.dll"));
358469
pfnRtlRestoreContext = (PRTLRESTORECONTEXT)GetProcAddress(hm, "RtlRestoreContext");
470+
if (pfnRtlRestoreContext == NULL)
471+
{
472+
// Fallback to the internal implementation if OS doesn't provide one.
473+
pfnRtlRestoreContext = RtlRestoreContextFallback;
474+
}
359475
}
360476
#endif //TARGET_X86
361477

@@ -438,7 +554,12 @@ REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalSetThreadContext(HAND
438554
REDHAWK_PALEXPORT void REDHAWK_PALAPI PalRestoreContext(CONTEXT * pCtx)
439555
{
440556
__asan_handle_no_return();
557+
#ifdef TARGET_X86
558+
_ASSERTE(pfnRtlRestoreContext != NULL);
559+
pfnRtlRestoreContext(pCtx, NULL);
560+
#else
441561
RtlRestoreContext(pCtx, NULL);
562+
#endif //TARGET_X86
442563
}
443564

444565
REDHAWK_PALIMPORT void REDHAWK_PALAPI PopulateControlSegmentRegisters(CONTEXT* pContext)
@@ -568,16 +689,41 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* p
568689

569690
if (GetThreadContext(hThread, &win32ctx))
570691
{
692+
bool isSafeToRedirect = true;
693+
694+
#ifdef TARGET_X86
695+
// Work around WOW64 problems. Only do this workaround if a) this is x86, and b) the OS does
696+
// not support trap frame reporting.
697+
if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) == 0)
698+
{
699+
// This code fixes a race between GetThreadContext and NtContinue. If we redirect managed code
700+
// at the same place twice in a row, we run the risk of reading a bogus CONTEXT when we redirect
701+
// the second time. This leads to access violations on x86 machines. To fix the problem, we
702+
// never redirect at the same instruction pointer that we redirected at on the previous GC.
703+
if (((Thread*)pThreadToHijack)->CheckPendingRedirect(win32ctx.Eip))
704+
{
705+
isSafeToRedirect = false;
706+
}
707+
}
708+
#else
709+
// In some cases Windows will not set the CONTEXT_EXCEPTION_REPORTING flag if the thread is executing
710+
// in kernel mode (i.e. in the middle of a syscall or exception handling). Therefore, we should treat
711+
// the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that it is not safe to
712+
// manipulate the current state of the thread context.
713+
isSafeToRedirect = (win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0;
714+
#endif
715+
571716
// The CONTEXT_SERVICE_ACTIVE and CONTEXT_EXCEPTION_ACTIVE output flags indicate we suspended the thread
572717
// at a point where the kernel cannot guarantee a completely accurate context. We'll fail the request in
573718
// this case (which should force our caller to resume the thread and try again -- since this is a fairly
574719
// narrow window we're highly likely to succeed next time).
575-
// Note: in some cases (x86 WOW64, ARM32 on ARM64) the OS will not set the CONTEXT_EXCEPTION_REPORTING flag
576-
// if the thread is executing in kernel mode (i.e. in the middle of a syscall or exception handling).
577-
// Therefore, we should treat the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that
578-
// it is not safe to manipulate with the current state of the thread context.
579720
if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0 &&
580-
((win32ctx.ContextFlags & (CONTEXT_SERVICE_ACTIVE | CONTEXT_EXCEPTION_ACTIVE)) == 0))
721+
((win32ctx.ContextFlags & (CONTEXT_SERVICE_ACTIVE | CONTEXT_EXCEPTION_ACTIVE)) != 0))
722+
{
723+
isSafeToRedirect = false;
724+
}
725+
726+
if (isSafeToRedirect)
581727
{
582728
g_pHijackCallback(&win32ctx, pThreadToHijack);
583729
}

src/coreclr/vm/excep.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ EXCEPTION_HANDLER_DECL(COMPlusFrameHandlerRevCom);
518518
#endif // FEATURE_COMINTEROP
519519

520520
// Pop off any SEH handlers we have registered below pTargetSP
521-
VOID __cdecl PopSEHRecords(LPVOID pTargetSP);
521+
VOID PopSEHRecords(LPVOID pTargetSP);
522522

523523
#ifdef DEBUGGING_SUPPORTED
524524
VOID UnwindExceptionTrackerAndResumeInInterceptionFrame(ExInfo* pExInfo, EHContext* context);

src/coreclr/vm/i386/excepx86.cpp

Lines changed: 11 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1849,39 +1849,7 @@ PEXCEPTION_REGISTRATION_RECORD GetCurrentSEHRecord()
18491849
{
18501850
WRAPPER_NO_CONTRACT;
18511851

1852-
LPVOID fs0 = (LPVOID)__readfsdword(0);
1853-
1854-
#if 0 // This walk is too expensive considering we hit it every time we a CONTRACT(NOTHROW)
1855-
#ifdef _DEBUG
1856-
EXCEPTION_REGISTRATION_RECORD *pEHR = (EXCEPTION_REGISTRATION_RECORD *)fs0;
1857-
LPVOID spVal;
1858-
__asm {
1859-
mov spVal, esp
1860-
}
1861-
1862-
// check that all the eh frames are all greater than the current stack value. If not, the
1863-
// stack has been updated somehow w/o unwinding the SEH chain.
1864-
1865-
// LOG((LF_EH, LL_INFO1000000, "ER Chain:\n"));
1866-
while (pEHR != NULL && pEHR != EXCEPTION_CHAIN_END) {
1867-
// LOG((LF_EH, LL_INFO1000000, "\tp: prev:p handler:%x\n", pEHR, pEHR->Next, pEHR->Handler));
1868-
if (pEHR < spVal) {
1869-
if (gLastResumedExceptionFunc != 0)
1870-
_ASSERTE(!"Stack is greater than start of SEH chain - possible missing leave in handler. See gLastResumedExceptionHandler & gLastResumedExceptionFunc for info");
1871-
else
1872-
_ASSERTE(!"Stack is greater than start of SEH chain (FS:0)");
1873-
}
1874-
if (pEHR->Handler == (void *)-1)
1875-
_ASSERTE(!"Handler value has been corrupted");
1876-
1877-
_ASSERTE(pEHR < pEHR->Next);
1878-
1879-
pEHR = pEHR->Next;
1880-
}
1881-
#endif
1882-
#endif // 0
1883-
1884-
return (EXCEPTION_REGISTRATION_RECORD*) fs0;
1852+
return (PEXCEPTION_REGISTRATION_RECORD)__readfsdword(0);
18851853
}
18861854

18871855
PEXCEPTION_REGISTRATION_RECORD GetFirstCOMPlusSEHRecord(Thread *pThread) {
@@ -1917,29 +1885,23 @@ PEXCEPTION_REGISTRATION_RECORD GetPrevSEHRecord(EXCEPTION_REGISTRATION_RECORD *n
19171885
VOID SetCurrentSEHRecord(EXCEPTION_REGISTRATION_RECORD *pSEH)
19181886
{
19191887
WRAPPER_NO_CONTRACT;
1920-
*GetThread()->GetExceptionListPtr() = pSEH;
1888+
1889+
__writefsdword(0, (DWORD)pSEH);
19211890
}
19221891

1923-
// Note that this logic is copied below, in PopSEHRecords
1924-
__declspec(naked)
1925-
VOID __cdecl PopSEHRecords(LPVOID pTargetSP)
1892+
VOID PopSEHRecords(LPVOID pTargetSP)
19261893
{
1927-
// No CONTRACT possible on naked functions
19281894
STATIC_CONTRACT_NOTHROW;
19291895
STATIC_CONTRACT_GC_NOTRIGGER;
19301896

1931-
__asm{
1932-
mov ecx, [esp+4] ;; ecx <- pTargetSP
1933-
mov eax, fs:[0] ;; get current SEH record
1934-
poploop:
1935-
cmp eax, ecx
1936-
jge done
1937-
mov eax, [eax] ;; get next SEH record
1938-
jmp poploop
1939-
done:
1940-
mov fs:[0], eax
1941-
retn
1897+
PEXCEPTION_REGISTRATION_RECORD currentContext = GetCurrentSEHRecord();
1898+
// The last record in the chain is EXCEPTION_CHAIN_END which is defined as the maximum
1899+
// pointer value so it cannot satisfy the loop condition.
1900+
while (currentContext < pTargetSP)
1901+
{
1902+
currentContext = currentContext->Next;
19421903
}
1904+
SetCurrentSEHRecord(currentContext);
19431905
}
19441906

19451907
//

src/coreclr/vm/threadsuspend.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,11 +1117,11 @@ BOOL Thread::IsContextSafeToRedirect(const CONTEXT* pContext)
11171117
#ifndef TARGET_UNIX
11181118

11191119
#if !defined(TARGET_X86)
1120-
// In some cases (x86 WOW64, ARM32 on ARM64) Windows will not set the CONTEXT_EXCEPTION_REPORTING flag
1121-
// if the thread is executing in kernel mode (i.e. in the middle of a syscall or exception handling).
1122-
// Therefore, we should treat the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that
1123-
// it is not safe to manipulate with the current state of the thread context.
1124-
// Note: the x86 WOW64 case is already handled in GetSafelyRedirectableThreadContext; in addition, this
1120+
// In some cases Windows will not set the CONTEXT_EXCEPTION_REPORTING flag if the thread is executing
1121+
// in kernel mode (i.e. in the middle of a syscall or exception handling). Therefore, we should treat
1122+
// the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that it is not safe to
1123+
// manipulate the current state of the thread context.
1124+
// Note: The x86 WOW64 case is already handled in GetSafelyRedirectableThreadContext; in addition, this
11251125
// flag is never set on Windows7 x86 WOW64. So this check is valid for non-x86 architectures only.
11261126
isSafeToRedirect = (pContext->ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0;
11271127
#endif // !defined(TARGET_X86)

0 commit comments

Comments
 (0)