Skip to content

Commit 82f7100

Browse files
authored
Fix x64 and x86 emulation on arm64 Windows (dotnet#72693)
* Fix x64 and x86 emulation on arm64 Windows. The runtime was hanging or crashing when running under x64 and x86 emulation on Windows ARM64. Most of these failures were caused by missing cache flushes after code updates, where the emulator kept executing arm64 code that it had jitted from the previous state of the code. There were also two CONTEXT related issues. One was that we were assuming that AVX state is always supported. However, the emulator doesn't support it, so SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX) was failing and we considered that a failure to capture thread context. The other one was that we assumed that the CONTEXT we get in Thread::RedirectCurrentThreadAtHandledJITCase is CONTEXT_COMPLETE, but with the emulation we only get CONTEXT_FULL, which means CONTEXT_COMPLETE without the debug registers. There were also two CoreCLR test issues that caused failures under the emulation; I have fixed those. * Add hasCodeExecutedBefore argument to ClrFlushInstructionCache
1 parent 1926f09 commit 82f7100

File tree

15 files changed

+60
-26
lines changed

15 files changed

+60
-26
lines changed

src/coreclr/vm/amd64/cgenamd64.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ void UMEntryThunkCode::Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTarget
548548
m_jmpRAX[2] = 0xE0;
549549

550550
_ASSERTE(DbgIsExecutable(&pEntryThunkCodeRX->m_movR10[0], &pEntryThunkCodeRX->m_jmpRAX[3]-&pEntryThunkCodeRX->m_movR10[0]));
551+
FlushInstructionCache(GetCurrentProcess(),pEntryThunkCodeRX,sizeof(UMEntryThunkCode));
551552
}
552553

553554
void UMEntryThunkCode::Poison()
@@ -574,7 +575,7 @@ void UMEntryThunkCode::Poison()
574575
pThisRW->m_movR10[1] = 0xBF;
575576
#endif
576577

577-
ClrFlushInstructionCache(&m_movR10[0], &m_jmpRAX[3]-&m_movR10[0]);
578+
ClrFlushInstructionCache(&m_movR10[0], &m_jmpRAX[3]-&m_movR10[0], /* hasCodeExecutedBefore */ true);
578579
}
579580

580581
UMEntryThunk* UMEntryThunk::Decode(LPVOID pCallback)

src/coreclr/vm/amd64/cgencpu.h

+9-3
Original file line numberDiff line numberDiff line change
@@ -534,10 +534,16 @@ DWORD GetOffsetAtEndOfFunction(ULONGLONG uImageBase,
534534
// Currently ClrFlushInstructionCache has no effect on AMD64
535535
//
536536

537-
inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode)
537+
inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode, bool hasCodeExecutedBefore = false)
538538
{
539-
// FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode);
540-
MemoryBarrier();
539+
if (hasCodeExecutedBefore)
540+
{
541+
FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode);
542+
}
543+
else
544+
{
545+
MemoryBarrier();
546+
}
541547
return TRUE;
542548
}
543549

src/coreclr/vm/amd64/jitinterfaceamd64.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier,
296296
{
297297
ExecutableWriterHolder<void> writeBarrierWriterHolder(GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), GetCurrentWriteBarrierSize());
298298
memcpy(writeBarrierWriterHolder.GetRW(), (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize());
299+
stompWBCompleteActions |= SWB_ICACHE_FLUSH;
299300
}
300301

301302
switch (newWriteBarrier)

src/coreclr/vm/arm/cgencpu.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -998,7 +998,7 @@ struct HijackArgs
998998
// Currently ClrFlushInstructionCache has no effect on X86
999999
//
10001000

1001-
inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode)
1001+
inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode, bool hasCodeExecutedBefore = false)
10021002
{
10031003
return FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode);
10041004
}

src/coreclr/vm/arm64/cgencpu.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ inline NEON128 GetSimdMem(PCODE ip)
277277
void emitCOMStubCall (ComCallMethodDesc *pCOMMethodRX, ComCallMethodDesc *pCOMMethodRW, PCODE target);
278278
#endif // FEATURE_COMINTEROP
279279

280-
inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode)
280+
inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode, bool hasCodeExecutedBefore = false)
281281
{
282282
return FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode);
283283
}

src/coreclr/vm/comcallablewrapper.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,7 @@ extern "C" PCODE ComPreStubWorker(ComPrestubMethodFrame *pPFrame, UINT64 *pError
566566
#else
567567
*ppofsWriterHolder.GetRW() = ((UINT_PTR)pStub);
568568
#endif
569+
ClrFlushInstructionCache(ppofs, sizeof(UINT_PTR), /* hasCodeExecutedBefore */ true);
569570

570571
// Return the address of the prepad. The prepad will regenerate the hidden parameter and due
571572
// to the update above will execute the new stub code the second time around.

src/coreclr/vm/i386/cgencpu.h

+9-3
Original file line numberDiff line numberDiff line change
@@ -501,10 +501,16 @@ struct HijackArgs
501501
// Currently ClrFlushInstructionCache has no effect on X86
502502
//
503503

504-
inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode)
504+
inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode, bool hasCodeExecutedBefore = false)
505505
{
506-
// FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode);
507-
MemoryBarrier();
506+
if (hasCodeExecutedBefore)
507+
{
508+
FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode);
509+
}
510+
else
511+
{
512+
MemoryBarrier();
513+
}
508514
return TRUE;
509515
}
510516

src/coreclr/vm/i386/cgenx86.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1157,7 +1157,7 @@ void UMEntryThunkCode::Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTarget
11571157
m_jmp = X86_INSTR_JMP_REL32;
11581158
m_execstub = (BYTE*) ((pTargetCode) - (4+((BYTE*)&pEntryThunkCodeRX->m_execstub)));
11591159

1160-
FlushInstructionCache(GetCurrentProcess(),pEntryThunkCodeRX->GetEntryPoint(),sizeof(UMEntryThunkCode));
1160+
ClrFlushInstructionCache(pEntryThunkCodeRX->GetEntryPoint(),sizeof(UMEntryThunkCode) - GetEntryPointOffset(), /* hasCodeExecutedBefore */ true);
11611161
}
11621162

11631163
void UMEntryThunkCode::Poison()
@@ -1172,7 +1172,7 @@ void UMEntryThunkCode::Poison()
11721172
// mov ecx, imm32
11731173
pThisRW->m_movEAX = 0xb9;
11741174

1175-
ClrFlushInstructionCache(GetEntryPoint(),sizeof(UMEntryThunkCode));
1175+
ClrFlushInstructionCache(GetEntryPoint(),sizeof(UMEntryThunkCode) - GetEntryPointOffset(), /* hasCodeExecutedBefore */ true);
11761176
}
11771177

11781178
UMEntryThunk* UMEntryThunk::Decode(LPVOID pCallback)

src/coreclr/vm/i386/jitinterfacex86.cpp

+22-6
Original file line numberDiff line numberDiff line change
@@ -1326,7 +1326,11 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
13261326
// cmp offset[edx], 0ffh instruction
13271327
_ASSERTE(pBuf[22] == 0x80);
13281328
pfunc = (size_t *) &pBufRW[PostGrow_CardTableFirstLocation];
1329-
*pfunc = (size_t) g_card_table;
1329+
if (*pfunc != (size_t) g_card_table)
1330+
{
1331+
stompWBCompleteActions |= SWB_ICACHE_FLUSH;
1332+
*pfunc = (size_t) g_card_table;
1333+
}
13301334

13311335
// What we're trying to update is the offset field of a
13321336
// mov offset[edx], 0ffh instruction
@@ -1341,7 +1345,11 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
13411345
// cmp offset[edx], 0ffh instruction
13421346
_ASSERTE(pBuf[14] == 0x80);
13431347
pfunc = (size_t *) &pBufRW[PreGrow_CardTableFirstLocation];
1344-
*pfunc = (size_t) g_card_table;
1348+
if (*pfunc != (size_t) g_card_table)
1349+
{
1350+
stompWBCompleteActions |= SWB_ICACHE_FLUSH;
1351+
*pfunc = (size_t) g_card_table;
1352+
}
13451353

13461354
// What we're trying to update is the offset field of a
13471355

@@ -1357,7 +1365,11 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
13571365
// cmp offset[edx], 0ffh instruction
13581366
_ASSERTE(pBuf[22] == 0x80);
13591367
pfunc = (size_t *) &pBufRW[PostGrow_CardTableFirstLocation];
1360-
*pfunc = (size_t) g_card_table;
1368+
if (*pfunc != (size_t) g_card_table)
1369+
{
1370+
stompWBCompleteActions |= SWB_ICACHE_FLUSH;
1371+
*pfunc = (size_t) g_card_table;
1372+
}
13611373

13621374
// What we're trying to update is the offset field of a
13631375
// mov offset[edx], 0ffh instruction
@@ -1366,7 +1378,11 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
13661378
}
13671379

13681380
// Stick in the adjustment value.
1369-
*pfunc = (size_t) g_card_table;
1381+
if (*pfunc != (size_t) g_card_table)
1382+
{
1383+
stompWBCompleteActions |= SWB_ICACHE_FLUSH;
1384+
*pfunc = (size_t) g_card_table;
1385+
}
13701386
}
13711387

13721388
if (bStompWriteBarrierEphemeral)
@@ -1379,7 +1395,7 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
13791395

13801396
void FlushWriteBarrierInstructionCache()
13811397
{
1382-
FlushInstructionCache(GetCurrentProcess(), (void *)JIT_PatchedWriteBarrierGroup,
1383-
(BYTE*)JIT_PatchedWriteBarrierGroup_End - (BYTE*)JIT_PatchedWriteBarrierGroup);
1398+
ClrFlushInstructionCache(GetWriteBarrierCodeLocation((BYTE*)JIT_PatchedWriteBarrierGroup),
1399+
(BYTE*)JIT_PatchedWriteBarrierGroup_End - (BYTE*)JIT_PatchedWriteBarrierGroup, /* hasCodeExecutedBefore */ true);
13841400
}
13851401

src/coreclr/vm/jitinterface.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -12954,7 +12954,8 @@ PCODE UnsafeJitFunction(PrepareCodeConfig* config,
1295412954
//DbgPrintf("Jitted Entry at" FMT_ADDR "method %s::%s %s size %08x\n", DBG_ADDR(nativeEntry),
1295512955
// pszDebugClassName, pszDebugMethodName, pszDebugMethodSignature, sizeOfCode);
1295612956

12957-
ClrFlushInstructionCache(nativeEntry, sizeOfCode);
12957+
// For dynamic method, the code memory may be reused, thus we are passing in the hasCodeExecutedBefore set to true
12958+
ClrFlushInstructionCache(nativeEntry, sizeOfCode, /* hasCodeExecutedBefore */ true);
1295812959
ret = (PCODE)nativeEntry;
1295912960

1296012961
#ifdef TARGET_ARM

src/coreclr/vm/loongarch64/cgencpu.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ inline TADDR GetMem(PCODE address, SIZE_T size, bool signExtend)
226226
void emitCOMStubCall (ComCallMethodDesc *pCOMMethodRX, ComCallMethodDesc *pCOMMethodRW, PCODE target);
227227
#endif // FEATURE_COMINTEROP
228228

229-
inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode)
229+
inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode, bool hasCodeExecutedBefore = false)
230230
{
231231
return FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode);
232232
}

src/coreclr/vm/precode.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ BOOL Precode::SetTargetInterlocked(PCODE target, BOOL fOnlyRedirectFromPrestub)
350350
#ifdef HAS_THISPTR_RETBUF_PRECODE
351351
case PRECODE_THISPTR_RETBUF:
352352
ret = AsThisPtrRetBufPrecode()->SetTargetInterlocked(target, expected);
353+
ClrFlushInstructionCache(this, sizeof(ThisPtrRetBufPrecode), /* hasCodeExecutedBefore */ true);
353354
break;
354355
#endif // HAS_THISPTR_RETBUF_PRECODE
355356

@@ -381,7 +382,7 @@ void Precode::Reset()
381382
{
382383
ExecutableWriterHolder<Precode> precodeWriterHolder(this, size);
383384
precodeWriterHolder.GetRW()->Init(this, t, pMD, pMD->GetLoaderAllocator());
384-
ClrFlushInstructionCache(this, SizeOf());
385+
ClrFlushInstructionCache(this, SizeOf(), /* hasCodeExecutedBefore */ true);
385386
}
386387
}
387388

src/coreclr/vm/threadsuspend.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -2880,7 +2880,7 @@ BOOL Thread::RedirectThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt)
28802880
// This should not normally fail.
28812881
// The system silently ignores any feature specified in the FeatureMask
28822882
// which is not enabled on the processor.
2883-
bRes &= SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX);
2883+
SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX);
28842884
#endif //defined(TARGET_X86) || defined(TARGET_AMD64)
28852885

28862886
// Make sure we specify CONTEXT_EXCEPTION_REQUEST to detect "trap frame reporting".
@@ -2981,7 +2981,7 @@ BOOL Thread::RedirectCurrentThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt, CONT
29812981
_ASSERTE(PreemptiveGCDisabledOther());
29822982
_ASSERTE(IsAddrOfRedirectFunc(pTgt));
29832983
_ASSERTE(pCurrentThreadCtx);
2984-
_ASSERTE((pCurrentThreadCtx->ContextFlags & CONTEXT_COMPLETE) == CONTEXT_COMPLETE);
2984+
_ASSERTE((pCurrentThreadCtx->ContextFlags & CONTEXT_FULL) == CONTEXT_FULL);
29852985
_ASSERTE(ExecutionManager::IsManagedCode(GetIP(pCurrentThreadCtx)));
29862986

29872987
////////////////////////////////////////////////////////////////

src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs

+3-2
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@ static unsafe int Main(string[] args)
2828

2929
bool isAuthenticAmd = (ebx == 0x68747541) && (ecx == 0x444D4163) && (edx == 0x69746E65);
3030
bool isGenuineIntel = (ebx == 0x756E6547) && (ecx == 0x6C65746E) && (edx == 0x49656E69);
31+
bool isVirtualCPU = (ebx == 0x74726956) && (ecx == 0x20555043) && (edx == 0x206C6175);
3132

32-
if (!isAuthenticAmd && !isGenuineIntel)
33+
if (!isAuthenticAmd && !isGenuineIntel && !isVirtualCPU)
3334
{
3435
// CPUID checks are vendor specific and aren't guaranteed to match up, even across Intel/AMD
35-
// as such, we limit ourselves to just AuthenticAMD and GenuineIntel right now. Any other
36+
// as such, we limit ourselves to just AuthenticAMD, GenuineIntel and "Virtual CPU" right now. Any other
3637
// vendors would need to be validated against the checks below and added to the list as necessary.
3738

3839
// An example of a difference is Intel/AMD for LZCNT. While the same underlying bit is used to

src/tests/JIT/Regression/JitBlue/Runtime_34587/Runtime_34587.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -422,13 +422,13 @@ static bool ValidateX86Base()
422422

423423
if (X86BaseIsSupported)
424424
{
425-
succeeded &= (RuntimeInformation.OSArchitecture == Architecture.X86) || (RuntimeInformation.OSArchitecture == Architecture.X64);
425+
succeeded &= (RuntimeInformation.ProcessArchitecture == Architecture.X86) || (RuntimeInformation.ProcessArchitecture == Architecture.X64);
426426
}
427427

428428
if (X86BaseX64IsSupported)
429429
{
430430
succeeded &= X86BaseIsSupported;
431-
succeeded &= (RuntimeInformation.OSArchitecture == Architecture.X64);
431+
succeeded &= (RuntimeInformation.ProcessArchitecture == Architecture.X64);
432432
}
433433

434434
return succeeded;

0 commit comments

Comments
 (0)