Skip to content

[NativeAOT] Thin locks #79519

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Dec 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
aaee546
switch to managed thread ID in Lock
VSadov Nov 23, 2022
2ddb01a
fattening the lock
VSadov Nov 23, 2022
7a49ffc
__declspec(selectany)
VSadov Dec 7, 2022
700d437
few tweaks
VSadov Dec 7, 2022
cd5fc2a
fairness
VSadov Dec 11, 2022
7beea61
more room for thread ids
VSadov Dec 11, 2022
59610de
remove CurrentNativeThreadId
VSadov Dec 11, 2022
f809161
couple fixes
VSadov Dec 12, 2022
e3d4e87
fix win-arm64 build
VSadov Dec 12, 2022
46f28ed
win-arm64 build , another try
VSadov Dec 12, 2022
16183e9
Apply suggestions from code review
VSadov Dec 12, 2022
236410f
fix after renaming
VSadov Dec 12, 2022
0eebcab
do not report successful spin if thread has waited
VSadov Dec 12, 2022
85dc071
keep extern and undo mangling of tls_CurrentThread in asm
VSadov Dec 12, 2022
77c1bec
use SyncTable indexer in less perf-sensitive places.
VSadov Dec 12, 2022
867a8ee
GetNewHashCode just delegate to shared random
VSadov Dec 13, 2022
b755737
Apply suggestions from code review
VSadov Dec 15, 2022
6b557ec
unchecked const conversion
VSadov Dec 15, 2022
bc54d49
some refactoring comments and typos
VSadov Dec 15, 2022
0a78358
min number of spins in the backoff
VSadov Dec 15, 2022
367acbb
moved CurrentManagedThreadIdUnchecked to ManagedThreadId
VSadov Dec 15, 2022
43a354d
Use `-1` to report success and allow using element #1 in the SyncTable
VSadov Dec 15, 2022
536492b
use threadstatic for managed thread ID
VSadov Dec 15, 2022
a96f618
check before calling RhGetProcessCpuCount
VSadov Dec 15, 2022
a7256ec
use 0 as default thread ID
VSadov Dec 15, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ EXTERN_C NATIVEAOT_API void __cdecl RhSpinWait(int32_t iterations)
ASSERT(iterations > 0);

// limit the spin count in coop mode.
ASSERT_MSG(iterations <= 10000 || !ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode(),
ASSERT_MSG(iterations <= 1024 || !ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode(),
Copy link
Member Author

@VSadov VSadov Dec 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is just because we switched to SpinWait that is calibrated to 35 nsec.
Previous numbers were assumed in terms of pre-skylake "pause", or, simply put, were divided by 8 in the underlying APIs.
I also made it power of 2. It is more convenient.

"This is too long wait for coop mode. You must p/invoke with GC transition.");

YieldProcessorNormalizationInfo normalizationInfo;
YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, iterations);
YieldProcessorNormalized(normalizationInfo, iterations);
}

// Yield the cpu to another thread ready to process, if one is available.
Expand Down
7 changes: 5 additions & 2 deletions src/coreclr/nativeaot/Runtime/thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,10 @@ void Thread::Construct()
// alloc_context ever needs different initialization, a matching change to the tls_CurrentThread
// static initialization will need to be made.

m_uPalThreadIdForLogging = PalGetCurrentThreadIdForLogging();
m_pTransitionFrame = TOP_OF_STACK_MARKER;
m_pDeferredTransitionFrame = TOP_OF_STACK_MARKER;
m_hPalThread = INVALID_HANDLE_VALUE;

m_threadId.SetToCurrentThread();

HANDLE curProcessPseudo = PalGetCurrentProcess();
Expand Down Expand Up @@ -328,7 +331,7 @@ bool Thread::CatchAtSafePoint()

uint64_t Thread::GetPalThreadIdForLogging()
{
return m_uPalThreadIdForLogging;
return *(uint64_t*)&m_threadId;
}

bool Thread::IsCurrentThread()
Expand Down
5 changes: 2 additions & 3 deletions src/coreclr/nativeaot/Runtime/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ struct ThreadBuffer
PTR_VOID m_pStackLow;
PTR_VOID m_pStackHigh;
PTR_UInt8 m_pTEB; // Pointer to OS TEB structure for this thread
uint64_t m_uPalThreadIdForLogging; // @TODO: likely debug-only
EEThreadId m_threadId;
EEThreadId m_threadId; // OS thread ID
PTR_VOID m_pThreadStressLog; // pointer to head of thread's StressLogChunks
NATIVE_CONTEXT* m_interruptedContext; // context for an asynchronously interrupted thread.
#ifdef FEATURE_SUSPEND_REDIRECTION
Expand Down Expand Up @@ -192,7 +191,7 @@ class Thread : private ThreadBuffer
gc_alloc_context * GetAllocContext(); // @TODO: I would prefer to not expose this in this way

#ifndef DACCESS_COMPILE
uint64_t GetPalThreadIdForLogging();
uint64_t GetPalThreadIdForLogging();
bool IsCurrentThread();

void GcScanRoots(void * pfnEnumCallback, void * pvCallbackData);
Expand Down
17 changes: 3 additions & 14 deletions src/coreclr/nativeaot/Runtime/threadstore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,20 +415,9 @@ COOP_PINVOKE_HELPER(void, RhpCancelThreadAbort, (void* thread))

C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer));

EXTERN_C DECLSPEC_THREAD ThreadBuffer tls_CurrentThread;
DECLSPEC_THREAD ThreadBuffer tls_CurrentThread =
{
{ 0 }, // m_rgbAllocContextBuffer
Thread::TSF_Unknown, // m_ThreadStateFlags
TOP_OF_STACK_MARKER, // m_pTransitionFrame
TOP_OF_STACK_MARKER, // m_pDeferredTransitionFrame
0, // m_pCachedTransitionFrame
0, // m_pNext
INVALID_HANDLE_VALUE, // m_hPalThread
0, // m_ppvHijackedReturnAddressLocation
0, // m_pvHijackedReturnAddress
0, // all other fields are initialized by zeroes
};
#ifndef _MSC_VER
__thread ThreadBuffer tls_CurrentThread;
#endif

EXTERN_C ThreadBuffer* RhpGetThread()
{
Expand Down
7 changes: 6 additions & 1 deletion src/coreclr/nativeaot/Runtime/threadstore.inl
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

EXTERN_C DECLSPEC_THREAD ThreadBuffer tls_CurrentThread;
#ifdef _MSC_VER
// a workaround to prevent tls_CurrentThread from becoming dynamically checked/initialized.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we keep on extern "C" on this workaround to avoid introducing name mangling into the asm code?

Copy link
Member Author

@VSadov VSadov Dec 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just ported the workaround from a similar change in CoreClr. I am not sure I completely understand why it is needed.
If removing extern "C" is not a part of the fix, I guess, why not. I will try.

Copy link
Member Author

@VSadov VSadov Dec 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fix works fine with extern "C", so I will add it and revert the mangling in asm

EXTERN_C __declspec(selectany) __declspec(thread) ThreadBuffer tls_CurrentThread;
#else
EXTERN_C __thread ThreadBuffer tls_CurrentThread;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does non-Windows pay the penalty for TLS init checks? (ie similar workaround is not possible on non-Windows)

Copy link
Member Author

@VSadov VSadov Dec 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the discussion of the original fix (#33347), I gathered that even on windows the workaround was not needed until some version of MSVC.
It is probably more a function of the c runtime than OS (glibc, musl, etc). It should be fairly easy to check what happens on glibc.
I also wonder if there are differences on arm64.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Linux/glibc does not require a similar fix. There are no checks when accessing TLS.

Here is the disassembly for reference.

Linux/glibc x64

System.Collections.Tests`::RhGetCurrentManagedThreadId():
    0x5555554d5db0 <+0>:  pushq  %rbp
    0x5555554d5db1 <+1>:  movq   %rsp, %rbp
    0x5555554d5db4 <+4>:  movq   %fs:0x0, %rax
    0x5555554d5dbd <+13>: leaq   -0x100(%rax), %rax
    0x5555554d5dc4 <+20>: movl   0xc0(%rax), %eax
    0x5555554d5dca <+26>: popq   %rbp
    0x5555554d5dcb <+27>: retq

Linux/glibc arm64

System.Collections.Tests`::RhGetCurrentManagedThreadId():
    0xaaaaaab87400 <+0>:  stp    x29, x30, [sp, #-0x10]!
    0xaaaaaab87404 <+4>:  mov    x29, sp
    0xaaaaaab87408 <+8>:  adrp   x0, 4173
    0xaaaaaab8740c <+12>: ldr    x0, [x0, #0xa30]
    0xaaaaaab87410 <+16>: nop
    0xaaaaaab87414 <+20>: nop
    0xaaaaaab87418 <+24>: mrs    x8, TPIDR_EL0
    0xaaaaaab8741c <+28>: add    x8, x8, x0
    0xaaaaaab87420 <+32>: ldr    w0, [x8, #0xc0]
    0xaaaaaab87424 <+36>: ldp    x29, x30, [sp], #0x10
    0xaaaaaab87428 <+40>: ret

MSVC (with the fix)

    return ThreadStore::RawGetCurrentThread()->GetManagedThreadId();
00007FF6A99D7AE0  mov         ecx,dword ptr [_tls_index (07FF6A9CB3A28h)]  
00007FF6A99D7AE6  mov         rax,qword ptr gs:[58h]  
00007FF6A99D7AEF  mov         edx,10h  
00007FF6A99D7AF4  mov         rax,qword ptr [rax+rcx*8]  
00007FF6A99D7AF8  mov         eax,dword ptr [rax+rdx+0C0h]  
}
00007FF6A99D7AFF  ret  

#endif

// static
inline Thread * ThreadStore::RawGetCurrentThread()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,42 +14,68 @@ internal static class SynchronizedMethodHelpers
private static void MonitorEnter(object obj, ref bool lockTaken)
{
// Inlined Monitor.Enter with a few tweaks
Lock lck = Monitor.GetLock(obj);
int resultOrIndex = ObjectHeader.Acquire(obj);
if (resultOrIndex < 0)
{
lockTaken = true;
return;
}

Lock lck = resultOrIndex == 0 ?
ObjectHeader.GetLockObject(obj) :
SyncTable.GetLockObject(resultOrIndex);

if (lck.TryAcquire(0))
{
lockTaken = true;
return;
}

Monitor.TryAcquireContended(lck, obj, Timeout.Infinite);
lockTaken = true;
}
private static void MonitorExit(object obj, ref bool lockTaken)
{
// Inlined Monitor.Exit with a few tweaks
if (!lockTaken) return;
Monitor.GetLock(obj).Release();
if (!lockTaken)
return;

ObjectHeader.Release(obj);
lockTaken = false;
}

private static void MonitorEnterStatic(IntPtr pEEType, ref bool lockTaken)
{
// Inlined Monitor.Enter with a few tweaks
object obj = GetStaticLockObject(pEEType);
Lock lck = Monitor.GetLock(obj);
int resultOrIndex = ObjectHeader.Acquire(obj);
if (resultOrIndex < 0)
{
lockTaken = true;
return;
}

Lock lck = resultOrIndex == 0 ?
ObjectHeader.GetLockObject(obj) :
SyncTable.GetLockObject(resultOrIndex);

if (lck.TryAcquire(0))
{
lockTaken = true;
return;
}

Monitor.TryAcquireContended(lck, obj, Timeout.Infinite);
lockTaken = true;
}
private static void MonitorExitStatic(IntPtr pEEType, ref bool lockTaken)
{
// Inlined Monitor.Exit with a few tweaks
if (!lockTaken) return;
if (!lockTaken)
return;

object obj = GetStaticLockObject(pEEType);
Monitor.GetLock(obj).Release();
ObjectHeader.Release(obj);
lockTaken = false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ namespace System
{
public static partial class Environment
{
internal static int CurrentNativeThreadId => ManagedThreadId.Current;

public static long TickCount64 => (long)RuntimeImports.RhpGetTickCount64();

[DoesNotReturn]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ namespace System
{
public static partial class Environment
{
internal static int CurrentNativeThreadId => unchecked((int)Interop.Kernel32.GetCurrentThreadId());

public static long TickCount64 => (long)Interop.Kernel32.GetTickCount64();

[DoesNotReturn]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,9 @@ public static object GetObjectValue(object? obj)
return RuntimeImports.RhCompareObjectContentsAndPadding(o1, o2);
}

[ThreadStatic]
private static int t_hashSeed;

internal static int GetNewHashCode()
{
int multiplier = Environment.CurrentManagedThreadId * 4 + 5;
// Every thread has its own generator for hash codes so that we won't get into a situation
// where two threads consistently give out the same hash codes.
// Choice of multiplier guarantees period of 2**32 - see Knuth Vol 2 p16 (3.2.1.2 Theorem A).
t_hashSeed = t_hashSeed * multiplier + 1;
return t_hashSeed;
return Random.Shared.Next();
}

public static unsafe int GetHashCode(object o)
Expand Down Expand Up @@ -228,6 +220,9 @@ internal static unsafe ushort GetElementSize(this Array array)
internal static unsafe MethodTable* GetMethodTable(this object obj)
=> obj.m_pEEType;

internal static unsafe ref MethodTable* GetMethodTableRef(this object obj)
=> ref obj.m_pEEType;

internal static unsafe EETypePtr GetEETypePtr(this object obj)
=> new EETypePtr(obj.m_pEEType);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ private static void SerializeExceptionsForDump(Exception currentException, IntPt
LowLevelList<Exception> exceptions = new LowLevelList<Exception>(curThreadExceptions);
LowLevelList<Exception> nonThrownInnerExceptions = new LowLevelList<Exception>();

uint currentThreadId = (uint)Environment.CurrentNativeThreadId;
uint currentThreadId = (uint)Environment.CurrentManagedThreadId;

// Reset nesting levels for exceptions on this thread that might not be currently in flight
foreach (KeyValuePair<Exception, ExceptionData> item in s_exceptionDataTable)
Expand Down
Loading