Skip to content

Commit ee898ca

Browse files
janvorlijkotas
andauthored
Fix lookup for current Thread in async signal handler (#122048)
The current `CheckActivationSafePoint` uses thread local storage to get the current Thread instance. But this function is called from async signal handler (the activation signal handler) and it is not allowed to access TLS variables there because the access can allocate and if the interrupted code was running in an allocation code, it could crash. There was no problem with this since .NET 1.0, but a change in the recent glibc version has broken this. We've got reports of crashes in this code due to the reason mentioned above. This change introduces an async safe mechanism for accessing the current Thread instance from async signal handlers. It uses a segmented array that can grow, but never shrink. Entries for threads are added when runtime creates a thread / attaches to an external thread and removed when the thread dies. The check for safety of the activation injection was further enhanced to make sure that the ScanReaderLock is not taken. In cases it would need to be taken, we just reject the location. Since NativeAOT is subject to the same issue, the code to maintain the thread id to thread instance map is placed to the minipal and shared between coreclr and NativeAOT. Closes #121581 --------- Co-authored-by: Jan Kotas <jkotas@microsoft.com>
1 parent c78f853 commit ee898ca

File tree

14 files changed

+328
-55
lines changed

14 files changed

+328
-55
lines changed

src/coreclr/nativeaot/Runtime/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ set(COMMON_RUNTIME_SOURCES
5555
${CLR_SRC_NATIVE_DIR}/minipal/xoshiro128pp.c
5656
)
5757

58+
if (CLR_CMAKE_TARGET_UNIX AND NOT CLR_CMAKE_TARGET_ARCH_WASM)
59+
list(APPEND COMMON_RUNTIME_SOURCES
60+
${RUNTIME_DIR}/asyncsafethreadmap.cpp
61+
)
62+
endif()
63+
5864
set(SERVER_GC_SOURCES
5965
${GC_DIR}/gceesvr.cpp
6066
${GC_DIR}/gcsvr.cpp

src/coreclr/nativeaot/Runtime/threadstore.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include "TargetPtrs.h"
2323
#include "yieldprocessornormalized.h"
2424
#include <minipal/time.h>
25+
#include <minipal/thread.h>
26+
#include "asyncsafethreadmap.h"
2527

2628
#include "slist.inl"
2729

@@ -143,6 +145,14 @@ void ThreadStore::AttachCurrentThread(bool fAcquireThreadStoreLock)
143145
pAttachingThread->m_ThreadStateFlags = Thread::TSF_Attached;
144146

145147
pTS->m_ThreadList.PushHead(pAttachingThread);
148+
149+
#if defined(TARGET_UNIX) && !defined(TARGET_WASM)
150+
if (!InsertThreadIntoAsyncSafeMap(pAttachingThread->m_threadId, pAttachingThread))
151+
{
152+
PalPrintFatalError("\nFailed to insert thread into async-safe map due to out of memory.\n");
153+
RhFailFast();
154+
}
155+
#endif // TARGET_UNIX && !TARGET_WASM
146156
}
147157

148158
// static
@@ -188,6 +198,9 @@ void ThreadStore::DetachCurrentThread()
188198
pTS->m_ThreadList.RemoveFirst(pDetachingThread);
189199
// tidy up GC related stuff (release allocation context, etc..)
190200
pDetachingThread->Detach();
201+
#if defined(TARGET_UNIX) && !defined(TARGET_WASM)
202+
RemoveThreadFromAsyncSafeMap(pDetachingThread->m_threadId, pDetachingThread);
203+
#endif
191204
}
192205

193206
// post-mortem clean up.
@@ -352,6 +365,13 @@ EXTERN_C RuntimeThreadLocals* RhpGetThread()
352365
return &tls_CurrentThread;
353366
}
354367

368+
#if defined(TARGET_UNIX) && !defined(TARGET_WASM)
369+
Thread * ThreadStore::GetCurrentThreadIfAvailableAsyncSafe()
370+
{
371+
return (Thread*)FindThreadInAsyncSafeMap(minipal_get_current_thread_id_no_cache());
372+
}
373+
#endif // TARGET_UNIX && !TARGET_WASM
374+
355375
#endif // !DACCESS_COMPILE
356376

357377
#ifdef _WIN32

src/coreclr/nativeaot/Runtime/threadstore.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class ThreadStore
4747
static Thread * RawGetCurrentThread();
4848
static Thread * GetCurrentThread();
4949
static Thread * GetCurrentThreadIfAvailable();
50+
static Thread * GetCurrentThreadIfAvailableAsyncSafe();
5051
static PTR_Thread GetSuspendingThread();
5152
static void AttachCurrentThread();
5253
static void AttachCurrentThread(bool fAcquireThreadStoreLock);

src/coreclr/nativeaot/Runtime/unix/PalUnix.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,24 +1017,24 @@ static struct sigaction g_previousActivationHandler;
10171017

10181018
static void ActivationHandler(int code, siginfo_t* siginfo, void* context)
10191019
{
1020-
// Only accept activations from the current process
1021-
if (siginfo->si_pid == getpid()
1020+
Thread* pThread = ThreadStore::GetCurrentThreadIfAvailableAsyncSafe();
1021+
if (pThread)
1022+
{
1023+
// Only accept activations from the current process
1024+
if (siginfo->si_pid == getpid()
10221025
#ifdef HOST_APPLE
1023-
// On Apple platforms si_pid is sometimes 0. It was confirmed by Apple to be expected, as the si_pid is tracked at the process level. So when multiple
1024-
// signals are in flight in the same process at the same time, it may be overwritten / zeroed.
1025-
|| siginfo->si_pid == 0
1026+
// On Apple platforms si_pid is sometimes 0. It was confirmed by Apple to be expected, as the si_pid is tracked at the process level. So when multiple
1027+
// signals are in flight in the same process at the same time, it may be overwritten / zeroed.
1028+
|| siginfo->si_pid == 0
10261029
#endif
1027-
)
1028-
{
1029-
// Make sure that errno is not modified
1030-
int savedErrNo = errno;
1031-
Thread::HijackCallback((NATIVE_CONTEXT*)context, NULL);
1032-
errno = savedErrNo;
1033-
}
1030+
)
1031+
{
1032+
// Make sure that errno is not modified
1033+
int savedErrNo = errno;
1034+
Thread::HijackCallback((NATIVE_CONTEXT*)context, pThread);
1035+
errno = savedErrNo;
1036+
}
10341037

1035-
Thread* pThread = ThreadStore::GetCurrentThreadIfAvailable();
1036-
if (pThread)
1037-
{
10381038
pThread->SetActivationPending(false);
10391039
}
10401040

src/coreclr/pal/src/exception/signal.cpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -936,22 +936,20 @@ static void inject_activation_handler(int code, siginfo_t *siginfo, void *contex
936936
CONTEXTToNativeContext(&winContext, ucontext);
937937
}
938938
}
939+
940+
// Call the original handler when it is not ignored or default (terminate).
941+
if (g_previous_activation.sa_flags & SA_SIGINFO)
942+
{
943+
_ASSERTE(g_previous_activation.sa_sigaction != NULL);
944+
g_previous_activation.sa_sigaction(code, siginfo, context);
945+
}
939946
else
940947
{
941-
// Call the original handler when it is not ignored or default (terminate).
942-
if (g_previous_activation.sa_flags & SA_SIGINFO)
943-
{
944-
_ASSERTE(g_previous_activation.sa_sigaction != NULL);
945-
g_previous_activation.sa_sigaction(code, siginfo, context);
946-
}
947-
else
948+
if (g_previous_activation.sa_handler != SIG_IGN &&
949+
g_previous_activation.sa_handler != SIG_DFL)
948950
{
949-
if (g_previous_activation.sa_handler != SIG_IGN &&
950-
g_previous_activation.sa_handler != SIG_DFL)
951-
{
952-
_ASSERTE(g_previous_activation.sa_handler != NULL);
953-
g_previous_activation.sa_handler(code);
954-
}
951+
_ASSERTE(g_previous_activation.sa_handler != NULL);
952+
g_previous_activation.sa_handler(code);
955953
}
956954
}
957955
}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#include "common.h"
5+
6+
#include "asyncsafethreadmap.h"
7+
8+
// Async safe lock free thread map for use in signal handlers
9+
10+
struct ThreadEntry
11+
{
12+
size_t osThread;
13+
void* pThread;
14+
};
15+
16+
#define MAX_THREADS_IN_SEGMENT 256
17+
18+
struct ThreadSegment
19+
{
20+
ThreadEntry entries[MAX_THREADS_IN_SEGMENT];
21+
ThreadSegment* pNext;
22+
};
23+
24+
static ThreadSegment *s_pAsyncSafeThreadMapHead = NULL;
25+
26+
bool InsertThreadIntoAsyncSafeMap(size_t osThread, void* pThread)
27+
{
28+
size_t startIndex = osThread % MAX_THREADS_IN_SEGMENT;
29+
30+
ThreadSegment* pSegment = s_pAsyncSafeThreadMapHead;
31+
ThreadSegment** ppSegment = &s_pAsyncSafeThreadMapHead;
32+
while (true)
33+
{
34+
if (pSegment == NULL)
35+
{
36+
// Need to add a new segment
37+
ThreadSegment* pNewSegment = new (nothrow) ThreadSegment();
38+
if (pNewSegment == NULL)
39+
{
40+
// Memory allocation failed
41+
return false;
42+
}
43+
44+
memset(pNewSegment, 0, sizeof(ThreadSegment));
45+
ThreadSegment* pExpected = NULL;
46+
if (!__atomic_compare_exchange_n(
47+
ppSegment,
48+
&pExpected,
49+
pNewSegment,
50+
false /* weak */,
51+
__ATOMIC_RELEASE /* success_memorder */,
52+
__ATOMIC_RELAXED /* failure_memorder */))
53+
{
54+
// Another thread added the segment first
55+
delete pNewSegment;
56+
pNewSegment = pExpected;
57+
}
58+
59+
pSegment = pNewSegment;
60+
}
61+
for (size_t i = 0; i < MAX_THREADS_IN_SEGMENT; i++)
62+
{
63+
size_t index = (startIndex + i) % MAX_THREADS_IN_SEGMENT;
64+
65+
size_t expected = 0;
66+
if (__atomic_compare_exchange_n(
67+
&pSegment->entries[index].osThread,
68+
&expected,
69+
osThread,
70+
false /* weak */,
71+
__ATOMIC_RELEASE /* success_memorder */,
72+
__ATOMIC_RELAXED /* failure_memorder */))
73+
{
74+
// Successfully inserted
75+
// Use atomic store with release to ensure proper ordering
76+
__atomic_store_n(&pSegment->entries[index].pThread, pThread, __ATOMIC_RELEASE);
77+
return true;
78+
}
79+
}
80+
81+
ppSegment = &pSegment->pNext;
82+
pSegment = __atomic_load_n(&pSegment->pNext, __ATOMIC_ACQUIRE);
83+
}
84+
}
85+
86+
void RemoveThreadFromAsyncSafeMap(size_t osThread, void* pThread)
87+
{
88+
size_t startIndex = osThread % MAX_THREADS_IN_SEGMENT;
89+
90+
ThreadSegment* pSegment = s_pAsyncSafeThreadMapHead;
91+
while (pSegment)
92+
{
93+
for (size_t i = 0; i < MAX_THREADS_IN_SEGMENT; i++)
94+
{
95+
size_t index = (startIndex + i) % MAX_THREADS_IN_SEGMENT;
96+
if (pSegment->entries[index].pThread == pThread)
97+
{
98+
// Found the entry, remove it
99+
pSegment->entries[index].pThread = NULL;
100+
__atomic_exchange_n(&pSegment->entries[index].osThread, (size_t)0, __ATOMIC_RELEASE);
101+
return;
102+
}
103+
}
104+
pSegment = __atomic_load_n(&pSegment->pNext, __ATOMIC_ACQUIRE);
105+
}
106+
}
107+
108+
void *FindThreadInAsyncSafeMap(size_t osThread)
109+
{
110+
size_t startIndex = osThread % MAX_THREADS_IN_SEGMENT;
111+
ThreadSegment* pSegment = s_pAsyncSafeThreadMapHead;
112+
while (pSegment)
113+
{
114+
for (size_t i = 0; i < MAX_THREADS_IN_SEGMENT; i++)
115+
{
116+
size_t index = (startIndex + i) % MAX_THREADS_IN_SEGMENT;
117+
// Use acquire to synchronize with release in InsertThreadIntoAsyncSafeMap
118+
if (__atomic_load_n(&pSegment->entries[index].osThread, __ATOMIC_ACQUIRE) == osThread)
119+
{
120+
return pSegment->entries[index].pThread;
121+
}
122+
}
123+
pSegment = __atomic_load_n(&pSegment->pNext, __ATOMIC_ACQUIRE);
124+
}
125+
return NULL;
126+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#ifndef __ASYNCSAFETHREADMAP_H__
5+
#define __ASYNCSAFETHREADMAP_H__
6+
7+
#if defined(TARGET_UNIX) && !defined(TARGET_WASM)
8+
9+
// Insert a thread into the async-safe map.
10+
// * osThread - The OS thread ID to insert.
11+
// * pThread - A pointer to the thread object to associate with the OS thread ID.
12+
// * return true if the insertion was successful, false otherwise (OOM).
13+
bool InsertThreadIntoAsyncSafeMap(size_t osThread, void* pThread);
14+
15+
// Remove a thread from the async-safe map.
16+
// * osThread - The OS thread ID to remove.
17+
// * pThread - A pointer to the thread object associated with the OS thread ID.
18+
void RemoveThreadFromAsyncSafeMap(size_t osThread, void* pThread);
19+
20+
// Find a thread in the async-safe map.
21+
// * osThread = The OS thread ID to search for.
22+
// * return - A pointer to the thread object associated with the OS thread ID, or NULL if not found.
23+
void* FindThreadInAsyncSafeMap(size_t osThread);
24+
25+
#endif // TARGET_UNIX && !TARGET_WASM
26+
27+
#endif // __ASYNCSAFETHREADMAP_H__

src/coreclr/vm/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,12 @@ set(VM_SOURCES_WKS
380380
${VM_SOURCES_GDBJIT}
381381
)
382382

383+
if (CLR_CMAKE_TARGET_UNIX AND NOT CLR_CMAKE_TARGET_ARCH_WASM)
384+
list(APPEND VM_SOURCES_WKS
385+
${RUNTIME_DIR}/asyncsafethreadmap.cpp
386+
)
387+
endif()
388+
383389
# coreclr needs to compile codeman.cpp differently depending on flavor (i.e. dll vs. static lib))
384390
list(REMOVE_ITEM VM_SOURCES_WKS codeman.cpp)
385391

@@ -476,6 +482,12 @@ set(VM_HEADERS_WKS
476482
${VM_HEADERS_GDBJIT}
477483
)
478484

485+
if (CLR_CMAKE_TARGET_UNIX AND NOT CLR_CMAKE_TARGET_ARCH_WASM)
486+
list(APPEND VM_HEADERS_WKS
487+
${RUNTIME_DIR}/asyncsafethreadmap.h
488+
)
489+
endif()
490+
479491
set(GC_SOURCES_WKS
480492
${GC_SOURCES_DAC_AND_WKS_COMMON}
481493
../gc/gceventstatus.cpp

src/coreclr/vm/codeman.cpp

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,7 @@ IJitManager::IJitManager()
858858
// been stopped when we suspend the EE so they won't be touching an element that is about to be deleted.
859859
// However for pre-emptive mode threads, they could be stalled right on top of the element we want
860860
// to delete, so we need to apply the reader lock to them and wait for them to drain.
861-
ExecutionManager::ScanFlag ExecutionManager::GetScanFlags()
861+
ExecutionManager::ScanFlag ExecutionManager::GetScanFlags(Thread *pThread)
862862
{
863863
CONTRACTL {
864864
NOTHROW;
@@ -869,7 +869,10 @@ ExecutionManager::ScanFlag ExecutionManager::GetScanFlags()
869869
#if !defined(DACCESS_COMPILE)
870870

871871

872-
Thread *pThread = GetThreadNULLOk();
872+
if (!pThread)
873+
{
874+
pThread = GetThreadNULLOk();
875+
}
873876

874877
if (!pThread)
875878
return ScanNoReaderLock;
@@ -5229,6 +5232,24 @@ BOOL ExecutionManager::IsManagedCode(PCODE currentPC)
52295232
return IsManagedCodeWorker(currentPC, &lockState);
52305233
}
52315234

5235+
//**************************************************************************
5236+
BOOL ExecutionManager::IsManagedCodeNoLock(PCODE currentPC)
5237+
{
5238+
CONTRACTL {
5239+
NOTHROW;
5240+
GC_NOTRIGGER;
5241+
} CONTRACTL_END;
5242+
5243+
if (currentPC == (PCODE)NULL)
5244+
return FALSE;
5245+
5246+
_ASSERTE(GetScanFlags() != ScanReaderLock);
5247+
5248+
// Since ScanReaderLock is not set, then we must assume that the ReaderLock is effectively taken.
5249+
RangeSectionLockState lockState = RangeSectionLockState::ReaderLocked;
5250+
return IsManagedCodeWorker(currentPC, &lockState);
5251+
}
5252+
52325253
//**************************************************************************
52335254
NOINLINE // Make sure that the slow path with lock won't affect the fast path
52345255
BOOL ExecutionManager::IsManagedCodeWithLock(PCODE currentPC)

src/coreclr/vm/codeman.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2293,11 +2293,15 @@ class ExecutionManager
22932293
};
22942294

22952295
// Returns default scan flag for current thread
2296-
static ScanFlag GetScanFlags();
2296+
static ScanFlag GetScanFlags(Thread *pThread = NULL);
22972297

22982298
// Returns whether currentPC is in managed code. Returns false for jump stubs on WIN64.
22992299
static BOOL IsManagedCode(PCODE currentPC);
23002300

2301+
// Returns whether currentPC is in managed code. Returns false for jump stubs on WIN64.
2302+
// Does not acquire the reader lock. Caller must ensure it is safe.
2303+
static BOOL IsManagedCodeNoLock(PCODE currentPC);
2304+
23012305
// Returns true if currentPC is ready to run codegen
23022306
static BOOL IsReadyToRunCode(PCODE currentPC);
23032307

0 commit comments

Comments
 (0)