Skip to content

Commit effaf28

Browse files
[threads] At shutdown, don't wait for native threads that aren't calling Mono (#43174)
* [test] Call P/Invoke callback delegates from foreign threads Change post-detach-1.cs to also have versions that call reverse pinvokes from foreign threads that were not attached to mono. The foreign threads should not prevent GC and should not prevent Mono from shutting down using mono_manage_internal. * [threads] Don't wait for native threads that aren't calling Mono If a thread is started from native code, and it interacts with the runtime (by calling a thunk that invokes a managed method), the runtime will attach the thread - it will create a `MonoInternalThread` and add it to the list of threads hash (in threads.c). If the thread returns from the managed call, it will still be recorded by the runtime, but as long as it is not running managed code anymore, it will prevent shutdown. The problem is when we try to suspend the thread in order to abort it, `mono_thread_state_init_from_handle` will see a NULL domain (because `mono_threads_detach_coop_internal` will restore it to NULL when a managed method returns back to native code). (On systems using POSIX signals to suspend, the same check is in `mono_thread_state_init_from_sigctx`). As a result, `mono_threads_suspend_begin_async_suspend` (or `suspend_signal_handler` on POSIX) will set `suspend_can_continue` to FALSE, and `mono_thread_info_safe_suspend_and_run` will not run the suspend callback. As a result, when `mono_manage_internal` calls `abort_threads`, it will add the thread handle to the wait list, but it will not actually request the thread to abort. As a result, after `abort_threads` returns, the subsequent call to `wait_for_tids` will block until the native thread terminates (at which point the TLS destructor will notify the thread handle and wait_for_tids will unblock). This commit changes the behavior of `abort_threads` to ignore threads that do not run `async_suspend_critical` and not to add them to the wait list. As a result, if a native thread calls into managed and then returns to native code, the runtime will not wait for it. * [threads] Warn if mono_thread_manage_internal can't abort a thread Give a hint to embedders to aid debugging * rename AbortThreadData:thread_will_abort field It's used to keep track of whether the thread will eventually throw a TAE (and thus that we need to wait for it). The issue is that under full coop suspend, we treat threads in GC Safe (BLOCKING) state as if they're suspended and always execute async_abort_critical. So the field has nothing to do with whether the thread was suscessfully suspended, but rather whether it will (eventually) be aborted. * [threads] Fix async_abort_critical for full coop If the foreign external thread doesn't have any managed methods on its callstack, but it once called a native-to-managed wrapper, it will be left by mono_threads_detach_coop in GC Safe (BLOCKING) state. But under full coop, GC Safe state is considered suspended, so mono_thread_info_safe_suspend_and_run will run async_abort_critical for the thread. But the thread may never call into Mono again, in which case it will never safepoint and aknowledge the abort interruption. So set thread_will_abort to FALSE in this case, so that mono_thread_manage_internal won't try to wait for it. --- Related to an issue first identified in mono/mono#18517 --- This supersedes mono/mono#18656 Co-authored-by: lambdageek <lambdageek@users.noreply.github.com>
1 parent df87d73 commit effaf28

File tree

4 files changed

+123
-35
lines changed

4 files changed

+123
-35
lines changed

src/mono/mono/metadata/threads-types.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,8 @@ MONO_PROFILER_API MonoInternalThread *mono_thread_internal_current (void);
294294
MonoInternalThreadHandle
295295
mono_thread_internal_current_handle (void);
296296

297-
void mono_thread_internal_abort (MonoInternalThread *thread, gboolean appdomain_unload);
297+
gboolean
298+
mono_thread_internal_abort (MonoInternalThread *thread, gboolean appdomain_unload);
298299
void mono_thread_internal_suspend_for_shutdown (MonoInternalThread *thread);
299300

300301
gboolean mono_thread_internal_has_appdomain_ref (MonoInternalThread *thread, MonoDomain *domain);

src/mono/mono/metadata/threads.c

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ static void mono_free_static_data (gpointer* static_data);
212212
static void mono_init_static_data_info (StaticDataInfo *static_data);
213213
static guint32 mono_alloc_static_data_slot (StaticDataInfo *static_data, guint32 size, guint32 align);
214214
static gboolean mono_thread_resume (MonoInternalThread* thread);
215-
static void async_abort_internal (MonoInternalThread *thread, gboolean install_async_abort);
215+
static gboolean async_abort_internal (MonoInternalThread *thread, gboolean install_async_abort);
216216
static void self_abort_internal (MonoError *error);
217217
static void async_suspend_internal (MonoInternalThread *thread, gboolean interrupt);
218218
static void self_suspend_internal (void);
@@ -1866,7 +1866,7 @@ ves_icall_System_Threading_Thread_Thread_internal (MonoThreadObjectHandle thread
18661866

18671867
internal->state &= ~ThreadState_Unstarted;
18681868

1869-
THREAD_DEBUG (g_message ("%s: Started thread ID %" G_GSIZE_FORMAT " (handle %p)", __func__, tid, thread));
1869+
THREAD_DEBUG (g_message ("%s: Started thread ID %" G_GSIZE_FORMAT " (handle %p)", __func__, (gsize)internal->tid, internal));
18701870

18711871
UNLOCK_THREAD (internal);
18721872
return TRUE;
@@ -2898,15 +2898,16 @@ ves_icall_System_Threading_Thread_Abort (MonoInternalThreadHandle thread_handle,
28982898
* mono_thread_internal_abort:
28992899
* Request thread \p thread to be aborted.
29002900
* \p thread MUST NOT be the current thread.
2901+
* \returns true if the request was successful
29012902
*/
2902-
void
2903+
gboolean
29032904
mono_thread_internal_abort (MonoInternalThread *thread, gboolean appdomain_unload)
29042905
{
29052906
g_assert (thread != mono_thread_internal_current ());
29062907

29072908
if (!request_thread_abort (thread, NULL, appdomain_unload))
2908-
return;
2909-
async_abort_internal (thread, TRUE);
2909+
return FALSE;
2910+
return async_abort_internal (thread, TRUE);
29102911
}
29112912

29122913
#ifndef ENABLE_NETCORE
@@ -3776,12 +3777,20 @@ abort_threads (gpointer key, gpointer value, gpointer user)
37763777
if ((thread->flags & MONO_THREAD_FLAG_DONT_MANAGE))
37773778
return;
37783779

3779-
wait->handles[wait->num] = mono_threads_open_thread_handle (thread->handle);
3780-
wait->threads[wait->num] = thread;
3781-
wait->num++;
3782-
3780+
MonoThreadHandle *handle = mono_threads_open_thread_handle (thread->handle);
37833781
THREAD_DEBUG (g_print ("%s: Aborting id: %" G_GSIZE_FORMAT "\n", __func__, (gsize)thread->tid));
3784-
mono_thread_internal_abort (thread, FALSE);
3782+
if (!mono_thread_internal_abort (thread, FALSE)) {
3783+
g_warning ("%s: Failed aborting id: %p, mono_thread_manage will ignore it\n", __func__, (void*)(intptr_t)(gsize)thread->tid);
3784+
/* close the handle, we're not going to wait for the thread to be aborted */
3785+
mono_threads_close_thread_handle (handle);
3786+
} else {
3787+
/* commit to waiting for the thread to be aborted */
3788+
wait->handles[wait->num] = handle;
3789+
wait->threads[wait->num] = thread;
3790+
wait->num++;
3791+
}
3792+
3793+
37853794
}
37863795

37873796
/**
@@ -5615,8 +5624,11 @@ mono_thread_info_get_last_managed (MonoThreadInfo *info)
56155624
}
56165625

56175626
typedef struct {
5627+
/* inputs */
56185628
MonoInternalThread *thread;
56195629
gboolean install_async_abort;
5630+
/* outputs */
5631+
gboolean thread_will_abort;
56205632
MonoThreadInfoInterruptToken *interrupt_token;
56215633
} AbortThreadData;
56225634

@@ -5629,6 +5641,8 @@ async_abort_critical (MonoThreadInfo *info, gpointer ud)
56295641
gboolean protected_wrapper;
56305642
gboolean running_managed;
56315643

5644+
data->thread_will_abort = TRUE;
5645+
56325646
if (mono_get_eh_callbacks ()->mono_install_handler_block_guard (mono_thread_info_get_suspend_state (info)))
56335647
return MonoResumeThread;
56345648

@@ -5656,25 +5670,41 @@ async_abort_critical (MonoThreadInfo *info, gpointer ud)
56565670
*/
56575671
data->interrupt_token = mono_thread_info_prepare_interrupt (info);
56585672

5673+
if (!ji && !info->runtime_thread) {
5674+
/* Under full cooperative suspend, if a thread is in GC Safe mode (blocking
5675+
* state), mono_thread_info_safe_suspend_and_run will treat the thread as
5676+
* suspended and run async_abort_critical. If the thread has no managed
5677+
* frames, it is some native thread that may not call Mono again anymore, so
5678+
* don't wait for it to abort.
5679+
*/
5680+
data->thread_will_abort = FALSE;
5681+
}
56595682
return MonoResumeThread;
56605683
}
56615684
}
56625685

5663-
static void
5686+
static gboolean
56645687
async_abort_internal (MonoInternalThread *thread, gboolean install_async_abort)
56655688
{
56665689
AbortThreadData data;
56675690

56685691
g_assert (thread != mono_thread_internal_current ());
56695692

56705693
data.thread = thread;
5694+
data.thread_will_abort = FALSE;
56715695
data.install_async_abort = install_async_abort;
56725696
data.interrupt_token = NULL;
56735697

56745698
mono_thread_info_safe_suspend_and_run (thread_get_tid (thread), TRUE, async_abort_critical, &data);
56755699
if (data.interrupt_token)
56765700
mono_thread_info_finish_interrupt (data.interrupt_token);
56775701
/*FIXME we need to wait for interruption to complete -- figure out how much into interruption we should wait for here*/
5702+
5703+
/* If the thread was not suspended (async_abort_critical did not run), or the thread is
5704+
* "suspended" (BLOCKING) under full coop, and the thread was running native code without
5705+
* any managed callers, we can't be sure that it will aknowledge the abort request and
5706+
* actually abort. */
5707+
return data.thread_will_abort;
56785708
}
56795709

56805710
static void

src/mono/mono/tests/libtest.c

Lines changed: 50 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8375,19 +8375,39 @@ invoke_foreign_thread (void* user_data)
83758375
destroy_invoke_names (names);
83768376
return NULL;
83778377
}
8378+
8379+
static void*
8380+
invoke_foreign_delegate (void *user_data)
8381+
{
8382+
VoidVoidCallback del = (VoidVoidCallback)user_data;
8383+
for (int i = 0; i < 5; ++i) {
8384+
del ();
8385+
sleep (2);
8386+
}
8387+
return NULL;
8388+
}
8389+
83788390
#endif
83798391

83808392

83818393
LIBTEST_API mono_bool STDCALL
8382-
mono_test_attach_invoke_foreign_thread (const char *assm_name, const char *name_space, const char *name, const char *meth_name)
8394+
mono_test_attach_invoke_foreign_thread (const char *assm_name, const char *name_space, const char *name, const char *meth_name, VoidVoidCallback del)
83838395
{
83848396
#ifndef HOST_WIN32
8385-
struct invoke_names *names = make_invoke_names (assm_name, name_space, name, meth_name);
8386-
pthread_t t;
8387-
int res = pthread_create (&t, NULL, invoke_foreign_thread, (void*)names);
8388-
g_assert (res == 0);
8389-
pthread_join (t, NULL);
8390-
return 0;
8397+
if (!del) {
8398+
struct invoke_names *names = make_invoke_names (assm_name, name_space, name, meth_name);
8399+
pthread_t t;
8400+
int res = pthread_create (&t, NULL, invoke_foreign_thread, (void*)names);
8401+
g_assert (res == 0);
8402+
pthread_join (t, NULL);
8403+
return 0;
8404+
} else {
8405+
pthread_t t;
8406+
int res = pthread_create (&t, NULL, invoke_foreign_delegate, del);
8407+
g_assert (res == 0);
8408+
pthread_join (t, NULL);
8409+
return 0;
8410+
}
83918411
#else
83928412
// TODO: Win32 version of this test
83938413
return 1;
@@ -8396,6 +8416,8 @@ mono_test_attach_invoke_foreign_thread (const char *assm_name, const char *name_
83968416

83978417
#ifndef HOST_WIN32
83988418
struct names_and_mutex {
8419+
/* if del is NULL, use names, otherwise just call del */
8420+
VoidVoidCallback del;
83998421
struct invoke_names *names;
84008422
/* mutex to coordinate test and foreign thread */
84018423
pthread_mutex_t coord_mutex;
@@ -8410,7 +8432,11 @@ invoke_block_foreign_thread (void *user_data)
84108432
// This thread calls into the runtime and then blocks. It should not
84118433
// prevent the runtime from shutting down.
84128434
struct names_and_mutex *nm = (struct names_and_mutex *)user_data;
8413-
test_invoke_by_name (nm->names);
8435+
if (!nm->del) {
8436+
test_invoke_by_name (nm->names);
8437+
} else {
8438+
nm->del ();
8439+
}
84148440
pthread_mutex_lock (&nm->coord_mutex);
84158441
/* signal the test thread that we called the runtime */
84168442
pthread_cond_signal (&nm->coord_cond);
@@ -8422,26 +8448,31 @@ invoke_block_foreign_thread (void *user_data)
84228448
#endif
84238449

84248450
LIBTEST_API mono_bool STDCALL
8425-
mono_test_attach_invoke_block_foreign_thread (const char *assm_name, const char *name_space, const char *name, const char *meth_name)
8451+
mono_test_attach_invoke_block_foreign_thread (const char *assm_name, const char *name_space, const char *name, const char *meth_name, VoidVoidCallback del)
84268452
{
84278453
#ifndef HOST_WIN32
8428-
struct invoke_names *names = make_invoke_names (assm_name, name_space, name, meth_name);
84298454
struct names_and_mutex *nm = malloc (sizeof (struct names_and_mutex));
8430-
nm->names = names;
8431-
pthread_mutex_init (&nm->coord_mutex, NULL);
8432-
pthread_cond_init (&nm->coord_cond, NULL);
8455+
nm->del = del;
8456+
if (!del) {
8457+
struct invoke_names *names = make_invoke_names (assm_name, name_space, name, meth_name);
8458+
nm->names = names;
8459+
} else {
8460+
nm->names = NULL;
8461+
}
8462+
pthread_mutex_init (&nm->coord_mutex, NULL);
8463+
pthread_cond_init (&nm->coord_cond, NULL);
84338464
pthread_mutex_init (&nm->deadlock_mutex, NULL);
84348465

84358466
pthread_mutex_lock (&nm->deadlock_mutex); // lock the mutex and never unlock it.
84368467
pthread_t t;
84378468
int res = pthread_create (&t, NULL, invoke_block_foreign_thread, (void*)nm);
84388469
g_assert (res == 0);
8439-
/* wait for the foreign thread to finish calling the runtime before
8440-
* detaching it and returning
8441-
*/
8442-
pthread_mutex_lock (&nm->coord_mutex);
8443-
pthread_cond_wait (&nm->coord_cond, &nm->coord_mutex);
8444-
pthread_mutex_unlock (&nm->coord_mutex);
8470+
/* wait for the foreign thread to finish calling the runtime before
8471+
* detaching it and returning
8472+
*/
8473+
pthread_mutex_lock (&nm->coord_mutex);
8474+
pthread_cond_wait (&nm->coord_cond, &nm->coord_mutex);
8475+
pthread_mutex_unlock (&nm->coord_mutex);
84458476
pthread_detach (t);
84468477
return 0;
84478478
#else

src/mono/mono/tests/pinvoke-detach-1.cs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,29 +28,55 @@ private static void MethodInvokedFromNative ()
2828
}
2929

3030
[DllImport ("libtest", EntryPoint="mono_test_attach_invoke_foreign_thread")]
31-
public static extern bool mono_test_attach_invoke_foreign_thread (string assm_name, string name_space, string class_name, string method_name);
31+
public static extern bool mono_test_attach_invoke_foreign_thread (string assm_name, string name_space, string class_name, string method_name, VoidVoidDelegate del);
3232

3333
public static int test_0_attach_invoke_foreign_thread ()
3434
{
3535
was_called = 0;
36-
bool skipped = mono_test_attach_invoke_foreign_thread (typeof (Tests).Assembly.Location, "", "Tests", "MethodInvokedFromNative");
36+
bool skipped = mono_test_attach_invoke_foreign_thread (typeof (Tests).Assembly.Location, "", "Tests", "MethodInvokedFromNative", null);
37+
GC.Collect (); // should not hang waiting for the foreign thread
3738
return skipped || was_called == 5 ? 0 : 1;
3839
}
3940

41+
static int was_called_del;
42+
43+
[MonoPInvokeCallback (typeof (VoidVoidDelegate))]
44+
private static void MethodInvokedFromNative_del ()
45+
{
46+
was_called_del++;
47+
}
48+
49+
public static int test_0_attach_invoke_foreign_thread_delegate ()
50+
{
51+
var del = new VoidVoidDelegate (MethodInvokedFromNative_del);
52+
was_called_del = 0;
53+
bool skipped = mono_test_attach_invoke_foreign_thread (null, null, null, null, del);
54+
GC.Collect (); // should not hang waiting for the foreign thread
55+
return skipped || was_called_del == 5 ? 0 : 1;
56+
}
57+
4058
[MonoPInvokeCallback (typeof (VoidVoidDelegate))]
4159
private static void MethodInvokedFromNative2 ()
4260
{
4361
}
4462

4563
[DllImport ("libtest", EntryPoint="mono_test_attach_invoke_block_foreign_thread")]
46-
public static extern bool mono_test_attach_invoke_block_foreign_thread (string assm_name, string name_space, string class_name, string method_name);
64+
public static extern bool mono_test_attach_invoke_block_foreign_thread (string assm_name, string name_space, string class_name, string method_name, VoidVoidDelegate del);
4765

4866
public static int test_0_attach_invoke_block_foreign_thread ()
4967
{
50-
bool skipped = mono_test_attach_invoke_block_foreign_thread (typeof (Tests).Assembly.Location, "", "Tests", "MethodInvokedFromNative2");
68+
bool skipped = mono_test_attach_invoke_block_foreign_thread (typeof (Tests).Assembly.Location, "", "Tests", "MethodInvokedFromNative2", null);
5169
GC.Collect (); // should not hang waiting for the foreign thread
5270
return 0; // really we succeed if the app can shut down without hanging
5371
}
5472

73+
// This one fails because we haven't fully detached, so shutdown is waiting for the thread
74+
public static int test_0_attach_invoke_block_foreign_thread_delegate ()
75+
{
76+
var del = new VoidVoidDelegate (MethodInvokedFromNative2);
77+
bool skipped = mono_test_attach_invoke_block_foreign_thread (null, null, null, null, del);
78+
GC.Collect (); // should not hang waiting for the foreign thread
79+
return 0; // really we succeed if the app can shut down without hanging
80+
}
5581

5682
}

0 commit comments

Comments
 (0)