Skip to content

Commit 145079c

Browse files
vtjnash and kpamnany
authored and committed
add support for async backtraces of Tasks on any thread (JuliaLang#51430)
1 parent b1e181c commit 145079c

File tree

13 files changed

+538
-332
lines changed

13 files changed

+538
-332
lines changed

src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ endif
4343
SRCS := \
4444
jltypes gf typemap smallintset ast builtins module interpreter symbol \
4545
dlload sys init task array staticdata toplevel jl_uv datatype \
46-
simplevector runtime_intrinsics precompile jloptions \
46+
simplevector runtime_intrinsics precompile jloptions mtarraylist \
4747
threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \
4848
jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \
4949
crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall

src/gc-stacks.c

Lines changed: 54 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
119119
if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
120120
unsigned pool_id = select_pool(bufsz);
121121
if (pool_sizes[pool_id] == bufsz) {
122-
arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
122+
small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
123123
return;
124124
}
125125
}
@@ -148,7 +148,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
148148
#ifdef _COMPILER_ASAN_ENABLED_
149149
__asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
150150
#endif
151-
arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
151+
small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
152152
}
153153
}
154154
}
@@ -163,9 +163,9 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
163163
if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
164164
unsigned pool_id = select_pool(ssize);
165165
ssize = pool_sizes[pool_id];
166-
arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
166+
small_arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
167167
if (pool->len > 0) {
168-
stk = arraylist_pop(pool);
168+
stk = small_arraylist_pop(pool);
169169
}
170170
}
171171
else {
@@ -184,8 +184,8 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
184184
}
185185
*bufsz = ssize;
186186
if (owner) {
187-
arraylist_t *live_tasks = &ptls->heap.live_tasks;
188-
arraylist_push(live_tasks, owner);
187+
small_arraylist_t *live_tasks = &ptls->heap.live_tasks;
188+
mtarraylist_push(live_tasks, owner);
189189
}
190190
return stk;
191191
}
@@ -209,7 +209,7 @@ void sweep_stack_pools(void)
209209

210210
// free half of stacks that remain unused since last sweep
211211
for (int p = 0; p < JL_N_STACK_POOLS; p++) {
212-
arraylist_t *al = &ptls2->heap.free_stacks[p];
212+
small_arraylist_t *al = &ptls2->heap.free_stacks[p];
213213
size_t n_to_free;
214214
if (al->len > MIN_STACK_MAPPINGS_PER_POOL) {
215215
n_to_free = al->len / 2;
@@ -220,12 +220,12 @@ void sweep_stack_pools(void)
220220
n_to_free = 0;
221221
}
222222
for (int n = 0; n < n_to_free; n++) {
223-
void *stk = arraylist_pop(al);
223+
void *stk = small_arraylist_pop(al);
224224
free_stack(stk, pool_sizes[p]);
225225
}
226226
}
227227

228-
arraylist_t *live_tasks = &ptls2->heap.live_tasks;
228+
small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
229229
size_t n = 0;
230230
size_t ndel = 0;
231231
size_t l = live_tasks->len;
@@ -268,24 +268,52 @@ void sweep_stack_pools(void)
268268

269269
JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
270270
{
271-
jl_task_t *ct = jl_current_task;
272-
jl_ptls_t ptls = ct->ptls;
273-
arraylist_t *live_tasks = &ptls->heap.live_tasks;
274-
size_t i, j, l;
275-
jl_array_t *a;
276-
do {
277-
l = live_tasks->len;
278-
a = jl_alloc_vec_any(l + 1); // may gc, changing the number of tasks
279-
} while (l + 1 < live_tasks->len);
280-
l = live_tasks->len;
281-
void **lst = live_tasks->items;
282-
j = 0;
283-
((void**)jl_array_data(a))[j++] = ptls->root_task;
284-
for (i = 0; i < l; i++) {
285-
if (((jl_task_t*)lst[i])->stkbuf != NULL)
286-
((void**)jl_array_data(a))[j++] = lst[i];
271+
size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
272+
jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
273+
size_t l = 0; // l is not reset on restart, so we keep getting more aggressive at making a big enough list every time it fails
274+
restart:
275+
for (size_t i = 0; i < nthreads; i++) {
276+
// skip GC threads since they don't have tasks
277+
if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) {
278+
continue;
279+
}
280+
jl_ptls_t ptls2 = allstates[i];
281+
if (ptls2 == NULL)
282+
continue;
283+
small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
284+
size_t n = mtarraylist_length(live_tasks);
285+
l += n + (ptls2->root_task->stkbuf != NULL);
286+
}
287+
l += l / 20; // add 5% for margin of estimation error
288+
jl_array_t *a = jl_alloc_vec_any(l); // may gc, changing the number of tasks and forcing us to reload everything
289+
nthreads = jl_atomic_load_acquire(&jl_n_threads);
290+
allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
291+
size_t j = 0;
292+
for (size_t i = 0; i < nthreads; i++) {
293+
// skip GC threads since they don't have tasks
294+
if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) {
295+
continue;
296+
}
297+
jl_ptls_t ptls2 = allstates[i];
298+
if (ptls2 == NULL)
299+
continue;
300+
jl_task_t *t = ptls2->root_task;
301+
if (t->stkbuf != NULL) {
302+
if (j == l)
303+
goto restart;
304+
((void**)jl_array_data(a))[j++] = t;
305+
}
306+
small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
307+
size_t n = mtarraylist_length(live_tasks);
308+
for (size_t i = 0; i < n; i++) {
309+
jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
310+
if (t->stkbuf != NULL) {
311+
if (j == l)
312+
goto restart;
313+
((void**)jl_array_data(a))[j++] = t;
314+
}
315+
}
287316
}
288-
l = jl_array_len(a);
289317
if (j < l) {
290318
JL_GC_PUSH1(&a);
291319
jl_array_del_end(a, l - j);

src/gc.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,7 +1084,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,
10841084
jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*),
10851085
jl_weakref_type);
10861086
wr->value = value; // NOTE: wb not needed here
1087-
arraylist_push(&ptls->heap.weak_refs, wr);
1087+
small_arraylist_push(&ptls->heap.weak_refs, wr);
10881088
return wr;
10891089
}
10901090

@@ -3677,8 +3677,10 @@ void jl_init_thread_heap(jl_ptls_t ptls)
36773677
p[i].freelist = NULL;
36783678
p[i].newpages = NULL;
36793679
}
3680-
arraylist_new(&heap->weak_refs, 0);
3681-
arraylist_new(&heap->live_tasks, 0);
3680+
small_arraylist_new(&heap->weak_refs, 0);
3681+
small_arraylist_new(&heap->live_tasks, 0);
3682+
for (int i = 0; i < JL_N_STACK_POOLS; i++)
3683+
small_arraylist_new(&heap->free_stacks[i], 0);
36823684
heap->mallocarrays = NULL;
36833685
heap->mafreelist = NULL;
36843686
heap->big_objects = NULL;

src/interpreter.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ extern void JL_GC_ENABLEFRAME(interpreter_state*) JL_NOTSAFEPOINT;
6565
// we define this separately so that we can populate the frame before we add it to the backtrace
6666
// it's recommended to mark the containing function with NOINLINE, though not essential
6767
#define JL_GC_ENABLEFRAME(frame) \
68-
((void**)&frame[1])[0] = __builtin_frame_address(0);
68+
jl_signal_fence(); \
69+
((void**)&frame[1])[0] = __builtin_frame_address(0);
6970

7071
#endif
7172

src/julia.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,11 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
968968
int isaligned, jl_value_t *owner);
969969
JL_DLLEXPORT void jl_gc_safepoint(void);
970970

971+
void *mtarraylist_get(small_arraylist_t *_a, size_t idx) JL_NOTSAFEPOINT;
972+
size_t mtarraylist_length(small_arraylist_t *_a) JL_NOTSAFEPOINT;
973+
void mtarraylist_add(small_arraylist_t *_a, void *elt, size_t idx) JL_NOTSAFEPOINT;
974+
void mtarraylist_push(small_arraylist_t *_a, void *elt) JL_NOTSAFEPOINT;
975+
971976
// object accessors -----------------------------------------------------------
972977

973978
#define jl_svec_len(t) (((jl_svec_t*)(t))->length)

src/julia_internal.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -195,10 +195,12 @@ JL_DLLEXPORT void jl_set_profile_peek_duration(double);
195195

196196
JL_DLLEXPORT void jl_init_profile_lock(void);
197197
JL_DLLEXPORT uintptr_t jl_lock_profile_rd_held(void) JL_NOTSAFEPOINT;
198-
JL_DLLEXPORT void jl_lock_profile(void) JL_NOTSAFEPOINT;
199-
JL_DLLEXPORT void jl_unlock_profile(void) JL_NOTSAFEPOINT;
200-
JL_DLLEXPORT void jl_lock_profile_wr(void) JL_NOTSAFEPOINT;
201-
JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT;
198+
JL_DLLEXPORT void jl_lock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
199+
JL_DLLEXPORT void jl_unlock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;
200+
JL_DLLEXPORT void jl_lock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
201+
JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;
202+
int jl_lock_stackwalk(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
203+
void jl_unlock_stackwalk(int lockret) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;
202204

203205
// number of cycles since power-on
204206
static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
@@ -1181,6 +1183,9 @@ void jl_print_bt_entry_codeloc(int sig, jl_bt_element_t *bt_data) JL_NOTSAFEPOIN
11811183
#ifdef _OS_WINDOWS_
11821184
JL_DLLEXPORT void jl_refresh_dbg_module_list(void);
11831185
#endif
1186+
int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) JL_NOTSAFEPOINT;
1187+
void jl_thread_resume(int tid) JL_NOTSAFEPOINT;
1188+
11841189
// *to is NULL or malloc'd pointer, from is allowed to be NULL
11851190
STATIC_INLINE char *jl_copy_str(char **to, const char *from) JL_NOTSAFEPOINT
11861191
{

src/julia_threads.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ typedef struct {
107107

108108
// handle to reference an OS thread
109109
#ifdef _OS_WINDOWS_
110-
typedef DWORD jl_thread_t;
110+
typedef HANDLE jl_thread_t;
111111
#else
112112
typedef pthread_t jl_thread_t;
113113
#endif
@@ -138,10 +138,10 @@ typedef struct {
138138

139139
typedef struct {
140140
// variable for tracking weak references
141-
arraylist_t weak_refs;
141+
small_arraylist_t weak_refs;
142142
// live tasks started on this thread
143143
// that are holding onto a stack from the pool
144-
arraylist_t live_tasks;
144+
small_arraylist_t live_tasks;
145145

146146
// variables for tracking malloc'd arrays
147147
struct _mallocarray_t *mallocarrays;
@@ -169,7 +169,7 @@ typedef struct {
169169
jl_gc_pool_t norm_pools[JL_GC_N_POOLS];
170170

171171
#define JL_N_STACK_POOLS 16
172-
arraylist_t free_stacks[JL_N_STACK_POOLS];
172+
small_arraylist_t free_stacks[JL_N_STACK_POOLS];
173173
} jl_thread_heap_t;
174174

175175
typedef struct {

src/mtarraylist.c

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
#include "julia.h"
4+
#include "julia_internal.h"
5+
#include "julia_assert.h"
6+
7+
#ifdef __cplusplus
8+
extern "C" {
9+
#endif
10+
11+
// this file provides some alternate API functions for small_arraylist (push and add)
12+
// which can be safely observed from other threads concurrently
13+
// there is only permitted to be a single writer thread (or a mutex)
14+
// but there can be any number of observers
15+
16+
typedef struct {
17+
_Atomic(uint32_t) len;
18+
uint32_t max;
19+
_Atomic(_Atomic(void*)*) items;
20+
_Atomic(void*) _space[SMALL_AL_N_INLINE];
21+
} small_mtarraylist_t;
22+
23+
// change capacity to at least newlen
24+
static void mtarraylist_resizeto(small_mtarraylist_t *a, size_t len, size_t newlen) JL_NOTSAFEPOINT
25+
{
26+
size_t max = a->max;
27+
if (newlen > max) {
28+
size_t nm = max * 2;
29+
if (nm == 0)
30+
nm = 1;
31+
while (newlen > nm)
32+
nm *= 2;
33+
void *olditems = (void*)jl_atomic_load_relaxed(&a->items);
34+
void *p = calloc_s(nm * sizeof(void*));
35+
memcpy(p, olditems, len * sizeof(void*));
36+
jl_atomic_store_release(&a->items, (_Atomic(void*)*)p);
37+
a->max = nm;
38+
if (olditems != (void*)&a->_space[0]) {
39+
jl_task_t *ct = jl_current_task;
40+
jl_gc_add_quiescent(ct->ptls, (void**)olditems, free);
41+
}
42+
}
43+
}
44+
45+
// single-threaded
46+
void mtarraylist_push(small_arraylist_t *_a, void *elt)
47+
{
48+
small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
49+
size_t len = jl_atomic_load_relaxed(&a->len);
50+
mtarraylist_resizeto(a, len, len + 1);
51+
jl_atomic_store_release(&jl_atomic_load_relaxed(&a->items)[len], elt);
52+
jl_atomic_store_release(&a->len, len + 1);
53+
}
54+
55+
// single-threaded
56+
void mtarraylist_add(small_arraylist_t *_a, void *elt, size_t idx)
57+
{
58+
small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
59+
size_t len = jl_atomic_load_relaxed(&a->len);
60+
mtarraylist_resizeto(a, len, idx + 1);
61+
jl_atomic_store_release(&jl_atomic_load_relaxed(&a->items)[idx], elt);
62+
if (jl_atomic_load_relaxed(&a->len) < idx + 1)
63+
jl_atomic_store_release(&a->len, idx + 1);
64+
}
65+
66+
// concurrent-safe
67+
size_t mtarraylist_length(small_arraylist_t *_a)
68+
{
69+
small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
70+
return jl_atomic_load_relaxed(&a->len);
71+
}
72+
73+
// concurrent-safe
74+
void *mtarraylist_get(small_arraylist_t *_a, size_t idx)
75+
{
76+
small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
77+
size_t len = jl_atomic_load_acquire(&a->len);
78+
if (idx >= len)
79+
return NULL;
80+
return jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&a->items)[idx]);
81+
}

src/signals-mach.c

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -377,12 +377,12 @@ static void attach_exception_port(thread_port_t thread, int segv_only)
377377
HANDLE_MACH_ERROR("thread_set_exception_ports", ret);
378378
}
379379

380-
static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx)
380+
static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) JL_NOTSAFEPOINT
381381
{
382382
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
383383
if (ptls2 == NULL) // this thread is not alive
384384
return 0;
385-
jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
385+
jl_task_t *ct2 = jl_atomic_load_relaxed(&ptls2->current_task);
386386
if (ct2 == NULL) // this thread is already dead
387387
return 0;
388388

@@ -400,18 +400,18 @@ static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx)
400400
return 1;
401401
}
402402

403-
static void jl_thread_suspend_and_get_state(int tid, int timeout, unw_context_t **ctx)
403+
int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
404404
{
405405
(void)timeout;
406-
static host_thread_state_t state;
406+
host_thread_state_t state;
407407
if (!jl_thread_suspend_and_get_state2(tid, &state)) {
408-
*ctx = NULL;
409-
return;
408+
return 0;
410409
}
411-
*ctx = (unw_context_t*)&state;
410+
*ctx = *(unw_context_t*)&state;
411+
return 1;
412412
}
413413

414-
static void jl_thread_resume(int tid, int sig)
414+
void jl_thread_resume(int tid)
415415
{
416416
jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
417417
mach_port_t thread = pthread_mach_thread_np(ptls2->system_id);
@@ -579,8 +579,15 @@ static void jl_unlock_profile_mach(int dlsymlock, int keymgr_locked)
579579
jl_unlock_profile();
580580
}
581581

582-
#define jl_lock_profile() int keymgr_locked = jl_lock_profile_mach(1)
583-
#define jl_unlock_profile() jl_unlock_profile_mach(1, keymgr_locked)
582+
int jl_lock_stackwalk(void)
583+
{
584+
return jl_lock_profile_mach(1);
585+
}
586+
587+
void jl_unlock_stackwalk(int lockret)
588+
{
589+
jl_unlock_profile_mach(1, lockret);
590+
}
584591

585592
void *mach_profile_listener(void *arg)
586593
{
@@ -673,7 +680,7 @@ void *mach_profile_listener(void *arg)
673680
bt_data_prof[bt_size_cur++].uintptr = 0;
674681
}
675682
// We're done! Resume the thread.
676-
jl_thread_resume(i, 0);
683+
jl_thread_resume(i);
677684
}
678685
jl_unlock_profile_mach(0, keymgr_locked);
679686
if (running) {

0 commit comments

Comments
 (0)