Skip to content

Commit 082d7d2

Browse files
committed
allow tasks to request dedicated stack space when created
never copy over the root stack: this is a hybrid approach to COPY_STACK where the root task is never moved or copied, and all other task stacks are layered into the same memory area (ptls->basestack + basesize). Several strategies exist for making new stacks: ucontext_t (where it is available, aka Linux); unw_context_t (as an alternative to ucontext_t that avoids a syscall on task-switch); makecontext (as a POSIX standard implementation); setjmp/longjmp-based implementation (for systems where this is sufficient); Windows Fibers (implemented here, since we can be more efficient and reliable than the official Fibers API). Also, uses an alternate stack for use in collecting stack-overflow backtraces like POSIX, but managed manually.
1 parent d28a7d5 commit 082d7d2

26 files changed

+1259
-680
lines changed

base/boot.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,9 @@ eval(Core, :(LineInfoNode(mod::Module, method::Symbol, file::Symbol, line::Int,
374374

375375
Module(name::Symbol=:anonymous, std_imports::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool), name, std_imports)
376376

377-
Task(@nospecialize(f)) = ccall(:jl_new_task, Ref{Task}, (Any, Int), f, 0)
377+
function Task(@nospecialize(f), reserved_stack::Int=0)
378+
return ccall(:jl_new_task, Ref{Task}, (Any, Int), f, reserved_stack)
379+
end
378380

379381
# simple convert for use by constructors of types in Core
380382
# note that there is no actual conversion defined here,

src/Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ SRCS := \
4242
jltypes gf typemap ast builtins module interpreter symbol \
4343
dlload sys init task array dump staticdata toplevel jl_uv datatype \
4444
simplevector APInt-C runtime_intrinsics runtime_ccall precompile \
45-
threadgroup threading stackwalk gc gc-debug gc-pages method \
45+
threadgroup threading stackwalk gc gc-debug gc-pages gc-stacks method \
4646
jlapi signal-handling safepoint jloptions timing subtype rtutils \
4747
crc32c processor
4848

@@ -207,14 +207,14 @@ $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c in
207207
$(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h
208208

209209
# archive library file rules
210-
$(BUILDDIR)/support/libsupport.a: $(SRCDIR)/support/*.h $(SRCDIR)/support/*.c
210+
$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
211211
$(MAKE) -C $(SRCDIR)/support BUILDDIR='$(abspath $(BUILDDIR)/support)'
212212

213-
$(BUILDDIR)/support/libsupport-debug.a: $(SRCDIR)/support/*.h $(SRCDIR)/support/*.c
213+
$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
214214
$(MAKE) -C $(SRCDIR)/support debug BUILDDIR='$(abspath $(BUILDDIR)/support)'
215215

216216
$(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a
217-
$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/,flisp/*.h flisp/*.c) $(BUILDDIR)/support/libsupport.a
217+
$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/flisp/,*.h *.c) $(BUILDDIR)/support/libsupport.a
218218
$(MAKE) -C $(SRCDIR)/flisp BUILDDIR='$(abspath $(BUILDDIR)/flisp)'
219219

220220
$(FLISP_EXECUTABLE_debug): $(BUILDDIR)/flisp/libflisp-debug.a

src/gc-debug.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -595,11 +595,11 @@ static void gc_scrub_task(jl_task_t *ta)
595595
#else
596596
jl_task_t *thread_task = ptls2->root_task;
597597
#endif
598-
if (ta == thread_task)
599-
gc_scrub_range(ptls2->stack_lo, ptls2->stack_hi);
600-
if (ta->stkbuf == (void*)(intptr_t)(-1) || !ta->stkbuf)
601-
return;
602-
gc_scrub_range((char*)ta->stkbuf, (char*)ta->stkbuf + ta->ssize);
598+
void *stkbuf = ta->stkbuf;
599+
if (ta == thread_task && ptls->copy_stack)
600+
gc_scrub_range(ptls2->stackbase, ptls2->stacksize);
601+
else if (stkbuf)
602+
gc_scrub_range((char*)stkbuf, (char*)stkbuf + ta->bufsz);
603603
}
604604

605605
void gc_scrub(void)

src/gc-pages.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#ifndef _OS_WINDOWS_
55
# include <sys/resource.h>
66
#endif
7-
#include "julia_assert.h"
87

98
#ifdef __cplusplus
109
extern "C" {

src/gc-stacks.c

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
// This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
#include "gc.h"
4+
#ifndef _OS_WINDOWS_
5+
# include <sys/resource.h>
6+
#endif
7+
8+
const size_t jl_guard_size = (4096 * 16);
9+
10+
#ifdef _OS_WINDOWS_
11+
#define MAP_FAILED NULL
12+
static void *malloc_stack(size_t bufsz)
13+
{
14+
void *stk = VirtualAlloc(NULL, bufsz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
15+
if (stk == NULL)
16+
return MAP_FAILED;
17+
DWORD dwOldProtect;
18+
if (!VirtualProtect(stk, jl_guard_size, PAGE_READWRITE | PAGE_GUARD, &dwOldProtect)) {
19+
VirtualFree(stk, 0, MEM_RELEASE);
20+
return MAP_FAILED;
21+
}
22+
return stk;
23+
}
24+
25+
26+
static void free_stack(void *stkbuf, size_t bufsz)
27+
{
28+
VirtualFree(stkbuf, 0, MEM_RELEASE);
29+
}
30+
31+
#else
32+
33+
static void *malloc_stack(size_t bufsz)
34+
{
35+
void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
36+
if (stk == MAP_FAILED)
37+
return MAP_FAILED;
38+
#if !defined(JL_HAVE_UCONTEXT) && !defined(JL_HAVE_SIGALTSTACK)
39+
// setup a guard page to detect stack overflow
40+
if (mprotect(stk, jl_guard_size, PROT_NONE) == -1) {
41+
munmap(stk, bufsz);
42+
return MAP_FAILED;
43+
}
44+
#endif
45+
return stk;
46+
}
47+
48+
// POSIX variant: unmap the `bufsz`-byte mapping starting at `stkbuf`.
// The result of munmap is not inspected.
static void free_stack(void *stkbuf, size_t bufsz)
{
    (void)munmap(stkbuf, bufsz);
}
52+
#endif
53+
54+
55+
// Size classes, in bytes, of the pooled task stacks, in strictly ascending
// order. select_pool() scans this table front to back, and the number of
// entries is checked against JL_N_STACK_POOLS by the static_assert that
// follows the table.
// Each class is 1x or 1.5x a power of two, so every entry is a whole number
// of 4 KiB units.
const unsigned pool_sizes[] = {
    128 * 1024,
    192 * 1024,
    256 * 1024,
    384 * 1024,
    512 * 1024,
    768 * 1024,
    1024 * 1024,
    // was `1537 * 1024`: a typo that broke the 1x/1.5x progression and made
    // this the only class that is not a multiple of 4 KiB
    1536 * 1024,
    2048 * 1024,
    3 * 1024 * 1024,
    4 * 1024 * 1024,
    6 * 1024 * 1024,
    8 * 1024 * 1024,
    12 * 1024 * 1024,
    16 * 1024 * 1024,
    24 * 1024 * 1024,
};
73+
74+
static_assert(sizeof(pool_sizes) == JL_N_STACK_POOLS * sizeof(pool_sizes[0]), "JL_N_STACK_POOLS size mismatch");
75+
76+
static unsigned select_pool(size_t nb)
77+
{
78+
unsigned pool_id = 0;
79+
while (pool_sizes[pool_id] < nb)
80+
pool_id++;
81+
return pool_id;
82+
}
83+
84+
85+
// Dispose of the stack buffer `stkbuf` of size `bufsz` on behalf of thread
// state `ptls`.
// If `bufsz` is exactly one of the pool size classes, the buffer is pushed
// onto that class's free list in `ptls->heap.free_stacks` for reuse;
// otherwise it is handed to free_stack().
static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
{
    unsigned cls = 0;
    int poolable = 0;
    if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
        cls = select_pool(bufsz);
        // only buffers whose size matches a class exactly are recycled
        poolable = (pool_sizes[cls] == bufsz);
    }
    if (!poolable) {
        free_stack(stkbuf, bufsz);
        return;
    }
    arraylist_push(&ptls->heap.free_stacks[cls], stkbuf);
}
96+
97+
98+
JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
99+
{
100+
_jl_free_stack(jl_get_ptls_states(), stkbuf, bufsz);
101+
}
102+
103+
104+
// Obtain a stack buffer of at least `*bufsz` bytes.
//
// Requests no larger than the biggest pool class are rounded up to that
// class's size and served from the thread's free list for the class when one
// is available. Larger requests are rounded up with
// LLT_ALIGN(..., jl_page_size). If no pooled buffer is used, a fresh one is
// obtained from malloc_stack(), and jl_memory_exception is thrown if that
// fails.
//
// On return `*bufsz` holds the actual size of the buffer. If `owner` is
// non-NULL it is appended to the thread's `live_tasks` list, which
// sweep_stack_pools() walks.
JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner)
{
    jl_ptls_t ptls = jl_get_ptls_states();
    size_t nbytes = *bufsz;
    void *stk = NULL;
    if (nbytes > pool_sizes[JL_N_STACK_POOLS - 1]) {
        // too large for any pool
        nbytes = LLT_ALIGN(nbytes, jl_page_size);
    }
    else {
        unsigned cls = select_pool(nbytes);
        arraylist_t *freelist = &ptls->heap.free_stacks[cls];
        nbytes = pool_sizes[cls];
        if (freelist->len > 0)
            stk = arraylist_pop(freelist);
    }
    if (stk == NULL) {
        // TODO: allocate blocks of stacks? but need to mprotect individually anyways
        stk = malloc_stack(nbytes);
        if (stk == MAP_FAILED)
            jl_throw(jl_memory_exception);
    }
    *bufsz = nbytes;
    if (owner)
        arraylist_push(&ptls->heap.live_tasks, owner);
    return stk;
}
133+
134+
void sweep_stack_pools(void)
135+
{
136+
// TODO: deallocate stacks if we have too many sitting around unused
137+
// for (stk in halfof(free_stacks))
138+
// free_stack(stk, pool_sz);
139+
// // then sweep the task stacks
140+
// for (t in live_tasks)
141+
// if (!gc-marked(t))
142+
// stkbuf = t->stkbuf
143+
// bufsz = t->bufsz
144+
// if (stkbuf)
145+
// push(free_stacks[sz], stkbuf)
146+
for (int i = 0; i < jl_n_threads; i++) {
147+
jl_ptls_t ptls2 = jl_all_tls_states[i];
148+
arraylist_t *live_tasks = &ptls2->heap.live_tasks;
149+
size_t n = 0;
150+
size_t ndel = 0;
151+
size_t l = live_tasks->len;
152+
void **lst = live_tasks->items;
153+
if (l == 0)
154+
continue;
155+
while (1) {
156+
jl_task_t *t = (jl_task_t*)lst[n];
157+
if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
158+
n++;
159+
}
160+
else {
161+
ndel++;
162+
void *stkbuf = t->stkbuf;
163+
size_t bufsz = t->bufsz;
164+
if (stkbuf) {
165+
t->stkbuf = NULL;
166+
_jl_free_stack(ptls2, stkbuf, bufsz);
167+
}
168+
}
169+
if (n >= l - ndel)
170+
break;
171+
void *tmp = lst[n];
172+
lst[n] = lst[n + ndel];
173+
lst[n + ndel] = tmp;
174+
}
175+
live_tasks->len -= ndel;
176+
}
177+
}

src/gc.c

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -699,7 +699,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,
699699

700700
static void sweep_weak_refs(void)
701701
{
702-
for (int i = 0;i < jl_n_threads;i++) {
702+
for (int i = 0; i < jl_n_threads; i++) {
703703
jl_ptls_t ptls2 = jl_all_tls_states[i];
704704
size_t n = 0;
705705
size_t ndel = 0;
@@ -710,7 +710,8 @@ static void sweep_weak_refs(void)
710710
while (1) {
711711
jl_weakref_t *wr = (jl_weakref_t*)lst[n];
712712
if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) {
713-
// weakref itself is alive
713+
// weakref itself is alive,
714+
// so the user could still re-set it to a new value
714715
if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc))
715716
wr->value = (jl_value_t*)jl_nothing;
716717
n++;
@@ -722,7 +723,7 @@ static void sweep_weak_refs(void)
722723
break;
723724
void *tmp = lst[n];
724725
lst[n] = lst[n + ndel];
725-
lst[n+ndel] = tmp;
726+
lst[n + ndel] = tmp;
726727
}
727728
ptls2->heap.weak_refs.len -= ndel;
728729
}
@@ -1026,7 +1027,7 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
10261027
int freedall = 1;
10271028
int pg_skpd = 1;
10281029
if (!pg->has_marked) {
1029-
// lazy version: (empty) if the whole page was already unused, free it
1030+
// lazy version: (empty) if the whole page was already unused, free it (return it to the pool)
10301031
// eager version: (freedall) free page as soon as possible
10311032
// the eager one uses less memory.
10321033
// FIXME - need to do accounting on a per-thread basis
@@ -2124,19 +2125,13 @@ mark: {
21242125
objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t));
21252126
jl_task_t *ta = (jl_task_t*)new_obj;
21262127
gc_scrub_record_task(ta);
2127-
int stkbuf = (ta->stkbuf != (void*)(intptr_t)-1 && ta->stkbuf != NULL);
2128+
void *stkbuf = ta->stkbuf;
21282129
int16_t tid = ta->tid;
21292130
jl_ptls_t ptls2 = jl_all_tls_states[tid];
2130-
if (stkbuf) {
21312131
#ifdef COPY_STACKS
2132-
gc_setmark_buf_(ptls, ta->stkbuf, bits, ta->bufsz);
2133-
#else
2134-
// stkbuf isn't owned by julia for the root task
2135-
if (ta != ptls2->root_task) {
2136-
gc_setmark_buf_(ptls, ta->stkbuf, bits, ta->ssize);
2137-
}
2132+
if (stkbuf && ta->copy_stack)
2133+
gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
21382134
#endif
2139-
}
21402135
jl_gcframe_t *s = NULL;
21412136
size_t nroots;
21422137
uintptr_t offset = 0;
@@ -2148,9 +2143,11 @@ mark: {
21482143
else if (stkbuf) {
21492144
s = ta->gcstack;
21502145
#ifdef COPY_STACKS
2151-
ub = (uintptr_t)ptls2->stackbase;
2152-
lb = ub - ta->ssize;
2153-
offset = (uintptr_t)ta->stkbuf - lb;
2146+
if (ta->copy_stack) {
2147+
ub = (uintptr_t)ptls2->stackbase;
2148+
lb = ub - ta->copy_stack;
2149+
offset = (uintptr_t)stkbuf - lb;
2150+
}
21542151
#endif
21552152
}
21562153
if (s) {
@@ -2278,10 +2275,6 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, gc_mark_sp_t *sp)
22782275
if (jl_all_methods != NULL)
22792276
gc_mark_queue_obj(gc_cache, sp, jl_all_methods);
22802277

2281-
#ifndef COPY_STACKS
2282-
gc_mark_queue_obj(gc_cache, sp, jl_unprotect_stack_func);
2283-
#endif
2284-
22852278
// constants
22862279
gc_mark_queue_obj(gc_cache, sp, jl_typetype_type);
22872280
gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type);
@@ -2564,6 +2557,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, int full)
25642557
scanned_bytes = 0;
25652558
// 5. start sweeping
25662559
sweep_weak_refs();
2560+
sweep_stack_pools();
25672561
gc_sweep_other(ptls, sweep_full);
25682562
gc_scrub();
25692563
gc_verify_tags();
@@ -2687,6 +2681,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
26872681
p[i].newpages = NULL;
26882682
}
26892683
arraylist_new(&heap->weak_refs, 0);
2684+
arraylist_new(&heap->live_tasks, 0);
26902685
heap->mallocarrays = NULL;
26912686
heap->mafreelist = NULL;
26922687
heap->big_objects = NULL;

src/gc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,7 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, gc_mark_sp_t *sp);
495495
void gc_mark_queue_finlist(jl_gc_mark_cache_t *gc_cache, gc_mark_sp_t *sp,
496496
arraylist_t *list, size_t start);
497497
void gc_mark_loop(jl_ptls_t ptls, gc_mark_sp_t sp);
498+
void sweep_stack_pools(void);
498499
void gc_debug_init(void);
499500

500501
extern void *gc_mark_label_addrs[_GC_MARK_L_MAX];

0 commit comments

Comments
 (0)