Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

no-copy stacks #13099

Merged
merged 2 commits into from
Oct 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,9 @@ eval(Core, :(LineInfoNode(mod::Module, method::Symbol, file::Symbol, line::Int,

# Construct a Module by calling the runtime's `jl_f_new_module` with `name` and `std_imports`.
Module(name::Symbol=:anonymous, std_imports::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool), name, std_imports)

Task(@nospecialize(f)) = ccall(:jl_new_task, Ref{Task}, (Any, Int), f, 0)
# Construct a Task for `f` by calling the runtime's `jl_new_task`.
# `reserved_stack` is forwarded unchanged as the second argument; it defaults to 0.
function Task(@nospecialize(f), reserved_stack::Int=0)
return ccall(:jl_new_task, Ref{Task}, (Any, Int), f, reserved_stack)
end

# simple convert for use by constructors of types in Core
# note that there is no actual conversion defined here,
Expand Down
8 changes: 4 additions & 4 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ SRCS := \
jltypes gf typemap ast builtins module interpreter symbol \
dlload sys init task array dump staticdata toplevel jl_uv datatype \
simplevector APInt-C runtime_intrinsics runtime_ccall precompile \
threadgroup threading stackwalk gc gc-debug gc-pages method \
threadgroup threading stackwalk gc gc-debug gc-pages gc-stacks method \
jlapi signal-handling safepoint jloptions timing subtype rtutils \
crc32c processor

Expand Down Expand Up @@ -207,14 +207,14 @@ $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c in
$(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h

# archive library file rules
$(BUILDDIR)/support/libsupport.a: $(SRCDIR)/support/*.h $(SRCDIR)/support/*.c
$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
$(MAKE) -C $(SRCDIR)/support BUILDDIR='$(abspath $(BUILDDIR)/support)'

$(BUILDDIR)/support/libsupport-debug.a: $(SRCDIR)/support/*.h $(SRCDIR)/support/*.c
$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
$(MAKE) -C $(SRCDIR)/support debug BUILDDIR='$(abspath $(BUILDDIR)/support)'

$(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a
$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/,flisp/*.h flisp/*.c) $(BUILDDIR)/support/libsupport.a
$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/flisp/,*.h *.c) $(BUILDDIR)/support/libsupport.a
$(MAKE) -C $(SRCDIR)/flisp BUILDDIR='$(abspath $(BUILDDIR)/flisp)'

$(FLISP_EXECUTABLE_debug): $(BUILDDIR)/flisp/libflisp-debug.a
Expand Down
10 changes: 5 additions & 5 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -595,11 +595,11 @@ static void gc_scrub_task(jl_task_t *ta)
#else
jl_task_t *thread_task = ptls2->root_task;
#endif
if (ta == thread_task)
gc_scrub_range(ptls2->stack_lo, ptls2->stack_hi);
if (ta->stkbuf == (void*)(intptr_t)(-1) || !ta->stkbuf)
return;
gc_scrub_range((char*)ta->stkbuf, (char*)ta->stkbuf + ta->ssize);
void *stkbuf = ta->stkbuf;
if (ta == thread_task && ptls->copy_stack)
gc_scrub_range(ptls2->stackbase, ptls2->stacksize);
else if (stkbuf)
gc_scrub_range((char*)stkbuf, (char*)stkbuf + ta->bufsz);
}

void gc_scrub(void)
Expand Down
1 change: 0 additions & 1 deletion src/gc-pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#ifndef _OS_WINDOWS_
# include <sys/resource.h>
#endif
#include "julia_assert.h"

#ifdef __cplusplus
extern "C" {
Expand Down
191 changes: 191 additions & 0 deletions src/gc-stacks.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include "gc.h"
#ifndef _OS_WINDOWS_
# include <sys/resource.h>
#endif

// Size in bytes of the region at the start of a stack buffer that
// malloc_stack protects (PAGE_GUARD on Windows; PROT_NONE on other systems
// when neither JL_HAVE_UCONTEXT nor JL_HAVE_SIGALTSTACK is defined).
const size_t jl_guard_size = (4096 * 16);

#ifdef _OS_WINDOWS_
#define MAP_FAILED NULL
// Windows: reserve and commit `bufsz` bytes for a stack buffer, then flag the
// first `jl_guard_size` bytes with PAGE_GUARD.
// Returns the buffer, or MAP_FAILED if either step fails (the buffer is
// released again when only the protection step fails).
static void *malloc_stack(size_t bufsz)
{
    void *base = VirtualAlloc(NULL, bufsz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
    if (!base)
        return MAP_FAILED;
    DWORD prev_protect;
    if (VirtualProtect(base, jl_guard_size, PAGE_READWRITE | PAGE_GUARD, &prev_protect))
        return base;
    // could not install the guard region: give the whole allocation back
    VirtualFree(base, 0, MEM_RELEASE);
    return MAP_FAILED;
}


// Windows: release a stack buffer with VirtualFree(MEM_RELEASE).
// `bufsz` is not consulted; the call passes a size of 0.
static void free_stack(void *stkbuf, size_t bufsz)
{
    (void)bufsz;
    VirtualFree(stkbuf, 0, MEM_RELEASE);
}

#else

// Map `bufsz` bytes of private anonymous read/write memory for a stack buffer.
// Returns the mapping, or MAP_FAILED if mmap (or the guard setup) fails.
static void *malloc_stack(size_t bufsz)
{
    void *base = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#if !defined(JL_HAVE_UCONTEXT) && !defined(JL_HAVE_SIGALTSTACK)
    // set up a guard region to detect stack overflow: the first
    // jl_guard_size bytes of the mapping are made inaccessible
    if (base != MAP_FAILED && mprotect(base, jl_guard_size, PROT_NONE) == -1) {
        munmap(base, bufsz);
        base = MAP_FAILED;
    }
#endif
    return base;
}

// Unmap a stack buffer of `bufsz` bytes starting at `stkbuf`.
// The result of munmap is not checked.
static void free_stack(void *stkbuf, size_t bufsz)
{
    (void)munmap(stkbuf, bufsz);
}
#endif


// Stack buffer sizes (in bytes) served from the per-thread free-stack pools,
// in strictly increasing order. Sizes alternate between a power of two and
// 1.5x that power of two, from 128 KiB up to 24 MiB, so every entry is a
// multiple of 4 KiB. select_pool() scans this table linearly.
const unsigned pool_sizes[] = {
    128 * 1024,
    192 * 1024,
    256 * 1024,
    384 * 1024,
    512 * 1024,
    768 * 1024,
    1024 * 1024,
    1536 * 1024, // was 1537 KiB: off by one, and not a multiple of 4 KiB
    2048 * 1024,
    3 * 1024 * 1024,
    4 * 1024 * 1024,
    6 * 1024 * 1024,
    8 * 1024 * 1024,
    12 * 1024 * 1024,
    16 * 1024 * 1024,
    24 * 1024 * 1024,
};

// Compile-time check that pool_sizes has exactly JL_N_STACK_POOLS entries.
static_assert(sizeof(pool_sizes) == JL_N_STACK_POOLS * sizeof(pool_sizes[0]), "JL_N_STACK_POOLS size mismatch");

static unsigned select_pool(size_t nb)
{
unsigned pool_id = 0;
while (pool_sizes[pool_id] < nb)
pool_id++;
return pool_id;
}


// Dispose of a stack buffer of `bufsz` bytes.
// If `bufsz` exactly matches one of the pool sizes, the buffer is pushed onto
// the matching free_stacks list of `ptls` for reuse; otherwise it is released
// with free_stack().
static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
{
    // larger than the biggest pool: never pooled
    if (bufsz > pool_sizes[JL_N_STACK_POOLS - 1]) {
        free_stack(stkbuf, bufsz);
        return;
    }
    unsigned pool_id = select_pool(bufsz);
    // in range, but not exactly a pool size: not pooled either
    if (pool_sizes[pool_id] != bufsz) {
        free_stack(stkbuf, bufsz);
        return;
    }
    arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
}


JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
{
_jl_free_stack(jl_get_ptls_states(), stkbuf, bufsz);
}


// Detach `task`'s stack buffer and return it to the free-stack pool of `ptls`.
// This only happens when task->bufsz exactly matches a pool size; otherwise
// the task is left untouched (its buffer is neither detached nor freed here).
void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
{
    size_t bufsz = task->bufsz;
    void *stkbuf = task->stkbuf;
    if (bufsz > pool_sizes[JL_N_STACK_POOLS - 1])
        return;
    unsigned pool_id = select_pool(bufsz);
    if (pool_sizes[pool_id] != bufsz)
        return;
    task->stkbuf = NULL;
    arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
}


// Obtain a stack buffer of at least `*bufsz` bytes.
//
// Requests no larger than the biggest pool size are rounded up to a pool size
// and served from the matching free_stacks list when it is non-empty; larger
// requests are rounded up with LLT_ALIGN to jl_page_size. When no pooled
// buffer is available a fresh one comes from malloc_stack(), and
// jl_memory_exception is thrown if that fails.
//
// On return `*bufsz` holds the actual buffer size. If `owner` is non-NULL it
// is appended to the live_tasks list of the thread state returned by
// jl_get_ptls_states().
JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner)
{
    jl_ptls_t ptls = jl_get_ptls_states();
    size_t want = *bufsz;
    void *stk = NULL;
    if (want > pool_sizes[JL_N_STACK_POOLS - 1]) {
        want = LLT_ALIGN(want, jl_page_size);
    }
    else {
        unsigned pool_id = select_pool(want);
        arraylist_t *freelist = &ptls->heap.free_stacks[pool_id];
        want = pool_sizes[pool_id];
        if (freelist->len > 0)
            stk = arraylist_pop(freelist);
    }
    if (stk == NULL) {
        // TODO: allocate blocks of stacks? but need to mprotect individually anyways
        stk = malloc_stack(want);
        if (stk == MAP_FAILED)
            jl_throw(jl_memory_exception);
    }
    *bufsz = want;
    if (owner)
        arraylist_push(&ptls->heap.live_tasks, owner);
    return stk;
}

// For every thread, walk its live_tasks list: tasks whose GC bits are marked
// are kept; unmarked tasks are removed from the list and any stack buffer
// they still hold is handed to _jl_free_stack().
//
// The list is compacted in place by swapping, so the kept entries end up at
// the front and the list length is reduced by the number of removed tasks.
void sweep_stack_pools(void)
{
    // TODO: deallocate stacks if we have too many sitting around unused
    //   for (stk in halfof(free_stacks))
    //       free_stack(stk, pool_sz);
    // The sweep of the task stacks below corresponds to:
    //   for (t in live_tasks)
    //       if (!gc-marked(t))
    //           stkbuf = t->stkbuf
    //           bufsz = t->bufsz
    //           if (stkbuf)
    //               push(free_stacks[sz], stkbuf)
    for (int tid = 0; tid < jl_n_threads; tid++) {
        jl_ptls_t ptls2 = jl_all_tls_states[tid];
        arraylist_t *tasks = &ptls2->heap.live_tasks;
        size_t total = tasks->len;
        if (total == 0)
            continue;
        void **items = tasks->items;
        size_t kept = 0;    // entries [0, kept) are known to be alive
        size_t dropped = 0; // number of dead tasks found so far
        for (;;) {
            jl_task_t *t = (jl_task_t*)items[kept];
            if (!gc_marked(jl_astaggedvalue(t)->bits.gc)) {
                dropped++;
                void *stkbuf = t->stkbuf;
                if (stkbuf != NULL) {
                    size_t bufsz = t->bufsz;
                    t->stkbuf = NULL;
                    _jl_free_stack(ptls2, stkbuf, bufsz);
                }
            }
            else {
                kept++;
            }
            if (kept >= total - dropped)
                break;
            // pull the next unexamined entry into slot `kept`
            void *swap = items[kept];
            items[kept] = items[kept + dropped];
            items[kept + dropped] = swap;
        }
        tasks->len -= dropped;
    }
}
35 changes: 15 additions & 20 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,

static void sweep_weak_refs(void)
{
for (int i = 0;i < jl_n_threads;i++) {
for (int i = 0; i < jl_n_threads; i++) {
jl_ptls_t ptls2 = jl_all_tls_states[i];
size_t n = 0;
size_t ndel = 0;
Expand All @@ -710,7 +710,8 @@ static void sweep_weak_refs(void)
while (1) {
jl_weakref_t *wr = (jl_weakref_t*)lst[n];
if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) {
// weakref itself is alive
// weakref itself is alive,
// so the user could still re-set it to a new value
if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc))
wr->value = (jl_value_t*)jl_nothing;
n++;
Expand All @@ -722,7 +723,7 @@ static void sweep_weak_refs(void)
break;
void *tmp = lst[n];
lst[n] = lst[n + ndel];
lst[n+ndel] = tmp;
lst[n + ndel] = tmp;
}
ptls2->heap.weak_refs.len -= ndel;
}
Expand Down Expand Up @@ -1026,7 +1027,7 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
int freedall = 1;
int pg_skpd = 1;
if (!pg->has_marked) {
// lazy version: (empty) if the whole page was already unused, free it
// lazy version: (empty) if the whole page was already unused, free it (return it to the pool)
// eager version: (freedall) free page as soon as possible
// the eager one uses less memory.
// FIXME - need to do accounting on a per-thread basis
Expand Down Expand Up @@ -2124,19 +2125,13 @@ mark: {
objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t));
jl_task_t *ta = (jl_task_t*)new_obj;
gc_scrub_record_task(ta);
int stkbuf = (ta->stkbuf != (void*)(intptr_t)-1 && ta->stkbuf != NULL);
void *stkbuf = ta->stkbuf;
int16_t tid = ta->tid;
jl_ptls_t ptls2 = jl_all_tls_states[tid];
if (stkbuf) {
#ifdef COPY_STACKS
gc_setmark_buf_(ptls, ta->stkbuf, bits, ta->bufsz);
#else
// stkbuf isn't owned by julia for the root task
if (ta != ptls2->root_task) {
gc_setmark_buf_(ptls, ta->stkbuf, bits, ta->ssize);
}
if (stkbuf && ta->copy_stack)
gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
#endif
}
jl_gcframe_t *s = NULL;
size_t nroots;
uintptr_t offset = 0;
Expand All @@ -2148,9 +2143,11 @@ mark: {
else if (stkbuf) {
s = ta->gcstack;
#ifdef COPY_STACKS
ub = (uintptr_t)ptls2->stackbase;
lb = ub - ta->ssize;
offset = (uintptr_t)ta->stkbuf - lb;
if (ta->copy_stack) {
ub = (uintptr_t)ptls2->stackbase;
lb = ub - ta->copy_stack;
offset = (uintptr_t)stkbuf - lb;
}
#endif
}
if (s) {
Expand Down Expand Up @@ -2278,10 +2275,6 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, gc_mark_sp_t *sp)
if (jl_all_methods != NULL)
gc_mark_queue_obj(gc_cache, sp, jl_all_methods);

#ifndef COPY_STACKS
gc_mark_queue_obj(gc_cache, sp, jl_unprotect_stack_func);
#endif

// constants
gc_mark_queue_obj(gc_cache, sp, jl_typetype_type);
gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type);
Expand Down Expand Up @@ -2564,6 +2557,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, int full)
scanned_bytes = 0;
// 5. start sweeping
sweep_weak_refs();
sweep_stack_pools();
gc_sweep_other(ptls, sweep_full);
gc_scrub();
gc_verify_tags();
Expand Down Expand Up @@ -2687,6 +2681,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
p[i].newpages = NULL;
}
arraylist_new(&heap->weak_refs, 0);
arraylist_new(&heap->live_tasks, 0);
heap->mallocarrays = NULL;
heap->mafreelist = NULL;
heap->big_objects = NULL;
Expand Down
1 change: 1 addition & 0 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,7 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, gc_mark_sp_t *sp);
void gc_mark_queue_finlist(jl_gc_mark_cache_t *gc_cache, gc_mark_sp_t *sp,
arraylist_t *list, size_t start);
void gc_mark_loop(jl_ptls_t ptls, gc_mark_sp_t sp);
void sweep_stack_pools(void);
void gc_debug_init(void);

extern void *gc_mark_label_addrs[_GC_MARK_L_MAX];
Expand Down
Loading