GC support for local allocations (ocaml-flambda#29)

  * Local arena is reallocated as needed, growing 4x each time
    (Old arenas are currently not freed, but 4x growth means they
     amount to only 1/3 of the space used by the stack; see the
     note after this list)

  * Separate arenas per systhread, swapped on context switch

  * Root scanning distinguishes local and heap roots directly
    (rather than relying on the page table check)

  * Simple unidirectional marker for GC, so that dead local allocs
    do not extend the lifetimes of heap blocks they point to

  * Debug mode minor heap check: verifies that the minor heap is
    well-formed and does not contain any pointers to the local
    arena (even from dead blocks)

  * Bugfix for caml_alloc_local
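
  (Note on the 1/3 figure in the first bullet: with 4x growth, if the
   newest arena holds S bytes, the retained older arenas hold
   S/4 + S/16 + S/64 + ..., which is bounded by S/3.)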
stedolan committed Nov 11, 2021
1 parent 8dd7270 commit c7a193a
Showing 18 changed files with 408 additions and 152 deletions.
10 changes: 8 additions & 2 deletions asmcomp/cmm_helpers.ml
@@ -38,6 +38,7 @@ let bind_nonvar name arg fn =
| _ -> let id = V.create_local name in Clet(VP.create id, arg, fn (Cvar id))

let caml_black = Nativeint.shift_left (Nativeint.of_int 3) 8
let caml_local = Nativeint.shift_left (Nativeint.of_int 2) 8
(* cf. runtime/caml/gc.h *)

(* Block headers. Meaning of the tag field: see stdlib/obj.ml *)
@@ -51,6 +52,7 @@ let block_header tag sz =
in no-naked-pointers mode. See [caml_darken] and the code below that emits
structured constants and static module definitions. *)
let black_block_header tag sz = Nativeint.logor (block_header tag sz) caml_black
let local_block_header tag sz = Nativeint.logor (block_header tag sz) caml_local
let white_closure_header sz = block_header Obj.closure_tag sz
let black_closure_header sz = black_block_header Obj.closure_tag sz
let infix_header ofs = block_header Obj.infix_tag ofs
@@ -795,8 +797,12 @@ let call_cached_method obj tag cache pos args dbg =

let make_alloc_generic ~mode set_fn dbg tag wordsize args =
if mode = Lambda.Alloc_local || wordsize <= Config.max_young_wosize then
Cop(Calloc mode,
Cconst_natint(block_header tag wordsize, dbg) :: args, dbg)
let hdr =
match mode with
| Lambda.Alloc_local -> local_block_header tag wordsize
| Lambda.Alloc_heap -> block_header tag wordsize
in
Cop(Calloc mode, Cconst_natint(hdr, dbg) :: args, dbg)
else begin
let id = V.create_local "*alloc*" in
let rec fill_fields idx = function
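
The compiler half of this change mirrors the runtime colours added to
runtime/caml/gc.h further down: caml_local is 2 << 8, the same bit pattern
as Caml_blue and hence Local_unmarked, so locally allocated blocks are
emitted with the "local" colour already set in their headers. A small
standalone check of the bit layout (illustrative only, assuming the usual
header layout without profiling info; not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    /* OCaml block header layout: tag in bits 0-7, colour in bits 8-9,
       size in words from bit 10 upward. */
    #define Colour_bits(hd) (((hd) >> 8) & 3)

    int main(void)
    {
      uint64_t caml_black = (uint64_t)3 << 8;
      uint64_t caml_local = (uint64_t)2 << 8;   /* == Caml_blue == Local_unmarked */
      uint64_t hdr = ((uint64_t)2 << 10) | 0;   /* wosize = 2, tag = 0 */
      assert(Colour_bits(hdr | caml_local) == 2);
      assert(Colour_bits(hdr | caml_black) == 3);
      return 0;
    }
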
8 changes: 6 additions & 2 deletions otherlibs/systhreads/st_stubs.c
@@ -79,6 +79,7 @@ struct caml_thread_struct {
value * gc_regs; /* Saved value of Caml_state->gc_regs */
char * exception_pointer; /* Saved value of Caml_state->exception_pointer */
struct caml__roots_block * local_roots; /* Saved value of local_roots */
struct caml_local_arenas * local_arenas;
struct longjmp_buffer * exit_buf; /* For thread exit */
#else
value * stack_low; /* The execution stack for this thread */
@@ -148,8 +149,8 @@ static void caml_thread_scan_roots(scanning_action action)
if (th != curr_thread) {
#ifdef NATIVE_CODE
if (th->bottom_of_stack != NULL)
caml_do_local_roots(action, th->bottom_of_stack, th->last_retaddr,
th->gc_regs, th->local_roots);
caml_do_local_roots(action, action, th->bottom_of_stack, th->last_retaddr,
th->gc_regs, th->local_roots, th->local_arenas);
#else
caml_do_local_roots(action, th->sp, th->stack_high, th->local_roots);
#endif
@@ -181,6 +182,7 @@ Caml_inline void caml_thread_save_runtime_state(void)
curr_thread->last_retaddr = Caml_state->last_return_address;
curr_thread->gc_regs = Caml_state->gc_regs;
curr_thread->exception_pointer = Caml_state->exception_pointer;
curr_thread->local_arenas = caml_get_local_arenas();
#else
curr_thread->stack_low = Caml_state->stack_low;
curr_thread->stack_high = Caml_state->stack_high;
@@ -204,6 +206,7 @@ Caml_inline void caml_thread_restore_runtime_state(void)
Caml_state->last_return_address = curr_thread->last_retaddr;
Caml_state->gc_regs = curr_thread->gc_regs;
Caml_state->exception_pointer = curr_thread->exception_pointer;
caml_set_local_arenas(curr_thread->local_arenas);
#else
Caml_state->stack_low = curr_thread->stack_low;
Caml_state->stack_high = curr_thread->stack_high;
@@ -332,6 +335,7 @@ static caml_thread_t caml_thread_new_info(void)
th->last_retaddr = 1;
th->exception_pointer = NULL;
th->local_roots = NULL;
th->local_arenas = NULL;
th->exit_buf = NULL;
#else
/* Allocate the stacks */
4 changes: 3 additions & 1 deletion runtime/caml/domain_state.tbl
@@ -36,9 +36,11 @@ DOMAIN_STATE(struct caml_ephe_ref_table*, ephe_ref_table)
DOMAIN_STATE(struct caml_custom_table*, custom_table)
/* See minor_gc.c */

DOMAIN_STATE(struct caml_local_arenas*, local_arenas)
DOMAIN_STATE(intnat, local_sp)
DOMAIN_STATE(struct region_stack*, local_top)
DOMAIN_STATE(void*, local_top)
DOMAIN_STATE(intnat, local_limit)

DOMAIN_STATE(intnat, local_total)

DOMAIN_STATE(struct mark_stack*, mark_stack)
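
For reference when reading runtime/memory.c below: local_top points just
past the end of the newest arena, and local_sp and local_limit are negative
byte offsets from it (local_limit is minus the newest arena's length;
allocation moves local_sp downwards and triggers caml_local_realloc once it
crosses local_limit).
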
35 changes: 35 additions & 0 deletions runtime/caml/gc.h
@@ -64,4 +64,39 @@
#define Colornum_hd(hd) ((color_t) (((hd) >> 8) & 3))
#define Coloredhd_hd(hd,colnum) (((hd) & ~Caml_black) | ((colnum) << 8))

#ifdef CAML_INTERNALS


#define Init_local_arena_bsize 4096
#ifdef ARCH_SIXTYFOUR
#define Max_local_arenas 10 /* max 4G */
#else
#define Max_local_arenas 8 /* max 1G */
#endif

struct caml_local_arena {
char* base;
uintnat length;
};
typedef struct caml_local_arenas {
int count;
intnat saved_sp;
intnat next_length;
struct caml_local_arena arenas[Max_local_arenas];
} caml_local_arenas;

/* Colors for locally allocated values.
(Only used during root-scanning, never visible to the rest of the GC) */
#define Local_marked Caml_black
#define Local_unmarked Caml_blue /* allocation color of local objects */
#define Local_scanned Caml_gray

#define With_color_hd(hd, color) \
(((hd) & ~Caml_black) | color)

/* Neither a valid header nor value */
#define Local_uninit_hd Make_header(0, 0x42, Local_unmarked)

#endif /* CAML_INTERNALS */

#endif /* CAML_GC_H */
3 changes: 3 additions & 0 deletions runtime/caml/memory.h
@@ -250,6 +250,9 @@ extern void caml_alloc_small_dispatch (intnat wosize, int flags,

#define Modify(fp,val) caml_modify((fp), (val))

struct caml_local_arenas* caml_get_local_arenas();
void caml_set_local_arenas(struct caml_local_arenas* s);

#endif /* CAML_INTERNALS */

struct caml__roots_block {
2 changes: 2 additions & 0 deletions runtime/caml/misc.h
@@ -398,9 +398,11 @@ int caml_runtime_warnings_active(void);
#define Debug_free_shrink Debug_tag (0x03)
#define Debug_free_truncate Debug_tag (0x04)
#define Debug_free_unused Debug_tag (0x05)
#define Debug_free_local Debug_tag (0x06)
#define Debug_uninit_minor Debug_tag (0x10)
#define Debug_uninit_major Debug_tag (0x11)
#define Debug_uninit_align Debug_tag (0x15)
#define Debug_uninit_local Debug_tag (0x16)
#define Debug_filler_align Debug_tag (0x85)
#define Debug_pool_magic Debug_tag (0x99)

6 changes: 4 additions & 2 deletions runtime/caml/roots.h
@@ -34,9 +34,11 @@ CAMLextern void caml_do_local_roots_byt (scanning_action, value *, value *,
#define caml_do_local_roots caml_do_local_roots_byt
#else
CAMLextern void caml_do_local_roots_nat (
scanning_action f, char * c_bottom_of_stack,
scanning_action maj, scanning_action min,
char * c_bottom_of_stack,
uintnat last_retaddr, value * v_gc_regs,
struct caml__roots_block * gc_local_roots);
struct caml__roots_block * gc_local_roots,
struct caml_local_arenas* local_arenas);
#define caml_do_local_roots caml_do_local_roots_nat
#endif

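
(The native-code declaration now takes two scanning actions plus the
thread's local arenas. In the otherlibs/systhreads change above the same
action is passed twice; callers that distinguish the two are in files not
shown here, presumably so that heap roots and local roots can be scanned
with different functions, per the commit message.)
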
2 changes: 2 additions & 0 deletions runtime/caml/stack.h
@@ -44,9 +44,11 @@
#else
#error "TARGET_power: wrong MODEL"
#endif
/* FIXME: Already_scanned optimisation not supported on this branch
#define Already_scanned(sp, retaddr) ((retaddr) & 1)
#define Mask_already_scanned(retaddr) ((retaddr) & ~1)
#define Mark_scanned(sp, retaddr) Saved_return_address(sp) = (retaddr) | 1
*/
#endif

#ifdef TARGET_s390x
105 changes: 81 additions & 24 deletions runtime/memory.c
@@ -34,6 +34,7 @@
#include "caml/signals.h"
#include "caml/memprof.h"
#include "caml/eventlog.h"
#include "caml/alloc.h"

int caml_huge_fallback_count = 0;
/* Number of times that mmapping big pages fails and we fell back to small
@@ -676,11 +677,6 @@ CAMLexport CAMLweakdef void caml_modify (value *fp, value val)
}
}

struct region_stack {
char* base;
struct region_stack* next;
};

CAMLexport intnat caml_local_region_begin()
{
return Caml_state->local_sp;
@@ -691,37 +687,98 @@ CAMLexport void caml_local_region_end(intnat reg)
Caml_state->local_sp = reg;
}

//#define Local_init_wsz 64
#define Local_init_wsz (4096)
void caml_local_realloc()
CAMLexport caml_local_arenas* caml_get_local_arenas()
{
caml_local_arenas* s = Caml_state->local_arenas;
if (s != NULL)
s->saved_sp = Caml_state->local_sp;
return s;
}

CAMLexport void caml_set_local_arenas(caml_local_arenas* s)
{
intnat new_bsize;
struct region_stack* stk;
char* stkbase;
if (Caml_state->local_top == NULL) {
new_bsize = Bsize_wsize(Local_init_wsz);
Caml_state->local_arenas = s;
if (s != NULL) {
struct caml_local_arena a = s->arenas[s->count - 1];
Caml_state->local_sp = s->saved_sp;
Caml_state->local_top = (void*)(a.base + a.length);
Caml_state->local_limit = - a.length;
} else {
CAMLassert((char*)Caml_state->local_top + Caml_state->local_limit == Caml_state->local_top->base);
new_bsize = -Caml_state->local_limit;
Caml_state->local_sp = 0;
Caml_state->local_top = NULL;
Caml_state->local_limit = 0;
}
while (Caml_state->local_sp < -new_bsize) new_bsize *= 2;
stkbase = caml_stat_alloc(new_bsize + sizeof(struct region_stack));
stk = (struct region_stack*)(stkbase + new_bsize);
memset(stkbase, 0x42, new_bsize); /* FIXME debugging only */
stk->base = stkbase;
stk->next = Caml_state->local_top;
Caml_state->local_top = stk;
Caml_state->local_limit = -new_bsize;
}

void caml_local_realloc()
{
caml_local_arenas* s = caml_get_local_arenas();
intnat i;
char* arena;
if (s == NULL) {
s = caml_stat_alloc(sizeof(*s));
s->count = 0;
s->next_length = 0;
s->saved_sp = Caml_state->local_sp;
}
if (s->count == Max_local_arenas)
caml_fatal_error("Local allocation stack overflow - exceeded Max_local_arenas");

do {
if (s->next_length == 0) {
s->next_length = Init_local_arena_bsize;
} else {
/* overflow check */
CAML_STATIC_ASSERT(((intnat)Init_local_arena_bsize << (2*Max_local_arenas)) > 0);
s->next_length *= 4;
}
/* may need to loop, if a very large allocation was requested */
} while (s->saved_sp + s->next_length < 0);

arena = caml_stat_alloc_noexc(s->next_length);
if (arena == NULL)
caml_fatal_error("Local allocation stack overflow - out of memory");
#ifdef DEBUG
for (i = 0; i < s->next_length; i += sizeof(value)) {
*((header_t*)(arena + i)) = Debug_uninit_local;
}
#endif
for (i = s->saved_sp; i < 0; i += sizeof(value)) {
*((header_t*)(arena + s->next_length + i)) = Local_uninit_hd;
}
caml_gc_message(0x08,
"Growing local stack to %"ARCH_INTNAT_PRINTF_FORMAT"d kB\n",
s->next_length / 1024);
s->count++;
s->arenas[s->count-1].length = s->next_length;
s->arenas[s->count-1].base = arena;
caml_set_local_arenas(s);
CAMLassert(Caml_state->local_limit <= Caml_state->local_sp);
}

CAMLexport value caml_alloc_local(mlsize_t wosize, tag_t tag)
{
#ifdef NATIVE_CODE
intnat sp = Caml_state->local_sp;
header_t* hp;
sp -= Bhsize_wosize(wosize);
Caml_state->local_sp = sp;
if (sp < Caml_state->local_limit)
caml_local_realloc();
return Val_hp((char*)Caml_state->local_top + sp);
hp = (header_t*)((char*)Caml_state->local_top + sp);
*hp = Make_header(wosize, tag, Local_unmarked);
return Val_hp(hp);
#else
if (wosize <= Max_young_wosize) {
return caml_alloc_small(wosize, tag);
} else {
/* The return value is initialised directly using Field.
This is invalid if it may create major -> minor pointers.
So, perform a minor GC to prevent this. (See caml_make_vect) */
caml_minor_collection();
return caml_alloc_shr(wosize, tag);
}
#endif
}

/* Global memory pool.
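
Taken together, the region functions and caml_alloc_local implement a
bump-pointer stack: caml_local_region_begin records the current local_sp,
allocations push it further down, and caml_local_region_end pops back to
the recorded mark. A usage sketch from the C side (hypothetical caller, not
code from this commit; assumes the declarations are visible through
caml/memory.h on this branch):

    #include <caml/mlvalues.h>
    #include <caml/memory.h>

    /* Hypothetical example: allocate a short-lived 2-word block in the
       local region and discard it by resetting the region. */
    void local_scratch_example(void)
    {
      intnat reg = caml_local_region_begin();   /* remember current local_sp */
      value v = caml_alloc_local(2, 0);         /* 2 words, tag 0, born Local_unmarked */
      Field(v, 0) = Val_long(1);
      Field(v, 1) = Val_long(2);
      /* ... use v; it must not escape the region ... */
      caml_local_region_end(reg);               /* everything allocated since begin is dead */
    }
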
33 changes: 33 additions & 0 deletions runtime/minor_gc.c
@@ -348,6 +348,36 @@ void caml_oldify_mopup (void)
if (redo) goto again;
}

#ifdef DEBUG
static void verify_minor_heap()
{
header_t* p;
struct caml_local_arena* arena = Caml_state->local_arenas ?
&Caml_state->local_arenas->arenas[Caml_state->local_arenas->count-1] : NULL;
for (p = (header_t*)Caml_state->young_ptr;
p < (header_t*)Caml_state->young_alloc_end;
p += Whsize_hp(p)) {
header_t hd = *p;
CAMLassert_young_header(hd);
if (Tag_hd(hd) < No_scan_tag) {
intnat i = 0;
if (Tag_hd(hd) == Closure_tag)
i = Start_env_closinfo(Closinfo_val(Val_hp(p)));
for (; i < Wosize_hd(hd); i++) {
value v = Field(Val_hp(p), i);
if (Is_block(v)) {
if (Is_young(v)) CAMLassert ((value)Caml_state->young_ptr < v);
if (arena) {
CAMLassert(!(arena->base <= (char*)v &&
(char*)v < arena->base + arena->length));
}
}
}
}
}
}
#endif

/* Make sure the minor heap is empty by performing a minor collection
if needed.
*/
@@ -360,6 +390,9 @@ void caml_empty_minor_heap (void)

if (Caml_state->young_ptr != Caml_state->young_alloc_end){
CAMLassert_young_header(*(header_t*)Caml_state->young_ptr);
#ifdef DEBUG
verify_minor_heap();
#endif
if (caml_minor_gc_begin_hook != NULL) (*caml_minor_gc_begin_hook) ();
prev_alloc_words = caml_allocated_words;
Caml_state->in_minor_collection = 1;