
Binary search for pkgimage gc #48940


Merged
merged 5 commits into from Mar 29, 2023
126 changes: 125 additions & 1 deletion src/gc.c
@@ -173,6 +173,13 @@ pagetable_t memory_map;
// List of marked big objects. Not per-thread. Accessed only by master thread.
bigval_t *big_objects_marked = NULL;

// Eytzinger tree of images. Used for very fast jl_object_in_image queries during gc
// See https://algorithmica.org/en/eytzinger
static arraylist_t eytzinger_image_tree;
static arraylist_t eytzinger_idxs;
static uintptr_t gc_img_min;
static uintptr_t gc_img_max;

// -- Finalization --
// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
@@ -183,6 +190,118 @@ arraylist_t finalizer_list_marked;
arraylist_t to_finalize;
JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;

static int ptr_cmp(const void *l, const void *r)
{
uintptr_t left = *(const uintptr_t*)l;
uintptr_t right = *(const uintptr_t*)r;
// jl_safe_printf("cmp %p %p\n", (void*)left, (void*)right);
return (left > right) - (left < right);
}

// Build an eytzinger tree from a sorted array
static int eytzinger(uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n)
{
if (k <= n) {
i = eytzinger(src, dest, i, 2 * k, n);
dest[k-1] = src[i];
i++;
i = eytzinger(src, dest, i, 2 * k + 1, n);
}
return i;
}
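
For intuition, here is a minimal standalone sketch (not part of this diff) of how the same in-order recursion lays a sorted array out in BFS (implicit-heap) order; the names, the small input array, and the main harness are illustrative only.

#include <stdio.h>
#include <stddef.h>

// Same recursion as eytzinger() above, applied to a toy array (illustrative only).
static size_t eytzinger_demo(const unsigned *src, unsigned *dest,
                             size_t i, size_t k, size_t n)
{
    if (k <= n) {
        i = eytzinger_demo(src, dest, i, 2 * k, n);     // fill left subtree
        dest[k - 1] = src[i++];                         // place current node
        i = eytzinger_demo(src, dest, i, 2 * k + 1, n); // fill right subtree
    }
    return i;
}

int main(void)
{
    unsigned sorted[] = {1, 2, 3, 4, 5, 6, 7};
    unsigned tree[7];
    eytzinger_demo(sorted, tree, 0, 1, 7);
    for (size_t j = 0; j < 7; j++)
        printf("%u ", tree[j]); // prints: 4 2 6 1 3 5 7
    printf("\n");
    return 0;
}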

static size_t eyt_obj_idx(jl_value_t *obj) JL_NOTSAFEPOINT
{
size_t n = eytzinger_image_tree.len - 1;
if (n == 0)
return n;
assert(n % 2 == 0 && "Eytzinger tree not even length!");
uintptr_t cmp = (uintptr_t) obj;
if (cmp <= gc_img_min || cmp > gc_img_max)
return n;
uintptr_t *tree = (uintptr_t*)eytzinger_image_tree.items;
size_t k = 1;
// note that k preserves the history of how we got to the current node
while (k <= n) {
int greater = (cmp > tree[k - 1]);
k <<= 1;
k |= greater;
}
// We can assume k is nonzero, since we start with k = 1
// and cmp > gc_img_min.
// This shift does a fast rewind of the path until we reach
// a node whose value compared less than cmp.
k >>= (__builtin_ctzll(k) + 1);
assert(k != 0);
assert(k <= n && "Eytzinger tree index out of bounds!");
assert(tree[k - 1] < cmp && "Failed to find lower bound for object!");
return k - 1;
}
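
A standalone sketch (not from this diff) of the same branchless descent plus ctz rewind, run against the toy tree from the sketch above; eyt_predecessor_idx and the query value are hypothetical names chosen for illustration.

#include <assert.h>
#include <stddef.h>

// Return the 1-based eytzinger index of the largest element strictly below cmp,
// or 0 if no element is below cmp (illustrative only; mirrors the loop above).
static size_t eyt_predecessor_idx(const unsigned *tree, size_t n, unsigned cmp)
{
    size_t k = 1;
    while (k <= n) {
        int greater = (cmp > tree[k - 1]);
        k = (k << 1) | greater; // record each comparison in the low bits of k
    }
    // Shift off the trailing zeros and the lowest set bit: this rewinds the
    // path to the deepest ancestor whose value compared less than cmp.
    k >>= (__builtin_ctzll(k) + 1);
    return k;
}

int main(void)
{
    unsigned tree[] = {4, 2, 6, 1, 3, 5, 7}; // BFS layout of {1,...,7}
    size_t k = eyt_predecessor_idx(tree, 7, 5);
    assert(k != 0 && tree[k - 1] == 4); // largest value strictly below 5
    return 0;
}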

// Used in staticdata.c after we add an image
void rebuild_image_blob_tree(void)
{
size_t inc = 1 + jl_linkage_blobs.len - eytzinger_image_tree.len;
assert(eytzinger_idxs.len == eytzinger_image_tree.len);
assert(eytzinger_idxs.max == eytzinger_image_tree.max);
arraylist_grow(&eytzinger_idxs, inc);
arraylist_grow(&eytzinger_image_tree, inc);
eytzinger_idxs.items[eytzinger_idxs.len - 1] = (void*)jl_linkage_blobs.len;
eytzinger_image_tree.items[eytzinger_image_tree.len - 1] = (void*)1; // outside image
for (size_t i = 0; i < jl_linkage_blobs.len; i++) {
assert((uintptr_t) jl_linkage_blobs.items[i] % 4 == 0 && "Linkage blob not 4-byte aligned!");
// We abuse the pointer here a little so that a couple of properties are true:
// 1. a start and an end are never the same value. This simplifies the binary search.
// 2. ends are always after starts. This also simplifies the binary search.
// We assume there are no 0-size blobs, which is safe since nothing
// could be stored in one anyway.
uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
eytzinger_idxs.items[i] = (void*)(val + (i & 1));
}
qsort(eytzinger_idxs.items, eytzinger_idxs.len - 1, sizeof(void*), ptr_cmp);
gc_img_min = (uintptr_t) eytzinger_idxs.items[0];
gc_img_max = (uintptr_t) eytzinger_idxs.items[eytzinger_idxs.len - 2] + 1;
eytzinger((uintptr_t*)eytzinger_idxs.items, (uintptr_t*)eytzinger_image_tree.items, 0, 1, eytzinger_idxs.len - 1);
// Reuse the scratch memory to store the blob indices
// Still O(n log n) overall, since each lookup below is a binary search
for (size_t i = 0; i < jl_linkage_blobs.len; i ++) {
uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
// This is the same computation as in the prior for loop
uintptr_t eyt_val = val + (i & 1);
size_t eyt_idx = eyt_obj_idx((jl_value_t*)(eyt_val + 1));
assert(eyt_idx < eytzinger_idxs.len - 1);
assert(eytzinger_image_tree.items[eyt_idx] == (void*)eyt_val && "Eytzinger tree failed to find object!");
if (i & 1)
eytzinger_idxs.items[eyt_idx] = (void*)n_linkage_blobs();
else
eytzinger_idxs.items[eyt_idx] = (void*)(i / 2);
}
}

static int eyt_obj_in_img(jl_value_t *obj) JL_NOTSAFEPOINT
{
assert((uintptr_t) obj % 4 == 0 && "Object not 4-byte aligned!");
int idx = eyt_obj_idx(obj);
// Now we use a tiny trick: tree[idx] & 1 is whether or not tree[idx] is a
// start (0) or an end (1) of a blob. If it's a start, then the object is
// in the image, otherwise it is not.
int in_image = ((uintptr_t)eytzinger_image_tree.items[idx] & 1) == 0;
return in_image;
}
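
To see why the low-bit test works, here is a small standalone sketch (not part of this diff) of the start/end parity encoding built by rebuild_image_blob_tree: starts keep their low bit 0 and ends are stored +1, which is safe because blob boundaries are 4-byte aligned. The blob addresses are made up, and a linear predecessor scan stands in for the eytzinger query.

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

// keys holds sorted, interleaved blob boundaries: start, end + 1, start, end + 1, ...
// An address is inside a blob exactly when its strict predecessor key is a start
// (low bit 0); membership is start-exclusive, end-inclusive, as in the old code.
static int addr_in_blob(const uintptr_t *keys, size_t n, uintptr_t addr)
{
    size_t best = n; // n means "no key below addr"
    for (size_t i = 0; i < n; i++)
        if (keys[i] < addr)
            best = i;
    if (best == n)
        return 0;
    return (keys[best] & 1) == 0; // predecessor is a start => inside a blob
}

int main(void)
{
    // Two hypothetical blobs covering (0x1000, 0x2000] and (0x3000, 0x4000]
    uintptr_t keys[] = {0x1000, 0x2000 + 1, 0x3000, 0x4000 + 1};
    assert(addr_in_blob(keys, 4, 0x1004) == 1); // inside the first blob
    assert(addr_in_blob(keys, 4, 0x2004) == 0); // between the blobs
    assert(addr_in_blob(keys, 4, 0x3000) == 0); // equal to a start: excluded
    assert(addr_in_blob(keys, 4, 0x4000) == 1); // equal to an end: included
    return 0;
}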

size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
{
assert((uintptr_t) v % 4 == 0 && "Object not 4-byte aligned!");
int eyt_idx = eyt_obj_idx(v);
// We fill the invalid slots with the length, so we can just return that
size_t idx = (size_t) eytzinger_idxs.items[eyt_idx];
return idx;
}

uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
{
return eyt_obj_in_img(obj);
}

NOINLINE uintptr_t gc_get_stack_ptr(void)
{
return (uintptr_t)jl_get_frame_addr();
@@ -2270,7 +2389,8 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
uint8_t bits = (gc_old(o->header) && !mark_reset_age) ? GC_OLD_MARKED : GC_MARKED;
int update_meta = __likely(!meta_updated && !gc_verifying);
int foreign_alloc = 0;
-if (update_meta && jl_object_in_image(new_obj)) {
+// directly point at eyt_obj_in_img to encourage inlining
+if (update_meta && eyt_obj_in_img(new_obj)) {
foreign_alloc = 1;
update_meta = 0;
}
@@ -3245,6 +3365,10 @@ void jl_gc_init(void)

arraylist_new(&finalizer_list_marked, 0);
arraylist_new(&to_finalize, 0);
arraylist_new(&eytzinger_image_tree, 0);
arraylist_new(&eytzinger_idxs, 0);
arraylist_push(&eytzinger_idxs, (void*)0);
arraylist_push(&eytzinger_image_tree, (void*)1); // outside image

gc_num.interval = default_collect_interval;
last_long_collect_interval = default_collect_interval;
23 changes: 2 additions & 21 deletions src/julia_internal.h
@@ -954,28 +954,9 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
return jl_image_relocs.len;
}

-// TODO: Makes this a binary search
-STATIC_INLINE size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT {
-    size_t i, nblobs = n_linkage_blobs();
-    assert(jl_linkage_blobs.len == 2*nblobs);
-    for (i = 0; i < nblobs; i++) {
-        uintptr_t left = (uintptr_t)jl_linkage_blobs.items[2*i];
-        uintptr_t right = (uintptr_t)jl_linkage_blobs.items[2*i + 1];
-        if (left < (uintptr_t)v && (uintptr_t)v <= right) {
-            // the last object may be a singleton (v is shifted by a type tag, so we use exclusive bounds here)
-            break;
-        }
-    }
-    return i;
-}
+size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;

-STATIC_INLINE uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT {
-    size_t blob = external_blob_index(v);
-    if (blob == n_linkage_blobs()) {
-        return 0;
-    }
-    return 1;
-}
+uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;

typedef struct {
LLVMOrcThreadSafeModuleRef TSM;
7 changes: 7 additions & 0 deletions src/staticdata.c
@@ -2366,6 +2366,10 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
jl_write_relocations(&s);
}

// This ensures that we can use the low bit of addresses for
// identifying end pointers in gc's eytzinger search.
write_padding(&sysimg, 4 - (sysimg.size % 4));

if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
jl_printf(
JL_STDERR,
@@ -2658,6 +2662,8 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle)
// }
#endif

extern void rebuild_image_blob_tree(void);

static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum,
/* outputs */ jl_array_t **restored, jl_array_t **init_order,
jl_array_t **extext_methods,
@@ -3151,6 +3157,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
arraylist_push(&jl_linkage_blobs, (void*)image_base);
arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg + sizeof(uintptr_t)));
arraylist_push(&jl_image_relocs, (void*)relocs_base);
rebuild_image_blob_tree();

// jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1);
jl_gc_enable(en);