Skip to content

Commit 3c01f2a

Browse files
pchintalapudiKristofferC
authored and
KristofferC
committed
Binary search for pkgimage metadata (#48940)
Co-authored-by: Jameson Nash <vtjnash@gmail.com> (cherry picked from commit bc33c81) (cherry picked from commit 40692cca1619a06991bebabc162e531255d99ddd)
1 parent b9f0f7e commit 3c01f2a

File tree

3 files changed

+133
-22
lines changed

3 files changed

+133
-22
lines changed

src/gc.c

+124-1
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,13 @@ pagetable_t memory_map;
184184
// List of marked big objects. Not per-thread. Accessed only by master thread.
185185
bigval_t *big_objects_marked = NULL;
186186

187+
// Eytzinger tree of images. Used for very fast jl_object_in_image queries during gc
188+
// See https://algorithmica.org/en/eytzinger
189+
static arraylist_t eytzinger_image_tree;
190+
static arraylist_t eytzinger_idxs;
191+
static uintptr_t gc_img_min;
192+
static uintptr_t gc_img_max;
193+
187194
// -- Finalization --
188195
// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
189196
// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
@@ -194,6 +201,118 @@ arraylist_t finalizer_list_marked;
194201
arraylist_t to_finalize;
195202
JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
196203

204+
// qsort-style comparator for two `uintptr_t` values.
//   l, r - pointers to the two `uintptr_t` elements being compared
// Returns -1 if *l < *r, 1 if *l > *r, and 0 if they are equal.
// The values are compared rather than subtracted so that the result cannot
// wrap for operands that are far apart.
static int ptr_cmp(const void *l, const void *r)
{
    const uintptr_t a = *(const uintptr_t*)l;
    const uintptr_t b = *(const uintptr_t*)r;
    if (a < b)
        return -1;
    if (a > b)
        return 1;
    return 0;
}
211+
212+
// Build an eytzinger tree from a sorted array.
//
// The tree is implicit and 1-based: node `k` is stored at `dest[k-1]` and its
// children are nodes `2k` and `2k + 1`. The function walks that tree in-order
// (left subtree, node, right subtree) and hands out consecutive elements of
// `src` as nodes are visited, so an in-order traversal of `dest` reproduces
// the order of `src`.
//
//   src  - input array (sorted by the caller), at least `n` elements
//   dest - output array with room for `n` elements
//   i    - index of the next unconsumed element of `src` (0 at the top level)
//   k    - 1-based index of the subtree root to fill (1 at the top level)
//   n    - total number of nodes in the tree
//
// Returns the index of the next unconsumed element of `src`; for the
// top-level call this equals `n`. The cursor is a `size_t` end to end so it
// is never narrowed on the way back up the recursion.
static size_t eytzinger(uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n)
{
    if (k <= n) {
        // Everything in the left subtree precedes this node in sorted order.
        i = eytzinger(src, dest, i, 2 * k, n);
        dest[k-1] = src[i];
        i++;
        // Everything in the right subtree follows it.
        i = eytzinger(src, dest, i, 2 * k + 1, n);
    }
    return i;
}
223+
224+
// Lower-bound search in the Eytzinger image tree.
//
// Returns the 0-based slot in `eytzinger_image_tree` of the lower bound for
// the address of `obj`, i.e. a boundary value strictly below that address
// (checked by the final assert). When the tree holds no boundaries, or the
// address is outside (gc_img_min, gc_img_max], the index of the trailing
// sentinel slot (`eytzinger_image_tree.len - 1`) is returned instead.
static size_t eyt_obj_idx(jl_value_t *obj) JL_NOTSAFEPOINT
{
    // The last slot is the sentinel, so the searchable tree is one shorter.
    const size_t nnodes = eytzinger_image_tree.len - 1;
    if (nnodes == 0)
        return nnodes;
    assert(nnodes % 2 == 0 && "Eytzinger tree not even length!");
    const uintptr_t addr = (uintptr_t)obj;
    if (!(addr > gc_img_min && addr <= gc_img_max))
        return nnodes;
    uintptr_t *nodes = (uintptr_t*)eytzinger_image_tree.items;
    // `path` is the 1-based node number; its bits below the leading one
    // record every left (0) / right (1) decision taken on the way down.
    size_t path = 1;
    while (path <= nnodes) {
        size_t went_right = (addr > nodes[path - 1]) ? 1 : 0;
        path = (path << 1) | went_right;
    }
    // `path` is nonzero here: it started at 1 and only had bits shifted in.
    // Dropping the trailing zero bits plus one more bit rewinds the descent
    // to the node at which we last went right, i.e. the last node that
    // compared below `addr`.
    path >>= (__builtin_ctzll(path) + 1);
    assert(path != 0);
    assert(path <= nnodes && "Eytzinger tree index out of bounds!");
    assert(nodes[path - 1] < addr && "Failed to find lower bound for object!");
    return path - 1;
}
251+
252+
//used in staticdata.c after we add an image
253+
void rebuild_image_blob_tree(void)
254+
{
255+
size_t inc = 1 + jl_linkage_blobs.len - eytzinger_image_tree.len;
256+
assert(eytzinger_idxs.len == eytzinger_image_tree.len);
257+
assert(eytzinger_idxs.max == eytzinger_image_tree.max);
258+
arraylist_grow(&eytzinger_idxs, inc);
259+
arraylist_grow(&eytzinger_image_tree, inc);
260+
eytzinger_idxs.items[eytzinger_idxs.len - 1] = (void*)jl_linkage_blobs.len;
261+
eytzinger_image_tree.items[eytzinger_image_tree.len - 1] = (void*)1; // outside image
262+
for (size_t i = 0; i < jl_linkage_blobs.len; i++) {
263+
assert((uintptr_t) jl_linkage_blobs.items[i] % 4 == 0 && "Linkage blob not 4-byte aligned!");
264+
// We abuse the pointer here a little so that a couple of properties are true:
265+
// 1. a start and an end are never the same value. This simplifies the binary search.
266+
// 2. ends are always after starts. This also simplifies the binary search.
267+
// We assume that there exist no 0-size blobs, but that's a safe assumption
268+
// since it means nothing could be there anyways
269+
uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
270+
eytzinger_idxs.items[i] = (void*)(val + (i & 1));
271+
}
272+
qsort(eytzinger_idxs.items, eytzinger_idxs.len - 1, sizeof(void*), ptr_cmp);
273+
gc_img_min = (uintptr_t) eytzinger_idxs.items[0];
274+
gc_img_max = (uintptr_t) eytzinger_idxs.items[eytzinger_idxs.len - 2] + 1;
275+
eytzinger((uintptr_t*)eytzinger_idxs.items, (uintptr_t*)eytzinger_image_tree.items, 0, 1, eytzinger_idxs.len - 1);
276+
// Reuse the scratch memory to store the indices
277+
// Still O(nlogn) because binary search
278+
for (size_t i = 0; i < jl_linkage_blobs.len; i ++) {
279+
uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
280+
// This is the same computation as in the prior for loop
281+
uintptr_t eyt_val = val + (i & 1);
282+
size_t eyt_idx = eyt_obj_idx((jl_value_t*)(eyt_val + 1)); assert(eyt_idx < eytzinger_idxs.len - 1);
283+
assert(eytzinger_image_tree.items[eyt_idx] == (void*)eyt_val && "Eytzinger tree failed to find object!");
284+
if (i & 1)
285+
eytzinger_idxs.items[eyt_idx] = (void*)n_linkage_blobs();
286+
else
287+
eytzinger_idxs.items[eyt_idx] = (void*)(i / 2);
288+
}
289+
}
290+
291+
// Report whether `obj` lies inside a loaded image blob.
// Returns 1 if the lower-bound slot found for `obj` holds a blob start,
// 0 otherwise.
static int eyt_obj_in_img(jl_value_t *obj) JL_NOTSAFEPOINT
{
    assert((uintptr_t) obj % 4 == 0 && "Object not 4-byte aligned!");
    // Keep the slot index as size_t: eyt_obj_idx returns size_t and the value
    // is used to index an array, so it must not be narrowed to int.
    size_t idx = eyt_obj_idx(obj);
    // Now we use a tiny trick: tree[idx] & 1 is whether or not tree[idx] is a
    // start (0) or an end (1) of a blob. If it's a start, then the object is
    // in the image, otherwise it is not. The sentinel slot holds 1, so lookups
    // that fall outside every image also report "not in image".
    int in_image = ((uintptr_t)eytzinger_image_tree.items[idx] & 1) == 0;
    return in_image;
}
301+
302+
// Map `v` to the value recorded for its lower-bound slot in `eytzinger_idxs`.
// For an address inside a blob this is the value rebuild_image_blob_tree
// stored for that blob's start boundary; for any other address it is the
// "not found" value stored in end-of-blob slots and in the sentinel slot.
size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
{
    assert((uintptr_t) v % 4 == 0 && "Object not 4-byte aligned!");
    // Keep the slot index as size_t: eyt_obj_idx returns size_t and the value
    // is used to index an array, so it must not be narrowed to int.
    size_t eyt_idx = eyt_obj_idx(v);
    // We fill the invalid slots with the length, so we can just return that
    size_t idx = (size_t) eytzinger_idxs.items[eyt_idx];
    return idx;
}
310+
311+
// Non-static entry point for the image-membership query.
// Returns 1 if `obj` lies inside a loaded image blob and 0 otherwise,
// as determined by eyt_obj_in_img.
uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
{
    const int in_image = eyt_obj_in_img(obj);
    return in_image;
}
315+
197316
NOINLINE uintptr_t gc_get_stack_ptr(void)
198317
{
199318
return (uintptr_t)jl_get_frame_addr();
@@ -2673,7 +2792,7 @@ mark: {
26732792
jl_datatype_t *vt = (jl_datatype_t*)tag;
26742793
int foreign_alloc = 0;
26752794
int update_meta = __likely(!meta_updated && !gc_verifying);
2676-
if (update_meta && jl_object_in_image(new_obj)) {
2795+
if (update_meta && eyt_obj_in_img(new_obj)) {
26772796
foreign_alloc = 1;
26782797
update_meta = 0;
26792798
}
@@ -3668,6 +3787,10 @@ void jl_gc_init(void)
36683787

36693788
arraylist_new(&finalizer_list_marked, 0);
36703789
arraylist_new(&to_finalize, 0);
3790+
arraylist_new(&eytzinger_image_tree, 0);
3791+
arraylist_new(&eytzinger_idxs, 0);
3792+
arraylist_push(&eytzinger_idxs, (void*)0);
3793+
arraylist_push(&eytzinger_image_tree, (void*)1); // outside image
36713794

36723795
gc_num.interval = default_collect_interval;
36733796
last_long_collect_interval = default_collect_interval;

src/julia_internal.h

+2-21
Original file line numberDiff line numberDiff line change
@@ -958,28 +958,9 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
958958
return jl_image_relocs.len;
959959
}
960960

961-
// TODO: Makes this a binary search
962-
STATIC_INLINE size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT {
963-
size_t i, nblobs = n_linkage_blobs();
964-
assert(jl_linkage_blobs.len == 2*nblobs);
965-
for (i = 0; i < nblobs; i++) {
966-
uintptr_t left = (uintptr_t)jl_linkage_blobs.items[2*i];
967-
uintptr_t right = (uintptr_t)jl_linkage_blobs.items[2*i + 1];
968-
if (left < (uintptr_t)v && (uintptr_t)v <= right) {
969-
// the last object may be a singleton (v is shifted by a type tag, so we use exclusive bounds here)
970-
break;
971-
}
972-
}
973-
return i;
974-
}
961+
size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;
975962

976-
STATIC_INLINE uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT {
977-
size_t blob = external_blob_index(v);
978-
if (blob == n_linkage_blobs()) {
979-
return 0;
980-
}
981-
return 1;
982-
}
963+
uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
983964

984965
typedef struct {
985966
LLVMOrcThreadSafeModuleRef TSM;

src/staticdata.c

+7
Original file line numberDiff line numberDiff line change
@@ -2452,6 +2452,10 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
24522452
jl_write_relocations(&s);
24532453
}
24542454

2455+
// This ensures that we can use the low bit of addresses for
2456+
// identifying end pointers in gc's eytzinger search.
2457+
write_padding(&sysimg, 4 - (sysimg.size % 4));
2458+
24552459
if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
24562460
jl_printf(
24572461
JL_STDERR,
@@ -2745,6 +2749,8 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle)
27452749
// }
27462750
#endif
27472751

2752+
extern void rebuild_image_blob_tree(void);
2753+
27482754
static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum,
27492755
/* outputs */ jl_array_t **restored, jl_array_t **init_order,
27502756
jl_array_t **extext_methods,
@@ -3255,6 +3261,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
32553261
arraylist_push(&jl_linkage_blobs, (void*)image_base);
32563262
arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg + sizeof(uintptr_t)));
32573263
arraylist_push(&jl_image_relocs, (void*)relocs_base);
3264+
rebuild_image_blob_tree();
32583265

32593266
// jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1);
32603267
jl_gc_enable(en);

0 commit comments

Comments
 (0)