Skip to content

Commit 1a973c7

Browse files
authored
NFC: some cleanup in gc.c (#49577)
1 parent 70ebadb commit 1a973c7

File tree

6 files changed

+140
-145
lines changed

6 files changed

+140
-145
lines changed

src/gc.c

Lines changed: 12 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,16 @@
1111
extern "C" {
1212
#endif
1313

14+
// Number of threads currently running the GC mark-loop
15+
_Atomic(int) gc_n_threads_marking;
1416
// `tid` of mutator thread that triggered GC
1517
_Atomic(int) gc_master_tid;
1618
// `tid` of first GC thread
1719
int gc_first_tid;
18-
1920
// Mutex/cond used to synchronize sleep/wakeup of GC threads
2021
uv_mutex_t gc_threads_lock;
2122
uv_cond_t gc_threads_cond;
2223

23-
// Number of threads currently running the GC mark-loop
24-
_Atomic(int) gc_n_threads_marking;
25-
2624
// Linked list of callback functions
2725

2826
typedef void (*jl_gc_cb_func_t)(void);
@@ -142,8 +140,8 @@ static _Atomic(int) support_conservative_marking = 0;
142140
* Note about GC synchronization:
143141
*
144142
* When entering `jl_gc_collect()`, `jl_gc_running` is atomically changed from
145-
* `0` to `1` to make sure that only one thread can be running the GC. Other
146-
* threads that enters `jl_gc_collect()` at the same time (or later calling
143+
* `0` to `1` to make sure that only one thread can be running `_jl_gc_collect`. Other
144+
* mutator threads that enter `jl_gc_collect()` at the same time (or later calling
147145
* from unmanaged code) will wait in `jl_gc_collect()` until the GC is finished.
148146
*
149147
* Before starting the mark phase the GC thread calls `jl_safepoint_start_gc()`
@@ -153,7 +151,7 @@ static _Atomic(int) support_conservative_marking = 0;
153151
* GC (`gc_state != 0`). It also acquires the `finalizers` lock so that no
154152
* other thread will access them when the GC is running.
155153
*
156-
* During the mark and sweep phase of the GC, the threads that are not running
154+
* During the mark and sweep phase of the GC, the mutator threads that are not running
157155
* the GC should either be running unmanaged code (or a code section that does
158156
* not have a GC critical region mainly including storing to the stack or
159157
* another object) or be paused at a safepoint, waiting for the GC to finish.
@@ -185,13 +183,6 @@ pagetable_t memory_map;
185183
// List of marked big objects. Not per-thread. Accessed only by master thread.
186184
bigval_t *big_objects_marked = NULL;
187185

188-
// Eytzinger tree of images. Used for very fast jl_object_in_image queries during gc
189-
// See https://algorithmica.org/en/eytzinger
190-
static arraylist_t eytzinger_image_tree;
191-
static arraylist_t eytzinger_idxs;
192-
static uintptr_t gc_img_min;
193-
static uintptr_t gc_img_max;
194-
195186
// -- Finalization --
196187
// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
197188
// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
@@ -202,117 +193,6 @@ arraylist_t finalizer_list_marked;
202193
arraylist_t to_finalize;
203194
JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
204195

205-
static int ptr_cmp(const void *l, const void *r)
206-
{
207-
uintptr_t left = *(const uintptr_t*)l;
208-
uintptr_t right = *(const uintptr_t*)r;
209-
// jl_safe_printf("cmp %p %p\n", (void*)left, (void*)right);
210-
return (left > right) - (left < right);
211-
}
212-
213-
// Build an eytzinger tree from a sorted array
214-
static int eytzinger(uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n)
215-
{
216-
if (k <= n) {
217-
i = eytzinger(src, dest, i, 2 * k, n);
218-
dest[k-1] = src[i];
219-
i++;
220-
i = eytzinger(src, dest, i, 2 * k + 1, n);
221-
}
222-
return i;
223-
}
224-
225-
static size_t eyt_obj_idx(jl_value_t *obj) JL_NOTSAFEPOINT
226-
{
227-
size_t n = eytzinger_image_tree.len - 1;
228-
if (n == 0)
229-
return n;
230-
assert(n % 2 == 0 && "Eytzinger tree not even length!");
231-
uintptr_t cmp = (uintptr_t) obj;
232-
if (cmp <= gc_img_min || cmp > gc_img_max)
233-
return n;
234-
uintptr_t *tree = (uintptr_t*)eytzinger_image_tree.items;
235-
size_t k = 1;
236-
// note that k preserves the history of how we got to the current node
237-
while (k <= n) {
238-
int greater = (cmp > tree[k - 1]);
239-
k <<= 1;
240-
k |= greater;
241-
}
242-
// Free to assume k is nonzero, since we start with k = 1
243-
// and cmp > gc_img_min
244-
// This shift does a fast revert of the path until we get
245-
// to a node that evaluated less than cmp.
246-
k >>= (__builtin_ctzll(k) + 1);
247-
assert(k != 0);
248-
assert(k <= n && "Eytzinger tree index out of bounds!");
249-
assert(tree[k - 1] < cmp && "Failed to find lower bound for object!");
250-
return k - 1;
251-
}
252-
253-
//used in staticdata.c after we add an image
254-
void rebuild_image_blob_tree(void)
255-
{
256-
size_t inc = 1 + jl_linkage_blobs.len - eytzinger_image_tree.len;
257-
assert(eytzinger_idxs.len == eytzinger_image_tree.len);
258-
assert(eytzinger_idxs.max == eytzinger_image_tree.max);
259-
arraylist_grow(&eytzinger_idxs, inc);
260-
arraylist_grow(&eytzinger_image_tree, inc);
261-
eytzinger_idxs.items[eytzinger_idxs.len - 1] = (void*)jl_linkage_blobs.len;
262-
eytzinger_image_tree.items[eytzinger_image_tree.len - 1] = (void*)1; // outside image
263-
for (size_t i = 0; i < jl_linkage_blobs.len; i++) {
264-
assert((uintptr_t) jl_linkage_blobs.items[i] % 4 == 0 && "Linkage blob not 4-byte aligned!");
265-
// We abuse the pointer here a little so that a couple of properties are true:
266-
// 1. a start and an end are never the same value. This simplifies the binary search.
267-
// 2. ends are always after starts. This also simplifies the binary search.
268-
// We assume that there exist no 0-size blobs, but that's a safe assumption
269-
// since it means nothing could be there anyways
270-
uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
271-
eytzinger_idxs.items[i] = (void*)(val + (i & 1));
272-
}
273-
qsort(eytzinger_idxs.items, eytzinger_idxs.len - 1, sizeof(void*), ptr_cmp);
274-
gc_img_min = (uintptr_t) eytzinger_idxs.items[0];
275-
gc_img_max = (uintptr_t) eytzinger_idxs.items[eytzinger_idxs.len - 2] + 1;
276-
eytzinger((uintptr_t*)eytzinger_idxs.items, (uintptr_t*)eytzinger_image_tree.items, 0, 1, eytzinger_idxs.len - 1);
277-
// Reuse the scratch memory to store the indices
278-
// Still O(nlogn) because binary search
279-
for (size_t i = 0; i < jl_linkage_blobs.len; i ++) {
280-
uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
281-
// This is the same computation as in the prior for loop
282-
uintptr_t eyt_val = val + (i & 1);
283-
size_t eyt_idx = eyt_obj_idx((jl_value_t*)(eyt_val + 1)); assert(eyt_idx < eytzinger_idxs.len - 1);
284-
assert(eytzinger_image_tree.items[eyt_idx] == (void*)eyt_val && "Eytzinger tree failed to find object!");
285-
if (i & 1)
286-
eytzinger_idxs.items[eyt_idx] = (void*)n_linkage_blobs();
287-
else
288-
eytzinger_idxs.items[eyt_idx] = (void*)(i / 2);
289-
}
290-
}
291-
292-
static int eyt_obj_in_img(jl_value_t *obj) JL_NOTSAFEPOINT
293-
{
294-
assert((uintptr_t) obj % 4 == 0 && "Object not 4-byte aligned!");
295-
int idx = eyt_obj_idx(obj);
296-
// Now we use a tiny trick: tree[idx] & 1 is whether or not tree[idx] is a
297-
// start (0) or an end (1) of a blob. If it's a start, then the object is
298-
// in the image, otherwise it is not.
299-
int in_image = ((uintptr_t)eytzinger_image_tree.items[idx] & 1) == 0;
300-
return in_image;
301-
}
302-
303-
size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
304-
{
305-
assert((uintptr_t) v % 4 == 0 && "Object not 4-byte aligned!");
306-
int eyt_idx = eyt_obj_idx(v);
307-
// We fill the invalid slots with the length, so we can just return that
308-
size_t idx = (size_t) eytzinger_idxs.items[eyt_idx];
309-
return idx;
310-
}
311-
312-
uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
313-
{
314-
return eyt_obj_in_img(obj);
315-
}
316196

317197
NOINLINE uintptr_t gc_get_stack_ptr(void)
318198
{
@@ -346,9 +226,6 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
346226
}
347227
}
348228

349-
350-
void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads);
351-
352229
// malloc wrappers, aligned allocation
353230

354231
#if defined(_OS_WINDOWS_)
@@ -3242,20 +3119,20 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
32423119
int single_threaded = (jl_n_gcthreads == 0 || gc_heap_snapshot_enabled);
32433120
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
32443121
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
3245-
jl_gc_markqueue_t *mq2 = mq;
3246-
jl_ptls_t ptls_gc_thread = NULL;
3122+
jl_ptls_t ptls_dest = ptls;
3123+
jl_gc_markqueue_t *mq_dest = mq;
32473124
if (!single_threaded) {
3248-
ptls_gc_thread = gc_all_tls_states[gc_first_tid + t_i % jl_n_gcthreads];
3249-
mq2 = &ptls_gc_thread->mark_queue;
3125+
ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_gcthreads];
3126+
mq_dest = &ptls_dest->mark_queue;
32503127
}
32513128
if (ptls2 != NULL) {
32523129
// 2.1. mark every thread local root
3253-
gc_queue_thread_local(mq2, ptls2);
3130+
gc_queue_thread_local(mq_dest, ptls2);
32543131
// 2.2. mark any managed objects in the backtrace buffer
32553132
// TODO: treat these as roots for gc_heap_snapshot_record
3256-
gc_queue_bt_buf(mq2, ptls2);
3133+
gc_queue_bt_buf(mq_dest, ptls2);
32573134
// 2.3. mark every object in the `last_remsets` and `rem_binding`
3258-
gc_queue_remset(single_threaded ? ptls : ptls_gc_thread, ptls2);
3135+
gc_queue_remset(ptls_dest, ptls2);
32593136
}
32603137
}
32613138

@@ -3696,10 +3573,6 @@ void jl_gc_init(void)
36963573

36973574
arraylist_new(&finalizer_list_marked, 0);
36983575
arraylist_new(&to_finalize, 0);
3699-
arraylist_new(&eytzinger_image_tree, 0);
3700-
arraylist_new(&eytzinger_idxs, 0);
3701-
arraylist_push(&eytzinger_idxs, (void*)0);
3702-
arraylist_push(&eytzinger_image_tree, (void*)1); // outside image
37033576

37043577
gc_num.interval = default_collect_interval;
37053578
last_long_collect_interval = default_collect_interval;

src/gc.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,13 +379,17 @@ STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFE
379379
*list = hdr;
380380
}
381381

382+
extern uv_mutex_t gc_threads_lock;
383+
extern uv_cond_t gc_threads_cond;
384+
extern _Atomic(int) gc_n_threads_marking;
382385
void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
383386
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin,
384387
jl_value_t **fl_end) JL_NOTSAFEPOINT;
385388
void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list,
386389
size_t start) JL_NOTSAFEPOINT;
387390
void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
388391
void gc_mark_loop_serial(jl_ptls_t ptls);
392+
void gc_mark_loop_parallel(jl_ptls_t ptls, int master);
389393
void sweep_stack_pools(void);
390394
void jl_gc_debug_init(void);
391395

src/init.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,10 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
818818

819819
arraylist_new(&jl_linkage_blobs, 0);
820820
arraylist_new(&jl_image_relocs, 0);
821+
arraylist_new(&eytzinger_image_tree, 0);
822+
arraylist_new(&eytzinger_idxs, 0);
823+
arraylist_push(&eytzinger_idxs, (void*)0);
824+
arraylist_push(&eytzinger_image_tree, (void*)1); // outside image
821825

822826
jl_ptls_t ptls = jl_init_threadtls(0);
823827
#pragma GCC diagnostic push

src/julia_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,8 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m);
317317
extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;
318318
JL_DLLEXPORT extern arraylist_t jl_linkage_blobs; // external linkage: sysimg/pkgimages
319319
JL_DLLEXPORT extern arraylist_t jl_image_relocs; // external linkage: sysimg/pkgimages
320+
extern arraylist_t eytzinger_image_tree;
321+
extern arraylist_t eytzinger_idxs;
320322

321323
extern JL_DLLEXPORT size_t jl_page_size;
322324
extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED;

src/partr.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,6 @@ void jl_init_threadinginfra(void)
108108

109109
void JL_NORETURN jl_finish_task(jl_task_t *t);
110110

111-
extern uv_mutex_t gc_threads_lock;
112-
extern uv_cond_t gc_threads_cond;
113-
extern _Atomic(int) gc_n_threads_marking;
114-
extern void gc_mark_loop_parallel(jl_ptls_t ptls, int master);
115-
116111
// gc thread function
117112
void jl_gc_threadfun(void *arg)
118113
{

0 commit comments

Comments
 (0)