@@ -11,18 +11,16 @@
 extern "C" {
 #endif
 
+// Number of threads currently running the GC mark-loop
+_Atomic(int) gc_n_threads_marking;
 // `tid` of mutator thread that triggered GC
 _Atomic(int) gc_master_tid;
 // `tid` of first GC thread
 int gc_first_tid;
-
 // Mutex/cond used to synchronize sleep/wakeup of GC threads
 uv_mutex_t gc_threads_lock;
 uv_cond_t gc_threads_cond;
 
-// Number of threads currently running the GC mark-loop
-_Atomic(int) gc_n_threads_marking;
-
 // Linked list of callback functions
 
 typedef void (*jl_gc_cb_func_t)(void);
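
The declarations above carry the GC worker synchronization state: `gc_threads_lock`/`gc_threads_cond` park and wake the GC threads, and the relocated `gc_n_threads_marking` counts how many of them are inside the mark loop. Below is a minimal sketch of how such a mutex/cond pair is typically used, assuming libuv's threading API; `gc_may_mark` and the loop body are hypothetical stand-ins, not the actual runtime code.

```c
#include <uv.h>
#include <stdatomic.h>

static uv_mutex_t gc_threads_lock;      // assume uv_mutex_init was called
static uv_cond_t gc_threads_cond;       // assume uv_cond_init was called
static _Atomic(int) gc_may_mark;        // hypothetical wakeup predicate
static _Atomic(int) gc_n_threads_marking;

// Worker loop: sleep on the condition variable until the master thread
// signals, then enter the mark loop, tracking participation in the counter.
static void gc_worker_sketch(void *arg)
{
    (void)arg;
    for (;;) {
        uv_mutex_lock(&gc_threads_lock);
        // Re-check the predicate on every wakeup: condition variables can
        // wake spuriously, so the wait must sit in a loop.
        while (!atomic_load(&gc_may_mark))
            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
        uv_mutex_unlock(&gc_threads_lock);
        atomic_fetch_add(&gc_n_threads_marking, 1);
        /* ... drain this thread's mark queue ... */
        atomic_fetch_add(&gc_n_threads_marking, -1);
    }
}
```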
@@ -142,8 +140,8 @@ static _Atomic(int) support_conservative_marking = 0;
  * Note about GC synchronization:
  *
  * When entering `jl_gc_collect()`, `jl_gc_running` is atomically changed from
- * `0` to `1` to make sure that only one thread can be running the GC. Other
- * threads that enters `jl_gc_collect()` at the same time (or later calling
+ * `0` to `1` to make sure that only one thread can be running `_jl_gc_collect`. Other
+ * mutator threads that enter `jl_gc_collect()` at the same time (or later calling
  * from unmanaged code) will wait in `jl_gc_collect()` until the GC is finished.
  *
  * Before starting the mark phase the GC thread calls `jl_safepoint_start_gc()`
@@ -153,7 +151,7 @@ static _Atomic(int) support_conservative_marking = 0;
  * GC (`gc_state != 0`). It also acquires the `finalizers` lock so that no
  * other thread will access them when the GC is running.
  *
- * During the mark and sweep phase of the GC, the threads that are not running
+ * During the mark and sweep phase of the GC, the mutator threads that are not running
  * the GC should either be running unmanaged code (or code section that does
  * not have a GC critical region mainly including storing to the stack or
  * another object) or paused at a safepoint and wait for the GC to finish.
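
A compact sketch of the entry protocol this note describes, using C11 atomics. The function name and the spin-wait are illustrative only; in the real runtime the losing threads block at a safepoint rather than spinning.

```c
#include <stdatomic.h>
#include <sched.h>

static _Atomic(int) jl_gc_running;

void jl_gc_collect_sketch(void)
{
    int expected = 0;
    // Atomically flip 0 -> 1; exactly one thread wins and runs the collection.
    if (!atomic_compare_exchange_strong(&jl_gc_running, &expected, 1)) {
        // Lost the race: wait until the winning thread finishes the GC.
        while (atomic_load(&jl_gc_running))
            sched_yield();
        return;
    }
    /* ... stop the world, mark, sweep ... */
    atomic_store(&jl_gc_running, 0);  // release the waiting threads
}
```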
@@ -185,13 +183,6 @@ pagetable_t memory_map;
 // List of marked big objects. Not per-thread. Accessed only by master thread.
 bigval_t *big_objects_marked = NULL;
 
-// Eytzinger tree of images. Used for very fast jl_object_in_image queries during gc
-// See https://algorithmica.org/en/eytzinger
-static arraylist_t eytzinger_image_tree;
-static arraylist_t eytzinger_idxs;
-static uintptr_t gc_img_min;
-static uintptr_t gc_img_max;
-
 // -- Finalization --
 // `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
 // If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
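
To illustrate the tagging convention stated in that comment (helper names are made up; only the low-bit invariant comes from the source): object pointers are at least 4-byte aligned, so bit 0 is always free to carry a flag.

```c
#include <stdint.h>

// If a stored pointer has its lowest bit set, the *next* list entry is an
// unboxed C function pointer rather than a Julia object.
static inline int next_is_c_func(void *entry)
{
    return ((uintptr_t)entry & 1) != 0;
}

// Strip the tag bit to recover the real pointer.
static inline void *untag(void *entry)
{
    return (void*)((uintptr_t)entry & ~(uintptr_t)1);
}
```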
@@ -202,117 +193,6 @@ arraylist_t finalizer_list_marked;
 arraylist_t to_finalize;
 JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
 
-static int ptr_cmp(const void *l, const void *r)
-{
-    uintptr_t left = *(const uintptr_t*)l;
-    uintptr_t right = *(const uintptr_t*)r;
-    // jl_safe_printf("cmp %p %p\n", (void*)left, (void*)right);
-    return (left > right) - (left < right);
-}
-
-// Build an eytzinger tree from a sorted array
-static int eytzinger(uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n)
-{
-    if (k <= n) {
-        i = eytzinger(src, dest, i, 2 * k, n);
-        dest[k - 1] = src[i];
-        i++;
-        i = eytzinger(src, dest, i, 2 * k + 1, n);
-    }
-    return i;
-}
-
-static size_t eyt_obj_idx(jl_value_t *obj) JL_NOTSAFEPOINT
-{
-    size_t n = eytzinger_image_tree.len - 1;
-    if (n == 0)
-        return n;
-    assert(n % 2 == 0 && "Eytzinger tree not even length!");
-    uintptr_t cmp = (uintptr_t) obj;
-    if (cmp <= gc_img_min || cmp > gc_img_max)
-        return n;
-    uintptr_t *tree = (uintptr_t*)eytzinger_image_tree.items;
-    size_t k = 1;
-    // note that k preserves the history of how we got to the current node
-    while (k <= n) {
-        int greater = (cmp > tree[k - 1]);
-        k <<= 1;
-        k |= greater;
-    }
-    // Free to assume k is nonzero, since we start with k = 1
-    // and cmp > gc_img_min
-    // This shift does a fast revert of the path until we get
-    // to a node that evaluated less than cmp.
-    k >>= (__builtin_ctzll(k) + 1);
-    assert(k != 0);
-    assert(k <= n && "Eytzinger tree index out of bounds!");
-    assert(tree[k - 1] < cmp && "Failed to find lower bound for object!");
-    return k - 1;
-}
-
-//used in staticdata.c after we add an image
-void rebuild_image_blob_tree(void)
-{
-    size_t inc = 1 + jl_linkage_blobs.len - eytzinger_image_tree.len;
-    assert(eytzinger_idxs.len == eytzinger_image_tree.len);
-    assert(eytzinger_idxs.max == eytzinger_image_tree.max);
-    arraylist_grow(&eytzinger_idxs, inc);
-    arraylist_grow(&eytzinger_image_tree, inc);
-    eytzinger_idxs.items[eytzinger_idxs.len - 1] = (void*)jl_linkage_blobs.len;
-    eytzinger_image_tree.items[eytzinger_image_tree.len - 1] = (void*)1; // outside image
-    for (size_t i = 0; i < jl_linkage_blobs.len; i++) {
-        assert((uintptr_t) jl_linkage_blobs.items[i] % 4 == 0 && "Linkage blob not 4-byte aligned!");
-        // We abuse the pointer here a little so that a couple of properties are true:
-        // 1. a start and an end are never the same value. This simplifies the binary search.
-        // 2. ends are always after starts. This also simplifies the binary search.
-        // We assume that there exist no 0-size blobs, but that's a safe assumption
-        // since it means nothing could be there anyways
-        uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
-        eytzinger_idxs.items[i] = (void*)(val + (i & 1));
-    }
-    qsort(eytzinger_idxs.items, eytzinger_idxs.len - 1, sizeof(void*), ptr_cmp);
-    gc_img_min = (uintptr_t) eytzinger_idxs.items[0];
-    gc_img_max = (uintptr_t) eytzinger_idxs.items[eytzinger_idxs.len - 2] + 1;
-    eytzinger((uintptr_t*)eytzinger_idxs.items, (uintptr_t*)eytzinger_image_tree.items, 0, 1, eytzinger_idxs.len - 1);
-    // Reuse the scratch memory to store the indices
-    // Still O(nlogn) because binary search
-    for (size_t i = 0; i < jl_linkage_blobs.len; i++) {
-        uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
-        // This is the same computation as in the prior for loop
-        uintptr_t eyt_val = val + (i & 1);
-        size_t eyt_idx = eyt_obj_idx((jl_value_t*)(eyt_val + 1)); assert(eyt_idx < eytzinger_idxs.len - 1);
-        assert(eytzinger_image_tree.items[eyt_idx] == (void*)eyt_val && "Eytzinger tree failed to find object!");
-        if (i & 1)
-            eytzinger_idxs.items[eyt_idx] = (void*)n_linkage_blobs();
-        else
-            eytzinger_idxs.items[eyt_idx] = (void*)(i / 2);
-    }
-}
-
-static int eyt_obj_in_img(jl_value_t *obj) JL_NOTSAFEPOINT
-{
-    assert((uintptr_t) obj % 4 == 0 && "Object not 4-byte aligned!");
-    int idx = eyt_obj_idx(obj);
-    // Now we use a tiny trick: tree[idx] & 1 is whether or not tree[idx] is a
-    // start (0) or an end (1) of a blob. If it's a start, then the object is
-    // in the image, otherwise it is not.
-    int in_image = ((uintptr_t)eytzinger_image_tree.items[idx] & 1) == 0;
-    return in_image;
-}
-
-size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
-{
-    assert((uintptr_t) v % 4 == 0 && "Object not 4-byte aligned!");
-    int eyt_idx = eyt_obj_idx(v);
-    // We fill the invalid slots with the length, so we can just return that
-    size_t idx = (size_t) eytzinger_idxs.items[eyt_idx];
-    return idx;
-}
-
-uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
-{
-    return eyt_obj_in_img(obj);
-}
 
 NOINLINE uintptr_t gc_get_stack_ptr(void)
 {
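
For context on the code removed above: an Eytzinger tree stores a sorted array in breadth-first (heap) order, which makes binary search cache-friendly and branch-free (see https://algorithmica.org/en/eytzinger). Below is a self-contained sketch of the lower-bound search that `eyt_obj_idx` performed, assuming a GCC/Clang `__builtin_ctzll` and a query value strictly greater than the smallest element (the removed code guarded that with `gc_img_min`).

```c
#include <stdio.h>
#include <stddef.h>

// Returns the index of the largest element < cmp in a 1-based Eytzinger
// layout of n elements.
static size_t eyt_lower_bound(const unsigned long *tree, size_t n, unsigned long cmp)
{
    size_t k = 1;
    while (k <= n) {
        int greater = (cmp > tree[k - 1]);
        k <<= 1;
        k |= greater;  // k records the comparison history along the path
    }
    // Drop the trailing "went left" steps to land on the last node that
    // compared less than cmp.
    k >>= (__builtin_ctzll(k) + 1);
    return k - 1;
}

int main(void)
{
    // Eytzinger layout of the sorted array {1,2,3,4,5,6,7}
    const unsigned long tree[] = {4, 2, 6, 1, 3, 5, 7};
    printf("%lu\n", tree[eyt_lower_bound(tree, 7, 5)]);  // prints 4
    return 0;
}
```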
@@ -346,9 +226,6 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
     }
 }
 
-
-void jl_gc_wait_for_the_world(jl_ptls_t *gc_all_tls_states, int gc_n_threads);
-
 // malloc wrappers, aligned allocation
 
 #if defined(_OS_WINDOWS_)
@@ -3242,20 +3119,20 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     int single_threaded = (jl_n_gcthreads == 0 || gc_heap_snapshot_enabled);
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        jl_gc_markqueue_t *mq2 = mq;
-        jl_ptls_t ptls_gc_thread = NULL;
+        jl_ptls_t ptls_dest = ptls;
+        jl_gc_markqueue_t *mq_dest = mq;
         if (!single_threaded) {
-            ptls_gc_thread = gc_all_tls_states[gc_first_tid + t_i % jl_n_gcthreads];
-            mq2 = &ptls_gc_thread->mark_queue;
+            ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_gcthreads];
+            mq_dest = &ptls_dest->mark_queue;
         }
         if (ptls2 != NULL) {
             // 2.1. mark every thread local root
-            gc_queue_thread_local(mq2, ptls2);
+            gc_queue_thread_local(mq_dest, ptls2);
             // 2.2. mark any managed objects in the backtrace buffer
             // TODO: treat these as roots for gc_heap_snapshot_record
-            gc_queue_bt_buf(mq2, ptls2);
+            gc_queue_bt_buf(mq_dest, ptls2);
             // 2.3. mark every object in the `last_remsets` and `rem_binding`
-            gc_queue_remset(single_threaded ? ptls : ptls_gc_thread, ptls2);
+            gc_queue_remset(ptls_dest, ptls2);
         }
     }
 
@@ -3696,10 +3573,6 @@ void jl_gc_init(void)
 
     arraylist_new(&finalizer_list_marked, 0);
     arraylist_new(&to_finalize, 0);
-    arraylist_new(&eytzinger_image_tree, 0);
-    arraylist_new(&eytzinger_idxs, 0);
-    arraylist_push(&eytzinger_idxs, (void*)0);
-    arraylist_push(&eytzinger_image_tree, (void*)1); // outside image
 
     gc_num.interval = default_collect_interval;
     last_long_collect_interval = default_collect_interval;