 extern "C" {
 #endif
 
+// Number of GC threads that may run parallel marking
+int jl_n_markthreads;
+// Number of GC threads that may run concurrent sweeping (0 or 1)
+int jl_n_sweepthreads;
 // Number of threads currently running the GC mark-loop
 _Atomic(int) gc_n_threads_marking;
 // `tid` of mutator thread that triggered GC
 _Atomic(int) gc_master_tid;
 // `tid` of first GC thread
 int gc_first_tid;
+// To indicate whether concurrent sweeping should run
+uv_sem_t gc_sweep_assists_needed;
 
 // Linked list of callback functions
 
@@ -1356,7 +1362,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
     int pg_skpd = 1;
     if (!pg->has_marked) {
         reuse_page = 0;
-#ifdef _P64
+#ifdef _P64 // TODO: re-enable on `_P32`?
         // lazy version: (empty) if the whole page was already unused, free it (return it to the pool)
         // eager version: (freedall) free page as soon as possible
         // the eager one uses less memory.
@@ -1440,8 +1446,18 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
         push_page_metadata_back(lazily_freed, pg);
     }
     else {
+#ifdef _P64 // only enable concurrent sweeping on 64bit
+        if (jl_n_sweepthreads == 0) {
+            jl_gc_free_page(pg);
+            push_lf_page_metadata_back(&global_page_pool_freed, pg);
+        }
+        else {
+            push_lf_page_metadata_back(&global_page_pool_lazily_freed, pg);
+        }
+#else
         jl_gc_free_page(pg);
         push_lf_page_metadata_back(&global_page_pool_freed, pg);
+#endif
     }
     gc_time_count_page(freedall, pg_skpd);
     gc_num.freed += (nfree - old_nfree) * osize;
@@ -1561,6 +1577,13 @@ static void gc_sweep_pool(int sweep_full)
         }
     }
 
+#ifdef _P64 // only enable concurrent sweeping on 64bit
+    // wake thread up to sweep concurrently
+    if (jl_n_sweepthreads > 0) {
+        uv_sem_post(&gc_sweep_assists_needed);
+    }
+#endif
+
     gc_time_pool_end(sweep_full);
 }
 
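Note: this hunk only signals that concurrent sweeping should start; the thread that actually drains the lazily-freed pages lives elsewhere in the runtime. The following standalone C sketch (not Julia's sweeper implementation; drain_lazily_freed_pages and the bounded loop are made up for illustration) shows the semaphore handshake that gc_sweep_assists_needed enables: gc_sweep_pool posts once per collection and a dedicated thread wakes up to sweep while mutators keep running.

#include <uv.h>
#include <stdio.h>

static uv_sem_t gc_sweep_assists_needed;     // counts pending "sweep now" requests

// Hypothetical stand-in for freeing the pages queued on global_page_pool_lazily_freed.
static void drain_lazily_freed_pages(void)
{
    printf("concurrent sweep: returning lazily-freed pages to the OS\n");
}

static void sweeper_thread(void *arg)
{
    int rounds = *(int *)arg;                    // the real sweeper loops forever; bounded here so the demo exits
    for (int i = 0; i < rounds; i++) {
        uv_sem_wait(&gc_sweep_assists_needed);   // sleep until the collector posts
        drain_lazily_freed_pages();              // runs concurrently with mutator threads
    }
}

int main(void)
{
    uv_thread_t tid;
    int rounds = 2;
    uv_sem_init(&gc_sweep_assists_needed, 0);    // same initial value as in jl_gc_init()
    uv_thread_create(&tid, sweeper_thread, &rounds);
    for (int i = 0; i < rounds; i++)
        uv_sem_post(&gc_sweep_assists_needed);   // what gc_sweep_pool() does when jl_n_sweepthreads > 0
    uv_thread_join(&tid);
    uv_sem_destroy(&gc_sweep_assists_needed);
    return 0;
}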
@@ -2691,8 +2714,8 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     // of work for the mark loop
     steal : {
         // Try to steal chunk from random GC thread
-        for (int i = 0; i < 4 * jl_n_gcthreads; i++) {
-            uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_gcthreads;
+        for (int i = 0; i < 4 * jl_n_markthreads; i++) {
+            uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads;
             jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
             c = gc_chunkqueue_steal_from(mq2);
             if (c.cid != GC_empty_chunk) {
@@ -2701,7 +2724,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
             }
         }
         // Sequentially walk GC threads to try to steal chunk
-        for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) {
+        for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
             jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue;
             c = gc_chunkqueue_steal_from(mq2);
             if (c.cid != GC_empty_chunk) {
@@ -2718,15 +2741,15 @@ void gc_mark_and_steal(jl_ptls_t ptls)
             }
         }
         // Try to steal pointer from random GC thread
-        for (int i = 0; i < 4 * jl_n_gcthreads; i++) {
-            uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_gcthreads;
+        for (int i = 0; i < 4 * jl_n_markthreads; i++) {
+            uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads;
             jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
             new_obj = gc_ptr_queue_steal_from(mq2);
             if (new_obj != NULL)
                 goto mark;
         }
         // Sequentially walk GC threads to try to steal pointer
-        for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) {
+        for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
             jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue;
             new_obj = gc_ptr_queue_steal_from(mq2);
             if (new_obj != NULL)
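The retargeted loops above keep the existing two-phase stealing policy, now scoped to the mark threads only: up to 4 * jl_n_markthreads random probes (cheap, and they spread thieves across victims), followed by one deterministic pass over every mark thread so stealable work cannot be missed. Below is a self-contained sketch of that policy, with hypothetical queue_size/rng_next standing in for the real mark queues and cong():

#include <stdint.h>
#include <stdio.h>

#define FIRST_TID      1    /* plays the role of gc_first_tid */
#define N_MARKTHREADS  4    /* plays the role of jl_n_markthreads */

/* Hypothetical per-thread queues: nonzero means "has stealable work". */
static int queue_size[FIRST_TID + N_MARKTHREADS];

/* Hypothetical xorshift PRNG standing in for cong(). */
static uint64_t rng_next(uint64_t *s)
{
    *s ^= *s << 13; *s ^= *s >> 7; *s ^= *s << 17;
    return *s;
}

/* Return the tid stolen from, or -1 if every mark queue was empty. */
static int steal(uint64_t *rng_state)
{
    /* Phase 1: bounded random probes, mirroring the 4 * jl_n_markthreads loops. */
    for (int i = 0; i < 4 * N_MARKTHREADS; i++) {
        int v = FIRST_TID + (int)(rng_next(rng_state) % N_MARKTHREADS);
        if (queue_size[v] > 0)
            return v;
    }
    /* Phase 2: sequential walk so non-empty queues are never overlooked. */
    for (int v = FIRST_TID; v < FIRST_TID + N_MARKTHREADS; v++) {
        if (queue_size[v] > 0)
            return v;
    }
    return -1;
}

int main(void)
{
    uint64_t rng = 0x9e3779b97f4a7c15ULL;
    queue_size[FIRST_TID + 2] = 1;           /* only one mark thread has work */
    printf("stole from tid %d\n", steal(&rng));
    return 0;
}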
@@ -2748,7 +2771,7 @@ void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
         jl_atomic_store(&gc_master_tid, ptls->tid);
         // Wake threads up and try to do some work
         jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) {
+        for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
             jl_ptls_t ptls2 = gc_all_tls_states[i];
             uv_mutex_lock(&ptls2->sleep_lock);
             uv_cond_signal(&ptls2->wake_signal);
@@ -2771,7 +2794,7 @@ void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
 
 void gc_mark_loop(jl_ptls_t ptls)
 {
-    if (jl_n_gcthreads == 0 || gc_heap_snapshot_enabled) {
+    if (jl_n_markthreads == 0 || gc_heap_snapshot_enabled) {
         gc_mark_loop_serial(ptls);
     }
     else {
@@ -3065,13 +3088,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     }
 
     assert(gc_n_threads);
-    int single_threaded = (jl_n_gcthreads == 0 || gc_heap_snapshot_enabled);
+    int single_threaded_mark = (jl_n_markthreads == 0 || gc_heap_snapshot_enabled);
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         jl_ptls_t ptls_dest = ptls;
         jl_gc_markqueue_t *mq_dest = mq;
-        if (!single_threaded) {
-            ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_gcthreads];
+        if (!single_threaded_mark) {
+            ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_markthreads];
             mq_dest = &ptls_dest->mark_queue;
         }
         if (ptls2 != NULL) {
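The renamed single_threaded_mark flag guards the same routing as before: when parallel marking is enabled, the roots of mutator thread t_i are pushed onto the mark queue of thread gc_first_tid + t_i % jl_n_markthreads. A tiny standalone demo of that modular round-robin, using made-up thread counts:

#include <stdio.h>

int main(void)
{
    const int gc_first_tid = 1;      /* hypothetical: tid of the first GC thread */
    const int n_markthreads = 3;     /* hypothetical: number of mark threads */
    const int gc_n_threads = 8;      /* hypothetical: number of thread states to distribute */
    for (int t_i = 0; t_i < gc_n_threads; t_i++)
        printf("roots of thread %d -> mark queue of tid %d\n",
               t_i, gc_first_tid + t_i % n_markthreads);
    return 0;
}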
@@ -3513,6 +3536,7 @@ void jl_gc_init(void)
     JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
     uv_mutex_init(&gc_cache_lock);
     uv_mutex_init(&gc_perm_lock);
+    uv_sem_init(&gc_sweep_assists_needed, 0);
 
     jl_gc_init_page();
     jl_gc_debug_init();