@@ -1027,6 +1027,20 @@ void gc_sweep_wait_for_all_stacks(void)
10271027 }
10281028}
10291029
1030+ void sweep_stack_pools (jl_ptls_t ptls ) JL_NOTSAFEPOINT
1031+ {
1032+ // initialize ptls index for parallel sweeping of stack pools
1033+ int stack_free_idx = jl_atomic_load_relaxed (& gc_stack_free_idx );
1034+ if (stack_free_idx + 1 == gc_n_threads )
1035+ jl_atomic_store_relaxed (& gc_stack_free_idx , 0 );
1036+ else
1037+ jl_atomic_store_relaxed (& gc_stack_free_idx , stack_free_idx + 1 );
1038+ jl_atomic_store_release (& gc_ptls_sweep_idx , gc_n_threads - 1 ); // idx == gc_n_threads = release stacks to the OS so it's serial
1039+ gc_sweep_wake_all_stacks (ptls );
1040+ sweep_stack_pool_loop ();
1041+ gc_sweep_wait_for_all_stacks ();
1042+ }
1043+
10301044static void gc_pool_sync_nfree (jl_gc_pagemeta_t * pg , jl_taggedvalue_t * last ) JL_NOTSAFEPOINT
10311045{
10321046 assert (pg -> fl_begin_offset != UINT16_MAX );
@@ -3095,16 +3109,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
30953109#endif
30963110 current_sweep_full = sweep_full ;
30973111 sweep_weak_refs ();
3098- // initialize ptls index for parallel sweeping of stack pools
3099- int stack_free_idx = jl_atomic_load_relaxed (& gc_stack_free_idx );
3100- if (stack_free_idx + 1 == gc_n_threads )
3101- jl_atomic_store_relaxed (& gc_stack_free_idx , 0 );
3102- else
3103- jl_atomic_store_relaxed (& gc_stack_free_idx , stack_free_idx + 1 );
3104- jl_atomic_store_release (& gc_ptls_sweep_idx , gc_n_threads - 1 ); // idx == gc_n_threads = release stacks to the OS so it's serial
3105- gc_sweep_wake_all_stacks (ptls );
3106- sweep_stack_pools ();
3107- gc_sweep_wait_for_all_stacks ();
3112+ sweep_stack_pools (ptls );
31083113 gc_sweep_other (ptls , sweep_full );
31093114 gc_scrub ();
31103115 gc_verify_tags ();
@@ -3516,6 +3521,10 @@ STATIC_INLINE int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT
35163521 return (jl_atomic_load (& ptls -> gc_tls .gc_sweeps_requested ) > 0 );
35173522}
35183523
3524+ STATIC_INLINE int may_sweep_stack (jl_ptls_t ptls ) JL_NOTSAFEPOINT
3525+ {
3526+ return (jl_atomic_load (& ptls -> gc_tls .gc_stack_sweep_requested ) > 0 );
3527+ }
35193528// parallel gc thread function
35203529void jl_parallel_gc_threadfun (void * arg )
35213530{
@@ -3544,10 +3553,14 @@ void jl_parallel_gc_threadfun(void *arg)
35443553 uv_mutex_unlock (& gc_threads_lock );
35453554 assert (jl_atomic_load_relaxed (& ptls -> gc_state ) == JL_GC_PARALLEL_COLLECTOR_THREAD );
35463555 gc_mark_loop_parallel (ptls , 0 );
3556+ if (may_sweep_stack (ptls )) {
3557+ assert (jl_atomic_load_relaxed (& ptls -> gc_state ) == JL_GC_PARALLEL_COLLECTOR_THREAD );
3558+ sweep_stack_pool_loop ();
3559+ jl_atomic_fetch_add (& ptls -> gc_tls .gc_stack_sweep_requested , -1 );
3560+ }
35473561 if (may_sweep (ptls )) {
35483562 assert (jl_atomic_load_relaxed (& ptls -> gc_state ) == JL_GC_PARALLEL_COLLECTOR_THREAD );
35493563 gc_sweep_pool_parallel (ptls );
3550- sweep_stack_pools ();
35513564 jl_atomic_fetch_add (& ptls -> gc_tls .gc_sweeps_requested , -1 );
35523565 }
35533566 }
0 commit comments