@@ -2719,9 +2719,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid == -1) {
-        return;
-    }
+    assert(master_tid != -1);
     mq_master = &gc_all_tls_states[master_tid]->mark_queue;
     void *new_obj;
     jl_gc_chunk_t c;
@@ -2812,61 +2810,49 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
  * Correctness argument for the mark-loop termination protocol.
  *
  * Safety properties:
- * - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes
+ * - No work items shall be in any thread's queues when `gc_should_mark` observes
  *   that `gc_n_threads_marking` is zero.
  *
  * - No work item shall be stolen from the master thread (i.e. mutator thread which started
  *   GC and which helped the `jl_n_markthreads` - 1 threads to mark) after
- *   `gc_mark_loop_barrier` observes that `gc_n_threads_marking` is zero. This property is
+ *   `gc_should_mark` observes that `gc_n_threads_marking` is zero. This property is
  *   necessary because we call `gc_mark_loop_serial` after marking the finalizer list in
  *   `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there,
  *   and that no work is stolen from us at that point.
  *
  * Proof:
- * - Suppose the master thread observes that `gc_n_threads_marking` is zero in
- *   `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point.
- *   Since threads try to steal from all threads' queues, this implies that all threads must
- *   have tried to steal from the queue which still has a work item left, but failed to do so,
- *   which violates the semantics of Chase-Lev's work-stealing queue.
- *
- * - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the even
- *   "master thread observes that `gc_n_threads_marking` is zero". Since we're using
- *   sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in
- *   `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must
- *   increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an
- *   event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed
- *   `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that
- *   the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1
- *   and therefore won't enter the mark-loop.
+ * - If a thread observes that `gc_n_threads_marking` is zero inside `gc_should_mark`, then
+ *   no thread has work in its queue. This is guaranteed because a thread may only exit
+ *   `gc_mark_and_steal` when its own queue is empty, and that information is synchronized by
+ *   the seq-cst fetch_add to any thread that is in `gc_should_mark`. `gc_queue_observer_lock`
+ *   guarantees that once `gc_n_threads_marking` reaches zero, no thread will increment it again,
+ *   because incrementing is only legal from inside the lock. Therefore, no thread will reenter
+ *   the mark-loop after `gc_n_threads_marking` reaches zero.
  */

-int gc_should_mark(jl_ptls_t ptls)
+int gc_should_mark(void)
 {
     int should_mark = 0;
-    int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-    // fast path
-    if (n_threads_marking == 0) {
-        return 0;
-    }
     uv_mutex_lock(&gc_queue_observer_lock);
     while (1) {
-        int tid = jl_atomic_load(&gc_master_tid);
-        // fast path
-        if (tid == -1) {
-            break;
-        }
-        n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-        // fast path
+        int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
         if (n_threads_marking == 0) {
             break;
         }
+        int tid = jl_atomic_load_relaxed(&gc_master_tid);
+        assert(tid != -1);
         size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
         for (tid = gc_first_tid; tid < gc_first_tid + jl_n_markthreads; tid++) {
-            work += gc_count_work_in_queue(gc_all_tls_states[tid]);
+            jl_ptls_t ptls2 = gc_all_tls_states[tid];
+            if (ptls2 == NULL) {
+                continue;
+            }
+            work += gc_count_work_in_queue(ptls2);
         }
         // if there is a lot of work left, enter the mark loop
         if (work >= 16 * n_threads_marking) {
-            jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+            jl_atomic_fetch_add(&gc_n_threads_marking, 1); // A possibility would be to allow a thread that found lots
+            // of work to increment this
             should_mark = 1;
             break;
         }
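
The new proof rests on two ingredients: the seq-cst fetch_add a thread performs after it leaves `gc_mark_and_steal` with an empty queue, and the rule that `gc_n_threads_marking` may only be incremented while `gc_queue_observer_lock` is held. Below is a minimal, self-contained sketch of that protocol shape, not the gc.c code: `jl_atomic_*` and `uv_mutex_*` are replaced by C11 <stdatomic.h> and pthreads, the mark queues are reduced to per-thread counters, and every name (`n_marking`, `queued_work`, `observer_lock`, `should_mark`, `worker`) is illustrative.

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NWORKERS 4

static atomic_int n_marking;                      /* stand-in for gc_n_threads_marking   */
static atomic_size_t queued_work[NWORKERS];       /* stand-in for per-thread mark queues */
static pthread_mutex_t observer_lock = PTHREAD_MUTEX_INITIALIZER; /* cf. gc_queue_observer_lock */

/* Worker: drain its own "queue", then announce completion with a seq-cst
 * decrement, cf. a thread returning from gc_mark_and_steal with nothing left. */
static void *worker(void *arg)
{
    int tid = (int)(intptr_t)arg;
    while (atomic_load(&queued_work[tid]) > 0)
        atomic_fetch_sub(&queued_work[tid], 1);
    atomic_fetch_sub(&n_marking, 1);
    return NULL;
}

/* Observer, cf. gc_should_mark: the counter is only ever incremented while
 * observer_lock is held, so once it observes zero the count can never rise. */
static int should_mark(void)
{
    int should = 0;
    pthread_mutex_lock(&observer_lock);
    while (1) {
        int n = atomic_load(&n_marking);
        if (n == 0)
            break;                           /* zero is final: marking has terminated */
        size_t work = 0;
        for (int t = 0; t < NWORKERS; t++)
            work += atomic_load(&queued_work[t]);
        if (work >= 16 * (size_t)n) {        /* same threshold heuristic as the patch */
            atomic_fetch_add(&n_marking, 1); /* legal only under observer_lock */
            should = 1;
            break;
        }
    }
    pthread_mutex_unlock(&observer_lock);
    return should;
}

int main(void)
{
    pthread_t tids[NWORKERS];
    atomic_store(&n_marking, NWORKERS);
    for (int t = 0; t < NWORKERS; t++) {
        atomic_store(&queued_work[t], 1000);
        pthread_create(&tids[t], NULL, worker, (void *)(intptr_t)t);
    }
    /* cf. gc_mark_loop_parallel: keep helping while the observer says there is work */
    while (should_mark()) {
        /* a real helper would call gc_mark_and_steal(ptls) here */
        atomic_fetch_sub(&n_marking, 1);
    }
    for (int t = 0; t < NWORKERS; t++)
        pthread_join(tids[t], NULL);
    printf("n_marking is %d after termination\n", atomic_load(&n_marking));
    return 0;
}

The property the sketch preserves is exactly the one the comment argues for: once `should_mark` reads zero, no thread can increment the counter again, because the only increment site sits behind the same lock and behind a check that the counter is still nonzero.
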
@@ -2878,22 +2864,22 @@ int gc_should_mark(jl_ptls_t ptls)

 void gc_wake_all_for_marking(jl_ptls_t ptls)
 {
-    jl_atomic_store(&gc_master_tid, ptls->tid);
     uv_mutex_lock(&gc_threads_lock);
-    jl_atomic_fetch_add(&gc_n_threads_marking, 1);
     uv_cond_broadcast(&gc_threads_cond);
     uv_mutex_unlock(&gc_threads_lock);
 }

 void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
 {
     if (master) {
+        jl_atomic_store(&gc_master_tid, ptls->tid);
+        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
         gc_wake_all_for_marking(ptls);
         gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
     }
     while (1) {
-        int should_mark = gc_should_mark(ptls);
+        int should_mark = gc_should_mark();
         if (!should_mark) {
             break;
         }
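
Read together with the first hunk, this last hunk is what makes `assert(master_tid != -1)` sound: the `gc_master_tid` store and the initial `gc_n_threads_marking` increment move out of `gc_wake_all_for_marking` and into the master branch of `gc_mark_loop_parallel`, so both are published before any worker is woken. A compilable toy illustration of that ordering follows; it uses C11 atomics and stand-in names (`master_tid`, `n_marking`, `wake_workers`, `mark_and_steal`) rather than the gc.c symbols, and the worker side is elided.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int master_tid = -1;  /* stand-in for gc_master_tid         */
static atomic_int n_marking  = 0;   /* stand-in for gc_n_threads_marking  */

static void wake_workers(void)      { /* cf. gc_wake_all_for_marking: broadcast only */ }
static void mark_and_steal(int tid) { (void)tid; /* cf. gc_mark_and_steal */ }

/* Master-side ordering after the patch: publish the tid and count the master
 * as marking before waking anyone, so a woken worker can assert that
 * master_tid is not -1 and never observes a spurious zero in n_marking. */
static void master_mark_loop(int my_tid)
{
    atomic_store(&master_tid, my_tid);
    atomic_fetch_add(&n_marking, 1);
    wake_workers();
    mark_and_steal(my_tid);
    atomic_fetch_add(&n_marking, -1);
}

int main(void)
{
    master_mark_loop(0);
    printf("master_tid=%d n_marking=%d\n",
           atomic_load(&master_tid), atomic_load(&n_marking));
    return 0;
}
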