@@ -799,7 +799,7 @@ STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr,
 FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT
 {
     assert(gc_marked(mark_mode));
-    uintptr_t tag = jl_atomic_load_relaxed((_Atomic(uintptr_t)*)&o->header);
+    uintptr_t tag = o->header;
     if (gc_marked(tag))
         return 0;
     if (mark_reset_age) {
@@ -813,9 +813,13 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT
         tag = tag | mark_mode;
         assert((tag & 0x3) == mark_mode);
     }
-    jl_atomic_store_relaxed((_Atomic(uintptr_t)*)&o->header, tag); //xchg here was slower than
-    verify_val(jl_valueof(o)); //potentially redoing work because of a stale tag.
-    return 1;
+    // XXX: note that marking not only sets the GC bits but also updates the
+    // page metadata for pool allocated objects.
+    // The second step is **not** idempotent, so we need an atomic exchange here
+    // (instead of a pair of load&store) to avoid marking an object twice
+    tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag);
+    verify_val(jl_valueof(o));
+    return !gc_marked(tag);
 }

 // This function should be called exactly once during marking for each big
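The exchange above is the heart of the fix: with the old load&store pair, two mark threads could both read an unmarked header, both store the marked tag, and both return 1, so the non-idempotent page-metadata update ran twice. Below is a minimal standalone sketch of the race-free pattern using C11 atomics in place of the jl_atomic_* wrappers; GC_MARKED, try_setmark_exchange, and the two-thread harness are illustrative, not part of gc.c:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <threads.h>

#define GC_MARKED 1u

static _Atomic uintptr_t header;  /* stand-in for jl_taggedvalue_t's header word */
static _Atomic int winners;       /* how many threads think they marked it */

/* Atomically swap in the marked tag and report whether we were first.
 * Exactly one thread observes an unmarked old tag, so the caller's
 * non-idempotent bookkeeping runs exactly once. */
static int try_setmark_exchange(void)
{
    uintptr_t old = atomic_exchange_explicit(&header, GC_MARKED,
                                             memory_order_relaxed);
    return (old & GC_MARKED) == 0;
}

static int racer(void *arg)
{
    (void)arg;
    if (try_setmark_exchange())
        atomic_fetch_add_explicit(&winners, 1, memory_order_relaxed);
    return 0;
}

int main(void)
{
    thrd_t a, b;
    thrd_create(&a, racer, NULL);
    thrd_create(&b, racer, NULL);
    thrd_join(a, NULL);
    thrd_join(b, NULL);
    printf("winners = %d\n", atomic_load(&winners)); /* always 1 */
    return 0;
}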
@@ -854,7 +858,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
     if (mark_mode == GC_OLD_MARKED) {
         ptls->gc_cache.perm_scanned_bytes += page->osize;
         static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), "");
-        page->nold++;
+        jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1);
     }
     else {
         ptls->gc_cache.scanned_bytes += page->osize;
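Even with the exchange deciding a single winner per object, two threads can still win for different objects on the same page, so page->nold itself needs an atomic increment: a plain nold++ compiles to load/add/store and can lose updates. A small self-contained sketch of the relaxed fetch-add, with C11 atomics standing in for jl_atomic_fetch_add_relaxed; INCS and bump are made up for the demo:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <threads.h>

enum { INCS = 100000 };

static _Atomic uint16_t nold;  /* stand-in for jl_gc_pagemeta_t's nold counter */

static int bump(void *arg)
{
    (void)arg;
    for (int i = 0; i < INCS; i++)
        /* relaxed is enough: we only need the increment itself to be
         * atomic, not to order surrounding memory accesses */
        atomic_fetch_add_explicit(&nold, 1, memory_order_relaxed);
    return 0;
}

int main(void)
{
    thrd_t a, b;
    thrd_create(&a, bump, NULL);
    thrd_create(&b, bump, NULL);
    thrd_join(a, NULL);
    thrd_join(b, NULL);
    /* always (2 * INCS) mod 65536; a non-atomic nold++ could print less */
    printf("nold = %u\n", (unsigned)atomic_load(&nold));
    return 0;
}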
@@ -1377,27 +1381,20 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
         nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize;
         goto done;
     }
-    // note that `pg->nold` may not be accurate with multithreaded marking since
-    // two threads may race when trying to set the mark bit in `gc_try_setmark_tag`.
-    // We're basically losing a bit of precision in the sweep phase at the cost of
-    // making the mark phase considerably cheaper.
-    // See issue #50419
-    if (jl_n_markthreads == 0) {
-        // For quick sweep, we might be able to skip the page if the page doesn't
-        // have any young live cell before marking.
-        if (!sweep_full && !pg->has_young) {
-            assert(!prev_sweep_full || pg->prev_nold >= pg->nold);
-            if (!prev_sweep_full || pg->prev_nold == pg->nold) {
-                // the position of the freelist begin/end in this page
-                // is stored in its metadata
-                if (pg->fl_begin_offset != (uint16_t)-1) {
-                    *pfl = page_pfl_beg(pg);
-                    pfl = (jl_taggedvalue_t**)page_pfl_end(pg);
-                }
-                freedall = 0;
-                nfree = pg->nfree;
-                goto done;
+    // For quick sweep, we might be able to skip the page if the page doesn't
+    // have any young live cell before marking.
+    if (!sweep_full && !pg->has_young) {
+        assert(!prev_sweep_full || pg->prev_nold >= pg->nold);
+        if (!prev_sweep_full || pg->prev_nold == pg->nold) {
+            // the position of the freelist begin/end in this page
+            // is stored in its metadata
+            if (pg->fl_begin_offset != (uint16_t)-1) {
+                *pfl = page_pfl_beg(pg);
+                pfl = (jl_taggedvalue_t**)page_pfl_end(pg);
             }
+            freedall = 0;
+            nfree = pg->nfree;
+            goto done;
         }
     }
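Because the exchange makes the per-page mark counts exact again, the sweep no longer has to pessimize under parallel marking, which is why this hunk deletes the jl_n_markthreads == 0 guard together with the stale comment about nold imprecision (issue #50419). A compilable sketch of the restored skip condition, with a stripped-down page_meta struct standing in for jl_gc_pagemeta_t; only the field names mirror the diff, the rest is illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the handful of jl_gc_pagemeta_t fields the check reads. */
struct page_meta {
    uint16_t nold;      /* old objects marked on this page this cycle */
    uint16_t prev_nold; /* nold recorded at the previous full sweep */
    bool has_young;     /* page held a young live cell before marking */
};

/* Mirrors the condition in gc_sweep_page: a quick sweep may reuse the
 * page's stored freelist when nothing on the page died since the last
 * full sweep. Exact nold (via the exchange above) keeps this sound even
 * with parallel marking. */
static bool can_skip_quick_sweep(const struct page_meta *pg,
                                 bool sweep_full, bool prev_sweep_full)
{
    if (sweep_full || pg->has_young)
        return false;
    return !prev_sweep_full || pg->prev_nold == pg->nold;
}

int main(void)
{
    struct page_meta pg = { .nold = 7, .prev_nold = 7, .has_young = false };
    printf("skip: %d\n", can_skip_quick_sweep(&pg, false, true)); /* 1 */
    pg.nold = 6; /* an old object on this page died */
    printf("skip: %d\n", can_skip_quick_sweep(&pg, false, true)); /* 0 */
    return 0;
}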