
Commit 2cee483

use atomic compare exchange when setting the GC mark-bit (#50576)
Fixes #50574.
1 parent 7b40a36 commit 2cee483

File tree

1 file changed: +22 -25 lines changed


src/gc.c

Lines changed: 22 additions & 25 deletions
@@ -799,7 +799,7 @@ STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr,
 FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT
 {
     assert(gc_marked(mark_mode));
-    uintptr_t tag = jl_atomic_load_relaxed((_Atomic(uintptr_t)*)&o->header);
+    uintptr_t tag = o->header;
     if (gc_marked(tag))
         return 0;
     if (mark_reset_age) {
@@ -813,9 +813,13 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N
         tag = tag | mark_mode;
         assert((tag & 0x3) == mark_mode);
     }
-    jl_atomic_store_relaxed((_Atomic(uintptr_t)*)&o->header, tag); //xchg here was slower than
-    verify_val(jl_valueof(o)); //potentially redoing work because of a stale tag.
-    return 1;
+    // XXX: note that marking not only sets the GC bits but also updates the
+    // page metadata for pool allocated objects.
+    // The second step is **not** idempotent, so we need a compare exchange here
+    // (instead of a pair of load&store) to avoid marking an object twice
+    tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag);
+    verify_val(jl_valueof(o));
+    return !gc_marked(tag);
 }
 
 // This function should be called exactly once during marking for each big
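Why the exchange matters: the pre-patch code read the tag, computed the new one, and stored it back, so two marker threads could both observe an object as unmarked and both return 1, double-running the non-idempotent page-metadata update called out in the new comment. Below is a minimal, self-contained C11 sketch of the difference; the names header, MARK_BIT and try_setmark_* are invented for the illustration and this is not Julia's gc.c:

// Sketch: why claiming the mark bit needs an atomic read-modify-write.
// Build with: cc -std=c11 sketch.c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define MARK_BIT ((uintptr_t)0x1)

// Pre-patch shape: separate load and store. Two threads can both see the
// bit clear and both return 1, so non-idempotent work keyed on the return
// value (the page metadata update) runs twice.
static int try_setmark_racy(_Atomic(uintptr_t) *header)
{
    uintptr_t tag = atomic_load_explicit(header, memory_order_relaxed);
    if (tag & MARK_BIT)
        return 0;
    atomic_store_explicit(header, tag | MARK_BIT, memory_order_relaxed);
    return 1; // both racers report "I marked it"
}

// Post-patch shape: the exchange hands the previous tag to exactly one
// caller, so exactly one thread observes the unmarked->marked transition.
static int try_setmark_xchg(_Atomic(uintptr_t) *header)
{
    uintptr_t tag = atomic_load_explicit(header, memory_order_relaxed);
    if (tag & MARK_BIT)
        return 0; // fast path: already marked
    uintptr_t old = atomic_exchange_explicit(header, tag | MARK_BIT,
                                             memory_order_relaxed);
    return !(old & MARK_BIT); // 1 for the single winner only
}

int main(void)
{
    _Atomic(uintptr_t) header = 0;
    printf("racy first call:  %d\n", try_setmark_racy(&header)); // 1
    header = 0;
    printf("xchg first call:  %d\n", try_setmark_xchg(&header)); // 1
    printf("xchg second call: %d\n", try_setmark_xchg(&header)); // 0
    return 0;
}

Relaxed ordering is enough for the swap itself because only ownership of the mark bit is being arbitrated, which mirrors the patch's use of jl_atomic_exchange_relaxed.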
@@ -854,7 +858,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
     if (mark_mode == GC_OLD_MARKED) {
         ptls->gc_cache.perm_scanned_bytes += page->osize;
         static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), "");
-        page->nold++;
+        jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1);
     }
     else {
         ptls->gc_cache.scanned_bytes += page->osize;
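The page->nold bump gets the same treatment: a plain nold++ compiles to a separate load, add, and store, so concurrent marker threads can lose increments. A small C11 demonstration of the lost-update problem and the fetch-add fix; the counters are illustrative, not Julia's code, and the patch applies the same idea to the 16-bit nold field:

// Sketch: lost updates with a plain ++ vs. atomic fetch-add (C11 threads).
#include <stdatomic.h>
#include <stdio.h>
#include <threads.h>

static unsigned plain_counter = 0;
static _Atomic unsigned atomic_counter = 0;

static int worker(void *arg)
{
    (void)arg;
    for (int i = 0; i < 100000; i++) {
        plain_counter++; // data race: load/add/store can interleave
        atomic_fetch_add_explicit(&atomic_counter, 1, memory_order_relaxed);
    }
    return 0;
}

int main(void)
{
    thrd_t t1, t2;
    thrd_create(&t1, worker, NULL);
    thrd_create(&t2, worker, NULL);
    thrd_join(t1, NULL);
    thrd_join(t2, NULL);
    // plain_counter typically prints less than 200000;
    // atomic_counter is exactly 200000.
    printf("plain=%u atomic=%u\n", plain_counter, atomic_counter);
    return 0;
}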
@@ -1377,27 +1381,20 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
         nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize;
         goto done;
     }
-    // note that `pg->nold` may not be accurate with multithreaded marking since
-    // two threads may race when trying to set the mark bit in `gc_try_setmark_tag`.
-    // We're basically losing a bit of precision in the sweep phase at the cost of
-    // making the mark phase considerably cheaper.
-    // See issue #50419
-    if (jl_n_markthreads == 0) {
-        // For quick sweep, we might be able to skip the page if the page doesn't
-        // have any young live cell before marking.
-        if (!sweep_full && !pg->has_young) {
-            assert(!prev_sweep_full || pg->prev_nold >= pg->nold);
-            if (!prev_sweep_full || pg->prev_nold == pg->nold) {
-                // the position of the freelist begin/end in this page
-                // is stored in its metadata
-                if (pg->fl_begin_offset != (uint16_t)-1) {
-                    *pfl = page_pfl_beg(pg);
-                    pfl = (jl_taggedvalue_t**)page_pfl_end(pg);
-                }
-                freedall = 0;
-                nfree = pg->nfree;
-                goto done;
+    // For quick sweep, we might be able to skip the page if the page doesn't
+    // have any young live cell before marking.
+    if (!sweep_full && !pg->has_young) {
+        assert(!prev_sweep_full || pg->prev_nold >= pg->nold);
+        if (!prev_sweep_full || pg->prev_nold == pg->nold) {
+            // the position of the freelist begin/end in this page
+            // is stored in its metadata
+            if (pg->fl_begin_offset != (uint16_t)-1) {
+                *pfl = page_pfl_beg(pg);
+                pfl = (jl_taggedvalue_t**)page_pfl_end(pg);
             }
+            freedall = 0;
+            nfree = pg->nfree;
+            goto done;
         }
     }
 
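Finally, the sweep-side hunk reverts the workaround from issue #50419: now that each object is counted into pg->nold exactly once, the count stays accurate even with multithreaded marking, so the quick-sweep page skip no longer needs the jl_n_markthreads == 0 guard. A hedged paraphrase of the restored skip condition, using a stand-in struct rather than the real jl_gc_pagemeta_t:

#include <stdbool.h>
#include <stdint.h>

// Stand-in for the page metadata fields the skip test reads; the real
// jl_gc_pagemeta_t in Julia's GC has many more fields.
typedef struct {
    uint8_t  has_young; // any young live cell on the page before marking?
    uint16_t nold;      // old-marked objects counted during this mark phase
    uint16_t prev_nold; // the same count as of the previous full sweep
} page_meta_sketch_t;

// A quick sweep may leave the page's recorded freelist untouched when the
// page has no young cells and its old-object count did not change.
static bool can_skip_page(bool sweep_full, bool prev_sweep_full,
                          const page_meta_sketch_t *pg)
{
    return !sweep_full && !pg->has_young &&
           (!prev_sweep_full || pg->prev_nold == pg->nold);
}

int main(void)
{
    page_meta_sketch_t pg = { .has_young = 0, .nold = 7, .prev_nold = 7 };
    return can_skip_page(false, true, &pg) ? 0 : 1; // skippable -> exit 0
}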