gc_managed_realloc_ should be incrementing gc_num.freed if it shrinks the memory buffer on reallocation  #52923

@d-netto

Description

In release-1.10, gc_managed_realloc_ is defined as:

1.10:
static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz,
                                 int isaligned, jl_value_t *owner, int8_t can_collect)
{
    if (can_collect)
        maybe_collect(ptls);
    int is_old_marked = jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED;
    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
    if (allocsz < sz)  // overflow in adding offs, size was "negative"
        jl_throw(jl_memory_exception);

    int last_errno = errno;
#ifdef _OS_WINDOWS_
    DWORD last_error = GetLastError();
#endif
    void *b;
    if (isaligned)
        b = realloc_cache_align(d, allocsz, oldsz);
    else
        b = realloc(d, allocsz);
    if (b == NULL)
        jl_throw(jl_memory_exception);
#ifdef _OS_WINDOWS_
    SetLastError(last_error);
#endif
    errno = last_errno;
    // gc_managed_realloc_ is currently used exclusively for resizing array buffers.
    if (is_old_marked) {
        ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz;
        inc_live_bytes(allocsz - oldsz);
    }
    else if (!(allocsz < oldsz))
        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz));
    jl_atomic_store_relaxed(&ptls->gc_num.realloc,
        jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
    if (allocsz > oldsz) {
        maybe_record_alloc_to_profile((jl_value_t*)b, allocsz - oldsz, (jl_datatype_t*)jl_buff_tag);
    }
    return b;
}

as opposed to the implementation from 1.9:

1.9:
static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz,
                                 int isaligned, jl_value_t *owner, int8_t can_collect)
{
    if (can_collect)
        maybe_collect(ptls);

    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
    if (allocsz < sz)  // overflow in adding offs, size was "negative"
        jl_throw(jl_memory_exception);

    if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) {
        ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz;
        live_bytes += allocsz - oldsz;
    }
    else if (allocsz < oldsz)
        jl_atomic_store_relaxed(&ptls->gc_num.freed,
            jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz));
    else
        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz));
    jl_atomic_store_relaxed(&ptls->gc_num.realloc,
        jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);

    int last_errno = errno;
#ifdef _OS_WINDOWS_
    DWORD last_error = GetLastError();
#endif
    void *b;
    if (isaligned)
        b = realloc_cache_align(d, allocsz, oldsz);
    else
        b = realloc(d, allocsz);
    if (b == NULL)
        jl_throw(jl_memory_exception);
#ifdef _OS_WINDOWS_
    SetLastError(last_error);
#endif
    errno = last_errno;
    maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag);
    return b;
}

For some reason, after #50144 we stopped incrementing the number of freed bytes when a realloc shrinks the memory buffer: the 1.10 version has no branch that adds (oldsz - allocsz) to gc_num.freed when allocsz < oldsz.
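
A minimal sketch of what restoring the 1.9 accounting might look like in the 1.10 version (the allocsz < oldsz branch is the hypothetical fix; the surrounding lines are taken from the 1.10 code above):

    // gc_managed_realloc_ is currently used exclusively for resizing array buffers.
    if (is_old_marked) {
        ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz;
        inc_live_bytes(allocsz - oldsz);
    }
    else if (allocsz < oldsz)
        // hypothetical fix: report the shrink as freed bytes, as 1.9 did
        jl_atomic_store_relaxed(&ptls->gc_num.freed,
            jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz));
    else
        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz));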

In particular, this could skew the GC heuristics, since they use this metric to compute live_bytes as a proxy for the heap size; shrinks that are never reported as freed bytes inflate that estimate.

It could, for instance, be one of the causes of a pathological behavior we're seeing in one of our workloads on 1.10, where live_bytes increases monotonically even though RSS stays stable.
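
To illustrate that failure mode, here is a deliberately simplified toy model of the accounting (toy_gc_num_t and update_live_bytes are made-up names for this sketch, not the actual heuristics code in gc.c). If realloc shrinks are never reported through freed, the running estimate can only grow, even though the real heap footprint is flat:

#include <stdint.h>
#include <stdio.h>

// Toy model only: the real heuristics live in gc.c and are more involved.
typedef struct {
    int64_t allocd; // bytes reported as allocated since the last interval
    int64_t freed;  // bytes reported as freed since the last interval
} toy_gc_num_t;

// Fold the interval counters into a running live-bytes estimate.
static int64_t update_live_bytes(int64_t live_bytes, toy_gc_num_t *num)
{
    live_bytes += num->allocd - num->freed;
    num->allocd = 0;
    num->freed = 0;
    return live_bytes;
}

int main(void)
{
    int64_t live = 0;
    toy_gc_num_t num = {0, 0};
    // Repeatedly grow a buffer by 1 MiB and shrink it back by 1 MiB.
    for (int i = 0; i < 5; i++) {
        num.allocd += 1 << 20;   // growth: counted in both 1.9 and 1.10
        // num.freed += 1 << 20; // shrink: counted in 1.9, dropped in 1.10
        live = update_live_bytes(live, &num);
        printf("after cycle %d: live = %lld bytes\n", i, (long long)live);
    }
    return 0;
}

With the freed line commented out (the 1.10 behavior), live grows by 1 MiB per cycle; uncommenting it (the 1.9 behavior) keeps the estimate flat.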

CC: @gbaraldi
