Skip to content

gh-133136: Limit excess memory held by QSBR #135107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Include/internal/pycore_pymem.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ extern wchar_t *_PyMem_DefaultRawWcsdup(const wchar_t *str);
extern int _PyMem_DebugEnabled(void);

// Enqueue a pointer to be freed possibly after some delay.
extern void _PyMem_FreeDelayed(void *ptr);
extern void _PyMem_FreeDelayed(void *ptr, size_t size);

// Enqueue an object to be freed possibly after some delay
#ifdef Py_GIL_DISABLED
Expand Down
4 changes: 4 additions & 0 deletions Include/internal/pycore_qsbr.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ struct _qsbr_thread_state {
// Used to defer advancing write sequence a fixed number of times
int deferrals;

// Estimate for the amount of memory that is held by this thread since
// the last non-deferred advance.
size_t memory_deferred;

// Is this thread state allocated?
bool allocated;
struct _qsbr_thread_state *freelist_next;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Limit excess memory usage in the :term:`free threading` build when a
large dictionary or list is resized and accessed by multiple threads.
2 changes: 1 addition & 1 deletion Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3350,7 +3350,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx)
}
memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *));
_Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc);
_PyMem_FreeDelayed(tlbc);
_PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *));
tlbc = new_tlbc;
}
char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co));
Expand Down
4 changes: 2 additions & 2 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr)
{
#ifdef Py_GIL_DISABLED
if (use_qsbr) {
_PyMem_FreeDelayed(keys);
_PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys));
return;
}
#endif
Expand Down Expand Up @@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr)
assert(values->embedded == 0);
#ifdef Py_GIL_DISABLED
if (use_qsbr) {
_PyMem_FreeDelayed(values);
_PyMem_FreeDelayed(values, values_size_from_count(values->capacity));
return;
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion Objects/listobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr)
#ifdef Py_GIL_DISABLED
_PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item);
if (use_qsbr) {
_PyMem_FreeDelayed(array);
size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *);
_PyMem_FreeDelayed(array, size);
}
else {
PyMem_Free(array);
Expand Down
41 changes: 34 additions & 7 deletions Objects/obmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1141,8 +1141,29 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
}
}

#ifdef Py_GIL_DISABLED
// Decide whether freeing `size` more bytes should force an advance of the
// shared QSBR sequence number instead of a deferred one.
//
// Adds `size` to this thread's `memory_deferred` estimate.  Returns 1 (and
// resets the estimate to zero) when either `size` alone or the accumulated
// estimate exceeds the limit; otherwise returns 0 and keeps the new total.
static int
should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size)
{
    // Cap of 1 MiB on deferred memory, used to limit excess memory usage.
    static const size_t QSBR_DEFERRED_LIMIT = 1024 * 1024;
    size_t *deferred = &tstate->qsbr->memory_deferred;

    if (size <= QSBR_DEFERRED_LIMIT) {
        // Small request: account for it and stay deferred while the
        // running total remains within the limit.
        *deferred += size;
        if (*deferred <= QSBR_DEFERRED_LIMIT) {
            return 0;
        }
    }

    // Either a single oversized request or the accumulated total crossed
    // the limit: start counting afresh and request an advance.
    *deferred = 0;
    return 1;
}
Comment on lines +1146 to +1162
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need the early return here?
It looks like the result will be the same in the end.

Suggested change
should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size)
{
// If the deferred memory exceeds 1 MiB, we force an advance in the
// shared QSBR sequence number to limit excess memory usage.
static const size_t QSBR_DEFERRED_LIMIT = 1024 * 1024;
if (size > QSBR_DEFERRED_LIMIT) {
tstate->qsbr->memory_deferred = 0;
return 1;
}
tstate->qsbr->memory_deferred += size;
if (tstate->qsbr->memory_deferred > QSBR_DEFERRED_LIMIT) {
tstate->qsbr->memory_deferred = 0;
return 1;
}
return 0;
}
should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size)
{
// If the deferred memory exceeds 1 MiB, we force an advance in the
// shared QSBR sequence number to limit excess memory usage.
static const size_t QSBR_DEFERRED_LIMIT = 1024 * 1024;
tstate->qsbr->memory_deferred += size;
if (tstate->qsbr->memory_deferred > QSBR_DEFERRED_LIMIT) {
tstate->qsbr->memory_deferred = 0;
return 1;
}
return 0;
}

#endif

static void
free_delayed(uintptr_t ptr)
free_delayed(uintptr_t ptr, size_t size)
{
#ifndef Py_GIL_DISABLED
free_work_item(ptr, NULL, NULL);
Expand Down Expand Up @@ -1200,23 +1221,29 @@ free_delayed(uintptr_t ptr)
}

assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr);
uint64_t seq;
int force_advance = should_advance_qsbr(tstate, size);
if (force_advance) {
seq = _Py_qsbr_advance(tstate->qsbr->shared);
}
else {
seq = _Py_qsbr_deferred_advance(tstate->qsbr);
}
buf->array[buf->wr_idx].ptr = ptr;
buf->array[buf->wr_idx].qsbr_goal = seq;
buf->wr_idx++;

if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
if (buf->wr_idx == WORK_ITEMS_PER_CHUNK || force_advance) {
_PyMem_ProcessDelayed((PyThreadState *)tstate);
}
#endif
}

void
_PyMem_FreeDelayed(void *ptr)
_PyMem_FreeDelayed(void *ptr, size_t size)
{
assert(!((uintptr_t)ptr & 0x01));
if (ptr != NULL) {
free_delayed((uintptr_t)ptr);
free_delayed((uintptr_t)ptr, size);
}
}

Expand All @@ -1226,7 +1253,7 @@ _PyObject_XDecRefDelayed(PyObject *ptr)
{
assert(!((uintptr_t)ptr & 0x01));
if (ptr != NULL) {
free_delayed(((uintptr_t)ptr)|0x01);
free_delayed(((uintptr_t)ptr)|0x01, 64);
}
}
#endif
Expand Down
Loading