Skip to content

gh-114940: Add a Per-Interpreter Lock For the List of Thread States #127037

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ struct _is {

uintptr_t last_restart_version;
struct pythreads {
PyMutex mutex;
uint64_t next_unique_id;
/* The linked list of threads, newest first. */
PyThreadState *head;
Expand Down
5 changes: 5 additions & 0 deletions Include/internal/pycore_pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,11 @@ extern int _PyOS_InterruptOccurred(PyThreadState *tstate);
#define HEAD_UNLOCK(runtime) \
PyMutex_Unlock(&(runtime)->interpreters.mutex)

#define THREADS_HEAD_LOCK(interp) \
PyMutex_LockFlags(&(interp)->threads.mutex, _Py_LOCK_DONT_DETACH)
#define THREADS_HEAD_UNLOCK(interp) \
PyMutex_Unlock(&(interp)->threads.mutex)

// Get the configuration of the current interpreter.
// The caller must hold the GIL.
// Export for test_peg_generator.
Expand Down
4 changes: 4 additions & 0 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2871,20 +2871,24 @@ get_indices_in_use(PyInterpreterState *interp, struct flag_set *in_use)
assert(interp->stoptheworld.world_stopped);
assert(in_use->flags == NULL);
int32_t max_index = 0;
THREADS_HEAD_LOCK(interp);
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
int32_t idx = ((_PyThreadStateImpl *) p)->tlbc_index;
if (idx > max_index) {
max_index = idx;
}
}
THREADS_HEAD_UNLOCK(interp);
in_use->size = (size_t) max_index + 1;
in_use->flags = PyMem_Calloc(in_use->size, sizeof(*in_use->flags));
if (in_use->flags == NULL) {
return -1;
}
THREADS_HEAD_LOCK(interp);
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
in_use->flags[((_PyThreadStateImpl *) p)->tlbc_index] = 1;
}
THREADS_HEAD_UNLOCK(interp);
return 0;
}

Expand Down
2 changes: 2 additions & 0 deletions Objects/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,13 @@ get_reftotal(PyInterpreterState *interp)
since we can't determine which interpreter updated it. */
Py_ssize_t total = REFTOTAL(interp);
#ifdef Py_GIL_DISABLED
THREADS_HEAD_LOCK(interp);
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
/* This may race with other threads modifications to their reftotal */
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)p;
total += _Py_atomic_load_ssize_relaxed(&tstate_impl->reftotal);
}
THREADS_HEAD_UNLOCK(interp);
#endif
return total;
}
Expand Down
2 changes: 2 additions & 0 deletions Objects/obmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1405,13 +1405,15 @@ get_mimalloc_allocated_blocks(PyInterpreterState *interp)
{
size_t allocated_blocks = 0;
#ifdef Py_GIL_DISABLED
THREADS_HEAD_LOCK(interp);
for (PyThreadState *t = interp->threads.head; t != NULL; t = t->next) {
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
mi_heap_t *heap = &tstate->mimalloc.heaps[i];
mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
}
}
THREADS_HEAD_UNLOCK(interp);

mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
Expand Down
2 changes: 2 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -296,11 +296,13 @@ Py_SetRecursionLimit(int new_limit)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
interp->ceval.recursion_limit = new_limit;
THREADS_HEAD_LOCK(interp);
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
int depth = p->py_recursion_limit - p->py_recursion_remaining;
p->py_recursion_limit = new_limit;
p->py_recursion_remaining = new_limit - depth;
}
THREADS_HEAD_UNLOCK(interp);
}

/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
Expand Down
12 changes: 4 additions & 8 deletions Python/ceval_gil.c
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I remember correctly, this was an issue that could not be changed because it would cause unexpected issues with CI (it was originally changed in my PR)

Original file line number Diff line number Diff line change
Expand Up @@ -977,25 +977,21 @@ make_pending_calls(PyThreadState *tstate)
void
_Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit)
{
_PyRuntimeState *runtime = &_PyRuntime;

HEAD_LOCK(runtime);
THREADS_HEAD_LOCK(interp);
for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) {
_Py_set_eval_breaker_bit(tstate, bit);
}
HEAD_UNLOCK(runtime);
THREADS_HEAD_UNLOCK(interp);
}

void
_Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit)
{
_PyRuntimeState *runtime = &_PyRuntime;

HEAD_LOCK(runtime);
THREADS_HEAD_LOCK(interp);
for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) {
_Py_unset_eval_breaker_bit(tstate, bit);
}
HEAD_UNLOCK(runtime);
THREADS_HEAD_UNLOCK(interp);
}

void
Expand Down
20 changes: 10 additions & 10 deletions Python/gc_free_threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -350,9 +350,9 @@ gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor,
assert(interp->stoptheworld.world_stopped);

int err;
HEAD_LOCK(&_PyRuntime);
THREADS_HEAD_LOCK(interp);
err = gc_visit_heaps_lock_held(interp, visitor, arg);
HEAD_UNLOCK(&_PyRuntime);
THREADS_HEAD_UNLOCK(interp);
return err;
}

Expand All @@ -374,7 +374,7 @@ gc_visit_stackref(_PyStackRef stackref)
static void
gc_visit_thread_stacks(PyInterpreterState *interp)
{
HEAD_LOCK(&_PyRuntime);
THREADS_HEAD_LOCK(interp);
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f = f->previous) {
PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable);
Expand All @@ -390,7 +390,7 @@ gc_visit_thread_stacks(PyInterpreterState *interp)
}
}
}
HEAD_UNLOCK(&_PyRuntime);
THREADS_HEAD_UNLOCK(interp);
}

static void
Expand Down Expand Up @@ -429,14 +429,14 @@ process_delayed_frees(PyInterpreterState *interp)

// Merge the queues from other threads into our own queue so that we can
// process all of the pending delayed free requests at once.
HEAD_LOCK(&_PyRuntime);
THREADS_HEAD_LOCK(interp);
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
_PyThreadStateImpl *other = (_PyThreadStateImpl *)p;
if (other != current_tstate) {
llist_concat(&current_tstate->mem_free_queue, &other->mem_free_queue);
}
}
HEAD_UNLOCK(&_PyRuntime);
THREADS_HEAD_UNLOCK(interp);

_PyMem_ProcessDelayed((PyThreadState *)current_tstate);
}
Expand Down Expand Up @@ -1226,7 +1226,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
state->gcstate->old[i-1].count = 0;
}

HEAD_LOCK(&_PyRuntime);
THREADS_HEAD_LOCK(interp);
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p;

Expand All @@ -1236,7 +1236,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
// merge refcounts for all queued objects
merge_queued_objects(tstate, state);
}
HEAD_UNLOCK(&_PyRuntime);
THREADS_HEAD_UNLOCK(interp);

process_delayed_frees(interp);

Expand Down Expand Up @@ -1991,13 +1991,13 @@ PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
void
_PyGC_ClearAllFreeLists(PyInterpreterState *interp)
{
HEAD_LOCK(&_PyRuntime);
THREADS_HEAD_LOCK(interp);
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head;
while (tstate != NULL) {
_PyObject_ClearFreeLists(&tstate->freelists, 0);
tstate = (_PyThreadStateImpl *)tstate->base.next;
}
HEAD_UNLOCK(&_PyRuntime);
THREADS_HEAD_UNLOCK(interp);
}

#endif // Py_GIL_DISABLED
5 changes: 2 additions & 3 deletions Python/instrumentation.c
Original file line number Diff line number Diff line change
Expand Up @@ -1006,13 +1006,12 @@ set_global_version(PyThreadState *tstate, uint32_t version)

#ifdef Py_GIL_DISABLED
// Set the version on all threads in free-threaded builds.
_PyRuntimeState *runtime = &_PyRuntime;
HEAD_LOCK(runtime);
THREADS_HEAD_LOCK(interp);
for (tstate = interp->threads.head; tstate;
tstate = PyThreadState_Next(tstate)) {
set_version_raw(&tstate->eval_breaker, version);
};
HEAD_UNLOCK(runtime);
THREADS_HEAD_UNLOCK(interp);
#else
// Normal builds take the current version from instrumentation_version when
// attaching a thread, so we only have to set the current thread's version.
Expand Down
34 changes: 18 additions & 16 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
{
assert(interp != NULL);
assert(tstate != NULL);
_PyRuntimeState *runtime = interp->runtime;

/* XXX Conditions we need to enforce:

Expand All @@ -790,17 +789,17 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
}

// Clear the current/main thread state last.
HEAD_LOCK(runtime);
THREADS_HEAD_LOCK(interp);
PyThreadState *p = interp->threads.head;
HEAD_UNLOCK(runtime);
THREADS_HEAD_UNLOCK(interp);
while (p != NULL) {
// See https://github.com/python/cpython/issues/102126
// Must be called without HEAD_LOCK held as it can deadlock
// if any finalizer tries to acquire that lock.
PyThreadState_Clear(p);
HEAD_LOCK(runtime);
THREADS_HEAD_LOCK(interp);
p = p->next;
HEAD_UNLOCK(runtime);
THREADS_HEAD_UNLOCK(interp);
}
if (tstate->interp == interp) {
/* We fix tstate->_status below when we for sure aren't using it
Expand Down Expand Up @@ -1539,7 +1538,7 @@ new_threadstate(PyInterpreterState *interp, int whence)
#endif

/* We serialize concurrent creation to protect global state. */
HEAD_LOCK(interp->runtime);
THREADS_HEAD_LOCK(interp);

// Initialize the new thread state.
interp->threads.next_unique_id += 1;
Expand All @@ -1550,7 +1549,7 @@ new_threadstate(PyInterpreterState *interp, int whence)
PyThreadState *old_head = interp->threads.head;
add_threadstate(interp, (PyThreadState *)tstate, old_head);

HEAD_UNLOCK(interp->runtime);
THREADS_HEAD_UNLOCK(interp);

#ifdef Py_GIL_DISABLED
// Must be called with lock unlocked to avoid lock ordering deadlocks.
Expand Down Expand Up @@ -1741,7 +1740,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
}
_PyRuntimeState *runtime = interp->runtime;

HEAD_LOCK(runtime);
THREADS_HEAD_LOCK(interp);
if (tstate->prev) {
tstate->prev->next = tstate->next;
}
Expand All @@ -1757,9 +1756,11 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
if (interp->stoptheworld.requested) {
decrement_stoptheworld_countdown(&interp->stoptheworld);
}
HEAD_LOCK(runtime);
if (runtime->stoptheworld.requested) {
decrement_stoptheworld_countdown(&runtime->stoptheworld);
}
HEAD_UNLOCK(runtime);
Comment on lines +1759 to +1763
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is correct, but need to double-check.

}

#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
Expand All @@ -1770,7 +1771,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
assert(tstate_impl->refcounts.values == NULL);
#endif

HEAD_UNLOCK(runtime);
THREADS_HEAD_UNLOCK(interp);

// XXX Unbind in PyThreadState_Clear(), or earlier
// (and assert not-equal here)?
Expand Down Expand Up @@ -1851,13 +1852,15 @@ _PyThreadState_RemoveExcept(PyThreadState *tstate)
{
assert(tstate != NULL);
PyInterpreterState *interp = tstate->interp;
_PyRuntimeState *runtime = interp->runtime;

#ifdef Py_GIL_DISABLED
#ifndef NDEBUG
_PyRuntimeState *runtime = interp->runtime;
#endif
assert(runtime->stoptheworld.world_stopped);
#endif

HEAD_LOCK(runtime);
THREADS_HEAD_LOCK(interp);
/* Remove all thread states, except tstate, from the linked list of
thread states. */
PyThreadState *list = interp->threads.head;
Expand All @@ -1872,7 +1875,7 @@ _PyThreadState_RemoveExcept(PyThreadState *tstate)
}
tstate->prev = tstate->next = NULL;
interp->threads.head = tstate;
HEAD_UNLOCK(runtime);
THREADS_HEAD_UNLOCK(interp);

return list;
}
Expand Down Expand Up @@ -2339,7 +2342,6 @@ _PyEval_StartTheWorld(PyInterpreterState *interp)
int
PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
{
_PyRuntimeState *runtime = &_PyRuntime;
PyInterpreterState *interp = _PyInterpreterState_GET();

/* Although the GIL is held, a few C API functions can be called
Expand All @@ -2348,7 +2350,7 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
* list of thread states we're traversing, so to prevent that we lock
* head_mutex for the duration.
*/
HEAD_LOCK(runtime);
THREADS_HEAD_LOCK(interp);
for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) {
if (tstate->thread_id != id) {
continue;
Expand All @@ -2363,13 +2365,13 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
*/
Py_XINCREF(exc);
PyObject *old_exc = _Py_atomic_exchange_ptr(&tstate->async_exc, exc);
HEAD_UNLOCK(runtime);
THREADS_HEAD_UNLOCK(interp);

Py_XDECREF(old_exc);
_Py_set_eval_breaker_bit(tstate, _PY_ASYNC_EXCEPTION_BIT);
return 1;
}
HEAD_UNLOCK(runtime);
THREADS_HEAD_UNLOCK(interp);
return 0;
}

Expand Down
Loading