Skip to content

Commit a565327

Browse files
authored
gh-144981: Make PyUnstable_Code_SetExtra/GetExtra thread-safe (#144980)
* Make PyUnstable_Code_SetExtra/GetExtra thread-safe
1 parent 8246d58 commit a565327

File tree

4 files changed

+243
-31
lines changed

4 files changed

+243
-31
lines changed

Lib/test/test_free_threading/test_code.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,41 @@
11
import unittest
22

3+
try:
4+
import ctypes
5+
except ImportError:
6+
ctypes = None
7+
38
from threading import Thread
49
from unittest import TestCase
510

611
from test.support import threading_helper
12+
from test.support.threading_helper import run_concurrently
13+
14+
if ctypes is not None:
15+
capi = ctypes.pythonapi
16+
17+
freefunc = ctypes.CFUNCTYPE(None, ctypes.c_voidp)
18+
19+
RequestCodeExtraIndex = capi.PyUnstable_Eval_RequestCodeExtraIndex
20+
RequestCodeExtraIndex.argtypes = (freefunc,)
21+
RequestCodeExtraIndex.restype = ctypes.c_ssize_t
22+
23+
SetExtra = capi.PyUnstable_Code_SetExtra
24+
SetExtra.argtypes = (ctypes.py_object, ctypes.c_ssize_t, ctypes.c_voidp)
25+
SetExtra.restype = ctypes.c_int
26+
27+
GetExtra = capi.PyUnstable_Code_GetExtra
28+
GetExtra.argtypes = (
29+
ctypes.py_object,
30+
ctypes.c_ssize_t,
31+
ctypes.POINTER(ctypes.c_voidp),
32+
)
33+
GetExtra.restype = ctypes.c_int
34+
35+
# Note: each call to RequestCodeExtraIndex permanently allocates a slot
36+
# (the counter is monotonically increasing), up to MAX_CO_EXTRA_USERS (255).
37+
NTHREADS = 20
38+
739

840
@threading_helper.requires_working_threading()
941
class TestCode(TestCase):
@@ -25,6 +57,83 @@ def run_in_thread():
2557
for thread in threads:
2658
thread.join()
2759

60+
@unittest.skipUnless(ctypes, "ctypes is required")
61+
def test_request_code_extra_index_concurrent(self):
62+
"""Test concurrent calls to RequestCodeExtraIndex"""
63+
results = []
64+
65+
def worker():
66+
idx = RequestCodeExtraIndex(freefunc(0))
67+
self.assertGreaterEqual(idx, 0)
68+
results.append(idx)
69+
70+
run_concurrently(worker_func=worker, nthreads=NTHREADS)
71+
72+
# Every thread must get a unique index.
73+
self.assertEqual(len(results), NTHREADS)
74+
self.assertEqual(len(set(results)), NTHREADS)
75+
76+
@unittest.skipUnless(ctypes, "ctypes is required")
77+
def test_code_extra_all_ops_concurrent(self):
78+
"""Test concurrent RequestCodeExtraIndex + SetExtra + GetExtra"""
79+
LOOP = 100
80+
81+
def f():
82+
pass
83+
84+
code = f.__code__
85+
86+
def worker():
87+
idx = RequestCodeExtraIndex(freefunc(0))
88+
self.assertGreaterEqual(idx, 0)
89+
90+
for i in range(LOOP):
91+
ret = SetExtra(code, idx, ctypes.c_voidp(i + 1))
92+
self.assertEqual(ret, 0)
93+
94+
for _ in range(LOOP):
95+
extra = ctypes.c_voidp()
96+
ret = GetExtra(code, idx, extra)
97+
self.assertEqual(ret, 0)
98+
# The slot was set by this thread, so the value must
99+
# be the last one written.
100+
self.assertEqual(extra.value, LOOP)
101+
102+
run_concurrently(worker_func=worker, nthreads=NTHREADS)
103+
104+
@unittest.skipUnless(ctypes, "ctypes is required")
105+
def test_code_extra_set_get_concurrent(self):
106+
"""Test concurrent SetExtra + GetExtra on a shared index"""
107+
LOOP = 100
108+
109+
def f():
110+
pass
111+
112+
code = f.__code__
113+
114+
idx = RequestCodeExtraIndex(freefunc(0))
115+
self.assertGreaterEqual(idx, 0)
116+
117+
def worker():
118+
for i in range(LOOP):
119+
ret = SetExtra(code, idx, ctypes.c_voidp(i + 1))
120+
self.assertEqual(ret, 0)
121+
122+
for _ in range(LOOP):
123+
extra = ctypes.c_voidp()
124+
ret = GetExtra(code, idx, extra)
125+
self.assertEqual(ret, 0)
126+
# Value is set by any writer thread.
127+
self.assertTrue(1 <= extra.value <= LOOP)
128+
129+
run_concurrently(worker_func=worker, nthreads=NTHREADS)
130+
131+
# Every thread's last write is LOOP, so the final value must be LOOP.
132+
extra = ctypes.c_voidp()
133+
ret = GetExtra(code, idx, extra)
134+
self.assertEqual(ret, 0)
135+
self.assertEqual(extra.value, LOOP)
136+
28137

29138
if __name__ == "__main__":
30139
unittest.main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Made :c:func:`PyUnstable_Code_SetExtra`, :c:func:`PyUnstable_Code_GetExtra`,
2+
and :c:func:`PyUnstable_Eval_RequestCodeExtraIndex` thread-safe on the
3+
:term:`free threaded <free threading>` build.

Objects/codeobject.c

Lines changed: 113 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1575,6 +1575,67 @@ typedef struct {
15751575
} _PyCodeObjectExtra;
15761576

15771577

1578+
static inline size_t
1579+
code_extra_size(Py_ssize_t n)
1580+
{
1581+
return sizeof(_PyCodeObjectExtra) + (n - 1) * sizeof(void *);
1582+
}
1583+
1584+
#ifdef Py_GIL_DISABLED
1585+
static int
1586+
code_extra_grow_ft(PyCodeObject *co, _PyCodeObjectExtra *old_co_extra,
1587+
Py_ssize_t old_ce_size, Py_ssize_t new_ce_size,
1588+
Py_ssize_t index, void *extra)
1589+
{
1590+
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(co);
1591+
_PyCodeObjectExtra *new_co_extra = PyMem_Malloc(
1592+
code_extra_size(new_ce_size));
1593+
if (new_co_extra == NULL) {
1594+
PyErr_NoMemory();
1595+
return -1;
1596+
}
1597+
1598+
if (old_ce_size > 0) {
1599+
memcpy(new_co_extra->ce_extras, old_co_extra->ce_extras,
1600+
old_ce_size * sizeof(void *));
1601+
}
1602+
for (Py_ssize_t i = old_ce_size; i < new_ce_size; i++) {
1603+
new_co_extra->ce_extras[i] = NULL;
1604+
}
1605+
new_co_extra->ce_size = new_ce_size;
1606+
new_co_extra->ce_extras[index] = extra;
1607+
1608+
// Publish new buffer and its contents to lock-free readers.
1609+
FT_ATOMIC_STORE_PTR_RELEASE(co->co_extra, new_co_extra);
1610+
if (old_co_extra != NULL) {
1611+
// QSBR: defer old-buffer free until lock-free readers quiesce.
1612+
_PyMem_FreeDelayed(old_co_extra, code_extra_size(old_ce_size));
1613+
}
1614+
return 0;
1615+
}
1616+
#else
1617+
static int
1618+
code_extra_grow_gil(PyCodeObject *co, _PyCodeObjectExtra *old_co_extra,
1619+
Py_ssize_t old_ce_size, Py_ssize_t new_ce_size,
1620+
Py_ssize_t index, void *extra)
1621+
{
1622+
_PyCodeObjectExtra *new_co_extra = PyMem_Realloc(
1623+
old_co_extra, code_extra_size(new_ce_size));
1624+
if (new_co_extra == NULL) {
1625+
PyErr_NoMemory();
1626+
return -1;
1627+
}
1628+
1629+
for (Py_ssize_t i = old_ce_size; i < new_ce_size; i++) {
1630+
new_co_extra->ce_extras[i] = NULL;
1631+
}
1632+
new_co_extra->ce_size = new_ce_size;
1633+
new_co_extra->ce_extras[index] = extra;
1634+
co->co_extra = new_co_extra;
1635+
return 0;
1636+
}
1637+
#endif
1638+
15781639
int
15791640
PyUnstable_Code_GetExtra(PyObject *code, Py_ssize_t index, void **extra)
15801641
{
@@ -1583,15 +1644,19 @@ PyUnstable_Code_GetExtra(PyObject *code, Py_ssize_t index, void **extra)
15831644
return -1;
15841645
}
15851646

1586-
PyCodeObject *o = (PyCodeObject*) code;
1587-
_PyCodeObjectExtra *co_extra = (_PyCodeObjectExtra*) o->co_extra;
1647+
PyCodeObject *co = (PyCodeObject *)code;
1648+
*extra = NULL;
15881649

1589-
if (co_extra == NULL || index < 0 || co_extra->ce_size <= index) {
1590-
*extra = NULL;
1650+
if (index < 0) {
15911651
return 0;
15921652
}
15931653

1594-
*extra = co_extra->ce_extras[index];
1654+
// Lock-free read; pairs with release stores in SetExtra.
1655+
_PyCodeObjectExtra *co_extra = FT_ATOMIC_LOAD_PTR_ACQUIRE(co->co_extra);
1656+
if (co_extra != NULL && index < co_extra->ce_size) {
1657+
*extra = FT_ATOMIC_LOAD_PTR_ACQUIRE(co_extra->ce_extras[index]);
1658+
}
1659+
15951660
return 0;
15961661
}
15971662

@@ -1601,40 +1666,59 @@ PyUnstable_Code_SetExtra(PyObject *code, Py_ssize_t index, void *extra)
16011666
{
16021667
PyInterpreterState *interp = _PyInterpreterState_GET();
16031668

1604-
if (!PyCode_Check(code) || index < 0 ||
1605-
index >= interp->co_extra_user_count) {
1669+
// co_extra_user_count is monotonically increasing and published with
1670+
// release store in RequestCodeExtraIndex, so once an index is valid
1671+
// it stays valid.
1672+
Py_ssize_t user_count = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(
1673+
interp->co_extra_user_count);
1674+
1675+
if (!PyCode_Check(code) || index < 0 || index >= user_count) {
16061676
PyErr_BadInternalCall();
16071677
return -1;
16081678
}
16091679

1610-
PyCodeObject *o = (PyCodeObject*) code;
1611-
_PyCodeObjectExtra *co_extra = (_PyCodeObjectExtra *) o->co_extra;
1680+
PyCodeObject *co = (PyCodeObject *)code;
1681+
int result = 0;
1682+
void *old_slot_value = NULL;
16121683

1613-
if (co_extra == NULL || co_extra->ce_size <= index) {
1614-
Py_ssize_t i = (co_extra == NULL ? 0 : co_extra->ce_size);
1615-
co_extra = PyMem_Realloc(
1616-
co_extra,
1617-
sizeof(_PyCodeObjectExtra) +
1618-
(interp->co_extra_user_count-1) * sizeof(void*));
1619-
if (co_extra == NULL) {
1620-
return -1;
1621-
}
1622-
for (; i < interp->co_extra_user_count; i++) {
1623-
co_extra->ce_extras[i] = NULL;
1624-
}
1625-
co_extra->ce_size = interp->co_extra_user_count;
1626-
o->co_extra = co_extra;
1684+
Py_BEGIN_CRITICAL_SECTION(co);
1685+
1686+
_PyCodeObjectExtra *old_co_extra = (_PyCodeObjectExtra *)co->co_extra;
1687+
Py_ssize_t old_ce_size = (old_co_extra == NULL)
1688+
? 0 : old_co_extra->ce_size;
1689+
1690+
// Fast path: slot already exists, update in place.
1691+
if (index < old_ce_size) {
1692+
old_slot_value = old_co_extra->ce_extras[index];
1693+
FT_ATOMIC_STORE_PTR_RELEASE(old_co_extra->ce_extras[index], extra);
1694+
goto done;
16271695
}
16281696

1629-
if (co_extra->ce_extras[index] != NULL) {
1630-
freefunc free = interp->co_extra_freefuncs[index];
1631-
if (free != NULL) {
1632-
free(co_extra->ce_extras[index]);
1697+
// Slow path: buffer needs to grow.
1698+
Py_ssize_t new_ce_size = user_count;
1699+
#ifdef Py_GIL_DISABLED
1700+
// FT build: allocate new buffer and swap; QSBR reclaims the old one.
1701+
result = code_extra_grow_ft(
1702+
co, old_co_extra, old_ce_size, new_ce_size, index, extra);
1703+
#else
1704+
// GIL build: grow with realloc.
1705+
result = code_extra_grow_gil(
1706+
co, old_co_extra, old_ce_size, new_ce_size, index, extra);
1707+
#endif
1708+
1709+
done:;
1710+
Py_END_CRITICAL_SECTION();
1711+
if (old_slot_value != NULL) {
1712+
// Free the old slot value if a free function was registered.
1713+
// The caller must ensure no other thread can still access the old
1714+
// value after this overwrite.
1715+
freefunc free_extra = interp->co_extra_freefuncs[index];
1716+
if (free_extra != NULL) {
1717+
free_extra(old_slot_value);
16331718
}
16341719
}
16351720

1636-
co_extra->ce_extras[index] = extra;
1637-
return 0;
1721+
return result;
16381722
}
16391723

16401724

Python/ceval.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3493,11 +3493,27 @@ PyUnstable_Eval_RequestCodeExtraIndex(freefunc free)
34933493
PyInterpreterState *interp = _PyInterpreterState_GET();
34943494
Py_ssize_t new_index;
34953495

3496-
if (interp->co_extra_user_count == MAX_CO_EXTRA_USERS - 1) {
3496+
#ifdef Py_GIL_DISABLED
3497+
struct _py_code_state *state = &interp->code_state;
3498+
FT_MUTEX_LOCK(&state->mutex);
3499+
#endif
3500+
3501+
if (interp->co_extra_user_count >= MAX_CO_EXTRA_USERS - 1) {
3502+
#ifdef Py_GIL_DISABLED
3503+
FT_MUTEX_UNLOCK(&state->mutex);
3504+
#endif
34973505
return -1;
34983506
}
3499-
new_index = interp->co_extra_user_count++;
3507+
3508+
new_index = interp->co_extra_user_count;
35003509
interp->co_extra_freefuncs[new_index] = free;
3510+
3511+
// Publish freefuncs[new_index] before making the index visible.
3512+
FT_ATOMIC_STORE_SSIZE_RELEASE(interp->co_extra_user_count, new_index + 1);
3513+
3514+
#ifdef Py_GIL_DISABLED
3515+
FT_MUTEX_UNLOCK(&state->mutex);
3516+
#endif
35013517
return new_index;
35023518
}
35033519

0 commit comments

Comments
 (0)