Skip to content

Commit b9d2ee6

Browse files
authored
gh-129701: Fix a data race in intern_common in the free threaded build (GH-130089)
* gh-129701: Fix a data race in `intern_common` in the free threaded build
* Use a mutex to avoid potentially returning a non-immortalized string, because immortalization happens after the insertion into the interned dict.
* Use `Py_DECREF()` calls instead of `Py_SET_REFCNT(s, Py_REFCNT(s) - 2)` for thread-safety. This code path isn't performance sensitive, so just use `Py_DECREF()` unconditionally for simplicity.
1 parent fc8c99a commit b9d2ee6

File tree

2 files changed

+17
-6
lines changed

2 files changed

+17
-6
lines changed

Include/internal/pycore_global_objects.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ struct _Py_static_objects {
6464
(interp)->cached_objects.NAME
6565

6666
struct _Py_interp_cached_objects {
67+
#ifdef Py_GIL_DISABLED
68+
PyMutex interned_mutex;
69+
#endif
6770
PyObject *interned_strings;
6871

6972
/* object.__reduce__ */

Objects/unicodeobject.c

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,14 @@ NOTE: In the interpreter's initialization phase, some globals are currently
112112
# define _PyUnicode_CHECK(op) PyUnicode_Check(op)
113113
#endif
114114

115+
#ifdef Py_GIL_DISABLED
116+
# define LOCK_INTERNED(interp) PyMutex_Lock(&_Py_INTERP_CACHED_OBJECT(interp, interned_mutex))
117+
# define UNLOCK_INTERNED(interp) PyMutex_Unlock(&_Py_INTERP_CACHED_OBJECT(interp, interned_mutex))
118+
#else
119+
# define LOCK_INTERNED(interp)
120+
# define UNLOCK_INTERNED(interp)
121+
#endif
122+
115123
static inline char* _PyUnicode_UTF8(PyObject *op)
116124
{
117125
return FT_ATOMIC_LOAD_PTR_ACQUIRE(_PyCompactUnicodeObject_CAST(op)->utf8);
@@ -15814,11 +15822,13 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1581415822
PyObject *interned = get_interned_dict(interp);
1581515823
assert(interned != NULL);
1581615824

15825+
LOCK_INTERNED(interp);
1581715826
PyObject *t;
1581815827
{
1581915828
int res = PyDict_SetDefaultRef(interned, s, s, &t);
1582015829
if (res < 0) {
1582115830
PyErr_Clear();
15831+
UNLOCK_INTERNED(interp);
1582215832
return s;
1582315833
}
1582415834
else if (res == 1) {
@@ -15828,6 +15838,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1582815838
PyUnicode_CHECK_INTERNED(t) == SSTATE_INTERNED_MORTAL) {
1582915839
immortalize_interned(t);
1583015840
}
15841+
UNLOCK_INTERNED(interp);
1583115842
return t;
1583215843
}
1583315844
else {
@@ -15844,12 +15855,8 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1584415855
if (!_Py_IsImmortal(s)) {
1584515856
/* The two references in interned dict (key and value) are not counted.
1584615857
unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */
15847-
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
15848-
#ifdef Py_REF_DEBUG
15849-
/* let's be pedantic with the ref total */
15850-
_Py_DecRefTotal(_PyThreadState_GET());
15851-
_Py_DecRefTotal(_PyThreadState_GET());
15852-
#endif
15858+
Py_DECREF(s);
15859+
Py_DECREF(s);
1585315860
}
1585415861
FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);
1585515862

@@ -15864,6 +15871,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1586415871
immortalize_interned(s);
1586515872
}
1586615873

15874+
UNLOCK_INTERNED(interp);
1586715875
return s;
1586815876
}
1586915877

0 commit comments

Comments
 (0)