Skip to content

gh-124470: Fix crash when reading from object instance dictionary while replacing it #122489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 0 additions & 18 deletions Include/internal/pycore_gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) {
# define _PyGC_BITS_UNREACHABLE (4)
# define _PyGC_BITS_FROZEN (8)
# define _PyGC_BITS_SHARED (16)
# define _PyGC_BITS_SHARED_INLINE (32)
# define _PyGC_BITS_DEFERRED (64) // Use deferred reference counting
#endif

Expand Down Expand Up @@ -119,23 +118,6 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) {
}
#define _PyObject_GC_SET_SHARED(op) _PyObject_GC_SET_SHARED(_Py_CAST(PyObject*, op))

/* True if the memory of the object is shared between multiple
* threads and needs special purpose when freeing due to
* the possibility of in-flight lock-free reads occurring.
* Objects with this bit that are GC objects will automatically
* delay-freed by PyObject_GC_Del. */
static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) {
return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_SHARED_INLINE);
}
#define _PyObject_GC_IS_SHARED_INLINE(op) \
_PyObject_GC_IS_SHARED_INLINE(_Py_CAST(PyObject*, op))

static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) {
_PyObject_SET_GC_BITS(op, _PyGC_BITS_SHARED_INLINE);
}
#define _PyObject_GC_SET_SHARED_INLINE(op) \
_PyObject_GC_SET_SHARED_INLINE(_Py_CAST(PyObject*, op))

#endif

/* Bit flags for _gc_prev */
Expand Down
16 changes: 15 additions & 1 deletion Include/internal/pycore_pymem.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,25 @@ extern int _PyMem_DebugEnabled(void);
extern void _PyMem_FreeDelayed(void *ptr);

// Enqueue an object to be freed possibly after some delay
extern void _PyObject_FreeDelayed(void *ptr);
#ifdef Py_GIL_DISABLED
extern void _PyObject_XDecRefDelayed(PyObject *obj);
#else
static inline void _PyObject_XDecRefDelayed(PyObject *obj)
{
Py_XDECREF(obj);
}
#endif

// Periodically process delayed free requests.
extern void _PyMem_ProcessDelayed(PyThreadState *tstate);


// Periodically process delayed free requests when the world is stopped.
// Notify of any objects whic should be freeed.
typedef void (*delayed_dealloc_cb)(PyObject *, void *);
extern void _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate,
delayed_dealloc_cb cb, void *state);

// Abandon all thread-local delayed free requests and push them to the
// interpreter's queue.
extern void _PyMem_AbandonDelayed(PyThreadState *tstate);
Expand Down
64 changes: 64 additions & 0 deletions Lib/test/test_free_threading/test_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,70 @@ def writer_func(l):
for ref in thread_list:
self.assertIsNone(ref())

def test_racing_set_object_dict(self):
"""Races assigning to __dict__ should be thread safe"""
class C: pass
class MyDict(dict): pass
for cyclic in (False, True):
f = C()
f.__dict__ = {"foo": 42}
THREAD_COUNT = 10

def writer_func(l):
for i in range(1000):
if cyclic:
other_d = {}
d = MyDict({"foo": 100})
if cyclic:
d["x"] = other_d
other_d["bar"] = d
l.append(weakref.ref(d))
f.__dict__ = d

def reader_func():
for i in range(1000):
f.foo

lists = []
readers = []
writers = []
for x in range(THREAD_COUNT):
thread_list = []
lists.append(thread_list)
writer = Thread(target=partial(writer_func, thread_list))
writers.append(writer)

for x in range(THREAD_COUNT):
reader = Thread(target=partial(reader_func))
readers.append(reader)

for writer in writers:
writer.start()
for reader in readers:
reader.start()

for writer in writers:
writer.join()

for reader in readers:
reader.join()

f.__dict__ = {}
gc.collect()
gc.collect()

count = 0
ids = set()
for thread_list in lists:
for i, ref in enumerate(thread_list):
if ref() is None:
continue
count += 1
ids.add(id(ref()))
count += 1

self.assertEqual(count, 0)


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix crash in free-threaded builds when replacing object dictionary while reading attribute on another thread
164 changes: 132 additions & 32 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -7087,51 +7087,146 @@ set_dict_inline_values(PyObject *obj, PyDictObject *new_dict)
}
}

int
_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
#ifdef Py_GIL_DISABLED

// Trys and sets the dictionary for an object in the easy case when our current
// dictionary is either completely not materialized or is a dictionary which
// does not point at the inline values.
static bool
try_set_dict_inline_only_or_other_dict(PyObject *obj, PyObject *new_dict, PyDictObject **cur_dict)
{
bool replaced = false;
Py_BEGIN_CRITICAL_SECTION(obj);

PyDictObject *dict = *cur_dict = _PyObject_GetManagedDict(obj);
if (dict == NULL) {
// We only have inline values, we can just completely replace them.
set_dict_inline_values(obj, (PyDictObject *)new_dict);
replaced = true;
goto exit_lock;
}

if (FT_ATOMIC_LOAD_PTR_RELAXED(dict->ma_values) != _PyObject_InlineValues(obj)) {
// We have a materialized dict which doesn't point at the inline values,
// We get to simply swap dictionaries and free the old dictionary.
FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
(PyDictObject *)Py_XNewRef(new_dict));
replaced = true;
goto exit_lock;
}
else {
// We have inline values, we need to lock the dict and the object
// at the same time to safely dematerialize them. To do that while releasing
// the object lock we need a strong reference to the current dictionary.
Py_INCREF(dict);
}
exit_lock:
Py_END_CRITICAL_SECTION();
return replaced;
}

// Replaces a dictionary that is probably the dictionary which has been
// materialized and points at the inline values. We could have raced
// and replaced it with another dictionary though.
static int
replace_dict_probably_inline_materialized(PyObject *obj, PyDictObject *inline_dict,
PyDictObject *cur_dict, PyObject *new_dict)
{
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj);

if (cur_dict == inline_dict) {
assert(FT_ATOMIC_LOAD_PTR_RELAXED(inline_dict->ma_values) == _PyObject_InlineValues(obj));

int err = _PyDict_DetachFromObject(inline_dict, obj);
if (err != 0) {
assert(new_dict == NULL);
return err;
}
}

FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
(PyDictObject *)Py_XNewRef(new_dict));
return 0;
}

#endif

static void
decref_maybe_delay(PyObject *obj, bool delay)
{
if (delay) {
_PyObject_XDecRefDelayed(obj);
}
else {
Py_XDECREF(obj);
}
}

static int
set_or_clear_managed_dict(PyObject *obj, PyObject *new_dict, bool clear)
{
assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
#ifndef NDEBUG
Py_BEGIN_CRITICAL_SECTION(obj);
assert(_PyObject_InlineValuesConsistencyCheck(obj));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do these consistency checks need to be within some sort of lock?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type flag one is fine, I'll add some ifdefs and lock for the inline check.

Py_END_CRITICAL_SECTION();
#endif
int err = 0;
PyTypeObject *tp = Py_TYPE(obj);
if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
PyDictObject *dict = _PyObject_GetManagedDict(obj);
if (dict == NULL) {
#ifdef Py_GIL_DISABLED
Py_BEGIN_CRITICAL_SECTION(obj);
PyDictObject *prev_dict;
if (!try_set_dict_inline_only_or_other_dict(obj, new_dict, &prev_dict)) {
// We had a materialized dictionary which pointed at the inline
// values. We need to lock both the object and the dict at the
// same time to safely replace it. We can't merely lock the dictionary
// while the object is locked because it could suspend the object lock.
PyDictObject *cur_dict;

dict = _PyObject_ManagedDictPointer(obj)->dict;
if (dict == NULL) {
set_dict_inline_values(obj, (PyDictObject *)new_dict);
}
assert(prev_dict != NULL);
Py_BEGIN_CRITICAL_SECTION2(obj, prev_dict);

Py_END_CRITICAL_SECTION();
// We could have had another thread race in between the call to
// try_set_dict_inline_only_or_other_dict where we locked the object
// and when we unlocked and re-locked the dictionary.
cur_dict = _PyObject_GetManagedDict(obj);

if (dict == NULL) {
return 0;
err = replace_dict_probably_inline_materialized(obj, prev_dict,
cur_dict, new_dict);

Py_END_CRITICAL_SECTION2();

// Decref for the dictionary we incref'd in try_set_dict_inline_only_or_other_dict
// while the object was locked
decref_maybe_delay((PyObject *)prev_dict,
!clear && prev_dict != cur_dict);
if (err != 0) {
return err;
}
#else
set_dict_inline_values(obj, (PyDictObject *)new_dict);
return 0;
#endif
}

Py_BEGIN_CRITICAL_SECTION2(dict, obj);
prev_dict = cur_dict;
}

// We've locked dict, but the actual dict could have changed
// since we locked it.
dict = _PyObject_ManagedDictPointer(obj)->dict;
err = _PyDict_DetachFromObject(dict, obj);
assert(err == 0 || new_dict == NULL);
if (err == 0) {
FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
(PyDictObject *)Py_XNewRef(new_dict));
if (prev_dict != NULL) {
// decref for the dictionary that we replaced
decref_maybe_delay((PyObject *)prev_dict, !clear);
}
Py_END_CRITICAL_SECTION2();

if (err == 0) {
Py_XDECREF(dict);
return 0;
#else
PyDictObject *dict = _PyObject_GetManagedDict(obj);
if (dict == NULL) {
set_dict_inline_values(obj, (PyDictObject *)new_dict);
return 0;
}
if (_PyDict_DetachFromObject(dict, obj) == 0) {
_PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)Py_XNewRef(new_dict);
Py_DECREF(dict);
return 0;
}
assert(new_dict == NULL);
return -1;
#endif
}
else {
PyDictObject *dict;
Expand All @@ -7144,17 +7239,22 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
(PyDictObject *)Py_XNewRef(new_dict));

Py_END_CRITICAL_SECTION();

Py_XDECREF(dict);
decref_maybe_delay((PyObject *)dict, !clear);
}
assert(_PyObject_InlineValuesConsistencyCheck(obj));
return err;
}

int
_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
{
return set_or_clear_managed_dict(obj, new_dict, false);
}

void
PyObject_ClearManagedDict(PyObject *obj)
{
if (_PyObject_SetManagedDict(obj, NULL) < 0) {
if (set_or_clear_managed_dict(obj, NULL, true) < 0) {
/* Must be out of memory */
assert(PyErr_Occurred() == PyExc_MemoryError);
PyErr_WriteUnraisable(NULL);
Expand Down
Loading
Loading