Skip to content

gh-90562: Improve zero argument support for super() in dataclasses when slots=True #124692

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 56 additions & 14 deletions Lib/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -1222,11 +1222,6 @@ def _get_slots(cls):


def _update_func_cell_for__class__(f, oldcls, newcls):
# Returns True if we update a cell, else False.
if f is None:
# f will be None in the case of a property where not all of
# fget, fset, and fdel are used. Nothing to do in that case.
return False
try:
idx = f.__code__.co_freevars.index("__class__")
except ValueError:
Expand All @@ -1235,13 +1230,38 @@ def _update_func_cell_for__class__(f, oldcls, newcls):
# Fix the cell to point to the new class, if it's already pointing
# at the old class. I'm not convinced that the "is oldcls" test
# is needed, but other than performance can't hurt.
closure = f.__closure__[idx]
if closure.cell_contents is oldcls:
closure.cell_contents = newcls
cell = f.__closure__[idx]
if cell.cell_contents is oldcls:
cell.cell_contents = newcls
return True
return False


def _find_inner_functions(obj, _seen=None, _depth=0):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the leading underscores on _seen and _depth?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should indicate that these arguments are only used within the function (in recursive calls) and not outside of the function body.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the whole function is private, so it can only be used inside CPython by us :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, you've got a point. Removed the underscores.

if _seen is None:
_seen = set()
if id(obj) in _seen:
return None
_seen.add(id(obj))

_depth += 1
# Normally just inspection of a descriptor object itself should be enough,
# and we should encounter the function as its attribute,
# but in case function was wrapped (e.g. functools.partition was used),
# we want to dive at least one level deeper.
if _depth > 2:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are depth 1 and 2 special? What is this test (and _depth in general) trying to accomplish? Please add comments explaining what's going on here, it's not at all clear from the code.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added an explanation and few test cases for this behaviour. Caught two bugs in the process \o/

return None

for attr in dir(obj):
value = getattr(obj, attr, None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This case is always complex, since getattr can evaluate properties and other objects. There are two ways of hanling this case:

  1. Allow all exceptions to raise
  2. Hide all exceptions from user here

I am not quite sure which one is better here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for bringing this to my attention!

After some hacking, I've decided to go with the third option: removing any possibility of user defined code execution.

if value is None:
continue
if isinstance(value, types.FunctionType):
yield inspect.unwrap(value)
return
yield from _find_inner_functions(value, _seen, _depth)


def _create_slots(defined_fields, inherited_slots, field_names, weakref_slot):
# The slots for our class. Remove slots from our base classes. Add
# '__weakref__' if weakref_slot was given, unless it is already present.
Expand Down Expand Up @@ -1317,19 +1337,41 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields):
# (the newly created one, which we're returning) and not the
# original class. We can break out of this loop as soon as we
# make an update, since all closures for a class will share a
# given cell.
# given cell. First we try to find a pure function or a property,
# and then fallback to inspecting custom descriptors
# if no pure function or property is found.

custom_descriptors_to_check = []
for member in newcls.__dict__.values():
# If this is a wrapped function, unwrap it.
member = inspect.unwrap(member)

if isinstance(member, types.FunctionType):
if _update_func_cell_for__class__(member, cls, newcls):
break
elif isinstance(member, property):
if (_update_func_cell_for__class__(member.fget, cls, newcls)
or _update_func_cell_for__class__(member.fset, cls, newcls)
or _update_func_cell_for__class__(member.fdel, cls, newcls)):
break
elif isinstance(member, property) and (
any(
# Unwrap once more in case function
# was wrapped before it became property.
_update_func_cell_for__class__(inspect.unwrap(f), cls, newcls)
for f in (member.fget, member.fset, member.fdel)
if f is not None
)
):
break
elif hasattr(member, "__get__") and not inspect.ismemberdescriptor(
member
):
# We don't want to inspect custom descriptors just yet
# there's still a chance we'll encounter a pure function
# or a property and won't have to use slower recursive search.
custom_descriptors_to_check.append(member)
else:
# Now let's ensure custom descriptors won't be left out.
for descriptor in custom_descriptors_to_check:
for f in _find_inner_functions(descriptor):
if _update_func_cell_for__class__(f, cls, newcls):
break

return newcls

Expand Down
197 changes: 197 additions & 0 deletions Lib/test/test_dataclasses/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import weakref
import traceback
import unittest
from functools import partial
from functools import update_wrapper
from unittest.mock import Mock
from typing import ClassVar, Any, List, Union, Tuple, Dict, Generic, TypeVar, Optional, Protocol, DefaultDict
from typing import get_type_hints
Expand Down Expand Up @@ -5031,6 +5033,201 @@ def foo(self):

A().foo()

def test_wrapped_property(self):
def mydecorator(f):
@wraps(f)
def wrapper(*args, **kwargs):
return f(*args, **kwargs)
return wrapper

class B:
@property
def foo(self):
return "bar"

@dataclass(slots=True)
class A(B):
@property
@mydecorator
def foo(self):
return super().foo

self.assertEqual(A().foo, "bar")

def test_custom_descriptor(self):
class CustomDescriptor:
def __init__(self, f):
self._f = f

def __get__(self, instance, owner):
return self._f(instance)

class B:
def foo(self):
return "bar"

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(cls):
return super().foo()

self.assertEqual(A().foo, "bar")

def test_custom_descriptor_wrapped(self):
class CustomDescriptor:
def __init__(self, f):
self._f = update_wrapper(lambda *args, **kwargs: f(*args, **kwargs), f)

def __get__(self, instance, owner):
return self._f(instance)

class B:
def foo(self):
return "bar"

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(cls):
return super().foo()

self.assertEqual(A().foo, "bar")

def test_custom_nested_descriptor(self):
class CustomFunctionWrapper:
def __init__(self, f):
self._f = f

def __call__(self, *args, **kwargs):
return self._f(*args, **kwargs)

class CustomDescriptor:
def __init__(self, f):
self._wrapper = CustomFunctionWrapper(f)

def __get__(self, instance, owner):
return self._wrapper(instance)

class B:
def foo(self):
return "bar"

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(cls):
return super().foo()

self.assertEqual(A().foo, "bar")

def test_custom_nested_descriptor_with_partial(self):
class CustomDescriptor:
def __init__(self, f):
self._wrapper = partial(f, value="bar")

def __get__(self, instance, owner):
return self._wrapper(instance)

class B:
def foo(self, value):
return value

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(self, value):
return super().foo(value)

self.assertEqual(A().foo, "bar")

def test_pure_functions_preferred_to_custom_descriptors(self):
class CustomDescriptor:
def __init__(self, f):
self._wrapper = partial(f, value="bar")

def __get__(self, instance, owner):
return self._wrapper(instance)

def __dir__(self):
raise RuntimeError("Never should be accessed")

class B:
def foo(self, value):
return value

with self.assertRaises(RuntimeError) as context:
@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(self, value): ...

self.assertEqual(context.exception.args, ("Never should be accessed",))

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(self, value):
return super().foo(value)

@property
def bar(self):
return super()

self.assertEqual(A().foo, "bar")

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(self, value):
return super().foo(value)

def bar(self):
return super()

self.assertEqual(A().foo, "bar")

def test_custom_too_nested_descriptor(self):
class UnnecessaryNestedWrapper:
def __init__(self, wrapper):
self._wrapper = wrapper

def __call__(self, *args, **kwargs):
return self._wrapper(*args, **kwargs)

class CustomFunctionWrapper:
def __init__(self, f):
self._f = f

def __call__(self, *args, **kwargs):
return self._f(*args, **kwargs)

class CustomDescriptor:
def __init__(self, f):
self._wrapper = UnnecessaryNestedWrapper(CustomFunctionWrapper(f))

def __get__(self, instance, owner):
return self._wrapper(instance)

class B:
def foo(self):
return "bar"

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(cls):
return super().foo()

with self.assertRaises(TypeError) as context:
A().foo

expected_error_message = (
'super(type, obj): obj (instance of A) is not '
'an instance or subtype of type (A).'
)
self.assertEqual(context.exception.args, (expected_error_message,))

def test_remembered_class(self):
# Apply the dataclass decorator manually (not when the class
# is created), so that we can keep a reference to the
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Modify dataclasses to enable zero argument support for ``super()`` when ``slots=True`` is
specified and custom descriptor is used or ``property`` function is wrapped.
Loading