Skip to content

Commit d82dcf0

Browse files
feat: Implement dunder repr for collection, schema, column and rule (#63)
1 parent c25880e commit d82dcf0

File tree

6 files changed

+153
-0
lines changed

6 files changed

+153
-0
lines changed

dataframely/_base_collection.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from __future__ import annotations
55

6+
import textwrap
67
import typing
78
from abc import ABCMeta
89
from collections.abc import Iterable
@@ -245,6 +246,32 @@ def _derive_member_info(
245246
# Some other unknown annotation
246247
raise AnnotationImplementationError(attr, type_annotation)
247248

249+
def __repr__(cls) -> str:
    """Render a multi-line, human-readable summary of the collection class.

    The text starts with a ``[Collection "..."]`` header, followed by a
    ``Members:`` section with one line per entry of ``cls.members()``. If
    ``cls._filters()`` is non-empty, a ``Filters:`` section follows that lists
    every filter name together with the ``explain()`` text of the filter logic
    applied to an empty collection. The result ends with a line break.

    Returns:
        The formatted summary.
    """
    # NOTE(review): ``cls`` is the collection class itself, so
    # ``cls.__class__`` is its metaclass and the header shows the metaclass
    # name instead of the collection's own name. The text is kept unchanged
    # because it is observable output; confirm whether ``cls.__name__`` was
    # intended.
    parts = [f'[Collection "{cls.__class__.__name__}"]']
    parts.append(textwrap.indent("Members:", prefix=" " * 2))
    for name, member in cls.members().items():  # type: ignore
        parts.append(
            textwrap.indent(
                f'- "{name}": {member.schema.__name__}'
                f"(optional={member.is_optional}, "
                f"ignored_in_filters={member.ignored_in_filters}, "
                f"inline_for_sampling={member.inline_for_sampling})",
                prefix=" " * 4,
            )
        )
    if filters := cls._filters():  # type: ignore
        parts.append(textwrap.indent("Filters:", prefix=" " * 2))
        # The empty collection does not depend on the filter being rendered,
        # so it is built once instead of once per filter.
        empty_collection = cls.create_empty()  # type: ignore
        for filter_name, filter_ in filters.items():
            parts.append(textwrap.indent(f'- "{filter_name}":', prefix=" " * 4))
            parts.append(
                textwrap.indent(
                    f"{filter_.logic(empty_collection).explain()}",
                    prefix=" " * 8,
                )
            )
    parts.append("")  # Add line break at the end
    return "\n".join(parts)
274+
248275

249276
class BaseCollection(metaclass=CollectionMeta):
250277
"""Internal utility abstraction to reference collections without introducing

dataframely/_base_schema.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from __future__ import annotations
55

6+
import textwrap
67
from abc import ABCMeta
78
from copy import copy
89
from dataclasses import dataclass, field
@@ -162,6 +163,18 @@ def _get_metadata(source: dict[str, Any]) -> Metadata:
162163
result.rules[attr] = value
163164
return result
164165

166+
def __repr__(cls) -> str:
    """Render a multi-line, human-readable summary of the schema class.

    The text starts with a ``[Schema "<class name>"]`` header, followed by a
    ``Columns:`` section with one ``- "<name>": <repr>`` line per entry of
    ``cls.columns()``. If ``cls._schema_validation_rules()`` is non-empty, a
    ``Rules:`` section with the same line format follows. The result ends with
    a line break.

    Returns:
        The formatted summary.
    """
    section_prefix = " " * 2
    entry_prefix = " " * 4

    lines = [
        f'[Schema "{cls.__name__}"]',
        textwrap.indent("Columns:", prefix=section_prefix),
    ]
    lines.extend(
        textwrap.indent(f'- "{column_name}": {column!r}', prefix=entry_prefix)
        for column_name, column in cls.columns().items()  # type: ignore
    )

    schema_rules = cls._schema_validation_rules()  # type: ignore
    if schema_rules:
        lines.append(textwrap.indent("Rules:", prefix=section_prefix))
        lines.extend(
            textwrap.indent(f'- "{rule_name}": {schema_rule!r}', prefix=entry_prefix)
            for rule_name, schema_rule in schema_rules.items()
        )

    # A trailing empty entry makes the joined text end with a line break.
    lines.append("")
    return "\n".join(lines)
177+
165178

166179
class BaseSchema(metaclass=SchemaMeta):
167180
"""Internal utility abstraction to reference schemas without introducing cyclical

dataframely/_rule.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ def from_dict(cls, data: dict[str, Any]) -> Self:
4242
"""
4343
return cls(data["expr"])
4444

45+
def __repr__(self) -> str:
    """Return the string form of the rule's expression (``self.expr``)."""
    expression_text = str(self.expr)
    return expression_text
47+
4548

4649
class GroupRule(Rule):
4750
"""Rule that is evaluated on a group of columns."""
@@ -62,6 +65,9 @@ def as_dict(self) -> dict[str, Any]:
6265
def from_dict(cls, data: dict[str, Any]) -> Self:
6366
return cls(data["expr"], group_columns=data["group_columns"])
6467

68+
def __repr__(self) -> str:
    """Return the parent class's repr followed by the grouping columns.

    The result has the form ``<parent repr> grouped by <group_columns>``.
    """
    base_text = super().__repr__()
    return f"{base_text} grouped by {self.group_columns}"
70+
6571

6672
def rule(*, group_by: list[str] | None = None) -> Callable[[ValidationFunction], Rule]:
6773
"""Mark a function as a rule to evaluate during validation.

dataframely/columns/_base.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,21 @@ def _attributes_match(
372372

373373
# -------------------------------- DUNDER METHODS -------------------------------- #
374374

375+
def __repr__(self) -> str:
    """Return a constructor-like representation of the column.

    The parameters of the class's ``__init__`` are inspected and every
    parameter whose same-named attribute differs from the parameter's default
    is rendered as ``name=<repr of value>``. ``self`` and ``alias`` are always
    left out.

    Parameters that have no attribute of the same name on the instance (for
    example a ``**kwargs`` catch-all on a subclass) are skipped so that
    building the representation does not raise ``AttributeError``.

    Returns:
        A string of the form ``ClassName(param=value, ...)``.
    """
    missing = object()  # Sentinel: distinguishes "no such attribute" from None
    parts = []
    init_parameters = inspect.signature(self.__class__.__init__).parameters
    for attribute, param_details in init_parameters.items():
        # alias is always equal to the column name here
        if attribute in ("self", "alias"):
            continue
        value = getattr(self, attribute, missing)
        if value is missing:
            continue
        # Do not include attributes that are set to their default value
        if value == param_details.default:
            continue
        parts.append(f"{attribute}={value!r}")
    return f"{self.__class__.__name__}({', '.join(parts)})"
389+
375390
def __str__(self) -> str:
    """Return the lower-cased name of the instance's class."""
    class_name = self.__class__.__name__
    return class_name.lower()
377392

tests/collection/test_repr.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright (c) QuantCo 2025-2025
2+
# SPDX-License-Identifier: BSD-3-Clause
3+
4+
import textwrap
5+
6+
import polars as pl
7+
8+
import dataframely as dy
9+
10+
11+
class MySchema(dy.Schema):
    """Schema shared by both collection members: one integer primary key ``a``."""

    a = dy.Integer(primary_key=True)
13+
14+
15+
class MyCollection(dy.Collection):
    """Collection with two ``MySchema`` members and a single filter."""

    member_a: dy.LazyFrame[MySchema]
    member_b: dy.LazyFrame[MySchema]

    @dy.filter()
    def member_a_member_b_one_to_one(self) -> pl.LazyFrame:
        """Inner-join ``member_a`` with ``member_b`` on column ``a``."""
        left_frame = self.member_a
        right_frame = self.member_b
        return left_frame.join(right_frame, on="a", how="inner")
22+
23+
24+
def test_repr_collection() -> None:
# Pins the complete multi-line repr of MyCollection: the header line, a
# "Members:" section with one entry per member, and a "Filters:" section with
# the text rendered for the single filter.
# NOTE(review): the expected header names "CollectionMeta" rather than
# "MyCollection" — confirm that this is the intended output.
# NOTE(review): the leading whitespace of the expected text is not visible in
# this view; it is significant for the comparison and must stay as authored.
25+
assert repr(MyCollection) == textwrap.dedent("""\
26+
[Collection "CollectionMeta"]
27+
Members:
28+
- "member_a": MySchema(optional=False, ignored_in_filters=False, inline_for_sampling=False)
29+
- "member_b": MySchema(optional=False, ignored_in_filters=False, inline_for_sampling=False)
30+
Filters:
31+
- "member_a_member_b_one_to_one":
32+
INNER JOIN:
33+
LEFT PLAN ON: [col("a")]
34+
DF ["a"]; PROJECT */1 COLUMNS
35+
RIGHT PLAN ON: [col("a")]
36+
DF ["a"]; PROJECT */1 COLUMNS
37+
END INNER JOIN
38+
""")

tests/schema/test_repr.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright (c) QuantCo 2025-2025
2+
# SPDX-License-Identifier: BSD-3-Clause
3+
import textwrap
4+
5+
import polars as pl
6+
7+
import dataframely as dy
8+
9+
10+
def test_repr_no_rules() -> None:
    """A schema without rules renders only the header and the columns section."""

    class SchemaNoRules(dy.Schema):
        a = dy.Integer()

    expected = textwrap.dedent("""\
        [Schema "SchemaNoRules"]
          Columns:
            - "a": Integer(nullable=True)
        """)
    actual = repr(SchemaNoRules)
    assert actual == expected
19+
20+
21+
def test_repr_only_column_rules() -> None:
    """A column-level constraint shows up inside the column's own repr."""

    class SchemaColumnRules(dy.Schema):
        a = dy.Integer(min=10)

    expected = textwrap.dedent("""\
        [Schema "SchemaColumnRules"]
          Columns:
            - "a": Integer(nullable=True, min=10)
        """)
    actual = repr(SchemaColumnRules)
    assert actual == expected
30+
31+
32+
class SchemaWithRules(dy.Schema):
    """Schema with two columns, one plain rule and one grouped rule."""

    a = dy.Integer(min=10)
    b = dy.String(primary_key=True, regex=r"^[A-Z]{3}$", alias="b2")

    @dy.rule()
    def my_rule() -> pl.Expr:
        """Require ``a`` to be below 100."""
        column_a = pl.col("a")
        return column_a < 100

    @dy.rule(group_by=["a"])
    def my_group_rule() -> pl.Expr:
        """Require the sum of ``a`` within each group to exceed 50."""
        group_total = pl.col("a").sum()
        return group_total > 50
43+
44+
45+
def test_repr_with_rules() -> None:
    """Columns and rules are both listed; the grouped rule names its columns."""
    expected = textwrap.dedent("""\
        [Schema "SchemaWithRules"]
          Columns:
            - "a": Integer(nullable=True, min=10)
            - "b2": String(nullable=False, primary_key=True, regex='^[A-Z]{3}$')
          Rules:
            - "my_rule": [(col("a")) < (dyn int: 100)]
            - "my_group_rule": [(col("a").sum()) > (dyn int: 50)] grouped by ['a']
        """)
    actual = repr(SchemaWithRules)
    assert actual == expected

0 commit comments

Comments
 (0)