Skip to content

feat: Implement dunder repr for collection, schema, column and rule #63

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions dataframely/_base_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import textwrap
import typing
from abc import ABCMeta
from collections.abc import Iterable
Expand Down Expand Up @@ -245,6 +246,32 @@ def _derive_member_info(
# Some other unknown annotation
raise AnnotationImplementationError(attr, type_annotation)

def __repr__(cls) -> str:
parts = [f'[Collection "{cls.__class__.__name__}"]']
parts.append(textwrap.indent("Members:", prefix=" " * 2))
for name, member in cls.members().items(): # type: ignore
parts.append(
textwrap.indent(
f'- "{name}": {member.schema.__name__}'
f"(optional={member.is_optional}, "
f"ignored_in_filters={member.ignored_in_filters}, "
f"inline_for_sampling={member.inline_for_sampling})",
prefix=" " * 4,
)
)
if filters := cls._filters(): # type: ignore
parts.append(textwrap.indent("Filters:", prefix=" " * 2))
for name, member in filters.items():
parts.append(textwrap.indent(f'- "{name}":', prefix=" " * 4))
parts.append(
textwrap.indent(
f"{member.logic(cls.create_empty()).explain()}", # type: ignore
prefix=" " * 8,
)
)
parts.append("") # Add line break at the end
return "\n".join(parts)


class BaseCollection(metaclass=CollectionMeta):
"""Internal utility abstraction to reference collections without introducing
Expand Down
13 changes: 13 additions & 0 deletions dataframely/_base_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import textwrap
from abc import ABCMeta
from copy import copy
from dataclasses import dataclass, field
Expand Down Expand Up @@ -157,6 +158,18 @@ def _get_metadata(source: dict[str, Any]) -> Metadata:
result.rules[attr] = value
return result

def __repr__(cls) -> str:
parts = [f'[Schema "{cls.__name__}"]']
parts.append(textwrap.indent("Columns:", prefix=" " * 2))
for name, col in cls.columns().items(): # type: ignore
parts.append(textwrap.indent(f'- "{name}": {col!r}', prefix=" " * 4))
if validation_rules := cls._schema_validation_rules(): # type: ignore
parts.append(textwrap.indent("Rules:", prefix=" " * 2))
for name, rule in validation_rules.items():
parts.append(textwrap.indent(f'- "{name}": {rule!r}', prefix=" " * 4))
parts.append("") # Add line break at the end
return "\n".join(parts)


class BaseSchema(metaclass=SchemaMeta):
"""Internal utility abstraction to reference schemas without introducing cyclical
Expand Down
6 changes: 6 additions & 0 deletions dataframely/_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def from_dict(cls, data: dict[str, Any]) -> Self:
"""
return cls(data["expr"])

def __repr__(self) -> str:
return str(self.expr)


class GroupRule(Rule):
"""Rule that is evaluated on a group of columns."""
Expand All @@ -62,6 +65,9 @@ def as_dict(self) -> dict[str, Any]:
def from_dict(cls, data: dict[str, Any]) -> Self:
    """Build an instance from a dictionary.

    Args:
        data: Mapping that provides the keys ``"expr"`` and ``"group_columns"``.

    Returns:
        The result of calling ``cls`` with the expression as positional argument
        and the group columns as the ``group_columns`` keyword argument.
    """
    expr = data["expr"]
    group_columns = data["group_columns"]
    return cls(expr, group_columns=group_columns)

def __repr__(self) -> str:
    """Return the parent class's repr followed by the group columns."""
    base_text = super().__repr__()
    return f"{base_text} grouped by {self.group_columns}"


def rule(*, group_by: list[str] | None = None) -> Callable[[ValidationFunction], Rule]:
"""Mark a function as a rule to evaluate during validation.
Expand Down
15 changes: 15 additions & 0 deletions dataframely/columns/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,21 @@ def _attributes_match(

# -------------------------------- DUNDER METHODS -------------------------------- #

def __repr__(self) -> str:
parts = [
f"{attribute}={repr(getattr(self, attribute))}"
for attribute, param_details in inspect.signature(
self.__class__.__init__
).parameters.items()
if attribute
not in ["self", "alias"] # alias is always equal to the column name here
and not (
# Do not include attributes that are set to their default value
getattr(self, attribute) == param_details.default
)
]
return f"{self.__class__.__name__}({', '.join(parts)})"

def __str__(self) -> str:
return self.__class__.__name__.lower()

Expand Down
38 changes: 38 additions & 0 deletions tests/collection/test_repr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) QuantCo 2025-2025
# SPDX-License-Identifier: BSD-3-Clause

import textwrap

import polars as pl

import dataframely as dy


class MySchema(dy.Schema):
    """Schema with a single integer primary-key column ``a``."""

    a = dy.Integer(primary_key=True)


class MyCollection(dy.Collection):
    """Collection with two ``MySchema`` members and a single filter."""

    member_a: dy.LazyFrame[MySchema]
    member_b: dy.LazyFrame[MySchema]

    @dy.filter()
    def member_a_member_b_one_to_one(self) -> pl.LazyFrame:
        """Inner-join ``member_a`` with ``member_b`` on column ``a``."""
        left, right = self.member_a, self.member_b
        return left.join(right, on="a", how="inner")


def test_repr_collection() -> None:
# Pins the exact repr of MyCollection: the header line, one entry per member with
# its flags, and the explained plan of the collection's single filter.
# NOTE(review): the header pins "CollectionMeta", which looks like the metaclass
# name rather than the collection's own name ("MyCollection") — confirm this is
# the intended repr; the schema repr tests pin the schema's own class name.
assert repr(MyCollection) == textwrap.dedent("""\
[Collection "CollectionMeta"]
Members:
- "member_a": MySchema(optional=False, ignored_in_filters=False, inline_for_sampling=False)
- "member_b": MySchema(optional=False, ignored_in_filters=False, inline_for_sampling=False)
Filters:
- "member_a_member_b_one_to_one":
INNER JOIN:
LEFT PLAN ON: [col("a")]
DF ["a"]; PROJECT */1 COLUMNS
RIGHT PLAN ON: [col("a")]
DF ["a"]; PROJECT */1 COLUMNS
END INNER JOIN
""")
54 changes: 54 additions & 0 deletions tests/schema/test_repr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (c) QuantCo 2025-2025
# SPDX-License-Identifier: BSD-3-Clause
import textwrap

import polars as pl

import dataframely as dy


def test_repr_no_rules() -> None:
# Schema with one column and no rules: the expected repr contains the header and
# the "Columns:" section only, without a "Rules:" section.
class SchemaNoRules(dy.Schema):
a = dy.Integer()

assert repr(SchemaNoRules) == textwrap.dedent("""\
[Schema "SchemaNoRules"]
Columns:
- "a": Integer(nullable=True)
""")


def test_repr_only_column_rules() -> None:
# A constraint passed to the column itself (min=10) is expected to show up among
# the column's arguments; no separate "Rules:" section is expected.
class SchemaColumnRules(dy.Schema):
a = dy.Integer(min=10)

assert repr(SchemaColumnRules) == textwrap.dedent("""\
[Schema "SchemaColumnRules"]
Columns:
- "a": Integer(nullable=True, min=10)
""")


class SchemaWithRules(dy.Schema):
    """Schema with two constrained columns, one plain rule and one group rule."""

    a = dy.Integer(min=10)
    b = dy.String(primary_key=True, regex=r"^[A-Z]{3}$", alias="b2")

    @dy.rule()
    def my_rule() -> pl.Expr:
        """Require column ``a`` to be below 100."""
        return pl.col("a") < 100

    @dy.rule(group_by=["a"])
    def my_group_rule() -> pl.Expr:
        """Require the sum of column ``a`` to exceed 50, grouped by ``a``."""
        return pl.col("a").sum() > 50


def test_repr_with_rules() -> None:
# Pins the exact repr of SchemaWithRules. Column "b" is declared with alias="b2"
# and is expected to be listed as "b2", with the alias left out of its arguments.
# Each rule is expected as the string form of its expression; the group rule
# additionally carries a " grouped by [...]" suffix with its group columns.
assert repr(SchemaWithRules) == textwrap.dedent("""\
[Schema "SchemaWithRules"]
Columns:
- "a": Integer(nullable=True, min=10)
- "b2": String(nullable=False, primary_key=True, regex='^[A-Z]{3}$')
Rules:
- "my_rule": [(col("a")) < (dyn int: 100)]
- "my_group_rule": [(col("a").sum()) > (dyn int: 50)] grouped by ['a']
""")
Loading