Skip to content

Commit

Permalink
Merge pull request #11 from vertti/polars-support
Browse files Browse the repository at this point in the history
Polars support
  • Loading branch information
vertti authored Jan 12, 2025
2 parents 59ba620 + 151383b commit 23b2e0f
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 105 deletions.
24 changes: 13 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,19 @@

## Description

In projects using Pandas, it's very common to have functions that take Pandas DataFrames as input or produce them as output.
It's hard to figure out quickly what these DataFrames contain. This library offers simple decorators to annotate your functions
so that they document themselves, and that documentation is kept up to date by validating the input and output at runtime.

For example,
Working with DataFrames often means passing them through multiple transformation functions, making it easy to lose track of their structure over time. DAFFY adds runtime validation and documentation to your DataFrame operations through simple decorators. By declaring the expected columns and types in your function definitions, you can document and validate each function's input and output structure, for example:

```python
@df_in(columns=["Brand", "Price"]) # the function expects a DataFrame as input parameter with columns Brand and Price
@df_out(columns=["Brand", "Price"]) # the function will return a DataFrame with columns Brand and Price
def filter_cars(car_df):
# before this code is executed, the input DataFrame is validated according to the above decorator
# filter some cars..
return filtered_cars_df
@df_in(columns=["price", "bedrooms", "location"])
@df_out(columns=["price_per_room", "price_category"])
def analyze_housing(houses_df):
# Transform raw housing data into price analysis
return analyzed_df
```

Like type hints for DataFrames, DAFFY helps you catch structural mismatches early and keeps your data pipeline documentation synchronized with the code. Compatible with both Pandas and Polars.


## Table of Contents
* [Installation](#installation)
* [Usage](#usage)
Expand Down Expand Up @@ -167,6 +165,10 @@ MIT

## Changelog

### 0.8.0

- Support Polars DataFrames

### 0.7.0

- Support Pandas 2.x
Expand Down
27 changes: 17 additions & 10 deletions daffy/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
from typing import Any, Callable, Dict, List, Optional, Union

import pandas as pd
import polars as pl

ColumnsDef = Union[List, Dict]
DataFrameType = Union[pd.DataFrame, pl.DataFrame]


def _check_columns(df: pd.DataFrame, columns: ColumnsDef, strict: bool) -> None:
def _check_columns(df: DataFrameType, columns: ColumnsDef, strict: bool) -> None:
if isinstance(columns, list):
for column in columns:
assert column in df.columns, f"Column {column} missing from DataFrame. Got {_describe_pd(df)}"
Expand Down Expand Up @@ -43,7 +45,9 @@ def wrapper_df_out(func: Callable) -> Callable:
@wraps(func)
def wrapper(*args: str, **kwargs: Any) -> Any:
result = func(*args, **kwargs)
assert isinstance(result, pd.DataFrame), f"Wrong return type. Expected pandas dataframe, got {type(result)}"
assert isinstance(result, pd.DataFrame) or isinstance(result, pl.DataFrame), (
f"Wrong return type. Expected DataFrame, got {type(result)}"
)
if columns:
_check_columns(result, columns, strict)
return result
Expand All @@ -53,7 +57,7 @@ def wrapper(*args: str, **kwargs: Any) -> Any:
return wrapper_df_out


def _get_parameter(func: Callable, name: Optional[str] = None, *args: str, **kwargs: Any) -> pd.DataFrame:
def _get_parameter(func: Callable, name: Optional[str] = None, *args: str, **kwargs: Any) -> DataFrameType:
if not name:
if len(args) == 0:
return None
Expand Down Expand Up @@ -85,8 +89,8 @@ def wrapper_df_in(func: Callable) -> Callable:
@wraps(func)
def wrapper(*args: str, **kwargs: Any) -> Any:
df = _get_parameter(func, name, *args, **kwargs)
assert isinstance(df, pd.DataFrame), (
f"Wrong parameter type. Expected Pandas DataFrame, got {type(df).__name__} instead."
assert isinstance(df, pd.DataFrame) or isinstance(df, pl.DataFrame), (
f"Wrong parameter type. Expected DataFrame, got {type(df).__name__} instead."
)
if columns:
_check_columns(df, columns, strict)
Expand All @@ -97,24 +101,27 @@ def wrapper(*args: str, **kwargs: Any) -> Any:
return wrapper_df_in


def _describe_pd(df: pd.DataFrame, include_dtypes: bool = False) -> str:
def _describe_pd(df: DataFrameType, include_dtypes: bool = False) -> str:
result = f"columns: {list(df.columns)}"
if include_dtypes:
readable_dtypes = [dtype.name for dtype in df.dtypes]
result += f" with dtypes {readable_dtypes}"
if isinstance(df, pd.DataFrame):
readable_dtypes = [dtype.name for dtype in df.dtypes]
result += f" with dtypes {readable_dtypes}"
if isinstance(df, pl.DataFrame):
result += f" with dtypes {df.dtypes}"
return result


def _log_input(level: int, func_name: str, df: Any, include_dtypes: bool) -> None:
if isinstance(df, pd.DataFrame):
if isinstance(df, pd.DataFrame) or isinstance(df, pl.DataFrame):
logging.log(
level,
f"Function {func_name} parameters contained a DataFrame: {_describe_pd(df, include_dtypes)}",
)


def _log_output(level: int, func_name: str, df: Any, include_dtypes: bool) -> None:
if isinstance(df, pd.DataFrame):
if isinstance(df, pd.DataFrame) or isinstance(df, pl.DataFrame):
logging.log(
level,
f"Function {func_name} returned a DataFrame: {_describe_pd(df, include_dtypes)}",
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "daffy"
version = "0.7.0"
description = "Function decorators for Pandas Dataframe column name and data type validation"
version = "0.8.0"
description = "Function decorators for Pandas and Polars Dataframe column name and data type validation"
authors = [
{ name="Janne Sinivirta", email="janne.sinivirta@gmail.com" },
]
Expand Down Expand Up @@ -32,6 +32,7 @@ include = [
[tool.poetry.dependencies]
python = ">=3.9.0,<4.0.0"
pandas = ">=1.5.1,<3.0.0"
polars = "^1.7.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.3"
Expand Down
Loading

0 comments on commit 23b2e0f

Please sign in to comment.