Skip to content

Commit

Permalink
Don't autoregister extensions (#65)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Jan 29, 2024
1 parent 239daf3 commit 7b894de
Show file tree
Hide file tree
Showing 9 changed files with 300 additions and 127 deletions.
34 changes: 19 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,40 +51,44 @@ fn pig_latinnify(inputs: &[Series], kwargs: PigLatinKwargs) -> PolarsResult<Seri
}
```

On the python side this expression can then be registered under a namespace:
This can then be exposed on the Python side:

```python
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

lib = _get_shared_lib_location(__file__)
from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)

@pl.api.register_expr_namespace("language")
class Language:
    """Expression namespace registered under the name ``language``."""

    def __init__(self, expr: pl.Expr):
        # Expression this namespace instance was created for.
        self._expr = expr

    def pig_latinnify(self, capatilize: bool = False) -> pl.Expr:
        """Register the ``pig_latinnify`` plugin symbol on the wrapped expression.

        The parameter keeps its original (misspelled) name so that callers
        passing it by keyword keep working; its value is forwarded to the
        plugin under the correctly spelled ``"capitalize"`` key.
        """
        # Go through the public ``register_plugin`` method rather than the
        # private ``_register_plugin`` spelling.
        return self._expr.register_plugin(
            lib=lib,
            symbol="pig_latinnify",
            is_elementwise=True,
            kwargs={"capitalize": capatilize},
        )
def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr:
    """Register the ``pig_latinnify`` plugin symbol on ``expr``.

    ``expr`` is first passed through ``parse_into_expr``; ``capitalize`` is
    forwarded to the plugin as a keyword argument.
    """
    plugin_kwargs = {"capitalize": capitalize}
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="pig_latinnify",
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )
```

Compile/ship and then it is ready to use:

```python
import polars as pl
import expression_lib
from expression_lib import language

df = pl.DataFrame({
"names": ["Richard", "Alice", "Bob"],
})


out = df.with_columns(
pig_latin = language.pig_latinnify("names")
)
```
Alternatively, you can [register a custom namespace](https://docs.pola.rs/py-polars/html/reference/api/polars.api.register_expr_namespace.html#polars.api.register_expr_namespace), which enables you to write:
```python
out = df.with_columns(
pig_latin = pl.col("names").language.pig_latinnify()
)
Expand Down
111 changes: 0 additions & 111 deletions example/derive_expression/expression_lib/expression_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,111 +0,0 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

lib = _get_shared_lib_location(__file__)


@pl.api.register_expr_namespace("language")
class Language:
    """Expression namespace registered under the name ``language``."""

    def __init__(self, expr: pl.Expr):
        # Expression this namespace instance was created for.
        self._expr = expr

    def pig_latinnify(self, capitalize: bool = False) -> pl.Expr:
        """Register the ``pig_latinnify`` plugin symbol on the wrapped expression.

        ``capitalize`` is forwarded to the plugin as a keyword argument.
        """
        plugin_kwargs = {"capitalize": capitalize}
        return self._expr.register_plugin(
            lib=lib,
            symbol="pig_latinnify",
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )

    def append_args(
        self,
        float_arg: float,
        integer_arg: int,
        string_arg: str,
        boolean_arg: bool,
    ) -> pl.Expr:
        """
        Demonstrate passing non-`Series` arguments to a plugin.

        All four values are handed over as keyword arguments to the
        ``append_kwargs`` symbol; no additional expression inputs are given.
        """
        plugin_kwargs = {
            "float_arg": float_arg,
            "integer_arg": integer_arg,
            "string_arg": string_arg,
            "boolean_arg": boolean_arg,
        }
        return self._expr.register_plugin(
            lib=lib,
            symbol="append_kwargs",
            args=[],
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )


@pl.api.register_expr_namespace("dist")
class Distance:
    """Expression namespace registered under the name ``dist``."""

    def __init__(self, expr: pl.Expr):
        # Expression this namespace instance was created for.
        self._expr = expr

    def hamming_distance(self, other: IntoExpr) -> pl.Expr:
        """Register the ``hamming_distance`` symbol with ``other`` as extra input."""
        extra_inputs = [other]
        return self._expr.register_plugin(
            lib=lib,
            symbol="hamming_distance",
            args=extra_inputs,
            is_elementwise=True,
        )

    def jaccard_similarity(self, other: IntoExpr) -> pl.Expr:
        """Register the ``jaccard_similarity`` symbol with ``other`` as extra input."""
        extra_inputs = [other]
        return self._expr.register_plugin(
            lib=lib,
            symbol="jaccard_similarity",
            args=extra_inputs,
            is_elementwise=True,
        )

    def haversine(
        self,
        start_lat: IntoExpr,
        start_long: IntoExpr,
        end_lat: IntoExpr,
        end_long: IntoExpr,
    ) -> pl.Expr:
        """Register the ``haversine`` symbol with the four coordinates as extra inputs.

        The call requests ``cast_to_supertypes=True`` for the inputs.
        """
        coordinates = [start_lat, start_long, end_lat, end_long]
        return self._expr.register_plugin(
            lib=lib,
            symbol="haversine",
            args=coordinates,
            is_elementwise=True,
            cast_to_supertypes=True,
        )


@pl.api.register_expr_namespace("date_util")
class DateUtil:
    """Expression namespace registered under the name ``date_util``."""

    def __init__(self, expr: pl.Expr):
        # Expression this namespace instance was created for.
        self._expr = expr

    def is_leap_year(self) -> pl.Expr:
        """Register the ``is_leap_year`` plugin symbol on the wrapped expression."""
        return self._expr.register_plugin(
            symbol="is_leap_year",
            lib=lib,
            is_elementwise=True,
        )

    # Polars already provides this functionality; it is included here only
    # as an explanatory example.
    def change_time_zone(self, tz: str = "Europe/Amsterdam") -> pl.Expr:
        """Register the ``change_time_zone`` symbol, forwarding ``tz`` as a kwarg."""
        plugin_kwargs = {"tz": tz}
        return self._expr.register_plugin(
            lib=lib,
            symbol="change_time_zone",
            is_elementwise=True,
            kwargs=plugin_kwargs,
        )


@pl.api.register_expr_namespace("panic")
class Panic:
    """Expression namespace registered under the name ``panic``."""

    def __init__(self, expr: pl.Expr):
        # Expression this namespace instance was created for.
        self._expr = expr

    def panic(self) -> pl.Expr:
        """Register the ``panic`` plugin symbol on the wrapped expression."""
        wrapped = self._expr
        return wrapped.register_plugin(symbol="panic", lib=lib)
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def is_leap_year(expr: IntoExpr) -> pl.Expr:
    """Register the ``is_leap_year`` plugin symbol on ``expr``.

    ``expr`` is first passed through ``parse_into_expr``.
    """
    return parse_into_expr(expr).register_plugin(
        symbol="is_leap_year",
        lib=lib,
        is_elementwise=True,
    )


# Note that this already exists in Polars. It is just for explanatory
# purposes.
def change_time_zone(expr: IntoExpr, tz: str = "Europe/Amsterdam") -> pl.Expr:
    """Register the ``change_time_zone`` plugin symbol on ``expr``.

    ``expr`` is first passed through ``parse_into_expr``; ``tz`` is forwarded
    to the plugin as a keyword argument.
    """
    plugin_kwargs = {"tz": tz}
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="change_time_zone",
        is_elementwise=True,
        kwargs=plugin_kwargs,
    )
44 changes: 44 additions & 0 deletions example/derive_expression/expression_lib/expression_lib/dist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def hamming_distance(expr: IntoExpr, other: IntoExpr) -> pl.Expr:
    """Register the ``hamming_distance`` plugin symbol on ``expr``.

    ``expr`` is first passed through ``parse_into_expr``; ``other`` is handed
    to the plugin as an additional input.
    """
    extra_inputs = [other]
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="hamming_distance",
        args=extra_inputs,
        is_elementwise=True,
    )


def jaccard_similarity(expr: IntoExpr, other: IntoExpr) -> pl.Expr:
    """Register the ``jaccard_similarity`` plugin symbol on ``expr``.

    ``expr`` is first passed through ``parse_into_expr``; ``other`` is handed
    to the plugin as an additional input.
    """
    extra_inputs = [other]
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="jaccard_similarity",
        args=extra_inputs,
        is_elementwise=True,
    )


def haversine(
    expr: IntoExpr,
    start_lat: IntoExpr,
    start_long: IntoExpr,
    end_lat: IntoExpr,
    end_long: IntoExpr,
) -> pl.Expr:
    """Register the ``haversine`` plugin symbol on ``expr``.

    ``expr`` is first passed through ``parse_into_expr``. The four coordinate
    arguments are handed to the plugin as additional inputs, and the call
    requests ``cast_to_supertypes=True``.
    """
    coordinates = [start_lat, start_long, end_lat, end_long]
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="haversine",
        args=coordinates,
        is_elementwise=True,
        cast_to_supertypes=True,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Register Expressions extension with extra functionality.
Enables you to write
pl.col("dist_a").dist.jaccard_similarity("dist_b")
instead of
dist.jaccard_similarity("dist_a", "dist_b")
However, note that:
- you will need to add `import expression_lib.extension` to your code.
Add `# noqa: F401` to avoid linting errors due to unused imports.
- static typing will not recognise your custom namespace. Expect errors such
as `"Expr" has no attribute "dist" [attr-defined]`.
"""
from __future__ import annotations

import polars as pl
from typing import Any, Callable
from expression_lib import date_util, dist, language, utils, panic


@pl.api.register_expr_namespace("language")
class Language:
    """Namespace ``language`` that forwards to functions of the ``language`` module.

    Only ``pig_latinnify`` and ``append_args`` are forwarded; the wrapped
    expression is supplied as the first positional argument.
    """

    def __init__(self, expr: pl.Expr):
        # Expression this namespace instance was created for.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """Return a forwarding callable for ``attr`` or raise ``AttributeError``."""
        forwarded = ("pig_latinnify", "append_args")
        if attr not in forwarded:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the callable is invoked.
            target = getattr(language, attr)
            return target(self._expr, *args, **kwargs)

        return func


@pl.api.register_expr_namespace("dist")
class Distance:
    """Namespace ``dist`` that forwards to functions of the ``dist`` module.

    Only ``hamming_distance``, ``jaccard_similarity`` and ``haversine`` are
    forwarded; the wrapped expression is supplied as the first positional
    argument.
    """

    def __init__(self, expr: pl.Expr):
        # Expression this namespace instance was created for.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """Return a forwarding callable for ``attr`` or raise ``AttributeError``."""
        forwarded = ("hamming_distance", "jaccard_similarity", "haversine")
        if attr not in forwarded:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the callable is invoked.
            target = getattr(dist, attr)
            return target(self._expr, *args, **kwargs)

        return func


@pl.api.register_expr_namespace("date_util")
class DateUtil:
    """Namespace ``date_util`` that forwards to functions of the ``date_util`` module.

    Only ``change_time_zone`` and ``is_leap_year`` are forwarded; the wrapped
    expression is supplied as the first positional argument.
    """

    def __init__(self, expr: pl.Expr):
        # Expression this namespace instance was created for.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """Return a forwarding callable for ``attr`` or raise ``AttributeError``."""
        forwarded = ("change_time_zone", "is_leap_year")
        if attr not in forwarded:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the callable is invoked.
            target = getattr(date_util, attr)
            return target(self._expr, *args, **kwargs)

        return func


@pl.api.register_expr_namespace("panic")
class Panic:
    """Namespace ``panic`` that forwards to the ``panic`` module.

    Only the ``panic`` function is forwarded; the wrapped expression is
    supplied as the first positional argument.
    """

    def __init__(self, expr: pl.Expr):
        # Expression this namespace instance was created for.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """Return a forwarding callable for ``attr`` or raise ``AttributeError``."""
        forwarded = ("panic",)
        if attr not in forwarded:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the callable is invoked.
            target = getattr(panic, attr)
            return target(self._expr, *args, **kwargs)

        return func
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr:
    """Register the ``pig_latinnify`` plugin symbol on ``expr``.

    ``expr`` is first passed through ``parse_into_expr``; ``capitalize`` is
    forwarded to the plugin as a keyword argument.
    """
    plugin_kwargs = {"capitalize": capitalize}
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="pig_latinnify",
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )


def append_args(
    expr: IntoExpr,
    float_arg: float,
    integer_arg: int,
    string_arg: str,
    boolean_arg: bool,
) -> pl.Expr:
    """
    Demonstrate passing non-`Series` arguments to a plugin.

    ``expr`` is first passed through ``parse_into_expr``. The four remaining
    values are handed over as keyword arguments to the ``append_kwargs``
    symbol; no additional expression inputs are given.
    """
    plugin_kwargs = {
        "float_arg": float_arg,
        "integer_arg": integer_arg,
        "string_arg": string_arg,
        "boolean_arg": boolean_arg,
    }
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="append_kwargs",
        args=[],
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )
15 changes: 15 additions & 0 deletions example/derive_expression/expression_lib/expression_lib/panic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def panic(expr: IntoExpr) -> pl.Expr:
    """Register the ``panic`` plugin symbol on ``expr``.

    ``expr`` is first passed through ``parse_into_expr``.
    """
    return parse_into_expr(expr).register_plugin(symbol="panic", lib=lib)
Loading

0 comments on commit 7b894de

Please sign in to comment.