Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
662f258
get pandas zfill to work
williambdean May 23, 2025
153f0f3
add to the api-reference
williambdean May 23, 2025
55d77b8
add tests for the zfill
williambdean May 23, 2025
7ca9fcb
implement for other backends
williambdean May 23, 2025
1aa802d
Merge branch 'main' into zfill
williambdean May 23, 2025
4c4c58a
add the series tests
williambdean May 23, 2025
dda9485
add additional test cases
williambdean May 23, 2025
6cdf831
Merge branch 'main' into zfill
williambdean May 24, 2025
4c41e24
add additional test cases
williambdean May 24, 2025
2e737cc
implement for duckdb
williambdean May 24, 2025
4ba6ba4
Merge branch 'main' into zfill
williambdean May 24, 2025
0a3914c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 24, 2025
508382c
Merge branch 'zfill' of github.com:williambdean/narwhals into zfill
williambdean May 26, 2025
33a2d47
support polars like zfill
williambdean May 26, 2025
72ad871
make test cases act like polars
williambdean May 26, 2025
0aebf85
implement for ibis
williambdean May 26, 2025
e5fb6c5
implement for spark_like
williambdean May 26, 2025
a9a0fba
add attempt at arrow
williambdean May 26, 2025
edef50a
Merge branch 'main' into zfill
williambdean May 26, 2025
71cef5a
add skip reason
williambdean May 26, 2025
3b31cf2
Merge branch 'main' into zfill
williambdean May 26, 2025
03cbdaf
consolidate on inner function name
williambdean May 27, 2025
7ef7e87
add to docstring
williambdean May 27, 2025
76373df
feat: Get `pyarrow` working
dangotbanned May 27, 2025
ebd7c02
fix(typing): `ibis` casts
dangotbanned May 27, 2025
0b62377
fix(typing): `pyspark` ignore
dangotbanned May 27, 2025
407787c
Merge branch 'main' into zfill
dangotbanned May 27, 2025
86d9a59
add dask implementation
williambdean May 27, 2025
f9ff2d8
Merge branch 'zfill' of github.com:williambdean/narwhals into zfill
williambdean May 27, 2025
d2bd7a9
add exceptions and skips
williambdean May 27, 2025
e5434d0
adapt pandas / stdlib zfill behavior for all
williambdean May 30, 2025
1319479
Merge branch 'main' into zfill
williambdean May 30, 2025
94bbaf9
correct the example in the docstring
williambdean Jun 4, 2025
778017b
Merge branch 'main' into zfill
williambdean Jun 4, 2025
2ad6bc1
Merge branch 'main' into zfill
williambdean Jun 5, 2025
f9642f4
Merge branch 'main' into zfill
williambdean Jun 8, 2025
6f0178b
Merge branch 'main' into zfill
williambdean Jun 11, 2025
cfee983
Merge branch 'main' into zfill
williambdean Jun 12, 2025
1ba08d9
skip based on pandas version
williambdean Jun 12, 2025
cb5bf7c
change the condition
williambdean Jun 12, 2025
a626606
add expr_str example as well
williambdean Jun 12, 2025
68288d7
fixing the import
williambdean Jun 12, 2025
eafdef5
implement for older polars versions
williambdean Jun 12, 2025
aeeb82e
Merge branch 'main' into zfill
williambdean Jun 14, 2025
4d70d6a
Merge branch 'main' into zfill
williambdean Jun 15, 2025
d03367c
add skipif for second test :tear:
williambdean Jun 15, 2025
5fcdc69
Merge branch 'main' into zfill
williambdean Jun 17, 2025
0514b6a
fix the doctests
williambdean Jun 19, 2025
d717cf3
refactor logic to look like _spark_like
williambdean Jun 19, 2025
cb0c30e
pyarrow variable naming, typing and some misc
FBruzzesi Jun 19, 2025
9c97fc3
pyarrow use repeat
FBruzzesi Jun 19, 2025
c742332
Merge branch 'main' into zfill
williambdean Jun 19, 2025
ea48128
remove warnings
williambdean Jun 19, 2025
a2a5eb5
chore(typing): Add missing `zfill`
dangotbanned Jun 19, 2025
e63ced7
Update narwhals/_ibis/expr_str.py
williambdean Jun 19, 2025
fa5c65b
Update narwhals/_ibis/expr_str.py
williambdean Jun 19, 2025
9aa9eb1
Update narwhals/_arrow/series_str.py
williambdean Jun 19, 2025
4842eb7
perf: Reuse `PolarsExpr` impl in `PolarsSeries`
dangotbanned Jun 19, 2025
13f3b4b
Merge branch 'main' into zfill
dangotbanned Jun 19, 2025
8d9e546
use xfail instead
williambdean Jun 19, 2025
622c768
rename helper function
williambdean Jun 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/expr_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@
- to_datetime
- to_lowercase
- to_uppercase
- zfill
show_source: false
show_bases: false
1 change: 1 addition & 0 deletions docs/api-reference/series_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@
- to_datetime
- to_lowercase
- to_uppercase
- zfill
show_source: false
show_bases: false
35 changes: 35 additions & 0 deletions narwhals/_arrow/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import string
from typing import TYPE_CHECKING

import pyarrow as pa
import pyarrow.compute as pc

from narwhals._arrow.utils import ArrowSeriesNamespace, lit, parse_datetime_format

if TYPE_CHECKING:
from narwhals._arrow.series import ArrowSeries
from narwhals._arrow.typing import Incomplete


class ArrowSeriesStringNamespace(ArrowSeriesNamespace):
Expand Down Expand Up @@ -60,3 +62,36 @@ def to_uppercase(self) -> ArrowSeries:

def to_lowercase(self) -> ArrowSeries:
return self.with_native(pc.utf8_lower(self.native))

def zfill(self, width: int) -> ArrowSeries:
    """Left-pad every string with "0" until it is `width` characters long.

    A leading "-" or "+" is kept as the first character and the zeros are
    inserted after it. Strings that are not shorter than `width` are passed
    through unchanged.

    Arguments:
        width: Target length of each string after padding.

    Returns:
        A new series wrapping the padded native array.
    """
    join_elementwise: Incomplete = pc.binary_join_element_wise
    values = self.native
    minus_sign, plus_sign = lit("-"), lit("+")
    head = self.slice(0, 1).native
    tail = self.slice(1, None).native

    # Only strings shorter than `width` are ever padded.
    is_short = pc.less(pc.utf8_length(values), lit(width))

    # One struct field per case handed to `case_when` below, in the same order;
    # the signed cases are listed ahead of the plain one.
    branch_mask = pc.make_struct(
        pc.and_(pc.equal(head, minus_sign), is_short),
        pc.and_(pc.equal(head, plus_sign), is_short),
        is_short,
    )

    # The sign takes one slot, so the rest is padded to `width - 1`.
    zero_padded_tail = pc.utf8_lpad(tail, width - 1, padding="0")
    n_rows = len(values)
    minus_padded = join_elementwise(
        pa.repeat(minus_sign, n_rows), zero_padded_tail, ""
    )
    plus_padded = join_elementwise(pa.repeat(plus_sign, n_rows), zero_padded_tail, "")
    zero_padded = pc.utf8_lpad(values, width=width, padding="0")

    padded = pc.case_when(
        branch_mask,
        minus_padded,  # starts with "-" and is short
        plus_padded,  # starts with "+" and is short
        zero_padded,  # unsigned and short
        values,  # already long enough
    )
    return self.with_native(padded)
1 change: 1 addition & 0 deletions narwhals/_compliant/any_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def split(self, by: str) -> CompliantT_co: ...
def to_datetime(self, format: str | None) -> CompliantT_co: ...
def to_lowercase(self) -> CompliantT_co: ...
def to_uppercase(self) -> CompliantT_co: ...
def zfill(self, width: int) -> CompliantT_co: ...


class StructNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]):
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,9 @@ def to_lowercase(self) -> EagerExprT:
def to_uppercase(self) -> EagerExprT:
return self.compliant._reuse_series_namespace("str", "to_uppercase")

def zfill(self, width: int) -> EagerExprT:
    """Zero-pad strings to `width` by reusing the series-level `str.zfill`.

    Arguments:
        width: Target length forwarded to the series implementation.

    Returns:
        Whatever `_reuse_series_namespace` builds for the `str.zfill` call.
    """
    compliant = self.compliant
    return compliant._reuse_series_namespace("str", "zfill", width=width)


class EagerExprStructNamespace(
EagerExprNamespace[EagerExprT], StructNamespace[EagerExprT], Generic[EagerExprT]
Expand Down
5 changes: 5 additions & 0 deletions narwhals/_dask/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,8 @@ def to_lowercase(self) -> DaskExpr:
return self._compliant_expr._with_callable(
lambda expr: expr.str.lower(), "to_lowercase"
)

def zfill(self, width: int) -> DaskExpr:
    """Zero-pad strings to `width` through the native `.str.zfill` accessor.

    Arguments:
        width: Target length, forwarded to the callable as a keyword argument.

    Returns:
        The expression produced by `_with_callable` for the "zfill" operation.
    """
    compliant = self._compliant_expr
    return compliant._with_callable(
        lambda expr, width: expr.str.zfill(width),
        "zfill",
        width=width,
    )
29 changes: 28 additions & 1 deletion narwhals/_duckdb/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from duckdb import FunctionExpression

from narwhals._duckdb.utils import lit
from narwhals._duckdb.utils import lit, when
from narwhals._utils import not_implemented

if TYPE_CHECKING:
Expand Down Expand Up @@ -100,4 +100,31 @@ def to_datetime(self, format: str | None) -> DuckDBExpr:
lambda expr: FunctionExpression("strptime", expr, lit(format))
)

def zfill(self, width: int) -> DuckDBExpr:
    """Zero-pad strings to `width`, keeping a leading "-" or "+" in front.

    DuckDB has no built-in zfill, so the result is assembled from `length`,
    `starts_with`, `substr`, `lpad` and `concat`.

    Arguments:
        width: Target length of each string after padding.

    Returns:
        A new expression; strings not shorter than `width` are left as they are.
    """

    def func(expr: Expression) -> Expression:
        zero, hyphen, plus = lit("0"), lit("-"), lit("+")
        is_short = FunctionExpression("length", expr) < lit(width)

        has_minus = FunctionExpression("starts_with", expr, hyphen)
        has_plus = FunctionExpression("starts_with", expr, plus)

        # Everything from the second character on, padded to `width - 1` so the
        # sign can be put back in front.
        tail = FunctionExpression("substr", expr, lit(2))
        padded_tail = FunctionExpression("lpad", tail, lit(width - 1), zero)

        minus_padded = FunctionExpression("concat", hyphen, padded_tail)
        plus_padded = FunctionExpression("concat", plus, padded_tail)
        zero_padded = FunctionExpression("lpad", expr, lit(width), zero)

        signed_cases = when(has_minus & is_short, minus_padded).when(
            has_plus & is_short, plus_padded
        )
        return signed_cases.when(is_short, zero_padded).otherwise(expr)

    return self._compliant_expr._with_callable(func)

replace = not_implemented()
26 changes: 23 additions & 3 deletions narwhals/_ibis/expr_str.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Callable
from typing import TYPE_CHECKING, Any, Callable, cast

import ibis
import ibis.expr.types as ir
from ibis.expr.datatypes import Timestamp

from narwhals._ibis.utils import lit
from narwhals._utils import _is_naive_format, not_implemented

if TYPE_CHECKING:
import ibis.expr.types as ir

from narwhals._ibis.expr import IbisExpr


Expand Down Expand Up @@ -100,4 +101,23 @@ def to_datetime(self, format: str | None) -> IbisExpr:
fn = self._to_datetime_naive if _is_naive_format(format) else self._to_datetime
return self._compliant_expr._with_callable(fn(format))

def zfill(self, width: int) -> IbisExpr:
    """Zero-pad strings to `width`, keeping a leading "-" or "+" in front.

    Arguments:
        width: Target length of each string after padding.

    Returns:
        A new expression; strings not shorter than `width` are left as they are.
    """

    def func(expr: ir.StringColumn) -> ir.Value:
        n_chars = expr.length()
        is_short = n_chars < lit(width)

        one = cast("ir.IntegerScalar", lit(1))
        tail_length = cast("ir.IntegerValue", n_chars - one)
        # Substring at offset `one` spanning `n_chars - 1` characters, padded to
        # `width - 1` so a single sign character completes it.
        padded_tail = expr.substr(one, tail_length).lpad(width - 1, "0")

        signed_branches = [
            (expr.startswith(sign) & is_short, padded_tail.lpad(width, sign))
            for sign in ("-", "+")
        ]
        return ibis.cases(
            *signed_branches,
            (is_short, expr.lpad(width, "0")),
            else_=expr,
        )

    return self._compliant_expr._with_callable(func)

replace = not_implemented()
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,6 @@ def to_uppercase(self) -> PandasLikeSeries:

def to_lowercase(self) -> PandasLikeSeries:
return self.with_native(self.native.str.lower())

def zfill(self, width: int) -> PandasLikeSeries:
    """Zero-pad strings to `width` using the native `.str.zfill` accessor.

    Arguments:
        width: Target length passed straight to the native implementation.

    Returns:
        A new series wrapping the padded native series.
    """
    padded = self.native.str.zfill(width)
    return self.with_native(padded)
21 changes: 21 additions & 0 deletions narwhals/_polars/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,27 @@ class PolarsExprStringNamespace:
def __init__(self, expr: PolarsExpr) -> None:
self._compliant_expr = expr

def zfill(self, width: int) -> PolarsExpr:
    """Zero-pad strings to `width`, with extra "+" handling on polars <= 1.30.0.

    Arguments:
        width: Target length of each string after padding.

    Returns:
        A new expression wrapping the padded native expression.
    """
    compliant = self._compliant_expr
    native_expr = compliant.native
    zero_padded = native_expr.str.zfill(width)

    # NOTE(review): the 1.30.0 bound was picked because that was the current
    # polars release when this was written - revisit if that turns out wrong.
    if compliant._backend_version > (1, 30, 0):
        return compliant._with_native(zero_padded)

    # Fallback: for strings that start with "+" and are shorter than `width`,
    # drop the sign, zero-pad the remainder to `width - 1`, then put "+" back.
    # Presumably native `zfill` on these versions does not do this itself - confirm.
    n_chars = native_expr.str.len_chars()
    is_short_plus = native_expr.str.starts_with("+") & (n_chars < width)
    plus_padded = (
        native_expr.str.slice(1, n_chars).str.zfill(width - 1).str.pad_start(width, "+")
    )
    return compliant._with_native(
        pl.when(is_short_plus).then(plus_padded).otherwise(zero_padded)
    )

def __getattr__(self, attr: str) -> Callable[[Any], PolarsExpr]:
def func(*args: Any, **kwargs: Any) -> PolarsExpr:
pos, kwds = extract_args_kwargs(args, kwargs)
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,12 @@ class PolarsSeriesStringNamespace:
def __init__(self, series: PolarsSeries) -> None:
self._compliant_series = series

def zfill(self, width: int) -> PolarsSeries:
    """Zero-pad strings to `width` by routing through the expression-level `str.zfill`.

    The series is turned into a one-column frame, the column is padded with the
    namespace's `col(...).str.zfill`, and the column is extracted again.

    Arguments:
        width: Target length of each string after padding.

    Returns:
        The padded column, retrieved under the series' own name.
    """
    series = self._compliant_series
    column_name = series.name
    namespace = series.__narwhals_namespace__()
    frame = series.to_frame()
    padded_column = namespace.col(column_name).str.zfill(width)
    return frame.select(padded_column).get_column(column_name)

def __getattr__(self, attr: str) -> Any:
def func(*args: Any, **kwargs: Any) -> Any:
pos, kwds = extract_args_kwargs(args, kwargs)
Expand Down
27 changes: 27 additions & 0 deletions narwhals/_spark_like/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,30 @@ def to_datetime(self, format: str | None) -> SparkLikeExpr:
return self._compliant_expr._with_callable(
lambda expr: function(F.replace(expr, F.lit("T"), F.lit(" ")))
)

def zfill(self, width: int) -> SparkLikeExpr:
    """Zero-pad strings to `width`, keeping a leading "-" or "+" in front.

    Arguments:
        width: Target length of each string after padding.

    Returns:
        A new expression; strings not shorter than `width` are left as they are.
    """

    def func(expr: Column) -> Column:
        F = self._compliant_expr._F  # noqa: N806

        n_chars = F.length(expr)
        is_short = n_chars < width
        minus, plus = F.lit("-"), F.lit("+")
        has_minus = F.startswith(expr, minus)
        has_plus = F.startswith(expr, plus)

        tail_length = n_chars - F.lit(1)
        # NOTE: the length argument is annotated as `int`, but a `Column` is
        # accepted as well, hence the ignore.
        tail = F.substring(expr, 2, tail_length)  # pyright: ignore[reportArgumentType]
        # The sign takes one slot, so the rest is padded to `width - 1`.
        padded_tail = F.lpad(tail, width - 1, "0")

        cases = F.when(has_minus & is_short, F.concat(minus, padded_tail))
        cases = cases.when(has_plus & is_short, F.concat(plus, padded_tail))
        cases = cases.when(is_short, F.lpad(expr, width, "0"))
        return cases.otherwise(expr)

    return self._compliant_expr._with_callable(func)
31 changes: 31 additions & 0 deletions narwhals/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,34 @@ def to_lowercase(self) -> ExprT:
return self._expr._with_elementwise_op(
lambda plx: self._expr._to_compliant_expr(plx).str.to_lowercase()
)

def zfill(self, width: int) -> ExprT:
    """Transform string to zero-padded variant.

    A leading `"+"` or `"-"` stays in front and the zeros are inserted after it.

    Arguments:
        width: The desired length of the string after padding. If the string is
            already at least `width` characters long, no padding is applied.
            If `width` is less than 0, no padding is applied.

    Returns:
        A new expression.

    Examples:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>> df_native = pd.DataFrame({"digits": ["+1", "-1", "1", None]})
        >>> df = nw.from_native(df_native)
        >>> df.with_columns(zfill_col=nw.col("digits").str.zfill(3))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        |  digits zfill_col|
        |0     +1       +01|
        |1     -1       -01|
        |2      1       001|
        |3   None      None|
        └──────────────────┘
    """
    # The compliant expression is resolved lazily, once the backend namespace
    # (`plx`) is handed to the callable.
    return self._expr._with_elementwise_op(
        lambda plx: self._expr._to_compliant_expr(plx).str.zfill(width)
    )
25 changes: 25 additions & 0 deletions narwhals/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,3 +398,28 @@ def to_datetime(self, format: str | None = None) -> SeriesT:
return self._narwhals_series._with_compliant(
self._narwhals_series._compliant_series.str.to_datetime(format=format)
)

def zfill(self, width: int) -> SeriesT:
    r"""Pad strings with zeros on the left.

    Arguments:
        width: The target width of the string. Strings shorter than this are
            padded with zeros on the left.

    Returns:
        A new Series with strings padded with zeros on the left.

    Examples:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>> s_native = pd.Series(["+1", "-23", "456", "123456"])
        >>> s = nw.from_native(s_native, series_only=True)
        >>> s.str.zfill(5).to_native()
        0     +0001
        1     -0023
        2     00456
        3    123456
        dtype: object
    """
    narwhals_series = self._narwhals_series
    padded = narwhals_series._compliant_series.str.zfill(width)
    return narwhals_series._with_compliant(padded)
46 changes: 46 additions & 0 deletions tests/expr_and_series/str/zfill_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import annotations

import pytest

import narwhals as nw
from tests.utils import PANDAS_VERSION, Constructor, ConstructorEager, assert_equal_data

data = {"a": ["-1", "+1", "1", "12", "123", "99999", "+9999", None]}
expected = {"a": ["-01", "+01", "001", "012", "123", "99999", "+9999", None]}


def uses_pyarrow_backend(constructor: Constructor | ConstructorEager) -> bool:
    """Tell whether `constructor` is one of the pyarrow-backed pandas-like constructors.

    The check is by name only: `constructor.__name__` must be
    "pandas_pyarrow_constructor" or "modin_pyarrow_constructor".
    """
    pyarrow_backed_names = ("pandas_pyarrow_constructor", "modin_pyarrow_constructor")
    return constructor.__name__ in pyarrow_backed_names


@pytest.mark.skipif(PANDAS_VERSION < (1, 5), reason="different zfill behavior")
def test_str_zfill(request: pytest.FixtureRequest, constructor: Constructor) -> None:
    """Expression-level `str.zfill(3)` on column "a" must reproduce `expected`."""
    if uses_pyarrow_backend(constructor):
        # Expected failure for the pyarrow-backed constructors; the linked
        # pandas issue tracks it.
        xfail_reason = (
            "pandas with pyarrow backend doesn't support str.zfill, see "
            "https://github.com/pandas-dev/pandas/issues/61485"
        )
        request.applymarker(pytest.mark.xfail(reason=xfail_reason))

    frame = nw.from_native(constructor(data))
    padded = frame.select(nw.col("a").str.zfill(3))
    assert_equal_data(padded, expected)


@pytest.mark.skipif(PANDAS_VERSION < (1, 5), reason="different zfill behavior")
def test_str_zfill_series(
    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
) -> None:
    """Series-level `str.zfill(3)` on column "a" must reproduce `expected`."""
    if uses_pyarrow_backend(constructor_eager):
        # Expected failure for the pyarrow-backed constructors; the linked
        # pandas issue tracks it.
        xfail_reason = (
            "pandas with pyarrow backend doesn't support str.zfill, see "
            "https://github.com/pandas-dev/pandas/issues/61485"
        )
        request.applymarker(pytest.mark.xfail(reason=xfail_reason))

    frame = nw.from_native(constructor_eager(data), eager_only=True)
    padded = frame["a"].str.zfill(3)
    assert_equal_data({"a": padded}, expected)
Loading