Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
cd8b49d
Eager mode
FBruzzesi Jul 26, 2025
74b94c1
lazy WIP
FBruzzesi Jul 26, 2025
81f12fa
merge main
FBruzzesi Jul 26, 2025
c775ccd
fixed eager
FBruzzesi Jul 26, 2025
b3ba810
add docs, cleanse a bit
FBruzzesi Jul 26, 2025
9ee6209
fix or ignore typing issues
FBruzzesi Jul 26, 2025
a3496ed
skip if impl not installed
FBruzzesi Jul 26, 2025
dc263c8
overloads?
FBruzzesi Jul 26, 2025
f612646
fix overloads
FBruzzesi Jul 27, 2025
5a46579
merge main and add to v2
FBruzzesi Jul 28, 2025
b8b6ae2
add in v2.__all__
FBruzzesi Jul 28, 2025
22c52eb
factor out _native_int_range into utils
FBruzzesi Jul 28, 2025
8f4f647
resolve majority of typing and import issues
FBruzzesi Jul 28, 2025
7fb18bb
replace all type hints with IntegerDType, ignore import in functions
FBruzzesi Jul 28, 2025
dde4a99
Dan's suggestion
FBruzzesi Jul 28, 2025
876a771
refactor: Remove unused `IntegerType`
dangotbanned Jul 29, 2025
5ddd072
Merge remote-tracking branch 'upstream/main' into feat/int-range
dangotbanned Jul 29, 2025
5eeda2d
refactor: Reuse a single `Implementation.PYARROW._backend_version()`
dangotbanned Jul 29, 2025
704da84
fix(typing): Kinda fix `_native_int_range`
dangotbanned Jul 29, 2025
7ec01e0
refactor: Add `int64`
dangotbanned Jul 29, 2025
57393ec
fix(typing): make marco's checker happy 😉
dangotbanned Jul 29, 2025
f1857f8
refactor(typing): Omit defaults in overloads
dangotbanned Jul 29, 2025
c94b4d4
refactor(typing): Remove unreached overload
dangotbanned Jul 29, 2025
fe2bc9d
refactor(typing): rinse/repeat for stable
dangotbanned Jul 29, 2025
fc29864
refactor: fix `polars` typing, use kwargs when required
dangotbanned Jul 30, 2025
749edfc
chore(typing): Remove unused asserts
dangotbanned Jul 30, 2025
4706a30
test: Add failing `Expr` + `eager` case
dangotbanned Jul 30, 2025
1be56cc
Merge remote-tracking branch 'upstream/main' into feat/int-range
dangotbanned Jul 30, 2025
bc0355f
low hanging feedback adjustments
FBruzzesi Jul 31, 2025
352fd4f
refactor into int_range_eager
FBruzzesi Jul 31, 2025
8b195ba
typo
FBruzzesi Jul 31, 2025
466d85d
forgot to mention about eager value in suggestion
FBruzzesi Jul 31, 2025
1575ae8
ci: Update `dtypes-import`
dangotbanned Jul 31, 2025
82e93fa
refactor: Add `PandasLikeNamespace._array_funcs`
dangotbanned Jul 31, 2025
2c52f91
refactor(suggestion): Move impl to `EagerNamespace.int_range`
dangotbanned Jul 31, 2025
f13d667
defaults for `int_range` as well
dangotbanned Jul 31, 2025
447a276
refactor: Use `_series`, pass `dtype_pa` once
dangotbanned Jul 31, 2025
1673573
Merge remote-tracking branch 'upstream/main' into feat/int-range
dangotbanned Jul 31, 2025
1098afb
revert: Undo `Int64` hack
dangotbanned Jul 31, 2025
15ae42a
refactor: Use `int_range_eager` in `with_row_index`
dangotbanned Jul 31, 2025
71e9856
always require `Expr` in `CompliantNamespace.int_range`
dangotbanned Jul 31, 2025
f3387c0
Update narwhals/functions.py
FBruzzesi Aug 1, 2025
8367cc2
Merge branch 'main' into feat/int-range
FBruzzesi Aug 1, 2025
c8af149
chore: Note remaining `np.arange` usage
dangotbanned Aug 1, 2025
ea155ca
tag as unstable
FBruzzesi Aug 1, 2025
77eb2bd
Merge branch 'main' into feat/int-range
FBruzzesi Aug 2, 2025
4c949e6
Merge branch 'main' into feat/int-range
dangotbanned Aug 4, 2025
c430d5b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 4, 2025
3b96967
Merge branch 'main' into feat/int-range
FBruzzesi Aug 6, 2025
c98740b
Merge branch 'main' into feat/int-range
dangotbanned Aug 7, 2025
a7d4e25
Merge branch 'main' into feat/int-range
dangotbanned Aug 7, 2025
190590b
Merge remote-tracking branch 'upstream/main' into feat/int-range
dangotbanned Aug 8, 2025
cf5799a
Merge branch 'main' into feat/int-range
FBruzzesi Aug 12, 2025
f7e5c9a
Merge branch 'main' into feat/int-range
dangotbanned Aug 13, 2025
5c7e6e6
Merge remote-tracking branch 'upstream/main' into feat/int-range
dangotbanned Aug 14, 2025
5484863
Merge branch 'main' into feat/int-range
dangotbanned Aug 14, 2025
ec5ee4b
Merge branch 'main' into feat/int-range
dangotbanned Aug 15, 2025
813c101
Merge remote-tracking branch 'upstream/main' into feat/int-range
dangotbanned Aug 17, 2025
9584b0f
Merge remote-tracking branch 'upstream/main' into feat/int-range
dangotbanned Aug 19, 2025
7e81d46
Merge branch 'main' into feat/int-range
dangotbanned Aug 20, 2025
0c6495f
refactor(typing): Use `IntoBackend[EagerAllowed]`
dangotbanned Aug 20, 2025
45cf54a
docs: Remove Returns sections
dangotbanned Aug 20, 2025
abe3da7
merge main
FBruzzesi Aug 24, 2025
fb4cfda
Merge branch 'main' into feat/int-range
dangotbanned Aug 25, 2025
e8739e5
Merge branch 'main' into feat/int-range
dangotbanned Aug 27, 2025
1d8602b
Merge remote-tracking branch 'upstream/main' into feat/int-range
dangotbanned Sep 5, 2025
fb6c328
fix: Update for (#3045)
dangotbanned Sep 5, 2025
35ece71
fix: Don't treat `step` as an `Expr`
dangotbanned Sep 5, 2025
afd35f6
Merge remote-tracking branch 'upstream/main' into feat/int-range
dangotbanned Sep 13, 2025
0de173e
chore(typing): fix incompatible override
dangotbanned Sep 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,12 @@ repos:
name: don't import from narwhals.dtypes (use `Version.dtypes` instead)
entry: |
(?x)
import\ narwhals.dtypes|
from\ narwhals\ import\ dtypes|
from\ narwhals.dtypes\ import\ [^D_]+|
import\ narwhals.stable.v1.dtypes|
from\ narwhals.stable\.v.\ import\ dtypes|
from\ narwhals.stable\.v.\.dtypes\ import
import\ narwhals(\.stable\.v\d)?\.dtypes|
from\ narwhals(\.stable\.v\d)?\ import\ dtypes|
^from\ narwhals(\.stable\.v\d)?\.dtypes\ import
\ (DType,\ )?
((Datetime|Duration|Enum)(,\ )?)+
((,\ )?DType)?
language: pygrep
files: ^narwhals/
exclude: |
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/narwhals.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Here are the top-level functions available in Narwhals.
- from_numpy
- generate_temporary_column_name
- get_native_namespace
- int_range
- is_ordered_categorical
- len
- lit
Expand Down
2 changes: 2 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
from_arrow,
from_dict,
from_numpy,
int_range,
len_ as len,
lit,
max,
Expand Down Expand Up @@ -141,6 +142,7 @@
"from_numpy",
"generate_temporary_column_name",
"get_native_namespace",
"int_range",
"is_ordered_categorical",
"len",
"lit",
Expand Down
23 changes: 8 additions & 15 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pyarrow.compute as pc

from narwhals._arrow.series import ArrowSeries
from narwhals._arrow.utils import native_to_narwhals_dtype
from narwhals._arrow.utils import int_range, native_to_narwhals_dtype
from narwhals._compliant import EagerDataFrame
from narwhals._expression_parsing import ExprKind
from narwhals._utils import (
Expand Down Expand Up @@ -489,16 +489,11 @@ def to_dict(
def with_row_index(self, name: str, order_by: Sequence[str] | None) -> Self:
plx = self.__narwhals_namespace__()
if order_by is None:
import numpy as np # ignore-banned-import

data = pa.array(np.arange(len(self), dtype=np.int64))
row_index = plx._expr._from_series(
plx._series.from_iterable(data, context=self, name=name)
)
row_index = plx._expr._from_series(plx.int_range_eager(0, len(self)))
else:
rank = plx.col(order_by[0]).rank("ordinal", descending=False)
row_index = (rank.over(partition_by=[], order_by=order_by) - 1).alias(name)
return self.select(row_index, plx.all())
row_index = rank.over(partition_by=[], order_by=order_by) - 1
return self.select(row_index.alias(name), plx.all())

def filter(self, predicate: ArrowExpr | list[bool | None]) -> Self:
if isinstance(predicate, list):
Expand Down Expand Up @@ -677,10 +672,8 @@ def write_csv(self, file: str | Path | BytesIO | None) -> str | None:
return None

def is_unique(self) -> ArrowSeries:
import numpy as np # ignore-banned-import

col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns)
row_index = pa.array(np.arange(len(self)))
row_index = int_range(0, len(self))
keep_idx = (
self.native.append_column(col_token, row_index)
.group_by(self.columns)
Expand All @@ -704,8 +697,6 @@ def unique(
) -> Self:
# The param `maintain_order` is only here for compatibility with the Polars API
# and has no effect on the output.
import numpy as np # ignore-banned-import

if subset and (error := self._check_columns_exist(subset)):
raise error
subset = list(subset or self.columns)
Expand All @@ -732,7 +723,7 @@ def unique(
else:
native = self.native
keep_idx_native = (
native.append_column(col_token, pa.array(np.arange(len(self))))
native.append_column(col_token, int_range(0, len(self)))
.group_by(subset)
.aggregate([(col_token, agg_func)])
.column(f"{col_token}_{agg_func}")
Expand All @@ -751,6 +742,8 @@ def gather_every(self, n: int, offset: int) -> Self:
def to_arrow(self) -> pa.Table:
return self.native

# TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range`
# https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688
def sample(
self,
n: int | None,
Expand Down
23 changes: 21 additions & 2 deletions narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,27 @@
from narwhals._arrow.expr import ArrowExpr
from narwhals._arrow.selectors import ArrowSelectorNamespace
from narwhals._arrow.series import ArrowSeries
from narwhals._arrow.utils import cast_to_comparable_string_types
from narwhals._arrow.utils import (
cast_to_comparable_string_types,
chunked_array,
int_range,
narwhals_to_native_dtype,
)
from narwhals._compliant import CompliantThen, EagerNamespace, EagerWhen
from narwhals._expression_parsing import (
combine_alias_output_names,
combine_evaluate_output_names,
)
from narwhals._utils import Implementation
from narwhals.dtypes import Int64

if TYPE_CHECKING:
from collections.abc import Iterator, Sequence

from narwhals._arrow.typing import ArrayOrScalar, ChunkedArrayAny, Incomplete
from narwhals._compliant.typing import ScalarKwargs
from narwhals._utils import Version
from narwhals.typing import IntoDType, NonNestedLiteral
from narwhals.typing import IntegerDType, IntoDType, NonNestedLiteral


class ArrowNamespace(
Expand Down Expand Up @@ -278,6 +284,19 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
context=self,
)

def int_range_eager(
    self,
    start: int,
    end: int,
    step: int = 1,
    *,
    dtype: IntegerDType = Int64,
    name: str = "literal",
) -> ArrowSeries:
    """Create an eager `ArrowSeries` holding an integer range.

    Arguments:
        start: First bound, forwarded to `int_range`.
        end: Second bound, forwarded to `int_range`.
        step: Increment between consecutive values.
        dtype: Narwhals integer dtype; translated to its pyarrow
            equivalent before the range is built.
        name: Name given to the resulting series.
    """
    # Translate the narwhals dtype once, then hand the native type to the helper.
    native_dtype = narwhals_to_native_dtype(dtype, version=self._version)
    # `from_native` receives a chunked array, so wrap the flat array in one chunk.
    values = chunked_array(
        [int_range(start=start, end=end, step=step, dtype=native_dtype)]
    )
    return self._series.from_native(values, name=name, context=self)


class ArrowWhen(EagerWhen[ArrowDataFrame, ArrowSeries, ArrowExpr, "ChunkedArrayAny"]):
@property
Expand Down
17 changes: 10 additions & 7 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
chunked_array,
extract_native,
floordiv_compat,
int_range,
is_array_or_scalar,
lit,
narwhals_to_native_dtype,
Expand Down Expand Up @@ -632,6 +633,8 @@ def zip_with(self, mask: Self, other: Self) -> Self:
cond = mask.native.combine_chunks()
return self._with_native(pc.if_else(cond, self.native, other.native))

# TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range`
# https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688
def sample(
self,
n: int | None,
Expand Down Expand Up @@ -670,7 +673,7 @@ def fill_aux(
# then it calculates the distance of each new index and the original index
# if the distance is equal to or less than the limit and the original value is null, it is replaced
valid_mask = pc.is_valid(arr)
indices = pa.array(np.arange(len(arr)), type=pa.int64())
indices = int_range(0, len(arr))
if direction == "forward":
valid_index = np.maximum.accumulate(np.where(valid_mask, indices, -1))
distance = indices - valid_index
Expand Down Expand Up @@ -717,9 +720,7 @@ def is_unique(self) -> ArrowSeries:
return self.to_frame().is_unique().alias(self.name)

def is_first_distinct(self) -> Self:
import numpy as np # ignore-banned-import

row_number = pa.array(np.arange(len(self)))
row_number = int_range(0, len(self))
col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name])
first_distinct_index = (
pa.Table.from_arrays([self.native], names=[self.name])
Expand All @@ -732,9 +733,7 @@ def is_first_distinct(self) -> Self:
return self._with_native(pc.is_in(row_number, first_distinct_index))

def is_last_distinct(self) -> Self:
import numpy as np # ignore-banned-import

row_number = pa.array(np.arange(len(self)))
row_number = int_range(0, len(self))
col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name])
last_distinct_index = (
pa.Table.from_arrays([self.native], names=[self.name])
Expand Down Expand Up @@ -790,6 +789,8 @@ def sort(self, *, descending: bool, nulls_last: bool) -> Self:
)
return self._with_native(self.native.take(sorted_indices))

# TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range`
# https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688
def to_dummies(self, *, separator: str, drop_first: bool) -> ArrowDataFrame:
import numpy as np # ignore-banned-import

Expand Down Expand Up @@ -1156,6 +1157,8 @@ def _calculate_bins(self, bin_count: int) -> _1DArray:
upper += 0.5
return self._linear_space(lower, upper, bin_count + 1)

# TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range`
# https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688
def _calculate_hist(self, bins: list[float] | _1DArray) -> ArrowHistData:
ser = self.native
# NOTE: `mypy` refuses to resolve `ndarray.__getitem__`
Expand Down
24 changes: 22 additions & 2 deletions narwhals/_arrow/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from __future__ import annotations

from functools import lru_cache
from typing import TYPE_CHECKING, Any, cast
from typing import TYPE_CHECKING, Any, Final, cast

import pyarrow as pa
import pyarrow.compute as pc

from narwhals._compliant import EagerSeriesNamespace
from narwhals._utils import Version, isinstance_or_issubclass
from narwhals._utils import Implementation, Version, isinstance_or_issubclass

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Mapping
Expand All @@ -21,6 +21,7 @@
ArrayOrScalarT1,
ArrayOrScalarT2,
ChunkedArrayAny,
Incomplete,
NativeIntervalUnit,
ScalarAny,
)
Expand Down Expand Up @@ -57,6 +58,9 @@ def extract_regex(
is_timestamp,
)

BACKEND_VERSION = Implementation.PYARROW._backend_version()
"""Static backend version for `pyarrow`."""

UNITS_DICT: Mapping[IntervalUnit, NativeIntervalUnit] = {
"y": "year",
"q": "quarter",
Expand All @@ -73,6 +77,9 @@ def extract_regex(
lit = pa.scalar
"""Alias for `pyarrow.scalar`."""

int64: Final = pa.int64()
"""Initialized `pyarrow.types.Int64Type`."""


def extract_py_scalar(value: Any, /) -> Any:
from narwhals._arrow.series import maybe_extract_py_scalar
Expand Down Expand Up @@ -435,4 +442,17 @@ def cast_to_comparable_string_types(
return (ca.cast(dtype) for ca in chunked_arrays), lit(separator, dtype)


def int_range(
    start: int, end: int, step: int = 1, *, dtype: pa.DataType = int64
) -> ArrayAny:
    """Return a pyarrow array of integers from `start` to `end`, spaced by `step`.

    On pyarrow older than 21.0.0 the values are produced with `numpy.arange`
    and converted to a pyarrow array of type `dtype`; otherwise the native
    `pyarrow.arange` is used and its result is cast to `dtype`.

    Arguments:
        start: Passed as `start` to the underlying `arange`.
        end: Passed as `stop` to the underlying `arange`.
        step: Passed as `step` to the underlying `arange`.
        dtype: pyarrow data type of the returned array.
    """
    if BACKEND_VERSION < (21, 0, 0):  # pragma: no cover
        import numpy as np  # ignore-banned-import

        np_values = np.arange(start=start, stop=end, step=step)
        return pa.array(np_values, type=dtype)
    # NOTE: Added in https://github.com/apache/arrow/pull/46778
    native_arange = cast("Incomplete", pa.arange)  # type: ignore[attr-defined]
    result: ArrayAny = native_arange(start=start, stop=end, step=step)
    return result.cast(dtype)


class ArrowSeriesNamespace(EagerSeriesNamespace["ArrowSeries", "ChunkedArrayAny"]): ...
51 changes: 50 additions & 1 deletion narwhals/_compliant/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@
NativeFrameT_co,
NativeSeriesT,
)
from narwhals._expression_parsing import is_expr, is_series
from narwhals._expression_parsing import combine_evaluate_output_names, is_expr, is_series
from narwhals._utils import (
exclude_column_names,
get_column_names,
not_implemented,
passthrough_column_names,
)
from narwhals.dependencies import is_numpy_array, is_numpy_array_2d
from narwhals.dtypes import Int64

if TYPE_CHECKING:
from collections.abc import Container, Iterable, Sequence
Expand All @@ -36,6 +38,7 @@
from narwhals.series import Series
from narwhals.typing import (
ConcatMethod,
IntegerDType,
Into1DArray,
IntoDType,
IntoSchema,
Expand Down Expand Up @@ -110,6 +113,14 @@ def when(
def concat_str(
self, *exprs: CompliantExprT, separator: str, ignore_nulls: bool
) -> CompliantExprT: ...
# Signature-only declaration (the body is `...`) of the expression-level
# integer range: `start` and `end` are compliant expressions, `step` is a
# plain int defaulting to 1, and `dtype` is an integer dtype defaulting to
# `Int64`.
# NOTE(review): presumably `dtype` selects the dtype of the produced values -
# confirm against the concrete implementations.
def int_range(
self,
start: CompliantExprT,
end: CompliantExprT,
step: int = 1,
*,
dtype: IntegerDType = Int64,
) -> CompliantExprT: ...
@property
def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ...
def coalesce(self, *exprs: CompliantExprT) -> CompliantExprT: ...
Expand Down Expand Up @@ -156,6 +167,8 @@ def from_native(self, data: NativeFrameT_co | Any, /) -> CompliantLazyFrameT:
msg = f"Unsupported type: {type(data).__name__!r}" # pragma: no cover
raise TypeError(msg)

int_range = not_implemented() # type: ignore[misc]


class EagerNamespace(
DepthTrackingNamespace[EagerDataFrameT, EagerExprT],
Expand Down Expand Up @@ -238,3 +251,39 @@ def concat(
else: # pragma: no cover
raise NotImplementedError
return self._dataframe.from_native(native, context=self)

# Signature-only declaration (the body is `...`) of the eager integer range:
# takes plain-int `start`/`end`/`step` plus keyword-only `dtype` and `name`,
# and is annotated as returning an eager series.
# NOTE(review): presumably each eager backend supplies the implementation -
# confirm against the backend namespaces.
def int_range_eager(
self,
start: int,
end: int,
step: int = 1,
*,
dtype: IntegerDType = Int64,
name: str = "literal",
) -> EagerSeriesT: ...

def int_range(
    self,
    start: EagerExprT,
    end: EagerExprT,
    step: int = 1,
    *,
    dtype: IntegerDType = Int64,
) -> EagerExprT:
    """Build an expression that yields an integer range when evaluated.

    Both `start` and `end` are expressions. At evaluation time the first
    series each one produces is reduced to a scalar via `.item()`, and the
    two scalars are handed to `int_range_eager`. The resulting series takes
    its name from the evaluated `start` series.

    Arguments:
        start: Expression providing the first bound.
        end: Expression providing the second bound.
        step: Increment between consecutive values.
        dtype: Integer dtype forwarded to `int_range_eager`.
    """

    def _int_range(df: EagerDataFrameT) -> list[EagerSeriesT]:
        first = start(df)[0]
        output_name = first.name
        lower = first.item()
        upper = end(df)[0].item()
        series = self.int_range_eager(
            lower, upper, step, dtype=dtype, name=output_name
        )
        return [series]

    # Output names follow `start` only, matching the name used in `_int_range`.
    output_names = combine_evaluate_output_names(start)
    return self._expr._from_callable(
        func=_int_range,
        depth=0,
        function_name="int_range",
        evaluate_output_names=output_names,
        alias_output_names=None,
        context=self,
    )
3 changes: 1 addition & 2 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,7 @@ def estimated_size(self, unit: SizeUnit) -> int | float:
def with_row_index(self, name: str, order_by: Sequence[str] | None) -> Self:
plx = self.__narwhals_namespace__()
if order_by is None:
size = len(self)
data = self._array_funcs.arange(size)
data = self._array_funcs.arange(len(self))

row_index = plx._expr._from_series(
plx._series.from_iterable(
Expand Down
Loading
Loading