narwhals-dev · FBruzzesi · Jul 26, 2025 · Jul 26, 2025 · Jul 26, 2025 · Jul 26, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -62,12 +62,12 @@ repos:
       name: don't import from narwhals.dtypes (use `Version.dtypes` instead)
       entry: |
           (?x)
-            import\ narwhals.dtypes|
-            from\ narwhals\ import\ dtypes|
-            from\ narwhals.dtypes\ import\ [^D_]+|
-            import\ narwhals.stable.v1.dtypes|
-            from\ narwhals.stable\.v.\ import\ dtypes|
-            from\ narwhals.stable\.v.\.dtypes\ import
+            import\ narwhals(\.stable\.v\d)?\.dtypes|
+            from\ narwhals(\.stable\.v\d)?\ import\ dtypes|
+            ^from\ narwhals(\.stable\.v\d)?\.dtypes\ import
+              \ (DType,\ )?
+              ((Datetime|Duration|Enum)(,\ )?)+
+              ((,\ )?DType)?
       language: pygrep
       files: ^narwhals/
       exclude: |

diff --git a/docs/api-reference/narwhals.md b/docs/api-reference/narwhals.md
@@ -20,6 +20,7 @@ Here are the top-level functions available in Narwhals.
         - from_numpy
         - generate_temporary_column_name
         - get_native_namespace
+        - int_range
         - is_ordered_categorical
         - len
         - lit

diff --git a/narwhals/__init__.py b/narwhals/__init__.py
@@ -57,6 +57,7 @@
     from_arrow,
     from_dict,
     from_numpy,
+    int_range,
     len_ as len,
     lit,
     max,
@@ -141,6 +142,7 @@
     "from_numpy",
     "generate_temporary_column_name",
     "get_native_namespace",
+    "int_range",
     "is_ordered_categorical",
     "len",
     "lit",

diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py
@@ -8,7 +8,7 @@
 import pyarrow.compute as pc
 
 from narwhals._arrow.series import ArrowSeries
-from narwhals._arrow.utils import native_to_narwhals_dtype
+from narwhals._arrow.utils import int_range, native_to_narwhals_dtype
 from narwhals._compliant import EagerDataFrame
 from narwhals._expression_parsing import ExprKind
 from narwhals._utils import (
@@ -489,16 +489,11 @@ def to_dict(
     def with_row_index(self, name: str, order_by: Sequence[str] | None) -> Self:
         plx = self.__narwhals_namespace__()
         if order_by is None:
-            import numpy as np  # ignore-banned-import
-
-            data = pa.array(np.arange(len(self), dtype=np.int64))
-            row_index = plx._expr._from_series(
-                plx._series.from_iterable(data, context=self, name=name)
-            )
+            row_index = plx._expr._from_series(plx.int_range_eager(0, len(self)))
         else:
             rank = plx.col(order_by[0]).rank("ordinal", descending=False)
-            row_index = (rank.over(partition_by=[], order_by=order_by) - 1).alias(name)
-        return self.select(row_index, plx.all())
+            row_index = rank.over(partition_by=[], order_by=order_by) - 1  # NOTE(review): presumably shifts a 1-based ordinal rank to a 0-based index
+        return self.select(row_index.alias(name), plx.all())  # both branches are named here; the index column is selected first
 
     def filter(self, predicate: ArrowExpr | list[bool | None]) -> Self:
         if isinstance(predicate, list):
@@ -677,10 +672,8 @@ def write_csv(self, file: str | Path | BytesIO | None) -> str | None:
         return None
 
     def is_unique(self) -> ArrowSeries:
-        import numpy as np  # ignore-banned-import
-
         col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns)
-        row_index = pa.array(np.arange(len(self)))
+        row_index = int_range(0, len(self))
         keep_idx = (
             self.native.append_column(col_token, row_index)
             .group_by(self.columns)
@@ -704,8 +697,6 @@ def unique(
     ) -> Self:
         # The param `maintain_order` is only here for compatibility with the Polars API
         # and has no effect on the output.
-        import numpy as np  # ignore-banned-import
-
         if subset and (error := self._check_columns_exist(subset)):
             raise error
         subset = list(subset or self.columns)
@@ -732,7 +723,7 @@ def unique(
             else:
                 native = self.native
             keep_idx_native = (
-                native.append_column(col_token, pa.array(np.arange(len(self))))
+                native.append_column(col_token, int_range(0, len(self)))
                 .group_by(subset)
                 .aggregate([(col_token, agg_func)])
                 .column(f"{col_token}_{agg_func}")
@@ -751,6 +742,8 @@ def gather_every(self, n: int, offset: int) -> Self:
     def to_arrow(self) -> pa.Table:
         return self.native
 
+    # TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range`
+    # https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688
     def sample(
         self,
         n: int | None,

diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py
@@ -12,21 +12,27 @@
 from narwhals._arrow.expr import ArrowExpr
 from narwhals._arrow.selectors import ArrowSelectorNamespace
 from narwhals._arrow.series import ArrowSeries
-from narwhals._arrow.utils import cast_to_comparable_string_types
+from narwhals._arrow.utils import (
+    cast_to_comparable_string_types,
+    chunked_array,
+    int_range,
+    narwhals_to_native_dtype,
+)
 from narwhals._compliant import CompliantThen, EagerNamespace, EagerWhen
 from narwhals._expression_parsing import (
     combine_alias_output_names,
     combine_evaluate_output_names,
 )
 from narwhals._utils import Implementation
+from narwhals.dtypes import Int64
 
 if TYPE_CHECKING:
     from collections.abc import Iterator, Sequence
 
     from narwhals._arrow.typing import ArrayOrScalar, ChunkedArrayAny, Incomplete
     from narwhals._compliant.typing import ScalarKwargs
     from narwhals._utils import Version
-    from narwhals.typing import IntoDType, NonNestedLiteral
+    from narwhals.typing import IntegerDType, IntoDType, NonNestedLiteral
 
 
 class ArrowNamespace(
@@ -278,6 +284,19 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
             context=self,
         )
 
+    def int_range_eager(
+        self,
+        start: int,
+        end: int,
+        step: int = 1,
+        *,
+        dtype: IntegerDType = Int64,  # narwhals dtype; converted to a pyarrow dtype below
+        name: str = "literal",  # name given to the returned series
+    ) -> ArrowSeries:  # builds the integer range eagerly and wraps it as an `ArrowSeries`
+        dtype_pa = narwhals_to_native_dtype(dtype, version=self._version)
+        data = int_range(start=start, end=end, step=step, dtype=dtype_pa)  # pyarrow array from `utils.int_range`
+        return self._series.from_native(chunked_array([data]), name=name, context=self)
+
 
 class ArrowWhen(EagerWhen[ArrowDataFrame, ArrowSeries, ArrowExpr, "ChunkedArrayAny"]):
     @property

diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
@@ -15,6 +15,7 @@
     chunked_array,
     extract_native,
     floordiv_compat,
+    int_range,
     is_array_or_scalar,
     lit,
     narwhals_to_native_dtype,
@@ -632,6 +633,8 @@ def zip_with(self, mask: Self, other: Self) -> Self:
         cond = mask.native.combine_chunks()
         return self._with_native(pc.if_else(cond, self.native, other.native))
 
+    # TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range`
+    # https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688
     def sample(
         self,
         n: int | None,
@@ -670,7 +673,7 @@ def fill_aux(
             # then it calculates the distance of each new index and the original index
             # if the distance is equal to or less than the limit and the original value is null, it is replaced
             valid_mask = pc.is_valid(arr)
-            indices = pa.array(np.arange(len(arr)), type=pa.int64())
+            indices = int_range(0, len(arr))
             if direction == "forward":
                 valid_index = np.maximum.accumulate(np.where(valid_mask, indices, -1))
                 distance = indices - valid_index
@@ -717,9 +720,7 @@ def is_unique(self) -> ArrowSeries:
         return self.to_frame().is_unique().alias(self.name)
 
     def is_first_distinct(self) -> Self:
-        import numpy as np  # ignore-banned-import
-
-        row_number = pa.array(np.arange(len(self)))
+        row_number = int_range(0, len(self))
         col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name])
         first_distinct_index = (
             pa.Table.from_arrays([self.native], names=[self.name])
@@ -732,9 +733,7 @@ def is_first_distinct(self) -> Self:
         return self._with_native(pc.is_in(row_number, first_distinct_index))
 
     def is_last_distinct(self) -> Self:
-        import numpy as np  # ignore-banned-import
-
-        row_number = pa.array(np.arange(len(self)))
+        row_number = int_range(0, len(self))
         col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name])
         last_distinct_index = (
             pa.Table.from_arrays([self.native], names=[self.name])
@@ -790,6 +789,8 @@ def sort(self, *, descending: bool, nulls_last: bool) -> Self:
         )
         return self._with_native(self.native.take(sorted_indices))
 
+    # TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range`
+    # https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688
     def to_dummies(self, *, separator: str, drop_first: bool) -> ArrowDataFrame:
         import numpy as np  # ignore-banned-import
 
@@ -1156,6 +1157,8 @@ def _calculate_bins(self, bin_count: int) -> _1DArray:
             upper += 0.5
         return self._linear_space(lower, upper, bin_count + 1)
 
+    # TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range`
+    # https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688
     def _calculate_hist(self, bins: list[float] | _1DArray) -> ArrowHistData:
         ser = self.native
         # NOTE: `mypy` refuses to resolve `ndarray.__getitem__`

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
@@ -1,13 +1,13 @@
 from __future__ import annotations
 
 from functools import lru_cache
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, Any, Final, cast
 
 import pyarrow as pa
 import pyarrow.compute as pc
 
 from narwhals._compliant import EagerSeriesNamespace
-from narwhals._utils import Version, isinstance_or_issubclass
+from narwhals._utils import Implementation, Version, isinstance_or_issubclass
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator, Mapping
@@ -21,6 +21,7 @@
         ArrayOrScalarT1,
         ArrayOrScalarT2,
         ChunkedArrayAny,
+        Incomplete,
         NativeIntervalUnit,
         ScalarAny,
     )
@@ -57,6 +58,9 @@ def extract_regex(
         is_timestamp,
     )
 
+BACKEND_VERSION = Implementation.PYARROW._backend_version()
+"""Static backend version for `pyarrow`."""
+
 UNITS_DICT: Mapping[IntervalUnit, NativeIntervalUnit] = {
     "y": "year",
     "q": "quarter",
@@ -73,6 +77,9 @@ def extract_regex(
 lit = pa.scalar
 """Alias for `pyarrow.scalar`."""
 
+int64: Final = pa.int64()
+"""Initialized `pyarrow.types.Int64Type`."""
+
 
 def extract_py_scalar(value: Any, /) -> Any:
     from narwhals._arrow.series import maybe_extract_py_scalar
@@ -435,4 +442,17 @@ def cast_to_comparable_string_types(
     return (ca.cast(dtype) for ca in chunked_arrays), lit(separator, dtype)
 
 
+def int_range(
+    start: int, end: int, step: int = 1, *, dtype: pa.DataType = int64
+) -> ArrayAny:  # integer range from `start` to `end` by `step`, returned as a pyarrow array of `dtype`
+    if BACKEND_VERSION < (21, 0, 0):  # pragma: no cover
+        import numpy as np  # ignore-banned-import
+
+        return pa.array(np.arange(start=start, stop=end, step=step), type=dtype)
+    # NOTE: `pa.arange` was added in https://github.com/apache/arrow/pull/46778 (pyarrow < 21 takes the numpy branch above)
+    pa_arange = cast("Incomplete", pa.arange)  # type: ignore[attr-defined]
+    arr: ArrayAny = pa_arange(start=start, stop=end, step=step)
+    return arr.cast(dtype)  # the range is built without a dtype argument, so cast to the requested one
+
+
 class ArrowSeriesNamespace(EagerSeriesNamespace["ArrowSeries", "ChunkedArrayAny"]): ...
diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py
@@ -16,13 +16,15 @@
     NativeFrameT_co,
     NativeSeriesT,
 )
-from narwhals._expression_parsing import is_expr, is_series
+from narwhals._expression_parsing import combine_evaluate_output_names, is_expr, is_series
 from narwhals._utils import (
     exclude_column_names,
     get_column_names,
+    not_implemented,
     passthrough_column_names,
 )
 from narwhals.dependencies import is_numpy_array, is_numpy_array_2d
+from narwhals.dtypes import Int64
 
 if TYPE_CHECKING:
     from collections.abc import Container, Iterable, Sequence
@@ -36,6 +38,7 @@
     from narwhals.series import Series
     from narwhals.typing import (
         ConcatMethod,
+        IntegerDType,
         Into1DArray,
         IntoDType,
         IntoSchema,
@@ -110,6 +113,14 @@ def when(
     def concat_str(
         self, *exprs: CompliantExprT, separator: str, ignore_nulls: bool
     ) -> CompliantExprT: ...
+    def int_range(  # expression-level integer range; only the signature is declared here (`...` body)
+        self,
+        start: CompliantExprT,  # range bounds are passed as expressions, not plain ints
+        end: CompliantExprT,
+        step: int = 1,
+        *,
+        dtype: IntegerDType = Int64,
+    ) -> CompliantExprT: ...
     @property
     def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ...
     def coalesce(self, *exprs: CompliantExprT) -> CompliantExprT: ...
@@ -156,6 +167,8 @@ def from_native(self, data: NativeFrameT_co | Any, /) -> CompliantLazyFrameT:
         msg = f"Unsupported type: {type(data).__name__!r}"  # pragma: no cover
         raise TypeError(msg)
 
+    int_range = not_implemented()  # type: ignore[misc]
+
 
 class EagerNamespace(
     DepthTrackingNamespace[EagerDataFrameT, EagerExprT],
@@ -238,3 +251,39 @@ def concat(
         else:  # pragma: no cover
             raise NotImplementedError
         return self._dataframe.from_native(native, context=self)
+
+    def int_range_eager(  # signature-only stub (`...` body); `int_range` in this class calls it with evaluated int bounds
+        self,
+        start: int,
+        end: int,
+        step: int = 1,
+        *,
+        dtype: IntegerDType = Int64,
+        name: str = "literal",  # name for the resulting series
+    ) -> EagerSeriesT: ...
+
+    def int_range(
+        self,
+        start: EagerExprT,
+        end: EagerExprT,
+        step: int = 1,
+        *,
+        dtype: IntegerDType = Int64,
+    ) -> EagerExprT:  # expression that evaluates `start`/`end` against a frame, then delegates to `int_range_eager`
+        def func(df: EagerDataFrameT) -> list[EagerSeriesT]:
+            start_eval = start(df)[0]  # first series produced by evaluating `start` on `df`
+            name = start_eval.name  # the output series reuses the name of `start`'s result
+            start_value = start_eval.item()  # NOTE(review): presumably requires a single-element series - confirm
+            end_value = end(df)[0].item()
+            return [
+                self.int_range_eager(start_value, end_value, step, dtype=dtype, name=name)
+            ]
+
+        return self._expr._from_callable(
+            func=func,
+            depth=0,
+            function_name="int_range",
+            evaluate_output_names=combine_evaluate_output_names(start),  # names derive from `start` only
+            alias_output_names=None,
+            context=self,
+        )
diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py
@@ -420,8 +420,7 @@ def estimated_size(self, unit: SizeUnit) -> int | float:
     def with_row_index(self, name: str, order_by: Sequence[str] | None) -> Self:
         plx = self.__narwhals_namespace__()
         if order_by is None:
-            size = len(self)
-            data = self._array_funcs.arange(size)
+            data = self._array_funcs.arange(len(self))
 
             row_index = plx._expr._from_series(
                 plx._series.from_iterable(