diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
index bdf256af1b1c3..666d88e822145 100644
--- a/py-polars/polars/dataframe/frame.py
+++ b/py-polars/polars/dataframe/frame.py
@@ -5207,14 +5207,19 @@ def pipe(
 
     def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
         """
-        Add a column at index 0 that counts the rows.
+        Add a row index as the first column in the DataFrame.
 
         Parameters
         ----------
         name
-            Name of the column to add.
+            Name of the index column.
         offset
-            Start the row count at this offset. Default = 0
+            Start the index at this offset. Cannot be negative.
+
+        Notes
+        -----
+        The resulting column does not have any special properties. It is a regular
+        column of type `UInt32` (or `UInt64` in `polars-u64-idx`).
 
         Examples
         --------
@@ -5235,8 +5240,24 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
         │ 1     ┆ 3   ┆ 4   │
         │ 2     ┆ 5   ┆ 6   │
         └───────┴─────┴─────┘
+        >>> df.with_row_index("id", offset=1000)
+        shape: (3, 3)
+        ┌──────┬─────┬─────┐
+        │ id   ┆ a   ┆ b   │
+        │ ---  ┆ --- ┆ --- │
+        │ u32  ┆ i64 ┆ i64 │
+        ╞══════╪═════╪═════╡
+        │ 1000 ┆ 1   ┆ 2   │
+        │ 1001 ┆ 3   ┆ 4   │
+        │ 1002 ┆ 5   ┆ 6   │
+        └──────┴─────┴─────┘
         """
-        return self._from_pydf(self._df.with_row_index(name, offset))
+        try:
+            return self._from_pydf(self._df.with_row_index(name, offset))
+        except OverflowError:
+            issue = "negative" if offset < 0 else "greater than the maximum index value"
+            msg = f"`offset` input for `with_row_index` cannot be {issue}, got {offset}"
+            raise ValueError(msg) from None
 
     @deprecate_function(
         "Use `with_row_index` instead."
diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py
index d0594b5f21f43..448ef652cd5e7 100644
--- a/py-polars/polars/lazyframe/frame.py
+++ b/py-polars/polars/lazyframe/frame.py
@@ -4565,20 +4565,25 @@ def approx_n_unique(self) -> Self:
 
     def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
         """
-        Add a column at index 0 with the row number.
+        Add a row index as the first column in the LazyFrame.
 
         Parameters
         ----------
         name
-            Name of the column to add.
+            Name of the index column.
         offset
-            Start the row count at this offset.
+            Start the index at this offset. Cannot be negative.
 
         Warnings
         --------
         Using this function can have a negative effect on query performance.
         This may, for instance, block predicate pushdown optimization.
 
+        Notes
+        -----
+        The resulting column does not have any special properties. It is a regular
+        column of type `UInt32` (or `UInt64` in `polars-u64-idx`).
+
         Examples
         --------
         >>> lf = pl.LazyFrame(
@@ -4598,6 +4603,22 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
         │ 1     ┆ 3   ┆ 4   │
         │ 2     ┆ 5   ┆ 6   │
         └───────┴─────┴─────┘
+        >>> lf.with_row_index("id", offset=1000).collect()
+        shape: (3, 3)
+        ┌──────┬─────┬─────┐
+        │ id   ┆ a   ┆ b   │
+        │ ---  ┆ --- ┆ --- │
+        │ u32  ┆ i64 ┆ i64 │
+        ╞══════╪═════╪═════╡
+        │ 1000 ┆ 1   ┆ 2   │
+        │ 1001 ┆ 3   ┆ 4   │
+        │ 1002 ┆ 5   ┆ 6   │
+        └──────┴─────┴─────┘
         """
-        return self._from_pyldf(self._ldf.with_row_index(name, offset))
+        try:
+            return self._from_pyldf(self._ldf.with_row_index(name, offset))
+        except OverflowError:
+            issue = "negative" if offset < 0 else "greater than the maximum index value"
+            msg = f"`offset` input for `with_row_index` cannot be {issue}, got {offset}"
+            raise ValueError(msg) from None
 