Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

depr(python,rust!): Rename with_row_count to with_row_index #13494

Merged
merged 7 commits into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Rename on Python side
  • Loading branch information
stinodego committed Jan 8, 2024
commit 7cd4604f3871ddfad7aa603b6d36412d7610b8a0
42 changes: 40 additions & 2 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5205,6 +5205,44 @@ def pipe(
"""
return function(self, *args, **kwargs)

def with_row_number(self, name: str = "row_number", offset: int = 0) -> Self:
    """
    Add a column at index 0 that counts the rows.

    Parameters
    ----------
    name
        Name of the column to add.
    offset
        Start the row count at this offset.

    Returns
    -------
    Self
        The frame built by `_from_pydf` from the underlying frame with the
        row number column added.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 3, 5],
    ...         "b": [2, 4, 6],
    ...     }
    ... )
    >>> df.with_row_number()
    shape: (3, 3)
    ┌────────────┬─────┬─────┐
    │ row_number ┆ a   ┆ b   │
    │ ---        ┆ --- ┆ --- │
    │ u32        ┆ i64 ┆ i64 │
    ╞════════════╪═════╪═════╡
    │ 0          ┆ 1   ┆ 2   │
    │ 1          ┆ 3   ┆ 4   │
    │ 2          ┆ 5   ┆ 6   │
    └────────────┴─────┴─────┘
    """
    # Delegate to the underlying frame object, then wrap the result back up.
    return self._from_pydf(self._df.with_row_number(name, offset))

@deprecate_function(
"Use `with_row_number` instead."
"Note that the default column name has changed from 'row_nr' to 'row_number'.",
version="0.20.4",
)
def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
"""
Add a column at index 0 that counts the rows.
Expand All @@ -5224,7 +5262,7 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
... "b": [2, 4, 6],
... }
... )
>>> df.with_row_count()
>>> df.with_row_number()
shape: (3, 3)
┌────────┬─────┬─────┐
│ row_nr ┆ a ┆ b │
Expand All @@ -5236,7 +5274,7 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
│ 2 ┆ 5 ┆ 6 │
└────────┴─────┴─────┘
"""
return self._from_pydf(self._df.with_row_count(name, offset))
return self.with_row_number(name, offset)

def group_by(
self,
Expand Down
55 changes: 49 additions & 6 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ def _scan_parquet(
if n_rows:
scan = scan.head(n_rows)
if row_count_name is not None:
scan = scan.with_row_count(row_count_name, row_count_offset)
scan = scan.with_row_number(row_count_name, row_count_offset)
return scan # type: ignore[return-value]

if storage_options:
Expand Down Expand Up @@ -504,7 +504,7 @@ def _scan_ipc(
if n_rows:
scan = scan.head(n_rows)
if row_count_name is not None:
scan = scan.with_row_count(row_count_name, row_count_offset)
scan = scan.with_row_number(row_count_name, row_count_offset)
return scan # type: ignore[return-value]

self = cls.__new__(cls)
Expand Down Expand Up @@ -4563,6 +4563,49 @@ def approx_n_unique(self) -> Self:
"""
return self.select(F.all().approx_n_unique())

def with_row_number(self, name: str = "row_number", offset: int = 0) -> Self:
    """
    Add a column at index 0 with the row number.

    Parameters
    ----------
    name
        Name of the column to add.
    offset
        Start the row count at this offset.

    Returns
    -------
    Self
        The lazy frame built by `_from_pyldf` from the underlying lazy frame
        with the row number column added.

    Warnings
    --------
    Using this function can have a negative effect on query performance.
    This may, for instance, block predicate pushdown optimization.

    Examples
    --------
    >>> lf = pl.LazyFrame(
    ...     {
    ...         "a": [1, 3, 5],
    ...         "b": [2, 4, 6],
    ...     }
    ... )
    >>> lf.with_row_number().collect()
    shape: (3, 3)
    ┌────────────┬─────┬─────┐
    │ row_number ┆ a   ┆ b   │
    │ ---        ┆ --- ┆ --- │
    │ u32        ┆ i64 ┆ i64 │
    ╞════════════╪═════╪═════╡
    │ 0          ┆ 1   ┆ 2   │
    │ 1          ┆ 3   ┆ 4   │
    │ 2          ┆ 5   ┆ 6   │
    └────────────┴─────┴─────┘
    """
    # Delegate to the underlying lazy frame object, then wrap the result back up.
    return self._from_pyldf(self._ldf.with_row_number(name, offset))

@deprecate_function(
"Use `with_row_number` instead."
"Note that the default column name has changed from 'row_nr' to 'row_number'.",
version="0.20.4",
)
def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
"""
Add a column at index 0 that counts the rows.
Expand All @@ -4587,7 +4630,7 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
... "b": [2, 4, 6],
... }
... )
>>> lf.with_row_count().collect()
>>> lf.with_row_number().collect()
shape: (3, 3)
┌────────┬─────┬─────┐
│ row_nr ┆ a ┆ b │
Expand All @@ -4599,7 +4642,7 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
│ 2 ┆ 5 ┆ 6 │
└────────┴─────┴─────┘
"""
return self._from_pyldf(self._ldf.with_row_count(name, offset))
return self.with_row_number(name, offset)

def gather_every(self, n: int, offset: int = 0) -> Self:
"""
Expand Down Expand Up @@ -5784,8 +5827,8 @@ def update(
# no keys provided--use row count
row_count_used = True
row_count_name = "__POLARS_ROW_COUNT"
self = self.with_row_count(row_count_name)
other = other.with_row_count(row_count_name)
self = self.with_row_number(row_count_name)
other = other.with_row_number(row_count_name)
left_on = right_on = [row_count_name]
else:
# one of left or right is missing, raise error
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1136,7 +1136,7 @@ impl PyDataFrame {
}
}

pub fn with_row_count(&self, name: &str, offset: Option<IdxSize>) -> PyResult<Self> {
pub fn with_row_number(&self, name: &str, offset: Option<IdxSize>) -> PyResult<Self> {
let df = self
.df
.with_row_count(name, offset)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/lazyframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -986,7 +986,7 @@ impl PyLazyFrame {
ldf.melt(args).into()
}

fn with_row_count(&self, name: &str, offset: Option<IdxSize>) -> Self {
fn with_row_number(&self, name: &str, offset: Option<IdxSize>) -> Self {
let ldf = self.ldf.clone();
ldf.with_row_count(name, offset).into()
}
Expand Down