Skip to content

Commit

Permalink
perf(python): Rechunk group-by __iter__
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 13, 2024
1 parent c8fde41 commit d433f66
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion py-polars/polars/dataframe/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,15 @@ def __iter__(self) -> Self:
│ b ┆ 3 │
└─────┴─────┘
"""
+ # Every group gather can trigger a rechunk, so do early.
+ self.df = self.df.rechunk()
temp_col = "__POLARS_GB_GROUP_INDICES"
groups_df = (
self.df.lazy()
.group_by(*self.by, **self.named_by, maintain_order=self.maintain_order)
.agg(F.first().agg_groups().alias(temp_col))
.collect(no_optimization=True)
- ).rechunk()
+ )

self._group_names = groups_df.select(F.all().exclude(temp_col)).iter_rows()
self._group_indices = groups_df.select(temp_col).to_series()
Expand Down

0 comments on commit d433f66

Please sign in to comment.