Skip to content

Commit c8ec245

Browse files
feat: add nunique method to Series/DataFrameGroupby (#256)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
1 parent 89a1c67 commit c8ec245

File tree

3 files changed

+26
-0
lines changed

3 files changed

+26
-0
lines changed

bigframes/core/groupby/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,9 @@ def any(self) -> df.DataFrame:
179179
def count(self) -> df.DataFrame:
180180
return self._aggregate_all(agg_ops.count_op)
181181

182+
def nunique(self) -> df.DataFrame:
183+
return self._aggregate_all(agg_ops.nunique_op)
184+
182185
def cumsum(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
183186
if not numeric_only:
184187
self._raise_on_non_numeric("cumsum")
@@ -442,6 +445,9 @@ def max(self, *args) -> series.Series:
442445
def count(self) -> series.Series:
443446
return self._aggregate(agg_ops.count_op)
444447

448+
def nunique(self) -> series.Series:
449+
return self._aggregate(agg_ops.nunique_op)
450+
445451
def sum(self, *args) -> series.Series:
446452
return self._aggregate(agg_ops.sum_op)
447453

tests/system/small/test_groupby.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,11 +69,13 @@ def test_dataframe_groupby_median(scalars_df_index, scalars_pandas_df_index):
6969
("operator"),
7070
[
7171
(lambda x: x.count()),
72+
(lambda x: x.nunique()),
7273
(lambda x: x.any()),
7374
(lambda x: x.all()),
7475
],
7576
ids=[
7677
"count",
78+
"nunique",
7779
"any",
7880
"all",
7981
],

third_party/bigframes_vendored/pandas/core/groupby/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -363,6 +363,15 @@ def agg(self, func):
363363
"""
364364
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
365365

366+
def nunique(self):
367+
"""
368+
Return number of unique elements in the group.
369+
370+
Returns:
371+
Series: Number of unique values within each group.
372+
"""
373+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
374+
366375

367376
class DataFrameGroupBy(GroupBy):
368377
def agg(self, func, **kwargs):
@@ -391,3 +400,12 @@ def agg(self, func, **kwargs):
391400
DataFrame
392401
"""
393402
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
403+
404+
def nunique(self):
405+
"""
406+
Return DataFrame with counts of unique elements in each position.
407+
408+
Returns:
409+
DataFrame
410+
"""
411+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 commit comments

Comments
 (0)