Skip to content

Commit d3fa6f2

Browse files
fix: fix value_counts column label for normalize=True (#245)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
1 parent c8ec245 commit d3fa6f2

File tree

3 files changed

+9
-12
lines changed

3 files changed

+9
-12
lines changed

bigframes/core/block_transforms.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -353,7 +353,9 @@ def value_counts(
353353
)
354354
]
355355
)
356-
return block.select_column(count_id).with_column_labels(["count"])
356+
return block.select_column(count_id).with_column_labels(
357+
["proportion" if normalize else "count"]
358+
)
357359

358360

359361
def pct_change(block: blocks.Block, periods: int = 1) -> blocks.Block:

tests/system/small/test_dataframe.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3453,6 +3453,8 @@ def test_df_to_orc(scalars_df_index, scalars_pandas_df_index):
34533453
],
34543454
)
34553455
def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna):
3456+
if pd.__version__.startswith("1."):
3457+
pytest.skip("pandas 1.x produces different column labels.")
34563458
scalars_df, scalars_pandas_df = scalars_dfs
34573459

34583460
bf_result = (
@@ -3464,10 +3466,6 @@ def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna):
34643466
subset, normalize=normalize, ascending=ascending, dropna=dropna
34653467
)
34663468

3467-
# Older pandas version may not have these values, bigframes tries to emulate 2.0+
3468-
pd_result.name = "count"
3469-
pd_result.index.names = bf_result.index.names
3470-
34713469
pd.testing.assert_series_equal(
34723470
bf_result, pd_result, check_dtype=False, check_index_type=False
34733471
)

tests/system/small/test_series.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1940,23 +1940,23 @@ def test_cummax_int(scalars_df_index, scalars_pandas_df_index):
19401940

19411941

19421942
def test_value_counts(scalars_dfs):
1943+
if pd.__version__.startswith("1."):
1944+
pytest.skip("pandas 1.x produces different column labels.")
19431945
scalars_df, scalars_pandas_df = scalars_dfs
19441946
col_name = "int64_too"
19451947

19461948
bf_result = scalars_df[col_name].value_counts().to_pandas()
19471949
pd_result = scalars_pandas_df[col_name].value_counts()
19481950

1949-
# Older pandas version may not have these values, bigframes tries to emulate 2.0+
1950-
pd_result.name = "count"
1951-
pd_result.index.name = col_name
1952-
19531951
pd.testing.assert_series_equal(
19541952
bf_result,
19551953
pd_result,
19561954
)
19571955

19581956

19591957
def test_value_counts_w_cut(scalars_dfs):
1958+
if pd.__version__.startswith("1."):
1959+
pytest.skip("value_counts results different in pandas 1.x.")
19601960
scalars_df, scalars_pandas_df = scalars_dfs
19611961
col_name = "int64_col"
19621962

@@ -1965,9 +1965,6 @@ def test_value_counts_w_cut(scalars_dfs):
19651965

19661966
bf_result = bf_cut.value_counts().to_pandas()
19671967
pd_result = pd_cut.value_counts()
1968-
# Older pandas version may not have these values, bigframes tries to emulate 2.0+
1969-
pd_result.name = "count"
1970-
pd_result.index.name = col_name
19711968
pd_result.index = pd_result.index.astype(pd.Int64Dtype())
19721969

19731970
pd.testing.assert_series_equal(

0 commit comments

Comments (0)