Skip to content

feat: iloc multiple columns selection. #1437

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 23 additions & 17 deletions bigframes/core/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ def _iloc_getitem_series_or_dataframe(
@typing.overload
def _iloc_getitem_series_or_dataframe(
series_or_dataframe: bigframes.dataframe.DataFrame, key
) -> Union[bigframes.dataframe.DataFrame, pd.Series]:
) -> Union[bigframes.dataframe.DataFrame, pd.Series, bigframes.core.scalar.Scalar]:
...


Expand All @@ -447,18 +447,29 @@ def _iloc_getitem_series_or_dataframe(
return result_pd_df.iloc[0]
elif isinstance(key, slice):
return series_or_dataframe._slice(key.start, key.stop, key.step)
elif isinstance(key, tuple) and len(key) == 0:
return series_or_dataframe
elif isinstance(key, tuple) and len(key) == 1:
return _iloc_getitem_series_or_dataframe(series_or_dataframe, key[0])
elif (
isinstance(key, tuple)
and isinstance(series_or_dataframe, bigframes.dataframe.DataFrame)
and len(key) == 2
):
return series_or_dataframe.iat[key]
elif isinstance(key, tuple):
raise pd.errors.IndexingError("Too many indexers")
if len(key) > 2 or (
len(key) == 2 and isinstance(series_or_dataframe, bigframes.series.Series)
):
raise pd.errors.IndexingError("Too many indexers")

if len(key) == 0:
return series_or_dataframe

if len(key) == 1:
return _iloc_getitem_series_or_dataframe(series_or_dataframe, key[0])

# len(key) == 2
if isinstance(key[1], int):
return series_or_dataframe.iat[key]
elif isinstance(key[1], list):
columns = series_or_dataframe.columns[key[1]]
return _iloc_getitem_series_or_dataframe(
series_or_dataframe[columns], key[0]
)
raise NotImplementedError(
f"iloc does not yet support indexing with {key}. {constants.FEEDBACK_LINK}"
)
elif pd.api.types.is_list_like(key):
if len(key) == 0:
return typing.cast(
Expand Down Expand Up @@ -491,11 +502,6 @@ def _iloc_getitem_series_or_dataframe(
result = result.rename(original_series_name)

return result

elif isinstance(key, tuple):
raise NotImplementedError(
f"iloc does not yet support indexing with a (row, column) tuple. {constants.FEEDBACK_LINK}"
)
elif callable(key):
raise NotImplementedError(
f"iloc does not yet support indexing with a callable. {constants.FEEDBACK_LINK}"
Expand Down
18 changes: 18 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3414,6 +3414,24 @@ def test_iloc_tuple(scalars_df_index, scalars_pandas_df_index, index):
assert bf_result == pd_result


@pytest.mark.parametrize(
    "index",
    [(slice(None), [1, 2, 3]), (slice(1, 7, 2), [2, 5, 3])],
)
def test_iloc_tuple_multi_columns(scalars_df_index, scalars_pandas_df_index, index):
    """Check ``iloc[rows, [cols]]`` with a row slice and a list of column positions.

    Each parametrized ``index`` is a ``(row_slice, column_positions)`` tuple.
    The frame under test is indexed with it and converted via ``to_pandas()``,
    then compared against the same ``iloc`` lookup on the pandas fixture.
    """
    actual_frame = scalars_df_index.iloc[index].to_pandas()
    expected_frame = scalars_pandas_df_index.iloc[index]

    pd.testing.assert_frame_equal(actual_frame, expected_frame)


def test_iloc_tuple_multi_columns_single_row(scalars_df_index, scalars_pandas_df_index):
    """Check ``iloc[row, [cols]]`` with one integer row and a list of column positions.

    The column list is unordered and includes a negative position (``-4``).
    The result is compared as a series directly against the pandas lookup,
    with no ``to_pandas()`` conversion applied first.
    """
    row_and_columns = (2, [2, 1, 3, -4])
    actual_series = scalars_df_index.iloc[row_and_columns]
    expected_series = scalars_pandas_df_index.iloc[row_and_columns]
    pd.testing.assert_series_equal(actual_series, expected_series)


@pytest.mark.parametrize(
("index", "error"),
[
Expand Down