Skip to content

Commit 740c451

Browse files
milkshakeiii, Henry J Solberg, and tswast
authored
feat: Allow df.drop to take an index object (#68)
* feat: Allow passing index objects to df.drop
* remove notebook files
* add first implementation for df.drop(index)
* use index_columns property
* don't use _expr.keys()
* fix order bug and add test
* fix index names
* support multiindex
* remove accidentally added files
* add type hint
* remove debug print statements
* fix: fix df/series.iloc by list with multiindex
* complete merge
* remove unneeded isinstance
* refactor _drop_by_index

---------

Co-authored-by: Henry J Solberg <henryjsolberg@google.com>
Co-authored-by: Tim Swast <swast@google.com>
1 parent c0efec8 commit 740c451

File tree

2 files changed

+80
-2
lines changed

2 files changed

+80
-2
lines changed

bigframes/dataframe.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -937,7 +937,7 @@ def drop(
937937
columns = labels
938938

939939
block = self._block
940-
if index:
940+
if index is not None:
941941
level_id = self._resolve_levels(level or 0)[0]
942942

943943
if utils.is_list_like(index):
@@ -947,6 +947,8 @@ def drop(
947947
block, condition_id = block.apply_unary_op(
948948
inverse_condition_id, ops.invert_op
949949
)
950+
elif isinstance(index, indexes.Index):
951+
return self._drop_by_index(index)
950952
else:
951953
block, condition_id = block.apply_unary_op(
952954
level_id, ops.partial_right(ops.ne_op, index)
@@ -956,10 +958,31 @@ def drop(
956958
)
957959
if columns:
958960
block = block.drop_columns(self._sql_names(columns))
959-
if not index and not columns:
961+
if index is None and not columns:
960962
raise ValueError("Must specify 'labels' or 'index'/'columns")
961963
return DataFrame(block)
962964

965+
def _drop_by_index(self, index: indexes.Index) -> DataFrame:
    """Return a new DataFrame without the rows whose labels appear in ``index``.

    The block behind ``index`` is given an extra column via
    ``promote_offsets()``, and this frame's index is joined against it.
    Rows for which that extra column is null after the join are kept;
    all other rows are removed.

    Args:
        index: Index object holding the labels to drop.

    Returns:
        A DataFrame built from the filtered block, restricted to this
        frame's value columns.
    """
    # NOTE(review): presumably promote_offsets() yields a non-null per-row
    # offsets column and Index.join defaults to a left join, so a null here
    # means "label not present in `index`" -- confirm against those helpers.
    labels_block, offsets_id = index._data._get_block().promote_offsets()
    joined, (left_id_of, right_id_of) = self._block.index.join(labels_block.index)

    # Translate the offsets column id into its id within the joined block.
    joined_offsets_id = right_id_of(offsets_id)
    result_block, keep_mask_id = joined._block.apply_unary_op(
        joined_offsets_id, ops.isnull_op
    )
    result_block = result_block.filter(keep_mask_id)

    # Keep only the columns that came from this frame's value columns.
    kept_value_ids = [left_id_of(col) for col in self._block.value_columns]
    return DataFrame(result_block.select_columns(kept_value_ids))
985+
963986
def droplevel(self, level: LevelsType, axis: int | str = 0):
964987
axis_n = utils.get_axis_number(axis)
965988
if axis_n == 0:

tests/system/small/test_dataframe.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,61 @@ def test_drop_index(scalars_dfs):
258258
pd.testing.assert_frame_equal(pd_result, bf_result)
259259

260260

261+
def test_drop_pandas_index(scalars_dfs):
    """Dropping rows by a pandas Index gives the same frame as pandas."""
    bf_df, pd_df = scalars_dfs
    labels_to_drop = pd_df.iloc[[4, 1, 2]].index

    expected = pd_df.drop(index=labels_to_drop)
    actual = bf_df.drop(index=labels_to_drop).to_pandas()

    pd.testing.assert_frame_equal(expected, actual)
269+
270+
271+
def test_drop_bigframes_index(scalars_dfs):
    """Dropping rows by an Index taken from the frame under test matches pandas."""
    bf_df, pd_df = scalars_dfs
    bf_labels = bf_df.loc[[4, 1, 2]].index
    pd_labels = pd_df.loc[[4, 1, 2]].index

    expected = pd_df.drop(index=pd_labels)
    actual = bf_df.drop(index=bf_labels).to_pandas()

    pd.testing.assert_frame_equal(expected, actual)
280+
281+
282+
def test_drop_bigframes_index_with_na(scalars_dfs):
    """Dropping by Index matches pandas when "bytes_col" is used as the index."""
    bf_df, pd_df = scalars_dfs
    # Work on copies re-indexed by "bytes_col".
    bf_df = bf_df.copy().set_index("bytes_col")
    pd_df = pd_df.copy().set_index("bytes_col")
    bf_labels = bf_df.iloc[[3, 5]].index
    pd_labels = pd_df.iloc[[3, 5]].index

    expected = pd_df.drop(index=pd_labels)
    actual = bf_df.drop(index=bf_labels).to_pandas()

    pd.testing.assert_frame_equal(expected, actual)
295+
296+
297+
def test_drop_bigframes_multiindex(scalars_dfs):
    """Dropping by a two-level Index matches pandas."""
    index_cols = ["bytes_col", "numeric_col"]
    bf_df, pd_df = scalars_dfs
    bf_df = bf_df.copy()
    pd_df = pd_df.copy()

    # Build the labels to drop from a three-row subset, re-indexed on both columns.
    bf_labels = bf_df.iloc[[4, 1, 2]].set_index(index_cols).index
    pd_labels = pd_df.iloc[[4, 1, 2]].set_index(index_cols).index

    bf_df = bf_df.set_index(index_cols)
    pd_df = pd_df.set_index(index_cols)
    actual = bf_df.drop(index=bf_labels).to_pandas()
    expected = pd_df.drop(index=pd_labels)

    pd.testing.assert_frame_equal(expected, actual)
314+
315+
261316
def test_drop_labels_axis_0(scalars_dfs):
262317
scalars_df, scalars_pandas_df = scalars_dfs
263318

0 commit comments

Comments
 (0)