Skip to content

Commit

Permalink
PERF-modin-project#6362: Implement 2D setitem without to-pandas conve…
Browse files Browse the repository at this point in the history
…rsion

Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
  • Loading branch information
dchigarev committed Oct 2, 2023
1 parent ebe23d7 commit 9bd8c79
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 6 deletions.
40 changes: 34 additions & 6 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2570,12 +2570,40 @@ def __setitem__(self, key, value):
value = np.array(value)
if len(key) != value.shape[-1]:
raise ValueError("Columns must be same length as key")
new_qc = self._query_compiler.write_items(
slice(None),
self.columns.get_indexer_for(key),
value,
need_columns_reindex=False,
)
if isinstance(value, type(self)):
# importing here to avoid circular import
from .general import concat

# here we iterate over every column in the 'self' frame, then check if it's in the 'key'
# and so has to be taken either from the 'value' or from the 'self'. After that,
# we concatenate those mixed column chunks and get a dataframe with updated columns
to_concat = []
# columns to take for this chunk
to_take = []
# whether columns in this chunk are in the 'key' and have to be taken from the 'value'
is_col_in_key = False
# an object to take columns from for this chunk
src_obj = self
for col in self.columns:
if (col in key) != is_col_in_key:
if len(to_take):
to_concat.append(src_obj.loc[:, to_take])
to_take = [col]
is_col_in_key ^= 1
src_obj = value if is_col_in_key else self
else:
to_take.append(col)
if len(to_take):
to_concat.append(src_obj.loc[:, to_take])

new_qc = concat(to_concat, axis=1)._query_compiler
else:
new_qc = self._query_compiler.write_items(
slice(None),
self.columns.get_indexer_for(key),
value,
need_columns_reindex=False,
)
self._update_inplace(new_qc)
# self.loc[:, key] = value
return
Expand Down
20 changes: 20 additions & 0 deletions modin/pandas/test/dataframe/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2388,6 +2388,26 @@ def build_value_picker(modin_value, pandas_value):
)


def test_setitem_2d_update():
    """Exercise 2D ``__setitem__`` with a dataframe value over several column selections.

    Each selection is run through ``eval_general`` against the same pair of
    modin/pandas frames, in a fixed order, so later selections operate on
    frames already updated by the earlier ones.
    """

    def update_columns(df, iloc):
        """Add 10 to the columns picked positionally by ``iloc`` and return ``df``."""
        selected = df.columns[iloc].tolist()
        shifted = df[selected] + 10
        # list-of-labels key with a dataframe value: the 2D setitem path
        df[selected] = shifted
        return df

    modin_df, pandas_df = create_test_dfs(test_data["int_data"])

    column_selections = (
        [0, 1, 2],
        [0, -1],
        slice(1, None),  # (start=1, stop=None)
        slice(None, -2),  # (start=None, stop=-2)
        [0, 1, 5, 6, 9, 10, -2, -1],
        slice(None, None, 2),  # (start=None, stop=None, step=2)
    )
    for selection in column_selections:
        eval_general(modin_df, pandas_df, update_columns, iloc=selection)


def test___setitem__single_item_in_series():
# Test assigning a single item in a Series for issue
# https://github.com/modin-project/modin/issues/3860
Expand Down
1 change: 1 addition & 0 deletions modin/pandas/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,7 @@ def execute_callable(fn, inplace=False, md_kwargs={}, pd_kwargs={}):
# Run the operation on both the modin and the pandas inputs.
values = execute_callable(
    operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__
)
# NOTE: no `breakpoint()` here -- a leftover debugger hook would suspend every
# call of this helper and hang non-interactive test runs.
# `None` means there is nothing to compare, so the comparator is skipped.
if values is not None:
    comparator(*values, **(comparator_kwargs or {}))

Expand Down

0 comments on commit 9bd8c79

Please sign in to comment.