Skip to content

Commit

Permalink
fix cases with differently labeled frames
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
  • Loading branch information
dchigarev committed Oct 3, 2023
1 parent b3a7935 commit 6563dcb
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 13 deletions.
9 changes: 7 additions & 2 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2574,6 +2574,11 @@ def __setitem__(self, key, value):
# importing here to avoid circular import
from .general import concat

if not value.columns.equals(pandas.Index(key)):
# we only need to change the labels, so shallow copy here
value = value.copy(deep=False)
value.columns = key

# here we iterate over every column in the 'self' frame, then check if it's in the 'key'
# and so has to be taken either from the 'value' or from the 'self'. After that,
# we concatenate those mixed column chunks and get a dataframe with updated columns
Expand All @@ -2587,14 +2592,14 @@ def __setitem__(self, key, value):
for col in self.columns:
if (col in key) != is_col_in_key:
if len(to_take):
to_concat.append(src_obj.loc[:, to_take])
to_concat.append(src_obj[to_take])
to_take = [col]
is_col_in_key ^= 1
src_obj = value if is_col_in_key else self
else:
to_take.append(col)
if len(to_take):
to_concat.append(src_obj.loc[:, to_take])
to_concat.append(src_obj[to_take])

new_qc = concat(to_concat, axis=1)._query_compiler
else:
Expand Down
40 changes: 29 additions & 11 deletions modin/pandas/test/dataframe/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2388,23 +2388,41 @@ def build_value_picker(modin_value, pandas_value):
)


def test_setitem_2d_update():
def test(df, iloc):
@pytest.mark.parametrize("does_value_have_different_columns", [True, False])
def test_setitem_2d_update(does_value_have_different_columns):
def test(dfs, iloc):
"""Update columns on the given numeric indices."""
cols = df.columns[iloc].tolist()
df[cols] = df[cols] + 10
return df
df1, df2 = dfs
cols1 = df1.columns[iloc].tolist()
cols2 = df2.columns[iloc].tolist()
df1[cols1] = df2[cols2]
return df1

modin_df, pandas_df = create_test_dfs(test_data["int_data"])
eval_general(modin_df, pandas_df, test, iloc=[0, 1, 2])
eval_general(modin_df, pandas_df, test, iloc=[0, -1])
eval_general(modin_df, pandas_df, test, iloc=slice(1, None)) # (start=1, stop=None)
modin_df2, pandas_df2 = create_test_dfs(test_data["int_data"])
modin_df2 *= 10
pandas_df2 *= 10

if does_value_have_different_columns:
new_columns = [f"{col}_new" for col in modin_df.columns]
modin_df2.columns = new_columns
pandas_df2.columns = new_columns

modin_dfs = (modin_df, modin_df2)
pandas_dfs = (pandas_df, pandas_df2)

eval_general(modin_dfs, pandas_dfs, test, iloc=[0, 1, 2])
eval_general(modin_dfs, pandas_dfs, test, iloc=[0, -1])
eval_general(
modin_dfs, pandas_dfs, test, iloc=slice(1, None)
) # (start=1, stop=None)
eval_general(
modin_df, pandas_df, test, iloc=slice(None, -2)
modin_dfs, pandas_dfs, test, iloc=slice(None, -2)
) # (start=None, stop=-2)
eval_general(modin_df, pandas_df, test, iloc=[0, 1, 5, 6, 9, 10, -2, -1])
eval_general(modin_dfs, pandas_dfs, test, iloc=[0, 1, 5, 6, 9, 10, -2, -1])
eval_general(modin_dfs, pandas_dfs, test, iloc=[5, 4, 0, 10, 1, -1])
eval_general(
modin_df, pandas_df, test, iloc=slice(None, None, 2)
modin_dfs, pandas_dfs, test, iloc=slice(None, None, 2)
) # (start=None, stop=None, step=2)


Expand Down

0 comments on commit 6563dcb

Please sign in to comment.