Fix 2D `__setitem__` for cases where the value frame's column labels differ from the key

Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
dchigarev · Oct 3, 2023 · 6563dcb · 6563dcb
1 parent b3a7935
commit 6563dcb
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 13 deletions.
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
@@ -2574,6 +2574,11 @@ def __setitem__(self, key, value):
                     # importing here to avoid circular import
                     from .general import concat
 
+                    if not value.columns.equals(pandas.Index(key)):
+                        # we only need to change the labels, so shallow copy here
+                        value = value.copy(deep=False)
+                        value.columns = key
+
                     # here we iterate over every column in the 'self' frame, then check if it's in the 'key'
                     # and so has to be taken either from the 'value' or from the 'self'. After that,
                     # we concatenate those mixed column chunks and get a dataframe with updated columns
@@ -2587,14 +2592,14 @@ def __setitem__(self, key, value):
                     for col in self.columns:
                         if (col in key) != is_col_in_key:
                             if len(to_take):
-                                to_concat.append(src_obj.loc[:, to_take])
+                                to_concat.append(src_obj[to_take])
                             to_take = [col]
                             is_col_in_key ^= 1
                             src_obj = value if is_col_in_key else self
                         else:
                             to_take.append(col)
                     if len(to_take):
-                        to_concat.append(src_obj.loc[:, to_take])
+                        to_concat.append(src_obj[to_take])
 
                     new_qc = concat(to_concat, axis=1)._query_compiler
                 else:

diff --git a/modin/pandas/test/dataframe/test_indexing.py b/modin/pandas/test/dataframe/test_indexing.py
@@ -2388,23 +2388,41 @@ def build_value_picker(modin_value, pandas_value):
     )
 
 
-def test_setitem_2d_update():
-    def test(df, iloc):
+@pytest.mark.parametrize("does_value_have_different_columns", [True, False])
+def test_setitem_2d_update(does_value_have_different_columns):
+    def test(dfs, iloc):
         """Update columns on the given numeric indices."""
-        cols = df.columns[iloc].tolist()
-        df[cols] = df[cols] + 10
-        return df
+        df1, df2 = dfs
+        cols1 = df1.columns[iloc].tolist()
+        cols2 = df2.columns[iloc].tolist()
+        df1[cols1] = df2[cols2]
+        return df1
 
     modin_df, pandas_df = create_test_dfs(test_data["int_data"])
-    eval_general(modin_df, pandas_df, test, iloc=[0, 1, 2])
-    eval_general(modin_df, pandas_df, test, iloc=[0, -1])
-    eval_general(modin_df, pandas_df, test, iloc=slice(1, None))  # (start=1, stop=None)
+    modin_df2, pandas_df2 = create_test_dfs(test_data["int_data"])
+    modin_df2 *= 10
+    pandas_df2 *= 10
+
+    if does_value_have_different_columns:
+        new_columns = [f"{col}_new" for col in modin_df.columns]
+        modin_df2.columns = new_columns
+        pandas_df2.columns = new_columns
+
+    modin_dfs = (modin_df, modin_df2)
+    pandas_dfs = (pandas_df, pandas_df2)
+
+    eval_general(modin_dfs, pandas_dfs, test, iloc=[0, 1, 2])
+    eval_general(modin_dfs, pandas_dfs, test, iloc=[0, -1])
+    eval_general(
+        modin_dfs, pandas_dfs, test, iloc=slice(1, None)
+    )  # (start=1, stop=None)
     eval_general(
-        modin_df, pandas_df, test, iloc=slice(None, -2)
+        modin_dfs, pandas_dfs, test, iloc=slice(None, -2)
     )  # (start=None, stop=-2)
-    eval_general(modin_df, pandas_df, test, iloc=[0, 1, 5, 6, 9, 10, -2, -1])
+    eval_general(modin_dfs, pandas_dfs, test, iloc=[0, 1, 5, 6, 9, 10, -2, -1])
+    eval_general(modin_dfs, pandas_dfs, test, iloc=[5, 4, 0, 10, 1, -1])
     eval_general(
-        modin_df, pandas_df, test, iloc=slice(None, None, 2)
+        modin_dfs, pandas_dfs, test, iloc=slice(None, None, 2)
     )  # (start=None, stop=None, step=2)