diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py index 849e411164f..fe88066d7fe 100644 --- a/modin/core/dataframe/pandas/dataframe/dataframe.py +++ b/modin/core/dataframe/pandas/dataframe/dataframe.py @@ -581,9 +581,22 @@ def _set_columns(self, new_columns): if self.has_materialized_columns: # do not set new columns if they're identical to the previous ones if ( - isinstance(new_columns, pandas.Index) - and self.columns.equals(new_columns) - ) or np.array_equal(self.columns.values, new_columns): + # `Index.equals()` doesn't compare metadata, thus we have to compare + # it manually. Here we process the simplest and the most common case only + # (when the index is exactly of 'pandas.Index' or 'pandas.MultiIndex' type). Other cases are not that + # common and we can omit them + type(new_columns) in (pandas.Index, pandas.MultiIndex) + and ( + type(new_columns) + is type(self.columns) # noqa; here we need exact types comparison + ) + and new_columns.name == self.columns.name + and new_columns.names == self.columns.names + and new_columns.equals(self.columns) + ) or ( + not isinstance(new_columns, pandas.Index) + and np.array_equal(self.columns.values, new_columns) + ): return new_columns = self._validate_set_axis(new_columns, self._columns_cache) if self.has_materialized_dtypes: @@ -639,7 +652,6 @@ def _compute_axis_labels_and_lengths(self, axis: int, partitions=None): List of int Size of partitions alongside specified `axis`. 
""" - if partitions is None: partitions = self._partitions new_index, internal_idx = self._partition_mgr_cls.get_indices(axis, partitions) diff --git a/modin/test/storage_formats/pandas/test_internals.py b/modin/test/storage_formats/pandas/test_internals.py index f5af3845ee5..64c3063d082 100644 --- a/modin/test/storage_formats/pandas/test_internals.py +++ b/modin/test/storage_formats/pandas/test_internals.py @@ -1166,6 +1166,27 @@ def test_skip_set_columns(): # Verifies that the new columns weren't set if they're equal to the previous ones assert not df._query_compiler._modin_frame._deferred_column + df = pd.DataFrame({"col1": [1, 2, 3], "col2": [3, 4, 5]}) + df.columns = pandas.Index(["col1", "col2"], name="new name") + # Verifies that the new columns were set when they differ from the previous ones only by the index name + assert df.columns.name == "new name" + + df = pd.DataFrame( + {("a", "col1"): [1, 2, 3], ("a", "col2"): [3, 4, 5], ("b", "col1"): [6, 7, 8]} + ) + df.columns = df.columns.copy() + # Verifies that the new columns weren't set if they're equal to the previous ones + assert not df._query_compiler._modin_frame._deferred_column + + df = pd.DataFrame( + {("a", "col1"): [1, 2, 3], ("a", "col2"): [3, 4, 5], ("b", "col1"): [6, 7, 8]} + ) + new_cols = df.columns[::-1] + df.columns = new_cols + # Verifies that the new columns were successfully set in case they're actually new + assert df._query_compiler._modin_frame._deferred_column + assert df.columns.equals(new_cols) + df = pd.DataFrame({"col1": [1, 2, 3], "col2": [3, 4, 5]}) remove_axis_cache(df, axis=1) df.columns = ["col1", "col2"]