From f9cdfca79e98c127d6c3d4f4ca8d11f2e6da26d7 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Fri, 24 Jul 2020 09:26:57 -0700 Subject: [PATCH] FIX-#1700: Fix metadata for concat and mask when `axis=1` Signed-off-by: Devin Petersohn --- modin/backends/pandas/query_compiler.py | 11 +++++++---- modin/engines/base/frame/data.py | 2 +- modin/pandas/test/test_concat.py | 11 +++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index 1e9f170d36d..01e987454b6 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -252,11 +252,14 @@ def concat(self, axis, other, **kwargs): ignore_index = kwargs.get("ignore_index", False) other_modin_frame = [o._modin_frame for o in other] new_modin_frame = self._modin_frame._concat(axis, other_modin_frame, join, sort) + result = self.__constructor__(new_modin_frame) if ignore_index: - new_modin_frame.index = pandas.RangeIndex( - len(self.index) + sum(len(o.index) for o in other) - ) - return self.__constructor__(new_modin_frame) + if axis == 0: + return result.reset_index(drop=True) + else: + result.columns = pandas.RangeIndex(len(result.columns)) + return result + return result # END Append/Concat/Join diff --git a/modin/engines/base/frame/data.py b/modin/engines/base/frame/data.py index 507f6540872..8c931393845 100644 --- a/modin/engines/base/frame/data.py +++ b/modin/engines/base/frame/data.py @@ -489,7 +489,7 @@ def mask( new_col_widths = [len(idx) for _, idx in col_partitions_list.items()] new_columns = self.columns[sorted(col_numeric_idx)] if self._dtypes is not None: - new_dtypes = self.dtypes[sorted(col_numeric_idx)] + new_dtypes = self.dtypes.iloc[sorted(col_numeric_idx)] else: new_dtypes = None else: diff --git a/modin/pandas/test/test_concat.py b/modin/pandas/test/test_concat.py index 5e67427110a..941f45b83fe 100644 --- a/modin/pandas/test/test_concat.py +++ b/modin/pandas/test/test_concat.py @@ -83,6 +83,17 @@ def test_concat_on_column(): pandas.concat([df, df2], axis="columns"), ) + modin_result = pd.concat( + [pd.Series(np.ones(10)), pd.Series(np.ones(10))], axis=1, ignore_index=True + ) + pandas_result = pandas.concat( + [pandas.Series(np.ones(10)), pandas.Series(np.ones(10))], + axis=1, + ignore_index=True, + ) + df_equals(modin_result, pandas_result) + assert modin_result.dtypes.equals(pandas_result.dtypes) + def test_invalid_axis_errors(): df, df2 = generate_dfs()