Skip to content

Commit

Permalink
REFACTOR-modin-project#3885: Move PandasDataframePartitionManager.con…
Browse files Browse the repository at this point in the history
…catenate to a utils (modin-project#3892)

Signed-off-by: Naren Krishna <naren@ponder.io>
  • Loading branch information
naren-ponder authored Jan 11, 2022
1 parent 3c740db commit fea2064
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 28 deletions.
30 changes: 2 additions & 28 deletions modin/core/dataframe/pandas/partitioning/partition_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@

from modin.error_message import ErrorMessage
from modin.core.storage_formats.pandas.utils import compute_chunksize
from modin.core.dataframe.pandas.utils import concatenate
from modin.config import NPartitions, ProgressBar, BenchmarkMode

from pandas.api.types import union_categoricals
import os


Expand Down Expand Up @@ -600,32 +600,6 @@ def concat(cls, axis, left_parts, right_parts):
else:
return np.append(left_parts, right_parts, axis=axis)

@classmethod
def concatenate(cls, dfs):
    """
    Concatenate pandas DataFrames while preserving 'category' dtype.

    Columns that are categorical in every frame are re-encoded against the
    union of their categories before concatenation, so that the result keeps
    the 'category' dtype even when the individual frames carry different
    category sets. The input frames themselves are left unmodified.

    Parameters
    ----------
    dfs : list
        List of pandas DataFrames to concatenate.

    Returns
    -------
    pandas.DataFrame
        A pandas DataFrame. An empty DataFrame is returned for an empty list.
    """
    # Mirror `to_pandas`, which treats "nothing to concatenate" as an empty frame;
    # `set.intersection(*[])` would otherwise fail with an opaque TypeError.
    if len(dfs) == 0:
        return pandas.DataFrame()

    # Only columns that are categorical in *every* frame can be aligned.
    categoricals_columns = set.intersection(
        *[set(df.select_dtypes("category").columns.tolist()) for df in dfs]
    )

    if categoricals_columns:
        # Shallow copies share the underlying data but own their column
        # mapping, so re-assigning columns below does not leak into the
        # caller's DataFrames.
        dfs = [df.copy(deep=False) for df in dfs]
        for col in categoricals_columns:
            uc = union_categoricals([df[col] for df in dfs])
            for df in dfs:
                df[col] = pandas.Categorical(df[col], categories=uc.categories)

    return pandas.concat(dfs)

@classmethod
def to_pandas(cls, partitions):
"""
Expand Down Expand Up @@ -662,7 +636,7 @@ def to_pandas(cls, partitions):
if len(df_rows) == 0:
return pandas.DataFrame()
else:
return cls.concatenate(df_rows)
return concatenate(df_rows)

@classmethod
def to_numpy(cls, partitions, **kwargs):
Expand Down
44 changes: 44 additions & 0 deletions modin/core/dataframe/pandas/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


"""Collection of utility functions for the PandasDataFrame."""

import pandas
from pandas.api.types import union_categoricals


def concatenate(dfs):
    """
    Concatenate pandas DataFrames while preserving 'category' dtype.

    Columns that are categorical in every frame are re-encoded against the
    union of their categories before concatenation, so that the result keeps
    the 'category' dtype even when the individual frames carry different
    category sets. The input frames themselves are left unmodified.

    Parameters
    ----------
    dfs : list
        List of pandas DataFrames to concatenate.

    Returns
    -------
    pandas.DataFrame
        A pandas DataFrame. An empty DataFrame is returned for an empty list.
    """
    # `set.intersection(*[])` would fail with an opaque TypeError on empty
    # input; "nothing to concatenate" is reported as an empty frame instead.
    if len(dfs) == 0:
        return pandas.DataFrame()

    # Only columns that are categorical in *every* frame can be aligned.
    categoricals_columns = set.intersection(
        *[set(df.select_dtypes("category").columns.tolist()) for df in dfs]
    )

    if categoricals_columns:
        # Shallow copies share the underlying data but own their column
        # mapping, so re-assigning columns below does not leak into the
        # caller's DataFrames.
        dfs = [df.copy(deep=False) for df in dfs]
        for col in categoricals_columns:
            uc = union_categoricals([df[col] for df in dfs])
            for df in dfs:
                df[col] = pandas.Categorical(df[col], categories=uc.categories)

    return pandas.concat(dfs)

0 comments on commit fea2064

Please sign in to comment.