Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modin/backends/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from modin.backends.base.query_compiler import BaseQueryCompiler
from modin.error_message import ErrorMessage
from modin.pandas.utils import try_cast_to_pandas, wrap_udf_function
from modin.utils import try_cast_to_pandas, wrap_udf_function
from modin.data_management.functions import (
FoldFunction,
MapFunction,
Expand Down
2 changes: 1 addition & 1 deletion modin/data_management/functions/groupby_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import pandas

from .mapreducefunction import MapReduceFunction
from modin.pandas.utils import try_cast_to_pandas
from modin.utils import try_cast_to_pandas


class GroupbyReduceFunction(MapReduceFunction):
Expand Down
3 changes: 2 additions & 1 deletion modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@
import warnings
import pickle as pkl

from modin.utils import try_cast_to_pandas
from modin.error_message import ErrorMessage
from modin.pandas.utils import try_cast_to_pandas, is_scalar
from modin.pandas.utils import is_scalar

# Similar to pandas, sentinel value to use as kwarg in place of None when None has
# special meaning and needs to be distinguished from a user explicitly passing None.
Expand Down
4 changes: 1 addition & 3 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,10 @@
import warnings

from modin.error_message import ErrorMessage
from modin.utils import _inherit_docstrings, to_pandas, hashable
from .utils import (
from_pandas,
from_non_pandas,
to_pandas,
_inherit_docstrings,
hashable,
)
from .iterator import PartitionIterator
from .series import Series
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from .base import BasePandasDataset
from .dataframe import DataFrame
from .series import Series
from .utils import to_pandas
from modin.utils import to_pandas


def isna(obj):
Expand Down
3 changes: 1 addition & 2 deletions modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
import pandas.core.common as com

from modin.error_message import ErrorMessage

from .utils import _inherit_docstrings, wrap_udf_function, try_cast_to_pandas
from modin.utils import _inherit_docstrings, wrap_udf_function, try_cast_to_pandas
from .series import Series


Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ def return_handler(*args, **kwargs):
A Modin DataFrame in place of a pandas DataFrame, or the same
return type as pandas.ExcelFile.
"""
from .utils import to_pandas
from modin.utils import to_pandas

# We don't want to constantly be giving this error message for
# internal methods.
Expand Down Expand Up @@ -554,7 +554,7 @@ def return_handler(*args, **kwargs):
A Modin DataFrame in place of a pandas DataFrame, or the same
return type as pandas.HDFStore.
"""
from .utils import to_pandas
from modin.utils import to_pandas

# We don't want to constantly be giving this error message for
# internal methods.
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from pandas import plotting as pdplot

from .utils import to_pandas
from modin.utils import to_pandas
from .dataframe import DataFrame


Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from .dataframe import DataFrame
from .series import Series
from .utils import to_pandas
from modin.utils import to_pandas
from modin.error_message import ErrorMessage


Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
import sys
import warnings

from modin.utils import _inherit_docstrings, to_pandas
from .base import BasePandasDataset
from .iterator import PartitionIterator
from .utils import _inherit_docstrings
from .utils import from_pandas, to_pandas, is_scalar
from .utils import from_pandas, is_scalar

if sys.version_info[0] == 3 and sys.version_info[1] >= 7:
# Python >= 3.7
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/test/dataframe/test_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import os
import matplotlib
import modin.pandas as pd
from modin.pandas.utils import to_pandas
from modin.utils import to_pandas
from numpy.testing import assert_array_equal
import io

Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/test/dataframe/test_join_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import pandas
import matplotlib
import modin.pandas as pd
from modin.pandas.utils import to_pandas
from modin.utils import to_pandas

from modin.pandas.test.utils import (
random_state,
Expand Down
3 changes: 2 additions & 1 deletion modin/pandas/test/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
import pandas
import numpy as np
import modin.pandas as pd
from modin.pandas.utils import from_pandas, to_pandas, try_cast_to_pandas
from modin.utils import try_cast_to_pandas, to_pandas
from modin.pandas.utils import from_pandas
from .utils import (
df_equals,
check_df_columns_have_nans,
Expand Down
3 changes: 2 additions & 1 deletion modin/pandas/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
import pandas
from pandas.errors import ParserWarning
from collections import OrderedDict
from modin.pandas.utils import to_pandas, from_arrow
from modin.utils import to_pandas
from modin.pandas.utils import from_arrow
from pathlib import Path
import pyarrow as pa
import pyarrow.parquet as pq
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/test/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from numpy.testing import assert_array_equal
import sys

from modin.pandas.utils import to_pandas
from modin.utils import to_pandas
from .utils import (
random_state,
RAND_LOW,
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
assert_categorical_equal,
)
import modin.pandas as pd
from modin.pandas.utils import to_pandas
from modin.utils import to_pandas
from io import BytesIO

random_state = np.random.RandomState(seed=42)
Expand Down
101 changes: 0 additions & 101 deletions modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pandas


def from_non_pandas(df, index, columns, dtype):
from modin.data_management.dispatcher import EngineDispatcher
Expand Down Expand Up @@ -58,105 +56,6 @@ def from_arrow(at):
return DataFrame(query_compiler=EngineDispatcher.from_arrow(at))


def to_pandas(modin_obj):
    """Convert a Modin DataFrame/Series into its pandas counterpart.

    Args:
        modin_obj {modin.DataFrame, modin.Series}: The Modin DataFrame/Series
            to convert. It must expose a ``_to_pandas()`` method.

    Returns:
        The value produced by ``modin_obj._to_pandas()`` (a new pandas
        DataFrame or Series).
    """
    pandas_obj = modin_obj._to_pandas()
    return pandas_obj


def _inherit_docstrings(parent, excluded=[]):
"""Creates a decorator which overwrites a decorated class' __doc__
attribute with parent's __doc__ attribute. Also overwrites __doc__ of
methods and properties defined in the class with the __doc__ of matching
methods and properties in parent.

Args:
parent (object): Class from which the decorated class inherits __doc__.
excluded (list): List of parent objects from which the class does not
inherit docstrings.

Returns:
function: decorator which replaces the decorated class' documentation
parent's documentation.
"""

def decorator(cls):
if parent not in excluded:
cls.__doc__ = parent.__doc__
for attr, obj in cls.__dict__.items():
parent_obj = getattr(parent, attr, None)
if parent_obj in excluded or (
not callable(parent_obj) and not isinstance(parent_obj, property)
):
continue
if callable(obj):
obj.__doc__ = parent_obj.__doc__
elif isinstance(obj, property) and obj.fget is not None:
p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__)
setattr(cls, attr, p)
return cls

return decorator


def try_cast_to_pandas(obj):
    """
    Converts obj and all nested objects from modin to pandas if it is possible,
    otherwise returns obj

    Objects exposing ``_to_pandas`` are converted by calling it; lists, tuples
    (including namedtuples) and dict values are converted element-wise;
    callables whose ``__module__`` starts with ``modin`` are replaced by the
    attribute of the same name on ``pandas.DataFrame`` or ``pandas.Series``
    when such an attribute exists.

    Parameters
    ----------
    obj : object,
        object to convert from modin to pandas

    Returns
    -------
    Converted object
    """
    if hasattr(obj, "_to_pandas"):
        return obj._to_pandas()
    if isinstance(obj, (list, tuple)):
        converted = [try_cast_to_pandas(o) for o in obj]
        # namedtuple constructors take one positional argument per field, so
        # passing a single list would raise or produce a wrong value.
        if isinstance(obj, tuple) and hasattr(obj, "_fields"):
            return type(obj)(*converted)
        return type(obj)(converted)
    if isinstance(obj, dict):
        return {k: try_cast_to_pandas(v) for k, v in obj.items()}
    if callable(obj):
        # ``__module__`` may exist but be ``None`` (e.g. bound built-in methods
        # such as ``[].append``); treat that like a missing module name.
        module_name = getattr(obj, "__module__", None)
        module_hierarchy = module_name.split(".") if isinstance(module_name, str) else [""]
        fn_name = getattr(obj, "__name__", None)
        if fn_name and module_hierarchy[0] == "modin":
            return (
                getattr(pandas.DataFrame, fn_name, obj)
                if module_hierarchy[-1] == "dataframe"
                else getattr(pandas.Series, fn_name, obj)
            )
    return obj


def wrap_udf_function(func):
    """
    Wrap a user-defined function so that its result is cast to pandas.

    Parameters
    ----------
    func : callable
        User-defined function to wrap.

    Returns
    -------
    callable
        Function that forwards all arguments to ``func`` and passes the result
        through ``try_cast_to_pandas``. It carries ``func.__name__`` when
        ``func`` has one.
    """

    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        # if user accidentally returns modin DataFrame or Series
        # casting it back to pandas to properly process
        return try_cast_to_pandas(result)

    # Not every callable has ``__name__`` (e.g. ``functools.partial`` objects or
    # instances defining ``__call__``); keep the default name for those.
    func_name = getattr(func, "__name__", None)
    if func_name is not None:
        wrapper.__name__ = func_name
    return wrapper


def hashable(obj):
    """Tell whether ``hash(obj)`` succeeds.

    Returns ``False`` when hashing raises ``TypeError`` and ``True`` otherwise.
    """
    try:
        hash(obj)
    except TypeError:
        is_hashable = False
    else:
        is_hashable = True
    return is_hashable


def is_scalar(obj):
"""
Return True if given object is scalar.
Expand Down
113 changes: 113 additions & 0 deletions modin/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pandas


def _inherit_docstrings(parent, excluded=[]):
"""Creates a decorator which overwrites a decorated class' __doc__
attribute with parent's __doc__ attribute. Also overwrites __doc__ of
methods and properties defined in the class with the __doc__ of matching
methods and properties in parent.

Args:
parent (object): Class from which the decorated class inherits __doc__.
excluded (list): List of parent objects from which the class does not
inherit docstrings.

Returns:
function: decorator which replaces the decorated class' documentation
parent's documentation.
"""

def decorator(cls):
if parent not in excluded:
cls.__doc__ = parent.__doc__
for attr, obj in cls.__dict__.items():
parent_obj = getattr(parent, attr, None)
if parent_obj in excluded or (
not callable(parent_obj) and not isinstance(parent_obj, property)
):
continue
if callable(obj):
obj.__doc__ = parent_obj.__doc__
elif isinstance(obj, property) and obj.fget is not None:
p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__)
setattr(cls, attr, p)
return cls

return decorator


def to_pandas(modin_obj):
    """Convert a Modin DataFrame/Series into its pandas counterpart.

    Args:
        modin_obj {modin.DataFrame, modin.Series}: The Modin DataFrame/Series
            to convert. It must expose a ``_to_pandas()`` method.

    Returns:
        The value produced by ``modin_obj._to_pandas()`` (a new pandas
        DataFrame or Series).
    """
    pandas_obj = modin_obj._to_pandas()
    return pandas_obj


def hashable(obj):
    """Tell whether ``hash(obj)`` succeeds.

    Returns ``False`` when hashing raises ``TypeError`` and ``True`` otherwise.
    """
    try:
        hash(obj)
    except TypeError:
        is_hashable = False
    else:
        is_hashable = True
    return is_hashable


def try_cast_to_pandas(obj):
    """
    Converts obj and all nested objects from modin to pandas if it is possible,
    otherwise returns obj

    Objects exposing ``_to_pandas`` are converted by calling it; lists, tuples
    (including namedtuples) and dict values are converted element-wise;
    callables whose ``__module__`` starts with ``modin`` are replaced by the
    attribute of the same name on ``pandas.DataFrame`` or ``pandas.Series``
    when such an attribute exists.

    Parameters
    ----------
    obj : object,
        object to convert from modin to pandas

    Returns
    -------
    Converted object
    """
    if hasattr(obj, "_to_pandas"):
        return obj._to_pandas()
    if isinstance(obj, (list, tuple)):
        converted = [try_cast_to_pandas(o) for o in obj]
        # namedtuple constructors take one positional argument per field, so
        # passing a single list would raise or produce a wrong value.
        if isinstance(obj, tuple) and hasattr(obj, "_fields"):
            return type(obj)(*converted)
        return type(obj)(converted)
    if isinstance(obj, dict):
        return {k: try_cast_to_pandas(v) for k, v in obj.items()}
    if callable(obj):
        # ``__module__`` may exist but be ``None`` (e.g. bound built-in methods
        # such as ``[].append``); treat that like a missing module name.
        module_name = getattr(obj, "__module__", None)
        module_hierarchy = module_name.split(".") if isinstance(module_name, str) else [""]
        fn_name = getattr(obj, "__name__", None)
        if fn_name and module_hierarchy[0] == "modin":
            return (
                getattr(pandas.DataFrame, fn_name, obj)
                if module_hierarchy[-1] == "dataframe"
                else getattr(pandas.Series, fn_name, obj)
            )
    return obj


def wrap_udf_function(func):
    """
    Wrap a user-defined function so that its result is cast to pandas.

    Parameters
    ----------
    func : callable
        User-defined function to wrap.

    Returns
    -------
    callable
        Function that forwards all arguments to ``func`` and passes the result
        through ``try_cast_to_pandas``. It carries ``func.__name__`` when
        ``func`` has one.
    """

    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        # if user accidentally returns modin DataFrame or Series
        # casting it back to pandas to properly process
        return try_cast_to_pandas(result)

    # Not every callable has ``__name__`` (e.g. ``functools.partial`` objects or
    # instances defining ``__call__``); keep the default name for those.
    func_name = getattr(func, "__name__", None)
    if func_name is not None:
        wrapper.__name__ = func_name
    return wrapper