Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX-#2054: Moved non-dependent on modin.DataFrame utils to modin/utils.py #2055

Merged
merged 2 commits into from
Sep 9, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
FIX-#2054: modin/utils.py created
Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
  • Loading branch information
dchigarev committed Sep 9, 2020
commit 8c30b8144e74235690f1608130c80424d8870f85
2 changes: 1 addition & 1 deletion modin/backends/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from modin.backends.base.query_compiler import BaseQueryCompiler
from modin.error_message import ErrorMessage
from modin.pandas.utils import try_cast_to_pandas, wrap_udf_function
from modin.utils import try_cast_to_pandas, wrap_udf_function
from modin.data_management.functions import (
FoldFunction,
MapFunction,
Expand Down
3 changes: 2 additions & 1 deletion modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@
import warnings
import pickle as pkl

from modin.utils import try_cast_to_pandas
from modin.error_message import ErrorMessage
from modin.pandas.utils import try_cast_to_pandas, is_scalar
from modin.pandas.utils import is_scalar

# Similar to pandas, sentinel value to use as kwarg in place of None when None has
# special meaning and needs to be distinguished from a user explicitly passing None.
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@
import warnings

from modin.error_message import ErrorMessage
from modin.utils import _inherit_docstrings
from .utils import (
from_pandas,
from_non_pandas,
to_pandas,
_inherit_docstrings,
hashable,
)
from .iterator import PartitionIterator
Expand Down
3 changes: 1 addition & 2 deletions modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
import pandas.core.common as com

from modin.error_message import ErrorMessage

from .utils import _inherit_docstrings, wrap_udf_function, try_cast_to_pandas
from modin.utils import _inherit_docstrings, wrap_udf_function, try_cast_to_pandas
from .series import Series


Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
import sys
import warnings

from modin.utils import _inherit_docstrings
from .base import BasePandasDataset
from .iterator import PartitionIterator
from .utils import _inherit_docstrings
from .utils import from_pandas, to_pandas, is_scalar

if sys.version_info[0] == 3 and sys.version_info[1] >= 7:
Expand Down
3 changes: 2 additions & 1 deletion modin/pandas/test/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
import pandas
import numpy as np
import modin.pandas as pd
from modin.pandas.utils import from_pandas, to_pandas, try_cast_to_pandas
from modin.utils import try_cast_to_pandas
from modin.pandas.utils import from_pandas, to_pandas
dchigarev marked this conversation as resolved.
Show resolved Hide resolved
from .utils import (
df_equals,
check_df_columns_have_nans,
Expand Down
80 changes: 0 additions & 80 deletions modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pandas


def from_non_pandas(df, index, columns, dtype):
from modin.data_management.dispatcher import EngineDispatcher
Expand Down Expand Up @@ -70,84 +68,6 @@ def to_pandas(modin_obj):
return modin_obj._to_pandas()


def _inherit_docstrings(parent, excluded=None):
    """Create a decorator that copies docstrings from ``parent`` onto a class.

    The decorated class' ``__doc__`` is overwritten with ``parent.__doc__``.
    The ``__doc__`` of every callable and property defined directly in the
    decorated class is likewise overwritten with the ``__doc__`` of the
    attribute of the same name on ``parent``, provided that attribute is itself
    a callable or a property.

    Args:
        parent (object): Class from which the decorated class inherits __doc__.
        excluded (list): List of parent objects from which the class does not
            inherit docstrings. Defaults to no exclusions.

    Returns:
        function: decorator which replaces the decorated class' documentation
            with parent's documentation.
    """
    # Use an immutable empty default instead of a list shared between calls.
    if excluded is None:
        excluded = ()

    def decorator(cls):
        if parent not in excluded:
            cls.__doc__ = parent.__doc__
        # Iterate over a snapshot: properties are rebound on the class via
        # setattr below, so the live namespace is not walked while it changes.
        for attr, obj in list(cls.__dict__.items()):
            parent_obj = getattr(parent, attr, None)
            # Skip names the parent lacks, excluded parent objects, and parent
            # attributes that are neither callables nor properties.
            if parent_obj in excluded or (
                not callable(parent_obj) and not isinstance(parent_obj, property)
            ):
                continue
            if callable(obj):
                obj.__doc__ = parent_obj.__doc__
            elif isinstance(obj, property) and obj.fget is not None:
                # Rebuild the property with the same accessors and the
                # parent's docstring, then rebind it on the class.
                p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__)
                setattr(cls, attr, p)
        return cls

    return decorator


def try_cast_to_pandas(obj):
    """
    Converts obj and all nested objects from modin to pandas if it is possible,
    otherwise returns obj

    Objects exposing a ``_to_pandas`` method are converted by calling it.
    Lists, tuples (including namedtuples) and dict values are converted
    recursively and rebuilt with the same container type. A callable whose
    ``__module__`` starts with the ``modin`` package and that has a
    ``__name__`` is replaced by the attribute of the same name on
    ``pandas.DataFrame`` (when the last module component is ``dataframe``) or
    on ``pandas.Series`` (otherwise); if pandas has no such attribute the
    callable is returned unchanged.

    Parameters
    ----------
    obj : object,
        object to convert from modin to pandas

    Returns
    -------
        Converted object
    """
    if hasattr(obj, "_to_pandas"):
        return obj._to_pandas()
    if isinstance(obj, (list, tuple)):
        converted = [try_cast_to_pandas(o) for o in obj]
        if isinstance(obj, tuple) and hasattr(obj, "_fields"):
            # namedtuple constructors take one positional argument per field,
            # not a single iterable.
            return type(obj)(*converted)
        return type(obj)(converted)
    if isinstance(obj, dict):
        return {k: try_cast_to_pandas(v) for k, v in obj.items()}
    if callable(obj):
        # ``__module__`` may exist but be None (e.g. bound methods of builtin
        # types such as ``",".join``), so fall back to "" in that case too.
        module_hierarchy = (getattr(obj, "__module__", None) or "").split(".")
        fn_name = getattr(obj, "__name__", None)
        if fn_name and module_hierarchy[0] == "modin":
            return (
                getattr(pandas.DataFrame, fn_name, obj)
                if module_hierarchy[-1] == "dataframe"
                else getattr(pandas.Series, fn_name, obj)
            )
    return obj


def wrap_udf_function(func):
    """
    Wrap a user-defined function so its result is cast from modin to pandas.

    Parameters
    ----------
    func : callable
        User-defined function to wrap.

    Returns
    -------
    callable
        Wrapper that calls ``func`` with the same arguments and passes the
        result through ``try_cast_to_pandas``. It carries ``func.__name__``
        when ``func`` has one.
    """

    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        # if user accidentally returns modin DataFrame or Series
        # casting it back to pandas to properly process
        return try_cast_to_pandas(result)

    # Callables such as functools.partial objects or instances defining
    # __call__ have no __name__; keep the wrapper's own name for those
    # instead of raising AttributeError.
    func_name = getattr(func, "__name__", None)
    if func_name is not None:
        wrapper.__name__ = func_name
    return wrapper


def hashable(obj):
"""Return whether the object is hashable."""
try:
Expand Down
92 changes: 92 additions & 0 deletions modin/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pandas


def _inherit_docstrings(parent, excluded=None):
    """Create a decorator that copies docstrings from ``parent`` onto a class.

    The decorated class' ``__doc__`` is overwritten with ``parent.__doc__``.
    The ``__doc__`` of every callable and property defined directly in the
    decorated class is likewise overwritten with the ``__doc__`` of the
    attribute of the same name on ``parent``, provided that attribute is itself
    a callable or a property.

    Args:
        parent (object): Class from which the decorated class inherits __doc__.
        excluded (list): List of parent objects from which the class does not
            inherit docstrings. Defaults to no exclusions.

    Returns:
        function: decorator which replaces the decorated class' documentation
            with parent's documentation.
    """
    # Use an immutable empty default instead of a list shared between calls.
    if excluded is None:
        excluded = ()

    def decorator(cls):
        if parent not in excluded:
            cls.__doc__ = parent.__doc__
        # Iterate over a snapshot: properties are rebound on the class via
        # setattr below, so the live namespace is not walked while it changes.
        for attr, obj in list(cls.__dict__.items()):
            parent_obj = getattr(parent, attr, None)
            # Skip names the parent lacks, excluded parent objects, and parent
            # attributes that are neither callables nor properties.
            if parent_obj in excluded or (
                not callable(parent_obj) and not isinstance(parent_obj, property)
            ):
                continue
            if callable(obj):
                obj.__doc__ = parent_obj.__doc__
            elif isinstance(obj, property) and obj.fget is not None:
                # Rebuild the property with the same accessors and the
                # parent's docstring, then rebind it on the class.
                p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__)
                setattr(cls, attr, p)
        return cls

    return decorator


def try_cast_to_pandas(obj):
    """
    Converts obj and all nested objects from modin to pandas if it is possible,
    otherwise returns obj

    Objects exposing a ``_to_pandas`` method are converted by calling it.
    Lists, tuples (including namedtuples) and dict values are converted
    recursively and rebuilt with the same container type. A callable whose
    ``__module__`` starts with the ``modin`` package and that has a
    ``__name__`` is replaced by the attribute of the same name on
    ``pandas.DataFrame`` (when the last module component is ``dataframe``) or
    on ``pandas.Series`` (otherwise); if pandas has no such attribute the
    callable is returned unchanged.

    Parameters
    ----------
    obj : object,
        object to convert from modin to pandas

    Returns
    -------
        Converted object
    """
    if hasattr(obj, "_to_pandas"):
        return obj._to_pandas()
    if isinstance(obj, (list, tuple)):
        converted = [try_cast_to_pandas(o) for o in obj]
        if isinstance(obj, tuple) and hasattr(obj, "_fields"):
            # namedtuple constructors take one positional argument per field,
            # not a single iterable.
            return type(obj)(*converted)
        return type(obj)(converted)
    if isinstance(obj, dict):
        return {k: try_cast_to_pandas(v) for k, v in obj.items()}
    if callable(obj):
        # ``__module__`` may exist but be None (e.g. bound methods of builtin
        # types such as ``",".join``), so fall back to "" in that case too.
        module_hierarchy = (getattr(obj, "__module__", None) or "").split(".")
        fn_name = getattr(obj, "__name__", None)
        if fn_name and module_hierarchy[0] == "modin":
            return (
                getattr(pandas.DataFrame, fn_name, obj)
                if module_hierarchy[-1] == "dataframe"
                else getattr(pandas.Series, fn_name, obj)
            )
    return obj


def wrap_udf_function(func):
    """
    Wrap a user-defined function so its result is cast from modin to pandas.

    Parameters
    ----------
    func : callable
        User-defined function to wrap.

    Returns
    -------
    callable
        Wrapper that calls ``func`` with the same arguments and passes the
        result through ``try_cast_to_pandas``. It carries ``func.__name__``
        when ``func`` has one.
    """

    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        # if user accidentally returns modin DataFrame or Series
        # casting it back to pandas to properly process
        return try_cast_to_pandas(result)

    # Callables such as functools.partial objects or instances defining
    # __call__ have no __name__; keep the wrapper's own name for those
    # instead of raising AttributeError.
    func_name = getattr(func, "__name__", None)
    if func_name is not None:
        wrapper.__name__ = func_name
    return wrapper