Skip to content

Commit

Permalink
FEAT-modin-project#6890: add consortium standard entrypoint
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed Apr 24, 2024
1 parent bbb136d commit 1994171
Show file tree
Hide file tree
Showing 10 changed files with 89 additions and 2 deletions.
9 changes: 9 additions & 0 deletions docs/getting_started/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@ storage formats or for different functionalities of Modin. Here is a list of dep
pip install "modin[mpi]" # If you want to use MPI through unidist execution engine
Consortium Standard-compatible implementation based on Modin
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

.. code-block:: bash
pip install "modin[consortium-standard]"
Installing on Google Colab
"""""""""""""""""""""""""""

Expand Down
1 change: 1 addition & 0 deletions environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ dependencies:
- isort>=5.12

- pip:
- dataframe-api-compat>=0.2.6
- asv==0.5.1
# no conda package for windows so we install it with pip
- connectorx>=0.2.6a4
Expand Down
16 changes: 16 additions & 0 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
_inherit_docstrings,
expanduser_path_arg,
hashable,
import_optional_dependency,
try_cast_to_pandas,
)

Expand Down Expand Up @@ -2892,6 +2893,21 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):
nan_as_null=nan_as_null, allow_copy=allow_copy
)

def __dataframe_consortium_standard__(
    self, *, api_version: str | None = None
):  # noqa: PR01, RT01
    """
    Expose this frame through the Consortium DataFrame Standard API.

    The conversion itself lives in ``dataframe_api_compat``, which is developed
    and maintained outside of Modin. Please report any issues to
    https://github.com/data-apis/dataframe-api-compat.

    ``api_version`` is keyword-only and is handed to the converter unchanged.
    """
    # Resolved at call time through the optional-dependency helper rather than
    # being imported at module import time.
    compat = import_optional_dependency("dataframe_api_compat")
    return compat.modin_standard.convert_to_standard_compliant_dataframe(
        self, api_version=api_version
    )

@property
def attrs(self) -> dict: # noqa: RT01, D200
"""
Expand Down
20 changes: 19 additions & 1 deletion modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@
from modin.config import PersistentPickle
from modin.logging import disable_logging
from modin.pandas.io import from_pandas, to_pandas
from modin.utils import MODIN_UNNAMED_SERIES_LABEL, _inherit_docstrings
from modin.utils import (
MODIN_UNNAMED_SERIES_LABEL,
_inherit_docstrings,
import_optional_dependency,
)

from .accessor import CachedAccessor, SparseAccessor
from .base import _ATTRS_NO_LOOKUP, BasePandasDataset
Expand Down Expand Up @@ -222,6 +226,20 @@ def __array__(self, dtype=None) -> np.ndarray: # noqa: PR01, RT01, D200
"""
return super(Series, self).__array__(dtype).flatten()

def __column_consortium_standard__(
    self, *, api_version: str | None = None
):  # noqa: PR01, RT01
    """
    Expose this series as a Consortium DataFrame Standard column.

    The conversion itself lives in ``dataframe_api_compat``, which is developed
    and maintained outside of Modin. Please report any issues to
    https://github.com/data-apis/dataframe-api-compat.

    ``api_version`` is keyword-only and is handed to the converter unchanged.
    """
    # Resolved at call time through the optional-dependency helper rather than
    # being imported at module import time.
    compat = import_optional_dependency("dataframe_api_compat")
    to_standard_column = compat.modin_standard.convert_to_standard_compliant_column
    return to_standard_column(self, api_version=api_version)

def __contains__(self, key: Hashable) -> bool:
"""
Check if `key` in the `Series.index`.
Expand Down
37 changes: 37 additions & 0 deletions modin/tests/test_dataframe_api_standard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pytest

import modin.pandas


def test_dataframe_consortium() -> None:
    """
    Smoke-test the consortium standard entry points of DataFrame and Series.

    Full testing is done at https://github.com/data-apis/dataframe-api-compat;
    this only checks that both entry points work as expected.
    """
    # Skip when the optional package is not installed.
    pytest.importorskip("dataframe_api_compat")

    # DataFrame entry point: column names must be reported in order.
    frame = modin.pandas.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    standard_frame = frame.__dataframe_consortium_standard__()
    assert standard_frame.get_column_names() == ["a", "b"]

    # Series entry point: positional lookup must return the stored value.
    series = modin.pandas.Series([1, 2, 3])
    standard_column = series.__column_consortium_standard__()
    assert standard_column.get_value(1) == 2
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ tqdm>=4.60.0
numexpr<2.8.5
# Latest modin-spreadsheet with widget fix
git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
dataframe-api-compat>=0.2.6

## dependencies for making release
PyGithub>=1.58.0
Expand Down
1 change: 1 addition & 0 deletions requirements/env_hdk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,6 @@ dependencies:
- mypy>=1.0.0

- pip:
- dataframe-api-compat>=0.2.6
# The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.
- numpydoc==1.1.0
1 change: 1 addition & 0 deletions requirements/env_unidist_win.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ dependencies:
- pandas-stubs>=2.0.0

- pip:
- dataframe-api-compat>=0.2.6
# Fixes breaking ipywidgets changes, but didn't release yet.
- git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
- connectorx>=0.2.6a4
Expand Down
1 change: 1 addition & 0 deletions requirements/requirements-no-engine.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ dependencies:
- flake8-print>=5.0.0

- pip:
- dataframe-api-compat>=0.2.6
- asv==0.5.1
# no conda package for windows
- connectorx>=0.2.6a4
Expand Down
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
# ray==2.5.0 broken: https://github.com/conda-forge/ray-packages-feedstock/issues/100
ray_deps = ["ray[default]>=2.1.0,!=2.5.0", "pyarrow>=7.0.0"]
mpi_deps = ["unidist[mpi]>=0.2.1"]
# Requirement list for the optional Consortium DataFrame Standard support;
# exposed to users as the "consortium-standard" extra.
consortium_standard_deps = ["dataframe-api-compat>=0.2.6"]
spreadsheet_deps = ["modin-spreadsheet>=0.1.0"]
# Currently, Modin does not include `mpi` option in `all`.
# Otherwise, installation of modin[all] would fail because
# users need to have a working MPI implementation and
# certain software installed beforehand.
all_deps = dask_deps + ray_deps + spreadsheet_deps
all_deps = dask_deps + ray_deps + spreadsheet_deps + consortium_standard_deps

# Distribute 'modin-autoimport-pandas.pth' along with binary and source distributions.
# This file provides the "import pandas before Ray init" feature if specific
Expand Down Expand Up @@ -62,6 +63,7 @@ def make_distribution(self):
"dask": dask_deps,
"ray": ray_deps,
"mpi": mpi_deps,
"consortium-standard": consortium_standard_deps,
"spreadsheet": spreadsheet_deps,
"all": all_deps,
},
Expand Down

0 comments on commit 1994171

Please sign in to comment.