Dataframe deprecation (ray-project#2353)

rongou · Jul 6, 2018 · 4185aae
1 parent 4f1d14e
commit 4185aae
Show file tree

Hide file tree

Showing 22 changed files with 6 additions and 16,270 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -143,12 +143,6 @@ matrix:
         - python test/cython_test.py
         - python test/credis_test.py
 
-        # ray dataframe tests
-        # - python -m pytest python/ray/dataframe/test/test_dataframe.py
-        - python -m pytest python/ray/dataframe/test/test_concat.py
-        - python -m pytest python/ray/dataframe/test/test_io.py
-        # - python -m pytest python/ray/dataframe/test/test_groupby.py
-
         # ray tune tests
         - python python/ray/tune/test/dependency_test.py
         - python -m pytest python/ray/tune/test/trial_runner_test.py
@@ -208,12 +202,6 @@ script:
   - python test/cython_test.py
   - python test/credis_test.py
 
-  # ray dataframe tests
-  - python -m pytest python/ray/dataframe/test/test_dataframe.py
-  - python -m pytest python/ray/dataframe/test/test_concat.py
-  - python -m pytest python/ray/dataframe/test/test_io.py
-  - python -m pytest python/ray/dataframe/test/test_groupby.py
-
   # ray tune tests
   - python python/ray/tune/test/dependency_test.py
   - python -m pytest python/ray/tune/test/trial_runner_test.py

diff --git a/.travis/upgrade-syn.sh b/.travis/upgrade-syn.sh
@@ -13,7 +13,6 @@ find \
     python test \
     -name '*.py' -type f \
     -not -path 'python/ray/cloudpickle/*' \
-    -not -path 'python/ray/dataframe/*' \
     -exec python -m pyupgrade {} +
 
 if ! git diff --quiet; then

diff --git a/.travis/yapf.sh b/.travis/yapf.sh
@@ -24,7 +24,6 @@ YAPF_FLAGS=(
 )
 
 YAPF_EXCLUDES=(
-    '--exclude' 'python/ray/dataframe/*'
     '--exclude' 'python/ray/rllib/*'
     '--exclude' 'python/ray/cloudpickle/*'
     '--exclude' 'python/build/*'

diff --git a/doc/source/pandas_on_ray.rst b/doc/source/pandas_on_ray.rst
@@ -1,71 +1,9 @@
 Pandas on Ray
 =============
 
-Pandas on Ray is an early stage DataFrame library that wraps Pandas and
-transparently distributes the data and computation. The user does not need to
-know how many cores their system has, nor do they need to specify how to
-distribute the data. In fact, users can continue using their previous Pandas
-notebooks while experiencing a considerable speedup from Pandas on Ray, even
-on a single machine. Only a modification of the import statement is needed, as
-we demonstrate below. Once you’ve changed your import statement, you’re ready
-to use Pandas on Ray just like you would Pandas.
+**Pandas on Ray has moved to Modin!**
 
-.. code-block:: python
+Pandas on Ray has moved into the `Modin project`_ with the intention of
+unifying the DataFrame APIs.
 
-  # import pandas as pd
-  import ray.dataframe as pd
-
-Currently, we have part of the Pandas API implemented and are working toward
-full functional parity with Pandas.
-
-Using Pandas on Ray on a Single Node
-------------------------------------
-
-In order to use the most up-to-date version of Pandas on Ray, please follow
-the instructions on the `installation page`_
-
-Once you import the library, you should see something similar to the following
-output:
-
-.. code-block:: text
-
-  >>> import ray.dataframe as pd
-
-  Waiting for redis server at 127.0.0.1:14618 to respond...
-  Waiting for redis server at 127.0.0.1:31410 to respond...
-  Starting local scheduler with the following resources: {'CPU': 4, 'GPU': 0}.
-
-  ======================================================================
-  View the web UI at http://localhost:8889/notebooks/ray_ui36796.ipynb?token=ac25867d62c4ae87941bc5a0ecd5f517dbf80bd8e9b04218
-  ======================================================================
-
-If you do not see output similar to the above, please make sure that you have
-built Ray using the instructions on the `installation page`_
-
-One you have executed  ``import ray.dataframe as pd``, you're ready to begin
-running your Pandas pipeline as you were before. Please note, the API is not
-yet complete. For some methods, you may see the following:
-
-.. code-block:: text
-
-  NotImplementedError: To contribute to Pandas on Ray, please visit github.com/ray-project/ray.
-
-If you would like to request a particular method be implemented, feel free to
-`open an issue`_. Before you open an issue please make sure that someone else
-has not already requested that functionality.
-
-Using Pandas on Ray on a Cluster
---------------------------------
-
-Currently, we do not yet support running Pandas on Ray on a cluster. Coming
-Soon!
-
-Examples
---------
-You can find an example on our recent `blog post`_ or on the
-`Jupyter Notebook`_ that we used to create the blog post.
-
-.. _`installation page`: http://ray.readthedocs.io/en/latest/installation.html
-.. _`open an issue`: http://github.com/ray-project/ray/issues
-.. _`blog post`: http://rise.cs.berkeley.edu/blog/pandas-on-ray
-.. _`Jupyter Notebook`: http://gist.github.com/devin-petersohn/f424d9fb5579a96507c709a36d487f24#file-pandas_on_ray_blog_post_0-ipynb
+.. _`Modin project`: https://github.com/modin-project/modin
diff --git a/python/ray/dataframe/__init__.py b/python/ray/dataframe/__init__.py
@@ -1,60 +1,2 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pandas
-# TODO: In the future `set_option` or similar needs to run on every node
-# in order to keep all pandas instances across nodes consistent
-from pandas import (eval, unique, value_counts, cut, to_numeric, factorize,
-                    test, qcut, match, Panel, date_range, Index, MultiIndex,
-                    CategoricalIndex, Series, bdate_range, DatetimeIndex,
-                    Timedelta, Timestamp, to_timedelta, set_eng_float_format,
-                    set_option, NaT, PeriodIndex, Categorical)
-import threading
-
-pandas_version = pandas.__version__
-pandas_major = int(pandas_version.split(".")[0])
-pandas_minor = int(pandas_version.split(".")[1])
-
-if pandas_major == 0 and pandas_minor != 22:
-    raise Exception("In order to use Pandas on Ray, your pandas version must "
-                    "be 0.22. You can run 'pip install pandas==0.22'")
-
-DEFAULT_NPARTITIONS = 8
-
-
-def set_npartition_default(n):
-    global DEFAULT_NPARTITIONS
-    DEFAULT_NPARTITIONS = n
-
-
-def get_npartitions():
-    return DEFAULT_NPARTITIONS
-
-
-# We import these file after above two function
-# because they depend on npartitions.
-from .dataframe import DataFrame  # noqa: 402
-from .io import (read_csv, read_parquet, read_json, read_html,  # noqa: 402
-                 read_clipboard, read_excel, read_hdf, read_feather,  # noqa: 402
-                 read_msgpack, read_stata, read_sas, read_pickle,  # noqa: 402
-                 read_sql)  # noqa: 402
-from .concat import concat  # noqa: 402
-from .datetimes import to_datetime  # noqa: 402
-from .reshape import get_dummies  # noqa: 402
-
-__all__ = [
-    "DataFrame", "Series", "read_csv", "read_parquet", "concat", "eval",
-    "unique", "value_counts", "cut", "to_numeric", "factorize", "test", "qcut",
-    "match", "to_datetime", "get_dummies", "Panel", "date_range", "Index",
-    "MultiIndex", "Series", "bdate_range", "DatetimeIndex", "to_timedelta",
-    "set_eng_float_format", "set_option", "CategoricalIndex", "Timedelta",
-    "Timestamp", "NaT", "PeriodIndex", "Categorical"
-]
-
-try:
-    if threading.current_thread().name == "MainThread":
-        import ray
-        ray.init()
-except AssertionError:
-    pass
+raise DeprecationWarning("Pandas on Ray has moved to Modin: "
+                         "github.com/modin-project/modin")
diff --git a/python/ray/dataframe/concat.py b/python/ray/dataframe/concat.py