Skip to content

Commit

Permalink
[DataFrame] Implementing write methods (ray-project#1918)
Browse files (browse the repository at this point in the history)
* Add in write methods and functionality

* Infer the highest available pickle protocol version

* Fix import rebase artifact

* Formatting changes to the tests

* fix lint
  • Loading branch information
kunalgosar authored and devin-petersohn committed Apr 23, 2018
1 parent baf97e4 commit 7c9f392
Show file tree
Hide file tree
Showing 4 changed files with 446 additions and 230 deletions.
7 changes: 5 additions & 2 deletions python/ray/dataframe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@ def get_npartitions():
# because they depend on npartitions.
from .dataframe import DataFrame # noqa: 402
from .series import Series # noqa: 402
from .io import (read_csv, read_parquet) # noqa: 402
from .concat import concat # noqa: 402
from .io import (read_csv, read_parquet, read_json, read_html, # noqa: 402
read_clipboard, read_excel, read_hdf, read_feather, # noqa: 402
read_msgpack, read_stata, read_sas, read_pickle, # noqa: 402
read_sql) # noqa: 402
from .concat import concat # noqa: 402

__all__ = [
"DataFrame", "Series", "read_csv", "read_parquet", "concat", "eval"
Expand Down
134 changes: 95 additions & 39 deletions python/ray/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandas._libs import lib
from pandas.core.dtypes.cast import maybe_upcast_putmask
from pandas import compat
from pandas.compat import lzip
from pandas.compat import lzip, cPickle as pkl
import pandas.core.common as com
from pandas.core.dtypes.common import (
is_bool_dtype,
Expand Down Expand Up @@ -2924,19 +2924,30 @@ def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs):
"github.com/ray-project/ray.")

def to_clipboard(self, excel=None, sep=None, **kwargs):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

def to_csv(self, path_or_buf=None, sep=', ', na_rep='', float_format=None,
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_clipboard(excel, sep, **kwargs)

def to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_csv(path_or_buf, sep, na_rep, float_format,
columns, header, index, index_label,
mode, encoding, compression, quoting,
quotechar, line_terminator, chunksize,
tupleize_cols, date_format, doublequote,
escapechar, decimal)

def to_dense(self):
raise NotImplementedError(
Expand All @@ -2953,14 +2964,24 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
index_label=None, startrow=0, startcol=0, engine=None,
merge_cells=True, encoding=None, inf_rep='inf', verbose=True,
freeze_panes=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_excel(excel_writer, sheet_name, na_rep,
float_format, columns, header, index,
index_label, startrow, startcol, engine,
merge_cells, encoding, inf_rep, verbose,
freeze_panes)

def to_feather(self, fname):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_feather(fname)

def to_gbq(self, destination_table, project_id, chunksize=10000,
verbose=True, reauth=False, if_exists='fail',
Expand All @@ -2970,26 +2991,42 @@ def to_gbq(self, destination_table, project_id, chunksize=10000,
"github.com/ray-project/ray.")

def to_hdf(self, path_or_buf, key, **kwargs):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_hdf(path_or_buf, key, **kwargs)

def to_html(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='np.NaN', formatters=None,
float_format=None, sparsify=None, index_names=True,
justify=None, bold_rows=True, classes=None, escape=True,
max_rows=None, max_cols=None, show_dimensions=False,
notebook=False, decimal='.', border=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_html(buf, columns, col_space, header,
index, na_rep, formatters,
float_format, sparsify, index_names,
justify, bold_rows, classes, escape,
max_rows, max_cols, show_dimensions,
notebook, decimal, border)

def to_json(self, path_or_buf=None, orient=None, date_format=None,
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None, lines=False, compression=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_json(path_or_buf, orient, date_format,
double_precision, force_ascii, date_unit,
default_handler, lines, compression)

def to_latex(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='np.NaN', formatters=None,
Expand All @@ -3002,9 +3039,12 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
"github.com/ray-project/ray.")

def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_msgpack(path_or_buf, encoding, **kwargs)

def to_panel(self):
raise NotImplementedError(
Expand All @@ -3013,19 +3053,26 @@ def to_panel(self):

def to_parquet(self, fname, engine='auto', compression='snappy',
**kwargs):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_parquet(fname, engine, compression, **kwargs)

def to_period(self, freq=None, axis=0, copy=True):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

def to_pickle(self, path, compression='infer', protocol=4):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
def to_pickle(self, path, compression='infer',
protocol=pkl.HIGHEST_PROTOCOL):

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_pickle(path, compression, protocol)

def to_records(self, index=True, convert_datetime64=True):
raise NotImplementedError(
Expand All @@ -3039,16 +3086,25 @@ def to_sparse(self, fill_value=None, kind='block'):

def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail',
index=True, index_label=None, chunksize=None, dtype=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_sql(name, con, flavor, schema, if_exists,
index, index_label, chunksize, dtype)

def to_stata(self, fname, convert_dates=None, write_index=True,
encoding='latin-1', byteorder=None, time_stamp=None,
data_label=None, variable_labels=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")

warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)

port_frame = to_pandas(self)
port_frame.to_stata(fname, convert_dates, write_index,
encoding, byteorder, time_stamp,
data_label, variable_labels)

def to_string(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='np.NaN', formatters=None,
Expand Down
131 changes: 2 additions & 129 deletions python/ray/dataframe/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@
import pandas.util.testing as tm
import ray.dataframe as rdf
from ray.dataframe.utils import (
to_pandas,
from_pandas
)
from_pandas,
to_pandas)

from pandas.tests.frame.common import TestData

Expand Down Expand Up @@ -2665,118 +2664,6 @@ def test_take():
ray_df.take(None)


def test_to_clipboard():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_clipboard()


def test_to_csv():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_csv()


def test_to_dense():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_dense()


def test_to_dict():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_dict()


def test_to_excel():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_excel(None)


def test_to_feather():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_feather(None)


def test_to_gbq():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_gbq(None, None)


def test_to_hdf():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_hdf(None, None)


def test_to_html():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_html()


def test_to_json():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_json()


def test_to_latex():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_latex()


def test_to_msgpack():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_msgpack()


def test_to_panel():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_panel()


def test_to_parquet():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_parquet(None)


def test_to_period():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_period()


def test_to_pickle():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_pickle(None)


def test_to_records():
ray_df = create_test_dataframe()

Expand All @@ -2791,20 +2678,6 @@ def test_to_sparse():
ray_df.to_sparse()


def test_to_sql():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_sql(None, None)


def test_to_stata():
ray_df = create_test_dataframe()

with pytest.raises(NotImplementedError):
ray_df.to_stata(None)


def test_to_string():
ray_df = create_test_dataframe()

Expand Down
Loading

0 comments on commit 7c9f392

Please sign in to comment.