Skip to content

Commit 7c9f392

Browse files
kunalgosar authored and devin-petersohn committed
[DataFrame] Implementing write methods (#1918)
* Add in write methods and functionality
* infer highest available pickle version
* Fix import rebase artifact
* formatting changes to test
* fix lint
1 parent baf97e4 commit 7c9f392

File tree

4 files changed

+446
-230
lines changed

4 files changed

+446
-230
lines changed

python/ray/dataframe/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,11 @@ def get_npartitions():
3030
# because they depend on npartitions.
3131
from .dataframe import DataFrame # noqa: 402
3232
from .series import Series # noqa: 402
33-
from .io import (read_csv, read_parquet) # noqa: 402
34-
from .concat import concat # noqa: 402
33+
from .io import (read_csv, read_parquet, read_json, read_html, # noqa: 402
34+
read_clipboard, read_excel, read_hdf, read_feather, # noqa: 402
35+
read_msgpack, read_stata, read_sas, read_pickle, # noqa: 402
36+
read_sql) # noqa: 402
37+
from .concat import concat # noqa: 402
3538

3639
__all__ = [
3740
"DataFrame", "Series", "read_csv", "read_parquet", "concat", "eval"

python/ray/dataframe/dataframe.py

Lines changed: 95 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from pandas._libs import lib
1010
from pandas.core.dtypes.cast import maybe_upcast_putmask
1111
from pandas import compat
12-
from pandas.compat import lzip
12+
from pandas.compat import lzip, cPickle as pkl
1313
import pandas.core.common as com
1414
from pandas.core.dtypes.common import (
1515
is_bool_dtype,
@@ -2924,19 +2924,30 @@ def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs):
29242924
"github.com/ray-project/ray.")
29252925

29262926
def to_clipboard(self, excel=None, sep=None, **kwargs):
2927-
raise NotImplementedError(
2928-
"To contribute to Pandas on Ray, please visit "
2929-
"github.com/ray-project/ray.")
29302927

2931-
def to_csv(self, path_or_buf=None, sep=', ', na_rep='', float_format=None,
2928+
warnings.warn("Defaulting to Pandas implementation",
2929+
PendingDeprecationWarning)
2930+
2931+
port_frame = to_pandas(self)
2932+
port_frame.to_clipboard(excel, sep, **kwargs)
2933+
2934+
def to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None,
29322935
columns=None, header=True, index=True, index_label=None,
29332936
mode='w', encoding=None, compression=None, quoting=None,
29342937
quotechar='"', line_terminator='\n', chunksize=None,
29352938
tupleize_cols=None, date_format=None, doublequote=True,
29362939
escapechar=None, decimal='.'):
2937-
raise NotImplementedError(
2938-
"To contribute to Pandas on Ray, please visit "
2939-
"github.com/ray-project/ray.")
2940+
2941+
warnings.warn("Defaulting to Pandas implementation",
2942+
PendingDeprecationWarning)
2943+
2944+
port_frame = to_pandas(self)
2945+
port_frame.to_csv(path_or_buf, sep, na_rep, float_format,
2946+
columns, header, index, index_label,
2947+
mode, encoding, compression, quoting,
2948+
quotechar, line_terminator, chunksize,
2949+
tupleize_cols, date_format, doublequote,
2950+
escapechar, decimal)
29402951

29412952
def to_dense(self):
29422953
raise NotImplementedError(
@@ -2953,14 +2964,24 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
29532964
index_label=None, startrow=0, startcol=0, engine=None,
29542965
merge_cells=True, encoding=None, inf_rep='inf', verbose=True,
29552966
freeze_panes=None):
2956-
raise NotImplementedError(
2957-
"To contribute to Pandas on Ray, please visit "
2958-
"github.com/ray-project/ray.")
2967+
2968+
warnings.warn("Defaulting to Pandas implementation",
2969+
PendingDeprecationWarning)
2970+
2971+
port_frame = to_pandas(self)
2972+
port_frame.to_excel(excel_writer, sheet_name, na_rep,
2973+
float_format, columns, header, index,
2974+
index_label, startrow, startcol, engine,
2975+
merge_cells, encoding, inf_rep, verbose,
2976+
freeze_panes)
29592977

29602978
def to_feather(self, fname):
2961-
raise NotImplementedError(
2962-
"To contribute to Pandas on Ray, please visit "
2963-
"github.com/ray-project/ray.")
2979+
2980+
warnings.warn("Defaulting to Pandas implementation",
2981+
PendingDeprecationWarning)
2982+
2983+
port_frame = to_pandas(self)
2984+
port_frame.to_feather(fname)
29642985

29652986
def to_gbq(self, destination_table, project_id, chunksize=10000,
29662987
verbose=True, reauth=False, if_exists='fail',
@@ -2970,26 +2991,42 @@ def to_gbq(self, destination_table, project_id, chunksize=10000,
29702991
"github.com/ray-project/ray.")
29712992

29722993
def to_hdf(self, path_or_buf, key, **kwargs):
2973-
raise NotImplementedError(
2974-
"To contribute to Pandas on Ray, please visit "
2975-
"github.com/ray-project/ray.")
2994+
2995+
warnings.warn("Defaulting to Pandas implementation",
2996+
PendingDeprecationWarning)
2997+
2998+
port_frame = to_pandas(self)
2999+
port_frame.to_hdf(path_or_buf, key, **kwargs)
29763000

29773001
def to_html(self, buf=None, columns=None, col_space=None, header=True,
29783002
index=True, na_rep='np.NaN', formatters=None,
29793003
float_format=None, sparsify=None, index_names=True,
29803004
justify=None, bold_rows=True, classes=None, escape=True,
29813005
max_rows=None, max_cols=None, show_dimensions=False,
29823006
notebook=False, decimal='.', border=None):
2983-
raise NotImplementedError(
2984-
"To contribute to Pandas on Ray, please visit "
2985-
"github.com/ray-project/ray.")
3007+
3008+
warnings.warn("Defaulting to Pandas implementation",
3009+
PendingDeprecationWarning)
3010+
3011+
port_frame = to_pandas(self)
3012+
port_frame.to_html(buf, columns, col_space, header,
3013+
index, na_rep, formatters,
3014+
float_format, sparsify, index_names,
3015+
justify, bold_rows, classes, escape,
3016+
max_rows, max_cols, show_dimensions,
3017+
notebook, decimal, border)
29863018

29873019
def to_json(self, path_or_buf=None, orient=None, date_format=None,
29883020
double_precision=10, force_ascii=True, date_unit='ms',
29893021
default_handler=None, lines=False, compression=None):
2990-
raise NotImplementedError(
2991-
"To contribute to Pandas on Ray, please visit "
2992-
"github.com/ray-project/ray.")
3022+
3023+
warnings.warn("Defaulting to Pandas implementation",
3024+
PendingDeprecationWarning)
3025+
3026+
port_frame = to_pandas(self)
3027+
port_frame.to_json(path_or_buf, orient, date_format,
3028+
double_precision, force_ascii, date_unit,
3029+
default_handler, lines, compression)
29933030

29943031
def to_latex(self, buf=None, columns=None, col_space=None, header=True,
29953032
index=True, na_rep='np.NaN', formatters=None,
@@ -3002,9 +3039,12 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
30023039
"github.com/ray-project/ray.")
30033040

30043041
def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
3005-
raise NotImplementedError(
3006-
"To contribute to Pandas on Ray, please visit "
3007-
"github.com/ray-project/ray.")
3042+
3043+
warnings.warn("Defaulting to Pandas implementation",
3044+
PendingDeprecationWarning)
3045+
3046+
port_frame = to_pandas(self)
3047+
port_frame.to_msgpack(path_or_buf, encoding, **kwargs)
30083048

30093049
def to_panel(self):
30103050
raise NotImplementedError(
@@ -3013,19 +3053,26 @@ def to_panel(self):
30133053

30143054
def to_parquet(self, fname, engine='auto', compression='snappy',
30153055
**kwargs):
3016-
raise NotImplementedError(
3017-
"To contribute to Pandas on Ray, please visit "
3018-
"github.com/ray-project/ray.")
3056+
3057+
warnings.warn("Defaulting to Pandas implementation",
3058+
PendingDeprecationWarning)
3059+
3060+
port_frame = to_pandas(self)
3061+
port_frame.to_parquet(fname, engine, compression, **kwargs)
30193062

30203063
def to_period(self, freq=None, axis=0, copy=True):
30213064
raise NotImplementedError(
30223065
"To contribute to Pandas on Ray, please visit "
30233066
"github.com/ray-project/ray.")
30243067

3025-
def to_pickle(self, path, compression='infer', protocol=4):
3026-
raise NotImplementedError(
3027-
"To contribute to Pandas on Ray, please visit "
3028-
"github.com/ray-project/ray.")
3068+
def to_pickle(self, path, compression='infer',
3069+
protocol=pkl.HIGHEST_PROTOCOL):
3070+
3071+
warnings.warn("Defaulting to Pandas implementation",
3072+
PendingDeprecationWarning)
3073+
3074+
port_frame = to_pandas(self)
3075+
port_frame.to_pickle(path, compression, protocol)
30293076

30303077
def to_records(self, index=True, convert_datetime64=True):
30313078
raise NotImplementedError(
@@ -3039,16 +3086,25 @@ def to_sparse(self, fill_value=None, kind='block'):
30393086

30403087
def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail',
30413088
index=True, index_label=None, chunksize=None, dtype=None):
3042-
raise NotImplementedError(
3043-
"To contribute to Pandas on Ray, please visit "
3044-
"github.com/ray-project/ray.")
3089+
3090+
warnings.warn("Defaulting to Pandas implementation",
3091+
PendingDeprecationWarning)
3092+
3093+
port_frame = to_pandas(self)
3094+
port_frame.to_sql(name, con, flavor, schema, if_exists,
3095+
index, index_label, chunksize, dtype)
30453096

30463097
def to_stata(self, fname, convert_dates=None, write_index=True,
30473098
encoding='latin-1', byteorder=None, time_stamp=None,
30483099
data_label=None, variable_labels=None):
3049-
raise NotImplementedError(
3050-
"To contribute to Pandas on Ray, please visit "
3051-
"github.com/ray-project/ray.")
3100+
3101+
warnings.warn("Defaulting to Pandas implementation",
3102+
PendingDeprecationWarning)
3103+
3104+
port_frame = to_pandas(self)
3105+
port_frame.to_stata(fname, convert_dates, write_index,
3106+
encoding, byteorder, time_stamp,
3107+
data_label, variable_labels)
30523108

30533109
def to_string(self, buf=None, columns=None, col_space=None, header=True,
30543110
index=True, na_rep='np.NaN', formatters=None,

python/ray/dataframe/test/test_dataframe.py

Lines changed: 2 additions & 129 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,8 @@
88
import pandas.util.testing as tm
99
import ray.dataframe as rdf
1010
from ray.dataframe.utils import (
11-
to_pandas,
12-
from_pandas
13-
)
11+
from_pandas,
12+
to_pandas)
1413

1514
from pandas.tests.frame.common import TestData
1615

@@ -2665,118 +2664,6 @@ def test_take():
26652664
ray_df.take(None)
26662665

26672666

2668-
def test_to_clipboard():
2669-
ray_df = create_test_dataframe()
2670-
2671-
with pytest.raises(NotImplementedError):
2672-
ray_df.to_clipboard()
2673-
2674-
2675-
def test_to_csv():
2676-
ray_df = create_test_dataframe()
2677-
2678-
with pytest.raises(NotImplementedError):
2679-
ray_df.to_csv()
2680-
2681-
2682-
def test_to_dense():
2683-
ray_df = create_test_dataframe()
2684-
2685-
with pytest.raises(NotImplementedError):
2686-
ray_df.to_dense()
2687-
2688-
2689-
def test_to_dict():
2690-
ray_df = create_test_dataframe()
2691-
2692-
with pytest.raises(NotImplementedError):
2693-
ray_df.to_dict()
2694-
2695-
2696-
def test_to_excel():
2697-
ray_df = create_test_dataframe()
2698-
2699-
with pytest.raises(NotImplementedError):
2700-
ray_df.to_excel(None)
2701-
2702-
2703-
def test_to_feather():
2704-
ray_df = create_test_dataframe()
2705-
2706-
with pytest.raises(NotImplementedError):
2707-
ray_df.to_feather(None)
2708-
2709-
2710-
def test_to_gbq():
2711-
ray_df = create_test_dataframe()
2712-
2713-
with pytest.raises(NotImplementedError):
2714-
ray_df.to_gbq(None, None)
2715-
2716-
2717-
def test_to_hdf():
2718-
ray_df = create_test_dataframe()
2719-
2720-
with pytest.raises(NotImplementedError):
2721-
ray_df.to_hdf(None, None)
2722-
2723-
2724-
def test_to_html():
2725-
ray_df = create_test_dataframe()
2726-
2727-
with pytest.raises(NotImplementedError):
2728-
ray_df.to_html()
2729-
2730-
2731-
def test_to_json():
2732-
ray_df = create_test_dataframe()
2733-
2734-
with pytest.raises(NotImplementedError):
2735-
ray_df.to_json()
2736-
2737-
2738-
def test_to_latex():
2739-
ray_df = create_test_dataframe()
2740-
2741-
with pytest.raises(NotImplementedError):
2742-
ray_df.to_latex()
2743-
2744-
2745-
def test_to_msgpack():
2746-
ray_df = create_test_dataframe()
2747-
2748-
with pytest.raises(NotImplementedError):
2749-
ray_df.to_msgpack()
2750-
2751-
2752-
def test_to_panel():
2753-
ray_df = create_test_dataframe()
2754-
2755-
with pytest.raises(NotImplementedError):
2756-
ray_df.to_panel()
2757-
2758-
2759-
def test_to_parquet():
2760-
ray_df = create_test_dataframe()
2761-
2762-
with pytest.raises(NotImplementedError):
2763-
ray_df.to_parquet(None)
2764-
2765-
2766-
def test_to_period():
2767-
ray_df = create_test_dataframe()
2768-
2769-
with pytest.raises(NotImplementedError):
2770-
ray_df.to_period()
2771-
2772-
2773-
def test_to_pickle():
2774-
ray_df = create_test_dataframe()
2775-
2776-
with pytest.raises(NotImplementedError):
2777-
ray_df.to_pickle(None)
2778-
2779-
27802667
def test_to_records():
27812668
ray_df = create_test_dataframe()
27822669

@@ -2791,20 +2678,6 @@ def test_to_sparse():
27912678
ray_df.to_sparse()
27922679

27932680

2794-
def test_to_sql():
2795-
ray_df = create_test_dataframe()
2796-
2797-
with pytest.raises(NotImplementedError):
2798-
ray_df.to_sql(None, None)
2799-
2800-
2801-
def test_to_stata():
2802-
ray_df = create_test_dataframe()
2803-
2804-
with pytest.raises(NotImplementedError):
2805-
ray_df.to_stata(None)
2806-
2807-
28082681
def test_to_string():
28092682
ray_df = create_test_dataframe()
28102683

0 commit comments

Comments
 (0)