Skip to content

Commit

Permalink
[DataFrame] Pass read_csv kwargs to _infer_column (ray-project#1894)
Browse files Browse the repository at this point in the history
* pass kwargs to _infer_column

* adding small test for non-comma delim

* fix lint
  • Loading branch information
p-yang authored and devin-petersohn committed Apr 16, 2018
1 parent cff3776 commit f505f06
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
6 changes: 3 additions & 3 deletions python/ray/dataframe/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ def _get_firstline(file_path):
return first


def _infer_column(first_line):
return pd.read_csv(BytesIO(first_line)).columns
def _infer_column(first_line, kwargs=None):
    """Infer column labels by parsing only the first line of a CSV file.

    Args:
        first_line: Raw bytes of the file's first line; wrapped in a
            BytesIO so pandas can read it like a file.
        kwargs: Optional dict of keyword arguments forwarded unchanged to
            ``pd.read_csv`` (for example ``{'sep': '|'}``). ``None`` means
            no extra arguments.

    Returns:
        The ``columns`` attribute of the DataFrame pandas builds from
        ``first_line``.
    """
    # A None sentinel avoids sharing one dict instance across calls, which
    # is what a ``kwargs={}`` default would do.
    if kwargs is None:
        kwargs = {}
    return pd.read_csv(BytesIO(first_line), **kwargs).columns


@ray.remote
Expand Down Expand Up @@ -247,7 +247,7 @@ def read_csv(filepath,
offsets = _compute_offset(filepath, get_npartitions())

first_line = _get_firstline(filepath)
columns = _infer_column(first_line)
columns = _infer_column(first_line, kwargs=kwargs)

df_obj_ids = []
for start, end in offsets:
Expand Down
15 changes: 13 additions & 2 deletions python/ray/dataframe/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@ def teardown_parquet_file():


# Test helper: writes a two-column CSV to TEST_CSV_FILENAME.
# NOTE(review): the two `def` lines and the two `to_csv` lines below look like
# the before/after sides of this diff hunk (the later one of each pair adds
# `delimiter`) -- confirm against the actual file.
# NOTE(review): this is decorated as a fixture but test_from_csv_delimiter
# calls it directly with arguments; newer pytest releases reject direct
# fixture calls -- confirm which pytest version the project pins.
@pytest.fixture
def setup_csv_file(row_size, force=False):
def setup_csv_file(row_size, force=False, delimiter=','):
# An existing file is kept as-is unless the caller passes force=True.
if os.path.exists(TEST_CSV_FILENAME) and not force:
pass
else:
# Both columns hold the integers 0 .. row_size - 1.
df = pd.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
df.to_csv(TEST_CSV_FILENAME)
# `delimiter` is handed to pandas as the field separator (`sep`).
df.to_csv(TEST_CSV_FILENAME, sep=delimiter)


@pytest.fixture
Expand Down Expand Up @@ -88,3 +88,14 @@ def test_from_csv():
assert ray_df_equals_pandas(ray_df, pd_df)

teardown_csv_file()


def test_from_csv_delimiter():
    """Check that read_csv handles a file with a non-comma delimiter.

    A '|'-separated file is written, then parsed by pandas and by the ray
    reader with the same ``sep``; the two results must match.
    """
    # force=True because setup_csv_file keeps any file that already exists,
    # which could be a stale comma-separated one from another test.
    setup_csv_file(SMALL_ROW_SIZE, force=True, delimiter='|')

    try:
        # Pass the delimiter to both readers; without it the '|' file is
        # parsed with the default separator and the forwarding of read_csv
        # arguments is never exercised.
        pd_df = pd.read_csv(TEST_CSV_FILENAME, sep='|')
        ray_df = io.read_csv(TEST_CSV_FILENAME, sep='|')

        assert ray_df_equals_pandas(ray_df, pd_df)
    finally:
        # Remove the file even when the assertion fails so later tests do
        # not pick up the '|'-separated data.
        teardown_csv_file()

0 comments on commit f505f06

Please sign in to comment.