Skip to content

Commit 6c8d1b5

Browse files
committed
FIX-#1386: Fix read_csv for incorrect csv data
Signed-off-by: Alexey Prutskov <alexey.prutskov@intel.com>
1 parent df725d2 commit 6c8d1b5

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

modin/engines/base/io/text/csv_reader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def _read(cls, filepath_or_buffer, **kwargs):
185 185   # reported dtypes from differing rows can be different based on the inference in
186 186   # the limited data seen by each worker. We use pandas to compute the exact dtype
187 187   # over the whole column for each column. The index is set below.
188     - dtypes = cls.get_dtypes(dtypes_ids)
    188 + dtypes = cls.get_dtypes(dtypes_ids) if len(dtypes_ids) > 0 else None
189 189
190 190   partition_ids = cls.build_partition(partition_ids, row_lengths, column_widths)
191 191   # If parse_dates is present, the column names that we have might not be

modin/pandas/test/test_io.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,6 +1184,13 @@ def test_from_csv_newlines_in_quotes(nrows, skiprows):
1184 1184   )
1185 1185
1186 1186
     1187 + def test_read_csv_incorrect_data():
     1188 +     name = "modin/pandas/test/data/test_categories.json"
     1189 +     pandas_df, modin_df = pandas.read_csv(name), pd.read_csv(name)
     1190 +
     1191 +     df_equals(pandas_df, modin_df)
     1192 +
     1193 +
1187 1194   @pytest.mark.skip(reason="No clipboard on Travis")
1188 1195   def test_to_clipboard():
1189 1196       modin_df = create_test_modin_dataframe()

0 commit comments

Comments
 (0)