FIX-#1386: Fix read_csv for incorrect csv data

prutskov · prutskov · commit 6c8d1b577c38 · 2020-09-15T11:19:16.000+03:00
Signed-off-by: Alexey Prutskov <alexey.prutskov@intel.com>
diff --git a/modin/engines/base/io/text/csv_reader.py b/modin/engines/base/io/text/csv_reader.py
@@ -185,7 +185,7 @@ def _read(cls, filepath_or_buffer, **kwargs):
         # reported dtypes from differing rows can be different based on the inference in
         # the limited data seen by each worker. We use pandas to compute the exact dtype
         # over the whole column for each column. The index is set below.
-        dtypes = cls.get_dtypes(dtypes_ids)
+        dtypes = cls.get_dtypes(dtypes_ids) if len(dtypes_ids) > 0 else None
 
         partition_ids = cls.build_partition(partition_ids, row_lengths, column_widths)
         # If parse_dates is present, the column names that we have might not be
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
@@ -1184,6 +1184,13 @@ def test_from_csv_newlines_in_quotes(nrows, skiprows):
     )
 
 
+def test_read_csv_incorrect_data():
+    name = "modin/pandas/test/data/test_categories.json"
+    pandas_df, modin_df = pandas.read_csv(name), pd.read_csv(name)
+
+    df_equals(pandas_df, modin_df)
+
+
 @pytest.mark.skip(reason="No clipboard on Travis")
 def test_to_clipboard():
     modin_df = create_test_modin_dataframe()