Skip to content

Commit

Permalink
Fix bug where certain encodings were throwing an error
Browse files Browse the repository at this point in the history
* Resolves modin-project#976
* Change default value in `kwargs.get` to match pandas
* Add parametrized test for `encoding` with a variety of new encodings
  • Loading branch information
devin-petersohn committed Jan 10, 2020
1 parent 04e50bf commit c45ae50
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
2 changes: 1 addition & 1 deletion modin/backends/pandas/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def parse(fname, **kwargs):
if start is not None and end is not None:
# pop "compression" from kwargs because bio is uncompressed
bio = FileReader.file_open(fname, "rb", kwargs.pop("compression", "infer"))
- if kwargs.pop("encoding", False):
+ if kwargs.get("encoding", None) is not None:
header = b"" + bio.readline()
else:
header = b""
Expand Down
11 changes: 7 additions & 4 deletions modin/pandas/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -937,11 +937,14 @@ def test_from_csv_skiprows(make_csv_file):
df_equals(modin_df, pandas_df)


- def test_from_csv_encoding(make_csv_file):
- make_csv_file(encoding="latin8")
+ @pytest.mark.parametrize(
+ "encoding", ["latin8", "ISO-8859-1", "latin1", "iso-8859-1", "cp1252", "utf8"]
+ )
+ def test_from_csv_encoding(make_csv_file, encoding):
+ make_csv_file(encoding=encoding)

- pandas_df = pandas.read_csv(TEST_CSV_FILENAME, encoding="latin8")
- modin_df = pd.read_csv(TEST_CSV_FILENAME, encoding="latin8")
+ pandas_df = pandas.read_csv(TEST_CSV_FILENAME, encoding=encoding)
+ modin_df = pd.read_csv(TEST_CSV_FILENAME, encoding=encoding)

assert modin_df_equals_pandas(modin_df, pandas_df)

Expand Down

0 comments on commit c45ae50

Please sign in to comment.