From 8f0c4d2ca6856cc345917d62c3f989ada00617c0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 19 Nov 2023 11:26:45 -0800 Subject: [PATCH] TST: de-xfail pyarrow parser tests (#56056) * TST: un-xfail pyarrow thousands test * TST: de-xfail memory_map tests * remove unused * revert no-longer-needed --- pandas/tests/io/parser/test_encoding.py | 31 ++++++++++++++++------ pandas/tests/io/parser/test_parse_dates.py | 17 ++++++++---- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 36d7a19cf6781..cbd3917ba9c04 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -23,7 +23,6 @@ "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) -xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") @@ -238,7 +237,6 @@ def test_parse_encoded_special_characters(encoding): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # ValueError: The 'memory_map' option is not supported @pytest.mark.parametrize("encoding", ["utf-8", None, "utf-16", "cp1255", "latin-1"]) def test_encoding_memory_map(all_parsers, encoding): # GH40986 @@ -252,11 +250,17 @@ def test_encoding_memory_map(all_parsers, encoding): ) with tm.ensure_clean() as file: expected.to_csv(file, index=False, encoding=encoding) + + if parser.engine == "pyarrow": + msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(file, encoding=encoding, memory_map=True) + return + df = parser.read_csv(file, encoding=encoding, memory_map=True) tm.assert_frame_equal(df, expected) -@xfail_pyarrow # ValueError: The 'memory_map' option is not supported def test_chunk_splits_multibyte_char(all_parsers): """ Chunk splits a multibyte character with memory_map=True @@ -272,11 +276,17 @@ def test_chunk_splits_multibyte_char(all_parsers): df.iloc[2047] = "a" * 127 + "ą" with tm.ensure_clean("bug-gh43540.csv") as fname: df.to_csv(fname, index=False, header=False, encoding="utf-8") - dfr = parser.read_csv(fname, header=None, memory_map=True, engine="c") + + if parser.engine == "pyarrow": + msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(fname, header=None, memory_map=True) + return + + dfr = parser.read_csv(fname, header=None, memory_map=True) tm.assert_frame_equal(dfr, df) -@xfail_pyarrow # ValueError: The 'memory_map' option is not supported def test_readcsv_memmap_utf8(all_parsers): """ GH 43787 @@ -300,9 +310,14 @@ def test_readcsv_memmap_utf8(all_parsers): df = DataFrame(lines) with tm.ensure_clean("utf8test.csv") as fname: df.to_csv(fname, index=False, header=False, encoding="utf-8") - dfr = parser.read_csv( - fname, header=None, memory_map=True, engine="c", encoding="utf-8" - ) + + if parser.engine == "pyarrow": + msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(fname, header=None, memory_map=True, encoding="utf-8") + return + + dfr = parser.read_csv(fname, header=None, memory_map=True, encoding="utf-8") tm.assert_frame_equal(df, dfr) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 30580423a3099..ee19a20a155aa 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1979,8 +1979,6 @@ 
def test_date_parser_multiindex_columns_combine_cols(all_parsers, parse_spec, co tm.assert_frame_equal(result, expected) -# ValueError: The 'thousands' option is not supported with the 'pyarrow' engine -@xfail_pyarrow def test_date_parser_usecols_thousands(all_parsers): # GH#39365 data = """A,B,C @@ -1989,12 +1987,21 @@ def test_date_parser_usecols_thousands(all_parsers): """ parser = all_parsers - warn = UserWarning + if parser.engine == "pyarrow": # DeprecationWarning for passing a Manager object - warn = (UserWarning, DeprecationWarning) + msg = "The 'thousands' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), + parse_dates=[1], + usecols=[1, 2], + thousands="-", + ) + return + result = parser.read_csv_check_warnings( - warn, + UserWarning, "Could not infer format", StringIO(data), parse_dates=[1],
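
Note (illustrative sketch, not part of the patch): the hunks above swap blanket xfail markers for explicit assertions that the pyarrow engine rejects the unsupported option and then return early. A minimal standalone version of that pattern is sketched below; it parametrizes over engines directly instead of using pandas' all_parsers fixture, and the test name and expected frame are invented for illustration. The expected error message is the one asserted in the patch.

from io import StringIO

import pytest

import pandas as pd
import pandas._testing as tm


@pytest.mark.parametrize("engine", ["c", "python", "pyarrow"])
def test_thousands_unsupported_pyarrow(engine):
    # hypothetical test data, mirroring the thousands="-" case in the patch
    data = "A,B\n1-000,2-000\n"

    if engine == "pyarrow":
        # pyarrow rejects the option up front, so assert the error
        # instead of xfailing the whole test
        msg = "The 'thousands' option is not supported with the 'pyarrow' engine"
        with pytest.raises(ValueError, match=msg):
            pd.read_csv(StringIO(data), thousands="-", engine=engine)
        return

    # c and python engines honor the thousands separator
    result = pd.read_csv(StringIO(data), thousands="-", engine=engine)
    expected = pd.DataFrame({"A": [1000], "B": [2000]})
    tm.assert_frame_equal(result, expected)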