From a2721fd602e43128314d4efd056dae56a89197bf Mon Sep 17 00:00:00 2001 From: Rushabh Vasani Date: Mon, 3 Feb 2020 21:22:26 +0530 Subject: [PATCH] Raise error in read_csv when arguments header and prefix both are not None (#31383) Closes https://github.com/pandas-dev/pandas/issues/27394 --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/io/parsers.py | 26 +++++++++++++++----------- pandas/tests/io/parser/test_common.py | 11 +++++++++++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9fdda83abe944..27b28c1c08e23 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -177,6 +177,7 @@ MultiIndex I/O ^^^ - Bug in :meth:`read_json` where integer overflow was occuring when json contains big number strings. (:issue:`30320`) +- :func:`read_csv` will now raise a ``ValueError`` when the arguments ``header`` and ``prefix`` are both not ``None``. (:issue:`27394`) - Bug in :meth:`DataFrame.to_json` was raising ``NotFoundError`` when ``path_or_buf`` was an S3 URI (:issue:`28375`) - diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a33d81ff437bf..b38aa9770a73b 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1399,17 +1399,21 @@ def __init__(self, kwds): "index_col must only contain row numbers " "when specifying a multi-index header" ) - - # GH 16338 - elif self.header is not None and not is_integer(self.header): - raise ValueError("header must be integer or list of integers") - - # GH 27779 - elif self.header is not None and self.header < 0: - raise ValueError( - "Passing negative integer to header is invalid. 
" - "For no header, use header=None instead" - ) + elif self.header is not None: + # GH 27394 + if self.prefix is not None: + raise ValueError( + "Argument prefix must be None if argument header is not None" + ) + # GH 16338 + elif not is_integer(self.header): + raise ValueError("header must be integer or list of integers") + # GH 27779 + elif self.header < 0: + raise ValueError( + "Passing negative integer to header is invalid. " + "For no header, use header=None instead" + ) self._name_processed = False diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 6c17f40b790ac..c19056d434ec3 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2040,6 +2040,17 @@ def test_read_csv_memory_growth_chunksize(all_parsers): pass +def test_read_csv_raises_on_header_prefix(all_parsers): + # gh-27394 + parser = all_parsers + msg = "Argument prefix must be None if argument header is not None" + + s = StringIO("0,1\n2,3") + + with pytest.raises(ValueError, match=msg): + parser.read_csv(s, header=0, prefix="_X") + + def test_read_table_equivalency_to_read_csv(all_parsers): # see gh-21948 # As of 0.25.0, read_table is undeprecated