Skip to content

Commit

Permalink
CLN: let codecs validate the possible values of encoding and encoding errors (#43616)
Browse files Browse the repository at this point in the history
  • Loading branch information
twoertwein authored Sep 17, 2021
1 parent 6791678 commit 72a1090
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 24 deletions.
26 changes: 4 additions & 22 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,6 @@ def _get_filepath_or_buffer(

compression = dict(compression, method=compression_method)

# uniform encoding names
if encoding is not None:
encoding = encoding.replace("_", "-").lower()

# bz2 and xz do not write the byte order mark for utf-16 and utf-32
# print a warning when writing such files
if (
Expand Down Expand Up @@ -602,25 +598,11 @@ def get_handle(
if _is_binary_mode(path_or_buf, mode) and "b" not in mode:
mode += "b"

# valdiate errors
# validate encoding and errors
if isinstance(encoding, str):
codecs.lookup(encoding)
if isinstance(errors, str):
errors = errors.lower()
if errors not in (
None,
"strict",
"ignore",
"replace",
"xmlcharrefreplace",
"backslashreplace",
"namereplace",
"surrogateescape",
"surrogatepass",
):
raise ValueError(
f"Invalid value for `encoding_errors` ({errors}). Please see "
+ "https://docs.python.org/3/library/codecs.html#error-handlers "
+ "for valid values."
)
codecs.lookup_error(errors)

# open URLs
ioargs = _get_filepath_or_buffer(
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ def test_encoding_errors(encoding_errors, format):
def test_bad_encdoing_errors():
# GH 39777
with tm.ensure_clean() as path:
with pytest.raises(ValueError, match="Invalid value for `encoding_errors`"):
with pytest.raises(LookupError, match="unknown error handler name"):
icom.get_handle(path, "w", errors="bad")


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/xml/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,7 @@ def test_utf16_encoding(datapath, parser):

def test_unknown_encoding(datapath, parser):
filename = datapath("io", "data", "xml", "baby_names.xml")
with pytest.raises(LookupError, match=("unknown encoding: uft-8")):
with pytest.raises(LookupError, match=("unknown encoding: UFT-8")):
read_xml(filename, encoding="UFT-8", parser=parser)


Expand Down

0 comments on commit 72a1090

Please sign in to comment.