Skip to content

gh-101961 fileinput.hookcompressed should not set the encoding value for the binary mode #102068

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 21, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Lib/fileinput.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,8 @@ def hook_compressed(filename, mode, *, encoding=None, errors=None):
import bz2
stream = bz2.BZ2File(filename, mode)
else:
if "b" in mode:
return open(filename, mode, errors=errors)
return open(filename, mode, encoding=encoding, errors=errors)

# gzip and bz2 are binary mode by default.
Expand Down
40 changes: 28 additions & 12 deletions Lib/test/test_fileinput.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,29 +855,29 @@ def setUp(self):
self.fake_open = InvocationRecorder()

def test_empty_string(self):
self.do_test_use_builtin_open("", 1)
self.do_test_use_builtin_open_text("", "r")

def test_no_ext(self):
self.do_test_use_builtin_open("abcd", 2)
self.do_test_use_builtin_open_text("abcd", "r")

@unittest.skipUnless(gzip, "Requires gzip and zlib")
def test_gz_ext_fake(self):
original_open = gzip.open
gzip.open = self.fake_open
try:
result = fileinput.hook_compressed("test.gz", "3")
result = fileinput.hook_compressed("test.gz", "r")
finally:
gzip.open = original_open

self.assertEqual(self.fake_open.invocation_count, 1)
self.assertEqual(self.fake_open.last_invocation, (("test.gz", "3"), {}))
self.assertEqual(self.fake_open.last_invocation, (("test.gz", "r"), {}))

@unittest.skipUnless(gzip, "Requires gzip and zlib")
def test_gz_with_encoding_fake(self):
original_open = gzip.open
gzip.open = lambda filename, mode: io.BytesIO(b'Ex-binary string')
try:
result = fileinput.hook_compressed("test.gz", "3", encoding="utf-8")
result = fileinput.hook_compressed("test.gz", "r", encoding="utf-8")
finally:
gzip.open = original_open
self.assertEqual(list(result), ['Ex-binary string'])
Expand All @@ -887,23 +887,40 @@ def test_bz2_ext_fake(self):
original_open = bz2.BZ2File
bz2.BZ2File = self.fake_open
try:
result = fileinput.hook_compressed("test.bz2", "4")
result = fileinput.hook_compressed("test.bz2", "r")
finally:
bz2.BZ2File = original_open

self.assertEqual(self.fake_open.invocation_count, 1)
self.assertEqual(self.fake_open.last_invocation, (("test.bz2", "4"), {}))
self.assertEqual(self.fake_open.last_invocation, (("test.bz2", "r"), {}))

def test_blah_ext(self):
self.do_test_use_builtin_open("abcd.blah", "5")
self.do_test_use_builtin_open_binary("abcd.blah", "rb")

def test_gz_ext_builtin(self):
self.do_test_use_builtin_open("abcd.Gz", "6")
self.do_test_use_builtin_open_binary("abcd.Gz", "rb")

def test_bz2_ext_builtin(self):
self.do_test_use_builtin_open("abcd.Bz2", "7")
self.do_test_use_builtin_open_binary("abcd.Bz2", "rb")

def do_test_use_builtin_open(self, filename, mode):
def test_binary_mode_encoding(self):
self.do_test_use_builtin_open_binary("abcd", "rb")

def test_text_mode_encoding(self):
self.do_test_use_builtin_open_text("abcd", "r")

def do_test_use_builtin_open_binary(self, filename, mode):
original_open = self.replace_builtin_open(self.fake_open)
try:
result = fileinput.hook_compressed(filename, mode)
finally:
self.replace_builtin_open(original_open)

self.assertEqual(self.fake_open.invocation_count, 1)
self.assertEqual(self.fake_open.last_invocation,
((filename, mode), {'errors': None}))

def do_test_use_builtin_open_text(self, filename, mode):
original_open = self.replace_builtin_open(self.fake_open)
try:
result = fileinput.hook_compressed(filename, mode)
Expand Down Expand Up @@ -980,7 +997,6 @@ def check(mode, expected_lines):
with self.assertRaises(ValueError):
check('rb', ['A\n', 'B\r\n', 'C\r', 'D\u20ac'])


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please revert the unrelated change :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, thank you

class MiscTest(unittest.TestCase):

def test_all(self):
Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,7 @@ Ilia Kurenkov
Vladimir Kushnir
Erno Kuusela
Kabir Kwatra
Gihwan Kim
Copy link
Member

@corona10 corona10 Feb 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like you have to move up the name to the following order.

Gihwan Kim
Jan Kim
Taek Joo Kim

Ross Lagerwall
Cameron Laird
Loïc Lajeanne
Expand Down