Skip to content

gh-111495: improve test coverage of codecs C API #126030

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 1, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 115 additions & 23 deletions Lib/test/test_capi/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,49 @@ def test_codec_stream_writer(self):

class CAPICodecErrors(unittest.TestCase):

@classmethod
def _generate_exception_args(cls):
    """Yield ``(objlen, start, end)`` triples for building Unicode errors.

    For every object length in ``range(5)``, *start* and *end* each sweep
    the closed interval ``[-maxind, maxind]`` independently, where
    ``maxind = 2 * max(2, objlen)``.  The triples therefore cover valid
    ranges as well as negative, reversed (``end < start``) and
    out-of-bounds ones.
    """
    for objlen in range(5):
        maxind = 2 * max(2, objlen)
        # Both bounds sweep the same interval, so build the range once.
        bounds = range(-maxind, maxind + 1)
        for start in bounds:
            for end in bounds:
                yield objlen, start, end

@classmethod
def generate_encode_errors(cls):
    """Return a tuple of UnicodeEncodeError instances.

    One exception is created per ``(objlen, start, end)`` triple yielded
    by ``cls._generate_exception_args()``; the faulty object is the
    string ``'0' * objlen``, the encoding is ``'utf-8'`` and the reason
    is ``'why'``.
    """
    return tuple(
        UnicodeEncodeError('utf-8', '0' * objlen, start, end, 'why')
        for objlen, start, end in cls._generate_exception_args()
    )

@classmethod
def generate_decode_errors(cls):
    """Return a tuple of UnicodeDecodeError instances.

    One exception is created per ``(objlen, start, end)`` triple yielded
    by ``cls._generate_exception_args()``; the faulty object is the
    bytes string ``b'0' * objlen``, the encoding is ``'utf-8'`` and the
    reason is ``'why'``.
    """
    return tuple(
        UnicodeDecodeError('utf-8', b'0' * objlen, start, end, 'why')
        for objlen, start, end in cls._generate_exception_args()
    )

@classmethod
def generate_translate_errors(cls):
    """Return a tuple of UnicodeTranslateError instances.

    One exception is created per ``(objlen, start, end)`` triple yielded
    by ``cls._generate_exception_args()``; the faulty object is the
    string ``'0' * objlen`` and the reason is ``'why'``.
    """
    return tuple(
        UnicodeTranslateError('0' * objlen, start, end, 'why')
        for objlen, start, end in cls._generate_exception_args()
    )

@classmethod
def setUpClass(cls):
    # Build the exception fixtures once per class rather than once per
    # test: every generator below produces one exception for each
    # (objlen, start, end) combination.
    cls.unicode_encode_errors = cls.generate_encode_errors()
    cls.unicode_decode_errors = cls.generate_decode_errors()
    cls.unicode_translate_errors = cls.generate_translate_errors()
    # Concatenation of the three fixture tuples, in encode / decode /
    # translate order.
    cls.all_unicode_errors = (
        cls.unicode_encode_errors
        + cls.unicode_decode_errors
        + cls.unicode_translate_errors
    )
    # Exceptions that are not Unicode errors at all.
    cls.bad_unicode_errors = (
        ValueError(),
    )

def test_codec_register_error(self):
# for cleaning up between tests
from _codecs import _unregister_error as _codecs_unregister_error
Expand Down Expand Up @@ -780,33 +823,82 @@ def test_codec_lookup_error(self):
self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
self.assertIs(codec_lookup_error('backslashreplace'), codecs.backslashreplace_errors)
self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
self.assertRaises(LookupError, codec_lookup_error, 'unknown')

def test_codec_error_handlers(self):
exceptions = [
# A UnicodeError with an empty message currently crashes:
# See: https://github.com/python/cpython/issues/123378
# UnicodeEncodeError('bad', '', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
]

strict_handler = _testcapi.codec_strict_errors
def test_codec_strict_errors_handler(self):
    # The strict handler is expected to raise an exception of the same
    # type as the one it receives, for Unicode errors and for the
    # non-Unicode "bad" exceptions alike.
    handler = _testcapi.codec_strict_errors
    for exc in self.all_unicode_errors + self.bad_unicode_errors:
        with self.subTest(handler=handler, exc=exc):
            self.assertRaises(type(exc), handler, exc)

def test_codec_ignore_errors_handler(self):
    # Encode, decode and translate errors are all passed as accepted
    # inputs for this handler.
    handler = _testcapi.codec_ignore_errors
    self.do_test_codec_errors_handler(handler, self.all_unicode_errors)

def test_codec_replace_errors_handler(self):
    # Encode, decode and translate errors are all passed as accepted
    # inputs for this handler.
    handler = _testcapi.codec_replace_errors
    self.do_test_codec_errors_handler(handler, self.all_unicode_errors)

def test_codec_xmlcharrefreplace_errors_handler(self):
    # Only encode errors are passed as accepted inputs; the shared
    # helper then expects TypeError for every other Unicode error.
    handler = _testcapi.codec_xmlcharrefreplace_errors
    self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)

def test_codec_backslashreplace_errors_handler(self):
    # Encode, decode and translate errors are all passed as accepted
    # inputs for this handler.
    handler = _testcapi.codec_backslashreplace_errors
    self.do_test_codec_errors_handler(handler, self.all_unicode_errors)

def test_codec_namereplace_errors_handler(self):
    # Only encode errors are passed as accepted inputs; the shared
    # helper then expects TypeError for every other Unicode error.
    # Note that this handler comes from _testlimitedcapi, unlike the
    # other handlers tested in this class.
    handler = _testlimitedcapi.codec_namereplace_errors
    self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)

def do_test_codec_errors_handler(self, handler, exceptions):
at_least_one = False
for exc in exceptions:
with self.subTest(handler=strict_handler, exc=exc):
self.assertRaises(UnicodeEncodeError, strict_handler, exc)

for handler in [
_testcapi.codec_ignore_errors,
_testcapi.codec_replace_errors,
_testcapi.codec_xmlcharrefreplace_errors,
_testlimitedcapi.codec_namereplace_errors,
]:
for exc in exceptions:
with self.subTest(handler=handler, exc=exc):
self.assertIsInstance(handler(exc), tuple)
# See https://github.com/python/cpython/issues/123378 and related
# discussion and issues for details.
if self._exception_may_crash(exc):
continue

at_least_one = True
with self.subTest(handler=handler, exc=exc):
# test that the handler does not crash
self.assertIsInstance(handler(exc), tuple)

if exceptions:
self.assertTrue(at_least_one, "all exceptions are crashing")

for bad_exc in (
self.bad_unicode_errors
+ tuple(e for e in self.all_unicode_errors if e not in exceptions)
):
with self.subTest('bad type', handler=handler, exc=bad_exc):
self.assertRaises(TypeError, handler, bad_exc)

@classmethod
def _exception_may_crash(cls, exc):
"""Indicate whether a Unicode exception might currently crash
the interpreter when used by a built-in codecs error handler.

Until gh-123378 is fixed, we skip the tests for these exceptions.

This should only be used by "do_test_codec_errors_handler".
"""
message, start, end = exc.object, exc.start, exc.end
match exc:
case UnicodeEncodeError():
return end < start or (end - start) >= len(message)
case UnicodeDecodeError():
# The case "end - start >= len(message)" does not crash.
return end < start
case UnicodeTranslateError():
# Test "end <= start" because PyCodec_ReplaceErrors checks
# the Unicode kind of a 0-length string which by convention
# is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
# the handler currently expects.
return end <= start or (end - start) >= len(message)
return False


if __name__ == "__main__":
Expand Down
Loading