Skip to content

gh-126004: remove redundant safeguards for codecs handlers tests #127680

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 23, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 13 additions & 42 deletions Lib/test/test_capi/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,38 +839,33 @@ def test_codec_ignore_errors_handler(self):

def test_codec_replace_errors_handler(self):
handler = _testcapi.codec_replace_errors
self.do_test_codec_errors_handler(handler, self.all_unicode_errors,
safe=True)
self.do_test_codec_errors_handler(handler, self.all_unicode_errors)

def test_codec_xmlcharrefreplace_errors_handler(self):
handler = _testcapi.codec_xmlcharrefreplace_errors
self.do_test_codec_errors_handler(handler, self.unicode_encode_errors,
safe=True)
self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)

def test_codec_backslashreplace_errors_handler(self):
handler = _testcapi.codec_backslashreplace_errors
self.do_test_codec_errors_handler(handler, self.all_unicode_errors,
safe=True)
self.do_test_codec_errors_handler(handler, self.all_unicode_errors)

def test_codec_namereplace_errors_handler(self):
handler = _testlimitedcapi.codec_namereplace_errors
self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)

def do_test_codec_errors_handler(self, handler, exceptions, *, safe=False):
at_least_one = False
def do_test_codec_errors_handler(self, handler, exceptions):
self.assertNotEqual(len(exceptions), 0)
for exc in exceptions:
# See https://github.com/python/cpython/issues/123378 and related
# discussion and issues for details.
if not safe and self._exception_may_crash(exc):
continue

at_least_one = True
with self.subTest(handler=handler, exc=exc):
# test that the handler does not crash
self.assertIsInstance(handler(exc), tuple)

if exceptions:
self.assertTrue(at_least_one, "all exceptions are crashing")
res = handler(exc)
self.assertIsInstance(res, tuple)
self.assertEqual(len(res), 2)
replacement, continue_from = res
self.assertIsInstance(replacement, str)
self.assertIsInstance(continue_from, int)
self.assertGreaterEqual(continue_from, 0)
self.assertLessEqual(continue_from, len(exc.object))

for bad_exc in (
self.bad_unicode_errors
Expand All @@ -879,30 +874,6 @@ def do_test_codec_errors_handler(self, handler, exceptions, *, safe=False):
with self.subTest('bad type', handler=handler, exc=bad_exc):
self.assertRaises(TypeError, handler, bad_exc)

@classmethod
def _exception_may_crash(cls, exc):
"""Indicate whether a Unicode exception might currently crash
the interpreter when used by a built-in codecs error handler.

Until gh-123378 is fixed, we skip the tests for these exceptions.

This should only be used by "do_test_codec_errors_handler".
"""
message, start, end = exc.object, exc.start, exc.end
match exc:
case UnicodeEncodeError():
return end < start or (end - start) >= len(message)
case UnicodeDecodeError():
# The case "end - start >= len(message)" does not crash.
return end < start
case UnicodeTranslateError():
# Test "end <= start" because PyCodec_ReplaceErrors checks
# the Unicode kind of a 0-length string which by convention
# is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
# the handler currently expects.
return end <= start or (end - start) >= len(message)
return False


if __name__ == "__main__":
unittest.main()
Loading