gh-111495: improve test coverage of codecs C API (GH-126030)

For now, skip some crashers (tracked in gh-123378).
This commit is contained in:
Bénédikt Tran 2024-11-01 14:28:18 +01:00 committed by GitHub
parent 6c67446a6e
commit 32e07fd377
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -747,6 +747,49 @@ class CAPICodecs(unittest.TestCase):
class CAPICodecErrors(unittest.TestCase):
@classmethod
def _generate_exception_args(cls):
for objlen in range(5):
maxind = 2 * max(2, objlen)
for start in range(-maxind, maxind + 1):
for end in range(-maxind, maxind + 1):
yield objlen, start, end
@classmethod
def generate_encode_errors(cls):
return tuple(
UnicodeEncodeError('utf-8', '0' * objlen, start, end, 'why')
for objlen, start, end in cls._generate_exception_args()
)
@classmethod
def generate_decode_errors(cls):
return tuple(
UnicodeDecodeError('utf-8', b'0' * objlen, start, end, 'why')
for objlen, start, end in cls._generate_exception_args()
)
@classmethod
def generate_translate_errors(cls):
return tuple(
UnicodeTranslateError('0' * objlen, start, end, 'why')
for objlen, start, end in cls._generate_exception_args()
)
@classmethod
def setUpClass(cls):
cls.unicode_encode_errors = cls.generate_encode_errors()
cls.unicode_decode_errors = cls.generate_decode_errors()
cls.unicode_translate_errors = cls.generate_translate_errors()
cls.all_unicode_errors = (
cls.unicode_encode_errors
+ cls.unicode_decode_errors
+ cls.unicode_translate_errors
)
cls.bad_unicode_errors = (
ValueError(),
)
def test_codec_register_error(self):
# for cleaning up between tests
from _codecs import _unregister_error as _codecs_unregister_error
@ -780,33 +823,82 @@ class CAPICodecErrors(unittest.TestCase):
self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
self.assertIs(codec_lookup_error('backslashreplace'), codecs.backslashreplace_errors)
self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
self.assertRaises(LookupError, codec_lookup_error, 'unknown')
def test_codec_error_handlers(self):
exceptions = [
# A UnicodeError with an empty message currently crashes:
# See: https://github.com/python/cpython/issues/123378
# UnicodeEncodeError('bad', '', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
]
def test_codec_strict_errors_handler(self):
handler = _testcapi.codec_strict_errors
for exc in self.all_unicode_errors + self.bad_unicode_errors:
with self.subTest(handler=handler, exc=exc):
self.assertRaises(type(exc), handler, exc)
strict_handler = _testcapi.codec_strict_errors
def test_codec_ignore_errors_handler(self):
handler = _testcapi.codec_ignore_errors
self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
def test_codec_replace_errors_handler(self):
handler = _testcapi.codec_replace_errors
self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
def test_codec_xmlcharrefreplace_errors_handler(self):
handler = _testcapi.codec_xmlcharrefreplace_errors
self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
def test_codec_backslashreplace_errors_handler(self):
handler = _testcapi.codec_backslashreplace_errors
self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
def test_codec_namereplace_errors_handler(self):
handler = _testlimitedcapi.codec_namereplace_errors
self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
def do_test_codec_errors_handler(self, handler, exceptions):
at_least_one = False
for exc in exceptions:
with self.subTest(handler=strict_handler, exc=exc):
self.assertRaises(UnicodeEncodeError, strict_handler, exc)
# See https://github.com/python/cpython/issues/123378 and related
# discussion and issues for details.
if self._exception_may_crash(exc):
continue
for handler in [
_testcapi.codec_ignore_errors,
_testcapi.codec_replace_errors,
_testcapi.codec_xmlcharrefreplace_errors,
_testlimitedcapi.codec_namereplace_errors,
]:
for exc in exceptions:
with self.subTest(handler=handler, exc=exc):
self.assertIsInstance(handler(exc), tuple)
at_least_one = True
with self.subTest(handler=handler, exc=exc):
# test that the handler does not crash
self.assertIsInstance(handler(exc), tuple)
if exceptions:
self.assertTrue(at_least_one, "all exceptions are crashing")
for bad_exc in (
self.bad_unicode_errors
+ tuple(e for e in self.all_unicode_errors if e not in exceptions)
):
with self.subTest('bad type', handler=handler, exc=bad_exc):
self.assertRaises(TypeError, handler, bad_exc)
@classmethod
def _exception_may_crash(cls, exc):
"""Indicate whether a Unicode exception might currently crash
the interpreter when used by a built-in codecs error handler.
Until gh-123378 is fixed, we skip the tests for these exceptions.
This should only be used by "do_test_codec_errors_handler".
"""
message, start, end = exc.object, exc.start, exc.end
match exc:
case UnicodeEncodeError():
return end < start or (end - start) >= len(message)
case UnicodeDecodeError():
# The case "end - start >= len(message)" does not crash.
return end < start
case UnicodeTranslateError():
# Test "end <= start" because PyCodec_ReplaceErrors checks
# the Unicode kind of a 0-length string which by convention
# is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
# the handler currently expects.
return end <= start or (end - start) >= len(message)
return False
if __name__ == "__main__":