mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
gh-111495: improve test coverage of codecs C API (GH-126030)
For now, skip some crashers (tracked in gh-123378).
This commit is contained in:
parent
6c67446a6e
commit
32e07fd377
1 changed file with 113 additions and 21 deletions
|
@ -747,6 +747,49 @@ class CAPICodecs(unittest.TestCase):
|
|||
|
||||
class CAPICodecErrors(unittest.TestCase):
|
||||
|
||||
@classmethod
|
||||
def _generate_exception_args(cls):
|
||||
for objlen in range(5):
|
||||
maxind = 2 * max(2, objlen)
|
||||
for start in range(-maxind, maxind + 1):
|
||||
for end in range(-maxind, maxind + 1):
|
||||
yield objlen, start, end
|
||||
|
||||
@classmethod
|
||||
def generate_encode_errors(cls):
|
||||
return tuple(
|
||||
UnicodeEncodeError('utf-8', '0' * objlen, start, end, 'why')
|
||||
for objlen, start, end in cls._generate_exception_args()
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def generate_decode_errors(cls):
|
||||
return tuple(
|
||||
UnicodeDecodeError('utf-8', b'0' * objlen, start, end, 'why')
|
||||
for objlen, start, end in cls._generate_exception_args()
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def generate_translate_errors(cls):
|
||||
return tuple(
|
||||
UnicodeTranslateError('0' * objlen, start, end, 'why')
|
||||
for objlen, start, end in cls._generate_exception_args()
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.unicode_encode_errors = cls.generate_encode_errors()
|
||||
cls.unicode_decode_errors = cls.generate_decode_errors()
|
||||
cls.unicode_translate_errors = cls.generate_translate_errors()
|
||||
cls.all_unicode_errors = (
|
||||
cls.unicode_encode_errors
|
||||
+ cls.unicode_decode_errors
|
||||
+ cls.unicode_translate_errors
|
||||
)
|
||||
cls.bad_unicode_errors = (
|
||||
ValueError(),
|
||||
)
|
||||
|
||||
def test_codec_register_error(self):
|
||||
# for cleaning up between tests
|
||||
from _codecs import _unregister_error as _codecs_unregister_error
|
||||
|
@ -780,33 +823,82 @@ class CAPICodecErrors(unittest.TestCase):
|
|||
self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
|
||||
self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
|
||||
self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
|
||||
self.assertIs(codec_lookup_error('backslashreplace'), codecs.backslashreplace_errors)
|
||||
self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
|
||||
self.assertRaises(LookupError, codec_lookup_error, 'unknown')
|
||||
|
||||
def test_codec_error_handlers(self):
|
||||
exceptions = [
|
||||
# A UnicodeError with an empty message currently crashes:
|
||||
# See: https://github.com/python/cpython/issues/123378
|
||||
# UnicodeEncodeError('bad', '', 0, 1, 'reason'),
|
||||
UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
|
||||
UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
|
||||
UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
|
||||
]
|
||||
def test_codec_strict_errors_handler(self):
    # Every fixture exception, Unicode error or not, is passed to the
    # strict handler; the call must raise an exception of that same type.
    handler = _testcapi.codec_strict_errors
    candidates = self.all_unicode_errors + self.bad_unicode_errors
    for exc in candidates:
        with self.subTest(handler=handler, exc=exc):
            with self.assertRaises(type(exc)):
                handler(exc)
|
||||
|
||||
strict_handler = _testcapi.codec_strict_errors
|
||||
def test_codec_ignore_errors_handler(self):
    # Run the shared handler checks on the "ignore" handler, passing
    # every generated Unicode error as the exceptions to exercise.
    self.do_test_codec_errors_handler(
        _testcapi.codec_ignore_errors,
        self.all_unicode_errors,
    )
|
||||
|
||||
def test_codec_replace_errors_handler(self):
    # Run the shared handler checks on the "replace" handler, passing
    # every generated Unicode error as the exceptions to exercise.
    self.do_test_codec_errors_handler(
        _testcapi.codec_replace_errors,
        self.all_unicode_errors,
    )
|
||||
|
||||
def test_codec_xmlcharrefreplace_errors_handler(self):
    # Run the shared handler checks on the "xmlcharrefreplace" handler,
    # passing only the generated UnicodeEncodeError instances.
    self.do_test_codec_errors_handler(
        _testcapi.codec_xmlcharrefreplace_errors,
        self.unicode_encode_errors,
    )
|
||||
|
||||
def test_codec_backslashreplace_errors_handler(self):
    # Run the shared handler checks on the "backslashreplace" handler,
    # passing every generated Unicode error as the exceptions to exercise.
    self.do_test_codec_errors_handler(
        _testcapi.codec_backslashreplace_errors,
        self.all_unicode_errors,
    )
|
||||
|
||||
def test_codec_namereplace_errors_handler(self):
    # Run the shared handler checks on the "namereplace" handler (exposed
    # through _testlimitedcapi), passing only the generated
    # UnicodeEncodeError instances.
    self.do_test_codec_errors_handler(
        _testlimitedcapi.codec_namereplace_errors,
        self.unicode_encode_errors,
    )
|
||||
|
||||
def do_test_codec_errors_handler(self, handler, exceptions):
|
||||
at_least_one = False
|
||||
for exc in exceptions:
|
||||
with self.subTest(handler=strict_handler, exc=exc):
|
||||
self.assertRaises(UnicodeEncodeError, strict_handler, exc)
|
||||
# See https://github.com/python/cpython/issues/123378 and related
|
||||
# discussion and issues for details.
|
||||
if self._exception_may_crash(exc):
|
||||
continue
|
||||
|
||||
for handler in [
|
||||
_testcapi.codec_ignore_errors,
|
||||
_testcapi.codec_replace_errors,
|
||||
_testcapi.codec_xmlcharrefreplace_errors,
|
||||
_testlimitedcapi.codec_namereplace_errors,
|
||||
]:
|
||||
for exc in exceptions:
|
||||
with self.subTest(handler=handler, exc=exc):
|
||||
self.assertIsInstance(handler(exc), tuple)
|
||||
at_least_one = True
|
||||
with self.subTest(handler=handler, exc=exc):
|
||||
# test that the handler does not crash
|
||||
self.assertIsInstance(handler(exc), tuple)
|
||||
|
||||
if exceptions:
|
||||
self.assertTrue(at_least_one, "all exceptions are crashing")
|
||||
|
||||
for bad_exc in (
|
||||
self.bad_unicode_errors
|
||||
+ tuple(e for e in self.all_unicode_errors if e not in exceptions)
|
||||
):
|
||||
with self.subTest('bad type', handler=handler, exc=bad_exc):
|
||||
self.assertRaises(TypeError, handler, bad_exc)
|
||||
|
||||
@classmethod
|
||||
def _exception_may_crash(cls, exc):
|
||||
"""Indicate whether a Unicode exception might currently crash
|
||||
the interpreter when used by a built-in codecs error handler.
|
||||
|
||||
Until gh-123378 is fixed, we skip the tests for these exceptions.
|
||||
|
||||
This should only be used by "do_test_codec_errors_handler".
|
||||
"""
|
||||
message, start, end = exc.object, exc.start, exc.end
|
||||
match exc:
|
||||
case UnicodeEncodeError():
|
||||
return end < start or (end - start) >= len(message)
|
||||
case UnicodeDecodeError():
|
||||
# The case "end - start >= len(message)" does not crash.
|
||||
return end < start
|
||||
case UnicodeTranslateError():
|
||||
# Test "end <= start" because PyCodec_ReplaceErrors checks
|
||||
# the Unicode kind of a 0-length string which by convention
|
||||
# is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
|
||||
# the handler currently expects.
|
||||
return end <= start or (end - start) >= len(message)
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue