mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
gh-111495: improve test coverage of codecs C API (GH-126030)
For now, skip some crashers (tracked in gh-123378).
This commit is contained in:
parent
6c67446a6e
commit
32e07fd377
1 changed file with 113 additions and 21 deletions
|
@ -747,6 +747,49 @@ class CAPICodecs(unittest.TestCase):
|
||||||
|
|
||||||
class CAPICodecErrors(unittest.TestCase):
|
class CAPICodecErrors(unittest.TestCase):
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _generate_exception_args(cls):
    """Yield ``(objlen, start, end)`` triples used to build Unicode errors.

    ``objlen`` takes the values 0 to 4.  For each of them, ``start`` and
    ``end`` independently span ``-maxind`` to ``maxind`` (inclusive), where
    ``maxind = 2 * max(2, objlen)``; the range therefore includes negative
    indices, indices beyond ``objlen`` and pairs with ``end < start``.
    """
    for objlen in range(5):
        maxind = 2 * max(2, objlen)
        for start in range(-maxind, maxind + 1):
            for end in range(-maxind, maxind + 1):
                yield objlen, start, end
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def generate_encode_errors(cls):
    """Return a tuple of UnicodeEncodeError instances.

    One exception is created per ``(objlen, start, end)`` triple produced
    by ``cls._generate_exception_args()``; its object is the string
    ``'0' * objlen``, its encoding is 'utf-8' and its reason is 'why'.
    """
    return tuple(
        UnicodeEncodeError('utf-8', '0' * objlen, start, end, 'why')
        for objlen, start, end in cls._generate_exception_args()
    )
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def generate_decode_errors(cls):
    """Return a tuple of UnicodeDecodeError instances.

    One exception is created per ``(objlen, start, end)`` triple produced
    by ``cls._generate_exception_args()``; its object is the bytes string
    ``b'0' * objlen``, its encoding is 'utf-8' and its reason is 'why'.
    """
    return tuple(
        UnicodeDecodeError('utf-8', b'0' * objlen, start, end, 'why')
        for objlen, start, end in cls._generate_exception_args()
    )
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def generate_translate_errors(cls):
    """Return a tuple of UnicodeTranslateError instances.

    One exception is created per ``(objlen, start, end)`` triple produced
    by ``cls._generate_exception_args()``; its object is the string
    ``'0' * objlen`` and its reason is 'why'.
    """
    return tuple(
        UnicodeTranslateError('0' * objlen, start, end, 'why')
        for objlen, start, end in cls._generate_exception_args()
    )
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
    """Build the exception fixtures shared by the handler tests.

    Sets on the class:

    * ``unicode_encode_errors``, ``unicode_decode_errors`` and
      ``unicode_translate_errors``: the tuples returned by the matching
      ``generate_*_errors()`` class methods;
    * ``all_unicode_errors``: the concatenation of the three tuples above,
      in that order;
    * ``bad_unicode_errors``: a tuple holding a single ``ValueError()``.
    """
    cls.unicode_encode_errors = cls.generate_encode_errors()
    cls.unicode_decode_errors = cls.generate_decode_errors()
    cls.unicode_translate_errors = cls.generate_translate_errors()
    cls.all_unicode_errors = (
        cls.unicode_encode_errors
        + cls.unicode_decode_errors
        + cls.unicode_translate_errors
    )
    cls.bad_unicode_errors = (
        ValueError(),
    )
|
||||||
|
|
||||||
def test_codec_register_error(self):
|
def test_codec_register_error(self):
|
||||||
# for cleaning up between tests
|
# for cleaning up between tests
|
||||||
from _codecs import _unregister_error as _codecs_unregister_error
|
from _codecs import _unregister_error as _codecs_unregister_error
|
||||||
|
@ -780,34 +823,83 @@ class CAPICodecErrors(unittest.TestCase):
|
||||||
self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
|
self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
|
||||||
self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
|
self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
|
||||||
self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
|
self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
|
||||||
|
self.assertIs(codec_lookup_error('backslashreplace'), codecs.backslashreplace_errors)
|
||||||
self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
|
self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
|
||||||
self.assertRaises(LookupError, codec_lookup_error, 'unknown')
|
self.assertRaises(LookupError, codec_lookup_error, 'unknown')
|
||||||
|
|
||||||
def test_codec_error_handlers(self):
|
def test_codec_strict_errors_handler(self):
|
||||||
exceptions = [
|
handler = _testcapi.codec_strict_errors
|
||||||
# A UnicodeError with an empty message currently crashes:
|
for exc in self.all_unicode_errors + self.bad_unicode_errors:
|
||||||
# See: https://github.com/python/cpython/issues/123378
|
|
||||||
# UnicodeEncodeError('bad', '', 0, 1, 'reason'),
|
|
||||||
UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
|
|
||||||
UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
|
|
||||||
UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
|
|
||||||
]
|
|
||||||
|
|
||||||
strict_handler = _testcapi.codec_strict_errors
|
|
||||||
for exc in exceptions:
|
|
||||||
with self.subTest(handler=strict_handler, exc=exc):
|
|
||||||
self.assertRaises(UnicodeEncodeError, strict_handler, exc)
|
|
||||||
|
|
||||||
for handler in [
|
|
||||||
_testcapi.codec_ignore_errors,
|
|
||||||
_testcapi.codec_replace_errors,
|
|
||||||
_testcapi.codec_xmlcharrefreplace_errors,
|
|
||||||
_testlimitedcapi.codec_namereplace_errors,
|
|
||||||
]:
|
|
||||||
for exc in exceptions:
|
|
||||||
with self.subTest(handler=handler, exc=exc):
|
with self.subTest(handler=handler, exc=exc):
|
||||||
|
self.assertRaises(type(exc), handler, exc)
|
||||||
|
|
||||||
|
def test_codec_ignore_errors_handler(self):
    """Run the shared handler checks on ``_testcapi.codec_ignore_errors``.

    Every generated Unicode error (encode, decode and translate) is
    passed as an accepted input.
    """
    handler = _testcapi.codec_ignore_errors
    self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
|
||||||
|
|
||||||
|
def test_codec_replace_errors_handler(self):
    """Run the shared handler checks on ``_testcapi.codec_replace_errors``.

    Every generated Unicode error (encode, decode and translate) is
    passed as an accepted input.
    """
    handler = _testcapi.codec_replace_errors
    self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
|
||||||
|
|
||||||
|
def test_codec_xmlcharrefreplace_errors_handler(self):
    """Run the shared handler checks on
    ``_testcapi.codec_xmlcharrefreplace_errors``.

    Only the generated UnicodeEncodeError instances are passed as accepted
    inputs; the shared helper expects TypeError for all other generated
    errors.
    """
    handler = _testcapi.codec_xmlcharrefreplace_errors
    self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
|
||||||
|
|
||||||
|
def test_codec_backslashreplace_errors_handler(self):
    """Run the shared handler checks on
    ``_testcapi.codec_backslashreplace_errors``.

    Every generated Unicode error (encode, decode and translate) is
    passed as an accepted input.
    """
    handler = _testcapi.codec_backslashreplace_errors
    self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
|
||||||
|
|
||||||
|
def test_codec_namereplace_errors_handler(self):
    """Run the shared handler checks on
    ``_testlimitedcapi.codec_namereplace_errors``.

    Only the generated UnicodeEncodeError instances are passed as accepted
    inputs; the shared helper expects TypeError for all other generated
    errors.
    """
    handler = _testlimitedcapi.codec_namereplace_errors
    self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
|
||||||
|
|
||||||
|
def do_test_codec_errors_handler(self, handler, exceptions):
    """Check *handler* against accepted and rejected exceptions.

    For each exception in *exceptions* that is not skipped by
    ``_exception_may_crash()``, ``handler(exc)`` must return a tuple.
    If *exceptions* is non-empty, at least one of them must have been
    exercised.

    For every exception in ``self.bad_unicode_errors`` and every exception
    of ``self.all_unicode_errors`` that is not part of *exceptions*,
    ``handler(exc)`` must raise TypeError.
    """
    at_least_one = False
    for exc in exceptions:
        # See https://github.com/python/cpython/issues/123378 and related
        # discussion and issues for details.
        if self._exception_may_crash(exc):
            continue

        at_least_one = True
        with self.subTest(handler=handler, exc=exc):
            # test that the handler does not crash
            self.assertIsInstance(handler(exc), tuple)

    if exceptions:
        self.assertTrue(at_least_one, "all exceptions are crashing")

    # Exception instances compare by identity, so an id() set gives the
    # same membership answer as scanning the 'exceptions' tuple, without
    # the quadratic cost over the generated fixtures.
    accepted_ids = {id(exc) for exc in exceptions}
    unsupported = tuple(
        e for e in self.all_unicode_errors if id(e) not in accepted_ids
    )
    for bad_exc in self.bad_unicode_errors + unsupported:
        with self.subTest('bad type', handler=handler, exc=bad_exc):
            self.assertRaises(TypeError, handler, bad_exc)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _exception_may_crash(cls, exc):
|
||||||
|
"""Indicate whether a Unicode exception might currently crash
|
||||||
|
the interpreter when used by a built-in codecs error handler.
|
||||||
|
|
||||||
|
Until gh-123378 is fixed, we skip the tests for these exceptions.
|
||||||
|
|
||||||
|
This should only be used by "do_test_codec_errors_handler".
|
||||||
|
"""
|
||||||
|
message, start, end = exc.object, exc.start, exc.end
|
||||||
|
match exc:
|
||||||
|
case UnicodeEncodeError():
|
||||||
|
return end < start or (end - start) >= len(message)
|
||||||
|
case UnicodeDecodeError():
|
||||||
|
# The case "end - start >= len(message)" does not crash.
|
||||||
|
return end < start
|
||||||
|
case UnicodeTranslateError():
|
||||||
|
# Test "end <= start" because PyCodec_ReplaceErrors checks
|
||||||
|
# the Unicode kind of a 0-length string which by convention
|
||||||
|
# is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
|
||||||
|
# the handler currently expects.
|
||||||
|
return end <= start or (end - start) >= len(message)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue