mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
bpo-32583: Fix possible crashing in builtin Unicode decoders (#5325)
When using customized decode error handlers, it is possible for builtin decoders to write out-of-bounds and then crash.
This commit is contained in:
parent
84521047e4
commit
2c7fd46e11
3 changed files with 74 additions and 2 deletions
|
@ -1044,6 +1044,58 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
for (encoding, data) in baddata:
|
||||
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
|
||||
|
||||
# issue32583
|
||||
def test_crashing_decode_handler(self):
# Regression test: decoding with a custom error handler must produce the
# expected string for the builtin UTF-16 and UTF-32 decoders.
|
||||
# It is better to generate one more character, filling the extra space slot,
|
||||
# so that a debug build fails reliably.
|
||||
def forward_shorter_than_end(exc):
# Error handler: substitute U+FFFD and resume decoding at exc.start + 1.
|
||||
if isinstance(exc, UnicodeDecodeError):
|
||||
# one-character replacement; 0 < forward < exc.end
|
||||
return ('\ufffd', exc.start+1)
|
||||
else:
|
||||
raise TypeError("don't know how to handle %r" % exc)
|
||||
codecs.register_error(
|
||||
"test.forward_shorter_than_end", forward_shorter_than_end)
|
||||
|
||||
self.assertEqual(
|
||||
b'\xd8\xd8\xd8\xd8\xd8\x00\x00\x00'.decode(
|
||||
'utf-16-le', 'test.forward_shorter_than_end'),
|
||||
'\ufffd\ufffd\ufffd\ufffd\xd8\x00'
|
||||
)
|
||||
self.assertEqual(
|
||||
b'\xd8\xd8\xd8\xd8\x00\xd8\x00\x00'.decode(
|
||||
'utf-16-be', 'test.forward_shorter_than_end'),
|
||||
'\ufffd\ufffd\ufffd\ufffd\xd8\x00'
|
||||
)
|
||||
self.assertEqual(
|
||||
b'\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00'.decode(
|
||||
'utf-32-le', 'test.forward_shorter_than_end'),
|
||||
'\ufffd\ufffd\ufffd\u1111\x00'
|
||||
)
|
||||
self.assertEqual(
|
||||
b'\x11\x11\x11\x00\x00\x11\x11\x00\x00\x00\x00'.decode(
|
||||
'utf-32-be', 'test.forward_shorter_than_end'),
|
||||
'\ufffd\ufffd\ufffd\u1111\x00'
|
||||
)
|
||||
|
||||
def replace_with_long(exc):
# Error handler: swap the object being decoded for 8 zero bytes (longer than
# the 1-byte inputs used below), substitute U+FFFD and resume at exc.start.
|
||||
if isinstance(exc, UnicodeDecodeError):
|
||||
exc.object = b"\x00" * 8
|
||||
return ('\ufffd', exc.start)
|
||||
else:
|
||||
raise TypeError("don't know how to handle %r" % exc)
|
||||
codecs.register_error("test.replace_with_long", replace_with_long)
|
||||
|
||||
self.assertEqual(
|
||||
b'\x00'.decode('utf-16', 'test.replace_with_long'),
|
||||
'\ufffd\x00\x00\x00\x00'
|
||||
)
|
||||
self.assertEqual(
|
||||
b'\x00'.decode('utf-32', 'test.replace_with_long'),
|
||||
'\ufffd\x00\x00'
|
||||
)
|
||||
|
||||
|
||||
def test_fake_error_class(self):
|
||||
handlers = [
|
||||
codecs.strict_errors,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue