Bytes (which are the input for decoding) are mutable now. If a decoding
error callback changes the bytes object in the exception, the decoder might
access memory that is no longer valid. Change unicode_decode_call_errorhandler()
so that it fetches the addresses of the bytes array (start and end) from the
exception object and passes them back to the caller.
This commit is contained in:
Walter Dörwald 2007-07-30 13:31:40 +00:00
parent 2dbde5ea44
commit e78178e2c0
2 changed files with 68 additions and 19 deletions

View file

@ -806,6 +806,39 @@ class CodecCallbackTest(unittest.TestCase):
text = 'abc<def>ghi'*n
text.translate(charmap)
def test_mutatingdecodehandler(self):
    """Check decoders against error handlers that tamper with ``exc.object``.

    Two handlers are registered and run against every (encoding, data)
    pair in ``baddata``:

    * ``test.replacing`` rebinds ``exc.object`` to a non-bytes value (42);
      decoding is expected to fail with ``TypeError``.
    * ``test.mutating`` empties the input object in place and asks the
      decoder to resume at position 0; the decoder must pick up the
      modified input, so the result is just the replacement character.
    """
    # Each entry is undecodable in its encoding, so the error handler is
    # guaranteed to be invoked.
    baddata = [
        ("ascii", b"\xff"),
        ("utf-7", b"++"),
        ("utf-8", b"\xff"),
        ("utf-16", b"\xff"),
        ("unicode-escape", b"\\u123g"),
        ("raw-unicode-escape", b"\\u123g"),
        ("unicode-internal", b"\xff"),
    ]

    def replacing(exc):
        # Swap the input for something that is not a bytes object at all.
        if isinstance(exc, UnicodeDecodeError):
            exc.object = 42
            return ("\u4242", 0)
        else:
            raise TypeError("don't know how to handle %r" % exc)
    codecs.register_error("test.replacing", replacing)
    for (encoding, data) in baddata:
        self.assertRaises(TypeError, data.decode, encoding, "test.replacing")

    def mutating(exc):
        # Empty the input in place; resuming at 0 then leaves nothing to
        # decode.
        if isinstance(exc, UnicodeDecodeError):
            exc.object[:] = b""
            return ("\u4242", 0)
        else:
            raise TypeError("don't know how to handle %r" % exc)
    codecs.register_error("test.mutating", mutating)
    # If the decoder doesn't pick up the modified input the following
    # will lead to an endless loop.  This loop must use "test.mutating";
    # re-running "test.replacing" here would leave the handler above
    # untested.
    for (encoding, data) in baddata:
        self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
def test_main():
    """Run the CodecCallbackTest suite through the regression-test runner."""
    suite_classes = (CodecCallbackTest,)
    test.test_support.run_unittest(*suite_classes)