mirror of
https://github.com/python/cpython.git
synced 2025-08-02 16:13:13 +00:00
[3.13] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944)
If the error handler is used, a new bytes object is created to set as
the object attribute of UnicodeDecodeError, and that bytes object then
replaces the original data. A pointer to the decoded data will become invalid
after destroying that temporary bytes object. So we need another way to return
the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
_PyBytes_DecodeEscape() does not have such an issue, because it does not
use the error handlers registry, but it should be changed for compatibility
with _PyUnicode_DecodeUnicodeEscapeInternal().
(cherry picked from commit 9f69a58623
)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
0c0fedf289
commit
6279eb8c07
8 changed files with 194 additions and 57 deletions
|
@ -1,6 +1,7 @@
|
|||
import codecs
|
||||
import html.entities
|
||||
import itertools
|
||||
import re
|
||||
import sys
|
||||
import unicodedata
|
||||
import unittest
|
||||
|
@ -1124,7 +1125,7 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
text = 'abc<def>ghi'*n
|
||||
text.translate(charmap)
|
||||
|
||||
def test_mutatingdecodehandler(self):
|
||||
def test_mutating_decode_handler(self):
|
||||
baddata = [
|
||||
("ascii", b"\xff"),
|
||||
("utf-7", b"++"),
|
||||
|
@ -1159,6 +1160,42 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
for (encoding, data) in baddata:
|
||||
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
|
||||
|
||||
def test_mutating_decode_handler_unicode_escape(self):
|
||||
decode = codecs.unicode_escape_decode
|
||||
def mutating(exc):
|
||||
if isinstance(exc, UnicodeDecodeError):
|
||||
r = data.get(exc.object[:exc.end])
|
||||
if r is not None:
|
||||
exc.object = r[0] + exc.object[exc.end:]
|
||||
return ('\u0404', r[1])
|
||||
raise AssertionError("don't know how to handle %r" % exc)
|
||||
|
||||
codecs.register_error('test.mutating2', mutating)
|
||||
data = {
|
||||
br'\x0': (b'\\', 0),
|
||||
br'\x3': (b'xxx\\', 3),
|
||||
br'\x5': (b'x\\', 1),
|
||||
}
|
||||
def check(input, expected, msg):
|
||||
with self.assertWarns(DeprecationWarning) as cm:
|
||||
self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
|
||||
self.assertIn(msg, str(cm.warning))
|
||||
|
||||
check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
|
||||
check(br'\x0n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'")
|
||||
check(br'\x0z', '\u0404\\z', r"invalid escape sequence '\z'")
|
||||
|
||||
check(br'\x3n\zr', '\u0404\n\\zr', r"invalid escape sequence '\z'")
|
||||
check(br'\x3zr', '\u0404\\zr', r"invalid escape sequence '\z'")
|
||||
check(br'\x3z5', '\u0404\\z5', r"invalid escape sequence '\z'")
|
||||
check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r"invalid escape sequence '\z'")
|
||||
check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r"invalid escape sequence '\z'")
|
||||
|
||||
check(br'\x5n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
|
||||
check(br'\x5n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'")
|
||||
check(br'\x5z', '\u0404\\z', r"invalid escape sequence '\z'")
|
||||
check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r"invalid escape sequence '\z'")
|
||||
|
||||
# issue32583
|
||||
def test_crashing_decode_handler(self):
|
||||
# better generating one more character to fill the extra space slot
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue