mirror of
https://github.com/python/cpython.git
synced 2025-07-19 09:15:34 +00:00
Bytes (which are the input for decoding) are mutable now. If a decoding
error callback changes the bytes object in the exception, the decoder might use memory that's no longer in use. Change unicode_decode_call_errorhandler() so that it fetches the addresses of the bytes array (start and end) from the exception object and passes them back to the caller.
This commit is contained in:
parent
2dbde5ea44
commit
e78178e2c0
2 changed files with 68 additions and 19 deletions
|
@ -806,6 +806,39 @@ class CodecCallbackTest(unittest.TestCase):
|
||||||
text = 'abc<def>ghi'*n
|
text = 'abc<def>ghi'*n
|
||||||
text.translate(charmap)
|
text.translate(charmap)
|
||||||
|
|
||||||
|
def test_mutatingdecodehandler(self):
|
||||||
|
baddata = [
|
||||||
|
("ascii", b"\xff"),
|
||||||
|
("utf-7", b"++"),
|
||||||
|
("utf-8", b"\xff"),
|
||||||
|
("utf-16", b"\xff"),
|
||||||
|
("unicode-escape", b"\\u123g"),
|
||||||
|
("raw-unicode-escape", b"\\u123g"),
|
||||||
|
("unicode-internal", b"\xff"),
|
||||||
|
]
|
||||||
|
|
||||||
|
def replacing(exc):
|
||||||
|
if isinstance(exc, UnicodeDecodeError):
|
||||||
|
exc.object = 42
|
||||||
|
return ("\u4242", 0)
|
||||||
|
else:
|
||||||
|
raise TypeError("don't know how to handle %r" % exc)
|
||||||
|
codecs.register_error("test.replacing", replacing)
|
||||||
|
for (encoding, data) in baddata:
|
||||||
|
self.assertRaises(TypeError, data.decode, encoding, "test.replacing")
|
||||||
|
|
||||||
|
def mutating(exc):
|
||||||
|
if isinstance(exc, UnicodeDecodeError):
|
||||||
|
exc.object[:] = b""
|
||||||
|
return ("\u4242", 0)
|
||||||
|
else:
|
||||||
|
raise TypeError("don't know how to handle %r" % exc)
|
||||||
|
codecs.register_error("test.mutating", mutating)
|
||||||
|
# If the decoder doesn't pick up the modified input the following
|
||||||
|
# will lead to an endless loop
|
||||||
|
for (encoding, data) in baddata:
|
||||||
|
self.assertRaises(TypeError, data.decode, encoding, "test.replacing")
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test.test_support.run_unittest(CodecCallbackTest)
|
test.test_support.run_unittest(CodecCallbackTest)
|
||||||
|
|
||||||
|
|
|
@ -1269,7 +1269,7 @@ int PyUnicode_SetDefaultEncoding(const char *encoding)
|
||||||
static
|
static
|
||||||
int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
|
int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
|
||||||
const char *encoding, const char *reason,
|
const char *encoding, const char *reason,
|
||||||
const char *input, Py_ssize_t insize, Py_ssize_t *startinpos, Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
|
const char **input, const char **inend, Py_ssize_t *startinpos, Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
|
||||||
PyObject **output, Py_ssize_t *outpos, Py_UNICODE **outptr)
|
PyObject **output, Py_ssize_t *outpos, Py_UNICODE **outptr)
|
||||||
{
|
{
|
||||||
static char *argparse = "O!n;decoding error handler must return (unicode, int) tuple";
|
static char *argparse = "O!n;decoding error handler must return (unicode, int) tuple";
|
||||||
|
@ -1277,9 +1277,11 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
|
||||||
PyObject *restuple = NULL;
|
PyObject *restuple = NULL;
|
||||||
PyObject *repunicode = NULL;
|
PyObject *repunicode = NULL;
|
||||||
Py_ssize_t outsize = PyUnicode_GET_SIZE(*output);
|
Py_ssize_t outsize = PyUnicode_GET_SIZE(*output);
|
||||||
|
Py_ssize_t insize;
|
||||||
Py_ssize_t requiredsize;
|
Py_ssize_t requiredsize;
|
||||||
Py_ssize_t newpos;
|
Py_ssize_t newpos;
|
||||||
Py_UNICODE *repptr;
|
Py_UNICODE *repptr;
|
||||||
|
PyObject *inputobj = NULL;
|
||||||
Py_ssize_t repsize;
|
Py_ssize_t repsize;
|
||||||
int res = -1;
|
int res = -1;
|
||||||
|
|
||||||
|
@ -1291,7 +1293,7 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
|
||||||
|
|
||||||
if (*exceptionObject == NULL) {
|
if (*exceptionObject == NULL) {
|
||||||
*exceptionObject = PyUnicodeDecodeError_Create(
|
*exceptionObject = PyUnicodeDecodeError_Create(
|
||||||
encoding, input, insize, *startinpos, *endinpos, reason);
|
encoding, *input, *inend-*input, *startinpos, *endinpos, reason);
|
||||||
if (*exceptionObject == NULL)
|
if (*exceptionObject == NULL)
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -1313,6 +1315,19 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
|
||||||
}
|
}
|
||||||
if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
|
if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
||||||
|
/* Copy back the bytes variables, which might have been modified by the
|
||||||
|
callback */
|
||||||
|
inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
|
||||||
|
if (!inputobj)
|
||||||
|
goto onError;
|
||||||
|
if (!PyBytes_Check(inputobj)) {
|
||||||
|
PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
|
||||||
|
}
|
||||||
|
*input = PyBytes_AS_STRING(inputobj);
|
||||||
|
insize = PyBytes_GET_SIZE(inputobj);
|
||||||
|
*inend = *input + insize;
|
||||||
|
|
||||||
if (newpos<0)
|
if (newpos<0)
|
||||||
newpos = insize+newpos;
|
newpos = insize+newpos;
|
||||||
if (newpos<0 || newpos>insize) {
|
if (newpos<0 || newpos>insize) {
|
||||||
|
@ -1335,10 +1350,11 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
|
||||||
*outptr = PyUnicode_AS_UNICODE(*output) + *outpos;
|
*outptr = PyUnicode_AS_UNICODE(*output) + *outpos;
|
||||||
}
|
}
|
||||||
*endinpos = newpos;
|
*endinpos = newpos;
|
||||||
*inptr = input + newpos;
|
*inptr = *input + newpos;
|
||||||
Py_UNICODE_COPY(*outptr, repptr, repsize);
|
Py_UNICODE_COPY(*outptr, repptr, repsize);
|
||||||
*outptr += repsize;
|
*outptr += repsize;
|
||||||
*outpos += repsize;
|
*outpos += repsize;
|
||||||
|
|
||||||
/* we made it! */
|
/* we made it! */
|
||||||
res = 0;
|
res = 0;
|
||||||
|
|
||||||
|
@ -1503,7 +1519,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
|
||||||
else if (SPECIAL(ch,0,0)) {
|
else if (SPECIAL(ch,0,0)) {
|
||||||
errmsg = "unexpected special character";
|
errmsg = "unexpected special character";
|
||||||
s++;
|
s++;
|
||||||
goto utf7Error;
|
goto utf7Error;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
*p++ = ch;
|
*p++ = ch;
|
||||||
|
@ -1516,7 +1532,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"utf7", errmsg,
|
"utf7", errmsg,
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &e, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&unicode, &outpos, &p))
|
(PyObject **)&unicode, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -1527,7 +1543,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"utf7", "unterminated shift sequence",
|
"utf7", "unterminated shift sequence",
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &e, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&unicode, &outpos, &p))
|
(PyObject **)&unicode, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
if (s < e)
|
if (s < e)
|
||||||
|
@ -1848,7 +1864,7 @@ PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"utf8", errmsg,
|
"utf8", errmsg,
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &e, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&unicode, &outpos, &p))
|
(PyObject **)&unicode, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -2132,7 +2148,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"utf16", errmsg,
|
"utf16", errmsg,
|
||||||
starts, size, &startinpos, &endinpos, &exc, (const char **)&q,
|
&starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
|
||||||
(PyObject **)&unicode, &outpos, &p))
|
(PyObject **)&unicode, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -2342,7 +2358,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"unicodeescape", "end of string in escape sequence",
|
"unicodeescape", "end of string in escape sequence",
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p))
|
(PyObject **)&v, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
goto nextByte;
|
goto nextByte;
|
||||||
|
@ -2354,7 +2370,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"unicodeescape", message,
|
"unicodeescape", message,
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p))
|
(PyObject **)&v, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
goto nextByte;
|
goto nextByte;
|
||||||
|
@ -2393,7 +2409,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"unicodeescape", "illegal Unicode character",
|
"unicodeescape", "illegal Unicode character",
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p))
|
(PyObject **)&v, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -2435,7 +2451,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"unicodeescape", message,
|
"unicodeescape", message,
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p))
|
(PyObject **)&v, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
break;
|
break;
|
||||||
|
@ -2449,7 +2465,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"unicodeescape", message,
|
"unicodeescape", message,
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p))
|
(PyObject **)&v, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -2728,7 +2744,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"rawunicodeescape", "truncated \\uXXXX",
|
"rawunicodeescape", "truncated \\uXXXX",
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p))
|
(PyObject **)&v, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
goto nextByte;
|
goto nextByte;
|
||||||
|
@ -2746,7 +2762,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"rawunicodeescape", "\\Uxxxxxxxx out of range",
|
"rawunicodeescape", "\\Uxxxxxxxx out of range",
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p))
|
(PyObject **)&v, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -2897,7 +2913,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"unicode_internal", reason,
|
"unicode_internal", reason,
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p)) {
|
(PyObject **)&v, &outpos, &p)) {
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -3277,7 +3293,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"ascii", "ordinal not in range(128)",
|
"ascii", "ordinal not in range(128)",
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &e, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p))
|
(PyObject **)&v, &outpos, &p))
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -3578,7 +3594,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"charmap", "character maps to <undefined>",
|
"charmap", "character maps to <undefined>",
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &e, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p)) {
|
(PyObject **)&v, &outpos, &p)) {
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -3628,7 +3644,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
|
||||||
if (unicode_decode_call_errorhandler(
|
if (unicode_decode_call_errorhandler(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
"charmap", "character maps to <undefined>",
|
"charmap", "character maps to <undefined>",
|
||||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
&starts, &e, &startinpos, &endinpos, &exc, &s,
|
||||||
(PyObject **)&v, &outpos, &p)) {
|
(PyObject **)&v, &outpos, &p)) {
|
||||||
Py_DECREF(x);
|
Py_DECREF(x);
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue