mirror of
https://github.com/python/cpython.git
synced 2025-08-27 12:16:04 +00:00
bpo-32583: Fix possible crashing in builtin Unicode decoders (#5325)
When a custom decode error handler is used, the built-in decoders can write out of bounds and then crash.
This commit is contained in:
parent
84521047e4
commit
2c7fd46e11
3 changed files with 74 additions and 2 deletions
|
@ -4190,7 +4190,10 @@ unicode_decode_call_errorhandler_writer(
|
|||
Py_ssize_t insize;
|
||||
Py_ssize_t newpos;
|
||||
Py_ssize_t replen;
|
||||
Py_ssize_t remain;
|
||||
PyObject *inputobj = NULL;
|
||||
int need_to_grow = 0;
|
||||
const char *new_inptr;
|
||||
|
||||
if (*errorHandler == NULL) {
|
||||
*errorHandler = PyCodec_LookupError(errors);
|
||||
|
@ -4221,6 +4224,7 @@ unicode_decode_call_errorhandler_writer(
|
|||
inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
|
||||
if (!inputobj)
|
||||
goto onError;
|
||||
remain = *inend - *input - *endinpos;
|
||||
*input = PyBytes_AS_STRING(inputobj);
|
||||
insize = PyBytes_GET_SIZE(inputobj);
|
||||
*inend = *input + insize;
|
||||
|
@ -4238,6 +4242,19 @@ unicode_decode_call_errorhandler_writer(
|
|||
replen = PyUnicode_GET_LENGTH(repunicode);
|
||||
if (replen > 1) {
|
||||
writer->min_length += replen - 1;
|
||||
need_to_grow = 1;
|
||||
}
|
||||
new_inptr = *input + newpos;
|
||||
if (*inend - new_inptr > remain) {
|
||||
/* We don't know the decoding algorithm here so we make the worst
|
||||
assumption that one byte decodes to one unicode character.
|
||||
If unfortunately one byte could decode to more unicode characters,
|
||||
the decoder may write out-of-bound then. Is it possible for the
|
||||
algorithms using this function? */
|
||||
writer->min_length += *inend - new_inptr - remain;
|
||||
need_to_grow = 1;
|
||||
}
|
||||
if (need_to_grow) {
|
||||
writer->overallocate = 1;
|
||||
if (_PyUnicodeWriter_Prepare(writer, writer->min_length,
|
||||
PyUnicode_MAX_CHAR_VALUE(repunicode)) == -1)
|
||||
|
@ -4247,7 +4264,7 @@ unicode_decode_call_errorhandler_writer(
|
|||
goto onError;
|
||||
|
||||
*endinpos = newpos;
|
||||
*inptr = *input + newpos;
|
||||
*inptr = new_inptr;
|
||||
|
||||
/* we made it! */
|
||||
Py_DECREF(restuple);
|
||||
|
@ -5572,7 +5589,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
|
|||
#endif
|
||||
|
||||
/* Note: size will always be longer than the resulting Unicode
|
||||
character count */
|
||||
character count normally. Error handler will take care of
|
||||
resizing when needed. */
|
||||
_PyUnicodeWriter_Init(&writer);
|
||||
writer.min_length = (e - q + 1) / 2;
|
||||
if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue