bpo-32583: Fix possible crash in built-in Unicode decoders (#5325)

When a custom decode error handler is used, the built-in decoders can write
out of bounds and crash.
This commit is contained in:
Xiang Zhang 2018-01-31 20:48:05 +08:00 committed by GitHub
parent 84521047e4
commit 2c7fd46e11
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 74 additions and 2 deletions

View file

@ -4190,7 +4190,10 @@ unicode_decode_call_errorhandler_writer(
Py_ssize_t insize;
Py_ssize_t newpos;
Py_ssize_t replen;
Py_ssize_t remain;
PyObject *inputobj = NULL;
int need_to_grow = 0;
const char *new_inptr;
if (*errorHandler == NULL) {
*errorHandler = PyCodec_LookupError(errors);
@ -4221,6 +4224,7 @@ unicode_decode_call_errorhandler_writer(
inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
if (!inputobj)
goto onError;
remain = *inend - *input - *endinpos;
*input = PyBytes_AS_STRING(inputobj);
insize = PyBytes_GET_SIZE(inputobj);
*inend = *input + insize;
@ -4238,6 +4242,19 @@ unicode_decode_call_errorhandler_writer(
replen = PyUnicode_GET_LENGTH(repunicode);
if (replen > 1) {
writer->min_length += replen - 1;
need_to_grow = 1;
}
new_inptr = *input + newpos;
if (*inend - new_inptr > remain) {
/* We don't know the decoding algorithm here, so we make the worst-case
assumption that one byte decodes to one Unicode character.
If one byte could decode to more than one Unicode character,
the decoder could still write out of bounds. Is that possible for the
algorithms using this function? */
writer->min_length += *inend - new_inptr - remain;
need_to_grow = 1;
}
if (need_to_grow) {
writer->overallocate = 1;
if (_PyUnicodeWriter_Prepare(writer, writer->min_length,
PyUnicode_MAX_CHAR_VALUE(repunicode)) == -1)
@ -4247,7 +4264,7 @@ unicode_decode_call_errorhandler_writer(
goto onError;
*endinpos = newpos;
*inptr = *input + newpos;
*inptr = new_inptr;
/* we made it! */
Py_DECREF(restuple);
@ -5572,7 +5589,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
#endif
/* Note: size will always be longer than the resulting Unicode
character count */
character count normally. Error handler will take care of
resizing when needed. */
_PyUnicodeWriter_Init(&writer);
writer.min_length = (e - q + 1) / 2;
if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)