bpo-35365: Use a wchar_t* buffer in the code page decoder. (GH-10837)

This commit is contained in:
Serhiy Storchaka 2018-12-04 10:25:50 +02:00 committed by GitHub
parent 7fc633f5a5
commit eeb719eac6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -4059,6 +4059,21 @@ onError:
} }
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
/* Record `newsize` as the logical length (in wchar_t elements) of the
   buffer at *buf, enlarging the allocation first when `newsize` exceeds
   the current *size.

   When `newsize` is not larger than *size, no reallocation is requested
   and only *size is updated.

   Returns 0 on success.  If the resize fails, PyErr_NoMemory() is called,
   -1 is returned, and neither *buf nor *size is modified. */
static int
widechar_resize(wchar_t **buf, Py_ssize_t *size, Py_ssize_t newsize)
{
    /* Same size or shrinking: keep the allocation, just note the length. */
    if (newsize <= *size) {
        *size = newsize;
        return 0;
    }

    /* Resize through a local copy so *buf is only overwritten once the
       resize is known to have succeeded. */
    wchar_t *grown = *buf;
    if (PyMem_Resize(grown, wchar_t, newsize) == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    *buf = grown;
    *size = newsize;
    return 0;
}
/* error handling callback helper: /* error handling callback helper:
build arguments, call the callback and check the arguments, build arguments, call the callback and check the arguments,
if no exception occurred, copy the replacement to the output if no exception occurred, copy the replacement to the output
@ -4072,7 +4087,7 @@ unicode_decode_call_errorhandler_wchar(
const char *encoding, const char *reason, const char *encoding, const char *reason,
const char **input, const char **inend, Py_ssize_t *startinpos, const char **input, const char **inend, Py_ssize_t *startinpos,
Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
PyObject **output, Py_ssize_t *outpos) wchar_t **buf, Py_ssize_t *bufsize, Py_ssize_t *outpos)
{ {
static const char *argparse = "Un;decoding error handler must return (str, int) tuple"; static const char *argparse = "Un;decoding error handler must return (str, int) tuple";
@ -4086,9 +4101,6 @@ unicode_decode_call_errorhandler_wchar(
wchar_t *repwstr; wchar_t *repwstr;
Py_ssize_t repwlen; Py_ssize_t repwlen;
assert (_PyUnicode_KIND(*output) == PyUnicode_WCHAR_KIND);
outsize = _PyUnicode_WSTR_LENGTH(*output);
if (*errorHandler == NULL) { if (*errorHandler == NULL) {
*errorHandler = PyCodec_LookupError(errors); *errorHandler = PyCodec_LookupError(errors);
if (*errorHandler == NULL) if (*errorHandler == NULL)
@ -4146,13 +4158,15 @@ unicode_decode_call_errorhandler_wchar(
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
goto overflow; goto overflow;
requiredsize += insize - newpos; requiredsize += insize - newpos;
outsize = *bufsize;
if (requiredsize > outsize) { if (requiredsize > outsize) {
if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
requiredsize = 2*outsize; requiredsize = 2*outsize;
if (unicode_resize(output, requiredsize) < 0) if (widechar_resize(buf, bufsize, requiredsize) < 0) {
goto onError; goto onError;
} }
wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen); }
wcsncpy(*buf + *outpos, repwstr, repwlen);
*outpos += repwlen; *outpos += repwlen;
*endinpos = newpos; *endinpos = newpos;
*inptr = *input + newpos; *inptr = *input + newpos;
@ -7146,7 +7160,8 @@ decode_code_page_flags(UINT code_page)
*/ */
static int static int
decode_code_page_strict(UINT code_page, decode_code_page_strict(UINT code_page,
PyObject **v, wchar_t **buf,
Py_ssize_t *bufsize,
const char *in, const char *in,
int insize) int insize)
{ {
@ -7160,21 +7175,12 @@ decode_code_page_strict(UINT code_page,
if (outsize <= 0) if (outsize <= 0)
goto error; goto error;
if (*v == NULL) { /* Extend a wchar_t* buffer */
/* Create unicode object */ Py_ssize_t n = *bufsize; /* Get the current length */
/* FIXME: don't use _PyUnicode_New(), but allocate a wchar_t* buffer */ if (widechar_resize(buf, bufsize, n + outsize) < 0) {
*v = (PyObject*)_PyUnicode_New(outsize);
if (*v == NULL)
return -1; return -1;
out = PyUnicode_AS_UNICODE(*v);
}
else {
/* Extend unicode object */
Py_ssize_t n = PyUnicode_GET_SIZE(*v);
if (unicode_resize(v, n + outsize) < 0)
return -1;
out = PyUnicode_AS_UNICODE(*v) + n;
} }
out = *buf + n;
/* Do the conversion */ /* Do the conversion */
outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize); outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize);
@ -7198,7 +7204,8 @@ error:
*/ */
static int static int
decode_code_page_errors(UINT code_page, decode_code_page_errors(UINT code_page,
PyObject **v, wchar_t **buf,
Py_ssize_t *bufsize,
const char *in, const int size, const char *in, const int size,
const char *errors, int final) const char *errors, int final)
{ {
@ -7238,29 +7245,16 @@ decode_code_page_errors(UINT code_page,
goto error; goto error;
} }
if (*v == NULL) { /* Extend a wchar_t* buffer */
/* Create unicode object */ Py_ssize_t n = *bufsize; /* Get the current length */
if (size > PY_SSIZE_T_MAX / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
PyErr_NoMemory();
goto error;
}
/* FIXME: don't use _PyUnicode_New(), but allocate a wchar_t* buffer */
*v = (PyObject*)_PyUnicode_New(size * Py_ARRAY_LENGTH(buffer));
if (*v == NULL)
goto error;
out = PyUnicode_AS_UNICODE(*v);
}
else {
/* Extend unicode object */
Py_ssize_t n = PyUnicode_GET_SIZE(*v);
if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
PyErr_NoMemory(); PyErr_NoMemory();
goto error; goto error;
} }
if (unicode_resize(v, n + size * Py_ARRAY_LENGTH(buffer)) < 0) if (widechar_resize(buf, bufsize, n + size * Py_ARRAY_LENGTH(buffer)) < 0) {
goto error; goto error;
out = PyUnicode_AS_UNICODE(*v) + n;
} }
out = *buf + n;
/* Decode the byte string character per character */ /* Decode the byte string character per character */
while (in < endin) while (in < endin)
@ -7295,16 +7289,16 @@ decode_code_page_errors(UINT code_page,
startinpos = in - startin; startinpos = in - startin;
endinpos = startinpos + 1; endinpos = startinpos + 1;
outpos = out - PyUnicode_AS_UNICODE(*v); outpos = out - *buf;
if (unicode_decode_call_errorhandler_wchar( if (unicode_decode_call_errorhandler_wchar(
errors, &errorHandler, errors, &errorHandler,
encoding, reason, encoding, reason,
&startin, &endin, &startinpos, &endinpos, &exc, &in, &startin, &endin, &startinpos, &endinpos, &exc, &in,
v, &outpos)) buf, bufsize, &outpos))
{ {
goto error; goto error;
} }
out = PyUnicode_AS_UNICODE(*v) + outpos; out = *buf + outpos;
} }
else { else {
in += insize; in += insize;
@ -7313,14 +7307,9 @@ decode_code_page_errors(UINT code_page,
} }
} }
/* write a NUL character at the end */ /* Shrink the buffer */
*out = 0; assert(out - *buf <= *bufsize);
*bufsize = out - *buf;
/* Extend unicode object */
outsize = out - PyUnicode_AS_UNICODE(*v);
assert(outsize <= PyUnicode_WSTR_LENGTH(*v));
if (unicode_resize(v, outsize) < 0)
goto error;
/* (in - startin) <= size and size is an int */ /* (in - startin) <= size and size is an int */
ret = Py_SAFE_DOWNCAST(in - startin, Py_ssize_t, int); ret = Py_SAFE_DOWNCAST(in - startin, Py_ssize_t, int);
@ -7336,7 +7325,8 @@ decode_code_page_stateful(int code_page,
const char *s, Py_ssize_t size, const char *s, Py_ssize_t size,
const char *errors, Py_ssize_t *consumed) const char *errors, Py_ssize_t *consumed)
{ {
PyObject *v = NULL; wchar_t *buf = NULL;
Py_ssize_t bufsize = 0;
int chunk_size, final, converted, done; int chunk_size, final, converted, done;
if (code_page < 0) { if (code_page < 0) {
@ -7368,21 +7358,21 @@ decode_code_page_stateful(int code_page,
} }
if (chunk_size == 0 && done) { if (chunk_size == 0 && done) {
if (v != NULL) if (buf != NULL)
break; break;
_Py_RETURN_UNICODE_EMPTY(); _Py_RETURN_UNICODE_EMPTY();
} }
converted = decode_code_page_strict(code_page, &v, converted = decode_code_page_strict(code_page, &buf, &bufsize,
s, chunk_size); s, chunk_size);
if (converted == -2) if (converted == -2)
converted = decode_code_page_errors(code_page, &v, converted = decode_code_page_errors(code_page, &buf, &bufsize,
s, chunk_size, s, chunk_size,
errors, final); errors, final);
assert(converted != 0 || done); assert(converted != 0 || done);
if (converted < 0) { if (converted < 0) {
Py_XDECREF(v); PyMem_Free(buf);
return NULL; return NULL;
} }
@ -7393,7 +7383,9 @@ decode_code_page_stateful(int code_page,
size -= converted; size -= converted;
} while (!done); } while (!done);
return unicode_result(v); PyObject *v = PyUnicode_FromWideChar(buf, bufsize);
PyMem_Free(buf);
return v;
} }
PyObject * PyObject *