mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
bpo-35365: Use a wchar_t* buffer in the code page decoder. (GH-10837)
This commit is contained in:
parent
7fc633f5a5
commit
eeb719eac6
1 changed file with 52 additions and 60 deletions
|
@ -4059,6 +4059,21 @@ onError:
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
|
/* Set the length of the wchar_t buffer *buf to newsize elements.

   When newsize is larger than *size, the buffer is resized with
   PyMem_Resize() through a local copy of the pointer, so *buf is only
   overwritten once the resize has succeeded. When newsize is not larger
   than *size, no reallocation is done and only *size is updated.

   Return 0 on success. On allocation failure, call PyErr_NoMemory() and
   return -1; *buf and *size are left unchanged. */
static int
|
||||||
|
widechar_resize(wchar_t **buf, Py_ssize_t *size, Py_ssize_t newsize)
|
||||||
|
{
|
||||||
|
if (newsize > *size) {
|
||||||
|
wchar_t *newbuf = *buf;
|
||||||
|
if (PyMem_Resize(newbuf, wchar_t, newsize) == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
*buf = newbuf;
|
||||||
|
}
|
||||||
|
*size = newsize;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* error handling callback helper:
|
/* error handling callback helper:
|
||||||
build arguments, call the callback and check the arguments,
|
build arguments, call the callback and check the arguments,
|
||||||
if no exception occurred, copy the replacement to the output
|
if no exception occurred, copy the replacement to the output
|
||||||
|
@ -4072,7 +4087,7 @@ unicode_decode_call_errorhandler_wchar(
|
||||||
const char *encoding, const char *reason,
|
const char *encoding, const char *reason,
|
||||||
const char **input, const char **inend, Py_ssize_t *startinpos,
|
const char **input, const char **inend, Py_ssize_t *startinpos,
|
||||||
Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
|
Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
|
||||||
PyObject **output, Py_ssize_t *outpos)
|
wchar_t **buf, Py_ssize_t *bufsize, Py_ssize_t *outpos)
|
||||||
{
|
{
|
||||||
static const char *argparse = "Un;decoding error handler must return (str, int) tuple";
|
static const char *argparse = "Un;decoding error handler must return (str, int) tuple";
|
||||||
|
|
||||||
|
@ -4086,9 +4101,6 @@ unicode_decode_call_errorhandler_wchar(
|
||||||
wchar_t *repwstr;
|
wchar_t *repwstr;
|
||||||
Py_ssize_t repwlen;
|
Py_ssize_t repwlen;
|
||||||
|
|
||||||
assert (_PyUnicode_KIND(*output) == PyUnicode_WCHAR_KIND);
|
|
||||||
outsize = _PyUnicode_WSTR_LENGTH(*output);
|
|
||||||
|
|
||||||
if (*errorHandler == NULL) {
|
if (*errorHandler == NULL) {
|
||||||
*errorHandler = PyCodec_LookupError(errors);
|
*errorHandler = PyCodec_LookupError(errors);
|
||||||
if (*errorHandler == NULL)
|
if (*errorHandler == NULL)
|
||||||
|
@ -4146,13 +4158,15 @@ unicode_decode_call_errorhandler_wchar(
|
||||||
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
|
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
|
||||||
goto overflow;
|
goto overflow;
|
||||||
requiredsize += insize - newpos;
|
requiredsize += insize - newpos;
|
||||||
|
outsize = *bufsize;
|
||||||
if (requiredsize > outsize) {
|
if (requiredsize > outsize) {
|
||||||
if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
|
if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
|
||||||
requiredsize = 2*outsize;
|
requiredsize = 2*outsize;
|
||||||
if (unicode_resize(output, requiredsize) < 0)
|
if (widechar_resize(buf, bufsize, requiredsize) < 0) {
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
|
}
|
||||||
|
wcsncpy(*buf + *outpos, repwstr, repwlen);
|
||||||
*outpos += repwlen;
|
*outpos += repwlen;
|
||||||
*endinpos = newpos;
|
*endinpos = newpos;
|
||||||
*inptr = *input + newpos;
|
*inptr = *input + newpos;
|
||||||
|
@ -7146,7 +7160,8 @@ decode_code_page_flags(UINT code_page)
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
decode_code_page_strict(UINT code_page,
|
decode_code_page_strict(UINT code_page,
|
||||||
PyObject **v,
|
wchar_t **buf,
|
||||||
|
Py_ssize_t *bufsize,
|
||||||
const char *in,
|
const char *in,
|
||||||
int insize)
|
int insize)
|
||||||
{
|
{
|
||||||
|
@ -7160,21 +7175,12 @@ decode_code_page_strict(UINT code_page,
|
||||||
if (outsize <= 0)
|
if (outsize <= 0)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
if (*v == NULL) {
|
/* Extend a wchar_t* buffer */
|
||||||
/* Create unicode object */
|
Py_ssize_t n = *bufsize; /* Get the current length */
|
||||||
/* FIXME: don't use _PyUnicode_New(), but allocate a wchar_t* buffer */
|
if (widechar_resize(buf, bufsize, n + outsize) < 0) {
|
||||||
*v = (PyObject*)_PyUnicode_New(outsize);
|
|
||||||
if (*v == NULL)
|
|
||||||
return -1;
|
return -1;
|
||||||
out = PyUnicode_AS_UNICODE(*v);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* Extend unicode object */
|
|
||||||
Py_ssize_t n = PyUnicode_GET_SIZE(*v);
|
|
||||||
if (unicode_resize(v, n + outsize) < 0)
|
|
||||||
return -1;
|
|
||||||
out = PyUnicode_AS_UNICODE(*v) + n;
|
|
||||||
}
|
}
|
||||||
|
out = *buf + n;
|
||||||
|
|
||||||
/* Do the conversion */
|
/* Do the conversion */
|
||||||
outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize);
|
outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize);
|
||||||
|
@ -7198,7 +7204,8 @@ error:
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
decode_code_page_errors(UINT code_page,
|
decode_code_page_errors(UINT code_page,
|
||||||
PyObject **v,
|
wchar_t **buf,
|
||||||
|
Py_ssize_t *bufsize,
|
||||||
const char *in, const int size,
|
const char *in, const int size,
|
||||||
const char *errors, int final)
|
const char *errors, int final)
|
||||||
{
|
{
|
||||||
|
@ -7238,29 +7245,16 @@ decode_code_page_errors(UINT code_page,
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*v == NULL) {
|
/* Extend a wchar_t* buffer */
|
||||||
/* Create unicode object */
|
Py_ssize_t n = *bufsize; /* Get the current length */
|
||||||
if (size > PY_SSIZE_T_MAX / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
|
|
||||||
PyErr_NoMemory();
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
/* FIXME: don't use _PyUnicode_New(), but allocate a wchar_t* buffer */
|
|
||||||
*v = (PyObject*)_PyUnicode_New(size * Py_ARRAY_LENGTH(buffer));
|
|
||||||
if (*v == NULL)
|
|
||||||
goto error;
|
|
||||||
out = PyUnicode_AS_UNICODE(*v);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* Extend unicode object */
|
|
||||||
Py_ssize_t n = PyUnicode_GET_SIZE(*v);
|
|
||||||
if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
|
if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
if (unicode_resize(v, n + size * Py_ARRAY_LENGTH(buffer)) < 0)
|
if (widechar_resize(buf, bufsize, n + size * Py_ARRAY_LENGTH(buffer)) < 0) {
|
||||||
goto error;
|
goto error;
|
||||||
out = PyUnicode_AS_UNICODE(*v) + n;
|
|
||||||
}
|
}
|
||||||
|
out = *buf + n;
|
||||||
|
|
||||||
/* Decode the byte string character per character */
|
/* Decode the byte string character per character */
|
||||||
while (in < endin)
|
while (in < endin)
|
||||||
|
@ -7295,16 +7289,16 @@ decode_code_page_errors(UINT code_page,
|
||||||
|
|
||||||
startinpos = in - startin;
|
startinpos = in - startin;
|
||||||
endinpos = startinpos + 1;
|
endinpos = startinpos + 1;
|
||||||
outpos = out - PyUnicode_AS_UNICODE(*v);
|
outpos = out - *buf;
|
||||||
if (unicode_decode_call_errorhandler_wchar(
|
if (unicode_decode_call_errorhandler_wchar(
|
||||||
errors, &errorHandler,
|
errors, &errorHandler,
|
||||||
encoding, reason,
|
encoding, reason,
|
||||||
&startin, &endin, &startinpos, &endinpos, &exc, &in,
|
&startin, &endin, &startinpos, &endinpos, &exc, &in,
|
||||||
v, &outpos))
|
buf, bufsize, &outpos))
|
||||||
{
|
{
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
out = PyUnicode_AS_UNICODE(*v) + outpos;
|
out = *buf + outpos;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
in += insize;
|
in += insize;
|
||||||
|
@ -7313,14 +7307,9 @@ decode_code_page_errors(UINT code_page,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* write a NUL character at the end */
|
/* Shrink the buffer */
|
||||||
*out = 0;
|
assert(out - *buf <= *bufsize);
|
||||||
|
*bufsize = out - *buf;
|
||||||
/* Extend unicode object */
|
|
||||||
outsize = out - PyUnicode_AS_UNICODE(*v);
|
|
||||||
assert(outsize <= PyUnicode_WSTR_LENGTH(*v));
|
|
||||||
if (unicode_resize(v, outsize) < 0)
|
|
||||||
goto error;
|
|
||||||
/* (in - startin) <= size and size is an int */
|
/* (in - startin) <= size and size is an int */
|
||||||
ret = Py_SAFE_DOWNCAST(in - startin, Py_ssize_t, int);
|
ret = Py_SAFE_DOWNCAST(in - startin, Py_ssize_t, int);
|
||||||
|
|
||||||
|
@ -7336,7 +7325,8 @@ decode_code_page_stateful(int code_page,
|
||||||
const char *s, Py_ssize_t size,
|
const char *s, Py_ssize_t size,
|
||||||
const char *errors, Py_ssize_t *consumed)
|
const char *errors, Py_ssize_t *consumed)
|
||||||
{
|
{
|
||||||
PyObject *v = NULL;
|
wchar_t *buf = NULL;
|
||||||
|
Py_ssize_t bufsize = 0;
|
||||||
int chunk_size, final, converted, done;
|
int chunk_size, final, converted, done;
|
||||||
|
|
||||||
if (code_page < 0) {
|
if (code_page < 0) {
|
||||||
|
@ -7368,21 +7358,21 @@ decode_code_page_stateful(int code_page,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (chunk_size == 0 && done) {
|
if (chunk_size == 0 && done) {
|
||||||
if (v != NULL)
|
if (buf != NULL)
|
||||||
break;
|
break;
|
||||||
_Py_RETURN_UNICODE_EMPTY();
|
_Py_RETURN_UNICODE_EMPTY();
|
||||||
}
|
}
|
||||||
|
|
||||||
converted = decode_code_page_strict(code_page, &v,
|
converted = decode_code_page_strict(code_page, &buf, &bufsize,
|
||||||
s, chunk_size);
|
s, chunk_size);
|
||||||
if (converted == -2)
|
if (converted == -2)
|
||||||
converted = decode_code_page_errors(code_page, &v,
|
converted = decode_code_page_errors(code_page, &buf, &bufsize,
|
||||||
s, chunk_size,
|
s, chunk_size,
|
||||||
errors, final);
|
errors, final);
|
||||||
assert(converted != 0 || done);
|
assert(converted != 0 || done);
|
||||||
|
|
||||||
if (converted < 0) {
|
if (converted < 0) {
|
||||||
Py_XDECREF(v);
|
PyMem_Free(buf);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7393,7 +7383,9 @@ decode_code_page_stateful(int code_page,
|
||||||
size -= converted;
|
size -= converted;
|
||||||
} while (!done);
|
} while (!done);
|
||||||
|
|
||||||
return unicode_result(v);
|
PyObject *v = PyUnicode_FromWideChar(buf, bufsize);
|
||||||
|
PyMem_Free(buf);
|
||||||
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue