bpo-36346: Make using the legacy Unicode C API optional (GH-21437)

Add the compile-time option USE_UNICODE_WCHAR_CACHE. Setting it to 0
makes the interpreter stop using the wchar_t cache and the legacy Unicode C API.
This commit is contained in:
Serhiy Storchaka 2020-07-10 23:26:06 +03:00 committed by GitHub
parent 9650fe0197
commit 4c8f09d7ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 360 additions and 99 deletions

View file

@ -3150,9 +3150,11 @@ unicode_get_widechar_size(PyObject *unicode)
assert(unicode != NULL);
assert(_PyUnicode_CHECK(unicode));
#if USE_UNICODE_WCHAR_CACHE
if (_PyUnicode_WSTR(unicode) != NULL) {
return PyUnicode_WSTR_LENGTH(unicode);
}
#endif /* USE_UNICODE_WCHAR_CACHE */
assert(PyUnicode_IS_READY(unicode));
res = _PyUnicode_LENGTH(unicode);
@ -3173,16 +3175,21 @@ unicode_get_widechar_size(PyObject *unicode)
static void
unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size)
{
const wchar_t *wstr;
assert(unicode != NULL);
assert(_PyUnicode_CHECK(unicode));
wstr = _PyUnicode_WSTR(unicode);
#if USE_UNICODE_WCHAR_CACHE
const wchar_t *wstr = _PyUnicode_WSTR(unicode);
if (wstr != NULL) {
memcpy(w, wstr, size * sizeof(wchar_t));
return;
}
#else /* USE_UNICODE_WCHAR_CACHE */
if (PyUnicode_KIND(unicode) == sizeof(wchar_t)) {
memcpy(w, PyUnicode_DATA(unicode), size * sizeof(wchar_t));
return;
}
#endif /* USE_UNICODE_WCHAR_CACHE */
assert(PyUnicode_IS_READY(unicode));
if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
@ -4378,7 +4385,6 @@ unicode_decode_call_errorhandler_wchar(
Py_ssize_t requiredsize;
Py_ssize_t newpos;
PyObject *inputobj = NULL;
wchar_t *repwstr;
Py_ssize_t repwlen;
if (*errorHandler == NULL) {
@ -4424,9 +4430,19 @@ unicode_decode_call_errorhandler_wchar(
goto onError;
}
repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
if (repwstr == NULL)
#if USE_UNICODE_WCHAR_CACHE
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
repwlen = PyUnicode_GetSize(repunicode);
if (repwlen < 0)
goto onError;
_Py_COMP_DIAG_POP
#else /* USE_UNICODE_WCHAR_CACHE */
repwlen = PyUnicode_AsWideChar(repunicode, NULL, 0);
if (repwlen < 0)
goto onError;
repwlen--;
#endif /* USE_UNICODE_WCHAR_CACHE */
/* need more space? (at least enough for what we
have+the replacement+the rest of the string (starting
at the new input position), so we won't have to check space
@ -4446,7 +4462,7 @@ unicode_decode_call_errorhandler_wchar(
goto onError;
}
}
wcsncpy(*buf + *outpos, repwstr, repwlen);
PyUnicode_AsWideChar(repunicode, *buf + *outpos, repwlen);
*outpos += repwlen;
*endinpos = newpos;
*inptr = *input + newpos;
@ -7748,6 +7764,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
/* Create a substring so that we can get the UTF-16 representation
of just the slice under consideration. */
PyObject *substring;
int ret = -1;
assert(len > 0);
@ -7759,11 +7776,22 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
substring = PyUnicode_Substring(unicode, offset, offset+len);
if (substring == NULL)
return -1;
#if USE_UNICODE_WCHAR_CACHE
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
p = PyUnicode_AsUnicodeAndSize(substring, &size);
if (p == NULL) {
Py_DECREF(substring);
return -1;
}
_Py_COMP_DIAG_POP
#else /* USE_UNICODE_WCHAR_CACHE */
p = PyUnicode_AsWideCharString(substring, &size);
Py_CLEAR(substring);
if (p == NULL) {
return -1;
}
#endif /* USE_UNICODE_WCHAR_CACHE */
assert(size <= INT_MAX);
/* First get the size of the result */
@ -7775,16 +7803,15 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
goto error;
/* If we used a default char, then we failed! */
if (pusedDefaultChar && *pusedDefaultChar) {
Py_DECREF(substring);
return -2;
ret = -2;
goto done;
}
if (*outbytes == NULL) {
/* Create string object */
*outbytes = PyBytes_FromStringAndSize(NULL, outsize);
if (*outbytes == NULL) {
Py_DECREF(substring);
return -1;
goto done;
}
out = PyBytes_AS_STRING(*outbytes);
}
@ -7793,12 +7820,10 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
const Py_ssize_t n = PyBytes_Size(*outbytes);
if (outsize > PY_SSIZE_T_MAX - n) {
PyErr_NoMemory();
Py_DECREF(substring);
return -1;
goto done;
}
if (_PyBytes_Resize(outbytes, n + outsize) < 0) {
Py_DECREF(substring);
return -1;
goto done;
}
out = PyBytes_AS_STRING(*outbytes) + n;
}
@ -7808,19 +7833,29 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
p, (int)size,
out, outsize,
NULL, pusedDefaultChar);
Py_CLEAR(substring);
if (outsize <= 0)
goto error;
if (pusedDefaultChar && *pusedDefaultChar)
return -2;
return 0;
if (pusedDefaultChar && *pusedDefaultChar) {
ret = -2;
goto done;
}
ret = 0;
done:
#if USE_UNICODE_WCHAR_CACHE
Py_DECREF(substring);
#else /* USE_UNICODE_WCHAR_CACHE */
PyMem_Free(p);
#endif /* USE_UNICODE_WCHAR_CACHE */
return ret;
error:
Py_XDECREF(substring);
if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
return -2;
if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) {
ret = -2;
goto done;
}
PyErr_SetFromWindowsErr(0);
return -1;
goto done;
}
/*