mirror of
https://github.com/python/cpython.git
synced 2025-08-28 12:45:07 +00:00
bpo-29240: Fix locale encodings in UTF-8 Mode (#5170)
Modify locale.localeconv(), time.tzname, os.strerror() and other functions to ignore the UTF-8 Mode: always use the current locale encoding. Changes: * Add _Py_DecodeLocaleEx() and _Py_EncodeLocaleEx(). On decoding or encoding error, they return the position of the error and an error message which are used to raise Unicode errors in PyUnicode_DecodeLocale() and PyUnicode_EncodeLocale(). * Replace _Py_DecodeCurrentLocale() with _Py_DecodeLocaleEx(). * PyUnicode_DecodeLocale() now uses _Py_DecodeLocaleEx() for all cases, especially for the strict error handler. * Add _Py_DecodeUTF8Ex(): return more information on decoding error and supports the strict error handler. * Rename _Py_EncodeUTF8_surrogateescape() to _Py_EncodeUTF8Ex(). * Replace _Py_EncodeCurrentLocale() with _Py_EncodeLocaleEx(). * Ignore the UTF-8 mode to encode/decode localeconv(), strerror() and time zone name. * Remove PyUnicode_DecodeLocale(), PyUnicode_DecodeLocaleAndSize() and PyUnicode_EncodeLocale() now ignore the UTF-8 mode: always use the "current" locale. * Remove _PyUnicode_DecodeCurrentLocale(), _PyUnicode_DecodeCurrentLocaleAndSize() and _PyUnicode_EncodeCurrentLocale().
This commit is contained in:
parent
ee3b83547c
commit
7ed7aead95
12 changed files with 484 additions and 517 deletions
|
@ -3327,53 +3327,6 @@ PyUnicode_AsEncodedObject(PyObject *unicode,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static size_t
|
||||
wcstombs_errorpos(const wchar_t *wstr)
|
||||
{
|
||||
size_t len;
|
||||
#if SIZEOF_WCHAR_T == 2
|
||||
wchar_t buf[3];
|
||||
#else
|
||||
wchar_t buf[2];
|
||||
#endif
|
||||
char outbuf[MB_LEN_MAX];
|
||||
const wchar_t *start, *previous;
|
||||
|
||||
#if SIZEOF_WCHAR_T == 2
|
||||
buf[2] = 0;
|
||||
#else
|
||||
buf[1] = 0;
|
||||
#endif
|
||||
start = wstr;
|
||||
while (*wstr != L'\0')
|
||||
{
|
||||
previous = wstr;
|
||||
#if SIZEOF_WCHAR_T == 2
|
||||
if (Py_UNICODE_IS_HIGH_SURROGATE(wstr[0])
|
||||
&& Py_UNICODE_IS_LOW_SURROGATE(wstr[1]))
|
||||
{
|
||||
buf[0] = wstr[0];
|
||||
buf[1] = wstr[1];
|
||||
wstr += 2;
|
||||
}
|
||||
else {
|
||||
buf[0] = *wstr;
|
||||
buf[1] = 0;
|
||||
wstr++;
|
||||
}
|
||||
#else
|
||||
buf[0] = *wstr;
|
||||
wstr++;
|
||||
#endif
|
||||
len = wcstombs(outbuf, buf, sizeof(outbuf));
|
||||
if (len == (size_t)-1)
|
||||
return previous - start;
|
||||
}
|
||||
|
||||
/* failed to find the unencodable character */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
locale_error_handler(const char *errors, int *surrogateescape)
|
||||
{
|
||||
|
@ -3396,130 +3349,60 @@ locale_error_handler(const char *errors, int *surrogateescape)
|
|||
}
|
||||
|
||||
static PyObject *
|
||||
unicode_encode_locale(PyObject *unicode, const char *errors, int current_locale)
|
||||
unicode_encode_locale(PyObject *unicode, const char *errors,
|
||||
int current_locale)
|
||||
{
|
||||
Py_ssize_t wlen, wlen2;
|
||||
wchar_t *wstr;
|
||||
char *errmsg;
|
||||
PyObject *bytes, *reason, *exc;
|
||||
size_t error_pos, errlen;
|
||||
int surrogateescape;
|
||||
|
||||
if (locale_error_handler(errors, &surrogateescape) < 0)
|
||||
return NULL;
|
||||
|
||||
wstr = PyUnicode_AsWideCharString(unicode, &wlen);
|
||||
if (wstr == NULL)
|
||||
Py_ssize_t wlen;
|
||||
wchar_t *wstr = PyUnicode_AsWideCharString(unicode, &wlen);
|
||||
if (wstr == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
wlen2 = wcslen(wstr);
|
||||
Py_ssize_t wlen2 = wcslen(wstr);
|
||||
if (wlen2 != wlen) {
|
||||
PyMem_Free(wstr);
|
||||
PyErr_SetString(PyExc_ValueError, "embedded null character");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (surrogateescape) {
|
||||
/* "surrogateescape" error handler */
|
||||
char *str;
|
||||
|
||||
if (current_locale) {
|
||||
str = _Py_EncodeCurrentLocale(wstr, &error_pos);
|
||||
char *str;
|
||||
size_t error_pos;
|
||||
const char *reason;
|
||||
int res = _Py_EncodeLocaleEx(wstr, &str, &error_pos, &reason,
|
||||
current_locale, surrogateescape);
|
||||
if (res != 0) {
|
||||
if (res == -2) {
|
||||
PyObject *exc;
|
||||
exc = PyObject_CallFunction(PyExc_UnicodeEncodeError, "sOnns",
|
||||
"locale", unicode,
|
||||
(Py_ssize_t)error_pos,
|
||||
(Py_ssize_t)(error_pos+1),
|
||||
reason);
|
||||
if (exc != NULL) {
|
||||
PyCodec_StrictErrors(exc);
|
||||
Py_DECREF(exc);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
str = Py_EncodeLocale(wstr, &error_pos);
|
||||
}
|
||||
if (str == NULL) {
|
||||
if (error_pos == (size_t)-1) {
|
||||
PyErr_NoMemory();
|
||||
PyMem_Free(wstr);
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
goto encode_error;
|
||||
}
|
||||
}
|
||||
PyMem_Free(wstr);
|
||||
|
||||
bytes = PyBytes_FromString(str);
|
||||
if (current_locale) {
|
||||
PyMem_RawFree(str);
|
||||
}
|
||||
else {
|
||||
PyMem_Free(str);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* strict mode */
|
||||
size_t len, len2;
|
||||
|
||||
len = wcstombs(NULL, wstr, 0);
|
||||
if (len == (size_t)-1) {
|
||||
error_pos = (size_t)-1;
|
||||
goto encode_error;
|
||||
}
|
||||
|
||||
bytes = PyBytes_FromStringAndSize(NULL, len);
|
||||
if (bytes == NULL) {
|
||||
PyErr_NoMemory();
|
||||
PyMem_Free(wstr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
len2 = wcstombs(PyBytes_AS_STRING(bytes), wstr, len+1);
|
||||
if (len2 == (size_t)-1 || len2 > len) {
|
||||
Py_DECREF(bytes);
|
||||
error_pos = (size_t)-1;
|
||||
goto encode_error;
|
||||
}
|
||||
PyMem_Free(wstr);
|
||||
}
|
||||
return bytes;
|
||||
|
||||
encode_error:
|
||||
errmsg = strerror(errno);
|
||||
assert(errmsg != NULL);
|
||||
|
||||
if (error_pos == (size_t)-1)
|
||||
error_pos = wcstombs_errorpos(wstr);
|
||||
|
||||
PyMem_Free(wstr);
|
||||
|
||||
wstr = Py_DecodeLocale(errmsg, &errlen);
|
||||
if (wstr != NULL) {
|
||||
reason = PyUnicode_FromWideChar(wstr, errlen);
|
||||
PyMem_RawFree(wstr);
|
||||
} else {
|
||||
errmsg = NULL;
|
||||
}
|
||||
|
||||
if (errmsg == NULL)
|
||||
reason = PyUnicode_FromString(
|
||||
"wcstombs() encountered an unencodable "
|
||||
"wide character");
|
||||
if (reason == NULL)
|
||||
return NULL;
|
||||
|
||||
exc = PyObject_CallFunction(PyExc_UnicodeEncodeError, "sOnnO",
|
||||
"locale", unicode,
|
||||
(Py_ssize_t)error_pos,
|
||||
(Py_ssize_t)(error_pos+1),
|
||||
reason);
|
||||
Py_DECREF(reason);
|
||||
if (exc != NULL) {
|
||||
PyCodec_StrictErrors(exc);
|
||||
Py_DECREF(exc);
|
||||
}
|
||||
return NULL;
|
||||
PyObject *bytes = PyBytes_FromString(str);
|
||||
PyMem_RawFree(str);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
|
||||
{
|
||||
return unicode_encode_locale(unicode, errors, 0);
|
||||
}
|
||||
|
||||
PyObject *
|
||||
_PyUnicode_EncodeCurrentLocale(PyObject *unicode, const char *errors)
|
||||
{
|
||||
return unicode_encode_locale(unicode, errors, 1);
|
||||
}
|
||||
|
@ -3687,51 +3570,11 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static size_t
|
||||
mbstowcs_errorpos(const char *str, size_t len)
|
||||
{
|
||||
#ifdef HAVE_MBRTOWC
|
||||
const char *start = str;
|
||||
mbstate_t mbs;
|
||||
size_t converted;
|
||||
wchar_t ch;
|
||||
|
||||
memset(&mbs, 0, sizeof mbs);
|
||||
while (len)
|
||||
{
|
||||
converted = mbrtowc(&ch, str, len, &mbs);
|
||||
if (converted == 0)
|
||||
/* Reached end of string */
|
||||
break;
|
||||
if (converted == (size_t)-1 || converted == (size_t)-2) {
|
||||
/* Conversion error or incomplete character */
|
||||
return str - start;
|
||||
}
|
||||
else {
|
||||
str += converted;
|
||||
len -= converted;
|
||||
}
|
||||
}
|
||||
/* failed to find the undecodable byte sequence */
|
||||
return 0;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
|
||||
int current_locale)
|
||||
{
|
||||
wchar_t smallbuf[256];
|
||||
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
|
||||
wchar_t *wstr;
|
||||
size_t wlen, wlen2;
|
||||
PyObject *unicode;
|
||||
int surrogateescape;
|
||||
size_t error_pos, errlen;
|
||||
char *errmsg;
|
||||
PyObject *exc, *reason = NULL; /* initialize to prevent gcc warning */
|
||||
|
||||
if (locale_error_handler(errors, &surrogateescape) < 0)
|
||||
return NULL;
|
||||
|
||||
|
@ -3740,113 +3583,47 @@ unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (surrogateescape) {
|
||||
/* "surrogateescape" error handler */
|
||||
if (current_locale) {
|
||||
wstr = _Py_DecodeCurrentLocale(str, &wlen);
|
||||
wchar_t *wstr;
|
||||
size_t wlen;
|
||||
const char *reason;
|
||||
int res = _Py_DecodeLocaleEx(str, &wstr, &wlen, &reason,
|
||||
current_locale, surrogateescape);
|
||||
if (res != 0) {
|
||||
if (res == -2) {
|
||||
PyObject *exc;
|
||||
exc = PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns",
|
||||
"locale", str, len,
|
||||
(Py_ssize_t)wlen,
|
||||
(Py_ssize_t)(wlen + 1),
|
||||
reason);
|
||||
if (exc != NULL) {
|
||||
PyCodec_StrictErrors(exc);
|
||||
Py_DECREF(exc);
|
||||
}
|
||||
}
|
||||
else {
|
||||
wstr = Py_DecodeLocale(str, &wlen);
|
||||
PyErr_NoMemory();
|
||||
}
|
||||
if (wstr == NULL) {
|
||||
if (wlen == (size_t)-1)
|
||||
PyErr_NoMemory();
|
||||
else
|
||||
PyErr_SetFromErrno(PyExc_OSError);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
unicode = PyUnicode_FromWideChar(wstr, wlen);
|
||||
PyMem_RawFree(wstr);
|
||||
}
|
||||
else {
|
||||
/* strict mode */
|
||||
#ifndef HAVE_BROKEN_MBSTOWCS
|
||||
wlen = mbstowcs(NULL, str, 0);
|
||||
#else
|
||||
wlen = len;
|
||||
#endif
|
||||
if (wlen == (size_t)-1)
|
||||
goto decode_error;
|
||||
if (wlen+1 <= smallbuf_len) {
|
||||
wstr = smallbuf;
|
||||
}
|
||||
else {
|
||||
wstr = PyMem_New(wchar_t, wlen+1);
|
||||
if (!wstr)
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
|
||||
wlen2 = mbstowcs(wstr, str, wlen+1);
|
||||
if (wlen2 == (size_t)-1) {
|
||||
if (wstr != smallbuf)
|
||||
PyMem_Free(wstr);
|
||||
goto decode_error;
|
||||
}
|
||||
#ifdef HAVE_BROKEN_MBSTOWCS
|
||||
assert(wlen2 == wlen);
|
||||
#endif
|
||||
unicode = PyUnicode_FromWideChar(wstr, wlen2);
|
||||
if (wstr != smallbuf)
|
||||
PyMem_Free(wstr);
|
||||
}
|
||||
return unicode;
|
||||
|
||||
decode_error:
|
||||
errmsg = strerror(errno);
|
||||
assert(errmsg != NULL);
|
||||
|
||||
error_pos = mbstowcs_errorpos(str, len);
|
||||
wstr = Py_DecodeLocale(errmsg, &errlen);
|
||||
if (wstr != NULL) {
|
||||
reason = PyUnicode_FromWideChar(wstr, errlen);
|
||||
PyMem_RawFree(wstr);
|
||||
}
|
||||
|
||||
if (reason == NULL)
|
||||
reason = PyUnicode_FromString(
|
||||
"mbstowcs() encountered an invalid multibyte sequence");
|
||||
if (reason == NULL)
|
||||
return NULL;
|
||||
|
||||
exc = PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nnO",
|
||||
"locale", str, len,
|
||||
(Py_ssize_t)error_pos,
|
||||
(Py_ssize_t)(error_pos+1),
|
||||
reason);
|
||||
Py_DECREF(reason);
|
||||
if (exc != NULL) {
|
||||
PyCodec_StrictErrors(exc);
|
||||
Py_DECREF(exc);
|
||||
}
|
||||
return NULL;
|
||||
|
||||
PyObject *unicode = PyUnicode_FromWideChar(wstr, wlen);
|
||||
PyMem_RawFree(wstr);
|
||||
return unicode;
|
||||
}
|
||||
|
||||
PyObject*
|
||||
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
|
||||
const char *errors)
|
||||
{
|
||||
return unicode_decode_locale(str, len, errors, 0);
|
||||
}
|
||||
|
||||
PyObject*
|
||||
_PyUnicode_DecodeCurrentLocaleAndSize(const char *str, Py_ssize_t len,
|
||||
const char *errors)
|
||||
{
|
||||
return unicode_decode_locale(str, len, errors, 1);
|
||||
}
|
||||
|
||||
PyObject*
|
||||
_PyUnicode_DecodeCurrentLocale(const char *str, const char *errors)
|
||||
{
|
||||
return unicode_decode_locale(str, (Py_ssize_t)strlen(str), errors, 1);
|
||||
}
|
||||
|
||||
PyObject*
|
||||
PyUnicode_DecodeLocale(const char *str, const char *errors)
|
||||
{
|
||||
Py_ssize_t size = (Py_ssize_t)strlen(str);
|
||||
return unicode_decode_locale(str, size, errors, 0);
|
||||
return unicode_decode_locale(str, size, errors, 1);
|
||||
}
|
||||
|
||||
|
||||
|
@ -3878,7 +3655,8 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
|||
Py_FileSystemDefaultEncodeErrors);
|
||||
}
|
||||
else {
|
||||
return PyUnicode_DecodeLocaleAndSize(s, size, Py_FileSystemDefaultEncodeErrors);
|
||||
return unicode_decode_locale(s, size,
|
||||
Py_FileSystemDefaultEncodeErrors, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -5128,17 +4906,23 @@ onError:
|
|||
}
|
||||
|
||||
|
||||
/* UTF-8 decoder using the surrogateescape error handler .
|
||||
/* UTF-8 decoder: use surrogateescape error handler if 'surrogateescape' is
|
||||
non-zero, use strict error handler otherwise.
|
||||
|
||||
On success, return a pointer to a newly allocated wide character string (use
|
||||
PyMem_RawFree() to free the memory) and write the output length (in number
|
||||
of wchar_t units) into *p_wlen (if p_wlen is set).
|
||||
On success, write a pointer to a newly allocated wide character string into
|
||||
*wstr (use PyMem_RawFree() to free the memory) and write the output length
|
||||
(in number of wchar_t units) into *wlen (if wlen is set).
|
||||
|
||||
On memory allocation failure, return -1 and write (size_t)-1 into *p_wlen
|
||||
(if p_wlen is set). */
|
||||
wchar_t*
|
||||
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
|
||||
On memory allocation failure, return -1.
|
||||
|
||||
On decoding error (if surrogateescape is zero), return -2. If wlen is
|
||||
non-NULL, write the start of the illegal byte sequence into *wlen. If reason
|
||||
is not NULL, write the decoding error message into *reason. */
|
||||
int
|
||||
_Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen,
|
||||
const char **reason, int surrogateescape)
|
||||
{
|
||||
const char *orig_s = s;
|
||||
const char *e;
|
||||
wchar_t *unicode;
|
||||
Py_ssize_t outpos;
|
||||
|
@ -5146,18 +4930,12 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
|
|||
/* Note: size will always be longer than the resulting Unicode
|
||||
character count */
|
||||
if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < (size + 1)) {
|
||||
if (p_wlen) {
|
||||
*p_wlen = (size_t)-1;
|
||||
}
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
unicode = PyMem_RawMalloc((size + 1) * sizeof(wchar_t));
|
||||
if (!unicode) {
|
||||
if (p_wlen) {
|
||||
*p_wlen = (size_t)-1;
|
||||
}
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Unpack UTF-8 encoded data */
|
||||
|
@ -5175,7 +4953,7 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
|
|||
Py_UNREACHABLE();
|
||||
#else
|
||||
assert(ch > 0xFFFF && ch <= MAX_UNICODE);
|
||||
/* compute and append the two surrogates: */
|
||||
/* write a surrogate pair */
|
||||
unicode[outpos++] = (wchar_t)Py_UNICODE_HIGH_SURROGATE(ch);
|
||||
unicode[outpos++] = (wchar_t)Py_UNICODE_LOW_SURROGATE(ch);
|
||||
#endif
|
||||
|
@ -5183,60 +4961,88 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
|
|||
else {
|
||||
if (!ch && s == e)
|
||||
break;
|
||||
if (!surrogateescape) {
|
||||
PyMem_RawFree(unicode );
|
||||
if (reason != NULL) {
|
||||
switch (ch) {
|
||||
case 0:
|
||||
*reason = "unexpected end of data";
|
||||
break;
|
||||
case 1:
|
||||
*reason = "invalid start byte";
|
||||
break;
|
||||
/* 2, 3, 4 */
|
||||
default:
|
||||
*reason = "invalid continuation byte";
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (wlen != NULL) {
|
||||
*wlen = s - orig_s;
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
/* surrogateescape */
|
||||
unicode[outpos++] = 0xDC00 + (unsigned char)*s++;
|
||||
}
|
||||
}
|
||||
unicode[outpos] = L'\0';
|
||||
if (p_wlen) {
|
||||
*p_wlen = outpos;
|
||||
if (wlen) {
|
||||
*wlen = outpos;
|
||||
}
|
||||
return unicode;
|
||||
*wstr = unicode;
|
||||
return 0;
|
||||
}
|
||||
|
||||
wchar_t*
|
||||
_Py_DecodeUTF8_surrogateescape(const char *arg, Py_ssize_t arglen)
|
||||
{
|
||||
wchar_t *wstr;
|
||||
int res = _Py_DecodeUTF8Ex(arg, arglen, &wstr, NULL, NULL, 1);
|
||||
if (res != 0) {
|
||||
return NULL;
|
||||
}
|
||||
return wstr;
|
||||
}
|
||||
|
||||
|
||||
/* UTF-8 encoder using the surrogateescape error handler .
|
||||
|
||||
On success, return a pointer to a newly allocated character string (use
|
||||
PyMem_Free() to free the memory).
|
||||
On success, return 0 and write the newly allocated character string (use
|
||||
PyMem_Free() to free the memory) into *str.
|
||||
|
||||
On encoding failure, return NULL and write the position of the invalid
|
||||
surrogate character into *error_pos (if error_pos is set).
|
||||
On encoding failure, return -2 and write the position of the invalid
|
||||
surrogate character into *error_pos (if error_pos is set) and the decoding
|
||||
error message into *reason (if reason is set).
|
||||
|
||||
On memory allocation failure, return NULL and write (size_t)-1 into
|
||||
*error_pos (if error_pos is set). */
|
||||
char*
|
||||
_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos,
|
||||
int raw_malloc)
|
||||
On memory allocation failure, return -1. */
|
||||
int
|
||||
_Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
|
||||
const char **reason, int raw_malloc, int surrogateescape)
|
||||
{
|
||||
const Py_ssize_t max_char_size = 4;
|
||||
Py_ssize_t len = wcslen(text);
|
||||
|
||||
assert(len >= 0);
|
||||
|
||||
if (len > PY_SSIZE_T_MAX / max_char_size - 1) {
|
||||
return -1;
|
||||
}
|
||||
char *bytes;
|
||||
if (len <= PY_SSIZE_T_MAX / max_char_size - 1) {
|
||||
if (raw_malloc) {
|
||||
bytes = PyMem_RawMalloc((len + 1) * max_char_size);
|
||||
}
|
||||
else {
|
||||
bytes = PyMem_Malloc((len + 1) * max_char_size);
|
||||
}
|
||||
if (raw_malloc) {
|
||||
bytes = PyMem_RawMalloc((len + 1) * max_char_size);
|
||||
}
|
||||
else {
|
||||
bytes = NULL;
|
||||
bytes = PyMem_Malloc((len + 1) * max_char_size);
|
||||
}
|
||||
if (bytes == NULL) {
|
||||
if (error_pos != NULL) {
|
||||
*error_pos = (size_t)-1;
|
||||
}
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *p = bytes;
|
||||
Py_ssize_t i;
|
||||
for (i = 0; i < len;) {
|
||||
Py_UCS4 ch = text[i++];
|
||||
for (i = 0; i < len; i++) {
|
||||
Py_UCS4 ch = text[i];
|
||||
|
||||
if (ch < 0x80) {
|
||||
/* Encode ASCII */
|
||||
|
@ -5250,11 +5056,20 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos,
|
|||
}
|
||||
else if (Py_UNICODE_IS_SURROGATE(ch)) {
|
||||
/* surrogateescape error handler */
|
||||
if (!(0xDC80 <= ch && ch <= 0xDCFF)) {
|
||||
if (!surrogateescape || !(0xDC80 <= ch && ch <= 0xDCFF)) {
|
||||
if (error_pos != NULL) {
|
||||
*error_pos = (size_t)i - 1;
|
||||
*error_pos = (size_t)i;
|
||||
}
|
||||
goto error;
|
||||
if (reason != NULL) {
|
||||
*reason = "encoding error";
|
||||
}
|
||||
if (raw_malloc) {
|
||||
PyMem_RawFree(bytes);
|
||||
}
|
||||
else {
|
||||
PyMem_Free(bytes);
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
*p++ = (char)(ch & 0xff);
|
||||
}
|
||||
|
@ -5286,18 +5101,16 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos,
|
|||
if (error_pos != NULL) {
|
||||
*error_pos = (size_t)-1;
|
||||
}
|
||||
goto error;
|
||||
if (raw_malloc) {
|
||||
PyMem_RawFree(bytes);
|
||||
}
|
||||
else {
|
||||
PyMem_Free(bytes);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
return bytes2;
|
||||
|
||||
error:
|
||||
if (raw_malloc) {
|
||||
PyMem_RawFree(bytes);
|
||||
}
|
||||
else {
|
||||
PyMem_Free(bytes);
|
||||
}
|
||||
return NULL;
|
||||
*str = bytes2;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue