Add PyUnicode_DecodeLocaleAndSize() and PyUnicode_DecodeLocale()

* PyUnicode_DecodeLocaleAndSize() and PyUnicode_DecodeLocale() decode a string
   from the current locale encoding
 * _Py_char2wchar() writes an "error code" in the size argument to indicate
   if the function failed because of memory allocation failure or because of a
   decoding error. The function doesn't write the error message directly to
   stderr.
 * Fix time.strftime() (if wcsftime() is missing): decode strftime() result
   from the current locale encoding, not from the filesystem encoding.
This commit is contained in:
Victor Stinner 2011-12-16 23:56:01 +01:00
parent 3607e3de27
commit af02e1c85a
7 changed files with 174 additions and 84 deletions

View file

@ -3234,6 +3234,83 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
return NULL;
}
PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
int surrogateescape)
{
wchar_t smallbuf[256];
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
wchar_t *wstr;
size_t wlen, wlen2;
PyObject *unicode;
if (str[len] != '\0' || len != strlen(str)) {
PyErr_SetString(PyExc_TypeError, "embedded null character");
return NULL;
}
if (surrogateescape)
{
wstr = _Py_char2wchar(str, &wlen);
if (wstr == NULL) {
if (wlen == (size_t)-1)
PyErr_NoMemory();
else
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
unicode = PyUnicode_FromWideChar(wstr, wlen);
PyMem_Free(wstr);
}
else {
#ifndef HAVE_BROKEN_MBSTOWCS
wlen = mbstowcs(NULL, str, 0);
#else
wlen = len;
#endif
if (wlen == (size_t)-1) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
if (wlen+1 <= smallbuf_len) {
wstr = smallbuf;
}
else {
if (wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1)
return PyErr_NoMemory();
wstr = PyMem_Malloc((wlen+1) * sizeof(wchar_t));
if (!wstr)
return PyErr_NoMemory();
}
/* This shouldn't fail now */
wlen2 = mbstowcs(wstr, str, wlen+1);
if (wlen2 == (size_t)-1) {
if (wstr != smallbuf)
PyMem_Free(wstr);
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
#ifdef HAVE_BROKEN_MBSTOWCS
assert(wlen2 == wlen);
#endif
unicode = PyUnicode_FromWideChar(wstr, wlen2);
if (wstr != smallbuf)
PyMem_Free(wstr);
}
return unicode;
}
PyObject*
PyUnicode_DecodeLocale(const char *str, int surrogateescape)
{
Py_ssize_t size = (Py_ssize_t)strlen(str);
return PyUnicode_DecodeLocaleAndSize(str, size, surrogateescape);
}
PyObject*
PyUnicode_DecodeFSDefault(const char *s) {
Py_ssize_t size = (Py_ssize_t)strlen(s);
@ -3264,23 +3341,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
"surrogateescape");
}
else {
/* locale encoding with surrogateescape */
wchar_t *wchar;
PyObject *unicode;
size_t len;
if (s[size] != '\0' || size != strlen(s)) {
PyErr_SetString(PyExc_TypeError, "embedded NUL character");
return NULL;
}
wchar = _Py_char2wchar(s, &len);
if (wchar == NULL)
return PyErr_NoMemory();
unicode = PyUnicode_FromWideChar(wchar, len);
PyMem_Free(wchar);
return unicode;
return PyUnicode_DecodeLocaleAndSize(s, size, 1);
}
#endif
}