Add PyUnicode_DecodeLocaleAndSize() and PyUnicode_DecodeLocale()

* PyUnicode_DecodeLocaleAndSize() and PyUnicode_DecodeLocale() decode a string from the current locale encoding * _Py_char2wchar() writes an "error code" in the size argument to indicate if the function failed because of memory allocation failure or because of a decoding error. The function doesn't write the error message directly to stderr. * Fix time.strftime() (if wcsftime() is missing): decode strftime() result from the current locale encoding, not from the filesystem encoding.
2025-10-21 06:02:21 +00:00 · 2011-12-16 23:56:01 +01:00 · 2011-12-16 23:56:01 +01:00 · af02e1c85a
commit af02e1c85a
parent 3607e3de27
7 changed files with 174 additions and 84 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -3234,6 +3234,83 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
    return NULL;
 }

+PyObject*
+PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
+                              int surrogateescape)
+{
+    wchar_t smallbuf[256];
+    size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
+    wchar_t *wstr;
+    size_t wlen, wlen2;
+    PyObject *unicode;
+
+    if (str[len] != '\0' || len != strlen(str)) {
+        PyErr_SetString(PyExc_TypeError, "embedded null character");
+        return NULL;
+    }
+
+    if (surrogateescape)
+    {
+        wstr = _Py_char2wchar(str, &wlen);
+        if (wstr == NULL) {
+            if (wlen == (size_t)-1)
+                PyErr_NoMemory();
+            else
+                PyErr_SetFromErrno(PyExc_OSError);
+            return NULL;
+        }
+
+        unicode = PyUnicode_FromWideChar(wstr, wlen);
+        PyMem_Free(wstr);
+    }
+    else {
+#ifndef HAVE_BROKEN_MBSTOWCS
+        wlen = mbstowcs(NULL, str, 0);
+#else
+        wlen = len;
+#endif
+        if (wlen == (size_t)-1) {
+            PyErr_SetFromErrno(PyExc_OSError);
+            return NULL;
+        }
+        if (wlen+1 <= smallbuf_len) {
+            wstr = smallbuf;
+        }
+        else {
+            if (wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1)
+                return PyErr_NoMemory();
+
+            wstr = PyMem_Malloc((wlen+1) * sizeof(wchar_t));
+            if (!wstr)
+                return PyErr_NoMemory();
+        }
+
+        /* This shouldn't fail now */
+        wlen2 = mbstowcs(wstr, str, wlen+1);
+        if (wlen2 == (size_t)-1) {
+            if (wstr != smallbuf)
+                PyMem_Free(wstr);
+            PyErr_SetFromErrno(PyExc_OSError);
+            return NULL;
+        }
+#ifdef HAVE_BROKEN_MBSTOWCS
+        assert(wlen2 == wlen);
+#endif
+        unicode = PyUnicode_FromWideChar(wstr, wlen2);
+        if (wstr != smallbuf)
+            PyMem_Free(wstr);
+    }
+    return unicode;
+}
+
+PyObject*
+PyUnicode_DecodeLocale(const char *str, int surrogateescape)
+{
+    Py_ssize_t size = (Py_ssize_t)strlen(str);
+    return PyUnicode_DecodeLocaleAndSize(str, size, surrogateescape);
+}
+
+
 PyObject*
 PyUnicode_DecodeFSDefault(const char *s) {
    Py_ssize_t size = (Py_ssize_t)strlen(s);
@ -3264,23 +3341,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
                                "surrogateescape");
    }
    else {
-        /* locale encoding with surrogateescape */
-        wchar_t *wchar;
-        PyObject *unicode;
-        size_t len;
-
-        if (s[size] != '\0' || size != strlen(s)) {
-            PyErr_SetString(PyExc_TypeError, "embedded NUL character");
-            return NULL;
-        }
-
-        wchar = _Py_char2wchar(s, &len);
-        if (wchar == NULL)
-            return PyErr_NoMemory();
-
-        unicode = PyUnicode_FromWideChar(wchar, len);
-        PyMem_Free(wchar);
-        return unicode;
+        return PyUnicode_DecodeLocaleAndSize(s, size, 1);
    }
 #endif
 }