Create _Py_wchar2char() function, reverse of _Py_char2wchar()

* Use _Py_wchar2char() in _wstat() and _Py_wfopen()
* Document _Py_char2wchar()
2025-09-26 18:29:57 +00:00 · 2010-08-13 23:29:08 +00:00 · 2010-08-13 23:29:08 +00:00 · f2e08b34f1
commit f2e08b34f1
parent e9b428f997
3 changed files with 97 additions and 11 deletions
--- a/Include/Python.h
+++ b/Include/Python.h
@ -126,12 +126,15 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* _Py_Mangle is defined in compile.c */
 PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
 /* These functions live in main.c */
 PyAPI_FUNC(wchar_t *) _Py_char2wchar(char *);
 PyAPI_FUNC(char*) _Py_wchar2char(const wchar_t *text);
 PyAPI_FUNC(FILE *) _Py_wfopen(const wchar_t *path, const wchar_t *mode);
 #ifdef __cplusplus
 }
 #endif
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@ -139,13 +139,16 @@ static wchar_t *lib_python = L"lib/python" VERSION;
 static int
 _wstat(const wchar_t* path, struct stat *buf)
 {
-    char fname[PATH_MAX];
+    int err;
-    size_t res = wcstombs(fname, path, sizeof(fname));
+    char *fname;
-    if (res == (size_t)-1) {
+    fname = _Py_wchar2char(path);
    if (fname == NULL) {
        errno = EINVAL;
        return -1;
    }
-    return stat(fname, buf);
+    err = stat(fname, buf);
    PyMem_Free(fname);
    return err;
 }
 #endif
--- a/Modules/main.c
+++ b/Modules/main.c
@ -105,20 +105,21 @@ FILE *
 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
 {
 #ifndef MS_WINDOWS
-    char cpath[PATH_MAX];
+    FILE *f;
    char *cpath;
    char cmode[10];
    size_t r;
    r = wcstombs(cpath, path, PATH_MAX);
    if (r == (size_t)-1 || r >= PATH_MAX) {
        errno = EINVAL;
        return NULL;
    }
    r = wcstombs(cmode, mode, 10);
    if (r == (size_t)-1 || r >= 10) {
        errno = EINVAL;
        return NULL;
    }
-    return fopen(cpath, cmode);
+    cpath = _Py_wchar2char(path);
    if (cpath == NULL)
        return NULL;
    f = fopen(cpath, cmode);
    PyMem_Free(cpath);
    return f;
 #else
    return _wfopen(path, mode);
 #endif
@ -734,6 +735,85 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
 }
 /* Encode a (wide) character string to the locale encoding with the
   surrogateescape error handler (characters in range U+DC80..U+DCFF are
   converted to bytes 0x80..0xFF).
   This function is the reverse of _Py_char2wchar().
   text must be a nul-terminated wide string: its length is taken with
   wcslen().
   Return a pointer to a newly allocated byte string (use PyMem_Free() to free
   the memory), or NULL on error (conversion error or memory error). */
 char*
 _Py_wchar2char(const wchar_t *text)
 {
    const size_t len = wcslen(text);
    /* result: start of the output buffer; bytes: write cursor into it.
       Both stay NULL for the whole first (counting) pass. */
    char *result = NULL, *bytes = NULL;
    size_t i, size, converted;
    /* buf holds one character followed by a nul terminator, so that each
       wcstombs() call converts exactly one character. */
    wchar_t c, buf[2];
    /* The function works in two steps:
       1. compute the length of the output buffer in bytes (size)
       2. outputs the bytes */
    size = 0;
    buf[1] = 0;
    /* The loop body runs twice over text: once with bytes == NULL, where
       size is accumulated, and once after the allocation at the bottom,
       where size counts down the space still free in result. */
    while (1) {
        for (i=0; i < len; i++) {
            c = text[i];
            if (c >= 0xdc80 && c <= 0xdcff) {
                /* UTF-8b surrogate: stands for the single raw byte
                   c - 0xdc00 (0x80..0xff); it is emitted directly and
                   never passed to wcstombs(). */
                if (bytes != NULL) {
                    *bytes++ = c - 0xdc00;
                    size--;
                }
                else
                    size++;
                continue;
            }
            else {
                buf[0] = c;
                /* Second pass: write into the buffer, bounded by the space
                   left (size). First pass: a NULL destination writes nothing
                   and only reports the number of bytes needed (POSIX
                   behaviour -- NOTE(review): confirm for non-POSIX libcs). */
                if (bytes != NULL)
                    converted = wcstombs(bytes, buf, size);
                else
                    converted = wcstombs(NULL, buf, 0);
                /* (size_t)-1 is the wcstombs() error return: the character
                   cannot be encoded. Release the buffer if it was already
                   allocated (second pass) before giving up. */
                if (converted == (size_t)-1) {
                    if (result != NULL)
                        PyMem_Free(result);
                    return NULL;
                }
                if (bytes != NULL) {
                    bytes += converted;
                    size -= converted;
                }
                else
                    size += converted;
            }
        }
        /* result is only non-NULL once the second pass has run: terminate
           the string and stop. */
        if (result != NULL) {
            *bytes = 0;
            break;
        }
        /* End of the first pass: size is the encoded length; allocate the
           buffer and go round again to fill it. */
        size += 1; /* nul byte at the end */
        result = PyMem_Malloc(size);
        if (result == NULL)
            return NULL;
        bytes = result;
    }
    return result;
 }
 /* Decode a byte string from the locale encoding with the
   surrogateescape error handler (undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
   character, escape the bytes using the surrogateescape error handler instead
   of decoding them.
   Use _Py_wchar2char() to encode the character string back to a byte string.
   Return a pointer to a newly allocated (wide) character string (use
   PyMem_Free() to free the memory), or NULL on error (conversion error or
   memory error). */
 wchar_t*
 _Py_char2wchar(char* arg)
 {