Issue #27781: Change file system encoding on Windows to UTF-8 (PEP 529)

This commit is contained in:
Steve Dower 2016-09-08 10:35:16 -07:00
parent cfbd48bc56
commit cc16be85c0
18 changed files with 618 additions and 836 deletions

View file

@ -21,16 +21,18 @@
Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the
values for Py_FileSystemDefaultEncoding!
*/
#ifdef HAVE_MBCS
const char *Py_FileSystemDefaultEncoding = "mbcs";
#if defined(__APPLE__)
const char *Py_FileSystemDefaultEncoding = "utf-8";
int Py_HasFileSystemDefaultEncoding = 1;
#elif defined(__APPLE__)
#elif defined(MS_WINDOWS)
/* may be changed by initfsencoding(), but should never be free()d */
const char *Py_FileSystemDefaultEncoding = "utf-8";
int Py_HasFileSystemDefaultEncoding = 1;
#else
const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
int Py_HasFileSystemDefaultEncoding = 0;
#endif
const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
_Py_IDENTIFIER(__builtins__);
_Py_IDENTIFIER(__dict__);

View file

@ -90,6 +90,9 @@ int Py_NoUserSiteDirectory = 0; /* for -s and site.py */
int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */
int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */
int Py_IsolatedFlag = 0; /* for -I, isolate from user's env */
#ifdef MS_WINDOWS
int Py_LegacyWindowsFSEncodingFlag = 0; /* Uses mbcs instead of utf-8 */
#endif
PyThreadState *_Py_Finalizing = NULL;
@ -321,6 +324,10 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib)
check its value further. */
if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0')
Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p);
#ifdef MS_WINDOWS
if ((p = Py_GETENV("PYTHONLEGACYWINDOWSFSENCODING")) && *p != '\0')
Py_LegacyWindowsFSEncodingFlag = add_flag(Py_LegacyWindowsFSEncodingFlag, p);
#endif
_PyRandom_Init();
@ -958,6 +965,18 @@ initfsencoding(PyInterpreterState *interp)
{
PyObject *codec;
#ifdef MS_WINDOWS
if (Py_LegacyWindowsFSEncodingFlag)
{
Py_FileSystemDefaultEncoding = "mbcs";
Py_FileSystemDefaultEncodeErrors = "replace";
}
else
{
Py_FileSystemDefaultEncoding = "utf-8";
Py_FileSystemDefaultEncodeErrors = "surrogatepass";
}
#else
if (Py_FileSystemDefaultEncoding == NULL)
{
Py_FileSystemDefaultEncoding = get_locale_encoding();
@ -968,6 +987,7 @@ initfsencoding(PyInterpreterState *interp)
interp->fscodec_initialized = 1;
return 0;
}
#endif
/* the encoding is mbcs, utf-8 or ascii */
codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);

View file

@ -310,6 +310,23 @@ Return the encoding used to convert Unicode filenames in\n\
operating system filenames."
);
static PyObject *
sys_getfilesystemencodeerrors(PyObject *self)
{
if (Py_FileSystemDefaultEncodeErrors)
return PyUnicode_FromString(Py_FileSystemDefaultEncodeErrors);
PyErr_SetString(PyExc_RuntimeError,
"filesystem encoding is not initialized");
return NULL;
}
PyDoc_STRVAR(getfilesystemencodeerrors_doc,
"getfilesystemencodeerrors() -> string\n\
\n\
Return the error mode used to convert Unicode filenames in\n\
operating system filenames."
);
static PyObject *
sys_intern(PyObject *self, PyObject *args)
{
@ -866,6 +883,24 @@ sys_getwindowsversion(PyObject *self)
#pragma warning(pop)
PyDoc_STRVAR(enablelegacywindowsfsencoding_doc,
"_enablelegacywindowsfsencoding()\n\
\n\
Changes the default filesystem encoding to mbcs:replace for consistency\n\
with earlier versions of Python. See PEP 529 for more information.\n\
\n\
This is equivalent to defining the PYTHONLEGACYWINDOWSFSENCODING \n\
environment variable before launching Python."
);
static PyObject *
sys_enablelegacywindowsfsencoding(PyObject *self)
{
Py_FileSystemDefaultEncoding = "mbcs";
Py_FileSystemDefaultEncodeErrors = "replace";
Py_RETURN_NONE;
}
#endif /* MS_WINDOWS */
#ifdef HAVE_DLOPEN
@ -1225,6 +1260,8 @@ static PyMethodDef sys_methods[] = {
#endif
{"getfilesystemencoding", (PyCFunction)sys_getfilesystemencoding,
METH_NOARGS, getfilesystemencoding_doc},
{ "getfilesystemencodeerrors", (PyCFunction)sys_getfilesystemencodeerrors,
METH_NOARGS, getfilesystemencodeerrors_doc },
#ifdef Py_TRACE_REFS
{"getobjects", _Py_GetObjects, METH_VARARGS},
#endif
@ -1240,6 +1277,8 @@ static PyMethodDef sys_methods[] = {
#ifdef MS_WINDOWS
{"getwindowsversion", (PyCFunction)sys_getwindowsversion, METH_NOARGS,
getwindowsversion_doc},
{"_enablelegacywindowsfsencoding", (PyCFunction)sys_enablelegacywindowsfsencoding,
METH_NOARGS, enablelegacywindowsfsencoding_doc },
#endif /* MS_WINDOWS */
{"intern", sys_intern, METH_VARARGS, intern_doc},
{"is_finalizing", sys_is_finalizing, METH_NOARGS, is_finalizing_doc},
@ -1456,14 +1495,21 @@ version -- the version of this interpreter as a string\n\
version_info -- version information as a named tuple\n\
"
)
#ifdef MS_WINDOWS
#ifdef MS_COREDLL
/* concatenating string here */
PyDoc_STR(
"dllhandle -- [Windows only] integer handle of the Python DLL\n\
winver -- [Windows only] version number of the Python DLL\n\
"
)
#endif /* MS_WINDOWS */
#endif /* MS_COREDLL */
#ifdef MS_WINDOWS
/* concatenating string here */
PyDoc_STR(
"_enablelegacywindowsfsencoding -- [Windows only] \n\
"
)
#endif
PyDoc_STR(
"__stdin__ -- the original stdin; don't touch!\n\
__stdout__ -- the original stdout; don't touch!\n\