mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
filenames and enable os.fsencode().
This commit is contained in:
parent
0f35e2c0f4
commit
313a120ab6
5 changed files with 33 additions and 22 deletions
|
@ -159,10 +159,10 @@ process and user.
|
||||||
.. function:: fsencode(value)
|
.. function:: fsencode(value)
|
||||||
|
|
||||||
Encode *value* to bytes for use in the file system, environment variables or
|
Encode *value* to bytes for use in the file system, environment variables or
|
||||||
the command line. Uses :func:`sys.getfilesystemencoding` and
|
the command line. Use :func:`sys.getfilesystemencoding` and
|
||||||
``'surrogateescape'`` error handler for strings and returns bytes unchanged.
|
the ``'surrogateescape'`` error handler for strings and return bytes unchanged.
|
||||||
|
On Windows, use the ``'strict'`` error handler for strings if the file system
|
||||||
Availability: Unix.
|
encoding is ``'mbcs'`` (which is the default encoding).
|
||||||
|
|
||||||
.. versionadded:: 3.2
|
.. versionadded:: 3.2
|
||||||
|
|
||||||
|
|
|
@ -533,14 +533,17 @@ if supports_bytes_environ:
|
||||||
return environb.get(key, default)
|
return environb.get(key, default)
|
||||||
__all__.append("getenvb")
|
__all__.append("getenvb")
|
||||||
|
|
||||||
if name != 'nt':
|
def fsencode(value):
|
||||||
def fsencode(value):
|
|
||||||
"""Encode value for use in the file system, environment variables
|
"""Encode value for use in the file system, environment variables
|
||||||
or the command line."""
|
or the command line."""
|
||||||
if isinstance(value, bytes):
|
if isinstance(value, bytes):
|
||||||
return value
|
return value
|
||||||
elif isinstance(value, str):
|
elif isinstance(value, str):
|
||||||
return value.encode(sys.getfilesystemencoding(), 'surrogateescape')
|
encoding = sys.getfilesystemencoding()
|
||||||
|
if encoding == 'mbcs':
|
||||||
|
return value.encode(encoding)
|
||||||
|
else:
|
||||||
|
return value.encode(encoding, 'surrogateescape')
|
||||||
else:
|
else:
|
||||||
raise TypeError("expect bytes or str, not %s" % type(value).__name__)
|
raise TypeError("expect bytes or str, not %s" % type(value).__name__)
|
||||||
|
|
||||||
|
|
|
@ -33,16 +33,15 @@ else:
|
||||||
HOST = support.HOST
|
HOST = support.HOST
|
||||||
|
|
||||||
data_file = lambda name: os.path.join(os.path.dirname(__file__), name)
|
data_file = lambda name: os.path.join(os.path.dirname(__file__), name)
|
||||||
fsencode = lambda name: name.encode(sys.getfilesystemencoding(), "surrogateescape")
|
|
||||||
|
|
||||||
CERTFILE = data_file("keycert.pem")
|
CERTFILE = data_file("keycert.pem")
|
||||||
BYTES_CERTFILE = fsencode(CERTFILE)
|
BYTES_CERTFILE = os.fsencode(CERTFILE)
|
||||||
ONLYCERT = data_file("ssl_cert.pem")
|
ONLYCERT = data_file("ssl_cert.pem")
|
||||||
ONLYKEY = data_file("ssl_key.pem")
|
ONLYKEY = data_file("ssl_key.pem")
|
||||||
BYTES_ONLYCERT = fsencode(ONLYCERT)
|
BYTES_ONLYCERT = os.fsencode(ONLYCERT)
|
||||||
BYTES_ONLYKEY = fsencode(ONLYKEY)
|
BYTES_ONLYKEY = os.fsencode(ONLYKEY)
|
||||||
CAPATH = data_file("capath")
|
CAPATH = data_file("capath")
|
||||||
BYTES_CAPATH = fsencode(CAPATH)
|
BYTES_CAPATH = os.fsencode(CAPATH)
|
||||||
|
|
||||||
SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem")
|
SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem")
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
|
||||||
|
filenames and enable os.fsencode().
|
||||||
|
|
||||||
- Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash
|
- Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash
|
||||||
the interpreter with characters outside the Basic Multilingual Plane
|
the interpreter with characters outside the Basic Multilingual Plane
|
||||||
(higher than 0x10000).
|
(higher than 0x10000).
|
||||||
|
|
|
@ -1478,11 +1478,17 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
|
||||||
|
|
||||||
PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
|
PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
|
||||||
{
|
{
|
||||||
if (Py_FileSystemDefaultEncoding)
|
if (Py_FileSystemDefaultEncoding) {
|
||||||
|
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
||||||
|
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0)
|
||||||
|
return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
|
||||||
|
PyUnicode_GET_SIZE(unicode),
|
||||||
|
NULL);
|
||||||
|
#endif
|
||||||
return PyUnicode_AsEncodedString(unicode,
|
return PyUnicode_AsEncodedString(unicode,
|
||||||
Py_FileSystemDefaultEncoding,
|
Py_FileSystemDefaultEncoding,
|
||||||
"surrogateescape");
|
"surrogateescape");
|
||||||
else
|
} else
|
||||||
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
||||||
PyUnicode_GET_SIZE(unicode),
|
PyUnicode_GET_SIZE(unicode),
|
||||||
"surrogateescape");
|
"surrogateescape");
|
||||||
|
@ -1639,7 +1645,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
||||||
if (Py_FileSystemDefaultEncoding) {
|
if (Py_FileSystemDefaultEncoding) {
|
||||||
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
||||||
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
|
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
|
||||||
return PyUnicode_DecodeMBCS(s, size, "surrogateescape");
|
return PyUnicode_DecodeMBCS(s, size, NULL);
|
||||||
}
|
}
|
||||||
#elif defined(__APPLE__)
|
#elif defined(__APPLE__)
|
||||||
if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
|
if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue