Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode filenames and enable os.fsencode().
This commit is contained in:
Victor Stinner 2010-06-11 23:56:51 +00:00
parent 0f35e2c0f4
commit 313a120ab6
5 changed files with 33 additions and 22 deletions

View file

@ -159,10 +159,10 @@ process and user.
.. function:: fsencode(value) .. function:: fsencode(value)
Encode *value* to bytes for use in the file system, environment variables or Encode *value* to bytes for use in the file system, environment variables or
the command line. Uses :func:`sys.getfilesystemencoding` and the command line. Use :func:`sys.getfilesystemencoding` and
``'surrogateescape'`` error handler for strings and returns bytes unchanged. ``'surrogateescape'`` error handler for strings and return bytes unchanged.
On Windows, use ``'strict'`` error handler for strings if the file system
Availability: Unix. encoding is ``'mbcs'`` (which is the default encoding).
.. versionadded:: 3.2 .. versionadded:: 3.2

View file

@ -533,14 +533,17 @@ if supports_bytes_environ:
return environb.get(key, default) return environb.get(key, default)
__all__.append("getenvb") __all__.append("getenvb")
if name != 'nt': def fsencode(value):
def fsencode(value):
"""Encode value for use in the file system, environment variables """Encode value for use in the file system, environment variables
or the command line.""" or the command line."""
if isinstance(value, bytes): if isinstance(value, bytes):
return value return value
elif isinstance(value, str): elif isinstance(value, str):
return value.encode(sys.getfilesystemencoding(), 'surrogateescape') encoding = sys.getfilesystemencoding()
if encoding == 'mbcs':
return value.encode(encoding)
else:
return value.encode(encoding, 'surrogateescape')
else: else:
raise TypeError("expect bytes or str, not %s" % type(value).__name__) raise TypeError("expect bytes or str, not %s" % type(value).__name__)

View file

@ -33,16 +33,15 @@ else:
HOST = support.HOST HOST = support.HOST
data_file = lambda name: os.path.join(os.path.dirname(__file__), name) data_file = lambda name: os.path.join(os.path.dirname(__file__), name)
fsencode = lambda name: name.encode(sys.getfilesystemencoding(), "surrogateescape")
CERTFILE = data_file("keycert.pem") CERTFILE = data_file("keycert.pem")
BYTES_CERTFILE = fsencode(CERTFILE) BYTES_CERTFILE = os.fsencode(CERTFILE)
ONLYCERT = data_file("ssl_cert.pem") ONLYCERT = data_file("ssl_cert.pem")
ONLYKEY = data_file("ssl_key.pem") ONLYKEY = data_file("ssl_key.pem")
BYTES_ONLYCERT = fsencode(ONLYCERT) BYTES_ONLYCERT = os.fsencode(ONLYCERT)
BYTES_ONLYKEY = fsencode(ONLYKEY) BYTES_ONLYKEY = os.fsencode(ONLYKEY)
CAPATH = data_file("capath") CAPATH = data_file("capath")
BYTES_CAPATH = fsencode(CAPATH) BYTES_CAPATH = os.fsencode(CAPATH)
SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem") SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem")

View file

@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
filenames and enable os.fsencode().
- Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash - Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash
the interpreter with characters outside the Basic Multilingual Plane the interpreter with characters outside the Basic Multilingual Plane
(higher than 0x10000). (higher than 0x10000).

View file

@ -1478,11 +1478,17 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode) PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
{ {
if (Py_FileSystemDefaultEncoding) if (Py_FileSystemDefaultEncoding) {
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0)
return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
NULL);
#endif
return PyUnicode_AsEncodedString(unicode, return PyUnicode_AsEncodedString(unicode,
Py_FileSystemDefaultEncoding, Py_FileSystemDefaultEncoding,
"surrogateescape"); "surrogateescape");
else } else
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode), PyUnicode_GET_SIZE(unicode),
"surrogateescape"); "surrogateescape");
@ -1639,7 +1645,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
if (Py_FileSystemDefaultEncoding) { if (Py_FileSystemDefaultEncoding) {
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) { if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
return PyUnicode_DecodeMBCS(s, size, "surrogateescape"); return PyUnicode_DecodeMBCS(s, size, NULL);
} }
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) { if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {