mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
Issue #8622: Add PYTHONFSENCODING environment variable to override the
filesystem encoding. initfsencoding() also displays a better error message if get_codeset() fails.
This commit is contained in:
parent
56ab01b66a
commit
94908bbc15
7 changed files with 93 additions and 34 deletions
|
@ -442,11 +442,20 @@ These environment variables influence Python's behavior.
|
||||||
import of source modules.
|
import of source modules.
|
||||||
|
|
||||||
|
|
||||||
|
.. envvar:: PYTHONFSENCODING
|
||||||
|
|
||||||
|
If this is set before running the interpreter, it overrides the encoding used
|
||||||
|
for the filesystem encoding (see :func:`sys.getfilesystemencoding`).
|
||||||
|
|
||||||
|
.. versionadded:: 3.2
|
||||||
|
|
||||||
|
|
||||||
.. envvar:: PYTHONIOENCODING
|
.. envvar:: PYTHONIOENCODING
|
||||||
|
|
||||||
Overrides the encoding used for stdin/stdout/stderr, in the syntax
|
If this is set before running the interpreter, it overrides the encoding used
|
||||||
``encodingname:errorhandler``. The ``:errorhandler`` part is optional and
|
for stdin/stdout/stderr, in the syntax ``encodingname:errorhandler``. The
|
||||||
has the same meaning as in :func:`str.encode`.
|
``:errorhandler`` part is optional and has the same meaning as in
|
||||||
|
:func:`str.encode`.
|
||||||
|
|
||||||
For stderr, the ``:errorhandler`` part is ignored; the handler will always be
|
For stderr, the ``:errorhandler`` part is ignored; the handler will always be
|
||||||
``'backslashreplace'``.
|
``'backslashreplace'``.
|
||||||
|
|
|
@ -232,6 +232,15 @@ Major performance enhancements have been added:
|
||||||
|
|
||||||
* Stub
|
* Stub
|
||||||
|
|
||||||
|
|
||||||
|
Unicode
|
||||||
|
=======
|
||||||
|
|
||||||
|
The filesystem encoding can be specified by setting the
|
||||||
|
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
|
||||||
|
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
|
||||||
|
|
||||||
|
|
||||||
IDLE
|
IDLE
|
||||||
====
|
====
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@ if sys.platform != 'darwin':
|
||||||
|
|
||||||
# Is it Unicode-friendly?
|
# Is it Unicode-friendly?
|
||||||
if not os.path.supports_unicode_filenames:
|
if not os.path.supports_unicode_filenames:
|
||||||
fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
|
fsencoding = sys.getfilesystemencoding()
|
||||||
try:
|
try:
|
||||||
for name in filenames:
|
for name in filenames:
|
||||||
name.encode(fsencoding)
|
name.encode(fsencoding)
|
||||||
|
|
|
@ -863,16 +863,24 @@ class SizeofTest(unittest.TestCase):
|
||||||
def test_getfilesystemencoding(self):
|
def test_getfilesystemencoding(self):
|
||||||
import codecs
|
import codecs
|
||||||
|
|
||||||
def check_fsencoding(fs_encoding):
|
def check_fsencoding(fs_encoding, expected=None):
|
||||||
self.assertIsNotNone(fs_encoding)
|
self.assertIsNotNone(fs_encoding)
|
||||||
if sys.platform == 'darwin':
|
if sys.platform == 'darwin':
|
||||||
self.assertEqual(fs_encoding, 'utf-8')
|
self.assertEqual(fs_encoding, 'utf-8')
|
||||||
codecs.lookup(fs_encoding)
|
codecs.lookup(fs_encoding)
|
||||||
|
if expected:
|
||||||
|
self.assertEqual(fs_encoding, expected)
|
||||||
|
|
||||||
fs_encoding = sys.getfilesystemencoding()
|
fs_encoding = sys.getfilesystemencoding()
|
||||||
check_fsencoding(fs_encoding)
|
check_fsencoding(fs_encoding)
|
||||||
|
|
||||||
# Even in C locale
|
def get_fsencoding(env):
|
||||||
|
output = subprocess.check_output(
|
||||||
|
[sys.executable, "-c",
|
||||||
|
"import sys; print(sys.getfilesystemencoding())"],
|
||||||
|
env=env)
|
||||||
|
return output.rstrip().decode('ascii')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sys.executable.encode('ascii')
|
sys.executable.encode('ascii')
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
|
@ -880,14 +888,22 @@ class SizeofTest(unittest.TestCase):
|
||||||
# see issue #8611
|
# see issue #8611
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
# Even in C locale
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
env['LANG'] = 'C'
|
env['LANG'] = 'C'
|
||||||
output = subprocess.check_output(
|
try:
|
||||||
[sys.executable, "-c",
|
del env['PYTHONFSENCODING']
|
||||||
"import sys; print(sys.getfilesystemencoding())"],
|
except KeyError:
|
||||||
env=env)
|
pass
|
||||||
fs_encoding = output.rstrip().decode('ascii')
|
check_fsencoding(get_fsencoding(env), 'ascii')
|
||||||
check_fsencoding(fs_encoding)
|
|
||||||
|
# Filesystem encoding is hardcoded on Windows and Mac OS X
|
||||||
|
if sys.platform not in ('win32', 'darwin'):
|
||||||
|
for encoding in ('ascii', 'cp850', 'iso8859-1', 'utf-8'):
|
||||||
|
env = os.environ.copy()
|
||||||
|
env['PYTHONFSENCODING'] = encoding
|
||||||
|
check_fsencoding(get_fsencoding(env), encoding)
|
||||||
|
|
||||||
|
|
||||||
def test_setfilesystemencoding(self):
|
def test_setfilesystemencoding(self):
|
||||||
old = sys.getfilesystemencoding()
|
old = sys.getfilesystemencoding()
|
||||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 2?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #8622: Add PYTHONFSENCODING environment variable to override the
|
||||||
|
filesystem encoding.
|
||||||
|
|
||||||
- Issue #5127: The C functions that access the Unicode Database now accept and
|
- Issue #5127: The C functions that access the Unicode Database now accept and
|
||||||
return characters from the full Unicode range, even on narrow unicode builds
|
return characters from the full Unicode range, even on narrow unicode builds
|
||||||
(Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). A visible difference
|
(Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). A visible difference
|
||||||
|
|
|
@ -99,6 +99,7 @@ PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
|
||||||
The default module search path uses %s.\n\
|
The default module search path uses %s.\n\
|
||||||
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
|
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
|
||||||
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
|
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
|
||||||
|
PYTHONFSENCODING: Encoding used for the filesystem.\n\
|
||||||
";
|
";
|
||||||
|
|
||||||
FILE *
|
FILE *
|
||||||
|
|
|
@ -134,18 +134,13 @@ add_flag(int flag, const char *envs)
|
||||||
return flag;
|
return flag;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
|
||||||
static char*
|
static char*
|
||||||
get_codeset(void)
|
get_codec_name(const char *encoding)
|
||||||
{
|
{
|
||||||
char* codeset, *name_str;
|
char *name_utf8, *name_str;
|
||||||
PyObject *codec, *name = NULL;
|
PyObject *codec, *name = NULL;
|
||||||
|
|
||||||
codeset = nl_langinfo(CODESET);
|
codec = _PyCodec_Lookup(encoding);
|
||||||
if (!codeset || codeset[0] == '\0')
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
codec = _PyCodec_Lookup(codeset);
|
|
||||||
if (!codec)
|
if (!codec)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
|
@ -154,18 +149,34 @@ get_codeset(void)
|
||||||
if (!name)
|
if (!name)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
name_str = _PyUnicode_AsString(name);
|
name_utf8 = _PyUnicode_AsString(name);
|
||||||
if (name == NULL)
|
if (name == NULL)
|
||||||
goto error;
|
goto error;
|
||||||
codeset = strdup(name_str);
|
name_str = strdup(name_utf8);
|
||||||
Py_DECREF(name);
|
Py_DECREF(name);
|
||||||
return codeset;
|
if (name_str == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return name_str;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
Py_XDECREF(codec);
|
Py_XDECREF(codec);
|
||||||
Py_XDECREF(name);
|
Py_XDECREF(name);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||||
|
static char*
|
||||||
|
get_codeset(void)
|
||||||
|
{
|
||||||
|
char* codeset = nl_langinfo(CODESET);
|
||||||
|
if (!codeset || codeset[0] == '\0') {
|
||||||
|
PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return get_codec_name(codeset);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -706,25 +717,35 @@ initfsencoding(void)
|
||||||
{
|
{
|
||||||
PyObject *codec;
|
PyObject *codec;
|
||||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||||
char *codeset;
|
char *codeset = NULL;
|
||||||
|
|
||||||
if (Py_FileSystemDefaultEncoding == NULL) {
|
if (Py_FileSystemDefaultEncoding == NULL) {
|
||||||
/* On Unix, set the file system encoding according to the
|
const char *env_encoding = Py_GETENV("PYTHONFSENCODING");
|
||||||
user's preference, if the CODESET names a well-known
|
if (env_encoding != NULL) {
|
||||||
Python codec, and Py_FileSystemDefaultEncoding isn't
|
codeset = get_codec_name(env_encoding);
|
||||||
initialized by other means. Also set the encoding of
|
if (!codeset) {
|
||||||
stdin and stdout if these are terminals. */
|
fprintf(stderr, "PYTHONFSENCODING is not a valid encoding:\n");
|
||||||
codeset = get_codeset();
|
PyErr_Print();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!codeset) {
|
||||||
|
/* On Unix, set the file system encoding according to the
|
||||||
|
user's preference, if the CODESET names a well-known
|
||||||
|
Python codec, and Py_FileSystemDefaultEncoding isn't
|
||||||
|
initialized by other means. Also set the encoding of
|
||||||
|
stdin and stdout if these are terminals. */
|
||||||
|
codeset = get_codeset();
|
||||||
|
}
|
||||||
if (codeset != NULL) {
|
if (codeset != NULL) {
|
||||||
Py_FileSystemDefaultEncoding = codeset;
|
Py_FileSystemDefaultEncoding = codeset;
|
||||||
Py_HasFileSystemDefaultEncoding = 0;
|
Py_HasFileSystemDefaultEncoding = 0;
|
||||||
return;
|
return;
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Unable to get the locale encoding:\n");
|
||||||
|
PyErr_Print();
|
||||||
}
|
}
|
||||||
|
|
||||||
PyErr_Clear();
|
fprintf(stderr, "Unable to get the filesystem encoding: fallback to utf-8\n");
|
||||||
fprintf(stderr,
|
|
||||||
"Unable to get the locale encoding: "
|
|
||||||
"fallback to utf-8\n");
|
|
||||||
Py_FileSystemDefaultEncoding = "utf-8";
|
Py_FileSystemDefaultEncoding = "utf-8";
|
||||||
Py_HasFileSystemDefaultEncoding = 1;
|
Py_HasFileSystemDefaultEncoding = 1;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue