Issue #8622: Add PYTHONFSENCODING environment variable to override the

filesystem encoding.

initfsencoding() also displays a better error message if get_codeset() fails.
This commit is contained in:
Victor Stinner 2010-08-18 21:23:25 +00:00
parent 56ab01b66a
commit 94908bbc15
7 changed files with 93 additions and 34 deletions

View file

@ -442,11 +442,20 @@ These environment variables influence Python's behavior.
import of source modules. import of source modules.
.. envvar:: PYTHONFSENCODING
If this is set before running the interpreter, it overrides the encoding used
for the filesystem encoding (see :func:`sys.getfilesystemencoding`).
.. versionadded:: 3.2
.. envvar:: PYTHONIOENCODING .. envvar:: PYTHONIOENCODING
Overrides the encoding used for stdin/stdout/stderr, in the syntax If this is set before running the interpreter, it overrides the encoding used
``encodingname:errorhandler``. The ``:errorhandler`` part is optional and for stdin/stdout/stderr, in the syntax ``encodingname:errorhandler``. The
has the same meaning as in :func:`str.encode`. ``:errorhandler`` part is optional and has the same meaning as in
:func:`str.encode`.
For stderr, the ``:errorhandler`` part is ignored; the handler will always be For stderr, the ``:errorhandler`` part is ignored; the handler will always be
``'backslashreplace'``. ``'backslashreplace'``.

View file

@ -232,6 +232,15 @@ Major performance enhancements have been added:
* Stub * Stub
Unicode
=======
The filesystem encoding can be specified by setting the
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
IDLE IDLE
==== ====

View file

@ -43,7 +43,7 @@ if sys.platform != 'darwin':
# Is it Unicode-friendly? # Is it Unicode-friendly?
if not os.path.supports_unicode_filenames: if not os.path.supports_unicode_filenames:
fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding() fsencoding = sys.getfilesystemencoding()
try: try:
for name in filenames: for name in filenames:
name.encode(fsencoding) name.encode(fsencoding)

View file

@ -863,16 +863,24 @@ class SizeofTest(unittest.TestCase):
def test_getfilesystemencoding(self): def test_getfilesystemencoding(self):
import codecs import codecs
def check_fsencoding(fs_encoding): def check_fsencoding(fs_encoding, expected=None):
self.assertIsNotNone(fs_encoding) self.assertIsNotNone(fs_encoding)
if sys.platform == 'darwin': if sys.platform == 'darwin':
self.assertEqual(fs_encoding, 'utf-8') self.assertEqual(fs_encoding, 'utf-8')
codecs.lookup(fs_encoding) codecs.lookup(fs_encoding)
if expected:
self.assertEqual(fs_encoding, expected)
fs_encoding = sys.getfilesystemencoding() fs_encoding = sys.getfilesystemencoding()
check_fsencoding(fs_encoding) check_fsencoding(fs_encoding)
# Even in C locale def get_fsencoding(env):
output = subprocess.check_output(
[sys.executable, "-c",
"import sys; print(sys.getfilesystemencoding())"],
env=env)
return output.rstrip().decode('ascii')
try: try:
sys.executable.encode('ascii') sys.executable.encode('ascii')
except UnicodeEncodeError: except UnicodeEncodeError:
@ -880,14 +888,22 @@ class SizeofTest(unittest.TestCase):
# see issue #8611 # see issue #8611
pass pass
else: else:
# Even in C locale
env = os.environ.copy() env = os.environ.copy()
env['LANG'] = 'C' env['LANG'] = 'C'
output = subprocess.check_output( try:
[sys.executable, "-c", del env['PYTHONFSENCODING']
"import sys; print(sys.getfilesystemencoding())"], except KeyError:
env=env) pass
fs_encoding = output.rstrip().decode('ascii') check_fsencoding(get_fsencoding(env), 'ascii')
check_fsencoding(fs_encoding)
# Filesystem encoding is hardcoded on Windows and Mac OS X
if sys.platform not in ('win32', 'darwin'):
for encoding in ('ascii', 'cp850', 'iso8859-1', 'utf-8'):
env = os.environ.copy()
env['PYTHONFSENCODING'] = encoding
check_fsencoding(get_fsencoding(env), encoding)
def test_setfilesystemencoding(self): def test_setfilesystemencoding(self):
old = sys.getfilesystemencoding() old = sys.getfilesystemencoding()

View file

@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 2?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #8622: Add PYTHONFSENCODING environment variable to override the
filesystem encoding.
- Issue #5127: The C functions that access the Unicode Database now accept and - Issue #5127: The C functions that access the Unicode Database now accept and
return characters from the full Unicode range, even on narrow unicode builds return characters from the full Unicode range, even on narrow unicode builds
(Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). A visible difference (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). A visible difference

View file

@ -99,6 +99,7 @@ PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
The default module search path uses %s.\n\ The default module search path uses %s.\n\
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\ PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\ PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
PYTHONFSENCODING: Encoding used for the filesystem.\n\
"; ";
FILE * FILE *

View file

@ -134,18 +134,13 @@ add_flag(int flag, const char *envs)
return flag; return flag;
} }
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
static char* static char*
get_codeset(void) get_codec_name(const char *encoding)
{ {
char* codeset, *name_str; char *name_utf8, *name_str;
PyObject *codec, *name = NULL; PyObject *codec, *name = NULL;
codeset = nl_langinfo(CODESET); codec = _PyCodec_Lookup(encoding);
if (!codeset || codeset[0] == '\0')
return NULL;
codec = _PyCodec_Lookup(codeset);
if (!codec) if (!codec)
goto error; goto error;
@ -154,18 +149,34 @@ get_codeset(void)
if (!name) if (!name)
goto error; goto error;
name_str = _PyUnicode_AsString(name); name_utf8 = _PyUnicode_AsString(name);
if (name == NULL) if (name == NULL)
goto error; goto error;
codeset = strdup(name_str); name_str = strdup(name_utf8);
Py_DECREF(name); Py_DECREF(name);
return codeset; if (name_str == NULL) {
PyErr_NoMemory();
return NULL;
}
return name_str;
error: error:
Py_XDECREF(codec); Py_XDECREF(codec);
Py_XDECREF(name); Py_XDECREF(name);
return NULL; return NULL;
} }
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
static char*
get_codeset(void)
{
char* codeset = nl_langinfo(CODESET);
if (!codeset || codeset[0] == '\0') {
PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
return NULL;
}
return get_codec_name(codeset);
}
#endif #endif
void void
@ -706,25 +717,35 @@ initfsencoding(void)
{ {
PyObject *codec; PyObject *codec;
#if defined(HAVE_LANGINFO_H) && defined(CODESET) #if defined(HAVE_LANGINFO_H) && defined(CODESET)
char *codeset; char *codeset = NULL;
if (Py_FileSystemDefaultEncoding == NULL) { if (Py_FileSystemDefaultEncoding == NULL) {
/* On Unix, set the file system encoding according to the const char *env_encoding = Py_GETENV("PYTHONFSENCODING");
user's preference, if the CODESET names a well-known if (env_encoding != NULL) {
Python codec, and Py_FileSystemDefaultEncoding isn't codeset = get_codec_name(env_encoding);
initialized by other means. Also set the encoding of if (!codeset) {
stdin and stdout if these are terminals. */ fprintf(stderr, "PYTHONFSENCODING is not a valid encoding:\n");
codeset = get_codeset(); PyErr_Print();
}
}
if (!codeset) {
/* On Unix, set the file system encoding according to the
user's preference, if the CODESET names a well-known
Python codec, and Py_FileSystemDefaultEncoding isn't
initialized by other means. Also set the encoding of
stdin and stdout if these are terminals. */
codeset = get_codeset();
}
if (codeset != NULL) { if (codeset != NULL) {
Py_FileSystemDefaultEncoding = codeset; Py_FileSystemDefaultEncoding = codeset;
Py_HasFileSystemDefaultEncoding = 0; Py_HasFileSystemDefaultEncoding = 0;
return; return;
} else {
fprintf(stderr, "Unable to get the locale encoding:\n");
PyErr_Print();
} }
PyErr_Clear(); fprintf(stderr, "Unable to get the filesystem encoding: fallback to utf-8\n");
fprintf(stderr,
"Unable to get the locale encoding: "
"fallback to utf-8\n");
Py_FileSystemDefaultEncoding = "utf-8"; Py_FileSystemDefaultEncoding = "utf-8";
Py_HasFileSystemDefaultEncoding = 1; Py_HasFileSystemDefaultEncoding = 1;
} }