New environment variable PYTHONIOENCODING.

This commit is contained in:
Martin v. Löwis 2008-06-01 07:20:46 +00:00
parent 7f7ca35f5b
commit 99815892f6
10 changed files with 152 additions and 51 deletions

View file

@ -130,6 +130,14 @@ change in future releases of Python.
.. versionadded:: 2.3
.. cfunction:: int PyFile_SetEncodingAndErrors(PyFileObject *p, const char *enc, char *errors)
Set the file's encoding for Unicode output to *enc*, and its error
mode to *errors*. Return 1 on success and 0 on failure.
.. versionadded:: 2.6
.. cfunction:: int PyFile_SoftSpace(PyObject *p, int newflag)
.. index:: single: softspace (file attribute)

View file

@ -2165,6 +2165,13 @@ the particular object.
.. versionadded:: 2.3
.. attribute:: file.errors
The Unicode error handler used along with the encoding.
.. versionadded:: 2.6
.. attribute:: file.mode
The I/O mode for the file. If the file was created using the :func:`open`

View file

@ -481,6 +481,13 @@ These environment variables influence Python's behavior.
.. versionadded:: 2.6
.. envvar:: PYTHONIOENCODING
Overrides the encoding used for stdin/stdout/stderr, in the syntax
``encodingname:errorhandler``. The ``:errorhandler`` part is optional.
.. versionadded:: 2.6
.. envvar:: PYTHONNOUSERSITE

View file

@ -24,6 +24,7 @@ typedef struct {
int f_newlinetypes; /* Types of newlines seen */
int f_skipnextlf; /* Skip next \n */
PyObject *f_encoding;
PyObject *f_errors;
PyObject *weakreflist; /* List of weak references */
int unlocked_count; /* Num. currently running sections of code
using f_fp with the GIL released. */
@ -37,6 +38,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type;
PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *);
PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int);
PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *);
PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors);
PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *,
int (*)(FILE *));
PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *);

View file

@ -385,6 +385,26 @@ class SysModuleTest(unittest.TestCase):
## self.assert_(r[0][2] > 100, r[0][2])
## self.assert_(r[1][2] > 100, r[1][2])
def test_ioencoding(self):
    """Check that PYTHONIOENCODING controls the stdout encoding and
    error handler of a child interpreter.

    Two child interpreters are started with a modified environment and
    their captured stdout is compared with the expected byte string.
    """
    import subprocess,os
    env = dict(os.environ)

    # Test character: cent sign, encoded as 0x4A (ASCII J) in CP424,
    # not representable in ASCII.
    env["PYTHONIOENCODING"] = "cp424"
    p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
                         stdout = subprocess.PIPE, env=env)
    # communicate() reads stdout to EOF, closes the pipe and waits for
    # the child, so no process or file descriptor is left behind.
    out = p.communicate()[0].strip()
    self.assertEqual(out, unichr(0xa2).encode("cp424"))

    # With an explicit error handler, the unencodable character must be
    # replaced instead of raising in the child.
    env["PYTHONIOENCODING"] = "ascii:replace"
    p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
                         stdout = subprocess.PIPE, env=env)
    out = p.communicate()[0].strip()
    self.assertEqual(out, '?')
# Run the SysModuleTest test case through test.test_support.run_unittest.
def test_main():
test.test_support.run_unittest(SysModuleTest)

View file

@ -12,6 +12,8 @@ What's New in Python 2.6 beta 1?
Core and Builtins
-----------------
- New environment variable PYTHONIOENCODING.
- Patch #2488: Add sys.maxsize.
- Issue #2353: file.xreadlines() now emits a Py3k warning.

View file

@ -99,6 +99,7 @@ static char *usage_5 = "\
PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
The default module search path uses %s.\n\
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
";

View file

@ -155,6 +155,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
Py_DECREF(f->f_name);
Py_DECREF(f->f_mode);
Py_DECREF(f->f_encoding);
Py_DECREF(f->f_errors);
Py_INCREF(name);
f->f_name = name;
@ -170,6 +171,8 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
f->f_skipnextlf = 0;
Py_INCREF(Py_None);
f->f_encoding = Py_None;
Py_INCREF(Py_None);
f->f_errors = Py_None;
if (f->f_mode == NULL)
return NULL;
@ -435,19 +438,38 @@ PyFile_SetBufSize(PyObject *f, int bufsize)
}
/* Set the encoding used to output Unicode strings.
Return 1 on success, 0 on failure. */
/* Set only the encoding of file object f to enc.
   Thin wrapper: forwards to PyFile_SetEncodingAndErrors with a NULL
   error handler and returns that call's result (1 on success, 0 on
   failure). */
int
PyFile_SetEncoding(PyObject *f, const char *enc)
{
return PyFile_SetEncodingAndErrors(f, enc, NULL);
}
/* Set the encoding and the error handler used to output Unicode strings
   to file object f.

   enc    - encoding name as a C string; stored in f->f_encoding.
   errors - error handler name as a C string, or NULL.  NULL stores
            None in f->f_errors, which PyFile_WriteObject treats as
            "strict".

   Return 1 on success, 0 on failure.  On failure (a new string object
   could not be created) the file's current encoding and error handler
   are left untouched. */
int
PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
{
    PyFileObject *file = (PyFileObject*)f;
    PyObject *str, *oerrors, *old;

    assert(PyFile_Check(f));
    str = PyBytes_FromString(enc);
    if (!str)
        return 0;
    if (errors) {
        /* Build the handler name with the same constructor as the
           encoding; both are read back with PyBytes_AS_STRING. */
        oerrors = PyBytes_FromString(errors);
        if (!oerrors) {
            Py_DECREF(str);
            return 0;
        }
    } else {
        oerrors = Py_None;
        Py_INCREF(Py_None);
    }
    /* Store each new value before dropping the old reference so the
       struct never points at a released object. */
    old = file->f_encoding;
    file->f_encoding = str;
    Py_DECREF(old);
    old = file->f_errors;
    file->f_errors = oerrors;
    Py_DECREF(old);
    return 1;
}
@ -491,6 +513,7 @@ file_dealloc(PyFileObject *f)
Py_XDECREF(f->f_name);
Py_XDECREF(f->f_mode);
Py_XDECREF(f->f_encoding);
Py_XDECREF(f->f_errors);
drop_readahead(f);
Py_TYPE(f)->tp_free((PyObject *)f);
}
@ -1879,6 +1902,8 @@ static PyMemberDef file_memberlist[] = {
"file name"},
{"encoding", T_OBJECT, OFF(f_encoding), RO,
"file encoding"},
{"errors", T_OBJECT, OFF(f_errors), RO,
"Unicode error handler"},
/* getattr(f, "closed") is implemented without this table */
{NULL} /* Sentinel */
};
@ -2093,6 +2118,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
((PyFileObject *)self)->f_mode = not_yet_string;
Py_INCREF(Py_None);
((PyFileObject *)self)->f_encoding = Py_None;
Py_INCREF(Py_None);
((PyFileObject *)self)->f_errors = Py_None;
((PyFileObject *)self)->weakreflist = NULL;
((PyFileObject *)self)->unlocked_count = 0;
}
@ -2295,7 +2322,9 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
if ((flags & Py_PRINT_RAW) &&
PyUnicode_Check(v) && enc != Py_None) {
char *cenc = PyBytes_AS_STRING(enc);
value = PyUnicode_AsEncodedString(v, cenc, "strict");
char *errors = fobj->f_errors == Py_None ?
"strict" : PyBytes_AS_STRING(fobj->f_errors);
value = PyUnicode_AsEncodedString(v, cenc, errors);
if (value == NULL)
return -1;
} else {

View file

@ -132,10 +132,19 @@ Py_InitializeEx(int install_sigs)
PyThreadState *tstate;
PyObject *bimod, *sysmod;
char *p;
char *icodeset; /* On Windows, input codeset may theoretically
differ from output codeset. */
char *codeset = NULL;
char *errors = NULL;
int free_codeset = 0;
int overridden = 0;
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
char *codeset;
char *saved_locale;
char *saved_locale, *loc_codeset;
PyObject *sys_stream, *sys_isatty;
#endif
#ifdef MS_WINDOWS
char ibuf[128];
char buf[128];
#endif
extern void _Py_ReadyTypes(void);
@ -238,38 +247,75 @@ Py_InitializeEx(int install_sigs)
_PyGILState_Init(interp, tstate);
#endif /* WITH_THREAD */
if ((p = Py_GETENV("PYTHONIOENCODING")) && *p != '\0') {
p = icodeset = codeset = strdup(p);
free_codeset = 1;
errors = strchr(p, ':');
if (errors) {
*errors = '\0';
errors++;
}
overridden = 1;
}
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
/* On Unix, set the file system encoding according to the
user's preference, if the CODESET names a well-known
Python codec, and Py_FileSystemDefaultEncoding isn't
initialized by other means. Also set the encoding of
stdin and stdout if these are terminals. */
stdin and stdout if these are terminals, unless overridden. */
saved_locale = strdup(setlocale(LC_CTYPE, NULL));
setlocale(LC_CTYPE, "");
codeset = nl_langinfo(CODESET);
if (codeset && *codeset) {
PyObject *enc = PyCodec_Encoder(codeset);
if (enc) {
codeset = strdup(codeset);
Py_DECREF(enc);
} else {
codeset = NULL;
PyErr_Clear();
if (!overridden || !Py_FileSystemDefaultEncoding) {
saved_locale = strdup(setlocale(LC_CTYPE, NULL));
setlocale(LC_CTYPE, "");
loc_codeset = nl_langinfo(CODESET);
if (loc_codeset && *loc_codeset) {
PyObject *enc = PyCodec_Encoder(loc_codeset);
if (enc) {
loc_codeset = strdup(loc_codeset);
Py_DECREF(enc);
} else {
loc_codeset = NULL;
PyErr_Clear();
}
} else
loc_codeset = NULL;
setlocale(LC_CTYPE, saved_locale);
free(saved_locale);
if (!overridden) {
codeset = icodeset = loc_codeset;
free_codeset = 1;
}
} else
codeset = NULL;
setlocale(LC_CTYPE, saved_locale);
free(saved_locale);
/* Initialize Py_FileSystemDefaultEncoding from
locale even if PYTHONIOENCODING is set. */
if (!Py_FileSystemDefaultEncoding) {
Py_FileSystemDefaultEncoding = loc_codeset;
if (!overridden)
free_codeset = 0;
}
}
#endif
#ifdef MS_WINDOWS
    /* Unless PYTHONIOENCODING overrides it, derive the stdin encoding
       (icodeset) from the console input code page and the stdout/stderr
       encoding (codeset) from the console output code page.  Both names
       point at the local buffers ibuf/buf. */
    if (!overridden) {
        icodeset = ibuf;
        codeset = buf;
        sprintf(ibuf, "cp%d", GetConsoleCP());
        sprintf(buf, "cp%d", GetConsoleOutputCP());
    }
#endif
if (codeset) {
sys_stream = PySys_GetObject("stdin");
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
if ((overridden ||
(sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) {
if (!PyFile_SetEncoding(sys_stream, codeset))
if (!PyFile_SetEncodingAndErrors(sys_stream, icodeset, errors))
Py_FatalError("Cannot set codeset of stdin");
}
Py_XDECREF(sys_isatty);
@ -278,9 +324,10 @@ Py_InitializeEx(int install_sigs)
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
if ((overridden ||
(sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) {
if (!PyFile_SetEncoding(sys_stream, codeset))
if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
Py_FatalError("Cannot set codeset of stdout");
}
Py_XDECREF(sys_isatty);
@ -289,19 +336,17 @@ Py_InitializeEx(int install_sigs)
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
if((overridden ||
(sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) {
if (!PyFile_SetEncoding(sys_stream, codeset))
if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
Py_FatalError("Cannot set codeset of stderr");
}
Py_XDECREF(sys_isatty);
if (!Py_FileSystemDefaultEncoding)
Py_FileSystemDefaultEncoding = codeset;
else
if (free_codeset)
free(codeset);
}
#endif
}
void

View file

@ -1232,9 +1232,6 @@ _PySys_Init(void)
PyObject *m, *v, *sysdict;
PyObject *sysin, *sysout, *syserr;
char *s;
#ifdef MS_WINDOWS
char buf[128];
#endif
m = Py_InitModule3("sys", sys_methods, sys_doc);
if (m == NULL)
@ -1272,23 +1269,6 @@ _PySys_Init(void)
syserr = PyFile_FromFile(stderr, "<stderr>", "w", _check_and_flush);
if (PyErr_Occurred())
return NULL;
#ifdef MS_WINDOWS
if(isatty(_fileno(stdin)) && PyFile_Check(sysin)) {
sprintf(buf, "cp%d", GetConsoleCP());
if (!PyFile_SetEncoding(sysin, buf))
return NULL;
}
if(isatty(_fileno(stdout)) && PyFile_Check(sysout)) {
sprintf(buf, "cp%d", GetConsoleOutputCP());
if (!PyFile_SetEncoding(sysout, buf))
return NULL;
}
if(isatty(_fileno(stderr)) && PyFile_Check(syserr)) {
sprintf(buf, "cp%d", GetConsoleOutputCP());
if (!PyFile_SetEncoding(syserr, buf))
return NULL;
}
#endif
PyDict_SetItemString(sysdict, "stdin", sysin);
PyDict_SetItemString(sysdict, "stdout", sysout);