bpo-43510: Implement PEP 597 opt-in EncodingWarning. (GH-19481)

See [PEP 597](https://www.python.org/dev/peps/pep-0597/).

* Add `-X warn_default_encoding` and `PYTHONWARNDEFAULTENCODING`.
* Add EncodingWarning
* Add io.text_encoding()
* open() and TextIOWrapper() emit EncodingWarning when encoding is omitted and warn_default_encoding is enabled.
* _pyio.TextIOWrapper() uses UTF-8 as the fallback default encoding when importing the locale module fails (this happens while building Python).
* The bz2, configparser, gzip, lzma, pathlib and tempfile modules use io.text_encoding().
* What's new entry
This commit is contained in:
Inada Naoki 2021-03-29 12:28:14 +09:00 committed by GitHub
parent 261a452a13
commit 4827483f47
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
32 changed files with 366 additions and 18 deletions

View file

@ -10,6 +10,7 @@
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "_iomodule.h"
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
@ -33,6 +34,7 @@ PyObject *_PyIO_str_fileno = NULL;
PyObject *_PyIO_str_flush = NULL;
PyObject *_PyIO_str_getstate = NULL;
PyObject *_PyIO_str_isatty = NULL;
PyObject *_PyIO_str_locale = NULL;
PyObject *_PyIO_str_newlines = NULL;
PyObject *_PyIO_str_nl = NULL;
PyObject *_PyIO_str_peek = NULL;
@ -504,6 +506,43 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode,
return NULL;
}
/*[clinic input]
_io.text_encoding
encoding: object
stacklevel: int = 2
/
A helper function to choose the text encoding.
When encoding is not None, just return it.
Otherwise, return the default text encoding (i.e. "locale").
This function emits an EncodingWarning if encoding is None and
sys.flags.warn_default_encoding is true.
This can be used in APIs with an encoding=None parameter.
However, please consider using encoding="utf-8" for new APIs.
[clinic start generated code]*/
static PyObject *
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
{
    /* Choose the text encoding for an API that takes encoding=None.
     *
     * encoding:   the caller-supplied encoding object, or NULL/None when
     *             the caller did not specify one.
     * stacklevel: forwarded to PyErr_WarnEx() so the warning is attributed
     *             to the caller's frame.
     *
     * Returns a new reference to `encoding` when it was supplied, otherwise
     * a new reference to the module-level "locale" string object.
     * Returns NULL with an exception set if emitting the warning fails.
     */
    if (encoding == NULL || encoding == Py_None) {
        PyInterpreterState *interp = _PyInterpreterState_GET();
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
            /* PyErr_WarnEx() returns -1 with an exception set when the
             * warning is turned into an error (e.g. -W error) or when the
             * warnings machinery itself fails.  The result must be checked:
             * returning a valid object while an exception is pending would
             * leave the interpreter in an inconsistent state. */
            if (PyErr_WarnEx(PyExc_EncodingWarning,
                             "'encoding' argument not specified",
                             stacklevel) < 0) {
                return NULL;
            }
        }
        Py_INCREF(_PyIO_str_locale);
        return _PyIO_str_locale;
    }
    Py_INCREF(encoding);
    return encoding;
}
/*[clinic input]
_io.open_code
@ -629,6 +668,7 @@ iomodule_free(PyObject *mod) {
/* Table of module-level functions: open, text_encoding and open_code.
 * Each _IO_*_METHODDEF macro is expected to expand to one complete
 * PyMethodDef initializer including its own trailing comma (the
 * Argument Clinic-generated _IO_TEXT_ENCODING_METHODDEF does), which is
 * why the entries below are not separated by commas here. */
static PyMethodDef module_methods[] = {
_IO_OPEN_METHODDEF
_IO_TEXT_ENCODING_METHODDEF
_IO_OPEN_CODE_METHODDEF
/* All-NULL sentinel entry marking the end of the table. */
{NULL, NULL}
};
@ -747,6 +787,7 @@ PyInit__io(void)
ADD_INTERNED(flush)
ADD_INTERNED(getstate)
ADD_INTERNED(isatty)
ADD_INTERNED(locale)
ADD_INTERNED(newlines)
ADD_INTERNED(peek)
ADD_INTERNED(read)

View file

@ -272,6 +272,52 @@ exit:
return return_value;
}
PyDoc_STRVAR(_io_text_encoding__doc__,
"text_encoding($module, encoding, stacklevel=2, /)\n"
"--\n"
"\n"
"A helper function to choose the text encoding.\n"
"\n"
"When encoding is not None, just return it.\n"
"Otherwise, return the default text encoding (i.e. \"locale\").\n"
"\n"
"This function emits an EncodingWarning if encoding is None and\n"
"sys.flags.warn_default_encoding is true.\n"
"\n"
"This can be used in APIs with an encoding=None parameter.\n"
"However, please consider using encoding=\"utf-8\" for new APIs.");
#define _IO_TEXT_ENCODING_METHODDEF \
{"text_encoding", (PyCFunction)(void(*)(void))_io_text_encoding, METH_FASTCALL, _io_text_encoding__doc__},
static PyObject *
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel);
static PyObject *
_io_text_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
PyObject *encoding;
int stacklevel = 2;
if (!_PyArg_CheckPositional("text_encoding", nargs, 1, 2)) {
goto exit;
}
encoding = args[0];
if (nargs < 2) {
goto skip_optional;
}
stacklevel = _PyLong_AsInt(args[1]);
if (stacklevel == -1 && PyErr_Occurred()) {
goto exit;
}
skip_optional:
return_value = _io_text_encoding_impl(module, encoding, stacklevel);
exit:
return return_value;
}
PyDoc_STRVAR(_io_open_code__doc__,
"open_code($module, /, path)\n"
"--\n"
@ -313,4 +359,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
exit:
return return_value;
}
/*[clinic end generated code: output=5c0dd7a262c30ebc input=a9049054013a1b77]*/
/*[clinic end generated code: output=06e055d1d80b835d input=a9049054013a1b77]*/

View file

@ -1123,6 +1123,17 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
self->encodefunc = NULL;
self->b2cratio = 0.0;
if (encoding == NULL) {
PyInterpreterState *interp = _PyInterpreterState_GET();
if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
PyErr_WarnEx(PyExc_EncodingWarning,
"'encoding' argument not specified", 1);
}
}
else if (strcmp(encoding, "locale") == 0) {
encoding = NULL;
}
if (encoding == NULL) {
/* Try os.device_encoding(fileno) */
PyObject *fileno;