bpo-29240: Fix locale encodings in UTF-8 Mode (#5170)

Modify locale.localeconv(), time.tzname, os.strerror() and other
functions to ignore the UTF-8 Mode: always use the current locale
encoding.

Changes:

* Add _Py_DecodeLocaleEx() and _Py_EncodeLocaleEx(). On decoding or
  encoding error, they return the position of the error and an error
  message which are used to raise Unicode errors in
  PyUnicode_DecodeLocale() and PyUnicode_EncodeLocale().
* Replace _Py_DecodeCurrentLocale() with _Py_DecodeLocaleEx().
* PyUnicode_DecodeLocale() now uses _Py_DecodeLocaleEx() for all
  cases, especially for the strict error handler.
* Add _Py_DecodeUTF8Ex(): it returns more information on decoding
  errors and supports the strict error handler.
* Rename _Py_EncodeUTF8_surrogateescape() to _Py_EncodeUTF8Ex().
* Replace _Py_EncodeCurrentLocale() with _Py_EncodeLocaleEx().
* Ignore the UTF-8 mode to encode/decode localeconv(), strerror()
  and time zone name.
* PyUnicode_DecodeLocale(), PyUnicode_DecodeLocaleAndSize() and
  PyUnicode_EncodeLocale() now ignore the UTF-8 mode: they always use
  the "current" locale.
* Remove _PyUnicode_DecodeCurrentLocale(),
  _PyUnicode_DecodeCurrentLocaleAndSize() and
  _PyUnicode_EncodeCurrentLocale().
This commit is contained in:
Victor Stinner 2018-01-15 10:45:49 +01:00 committed by GitHub
parent ee3b83547c
commit 7ed7aead95
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 484 additions and 517 deletions

View file

@ -696,7 +696,7 @@ static int parse_isoformat_date(const char *dtstr,
if (NULL == p) {
return -1;
}
if (*(p++) != '-') {
return -2;
}

View file

@ -572,8 +572,9 @@ PyIntl_bind_textdomain_codeset(PyObject* self,PyObject*args)
if (!PyArg_ParseTuple(args, "sz", &domain, &codeset))
return NULL;
codeset = bind_textdomain_codeset(domain, codeset);
if (codeset)
if (codeset) {
return PyUnicode_DecodeLocale(codeset, NULL);
}
Py_RETURN_NONE;
}
#endif

View file

@ -449,8 +449,8 @@ search_for_exec_prefix(const _PyCoreConfig *core_config,
n = fread(buf, 1, MAXPATHLEN, f);
buf[n] = '\0';
fclose(f);
rel_builddir_path = _Py_DecodeUTF8_surrogateescape(buf, n, NULL);
if (rel_builddir_path != NULL) {
rel_builddir_path = _Py_DecodeUTF8_surrogateescape(buf, n);
if (rel_builddir_path) {
wcsncpy(exec_prefix, calculate->argv0_path, MAXPATHLEN);
exec_prefix[MAXPATHLEN] = L'\0';
joinpath(exec_prefix, rel_builddir_path);

View file

@ -132,13 +132,13 @@ static PyModuleDef readlinemodule;
/* Encode b with the "surrogateescape" error handler and return the result.
   NOTE(review): this text is a diff hunk whose +/- markers were stripped, so
   both versions of the return statement appear. Per the commit message, the
   _PyUnicode_EncodeCurrentLocale() call is the removed line and the
   PyUnicode_EncodeLocale() call is its replacement; as literally written
   here, the second return is unreachable. */
static PyObject *
encode(PyObject *b)
{
return _PyUnicode_EncodeCurrentLocale(b, "surrogateescape");
return PyUnicode_EncodeLocale(b, "surrogateescape");
}
/* Decode the C string s with the "surrogateescape" error handler and return
   the result.
   NOTE(review): this text is a diff hunk whose +/- markers were stripped, so
   both versions of the return statement appear. Per the commit message, the
   _PyUnicode_DecodeCurrentLocale() call is the removed line and the
   PyUnicode_DecodeLocale() call is its replacement; as literally written
   here, the second return is unreachable. */
static PyObject *
decode(const char *s)
{
return _PyUnicode_DecodeCurrentLocale(s, "surrogateescape");
return PyUnicode_DecodeLocale(s, "surrogateescape");
}

View file

@ -418,11 +418,11 @@ tmtotuple(struct tm *p
SET(8, p->tm_isdst);
#ifdef HAVE_STRUCT_TM_TM_ZONE
PyStructSequence_SET_ITEM(v, 9,
_PyUnicode_DecodeCurrentLocale(p->tm_zone, "surrogateescape"));
PyUnicode_DecodeLocale(p->tm_zone, "surrogateescape"));
SET(10, p->tm_gmtoff);
#else
PyStructSequence_SET_ITEM(v, 9,
_PyUnicode_DecodeCurrentLocale(zone, "surrogateescape"));
PyUnicode_DecodeLocale(zone, "surrogateescape"));
PyStructSequence_SET_ITEM(v, 10, _PyLong_FromTime_t(gmtoff));
#endif /* HAVE_STRUCT_TM_TM_ZONE */
#undef SET
@ -809,8 +809,7 @@ time_strftime(PyObject *self, PyObject *args)
#ifdef HAVE_WCSFTIME
ret = PyUnicode_FromWideChar(outbuf, buflen);
#else
ret = _PyUnicode_DecodeCurrentLocaleAndSize(outbuf, buflen,
"surrogateescape");
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, "surrogateescape");
#endif
PyMem_Free(outbuf);
break;
@ -1541,8 +1540,8 @@ PyInit_timezone(PyObject *m) {
PyModule_AddIntConstant(m, "altzone", timezone-3600);
#endif
PyModule_AddIntConstant(m, "daylight", daylight);
otz0 = _PyUnicode_DecodeCurrentLocale(tzname[0], "surrogateescape");
otz1 = _PyUnicode_DecodeCurrentLocale(tzname[1], "surrogateescape");
otz0 = PyUnicode_DecodeLocale(tzname[0], "surrogateescape");
otz1 = PyUnicode_DecodeLocale(tzname[1], "surrogateescape");
PyModule_AddObject(m, "tzname", Py_BuildValue("(NN)", otz0, otz1));
#else /* !HAVE_TZNAME || __GLIBC__ || __CYGWIN__*/
{