bpo-34523: Support surrogatepass in locale codecs (GH-8995)

Add support for the "surrogatepass" error handler in
PyUnicode_DecodeFSDefault() and PyUnicode_EncodeFSDefault()
for the UTF-8 encoding.

Changes:

* _Py_DecodeUTF8Ex() and _Py_EncodeUTF8Ex() now support the
  surrogatepass error handler (_Py_ERROR_SURROGATEPASS).
* _Py_DecodeLocaleEx() and _Py_EncodeLocaleEx() now use
  the _Py_error_handler enum instead of "int surrogateescape" to pass
  the error handler. These functions now return -3 if the error
  handler is unknown.
* Add unit tests on _Py_DecodeLocaleEx() and _Py_EncodeLocaleEx()
  in test_codecs.
* Rename get_error_handler() to _Py_GetErrorHandler() and expose it
  as a private function.
* _freeze_importlib doesn't need config.filesystem_errors="strict"
  workaround anymore.
This commit is contained in:
Victor Stinner 2018-08-29 22:21:32 +02:00 committed by GitHub
parent c5989cd876
commit 3d4226a832
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 423 additions and 117 deletions

View file

@ -4550,6 +4550,98 @@ new_hamt(PyObject *self, PyObject *args)
}
static PyObject *
encode_locale_ex(PyObject *self, PyObject *args)
{
PyObject *unicode;
int current_locale = 0;
wchar_t *wstr;
PyObject *res = NULL;
const char *errors = NULL;
if (!PyArg_ParseTuple(args, "U|is", &unicode, &current_locale, &errors)) {
return NULL;
}
wstr = PyUnicode_AsWideCharString(unicode, NULL);
if (wstr == NULL) {
return NULL;
}
_Py_error_handler error_handler = _Py_GetErrorHandler(errors);
char *str = NULL;
size_t error_pos;
const char *reason = NULL;
int ret = _Py_EncodeLocaleEx(wstr,
&str, &error_pos, &reason,
current_locale, error_handler);
PyMem_Free(wstr);
switch(ret) {
case 0:
res = PyBytes_FromString(str);
PyMem_RawFree(str);
break;
case -1:
PyErr_NoMemory();
break;
case -2:
PyErr_Format(PyExc_RuntimeError, "encode error: pos=%zu, reason=%s",
error_pos, reason);
break;
case -3:
PyErr_SetString(PyExc_ValueError, "unsupported error handler");
break;
default:
PyErr_SetString(PyExc_ValueError, "unknow error code");
break;
}
return res;
}
static PyObject *
decode_locale_ex(PyObject *self, PyObject *args)
{
char *str;
int current_locale = 0;
PyObject *res = NULL;
const char *errors = NULL;
if (!PyArg_ParseTuple(args, "y|is", &str, &current_locale, &errors)) {
return NULL;
}
_Py_error_handler error_handler = _Py_GetErrorHandler(errors);
wchar_t *wstr = NULL;
size_t wlen = 0;
const char *reason = NULL;
int ret = _Py_DecodeLocaleEx(str,
&wstr, &wlen, &reason,
current_locale, error_handler);
switch(ret) {
case 0:
res = PyUnicode_FromWideChar(wstr, wlen);
PyMem_RawFree(wstr);
break;
case -1:
PyErr_NoMemory();
break;
case -2:
PyErr_Format(PyExc_RuntimeError, "decode error: pos=%zu, reason=%s",
wlen, reason);
break;
case -3:
PyErr_SetString(PyExc_ValueError, "unsupported error handler");
break;
default:
PyErr_SetString(PyExc_ValueError, "unknow error code");
break;
}
return res;
}
static PyMethodDef TestMethods[] = {
{"raise_exception", raise_exception, METH_VARARGS},
{"raise_memoryerror", raise_memoryerror, METH_NOARGS},
@ -4771,6 +4863,8 @@ static PyMethodDef TestMethods[] = {
{"get_mapping_items", get_mapping_items, METH_O},
{"test_pythread_tss_key_state", test_pythread_tss_key_state, METH_VARARGS},
{"hamt", new_hamt, METH_NOARGS},
{"EncodeLocaleEx", encode_locale_ex, METH_VARARGS},
{"DecodeLocaleEx", decode_locale_ex, METH_VARARGS},
{NULL, NULL} /* sentinel */
};