mirror of
https://github.com/python/cpython.git
synced 2025-08-29 13:15:11 +00:00
Issue #5915: Implement PEP 383, Non-decodable Bytes in
System Character Interfaces.
This commit is contained in:
parent
93f65a177b
commit
011e842033
15 changed files with 726 additions and 289 deletions
|
@ -1530,6 +1530,53 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
|||
}
|
||||
}
|
||||
|
||||
/* Convert the argument to a bytes object, according to the file
|
||||
system encoding */
|
||||
|
||||
int
|
||||
PyUnicode_FSConverter(PyObject* arg, void* addr)
|
||||
{
|
||||
PyObject *output = NULL;
|
||||
Py_ssize_t size;
|
||||
void *data;
|
||||
if (PyBytes_Check(arg) || PyByteArray_Check(arg)) {
|
||||
output = arg;
|
||||
Py_INCREF(output);
|
||||
}
|
||||
else {
|
||||
arg = PyUnicode_FromObject(arg);
|
||||
if (!arg)
|
||||
return 0;
|
||||
output = PyUnicode_AsEncodedObject(arg,
|
||||
Py_FileSystemDefaultEncoding,
|
||||
"utf8b");
|
||||
Py_DECREF(arg);
|
||||
if (!output)
|
||||
return 0;
|
||||
if (!PyBytes_Check(output)) {
|
||||
Py_DECREF(output);
|
||||
PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (PyBytes_Check(output)) {
|
||||
size = PyBytes_GET_SIZE(output);
|
||||
data = PyBytes_AS_STRING(output);
|
||||
}
|
||||
else {
|
||||
size = PyByteArray_GET_SIZE(output);
|
||||
data = PyByteArray_AS_STRING(output);
|
||||
}
|
||||
if (size != strlen(data)) {
|
||||
PyErr_SetString(PyExc_TypeError, "embedded NUL character");
|
||||
Py_DECREF(output);
|
||||
return 0;
|
||||
}
|
||||
*(PyObject**)addr = output;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
char*
|
||||
_PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
|
||||
{
|
||||
|
@ -4154,11 +4201,22 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
|
|||
collstart-startp, collend-startp, &newpos);
|
||||
if (repunicode == NULL)
|
||||
goto onError;
|
||||
if (!PyUnicode_Check(repunicode)) {
|
||||
/* Implementation limitation: byte results not supported yet. */
|
||||
PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
|
||||
if (PyBytes_Check(repunicode)) {
|
||||
/* Directly copy bytes result to output. */
|
||||
repsize = PyBytes_Size(repunicode);
|
||||
if (repsize > 1) {
|
||||
/* Make room for all additional bytes. */
|
||||
if (_PyBytes_Resize(&res, ressize+repsize-1)) {
|
||||
Py_DECREF(repunicode);
|
||||
goto onError;
|
||||
}
|
||||
ressize += repsize-1;
|
||||
}
|
||||
memcpy(str, PyBytes_AsString(repunicode), repsize);
|
||||
str += repsize;
|
||||
p = startp + newpos;
|
||||
Py_DECREF(repunicode);
|
||||
goto onError;
|
||||
break;
|
||||
}
|
||||
/* need more space? (at least enough for what we
|
||||
have+the replacement+the rest of the string, so
|
||||
|
@ -5123,11 +5181,24 @@ int charmap_encoding_error(
|
|||
collstartpos, collendpos, &newpos);
|
||||
if (repunicode == NULL)
|
||||
return -1;
|
||||
if (!PyUnicode_Check(repunicode)) {
|
||||
/* Implementation limitation: byte results not supported yet. */
|
||||
PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
|
||||
if (PyBytes_Check(repunicode)) {
|
||||
/* Directly copy bytes result to output. */
|
||||
Py_ssize_t outsize = PyBytes_Size(*res);
|
||||
Py_ssize_t requiredsize;
|
||||
repsize = PyBytes_Size(repunicode);
|
||||
requiredsize = *respos + repsize;
|
||||
if (requiredsize > outsize)
|
||||
/* Make room for all additional bytes. */
|
||||
if (charmapencode_resize(res, respos, requiredsize)) {
|
||||
Py_DECREF(repunicode);
|
||||
return -1;
|
||||
}
|
||||
memcpy(PyBytes_AsString(*res) + *respos,
|
||||
PyBytes_AsString(repunicode), repsize);
|
||||
*respos += repsize;
|
||||
*inpos = newpos;
|
||||
Py_DECREF(repunicode);
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
/* generate replacement */
|
||||
repsize = PyUnicode_GET_SIZE(repunicode);
|
||||
|
@ -5691,7 +5762,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
|
|||
if (repunicode == NULL)
|
||||
goto onError;
|
||||
if (!PyUnicode_Check(repunicode)) {
|
||||
/* Implementation limitation: byte results not supported yet. */
|
||||
/* Byte results not supported, since they have no decimal property. */
|
||||
PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
|
||||
Py_DECREF(repunicode);
|
||||
goto onError;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue