mirror of
https://github.com/python/cpython.git
synced 2025-07-12 13:55:34 +00:00
SF bug #1251300: On UCS-4 builds the "unicode-internal" codec will now complain
about illegal code points. The codec now supports PEP 293 style error handlers. (This is a variant of the Nik Haldimann's patch that detects truncated data)
This commit is contained in:
parent
523c9f0709
commit
a47d1c08d0
6 changed files with 173 additions and 5 deletions
|
@ -2273,6 +2273,81 @@ PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
|
|||
PyUnicode_GET_SIZE(unicode));
|
||||
}
|
||||
|
||||
/* --- Unicode Internal Codec ------------------------------------------- */
|
||||
|
||||
PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
|
||||
int size,
|
||||
const char *errors)
|
||||
{
|
||||
const char *starts = s;
|
||||
int startinpos;
|
||||
int endinpos;
|
||||
int outpos;
|
||||
Py_UNICODE unimax;
|
||||
PyUnicodeObject *v;
|
||||
Py_UNICODE *p;
|
||||
const char *end;
|
||||
const char *reason;
|
||||
PyObject *errorHandler = NULL;
|
||||
PyObject *exc = NULL;
|
||||
|
||||
unimax = PyUnicode_GetMax();
|
||||
v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE);
|
||||
if (v == NULL)
|
||||
goto onError;
|
||||
if (PyUnicode_GetSize((PyObject *)v) == 0)
|
||||
return (PyObject *)v;
|
||||
p = PyUnicode_AS_UNICODE(v);
|
||||
end = s + size;
|
||||
|
||||
while (s < end) {
|
||||
*p = *(Py_UNICODE *)s;
|
||||
/* We have to sanity check the raw data, otherwise doom looms for
|
||||
some malformed UCS-4 data. */
|
||||
if (
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
*p > unimax || *p < 0 ||
|
||||
#endif
|
||||
end-s < Py_UNICODE_SIZE
|
||||
)
|
||||
{
|
||||
startinpos = s - starts;
|
||||
if (end-s < Py_UNICODE_SIZE) {
|
||||
endinpos = end-starts;
|
||||
reason = "truncated input";
|
||||
}
|
||||
else {
|
||||
endinpos = s - starts + Py_UNICODE_SIZE;
|
||||
reason = "illegal code point (> 0x10FFFF)";
|
||||
}
|
||||
outpos = p - PyUnicode_AS_UNICODE(v);
|
||||
if (unicode_decode_call_errorhandler(
|
||||
errors, &errorHandler,
|
||||
"unicode_internal", reason,
|
||||
starts, size, &startinpos, &endinpos, &exc, &s,
|
||||
(PyObject **)&v, &outpos, &p)) {
|
||||
goto onError;
|
||||
}
|
||||
}
|
||||
else {
|
||||
p++;
|
||||
s += Py_UNICODE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0)
|
||||
goto onError;
|
||||
Py_XDECREF(errorHandler);
|
||||
Py_XDECREF(exc);
|
||||
return (PyObject *)v;
|
||||
|
||||
onError:
|
||||
Py_XDECREF(v);
|
||||
Py_XDECREF(errorHandler);
|
||||
Py_XDECREF(exc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* --- Latin-1 Codec ------------------------------------------------------ */
|
||||
|
||||
PyObject *PyUnicode_DecodeLatin1(const char *s,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue