mirror of
https://github.com/python/cpython.git
synced 2025-07-23 11:15:24 +00:00
Change UnicodeDecodeError objects so that the 'object' attribute
is a bytes object. Add 'y' and 'y#' format specifiers that work like 's' and 's#' but only accept bytes objects.
This commit is contained in:
parent
c2b87a6dff
commit
612344f127
5 changed files with 98 additions and 8 deletions
|
@ -424,6 +424,18 @@ whose address should be passed.
|
||||||
compatible objects pass back a reference to the raw internal data
|
compatible objects pass back a reference to the raw internal data
|
||||||
representation.
|
representation.
|
||||||
|
|
||||||
|
\item[\samp{y} (bytes object)
|
||||||
|
{[const char *]}]
|
||||||
|
This variant on \samp{s} converts a Python bytes object to a C pointer to a
|
||||||
|
character string. The bytes object must not contain embedded NUL bytes;
|
||||||
|
if it does, a \exception{TypeError} exception is raised.
|
||||||
|
|
||||||
|
\item[\samp{y\#} (bytes object)
|
||||||
|
{[const char *, int]}]
|
||||||
|
This variant on \samp{s\#} stores into two C variables, the first one
|
||||||
|
a pointer to a character string, the second one its length. This only
|
||||||
|
accepts bytes objects.
|
||||||
|
|
||||||
\item[\samp{z} (string or \code{None}) {[const char *]}]
|
\item[\samp{z} (string or \code{None}) {[const char *]}]
|
||||||
Like \samp{s}, but the Python object may also be \code{None}, in
|
Like \samp{s}, but the Python object may also be \code{None}, in
|
||||||
which case the C pointer is set to \NULL.
|
which case the C pointer is set to \NULL.
|
||||||
|
|
|
@ -802,8 +802,10 @@ Examples (to the left the call, to the right the resulting Python value):
|
||||||
Py_BuildValue("i", 123) 123
|
Py_BuildValue("i", 123) 123
|
||||||
Py_BuildValue("iii", 123, 456, 789) (123, 456, 789)
|
Py_BuildValue("iii", 123, 456, 789) (123, 456, 789)
|
||||||
Py_BuildValue("s", "hello") 'hello'
|
Py_BuildValue("s", "hello") 'hello'
|
||||||
|
Py_BuildValue("y", "hello") b'hello'
|
||||||
Py_BuildValue("ss", "hello", "world") ('hello', 'world')
|
Py_BuildValue("ss", "hello", "world") ('hello', 'world')
|
||||||
Py_BuildValue("s#", "hello", 4) 'hell'
|
Py_BuildValue("s#", "hello", 4) 'hell'
|
||||||
|
Py_BuildValue("y#", "hello", 4) b'hell'
|
||||||
Py_BuildValue("()") ()
|
Py_BuildValue("()") ()
|
||||||
Py_BuildValue("(i)", 123) (123,)
|
Py_BuildValue("(i)", 123) (123,)
|
||||||
Py_BuildValue("(ii)", 123, 456) (123, 456)
|
Py_BuildValue("(ii)", 123, 456) (123, 456)
|
||||||
|
|
|
@ -1242,6 +1242,22 @@ set_string(PyObject **attr, const char *value)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Validate an exception attribute that is expected to hold a bytes object.
 *
 * attr: the attribute value to check; may be NULL when it was never set.
 * name: the attribute's name, used only in the error message (at most
 *       200 characters of it are shown).
 *
 * Returns attr with its reference count incremented when it passes
 * PyBytes_Check().  Otherwise sets a TypeError ("... attribute not set"
 * for NULL, "... attribute must be bytes" for any other type) and
 * returns NULL.
 */
static PyObject *
|
||||||
|
get_bytes(PyObject *attr, const char *name)
|
||||||
|
{
|
||||||
|
if (!attr) {
|
||||||
|
PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!PyBytes_Check(attr)) {
|
||||||
|
PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
Py_INCREF(attr);
|
||||||
|
return attr;
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
get_unicode(PyObject *attr, const char *name)
|
get_unicode(PyObject *attr, const char *name)
|
||||||
{
|
{
|
||||||
|
@ -1280,7 +1296,7 @@ PyUnicodeEncodeError_GetObject(PyObject *exc)
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicodeDecodeError_GetObject(PyObject *exc)
|
PyUnicodeDecodeError_GetObject(PyObject *exc)
|
||||||
{
|
{
|
||||||
return get_string(((PyUnicodeErrorObject *)exc)->object, "object");
|
return get_bytes(((PyUnicodeErrorObject *)exc)->object, "object");
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
|
@ -1314,10 +1330,10 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
|
||||||
{
|
{
|
||||||
if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
|
if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
|
||||||
Py_ssize_t size;
|
Py_ssize_t size;
|
||||||
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
|
PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
|
||||||
"object");
|
"object");
|
||||||
if (!obj) return -1;
|
if (!obj) return -1;
|
||||||
size = PyString_GET_SIZE(obj);
|
size = PyBytes_GET_SIZE(obj);
|
||||||
if (*start<0)
|
if (*start<0)
|
||||||
*start = 0;
|
*start = 0;
|
||||||
if (*start>=size)
|
if (*start>=size)
|
||||||
|
@ -1382,10 +1398,10 @@ PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
|
||||||
{
|
{
|
||||||
if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
|
if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
|
||||||
Py_ssize_t size;
|
Py_ssize_t size;
|
||||||
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
|
PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
|
||||||
"object");
|
"object");
|
||||||
if (!obj) return -1;
|
if (!obj) return -1;
|
||||||
size = PyString_GET_SIZE(obj);
|
size = PyBytes_GET_SIZE(obj);
|
||||||
if (*end<1)
|
if (*end<1)
|
||||||
*end = 1;
|
*end = 1;
|
||||||
if (*end>size)
|
if (*end>size)
|
||||||
|
@ -1629,7 +1645,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
|
||||||
if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
|
if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
|
||||||
return -1;
|
return -1;
|
||||||
return UnicodeError_init((PyUnicodeErrorObject *)self, args,
|
return UnicodeError_init((PyUnicodeErrorObject *)self, args,
|
||||||
kwds, &PyString_Type);
|
kwds, &PyBytes_Type);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -1648,7 +1664,7 @@ UnicodeDecodeError_str(PyObject *self)
|
||||||
/* FromFormat does not support %02x, so format that separately */
|
/* FromFormat does not support %02x, so format that separately */
|
||||||
char byte[4];
|
char byte[4];
|
||||||
PyOS_snprintf(byte, sizeof(byte), "%02x",
|
PyOS_snprintf(byte, sizeof(byte), "%02x",
|
||||||
((int)PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
|
((int)PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
|
||||||
return PyString_FromFormat(
|
return PyString_FromFormat(
|
||||||
"'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
|
"'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
|
||||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
|
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
|
||||||
|
@ -1689,7 +1705,7 @@ PyUnicodeDecodeError_Create(
|
||||||
assert(length < INT_MAX);
|
assert(length < INT_MAX);
|
||||||
assert(start < INT_MAX);
|
assert(start < INT_MAX);
|
||||||
assert(end < INT_MAX);
|
assert(end < INT_MAX);
|
||||||
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#nns",
|
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns",
|
||||||
encoding, object, length, start, end, reason);
|
encoding, object, length, start, end, reason);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -819,6 +819,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case 'y': {/* bytes */
|
||||||
|
if (*format == '#') {
|
||||||
|
void **p = (void **)va_arg(*p_va, char **);
|
||||||
|
FETCH_SIZE;
|
||||||
|
|
||||||
|
if (PyBytes_Check(arg)) {
|
||||||
|
*p = PyBytes_AS_STRING(arg);
|
||||||
|
STORE_SIZE(PyBytes_GET_SIZE(arg));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return converterr("bytes", arg, msgbuf, bufsize);
|
||||||
|
format++;
|
||||||
|
} else {
|
||||||
|
char **p = va_arg(*p_va, char **);
|
||||||
|
|
||||||
|
if (PyBytes_Check(arg))
|
||||||
|
*p = PyBytes_AS_STRING(arg);
|
||||||
|
else
|
||||||
|
return converterr("bytes", arg, msgbuf, bufsize);
|
||||||
|
if ((Py_ssize_t)strlen(*p) != PyBytes_Size(arg))
|
||||||
|
return converterr("bytes without null bytes",
|
||||||
|
arg, msgbuf, bufsize);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case 'z': {/* string, may be NULL (None) */
|
case 'z': {/* string, may be NULL (None) */
|
||||||
if (*format == '#') { /* any buffer-like object */
|
if (*format == '#') { /* any buffer-like object */
|
||||||
void **p = (void **)va_arg(*p_va, char **);
|
void **p = (void **)va_arg(*p_va, char **);
|
||||||
|
@ -1595,6 +1621,7 @@ skipitem(const char **p_format, va_list *p_va, int flags)
|
||||||
|
|
||||||
case 's': /* string */
|
case 's': /* string */
|
||||||
case 'z': /* string or None */
|
case 'z': /* string or None */
|
||||||
|
case 'y': /* bytes */
|
||||||
case 'u': /* unicode string */
|
case 'u': /* unicode string */
|
||||||
case 't': /* buffer, read-only */
|
case 't': /* buffer, read-only */
|
||||||
case 'w': /* buffer, read-write */
|
case 'w': /* buffer, read-write */
|
||||||
|
|
|
@ -424,6 +424,39 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case 'y':
|
||||||
|
{
|
||||||
|
PyObject *v;
|
||||||
|
char *str = va_arg(*p_va, char *);
|
||||||
|
Py_ssize_t n;
|
||||||
|
if (**p_format == '#') {
|
||||||
|
++*p_format;
|
||||||
|
if (flags & FLAG_SIZE_T)
|
||||||
|
n = va_arg(*p_va, Py_ssize_t);
|
||||||
|
else
|
||||||
|
n = va_arg(*p_va, int);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
n = -1;
|
||||||
|
if (str == NULL) {
|
||||||
|
v = Py_None;
|
||||||
|
Py_INCREF(v);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (n < 0) {
|
||||||
|
size_t m = strlen(str);
|
||||||
|
if (m > PY_SSIZE_T_MAX) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"string too long for Python bytes");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
n = (Py_ssize_t)m;
|
||||||
|
}
|
||||||
|
v = PyBytes_FromStringAndSize(str, n);
|
||||||
|
}
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
case 'N':
|
case 'N':
|
||||||
case 'S':
|
case 'S':
|
||||||
case 'O':
|
case 'O':
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue