Add T_PYSSIZET in structmember.h: This can be used for
Py_ssize_t members.

Simplify the implementation of UnicodeError objects:
start and end attributes are now stored directly as
Py_ssize_t members, which simplifies various get and
set functions.
This commit is contained in:
Walter Dörwald 2007-06-13 16:57:12 +00:00
parent 22000cbd6b
commit 84a3efec37
5 changed files with 120 additions and 168 deletions

View file

@ -34,8 +34,8 @@ typedef struct {
PyObject *message;
PyObject *encoding;
PyObject *object;
PyObject *start;
PyObject *end;
Py_ssize_t start;
Py_ssize_t end;
PyObject *reason;
} PyUnicodeErrorObject;
#endif

View file

@ -68,6 +68,7 @@ typedef struct PyMemberDef {
/* Member type codes that require compiler support for long long. */
#ifdef HAVE_LONG_LONG
#define T_LONGLONG 17
#define T_ULONGLONG 18
#endif /* HAVE_LONG_LONG */
/* Py_ssize_t member type code. Defined outside the HAVE_LONG_LONG guard
   because it is used unconditionally by PyMemberDef tables and by
   PyMember_GetOne/PyMember_SetOne. */
#define T_PYSSIZET 19 /* Py_ssize_t */
/* Flags */

View file

@ -14,6 +14,8 @@ Core and builtins
- Patch #1733960: Allow T_LONGLONG to accept ints.
- T_PYSSIZET can now be used in PyMemberDef lists for Py_ssize_t members.
- Prevent expandtabs() on string and unicode objects from causing a segfault
when a large width is passed on 32-bit platforms.
@ -687,6 +689,9 @@ Library
- Fix utf-8-sig incremental decoder, which didn't recognise a BOM when the
first chunk fed to the decoder started with a BOM, but was longer than 3 bytes.
- The implementation of UnicodeError objects has been simplified (start and end
attributes are now stored directly as Py_ssize_t members).
Extension Modules
-----------------

View file

@ -1238,38 +1238,6 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError,
"Unicode related error.");
#ifdef Py_USING_UNICODE
static int
get_int(PyObject *attr, Py_ssize_t *value, const char *name)
{
if (!attr) {
PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
return -1;
}
if (PyInt_Check(attr)) {
*value = PyInt_AS_LONG(attr);
} else if (PyLong_Check(attr)) {
*value = _PyLong_AsSsize_t(attr);
if (*value == -1 && PyErr_Occurred())
return -1;
} else {
PyErr_Format(PyExc_TypeError, "%.200s attribute must be int", name);
return -1;
}
return 0;
}
static int
set_ssize_t(PyObject **attr, Py_ssize_t value)
{
PyObject *obj = PyInt_FromSsize_t(value);
if (!obj)
return -1;
Py_CLEAR(*attr);
*attr = obj;
return 0;
}
static PyObject *
get_string(PyObject *attr, const char *name)
{
@ -1349,11 +1317,12 @@ PyUnicodeTranslateError_GetObject(PyObject *exc)
int
PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
{
if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
Py_ssize_t size;
PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object,
"object");
if (!obj) return -1;
if (!obj)
return -1;
*start = ((PyUnicodeErrorObject *)exc)->start;
size = PyUnicode_GET_SIZE(obj);
if (*start<0)
*start = 0; /*XXX check for values <0*/
@ -1362,19 +1331,18 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
Py_DECREF(obj);
return 0;
}
return -1;
}
int
PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
{
if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
Py_ssize_t size;
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
"object");
if (!obj) return -1;
if (!obj)
return -1;
size = PyString_GET_SIZE(obj);
*start = ((PyUnicodeErrorObject *)exc)->start;
if (*start<0)
*start = 0;
if (*start>=size)
@ -1382,8 +1350,6 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
Py_DECREF(obj);
return 0;
}
return -1;
}
int
@ -1396,32 +1362,36 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start)
int
PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
{
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->start, start);
((PyUnicodeErrorObject *)exc)->start = start;
return 0;
}
int
PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
{
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->start, start);
((PyUnicodeErrorObject *)exc)->start = start;
return 0;
}
int
PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
{
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->start, start);
((PyUnicodeErrorObject *)exc)->start = start;
return 0;
}
int
PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
{
if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
Py_ssize_t size;
PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object,
"object");
if (!obj) return -1;
if (!obj)
return -1;
*end = ((PyUnicodeErrorObject *)exc)->end;
size = PyUnicode_GET_SIZE(obj);
if (*end<1)
*end = 1;
@ -1430,18 +1400,17 @@ PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
Py_DECREF(obj);
return 0;
}
return -1;
}
int
PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
{
if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
Py_ssize_t size;
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
"object");
if (!obj) return -1;
if (!obj)
return -1;
*end = ((PyUnicodeErrorObject *)exc)->end;
size = PyString_GET_SIZE(obj);
if (*end<1)
*end = 1;
@ -1450,8 +1419,6 @@ PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
Py_DECREF(obj);
return 0;
}
return -1;
}
int
@ -1464,21 +1431,24 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *start)
int
PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
{
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->end, end);
((PyUnicodeErrorObject *)exc)->end = end;
return 0;
}
int
PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end)
{
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->end, end);
((PyUnicodeErrorObject *)exc)->end = end;
return 0;
}
int
PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
{
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->end, end);
((PyUnicodeErrorObject *)exc)->end = end;
return 0;
}
PyObject *
@ -1529,25 +1499,20 @@ UnicodeError_init(PyUnicodeErrorObject *self, PyObject *args, PyObject *kwds,
{
Py_CLEAR(self->encoding);
Py_CLEAR(self->object);
Py_CLEAR(self->start);
Py_CLEAR(self->end);
Py_CLEAR(self->reason);
if (!PyArg_ParseTuple(args, "O!O!O!O!O!",
if (!PyArg_ParseTuple(args, "O!O!nnO!",
&PyString_Type, &self->encoding,
objecttype, &self->object,
&PyInt_Type, &self->start,
&PyInt_Type, &self->end,
&self->start,
&self->end,
&PyString_Type, &self->reason)) {
self->encoding = self->object = self->start = self->end =
self->reason = NULL;
self->encoding = self->object = self->reason = NULL;
return -1;
}
Py_INCREF(self->encoding);
Py_INCREF(self->object);
Py_INCREF(self->start);
Py_INCREF(self->end);
Py_INCREF(self->reason);
return 0;
@ -1558,8 +1523,6 @@ UnicodeError_clear(PyUnicodeErrorObject *self)
{
Py_CLEAR(self->encoding);
Py_CLEAR(self->object);
Py_CLEAR(self->start);
Py_CLEAR(self->end);
Py_CLEAR(self->reason);
return BaseException_clear((PyBaseExceptionObject *)self);
}
@ -1577,8 +1540,6 @@ UnicodeError_traverse(PyUnicodeErrorObject *self, visitproc visit, void *arg)
{
Py_VISIT(self->encoding);
Py_VISIT(self->object);
Py_VISIT(self->start);
Py_VISIT(self->end);
Py_VISIT(self->reason);
return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg);
}
@ -1588,9 +1549,9 @@ static PyMemberDef UnicodeError_members[] = {
PyDoc_STR("exception encoding")},
{"object", T_OBJECT, offsetof(PyUnicodeErrorObject, object), 0,
PyDoc_STR("exception object")},
{"start", T_OBJECT, offsetof(PyUnicodeErrorObject, start), 0,
{"start", T_PYSSIZET, offsetof(PyUnicodeErrorObject, start), 0,
PyDoc_STR("exception start")},
{"end", T_OBJECT, offsetof(PyUnicodeErrorObject, end), 0,
{"end", T_PYSSIZET, offsetof(PyUnicodeErrorObject, end), 0,
PyDoc_STR("exception end")},
{"reason", T_OBJECT, offsetof(PyUnicodeErrorObject, reason), 0,
PyDoc_STR("exception reason")},
@ -1614,17 +1575,10 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
UnicodeEncodeError_str(PyObject *self)
{
Py_ssize_t start;
Py_ssize_t end;
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
if (PyUnicodeEncodeError_GetStart(self, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(self, &end))
return NULL;
if (end==start+1) {
int badchar = (int)PyUnicode_AS_UNICODE(((PyUnicodeErrorObject *)self)->object)[start];
if (uself->end==uself->start+1) {
int badchar = (int)PyUnicode_AS_UNICODE(uself->object)[uself->start];
char badchar_str[20];
if (badchar <= 0xff)
PyOS_snprintf(badchar_str, sizeof(badchar_str), "x%02x", badchar);
@ -1634,18 +1588,18 @@ UnicodeEncodeError_str(PyObject *self)
PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", badchar);
return PyString_FromFormat(
"'%.400s' codec can't encode character u'\\%s' in position %zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
PyString_AS_STRING(uself->encoding),
badchar_str,
start,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
uself->start,
PyString_AS_STRING(uself->reason)
);
}
return PyString_FromFormat(
"'%.400s' codec can't encode characters in position %zd-%zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
start,
(end-1),
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
PyString_AS_STRING(uself->encoding),
uself->start,
uself->end-1,
PyString_AS_STRING(uself->reason)
);
}
@ -1690,34 +1644,27 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
UnicodeDecodeError_str(PyObject *self)
{
Py_ssize_t start = 0;
Py_ssize_t end = 0;
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
if (PyUnicodeDecodeError_GetStart(self, &start))
return NULL;
if (PyUnicodeDecodeError_GetEnd(self, &end))
return NULL;
if (end==start+1) {
if (uself->end==uself->start+1) {
/* FromFormat does not support %02x, so format that separately */
char byte[4];
PyOS_snprintf(byte, sizeof(byte), "%02x",
((int)PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
((int)PyString_AS_STRING(uself->object)[uself->start])&0xff);
return PyString_FromFormat(
"'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
PyString_AS_STRING(uself->encoding),
byte,
start,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
uself->start,
PyString_AS_STRING(uself->reason)
);
}
return PyString_FromFormat(
"'%.400s' codec can't decode bytes in position %zd-%zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
start,
(end-1),
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
PyString_AS_STRING(uself->encoding),
uself->start,
uself->end-1,
PyString_AS_STRING(uself->reason)
);
}
@ -1761,22 +1708,18 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
return -1;
Py_CLEAR(self->object);
Py_CLEAR(self->start);
Py_CLEAR(self->end);
Py_CLEAR(self->reason);
if (!PyArg_ParseTuple(args, "O!O!O!O!",
if (!PyArg_ParseTuple(args, "O!nnO!",
&PyUnicode_Type, &self->object,
&PyInt_Type, &self->start,
&PyInt_Type, &self->end,
&self->start,
&self->end,
&PyString_Type, &self->reason)) {
self->object = self->start = self->end = self->reason = NULL;
self->object = self->reason = NULL;
return -1;
}
Py_INCREF(self->object);
Py_INCREF(self->start);
Py_INCREF(self->end);
Py_INCREF(self->reason);
return 0;
@ -1786,17 +1729,10 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
static PyObject *
UnicodeTranslateError_str(PyObject *self)
{
Py_ssize_t start;
Py_ssize_t end;
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
if (PyUnicodeTranslateError_GetStart(self, &start))
return NULL;
if (PyUnicodeTranslateError_GetEnd(self, &end))
return NULL;
if (end==start+1) {
int badchar = (int)PyUnicode_AS_UNICODE(((PyUnicodeErrorObject *)self)->object)[start];
if (uself->end==uself->start+1) {
int badchar = (int)PyUnicode_AS_UNICODE(uself->object)[uself->start];
char badchar_str[20];
if (badchar <= 0xff)
PyOS_snprintf(badchar_str, sizeof(badchar_str), "x%02x", badchar);
@ -1807,15 +1743,15 @@ UnicodeTranslateError_str(PyObject *self)
return PyString_FromFormat(
"can't translate character u'\\%s' in position %zd: %.400s",
badchar_str,
start,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
uself->start,
PyString_AS_STRING(uself->reason)
);
}
return PyString_FromFormat(
"can't translate characters in position %zd-%zd: %.400s",
start,
(end-1),
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
uself->start,
uself->end-1,
PyString_AS_STRING(uself->reason)
);
}

View file

@ -85,6 +85,9 @@ PyMember_GetOne(const char *addr, PyMemberDef *l)
case T_ULONG:
v = PyLong_FromUnsignedLong(*(unsigned long*)addr);
break;
case T_PYSSIZET:
v = PyInt_FromSsize_t(*(Py_ssize_t*)addr);
break;
case T_FLOAT:
v = PyFloat_FromDouble((double)*(float*)addr);
break;
@ -263,6 +266,13 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v)
}
break;
}
case T_PYSSIZET:{
*(Py_ssize_t*)addr = PyInt_AsSsize_t(v);
if ((*(Py_ssize_t*)addr == (Py_ssize_t)-1)
&& PyErr_Occurred())
return -1;
break;
}
case T_FLOAT:{
double double_val;
double_val = PyFloat_AsDouble(v);