Add T_PYSSIZET in structmember.h: This can be used for Py_ssize_t members.

Simplify the implementation of UnicodeError objects:
start and end attributes are now stored directly as
Py_ssize_t members, which simplifies various get and
set functions.
This commit is contained in:
Walter Dörwald 2007-06-13 16:57:12 +00:00
parent 22000cbd6b
commit 84a3efec37
5 changed files with 120 additions and 168 deletions

View file

@ -34,8 +34,8 @@ typedef struct {
PyObject *message; PyObject *message;
PyObject *encoding; PyObject *encoding;
PyObject *object; PyObject *object;
PyObject *start; Py_ssize_t start;
PyObject *end; Py_ssize_t end;
PyObject *reason; PyObject *reason;
} PyUnicodeErrorObject; } PyUnicodeErrorObject;
#endif #endif

View file

@ -68,6 +68,7 @@ typedef struct PyMemberDef {
#ifdef HAVE_LONG_LONG #ifdef HAVE_LONG_LONG
#define T_LONGLONG 17 #define T_LONGLONG 17
#define T_ULONGLONG 18 #define T_ULONGLONG 18
#define T_PYSSIZET 19 /* Py_ssize_t */
#endif /* HAVE_LONG_LONG */ #endif /* HAVE_LONG_LONG */
/* Flags */ /* Flags */

View file

@ -14,6 +14,8 @@ Core and builtins
- Patch #1733960: Allow T_LONGLONG to accept ints. - Patch #1733960: Allow T_LONGLONG to accept ints.
- T_PYSSIZET can now be used in PyMemberDef lists for Py_ssize_t members.
- Prevent expandtabs() on string and unicode objects from causing a segfault - Prevent expandtabs() on string and unicode objects from causing a segfault
when a large width is passed on 32-bit platforms. when a large width is passed on 32-bit platforms.
@ -687,6 +689,9 @@ Library
- Fix utf-8-sig incremental decoder, which didn't recognise a BOM when the - Fix utf-8-sig incremental decoder, which didn't recognise a BOM when the
first chunk fed to the decoder started with a BOM, but was longer than 3 bytes. first chunk fed to the decoder started with a BOM, but was longer than 3 bytes.
- The implementation of UnicodeError objects has been simplified (start and end
attributes are now stored directly as Py_ssize_t members).
Extension Modules Extension Modules
----------------- -----------------

View file

@ -1238,38 +1238,6 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError,
"Unicode related error."); "Unicode related error.");
#ifdef Py_USING_UNICODE #ifdef Py_USING_UNICODE
static int
get_int(PyObject *attr, Py_ssize_t *value, const char *name)
{
if (!attr) {
PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
return -1;
}
if (PyInt_Check(attr)) {
*value = PyInt_AS_LONG(attr);
} else if (PyLong_Check(attr)) {
*value = _PyLong_AsSsize_t(attr);
if (*value == -1 && PyErr_Occurred())
return -1;
} else {
PyErr_Format(PyExc_TypeError, "%.200s attribute must be int", name);
return -1;
}
return 0;
}
static int
set_ssize_t(PyObject **attr, Py_ssize_t value)
{
PyObject *obj = PyInt_FromSsize_t(value);
if (!obj)
return -1;
Py_CLEAR(*attr);
*attr = obj;
return 0;
}
static PyObject * static PyObject *
get_string(PyObject *attr, const char *name) get_string(PyObject *attr, const char *name)
{ {
@ -1349,11 +1317,12 @@ PyUnicodeTranslateError_GetObject(PyObject *exc)
int int
PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
{ {
if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
Py_ssize_t size; Py_ssize_t size;
PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object,
"object"); "object");
if (!obj) return -1; if (!obj)
return -1;
*start = ((PyUnicodeErrorObject *)exc)->start;
size = PyUnicode_GET_SIZE(obj); size = PyUnicode_GET_SIZE(obj);
if (*start<0) if (*start<0)
*start = 0; /*XXX check for values <0*/ *start = 0; /*XXX check for values <0*/
@ -1362,19 +1331,18 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
Py_DECREF(obj); Py_DECREF(obj);
return 0; return 0;
} }
return -1;
}
int int
PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
{ {
if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
Py_ssize_t size; Py_ssize_t size;
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
"object"); "object");
if (!obj) return -1; if (!obj)
return -1;
size = PyString_GET_SIZE(obj); size = PyString_GET_SIZE(obj);
*start = ((PyUnicodeErrorObject *)exc)->start;
if (*start<0) if (*start<0)
*start = 0; *start = 0;
if (*start>=size) if (*start>=size)
@ -1382,8 +1350,6 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
Py_DECREF(obj); Py_DECREF(obj);
return 0; return 0;
} }
return -1;
}
int int
@ -1396,32 +1362,36 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start)
int int
PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
{ {
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->start, start); ((PyUnicodeErrorObject *)exc)->start = start;
return 0;
} }
int int
PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
{ {
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->start, start); ((PyUnicodeErrorObject *)exc)->start = start;
return 0;
} }
int int
PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
{ {
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->start, start); ((PyUnicodeErrorObject *)exc)->start = start;
return 0;
} }
int int
PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
{ {
if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
Py_ssize_t size; Py_ssize_t size;
PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object,
"object"); "object");
if (!obj) return -1; if (!obj)
return -1;
*end = ((PyUnicodeErrorObject *)exc)->end;
size = PyUnicode_GET_SIZE(obj); size = PyUnicode_GET_SIZE(obj);
if (*end<1) if (*end<1)
*end = 1; *end = 1;
@ -1430,18 +1400,17 @@ PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
Py_DECREF(obj); Py_DECREF(obj);
return 0; return 0;
} }
return -1;
}
int int
PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
{ {
if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
Py_ssize_t size; Py_ssize_t size;
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
"object"); "object");
if (!obj) return -1; if (!obj)
return -1;
*end = ((PyUnicodeErrorObject *)exc)->end;
size = PyString_GET_SIZE(obj); size = PyString_GET_SIZE(obj);
if (*end<1) if (*end<1)
*end = 1; *end = 1;
@ -1450,8 +1419,6 @@ PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
Py_DECREF(obj); Py_DECREF(obj);
return 0; return 0;
} }
return -1;
}
int int
@ -1464,21 +1431,24 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *start)
int int
PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
{ {
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->end, end); ((PyUnicodeErrorObject *)exc)->end = end;
return 0;
} }
int int
PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end)
{ {
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->end, end); ((PyUnicodeErrorObject *)exc)->end = end;
return 0;
} }
int int
PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
{ {
return set_ssize_t(&((PyUnicodeErrorObject *)exc)->end, end); ((PyUnicodeErrorObject *)exc)->end = end;
return 0;
} }
PyObject * PyObject *
@ -1529,25 +1499,20 @@ UnicodeError_init(PyUnicodeErrorObject *self, PyObject *args, PyObject *kwds,
{ {
Py_CLEAR(self->encoding); Py_CLEAR(self->encoding);
Py_CLEAR(self->object); Py_CLEAR(self->object);
Py_CLEAR(self->start);
Py_CLEAR(self->end);
Py_CLEAR(self->reason); Py_CLEAR(self->reason);
if (!PyArg_ParseTuple(args, "O!O!O!O!O!", if (!PyArg_ParseTuple(args, "O!O!nnO!",
&PyString_Type, &self->encoding, &PyString_Type, &self->encoding,
objecttype, &self->object, objecttype, &self->object,
&PyInt_Type, &self->start, &self->start,
&PyInt_Type, &self->end, &self->end,
&PyString_Type, &self->reason)) { &PyString_Type, &self->reason)) {
self->encoding = self->object = self->start = self->end = self->encoding = self->object = self->reason = NULL;
self->reason = NULL;
return -1; return -1;
} }
Py_INCREF(self->encoding); Py_INCREF(self->encoding);
Py_INCREF(self->object); Py_INCREF(self->object);
Py_INCREF(self->start);
Py_INCREF(self->end);
Py_INCREF(self->reason); Py_INCREF(self->reason);
return 0; return 0;
@ -1558,8 +1523,6 @@ UnicodeError_clear(PyUnicodeErrorObject *self)
{ {
Py_CLEAR(self->encoding); Py_CLEAR(self->encoding);
Py_CLEAR(self->object); Py_CLEAR(self->object);
Py_CLEAR(self->start);
Py_CLEAR(self->end);
Py_CLEAR(self->reason); Py_CLEAR(self->reason);
return BaseException_clear((PyBaseExceptionObject *)self); return BaseException_clear((PyBaseExceptionObject *)self);
} }
@ -1577,8 +1540,6 @@ UnicodeError_traverse(PyUnicodeErrorObject *self, visitproc visit, void *arg)
{ {
Py_VISIT(self->encoding); Py_VISIT(self->encoding);
Py_VISIT(self->object); Py_VISIT(self->object);
Py_VISIT(self->start);
Py_VISIT(self->end);
Py_VISIT(self->reason); Py_VISIT(self->reason);
return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg); return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg);
} }
@ -1588,9 +1549,9 @@ static PyMemberDef UnicodeError_members[] = {
PyDoc_STR("exception encoding")}, PyDoc_STR("exception encoding")},
{"object", T_OBJECT, offsetof(PyUnicodeErrorObject, object), 0, {"object", T_OBJECT, offsetof(PyUnicodeErrorObject, object), 0,
PyDoc_STR("exception object")}, PyDoc_STR("exception object")},
{"start", T_OBJECT, offsetof(PyUnicodeErrorObject, start), 0, {"start", T_PYSSIZET, offsetof(PyUnicodeErrorObject, start), 0,
PyDoc_STR("exception start")}, PyDoc_STR("exception start")},
{"end", T_OBJECT, offsetof(PyUnicodeErrorObject, end), 0, {"end", T_PYSSIZET, offsetof(PyUnicodeErrorObject, end), 0,
PyDoc_STR("exception end")}, PyDoc_STR("exception end")},
{"reason", T_OBJECT, offsetof(PyUnicodeErrorObject, reason), 0, {"reason", T_OBJECT, offsetof(PyUnicodeErrorObject, reason), 0,
PyDoc_STR("exception reason")}, PyDoc_STR("exception reason")},
@ -1614,17 +1575,10 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject * static PyObject *
UnicodeEncodeError_str(PyObject *self) UnicodeEncodeError_str(PyObject *self)
{ {
Py_ssize_t start; PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
Py_ssize_t end;
if (PyUnicodeEncodeError_GetStart(self, &start)) if (uself->end==uself->start+1) {
return NULL; int badchar = (int)PyUnicode_AS_UNICODE(uself->object)[uself->start];
if (PyUnicodeEncodeError_GetEnd(self, &end))
return NULL;
if (end==start+1) {
int badchar = (int)PyUnicode_AS_UNICODE(((PyUnicodeErrorObject *)self)->object)[start];
char badchar_str[20]; char badchar_str[20];
if (badchar <= 0xff) if (badchar <= 0xff)
PyOS_snprintf(badchar_str, sizeof(badchar_str), "x%02x", badchar); PyOS_snprintf(badchar_str, sizeof(badchar_str), "x%02x", badchar);
@ -1634,18 +1588,18 @@ UnicodeEncodeError_str(PyObject *self)
PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", badchar); PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", badchar);
return PyString_FromFormat( return PyString_FromFormat(
"'%.400s' codec can't encode character u'\\%s' in position %zd: %.400s", "'%.400s' codec can't encode character u'\\%s' in position %zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding), PyString_AS_STRING(uself->encoding),
badchar_str, badchar_str,
start, uself->start,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) PyString_AS_STRING(uself->reason)
); );
} }
return PyString_FromFormat( return PyString_FromFormat(
"'%.400s' codec can't encode characters in position %zd-%zd: %.400s", "'%.400s' codec can't encode characters in position %zd-%zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding), PyString_AS_STRING(uself->encoding),
start, uself->start,
(end-1), uself->end-1,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) PyString_AS_STRING(uself->reason)
); );
} }
@ -1690,34 +1644,27 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject * static PyObject *
UnicodeDecodeError_str(PyObject *self) UnicodeDecodeError_str(PyObject *self)
{ {
Py_ssize_t start = 0; PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
Py_ssize_t end = 0;
if (PyUnicodeDecodeError_GetStart(self, &start)) if (uself->end==uself->start+1) {
return NULL;
if (PyUnicodeDecodeError_GetEnd(self, &end))
return NULL;
if (end==start+1) {
/* FromFormat does not support %02x, so format that separately */ /* FromFormat does not support %02x, so format that separately */
char byte[4]; char byte[4];
PyOS_snprintf(byte, sizeof(byte), "%02x", PyOS_snprintf(byte, sizeof(byte), "%02x",
((int)PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff); ((int)PyString_AS_STRING(uself->object)[uself->start])&0xff);
return PyString_FromFormat( return PyString_FromFormat(
"'%.400s' codec can't decode byte 0x%s in position %zd: %.400s", "'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding), PyString_AS_STRING(uself->encoding),
byte, byte,
start, uself->start,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) PyString_AS_STRING(uself->reason)
); );
} }
return PyString_FromFormat( return PyString_FromFormat(
"'%.400s' codec can't decode bytes in position %zd-%zd: %.400s", "'%.400s' codec can't decode bytes in position %zd-%zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding), PyString_AS_STRING(uself->encoding),
start, uself->start,
(end-1), uself->end-1,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) PyString_AS_STRING(uself->reason)
); );
} }
@ -1761,22 +1708,18 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
return -1; return -1;
Py_CLEAR(self->object); Py_CLEAR(self->object);
Py_CLEAR(self->start);
Py_CLEAR(self->end);
Py_CLEAR(self->reason); Py_CLEAR(self->reason);
if (!PyArg_ParseTuple(args, "O!O!O!O!", if (!PyArg_ParseTuple(args, "O!nnO!",
&PyUnicode_Type, &self->object, &PyUnicode_Type, &self->object,
&PyInt_Type, &self->start, &self->start,
&PyInt_Type, &self->end, &self->end,
&PyString_Type, &self->reason)) { &PyString_Type, &self->reason)) {
self->object = self->start = self->end = self->reason = NULL; self->object = self->reason = NULL;
return -1; return -1;
} }
Py_INCREF(self->object); Py_INCREF(self->object);
Py_INCREF(self->start);
Py_INCREF(self->end);
Py_INCREF(self->reason); Py_INCREF(self->reason);
return 0; return 0;
@ -1786,17 +1729,10 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
static PyObject * static PyObject *
UnicodeTranslateError_str(PyObject *self) UnicodeTranslateError_str(PyObject *self)
{ {
Py_ssize_t start; PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
Py_ssize_t end;
if (PyUnicodeTranslateError_GetStart(self, &start)) if (uself->end==uself->start+1) {
return NULL; int badchar = (int)PyUnicode_AS_UNICODE(uself->object)[uself->start];
if (PyUnicodeTranslateError_GetEnd(self, &end))
return NULL;
if (end==start+1) {
int badchar = (int)PyUnicode_AS_UNICODE(((PyUnicodeErrorObject *)self)->object)[start];
char badchar_str[20]; char badchar_str[20];
if (badchar <= 0xff) if (badchar <= 0xff)
PyOS_snprintf(badchar_str, sizeof(badchar_str), "x%02x", badchar); PyOS_snprintf(badchar_str, sizeof(badchar_str), "x%02x", badchar);
@ -1807,15 +1743,15 @@ UnicodeTranslateError_str(PyObject *self)
return PyString_FromFormat( return PyString_FromFormat(
"can't translate character u'\\%s' in position %zd: %.400s", "can't translate character u'\\%s' in position %zd: %.400s",
badchar_str, badchar_str,
start, uself->start,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) PyString_AS_STRING(uself->reason)
); );
} }
return PyString_FromFormat( return PyString_FromFormat(
"can't translate characters in position %zd-%zd: %.400s", "can't translate characters in position %zd-%zd: %.400s",
start, uself->start,
(end-1), uself->end-1,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason) PyString_AS_STRING(uself->reason)
); );
} }

View file

@ -85,6 +85,9 @@ PyMember_GetOne(const char *addr, PyMemberDef *l)
case T_ULONG: case T_ULONG:
v = PyLong_FromUnsignedLong(*(unsigned long*)addr); v = PyLong_FromUnsignedLong(*(unsigned long*)addr);
break; break;
case T_PYSSIZET:
v = PyInt_FromSsize_t(*(Py_ssize_t*)addr);
break;
case T_FLOAT: case T_FLOAT:
v = PyFloat_FromDouble((double)*(float*)addr); v = PyFloat_FromDouble((double)*(float*)addr);
break; break;
@ -263,6 +266,13 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v)
} }
break; break;
} }
case T_PYSSIZET:{
*(Py_ssize_t*)addr = PyInt_AsSsize_t(v);
if ((*(Py_ssize_t*)addr == (Py_ssize_t)-1)
&& PyErr_Occurred())
return -1;
break;
}
case T_FLOAT:{ case T_FLOAT:{
double double_val; double double_val;
double_val = PyFloat_AsDouble(v); double_val = PyFloat_AsDouble(v);