mirror of
https://github.com/python/cpython.git
synced 2025-09-03 15:31:08 +00:00
bpo-36365: Rewrite structseq_repr() using _PyUnicodeWriter (GH-12440)
No longer limit repr(structseq) to 512 bytes. Use _PyUnicodeWriter for better performance and to write Unicode directly, rather than encoding the repr() value to UTF-8 and then decoding it from UTF-8.
This commit is contained in:
parent
fd23cfa464
commit
c70ab02df2
2 changed files with 69 additions and 58 deletions
|
@ -0,0 +1 @@
|
||||||
|
repr(structseq) is no longer limited to 512 bytes.
|
|
@ -168,78 +168,88 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
structseq_repr(PyStructSequence *obj)
|
structseq_repr(PyStructSequence *obj)
|
||||||
{
|
{
|
||||||
/* buffer and type size were chosen well considered. */
|
|
||||||
#define REPR_BUFFER_SIZE 512
|
|
||||||
#define TYPE_MAXSIZE 100
|
|
||||||
|
|
||||||
PyTypeObject *typ = Py_TYPE(obj);
|
PyTypeObject *typ = Py_TYPE(obj);
|
||||||
Py_ssize_t i;
|
_PyUnicodeWriter writer;
|
||||||
int removelast = 0;
|
|
||||||
Py_ssize_t len;
|
|
||||||
char buf[REPR_BUFFER_SIZE];
|
|
||||||
char *endofbuf, *pbuf = buf;
|
|
||||||
|
|
||||||
/* pointer to end of writeable buffer; saves space for "...)\0" */
|
/* Write "typename(" */
|
||||||
endofbuf= &buf[REPR_BUFFER_SIZE-5];
|
PyObject *type_name = PyUnicode_DecodeUTF8(typ->tp_name,
|
||||||
|
strlen(typ->tp_name),
|
||||||
|
NULL);
|
||||||
|
if (type_name == NULL) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
/* "typename(", limited to TYPE_MAXSIZE */
|
_PyUnicodeWriter_Init(&writer);
|
||||||
len = strlen(typ->tp_name);
|
writer.overallocate = 1;
|
||||||
len = Py_MIN(len, TYPE_MAXSIZE);
|
/* count 5 characters per item: "x=1, " */
|
||||||
memcpy(pbuf, typ->tp_name, len);
|
writer.min_length = (PyUnicode_GET_LENGTH(type_name) + 1
|
||||||
pbuf += len;
|
+ VISIBLE_SIZE(obj) * 5 + 1);
|
||||||
*pbuf++ = '(';
|
|
||||||
|
|
||||||
for (i=0; i < VISIBLE_SIZE(obj); i++) {
|
if (_PyUnicodeWriter_WriteStr(&writer, type_name) < 0) {
|
||||||
PyObject *val, *repr;
|
Py_DECREF(type_name);
|
||||||
const char *cname, *crepr;
|
goto error;
|
||||||
|
}
|
||||||
|
Py_DECREF(type_name);
|
||||||
|
|
||||||
cname = typ->tp_members[i].name;
|
if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) {
|
||||||
if (cname == NULL) {
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Py_ssize_t i=0; i < VISIBLE_SIZE(obj); i++) {
|
||||||
|
if (i > 0) {
|
||||||
|
/* Write ", " */
|
||||||
|
if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write "name=repr" */
|
||||||
|
const char *name_utf8 = typ->tp_members[i].name;
|
||||||
|
if (name_utf8 == NULL) {
|
||||||
PyErr_Format(PyExc_SystemError, "In structseq_repr(), member %zd name is NULL"
|
PyErr_Format(PyExc_SystemError, "In structseq_repr(), member %zd name is NULL"
|
||||||
" for type %.500s", i, typ->tp_name);
|
" for type %.500s", i, typ->tp_name);
|
||||||
return NULL;
|
goto error;
|
||||||
}
|
|
||||||
val = PyStructSequence_GET_ITEM(obj, i);
|
|
||||||
repr = PyObject_Repr(val);
|
|
||||||
if (repr == NULL)
|
|
||||||
return NULL;
|
|
||||||
crepr = PyUnicode_AsUTF8(repr);
|
|
||||||
if (crepr == NULL) {
|
|
||||||
Py_DECREF(repr);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* + 3: keep space for "=" and ", " */
|
PyObject *name = PyUnicode_DecodeUTF8(name_utf8, strlen(name_utf8), NULL);
|
||||||
len = strlen(cname) + strlen(crepr) + 3;
|
if (name == NULL) {
|
||||||
if ((pbuf+len) <= endofbuf) {
|
goto error;
|
||||||
strcpy(pbuf, cname);
|
|
||||||
pbuf += strlen(cname);
|
|
||||||
*pbuf++ = '=';
|
|
||||||
strcpy(pbuf, crepr);
|
|
||||||
pbuf += strlen(crepr);
|
|
||||||
*pbuf++ = ',';
|
|
||||||
*pbuf++ = ' ';
|
|
||||||
removelast = 1;
|
|
||||||
Py_DECREF(repr);
|
|
||||||
}
|
}
|
||||||
else {
|
if (_PyUnicodeWriter_WriteStr(&writer, name) < 0) {
|
||||||
strcpy(pbuf, "...");
|
Py_DECREF(name);
|
||||||
pbuf += 3;
|
goto error;
|
||||||
removelast = 0;
|
|
||||||
Py_DECREF(repr);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
Py_DECREF(name);
|
||||||
if (removelast) {
|
|
||||||
/* overwrite last ", " */
|
|
||||||
pbuf-=2;
|
|
||||||
}
|
|
||||||
*pbuf++ = ')';
|
|
||||||
*pbuf = '\0';
|
|
||||||
|
|
||||||
return PyUnicode_FromString(buf);
|
if (_PyUnicodeWriter_WriteChar(&writer, '=') < 0) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *value = PyStructSequence_GET_ITEM(obj, i);
|
||||||
|
assert(value != NULL);
|
||||||
|
PyObject *repr = PyObject_Repr(value);
|
||||||
|
if (repr == NULL) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
if (_PyUnicodeWriter_WriteStr(&writer, repr) < 0) {
|
||||||
|
Py_DECREF(repr);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
Py_DECREF(repr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
return _PyUnicodeWriter_Finish(&writer);
|
||||||
|
|
||||||
|
error:
|
||||||
|
_PyUnicodeWriter_Dealloc(&writer);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
structseq_reduce(PyStructSequence* self, PyObject *Py_UNUSED(ignored))
|
structseq_reduce(PyStructSequence* self, PyObject *Py_UNUSED(ignored))
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue