Close #14716: str.format() now uses the new "unicode writer" API instead of the

PyAccu API. For example, it makes str.format() from 25% to 30% faster on Linux.
This commit is contained in:
Victor Stinner 2012-05-07 12:47:02 +02:00
parent 9fad160411
commit 202fdca133
2 changed files with 148 additions and 170 deletions

View file

@ -13200,6 +13200,135 @@ unicode_endswith(PyObject *self,
return PyBool_FromLong(result);
}
typedef struct {
PyObject *buffer;
void *data;
enum PyUnicode_Kind kind;
Py_UCS4 maxchar;
Py_ssize_t pos;
} unicode_writer_t;
Py_LOCAL_INLINE(void)
unicode_writer_update(unicode_writer_t *writer)
{
writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
writer->data = PyUnicode_DATA(writer->buffer);
writer->kind = PyUnicode_KIND(writer->buffer);
}
Py_LOCAL(int)
unicode_writer_init(unicode_writer_t *writer,
Py_ssize_t length, Py_UCS4 maxchar)
{
writer->pos = 0;
writer->buffer = PyUnicode_New(length, maxchar);
if (writer->buffer == NULL)
return -1;
unicode_writer_update(writer);
return 0;
}
Py_LOCAL_INLINE(int)
unicode_writer_prepare(unicode_writer_t *writer,
Py_ssize_t length, Py_UCS4 maxchar)
{
Py_ssize_t newlen;
PyObject *newbuffer;
if (length > PY_SSIZE_T_MAX - writer->pos) {
PyErr_NoMemory();
return -1;
}
newlen = writer->pos + length;
if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) {
/* overallocate 25% to limit the number of resize */
if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
newlen += newlen / 4;
if (maxchar > writer->maxchar) {
/* resize + widen */
newbuffer = PyUnicode_New(newlen, maxchar);
if (newbuffer == NULL)
return -1;
PyUnicode_CopyCharacters(newbuffer, 0,
writer->buffer, 0, writer->pos);
Py_DECREF(writer->buffer);
}
else {
newbuffer = resize_compact(writer->buffer, newlen);
if (newbuffer == NULL)
return -1;
}
writer->buffer = newbuffer;
unicode_writer_update(writer);
}
else if (maxchar > writer->maxchar) {
if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0)
return -1;
unicode_writer_update(writer);
}
return 0;
}
Py_LOCAL_INLINE(int)
unicode_writer_write_str(
unicode_writer_t *writer,
PyObject *str, Py_ssize_t start, Py_ssize_t length)
{
Py_UCS4 maxchar;
assert(str != NULL);
assert(PyUnicode_Check(str));
if (PyUnicode_READY(str) == -1)
return -1;
assert(0 <= start);
assert(0 <= length);
assert(start + length <= PyUnicode_GET_LENGTH(str));
if (length == 0)
return 0;
maxchar = _PyUnicode_FindMaxChar(str, start, start + length);
if (unicode_writer_prepare(writer, length, maxchar) == -1)
return -1;
assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer));
copy_characters(writer->buffer, writer->pos,
str, start, length);
writer->pos += length;
return 0;
}
Py_LOCAL_INLINE(int)
unicode_writer_write_char(
unicode_writer_t *writer,
Py_UCS4 ch)
{
if (unicode_writer_prepare(writer, 1, ch) == -1)
return -1;
assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer));
PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
writer->pos += 1;
return 0;
}
Py_LOCAL(PyObject *)
unicode_writer_finish(unicode_writer_t *writer)
{
if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) {
Py_DECREF(writer->buffer);
return NULL;
}
return writer->buffer;
}
Py_LOCAL(void)
unicode_writer_dealloc(unicode_writer_t *writer)
{
Py_CLEAR(writer->buffer);
}
#include "stringlib/unicode_format.h"
PyDoc_STRVAR(format__doc__,
@ -13649,135 +13778,6 @@ formatchar(PyObject *v)
return (Py_UCS4) -1;
}
typedef struct {
PyObject *buffer;
void *data;
enum PyUnicode_Kind kind;
Py_UCS4 maxchar;
Py_ssize_t pos;
} unicode_writer_t;
Py_LOCAL_INLINE(void)
unicode_writer_update(unicode_writer_t *writer)
{
writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
writer->data = PyUnicode_DATA(writer->buffer);
writer->kind = PyUnicode_KIND(writer->buffer);
}
Py_LOCAL(int)
unicode_writer_init(unicode_writer_t *writer,
Py_ssize_t length, Py_UCS4 maxchar)
{
writer->pos = 0;
writer->buffer = PyUnicode_New(length, maxchar);
if (writer->buffer == NULL)
return -1;
unicode_writer_update(writer);
return 0;
}
Py_LOCAL_INLINE(int)
unicode_writer_prepare(unicode_writer_t *writer,
Py_ssize_t length, Py_UCS4 maxchar)
{
Py_ssize_t newlen;
PyObject *newbuffer;
if (length > PY_SSIZE_T_MAX - writer->pos) {
PyErr_NoMemory();
return -1;
}
newlen = writer->pos + length;
if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) {
/* overallocate 25% to limit the number of resize */
if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
newlen += newlen / 4;
if (maxchar > writer->maxchar) {
/* resize + widen */
newbuffer = PyUnicode_New(newlen, maxchar);
if (newbuffer == NULL)
return -1;
PyUnicode_CopyCharacters(newbuffer, 0,
writer->buffer, 0, writer->pos);
Py_DECREF(writer->buffer);
}
else {
newbuffer = resize_compact(writer->buffer, newlen);
if (newbuffer == NULL)
return -1;
}
writer->buffer = newbuffer;
unicode_writer_update(writer);
}
else if (maxchar > writer->maxchar) {
if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0)
return -1;
unicode_writer_update(writer);
}
return 0;
}
Py_LOCAL_INLINE(int)
unicode_writer_write_str(
unicode_writer_t *writer,
PyObject *str, Py_ssize_t start, Py_ssize_t length)
{
Py_UCS4 maxchar;
assert(str != NULL);
assert(PyUnicode_Check(str));
if (PyUnicode_READY(str) == -1)
return -1;
assert(0 <= start);
assert(0 <= length);
assert(start + length <= PyUnicode_GET_LENGTH(str));
if (length == 0)
return 0;
maxchar = _PyUnicode_FindMaxChar(str, start, start + length);
if (unicode_writer_prepare(writer, length, maxchar) == -1)
return -1;
assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer));
copy_characters(writer->buffer, writer->pos,
str, start, length);
writer->pos += length;
return 0;
}
Py_LOCAL_INLINE(int)
unicode_writer_write_char(
unicode_writer_t *writer,
Py_UCS4 ch)
{
if (unicode_writer_prepare(writer, 1, ch) == -1)
return -1;
assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer));
PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
writer->pos += 1;
return 0;
}
Py_LOCAL(PyObject *)
unicode_writer_finish(unicode_writer_t *writer)
{
if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) {
Py_DECREF(writer->buffer);
return NULL;
}
return writer->buffer;
}
Py_LOCAL(void)
unicode_writer_dealloc(unicode_writer_t *writer)
{
Py_CLEAR(writer->buffer);
}
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{