mirror of
https://github.com/python/cpython.git
synced 2025-07-15 15:25:29 +00:00
Close #14716: str.format() now uses the new "unicode writer" API instead of the
PyAccu API. For example, it makes str.format() from 25% to 30% faster on Linux.
This commit is contained in:
parent
9fad160411
commit
202fdca133
2 changed files with 148 additions and 170 deletions
|
@ -13200,6 +13200,135 @@ unicode_endswith(PyObject *self,
|
|||
return PyBool_FromLong(result);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
PyObject *buffer;
|
||||
void *data;
|
||||
enum PyUnicode_Kind kind;
|
||||
Py_UCS4 maxchar;
|
||||
Py_ssize_t pos;
|
||||
} unicode_writer_t;
|
||||
|
||||
Py_LOCAL_INLINE(void)
|
||||
unicode_writer_update(unicode_writer_t *writer)
|
||||
{
|
||||
writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
|
||||
writer->data = PyUnicode_DATA(writer->buffer);
|
||||
writer->kind = PyUnicode_KIND(writer->buffer);
|
||||
}
|
||||
|
||||
Py_LOCAL(int)
|
||||
unicode_writer_init(unicode_writer_t *writer,
|
||||
Py_ssize_t length, Py_UCS4 maxchar)
|
||||
{
|
||||
writer->pos = 0;
|
||||
writer->buffer = PyUnicode_New(length, maxchar);
|
||||
if (writer->buffer == NULL)
|
||||
return -1;
|
||||
unicode_writer_update(writer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
unicode_writer_prepare(unicode_writer_t *writer,
|
||||
Py_ssize_t length, Py_UCS4 maxchar)
|
||||
{
|
||||
Py_ssize_t newlen;
|
||||
PyObject *newbuffer;
|
||||
|
||||
if (length > PY_SSIZE_T_MAX - writer->pos) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
newlen = writer->pos + length;
|
||||
|
||||
if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) {
|
||||
/* overallocate 25% to limit the number of resize */
|
||||
if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
|
||||
newlen += newlen / 4;
|
||||
|
||||
if (maxchar > writer->maxchar) {
|
||||
/* resize + widen */
|
||||
newbuffer = PyUnicode_New(newlen, maxchar);
|
||||
if (newbuffer == NULL)
|
||||
return -1;
|
||||
PyUnicode_CopyCharacters(newbuffer, 0,
|
||||
writer->buffer, 0, writer->pos);
|
||||
Py_DECREF(writer->buffer);
|
||||
}
|
||||
else {
|
||||
newbuffer = resize_compact(writer->buffer, newlen);
|
||||
if (newbuffer == NULL)
|
||||
return -1;
|
||||
}
|
||||
writer->buffer = newbuffer;
|
||||
unicode_writer_update(writer);
|
||||
}
|
||||
else if (maxchar > writer->maxchar) {
|
||||
if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0)
|
||||
return -1;
|
||||
unicode_writer_update(writer);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
unicode_writer_write_str(
|
||||
unicode_writer_t *writer,
|
||||
PyObject *str, Py_ssize_t start, Py_ssize_t length)
|
||||
{
|
||||
Py_UCS4 maxchar;
|
||||
|
||||
assert(str != NULL);
|
||||
assert(PyUnicode_Check(str));
|
||||
if (PyUnicode_READY(str) == -1)
|
||||
return -1;
|
||||
|
||||
assert(0 <= start);
|
||||
assert(0 <= length);
|
||||
assert(start + length <= PyUnicode_GET_LENGTH(str));
|
||||
if (length == 0)
|
||||
return 0;
|
||||
|
||||
maxchar = _PyUnicode_FindMaxChar(str, start, start + length);
|
||||
if (unicode_writer_prepare(writer, length, maxchar) == -1)
|
||||
return -1;
|
||||
|
||||
assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer));
|
||||
copy_characters(writer->buffer, writer->pos,
|
||||
str, start, length);
|
||||
writer->pos += length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
unicode_writer_write_char(
|
||||
unicode_writer_t *writer,
|
||||
Py_UCS4 ch)
|
||||
{
|
||||
if (unicode_writer_prepare(writer, 1, ch) == -1)
|
||||
return -1;
|
||||
assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer));
|
||||
PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
|
||||
writer->pos += 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
Py_LOCAL(PyObject *)
|
||||
unicode_writer_finish(unicode_writer_t *writer)
|
||||
{
|
||||
if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) {
|
||||
Py_DECREF(writer->buffer);
|
||||
return NULL;
|
||||
}
|
||||
return writer->buffer;
|
||||
}
|
||||
|
||||
Py_LOCAL(void)
|
||||
unicode_writer_dealloc(unicode_writer_t *writer)
|
||||
{
|
||||
Py_CLEAR(writer->buffer);
|
||||
}
|
||||
|
||||
#include "stringlib/unicode_format.h"
|
||||
|
||||
PyDoc_STRVAR(format__doc__,
|
||||
|
@ -13649,135 +13778,6 @@ formatchar(PyObject *v)
|
|||
return (Py_UCS4) -1;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
PyObject *buffer;
|
||||
void *data;
|
||||
enum PyUnicode_Kind kind;
|
||||
Py_UCS4 maxchar;
|
||||
Py_ssize_t pos;
|
||||
} unicode_writer_t;
|
||||
|
||||
Py_LOCAL_INLINE(void)
|
||||
unicode_writer_update(unicode_writer_t *writer)
|
||||
{
|
||||
writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
|
||||
writer->data = PyUnicode_DATA(writer->buffer);
|
||||
writer->kind = PyUnicode_KIND(writer->buffer);
|
||||
}
|
||||
|
||||
Py_LOCAL(int)
|
||||
unicode_writer_init(unicode_writer_t *writer,
|
||||
Py_ssize_t length, Py_UCS4 maxchar)
|
||||
{
|
||||
writer->pos = 0;
|
||||
writer->buffer = PyUnicode_New(length, maxchar);
|
||||
if (writer->buffer == NULL)
|
||||
return -1;
|
||||
unicode_writer_update(writer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
unicode_writer_prepare(unicode_writer_t *writer,
|
||||
Py_ssize_t length, Py_UCS4 maxchar)
|
||||
{
|
||||
Py_ssize_t newlen;
|
||||
PyObject *newbuffer;
|
||||
|
||||
if (length > PY_SSIZE_T_MAX - writer->pos) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
newlen = writer->pos + length;
|
||||
|
||||
if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) {
|
||||
/* overallocate 25% to limit the number of resize */
|
||||
if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
|
||||
newlen += newlen / 4;
|
||||
|
||||
if (maxchar > writer->maxchar) {
|
||||
/* resize + widen */
|
||||
newbuffer = PyUnicode_New(newlen, maxchar);
|
||||
if (newbuffer == NULL)
|
||||
return -1;
|
||||
PyUnicode_CopyCharacters(newbuffer, 0,
|
||||
writer->buffer, 0, writer->pos);
|
||||
Py_DECREF(writer->buffer);
|
||||
}
|
||||
else {
|
||||
newbuffer = resize_compact(writer->buffer, newlen);
|
||||
if (newbuffer == NULL)
|
||||
return -1;
|
||||
}
|
||||
writer->buffer = newbuffer;
|
||||
unicode_writer_update(writer);
|
||||
}
|
||||
else if (maxchar > writer->maxchar) {
|
||||
if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0)
|
||||
return -1;
|
||||
unicode_writer_update(writer);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
unicode_writer_write_str(
|
||||
unicode_writer_t *writer,
|
||||
PyObject *str, Py_ssize_t start, Py_ssize_t length)
|
||||
{
|
||||
Py_UCS4 maxchar;
|
||||
|
||||
assert(str != NULL);
|
||||
assert(PyUnicode_Check(str));
|
||||
if (PyUnicode_READY(str) == -1)
|
||||
return -1;
|
||||
|
||||
assert(0 <= start);
|
||||
assert(0 <= length);
|
||||
assert(start + length <= PyUnicode_GET_LENGTH(str));
|
||||
if (length == 0)
|
||||
return 0;
|
||||
|
||||
maxchar = _PyUnicode_FindMaxChar(str, start, start + length);
|
||||
if (unicode_writer_prepare(writer, length, maxchar) == -1)
|
||||
return -1;
|
||||
|
||||
assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer));
|
||||
copy_characters(writer->buffer, writer->pos,
|
||||
str, start, length);
|
||||
writer->pos += length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
unicode_writer_write_char(
|
||||
unicode_writer_t *writer,
|
||||
Py_UCS4 ch)
|
||||
{
|
||||
if (unicode_writer_prepare(writer, 1, ch) == -1)
|
||||
return -1;
|
||||
assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer));
|
||||
PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
|
||||
writer->pos += 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
Py_LOCAL(PyObject *)
|
||||
unicode_writer_finish(unicode_writer_t *writer)
|
||||
{
|
||||
if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) {
|
||||
Py_DECREF(writer->buffer);
|
||||
return NULL;
|
||||
}
|
||||
return writer->buffer;
|
||||
}
|
||||
|
||||
Py_LOCAL(void)
|
||||
unicode_writer_dealloc(unicode_writer_t *writer)
|
||||
{
|
||||
Py_CLEAR(writer->buffer);
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyUnicode_Format(PyObject *format, PyObject *args)
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue