mirror of
https://github.com/python/cpython.git
synced 2025-08-28 12:45:07 +00:00
PyUnicode_Join(): Rewrote to use PySequence_Fast(). This doesn't do much to reduce the size of the code, but greatly improves its clarity.
It's also quicker in what's probably the most common case (the argument iterable is a list). Against it, if the iterable isn't a list or a tuple, a temp tuple is materialized containing the entire input sequence, and that's a bigger temp memory burden. Yawn.
This commit is contained in:
parent
cca018356d
commit
05eba1fdc8
1 changed files with 96 additions and 126 deletions
|
@ -3979,159 +3979,129 @@ PyObject *
|
||||||
PyUnicode_Join(PyObject *separator, PyObject *seq)
|
PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
{
|
{
|
||||||
PyObject *internal_separator = NULL;
|
PyObject *internal_separator = NULL;
|
||||||
Py_UNICODE *sep;
|
const Py_UNICODE *sep;
|
||||||
size_t seplen;
|
size_t seplen;
|
||||||
PyUnicodeObject *res = NULL;
|
PyUnicodeObject *res = NULL; /* the result */
|
||||||
size_t sz; /* # allocated bytes for string in res */
|
size_t res_alloc = 100; /* # allocated bytes for string in res */
|
||||||
size_t reslen; /* # used bytes */
|
size_t res_used; /* # used bytes */
|
||||||
Py_UNICODE *p; /* pointer to free byte in res's string area */
|
Py_UNICODE *res_p; /* pointer to free byte in res's string area */
|
||||||
PyObject *it; /* iterator */
|
PyObject *fseq; /* PySequence_Fast(seq) */
|
||||||
|
int seqlen; /* len(fseq) -- number of items in sequence */
|
||||||
|
const Py_UNICODE blank = ' ';
|
||||||
PyObject *item;
|
PyObject *item;
|
||||||
int i;
|
int i;
|
||||||
PyObject *temp;
|
|
||||||
|
|
||||||
it = PyObject_GetIter(seq);
|
fseq = PySequence_Fast(seq, "");
|
||||||
if (it == NULL)
|
if (fseq == NULL) {
|
||||||
return NULL;
|
if (PyErr_ExceptionMatches(PyExc_TypeError))
|
||||||
|
PyErr_Format(PyExc_TypeError,
|
||||||
item = PyIter_Next(it);
|
"sequence expected, %.80s found",
|
||||||
if (item == NULL) {
|
seq->ob_type->tp_name);
|
||||||
if (PyErr_Occurred())
|
return NULL;
|
||||||
goto onError;
|
|
||||||
/* empty sequence; return u"" */
|
|
||||||
res = _PyUnicode_New(0);
|
|
||||||
goto Done;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If this is the only item, maybe we can get out cheap. */
|
seqlen = PySequence_Fast_GET_SIZE(fseq);
|
||||||
res = (PyUnicodeObject *)item;
|
/* If empty sequence, return u"". */
|
||||||
item = PyIter_Next(it);
|
if (seqlen == 0) {
|
||||||
if (item == NULL) {
|
res = _PyUnicode_New(0); /* empty sequence; return u"" */
|
||||||
if (PyErr_Occurred())
|
goto Done;
|
||||||
goto onError;
|
}
|
||||||
/* There's only one item in the sequence. */
|
/* If singleton sequence with an exact Unicode, return that. */
|
||||||
if (PyUnicode_CheckExact(res)) /* whatever.join([u]) -> u */
|
if (seqlen == 1) {
|
||||||
goto Done;
|
item = PySequence_Fast_GET_ITEM(fseq, 0);
|
||||||
|
if (PyUnicode_CheckExact(item)) {
|
||||||
|
Py_INCREF(item);
|
||||||
|
res = (PyUnicodeObject *)item;
|
||||||
|
goto Done;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* There are at least two to join (item != NULL), or there's only
|
/* At least two items to join, or one that isn't exact Unicode. */
|
||||||
* one but it's not an exact Unicode (item == NULL). res needs
|
if (seqlen > 1) {
|
||||||
* conversion to Unicode in either case.
|
/* Set up sep and seplen -- they're needed. */
|
||||||
* Caution: we may need to ensure a copy is made, and that's trickier
|
if (separator == NULL) {
|
||||||
* than it sounds because, e.g., PyUnicode_FromObject() may return
|
sep = &blank;
|
||||||
* a shared object (which must not be mutated).
|
seplen = 1;
|
||||||
*/
|
}
|
||||||
if (! PyUnicode_Check(res) && ! PyString_Check(res)) {
|
else {
|
||||||
PyErr_Format(PyExc_TypeError,
|
internal_separator = PyUnicode_FromObject(separator);
|
||||||
"sequence item 0: expected string or Unicode,"
|
if (internal_separator == NULL)
|
||||||
" %.80s found",
|
goto onError;
|
||||||
res->ob_type->tp_name);
|
sep = PyUnicode_AS_UNICODE(internal_separator);
|
||||||
Py_XDECREF(item);
|
seplen = PyUnicode_GET_SIZE(internal_separator);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get space. */
|
||||||
|
res = _PyUnicode_New((int)res_alloc);
|
||||||
|
if (res == NULL)
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
res_p = PyUnicode_AS_UNICODE(res);
|
||||||
temp = PyUnicode_FromObject((PyObject *)res);
|
res_used = 0;
|
||||||
if (temp == NULL) {
|
|
||||||
Py_XDECREF(item);
|
|
||||||
goto onError;
|
|
||||||
}
|
|
||||||
Py_DECREF(res);
|
|
||||||
if (item == NULL) {
|
|
||||||
/* res was the only item */
|
|
||||||
res = (PyUnicodeObject *)temp;
|
|
||||||
goto Done;
|
|
||||||
}
|
|
||||||
/* There are at least two items. As above, temp may be a shared object,
|
|
||||||
* so we need to copy it.
|
|
||||||
*/
|
|
||||||
reslen = PyUnicode_GET_SIZE(temp);
|
|
||||||
sz = reslen + 100; /* breathing room */
|
|
||||||
if (sz < reslen || sz > INT_MAX) /* overflow -- no breathing room */
|
|
||||||
sz = reslen;
|
|
||||||
res = _PyUnicode_New((int)sz);
|
|
||||||
if (res == NULL) {
|
|
||||||
Py_DECREF(item);
|
|
||||||
goto onError;
|
|
||||||
}
|
|
||||||
p = PyUnicode_AS_UNICODE(res);
|
|
||||||
Py_UNICODE_COPY(p, PyUnicode_AS_UNICODE(temp), (int)reslen);
|
|
||||||
p += reslen;
|
|
||||||
Py_DECREF(temp);
|
|
||||||
|
|
||||||
if (separator == NULL) {
|
for (i = 0; i < seqlen; ++i) {
|
||||||
Py_UNICODE blank = ' ';
|
size_t itemlen;
|
||||||
sep = &blank;
|
size_t new_res_used;
|
||||||
seplen = 1;
|
|
||||||
}
|
item = PySequence_Fast_GET_ITEM(fseq, i);
|
||||||
else {
|
/* Convert item to Unicode. */
|
||||||
internal_separator = PyUnicode_FromObject(separator);
|
if (! PyUnicode_Check(item) && ! PyString_Check(item)) {
|
||||||
if (internal_separator == NULL) {
|
PyErr_Format(PyExc_TypeError,
|
||||||
Py_DECREF(item);
|
"sequence item %i: expected string or Unicode,"
|
||||||
|
" %.80s found",
|
||||||
|
i, item->ob_type->tp_name);
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
sep = PyUnicode_AS_UNICODE(internal_separator);
|
item = PyUnicode_FromObject(item);
|
||||||
seplen = PyUnicode_GET_SIZE(internal_separator);
|
if (item == NULL)
|
||||||
}
|
goto onError;
|
||||||
|
/* We own a reference to item from here on. */
|
||||||
|
|
||||||
i = 1;
|
|
||||||
do {
|
|
||||||
size_t itemlen;
|
|
||||||
size_t newreslen;
|
|
||||||
|
|
||||||
/* Catenate the separator, then item. */
|
|
||||||
/* First convert item to Unicode. */
|
|
||||||
if (!PyUnicode_Check(item)) {
|
|
||||||
PyObject *v;
|
|
||||||
if (!PyString_Check(item)) {
|
|
||||||
PyErr_Format(PyExc_TypeError,
|
|
||||||
"sequence item %i: expected string or Unicode,"
|
|
||||||
" %.80s found",
|
|
||||||
i, item->ob_type->tp_name);
|
|
||||||
Py_DECREF(item);
|
|
||||||
goto onError;
|
|
||||||
}
|
|
||||||
v = PyUnicode_FromObject(item);
|
|
||||||
Py_DECREF(item);
|
|
||||||
item = v;
|
|
||||||
if (item == NULL)
|
|
||||||
goto onError;
|
|
||||||
}
|
|
||||||
/* Make sure we have enough space for the separator and the item. */
|
/* Make sure we have enough space for the separator and the item. */
|
||||||
itemlen = PyUnicode_GET_SIZE(item);
|
itemlen = PyUnicode_GET_SIZE(item);
|
||||||
newreslen = reslen + seplen + itemlen;
|
new_res_used = res_used + itemlen;
|
||||||
if (newreslen < reslen || newreslen > INT_MAX)
|
if (new_res_used < res_used || new_res_used > INT_MAX)
|
||||||
goto Overflow;
|
goto Overflow;
|
||||||
if (newreslen > sz) {
|
if (i < seqlen - 1) {
|
||||||
|
new_res_used += seplen;
|
||||||
|
if (new_res_used < res_used || new_res_used > INT_MAX)
|
||||||
|
goto Overflow;
|
||||||
|
}
|
||||||
|
if (new_res_used > res_alloc) {
|
||||||
|
/* double allocated size until it's big enough */
|
||||||
do {
|
do {
|
||||||
size_t oldsize = sz;
|
size_t oldsize = res_alloc;
|
||||||
sz += sz;
|
res_alloc += res_alloc;
|
||||||
if (sz < oldsize || sz > INT_MAX)
|
if (res_alloc < oldsize || res_alloc > INT_MAX)
|
||||||
goto Overflow;
|
goto Overflow;
|
||||||
} while (newreslen > sz);
|
} while (new_res_used > res_alloc);
|
||||||
if (_PyUnicode_Resize(&res, (int)sz) < 0) {
|
if (_PyUnicode_Resize(&res, (int)res_alloc) < 0) {
|
||||||
Py_DECREF(item);
|
Py_DECREF(item);
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
p = PyUnicode_AS_UNICODE(res) + reslen;
|
res_p = PyUnicode_AS_UNICODE(res) + res_used;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy item, and maybe the separator. */
|
||||||
|
Py_UNICODE_COPY(res_p, PyUnicode_AS_UNICODE(item), (int)itemlen);
|
||||||
|
res_p += itemlen;
|
||||||
|
if (i < seqlen - 1) {
|
||||||
|
Py_UNICODE_COPY(res_p, sep, (int)seplen);
|
||||||
|
res_p += seplen;
|
||||||
}
|
}
|
||||||
Py_UNICODE_COPY(p, sep, (int)seplen);
|
|
||||||
p += seplen;
|
|
||||||
Py_UNICODE_COPY(p, PyUnicode_AS_UNICODE(item), (int)itemlen);
|
|
||||||
p += itemlen;
|
|
||||||
Py_DECREF(item);
|
Py_DECREF(item);
|
||||||
reslen = newreslen;
|
res_used = new_res_used;
|
||||||
|
}
|
||||||
|
|
||||||
++i;
|
/* Shrink res to match the used area; this probably can't fail,
|
||||||
item = PyIter_Next(it);
|
* but it's cheap to check.
|
||||||
} while (item != NULL);
|
*/
|
||||||
if (PyErr_Occurred())
|
if (_PyUnicode_Resize(&res, (int)res_used) < 0)
|
||||||
goto onError;
|
|
||||||
|
|
||||||
if (_PyUnicode_Resize(&res, (int)reslen) < 0)
|
|
||||||
goto onError;
|
goto onError;
|
||||||
|
|
||||||
Done:
|
Done:
|
||||||
Py_XDECREF(internal_separator);
|
Py_XDECREF(internal_separator);
|
||||||
Py_DECREF(it);
|
Py_DECREF(fseq);
|
||||||
return (PyObject *)res;
|
return (PyObject *)res;
|
||||||
|
|
||||||
Overflow:
|
Overflow:
|
||||||
|
@ -4142,7 +4112,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
Py_XDECREF(internal_separator);
|
Py_XDECREF(internal_separator);
|
||||||
Py_DECREF(it);
|
Py_DECREF(fseq);
|
||||||
Py_XDECREF(res);
|
Py_XDECREF(res);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue