Issue #7622: Improve the split(), rsplit(), splitlines() and replace()

methods of bytes, bytearray and unicode objects by using a common
implementation based on stringlib's fast search.  Patch by Florent Xicluna.
This commit is contained in:
Antoine Pitrou 2010-01-13 07:55:48 +00:00
parent d0ff51c43f
commit 6467213bfd
17 changed files with 717 additions and 1442 deletions

View file

@ -563,6 +563,7 @@ STRINGLIB_HEADERS= \
$(srcdir)/Objects/stringlib/find.h \ $(srcdir)/Objects/stringlib/find.h \
$(srcdir)/Objects/stringlib/formatter.h \ $(srcdir)/Objects/stringlib/formatter.h \
$(srcdir)/Objects/stringlib/partition.h \ $(srcdir)/Objects/stringlib/partition.h \
$(srcdir)/Objects/stringlib/split.h \
$(srcdir)/Objects/stringlib/stringdefs.h \ $(srcdir)/Objects/stringlib/stringdefs.h \
$(srcdir)/Objects/stringlib/string_format.h \ $(srcdir)/Objects/stringlib/string_format.h \
$(srcdir)/Objects/stringlib/transmogrify.h \ $(srcdir)/Objects/stringlib/transmogrify.h \

View file

@ -12,6 +12,10 @@ What's New in Python 2.7 alpha 3?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #7622: Improve the split(), rsplit(), splitlines() and replace()
methods of bytes, bytearray and unicode objects by using a common
implementation based on stringlib's fast search. Patch by Florent Xicluna.
- Issue #7632: Fix a crash in dtoa.c that occurred in debug builds - Issue #7632: Fix a crash in dtoa.c that occurred in debug builds
when parsing certain long numeric strings corresponding to subnormal when parsing certain long numeric strings corresponding to subnormal
values. Also fix a number of bugs in dtoa.c that could lead to values. Also fix a number of bugs in dtoa.c that could lead to

View file

@ -1115,14 +1115,16 @@ bytearray_dealloc(PyByteArrayObject *self)
#define STRINGLIB_STR PyByteArray_AS_STRING #define STRINGLIB_STR PyByteArray_AS_STRING
#define STRINGLIB_NEW PyByteArray_FromStringAndSize #define STRINGLIB_NEW PyByteArray_FromStringAndSize
#define STRINGLIB_EMPTY nullbytes #define STRINGLIB_EMPTY nullbytes
#define STRINGLIB_ISSPACE Py_ISSPACE
/* True when x is one of the two bytes treated as a line break here: '\n'
   or '\r'.  The argument is parenthesized so that a compound expression
   (for example a conditional) is compared as a whole.  Note that x may be
   evaluated twice, so it should not have side effects. */
#define STRINGLIB_ISLINEBREAK(x) (((x) == '\n') || ((x) == '\r'))
#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
#define STRINGLIB_MUTABLE 1 #define STRINGLIB_MUTABLE 1
#define FROM_BYTEARRAY 1
#include "stringlib/fastsearch.h" #include "stringlib/fastsearch.h"
#include "stringlib/count.h" #include "stringlib/count.h"
#include "stringlib/find.h" #include "stringlib/find.h"
#include "stringlib/partition.h" #include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/ctype.h" #include "stringlib/ctype.h"
#include "stringlib/transmogrify.h" #include "stringlib/transmogrify.h"
@ -1130,22 +1132,21 @@ bytearray_dealloc(PyByteArrayObject *self)
/* The following Py_LOCAL_INLINE and Py_LOCAL functions /* The following Py_LOCAL_INLINE and Py_LOCAL functions
were copied from the old char* style string object. */ were copied from the old char* style string object. */
Py_LOCAL_INLINE(void) /* helper macro to fixup start/end slice values */
_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len) #define ADJUST_INDICES(start, end, len) \
{ if (end > len) \
if (*end > len) end = len; \
*end = len; else if (end < 0) { \
else if (*end < 0) end += len; \
*end += len; if (end < 0) \
if (*end < 0) end = 0; \
*end = 0; } \
if (*start < 0) if (start < 0) { \
*start += len; start += len; \
if (*start < 0) if (start < 0) \
*start = 0; start = 0; \
} }
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir) bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
{ {
@ -1212,10 +1213,10 @@ bytearray_count(PyByteArrayObject *self, PyObject *args)
if (_getbuffer(sub_obj, &vsub) < 0) if (_getbuffer(sub_obj, &vsub) < 0)
return NULL; return NULL;
_adjust_indices(&start, &end, PyByteArray_GET_SIZE(self)); ADJUST_INDICES(start, end, PyByteArray_GET_SIZE(self));
count_obj = PyInt_FromSsize_t( count_obj = PyInt_FromSsize_t(
stringlib_count(str + start, end - start, vsub.buf, vsub.len) stringlib_count(str + start, end - start, vsub.buf, vsub.len, PY_SSIZE_T_MAX)
); );
PyBuffer_Release(&vsub); PyBuffer_Release(&vsub);
return count_obj; return count_obj;
@ -1323,7 +1324,7 @@ _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start
if (_getbuffer(substr, &vsubstr) < 0) if (_getbuffer(substr, &vsubstr) < 0)
return -1; return -1;
_adjust_indices(&start, &end, len); ADJUST_INDICES(start, end, len);
if (direction < 0) { if (direction < 0) {
/* startswith */ /* startswith */
@ -1528,20 +1529,11 @@ done:
} }
#define FORWARD 1
#define REVERSE -1
/* find and count characters and substrings */ /* find and count characters and substrings */
#define findchar(target, target_len, c) \ #define findchar(target, target_len, c) \
((char *)memchr((const void *)(target), c, target_len)) ((char *)memchr((const void *)(target), c, target_len))
/* Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length) \
(target[offset] == pattern[0] && \
target[offset+length-1] == pattern[length-1] && \
!memcmp(target+offset+1, pattern+1, length-2) )
/* Bytes ops must return a string, create a copy */ /* Bytes ops must return a string, create a copy */
Py_LOCAL(PyByteArrayObject *) Py_LOCAL(PyByteArrayObject *)
@ -1568,93 +1560,6 @@ countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount
return count; return count;
} }
Py_LOCAL(Py_ssize_t)
findstring(const char *target, Py_ssize_t target_len,
const char *pattern, Py_ssize_t pattern_len,
Py_ssize_t start,
Py_ssize_t end,
int direction)
{
if (start < 0) {
start += target_len;
if (start < 0)
start = 0;
}
if (end > target_len) {
end = target_len;
} else if (end < 0) {
end += target_len;
if (end < 0)
end = 0;
}
/* zero-length substrings always match at the first attempt */
if (pattern_len == 0)
return (direction > 0) ? start : end;
end -= pattern_len;
if (direction < 0) {
for (; end >= start; end--)
if (Py_STRING_MATCH(target, end, pattern, pattern_len))
return end;
} else {
for (; start <= end; start++)
if (Py_STRING_MATCH(target, start, pattern, pattern_len))
return start;
}
return -1;
}
Py_LOCAL_INLINE(Py_ssize_t)
countstring(const char *target, Py_ssize_t target_len,
const char *pattern, Py_ssize_t pattern_len,
Py_ssize_t start,
Py_ssize_t end,
int direction, Py_ssize_t maxcount)
{
Py_ssize_t count=0;
if (start < 0) {
start += target_len;
if (start < 0)
start = 0;
}
if (end > target_len) {
end = target_len;
} else if (end < 0) {
end += target_len;
if (end < 0)
end = 0;
}
/* zero-length substrings match everywhere */
if (pattern_len == 0 || maxcount == 0) {
if (target_len+1 < maxcount)
return target_len+1;
return maxcount;
}
end -= pattern_len;
if (direction < 0) {
for (; (end >= start); end--)
if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
count++;
if (--maxcount <= 0) break;
end -= pattern_len-1;
}
} else {
for (; (start <= end); start++)
if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
count++;
if (--maxcount <= 0)
break;
start += pattern_len-1;
}
}
return count;
}
/* Algorithms for different cases of string replacement */ /* Algorithms for different cases of string replacement */
@ -1776,9 +1681,8 @@ replace_delete_substring(PyByteArrayObject *self,
self_len = PyByteArray_GET_SIZE(self); self_len = PyByteArray_GET_SIZE(self);
self_s = PyByteArray_AS_STRING(self); self_s = PyByteArray_AS_STRING(self);
count = countstring(self_s, self_len, count = stringlib_count(self_s, self_len,
from_s, from_len, from_s, from_len,
0, self_len, 1,
maxcount); maxcount);
if (count == 0) { if (count == 0) {
@ -1798,9 +1702,9 @@ replace_delete_substring(PyByteArrayObject *self,
start = self_s; start = self_s;
end = self_s + self_len; end = self_s + self_len;
while (count-- > 0) { while (count-- > 0) {
offset = findstring(start, end-start, offset = stringlib_find(start, end-start,
from_s, from_len, from_s, from_len,
0, end-start, FORWARD); 0);
if (offset == -1) if (offset == -1)
break; break;
next = start + offset; next = start + offset;
@ -1876,9 +1780,9 @@ replace_substring_in_place(PyByteArrayObject *self,
self_s = PyByteArray_AS_STRING(self); self_s = PyByteArray_AS_STRING(self);
self_len = PyByteArray_GET_SIZE(self); self_len = PyByteArray_GET_SIZE(self);
offset = findstring(self_s, self_len, offset = stringlib_find(self_s, self_len,
from_s, from_len, from_s, from_len,
0, self_len, FORWARD); 0);
if (offset == -1) { if (offset == -1) {
/* No matches; return the original bytes */ /* No matches; return the original bytes */
return return_self(self); return return_self(self);
@ -1898,9 +1802,9 @@ replace_substring_in_place(PyByteArrayObject *self,
end = result_s + self_len; end = result_s + self_len;
while ( --maxcount > 0) { while ( --maxcount > 0) {
offset = findstring(start, end-start, offset = stringlib_find(start, end-start,
from_s, from_len, from_s, from_len,
0, end-start, FORWARD); 0);
if (offset==-1) if (offset==-1)
break; break;
Py_MEMCPY(start+offset, to_s, from_len); Py_MEMCPY(start+offset, to_s, from_len);
@ -1993,9 +1897,10 @@ replace_substring(PyByteArrayObject *self,
self_s = PyByteArray_AS_STRING(self); self_s = PyByteArray_AS_STRING(self);
self_len = PyByteArray_GET_SIZE(self); self_len = PyByteArray_GET_SIZE(self);
count = countstring(self_s, self_len, count = stringlib_count(self_s, self_len,
from_s, from_len, from_s, from_len,
0, self_len, FORWARD, maxcount); maxcount);
if (count == 0) { if (count == 0) {
/* no matches, return unchanged */ /* no matches, return unchanged */
return return_self(self); return return_self(self);
@ -2022,9 +1927,9 @@ replace_substring(PyByteArrayObject *self,
start = self_s; start = self_s;
end = self_s + self_len; end = self_s + self_len;
while (count-- > 0) { while (count-- > 0) {
offset = findstring(start, end-start, offset = stringlib_find(start, end-start,
from_s, from_len, from_s, from_len,
0, end-start, FORWARD); 0);
if (offset == -1) if (offset == -1)
break; break;
next = start+offset; next = start+offset;
@ -2153,123 +2058,6 @@ bytearray_replace(PyByteArrayObject *self, PyObject *args)
return res; return res;
} }
/* Overallocate the initial list to reduce the number of reallocs for small
split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
resizes, to sizes 4, 8, then 16. Most observed string splits are for human
text (roughly 11 words per line) and field delimited data (usually 1-10
fields). For large strings the split algorithms are bandwidth limited
so increasing the preallocation likely will not improve things.*/
#define MAX_PREALLOC 12
/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
(maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
#define SPLIT_APPEND(data, left, right) \
str = PyByteArray_FromStringAndSize((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str);
#define SPLIT_ADD(data, left, right) { \
str = PyByteArray_FromStringAndSize((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (count < MAX_PREALLOC) { \
PyList_SET_ITEM(list, count, str); \
} else { \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str); \
} \
count++; }
/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
Py_LOCAL_INLINE(PyObject *)
split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
register Py_ssize_t i, j, count = 0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
i = j = 0;
while ((j < len) && (maxcount-- > 0)) {
for(; j < len; j++) {
/* I found that using memchr makes no difference */
if (s[j] == ch) {
SPLIT_ADD(s, i, j);
i = j = j + 1;
break;
}
}
}
if (i <= len) {
SPLIT_ADD(s, i, len);
}
FIX_PREALLOC_SIZE(list);
return list;
onError:
Py_DECREF(list);
return NULL;
}
Py_LOCAL_INLINE(PyObject *)
split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
{
register Py_ssize_t i, j, count = 0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
for (i = j = 0; i < len; ) {
/* find a token */
while (i < len && Py_ISSPACE(s[i]))
i++;
j = i;
while (i < len && !Py_ISSPACE(s[i]))
i++;
if (j < i) {
if (maxcount-- <= 0)
break;
SPLIT_ADD(s, j, i);
while (i < len && Py_ISSPACE(s[i]))
i++;
j = i;
}
}
if (j < len) {
SPLIT_ADD(s, j, len);
}
FIX_PREALLOC_SIZE(list);
return list;
onError:
Py_DECREF(list);
return NULL;
}
PyDoc_STRVAR(split__doc__, PyDoc_STRVAR(split__doc__,
"B.split([sep[, maxsplit]]) -> list of bytearray\n\ "B.split([sep[, maxsplit]]) -> list of bytearray\n\
\n\ \n\
@ -2281,10 +2069,10 @@ If maxsplit is given, at most maxsplit splits are done.");
static PyObject * static PyObject *
bytearray_split(PyByteArrayObject *self, PyObject *args) bytearray_split(PyByteArrayObject *self, PyObject *args)
{ {
Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j, pos; Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
Py_ssize_t maxsplit = -1, count = 0; Py_ssize_t maxsplit = -1;
const char *s = PyByteArray_AS_STRING(self), *sub; const char *s = PyByteArray_AS_STRING(self), *sub;
PyObject *list, *str, *subobj = Py_None; PyObject *list, *subobj = Py_None;
Py_buffer vsub; Py_buffer vsub;
if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
@ -2293,75 +2081,20 @@ bytearray_split(PyByteArrayObject *self, PyObject *args)
maxsplit = PY_SSIZE_T_MAX; maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None) if (subobj == Py_None)
return split_whitespace(s, len, maxsplit); return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
if (_getbuffer(subobj, &vsub) < 0) if (_getbuffer(subobj, &vsub) < 0)
return NULL; return NULL;
sub = vsub.buf; sub = vsub.buf;
n = vsub.len; n = vsub.len;
if (n == 0) { list = stringlib_split(
PyErr_SetString(PyExc_ValueError, "empty separator"); (PyObject*) self, s, len, sub, n, maxsplit
PyBuffer_Release(&vsub); );
return NULL;
}
if (n == 1) {
list = split_char(s, len, sub[0], maxsplit);
PyBuffer_Release(&vsub); PyBuffer_Release(&vsub);
return list; return list;
} }
list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL) {
PyBuffer_Release(&vsub);
return NULL;
}
i = j = 0;
while (maxsplit-- > 0) {
pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
if (pos < 0)
break;
j = i+pos;
SPLIT_ADD(s, i, j);
i = j + n;
}
SPLIT_ADD(s, i, len);
FIX_PREALLOC_SIZE(list);
PyBuffer_Release(&vsub);
return list;
onError:
Py_DECREF(list);
PyBuffer_Release(&vsub);
return NULL;
}
/* stringlib's partition shares nullbytes in some cases.
undo this, we don't want the nullbytes to be shared. */
static PyObject *
make_nullbytes_unique(PyObject *result)
{
if (result != NULL) {
int i;
assert(PyTuple_Check(result));
assert(PyTuple_GET_SIZE(result) == 3);
for (i = 0; i < 3; i++) {
if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
if (new == NULL) {
Py_DECREF(result);
result = NULL;
break;
}
Py_DECREF(nullbytes);
PyTuple_SET_ITEM(result, i, new);
}
}
}
return result;
}
PyDoc_STRVAR(partition__doc__, PyDoc_STRVAR(partition__doc__,
"B.partition(sep) -> (head, sep, tail)\n\ "B.partition(sep) -> (head, sep, tail)\n\
\n\ \n\
@ -2386,7 +2119,7 @@ bytearray_partition(PyByteArrayObject *self, PyObject *sep_obj)
); );
Py_DECREF(bytesep); Py_DECREF(bytesep);
return make_nullbytes_unique(result); return result;
} }
PyDoc_STRVAR(rpartition__doc__, PyDoc_STRVAR(rpartition__doc__,
@ -2414,81 +2147,7 @@ bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
); );
Py_DECREF(bytesep); Py_DECREF(bytesep);
return make_nullbytes_unique(result); return result;
}
Py_LOCAL_INLINE(PyObject *)
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
register Py_ssize_t i, j, count=0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
i = j = len - 1;
while ((i >= 0) && (maxcount-- > 0)) {
for (; i >= 0; i--) {
if (s[i] == ch) {
SPLIT_ADD(s, i + 1, j + 1);
j = i = i - 1;
break;
}
}
}
if (j >= -1) {
SPLIT_ADD(s, 0, j + 1);
}
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
}
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
{
register Py_ssize_t i, j, count = 0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
for (i = j = len - 1; i >= 0; ) {
/* find a token */
while (i >= 0 && Py_ISSPACE(s[i]))
i--;
j = i;
while (i >= 0 && !Py_ISSPACE(s[i]))
i--;
if (j > i) {
if (maxcount-- <= 0)
break;
SPLIT_ADD(s, i + 1, j + 1);
while (i >= 0 && Py_ISSPACE(s[i]))
i--;
j = i;
}
}
if (j >= 0) {
SPLIT_ADD(s, 0, j + 1);
}
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
} }
PyDoc_STRVAR(rsplit__doc__, PyDoc_STRVAR(rsplit__doc__,
@ -2503,10 +2162,10 @@ If maxsplit is given, at most maxsplit splits are done.");
static PyObject * static PyObject *
bytearray_rsplit(PyByteArrayObject *self, PyObject *args) bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
{ {
Py_ssize_t len = PyByteArray_GET_SIZE(self), n, j, pos; Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
Py_ssize_t maxsplit = -1, count = 0; Py_ssize_t maxsplit = -1;
const char *s = PyByteArray_AS_STRING(self), *sub; const char *s = PyByteArray_AS_STRING(self), *sub;
PyObject *list, *str, *subobj = Py_None; PyObject *list, *subobj = Py_None;
Py_buffer vsub; Py_buffer vsub;
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
@ -2515,52 +2174,20 @@ bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
maxsplit = PY_SSIZE_T_MAX; maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None) if (subobj == Py_None)
return rsplit_whitespace(s, len, maxsplit); return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
if (_getbuffer(subobj, &vsub) < 0) if (_getbuffer(subobj, &vsub) < 0)
return NULL; return NULL;
sub = vsub.buf; sub = vsub.buf;
n = vsub.len; n = vsub.len;
if (n == 0) { list = stringlib_rsplit(
PyErr_SetString(PyExc_ValueError, "empty separator"); (PyObject*) self, s, len, sub, n, maxsplit
PyBuffer_Release(&vsub); );
return NULL;
}
else if (n == 1) {
list = rsplit_char(s, len, sub[0], maxsplit);
PyBuffer_Release(&vsub); PyBuffer_Release(&vsub);
return list; return list;
} }
list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL) {
PyBuffer_Release(&vsub);
return NULL;
}
j = len;
while (maxsplit-- > 0) {
pos = fastsearch(s, j, sub, n, FAST_RSEARCH);
if (pos < 0)
break;
SPLIT_ADD(s, pos + n, j);
j = pos;
}
SPLIT_ADD(s, 0, j);
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
PyBuffer_Release(&vsub);
return list;
onError:
Py_DECREF(list);
PyBuffer_Release(&vsub);
return NULL;
}
PyDoc_STRVAR(reverse__doc__, PyDoc_STRVAR(reverse__doc__,
"B.reverse() -> None\n\ "B.reverse() -> None\n\
\n\ \n\
@ -3026,6 +2653,27 @@ bytearray_join(PyByteArrayObject *self, PyObject *it)
return NULL; return NULL;
} }
PyDoc_STRVAR(splitlines__doc__,
"B.splitlines([keepends]) -> list of lines\n\
\n\
Return a list of the lines in B, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.");
/* bytearray.splitlines([keepends]): parse the optional integer `keepends`
   flag (defaults to 0), then pass the object, its character buffer and its
   length to stringlib_splitlines().  Returns NULL if the arguments cannot
   be parsed. */
static PyObject*
bytearray_splitlines(PyObject *self, PyObject *args)
{
    int keepends = 0;

    if (PyArg_ParseTuple(args, "|i:splitlines", &keepends)) {
        return stringlib_splitlines(self,
                                    PyByteArray_AS_STRING(self),
                                    PyByteArray_GET_SIZE(self),
                                    keepends);
    }
    return NULL;
}
PyDoc_STRVAR(fromhex_doc, PyDoc_STRVAR(fromhex_doc,
"bytearray.fromhex(string) -> bytearray\n\ "bytearray.fromhex(string) -> bytearray\n\
\n\ \n\
@ -3209,7 +2857,7 @@ bytearray_methods[] = {
{"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__}, {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__},
{"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__}, {"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__},
{"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__}, {"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__},
{"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS, {"splitlines", (PyCFunction)bytearray_splitlines, METH_VARARGS,
splitlines__doc__}, splitlines__doc__},
{"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS , {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
startswith__doc__}, startswith__doc__},

View file

@ -28,3 +28,12 @@ STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
returns the pointer to the character data for the given string returns the pointer to the character data for the given string
object (which must be of the right type) object (which must be of the right type)
int STRINGLIB_CHECK_EXACT(PyObject *)
returns true if the object is an instance of our type, not a subclass.
STRINGLIB_MUTABLE
Must be defined as 0 or 1; it tells the C preprocessor conditionals in
the stringlib code whether the object being operated on is mutable.

View file

@ -9,28 +9,22 @@
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len, stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len) const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t maxcount)
{ {
Py_ssize_t count; Py_ssize_t count;
if (str_len < 0) if (str_len < 0)
return 0; /* start > len(str) */ return 0; /* start > len(str) */
if (sub_len == 0) if (sub_len == 0)
return str_len + 1; return (str_len < maxcount) ? str_len + 1 : maxcount;
count = fastsearch(str, str_len, sub, sub_len, FAST_COUNT); count = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
if (count < 0) if (count < 0)
count = 0; /* no match */ return 0; /* no match */
return count; return count;
} }
#endif #endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

View file

@ -107,4 +107,3 @@ stringlib_swapcase(PyObject *self)
STRINGLIB_LEN(self)); STRINGLIB_LEN(self));
return newobj; return newobj;
} }

View file

@ -18,10 +18,13 @@
#define FAST_SEARCH 1 #define FAST_SEARCH 1
#define FAST_RSEARCH 2 #define FAST_RSEARCH 2
/* Bloom-filter helpers for the compressed Boyer-Moore delta table built in
   fastsearch(): every character maps to one bit of the mask, selected by
   (ch) & (LONG_BIT - 1).
   BLOOM_ADD sets the bit for ch in mask (mask must be a modifiable lvalue);
   BLOOM yields non-zero when the bit for ch is set.
   The shifted constant is 1UL rather than plain 1: the shift count can be
   as large as LONG_BIT - 1, and shifting an int by at least its own width
   is undefined behaviour on platforms where long is wider than int. */
#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (LONG_BIT - 1)))))
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (LONG_BIT - 1)))))
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
const STRINGLIB_CHAR* p, Py_ssize_t m, const STRINGLIB_CHAR* p, Py_ssize_t m,
int mode) Py_ssize_t maxcount, int mode)
{ {
long mask; long mask;
Py_ssize_t skip, count = 0; Py_ssize_t skip, count = 0;
@ -29,7 +32,7 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
w = n - m; w = n - m;
if (w < 0) if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
return -1; return -1;
/* look for special cases */ /* look for special cases */
@ -39,8 +42,11 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* use special case for 1-character strings */ /* use special case for 1-character strings */
if (mode == FAST_COUNT) { if (mode == FAST_COUNT) {
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
if (s[i] == p[0]) if (s[i] == p[0]) {
count++; count++;
if (count == maxcount)
return maxcount;
}
return count; return count;
} else if (mode == FAST_SEARCH) { } else if (mode == FAST_SEARCH) {
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
@ -56,19 +62,20 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
mlast = m - 1; mlast = m - 1;
skip = mlast - 1; skip = mlast - 1;
mask = 0;
if (mode != FAST_RSEARCH) { if (mode != FAST_RSEARCH) {
/* create compressed boyer-moore delta 1 table */ /* create compressed boyer-moore delta 1 table */
/* process pattern[:-1] */ /* process pattern[:-1] */
for (mask = i = 0; i < mlast; i++) { for (i = 0; i < mlast; i++) {
mask |= (1 << (p[i] & 0x1F)); BLOOM_ADD(mask, p[i]);
if (p[i] == p[mlast]) if (p[i] == p[mlast])
skip = mlast - i - 1; skip = mlast - i - 1;
} }
/* process pattern[-1] outside the loop */ /* process pattern[-1] outside the loop */
mask |= (1 << (p[mlast] & 0x1F)); BLOOM_ADD(mask, p[mlast]);
for (i = 0; i <= w; i++) { for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */ /* note: using mlast in the skip path slows things down on x86 */
@ -82,17 +89,19 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (mode != FAST_COUNT) if (mode != FAST_COUNT)
return i; return i;
count++; count++;
if (count == maxcount)
return maxcount;
i = i + mlast; i = i + mlast;
continue; continue;
} }
/* miss: check if next character is part of pattern */ /* miss: check if next character is part of pattern */
if (!(mask & (1 << (s[i+m] & 0x1F)))) if (!BLOOM(mask, s[i+m]))
i = i + m; i = i + m;
else else
i = i + skip; i = i + skip;
} else { } else {
/* skip: check if next character is part of pattern */ /* skip: check if next character is part of pattern */
if (!(mask & (1 << (s[i+m] & 0x1F)))) if (!BLOOM(mask, s[i+m]))
i = i + m; i = i + m;
} }
} }
@ -101,10 +110,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* create compressed boyer-moore delta 1 table */ /* create compressed boyer-moore delta 1 table */
/* process pattern[0] outside the loop */ /* process pattern[0] outside the loop */
mask = (1 << (p[0] & 0x1F)); BLOOM_ADD(mask, p[0]);
/* process pattern[:0:-1] */ /* process pattern[:0:-1] */
for (i = mlast; i > 0; i--) { for (i = mlast; i > 0; i--) {
mask |= (1 << (p[i] & 0x1F)); BLOOM_ADD(mask, p[i]);
if (p[i] == p[0]) if (p[i] == p[0])
skip = i - 1; skip = i - 1;
} }
@ -119,13 +128,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* got a match! */ /* got a match! */
return i; return i;
/* miss: check if previous character is part of pattern */ /* miss: check if previous character is part of pattern */
if (!(mask & (1 << (s[i-1] & 0x1F)))) if (!BLOOM(mask, s[i-1]))
i = i - m; i = i - m;
else else
i = i - skip; i = i - skip;
} else { } else {
/* skip: check if previous character is part of pattern */ /* skip: check if previous character is part of pattern */
if (!(mask & (1 << (s[i-1] & 0x1F)))) if (!BLOOM(mask, s[i-1]))
i = i - m; i = i - m;
} }
} }
@ -137,10 +146,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
} }
#endif #endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

View file

@ -19,7 +19,7 @@ stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
if (sub_len == 0) if (sub_len == 0)
return offset; return offset;
pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH); pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);
if (pos >= 0) if (pos >= 0)
pos += offset; pos += offset;
@ -39,7 +39,7 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
if (sub_len == 0) if (sub_len == 0)
return str_len + offset; return str_len + offset;
pos = fastsearch(str, str_len, sub, sub_len, FAST_RSEARCH); pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
if (pos >= 0) if (pos >= 0)
pos += offset; pos += offset;
@ -47,22 +47,27 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos; return pos;
} }
/* Helper macro to fix up start/end slice values in place.

   end:   if greater than len it is set to len; if negative, len is added
          to it and a result that is still negative is set to 0.
   start: if negative, len is added to it and a result that is still
          negative is set to 0.  A start greater than len is left as is.

   start and end are assigned to, so they must be modifiable lvalues; all
   three arguments may be evaluated more than once.  The expansion is a
   sequence of bare statements (it is not wrapped in do { } while (0)), so
   it should not be used as the unbraced body of an if/else. */
#define ADJUST_INDICES(start, end, len) \
if (end > len) \
end = len; \
else if (end < 0) { \
end += len; \
if (end < 0) \
end = 0; \
} \
if (start < 0) { \
start += len; \
if (start < 0) \
start = 0; \
}
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
if (start < 0) ADJUST_INDICES(start, end, str_len);
start += str_len;
if (start < 0)
start = 0;
if (end > str_len)
end = str_len;
if (end < 0)
end += str_len;
if (end < 0)
end = 0;
return stringlib_find(str + start, end - start, sub, sub_len, start); return stringlib_find(str + start, end - start, sub, sub_len, start);
} }
@ -71,21 +76,11 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
if (start < 0) ADJUST_INDICES(start, end, str_len);
start += str_len;
if (start < 0)
start = 0;
if (end > str_len)
end = str_len;
if (end < 0)
end += str_len;
if (end < 0)
end = 0;
return stringlib_rfind(str + start, end - start, sub, sub_len, start); return stringlib_rfind(str + start, end - start, sub, sub_len, start);
} }
#if defined(STRINGLIB_STR) && !defined(FROM_BYTEARRAY) #ifdef STRINGLIB_WANT_CONTAINS_OBJ
Py_LOCAL_INLINE(int) Py_LOCAL_INLINE(int)
stringlib_contains_obj(PyObject* str, PyObject* sub) stringlib_contains_obj(PyObject* str, PyObject* sub)
@ -96,9 +91,9 @@ stringlib_contains_obj(PyObject* str, PyObject* sub)
) != -1; ) != -1;
} }
#endif /* STRINGLIB_STR */ #endif /* STRINGLIB_WANT_CONTAINS_OBJ */
#ifdef FROM_UNICODE #if STRINGLIB_IS_UNICODE
/* /*
This function is a helper for the "find" family (find, rfind, index, This function is a helper for the "find" family (find, rfind, index,
@ -146,13 +141,6 @@ _ParseTupleFinds (PyObject *args, PyObject **substring,
return 1; return 1;
} }
#endif /* FROM_UNICODE */ #endif /* STRINGLIB_IS_UNICODE */
#endif /* STRINGLIB_FIND_H */ #endif /* STRINGLIB_FIND_H */
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

View file

@ -8,10 +8,10 @@
#endif #endif
Py_LOCAL_INLINE(PyObject*) Py_LOCAL_INLINE(PyObject*)
stringlib_partition( stringlib_partition(PyObject* str_obj,
PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len PyObject* sep_obj,
) const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{ {
PyObject* out; PyObject* out;
Py_ssize_t pos; Py_ssize_t pos;
@ -25,15 +25,21 @@ stringlib_partition(
if (!out) if (!out)
return NULL; return NULL;
pos = fastsearch(str, str_len, sep, sep_len, FAST_SEARCH); pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH);
if (pos < 0) { if (pos < 0) {
#if STRINGLIB_MUTABLE
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));
#else
Py_INCREF(str_obj); Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
Py_INCREF(STRINGLIB_EMPTY); Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY); Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
#endif
return out; return out;
} }
@ -52,10 +58,10 @@ stringlib_partition(
} }
Py_LOCAL_INLINE(PyObject*) Py_LOCAL_INLINE(PyObject*)
stringlib_rpartition( stringlib_rpartition(PyObject* str_obj,
PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len PyObject* sep_obj,
) const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{ {
PyObject* out; PyObject* out;
Py_ssize_t pos; Py_ssize_t pos;
@ -69,15 +75,21 @@ stringlib_rpartition(
if (!out) if (!out)
return NULL; return NULL;
pos = fastsearch(str, str_len, sep, sep_len, FAST_RSEARCH); pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
if (pos < 0) { if (pos < 0) {
#if STRINGLIB_MUTABLE
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));
#else
Py_INCREF(STRINGLIB_EMPTY); Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY); Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(str_obj); Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj); PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
#endif
return out; return out;
} }
@ -96,10 +108,3 @@ stringlib_rpartition(
} }
#endif #endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

394
Objects/stringlib/split.h Normal file
View file

@ -0,0 +1,394 @@
/* stringlib: split implementation */
#ifndef STRINGLIB_SPLIT_H
#define STRINGLIB_SPLIT_H
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to allocate up front for at most `maxsplit` splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so that any expression may be passed; it can
   be evaluated twice, so it must be free of side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
/* Create a new object from data[left:right] and append it to `list`.
   Expects `list` and `sub` variables and an `onError` label in the
   enclosing function; jumps to `onError` if either the allocation or the
   append fails.  The reference held in `sub` is released once the list
   owns the item.  Wrapped in do { } while (0) so that the macro behaves as
   a single statement (safe inside an unbraced if/else). */
#define SPLIT_APPEND(data, left, right)         \
    do {                                        \
        sub = STRINGLIB_NEW((data) + (left),    \
                            (right) - (left));  \
        if (sub == NULL)                        \
            goto onError;                       \
        if (PyList_Append(list, sub)) {         \
            Py_DECREF(sub);                     \
            goto onError;                       \
        }                                       \
        Py_DECREF(sub);                         \
    } while (0)
/* Create a new object from data[left:right] and store it as item number
   `count` of `list`, then increment `count`.  The first MAX_PREALLOC items
   are written straight into the preallocated slots (the list takes over the
   reference); later items are appended.  Expects `list`, `sub` and `count`
   variables and an `onError` label in the enclosing function.  Wrapped in
   do { } while (0) so that the macro behaves as a single statement (safe
   inside an unbraced if/else). */
#define SPLIT_ADD(data, left, right)                \
    do {                                            \
        sub = STRINGLIB_NEW((data) + (left),        \
                            (right) - (left));      \
        if (sub == NULL)                            \
            goto onError;                           \
        if (count < MAX_PREALLOC) {                 \
            PyList_SET_ITEM(list, count, sub);      \
        } else {                                    \
            if (PyList_Append(list, sub)) {         \
                Py_DECREF(sub);                     \
                goto onError;                       \
            }                                       \
            Py_DECREF(sub);                         \
        }                                           \
        count++;                                    \
    } while (0)
/* Always force the list to the expected size.  The list may have been
   created with more slots than were eventually filled, so its size field is
   overwritten with `count`, which is taken from the enclosing function's
   scope (it is not a macro parameter). */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
/* Split str[0:str_len] on runs of whitespace (as decided by
   STRINGLIB_ISSPACE), cutting off at most `maxcount` words from the left.
   Returns a new list, or NULL on failure.

   Whitespace in front of a word never yields an empty piece.  If input is
   left over once `maxcount` words have been cut, the remainder, minus its
   leading whitespace, is added as one last piece when it is non-empty.

   `maxcount` is expected to be non-negative: the preallocated list is sized
   from it.

   For immutable types (STRINGLIB_MUTABLE undefined or 0), if the whole
   input is one word and `str_obj` is of the exact base type, `str_obj`
   itself is stored in the list rather than a copy.  The macro is tested by
   value (#if), like the #if STRINGLIB_MUTABLE checks in partition.h, so a
   type that defines it to 0 still gets this fast path. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split_whitespace(PyObject* str_obj,
                           const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                           Py_ssize_t maxcount)
{
    Py_ssize_t i, j, count=0;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
    PyObject *sub;

    if (list == NULL)
        return NULL;

    i = j = 0;
    while (maxcount-- > 0) {
        /* Skip the whitespace in front of the next word */
        while (i < str_len && STRINGLIB_ISSPACE(str[i]))
            i++;
        if (i == str_len) break;
        /* The next word is str[j:i] */
        j = i; i++;
        while (i < str_len && !STRINGLIB_ISSPACE(str[i]))
            i++;
#if !STRINGLIB_MUTABLE
        if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
            /* No whitespace in str_obj, so just use it as list[0] */
            Py_INCREF(str_obj);
            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
            count++;
            break;
        }
#endif
        SPLIT_ADD(str, j, i);
    }

    if (i < str_len) {
        /* Only occurs when maxcount was reached */
        /* Skip any remaining whitespace and copy to end of string */
        while (i < str_len && STRINGLIB_ISSPACE(str[i]))
            i++;
        if (i != str_len) {
            SPLIT_ADD(str, i, str_len);
        }
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str[0:str_len] at occurrences of the single character `ch`,
   scanning left to right and performing at most `maxcount` splits.
   Returns a new list, or NULL on failure.  The text after the last split
   point (possibly empty) is always added as the final piece.

   `maxcount` is expected to be non-negative: the preallocated list is sized
   from it.

   For immutable types (STRINGLIB_MUTABLE undefined or 0), if no split was
   made and `str_obj` is of the exact base type, `str_obj` itself becomes
   the only item instead of a copy.  The macro is tested by value (#if),
   like the #if STRINGLIB_MUTABLE checks in partition.h, so a type that
   defines it to 0 still gets this fast path. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split_char(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     const STRINGLIB_CHAR ch,
                     Py_ssize_t maxcount)
{
    Py_ssize_t i, j, count=0;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
    PyObject *sub;

    if (list == NULL)
        return NULL;

    /* i is the start of the current piece, j is the scan position */
    i = j = 0;
    while ((j < str_len) && (maxcount-- > 0)) {
        for(; j < str_len; j++) {
            /* I found that using memchr makes no difference */
            if (str[j] == ch) {
                SPLIT_ADD(str, i, j);
                i = j = j + 1;
                break;
            }
        }
    }
#if !STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* ch not in str_obj, so just use str_obj as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    if (i <= str_len) {
        SPLIT_ADD(str, i, str_len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str[0:str_len] at occurrences of the separator sep[0:sep_len],
   scanning left to right and performing at most `maxcount` splits.
   Returns a new list, or NULL on failure.

   An empty separator sets ValueError("empty separator") and returns NULL.
   A one-character separator is handed to stringlib_split_char().
   Otherwise each separator is located with fastsearch(); the text after the
   last split point (possibly empty) is always added as the final piece.

   `maxcount` is expected to be non-negative: the preallocated list is sized
   from it.

   For immutable types (STRINGLIB_MUTABLE undefined or 0), if no split was
   made and `str_obj` is of the exact base type, `str_obj` itself becomes
   the only item instead of a copy.  The macro is tested by value (#if),
   like the #if STRINGLIB_MUTABLE checks in partition.h, so a type that
   defines it to 0 still gets this fast path. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split(PyObject* str_obj,
                const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                Py_ssize_t maxcount)
{
    Py_ssize_t i, j, pos, count=0;
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    else if (sep_len == 1)
        return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    /* i is the start of the current piece */
    i = j = 0;
    while (maxcount-- > 0) {
        /* pos is relative to str+i */
        pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
        if (pos < 0)
            break;
        j = i + pos;
        SPLIT_ADD(str, i, j);
        i = j + sep_len;
    }
#if !STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* No match in str_obj, so just use it as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    {
        SPLIT_ADD(str, i, str_len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str[0:str_len] on runs of whitespace (as decided by
   STRINGLIB_ISSPACE), cutting off at most `maxcount` words from the right.
   Returns a new list, or NULL on failure.

   Words are collected right to left and the list is reversed before it is
   returned, so the pieces come out in their original order.  If input is
   left over once `maxcount` words have been cut, the remainder, minus its
   trailing whitespace, is added as one more piece when it is non-empty.

   `maxcount` is expected to be non-negative: the preallocated list is sized
   from it.

   For immutable types (STRINGLIB_MUTABLE undefined or 0), if the whole
   input is one word and `str_obj` is of the exact base type, `str_obj`
   itself is stored in the list rather than a copy.  The macro is tested by
   value (#if), like the #if STRINGLIB_MUTABLE checks in partition.h, so a
   type that defines it to 0 still gets this fast path. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit_whitespace(PyObject* str_obj,
                            const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                            Py_ssize_t maxcount)
{
    Py_ssize_t i, j, count=0;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
    PyObject *sub;

    if (list == NULL)
        return NULL;

    i = j = str_len - 1;
    while (maxcount-- > 0) {
        /* Skip the whitespace behind the next word */
        while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
            i--;
        if (i < 0) break;
        /* The next word is str[i+1:j+1] */
        j = i; i--;
        while (i >= 0 && !STRINGLIB_ISSPACE(str[i]))
            i--;
#if !STRINGLIB_MUTABLE
        if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
            /* No whitespace in str_obj, so just use it as list[0] */
            Py_INCREF(str_obj);
            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
            count++;
            break;
        }
#endif
        SPLIT_ADD(str, i + 1, j + 1);
    }

    if (i >= 0) {
        /* Only occurs when maxcount was reached */
        /* Skip any remaining whitespace and copy to beginning of string */
        while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
            i--;
        if (i >= 0) {
            SPLIT_ADD(str, 0, i + 1);
        }
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str[0:str_len] at occurrences of the single character `ch`,
   scanning right to left and performing at most `maxcount` splits.
   Returns a new list, or NULL on failure.

   Pieces are collected right to left and the list is reversed before it is
   returned.  The text in front of the leftmost split point (possibly empty)
   is always added as a piece.

   `maxcount` is expected to be non-negative: the preallocated list is sized
   from it.

   For immutable types (STRINGLIB_MUTABLE undefined or 0), if no split was
   made and `str_obj` is of the exact base type, `str_obj` itself becomes
   the only item instead of a copy.  The macro is tested by value (#if),
   like the #if STRINGLIB_MUTABLE checks in partition.h, so a type that
   defines it to 0 still gets this fast path. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit_char(PyObject* str_obj,
                      const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                      const STRINGLIB_CHAR ch,
                      Py_ssize_t maxcount)
{
    Py_ssize_t i, j, count=0;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
    PyObject *sub;

    if (list == NULL)
        return NULL;

    /* j is the last index of the current piece, i is the scan position */
    i = j = str_len - 1;
    while ((i >= 0) && (maxcount-- > 0)) {
        for(; i >= 0; i--) {
            if (str[i] == ch) {
                SPLIT_ADD(str, i + 1, j + 1);
                j = i = i - 1;
                break;
            }
        }
    }
#if !STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* ch not in str_obj, so just use str_obj as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    if (j >= -1) {
        SPLIT_ADD(str, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str[0:str_len] at occurrences of the separator sep[0:sep_len],
   scanning right to left and performing at most `maxcount` splits.
   Returns a new list, or NULL on failure.

   An empty separator sets ValueError("empty separator") and returns NULL.
   A one-character separator is handed to stringlib_rsplit_char().
   Otherwise each separator is located with a reverse fastsearch() over
   str[0:j]; pieces are collected right to left and the list is reversed
   before it is returned.  The text in front of the leftmost split point
   (possibly empty) is always added as a piece.

   `maxcount` is expected to be non-negative: the preallocated list is sized
   from it.

   For immutable types (STRINGLIB_MUTABLE undefined or 0), if no split was
   made and `str_obj` is of the exact base type, `str_obj` itself becomes
   the only item instead of a copy.  The macro is tested by value (#if),
   like the #if STRINGLIB_MUTABLE checks in partition.h, so a type that
   defines it to 0 still gets this fast path. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit(PyObject* str_obj,
                 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                 Py_ssize_t maxcount)
{
    Py_ssize_t j, pos, count=0;
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    else if (sep_len == 1)
        return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    /* j is the end (exclusive) of the part that is still unsplit */
    j = str_len;
    while (maxcount-- > 0) {
        pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH);
        if (pos < 0)
            break;
        SPLIT_ADD(str, pos + sep_len, j);
        j = pos;
    }
#if !STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* No match in str_obj, so just use it as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    {
        SPLIT_ADD(str, 0, j);
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str[0:str_len] into lines and return them as a new list, or NULL
   on failure.

   A line ends at any character accepted by STRINGLIB_ISLINEBREAK; a "\r\n"
   pair counts as a single line break.  The line break is left out of each
   piece unless `keepends` is true.  An empty input gives an empty list, and
   a line break at the very end does not produce an extra empty piece.

   For immutable types (STRINGLIB_MUTABLE undefined or 0), if the first
   piece spans the whole input and `str_obj` is of the exact base type,
   `str_obj` itself is appended instead of a copy.  The macro is tested by
   value (#if), like the #if STRINGLIB_MUTABLE checks in partition.h, so a
   type that defines it to 0 still gets this fast path. */
Py_LOCAL_INLINE(PyObject *)
stringlib_splitlines(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     int keepends)
{
    /* This does not use the preallocated list because splitlines is
       usually run with hundreds of newlines.  The overhead of
       switching between PyList_SET_ITEM and append causes about a
       2-3% slowdown for that common case.  A smarter implementation
       could move the if check out, so the SET_ITEMs are done first
       and the appends only done when the prealloc buffer is full.
       That's too much work for little gain.*/

    register Py_ssize_t i;
    register Py_ssize_t j;
    PyObject *list = PyList_New(0);
    PyObject *sub;

    if (list == NULL)
        return NULL;

    /* j is the start of the current line, i is the scan position */
    for (i = j = 0; i < str_len; ) {
        Py_ssize_t eol;

        /* Find a line and append it */
        while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i]))
            i++;

        /* Skip the line break reading CRLF as one line break */
        eol = i;
        if (i < str_len) {
            if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n')
                i += 2;
            else
                i++;
            if (keepends)
                eol = i;
        }
#if !STRINGLIB_MUTABLE
        if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
            /* No linebreak in str_obj, so just use it as list[0] */
            if (PyList_Append(list, str_obj))
                goto onError;
            break;
        }
#endif
        SPLIT_APPEND(str, j, eol);
        j = i;
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
#endif

View file

@ -11,6 +11,8 @@
#define STRINGLIB_TYPE_NAME "string" #define STRINGLIB_TYPE_NAME "string"
#define STRINGLIB_PARSE_CODE "S" #define STRINGLIB_PARSE_CODE "S"
#define STRINGLIB_EMPTY nullstring #define STRINGLIB_EMPTY nullstring
#define STRINGLIB_ISSPACE Py_ISSPACE
/* True when x is '\n' or '\r'.  x is parenthesized so that any expression
   may be passed; it can be evaluated twice, so avoid side effects. */
#define STRINGLIB_ISLINEBREAK(x) (((x) == '\n') || ((x) == '\r'))
#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9')) #define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9'))
#define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1) #define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
#define STRINGLIB_TOUPPER Py_TOUPPER #define STRINGLIB_TOUPPER Py_TOUPPER
@ -21,8 +23,11 @@
#define STRINGLIB_NEW PyString_FromStringAndSize #define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_RESIZE _PyString_Resize #define STRINGLIB_RESIZE _PyString_Resize
#define STRINGLIB_CHECK PyString_Check #define STRINGLIB_CHECK PyString_Check
#define STRINGLIB_CHECK_EXACT PyString_CheckExact
#define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping #define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale #define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale
#define STRINGLIB_WANT_CONTAINS_OBJ 1
#endif /* !STRINGLIB_STRINGDEFS_H */ #endif /* !STRINGLIB_STRINGDEFS_H */

View file

@ -1,13 +1,6 @@
/* NOTE: this API is -ONLY- for use with single byte character strings. */ /* NOTE: this API is -ONLY- for use with single byte character strings. */
/* Do not use it with Unicode. */ /* Do not use it with Unicode. */
#include "bytes_methods.h"
#ifndef STRINGLIB_MUTABLE
#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
#define STRINGLIB_MUTABLE 0
#endif
/* the more complicated methods. parts of these should be pulled out into the /* the more complicated methods. parts of these should be pulled out into the
shared code in bytes_methods.c to cut down on duplicate code bloat. */ shared code in bytes_methods.c to cut down on duplicate code bloat. */
@ -269,87 +262,3 @@ stringlib_zfill(PyObject *self, PyObject *args)
return (PyObject*) s; return (PyObject*) s;
} }
#define _STRINGLIB_SPLIT_APPEND(data, left, right) \
str = STRINGLIB_NEW((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str);
PyDoc_STRVAR(splitlines__doc__,
"B.splitlines([keepends]) -> list of lines\n\
\n\
Return a list of the lines in B, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.");
static PyObject*
stringlib_splitlines(PyObject *self, PyObject *args)
{
register Py_ssize_t i;
register Py_ssize_t j;
Py_ssize_t len;
int keepends = 0;
PyObject *list;
PyObject *str;
char *data;
if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
return NULL;
data = STRINGLIB_STR(self);
len = STRINGLIB_LEN(self);
/* This does not use the preallocated list because splitlines is
usually run with hundreds of newlines. The overhead of
switching between PyList_SET_ITEM and append causes about a
2-3% slowdown for that common case. A smarter implementation
could move the if check out, so the SET_ITEMs are done first
and the appends only done when the prealloc buffer is full.
That's too much work for little gain.*/
list = PyList_New(0);
if (!list)
goto onError;
for (i = j = 0; i < len; ) {
Py_ssize_t eol;
/* Find a line and append it */
while (i < len && data[i] != '\n' && data[i] != '\r')
i++;
/* Skip the line break reading CRLF as one line break */
eol = i;
if (i < len) {
if (data[i] == '\r' && i + 1 < len &&
data[i+1] == '\n')
i += 2;
else
i++;
if (keepends)
eol = i;
}
_STRINGLIB_SPLIT_APPEND(data, j, eol);
j = i;
}
if (j < len) {
_STRINGLIB_SPLIT_APPEND(data, j, len);
}
return list;
onError:
Py_XDECREF(list);
return NULL;
}
#undef _STRINGLIB_SPLIT_APPEND

View file

@ -11,6 +11,8 @@
#define STRINGLIB_TYPE_NAME "unicode" #define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U" #define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty #define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL #define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL #define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER #define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER
@ -21,6 +23,7 @@
#define STRINGLIB_NEW PyUnicode_FromUnicode #define STRINGLIB_NEW PyUnicode_FromUnicode
#define STRINGLIB_RESIZE PyUnicode_Resize #define STRINGLIB_RESIZE PyUnicode_Resize
#define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping #define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
#if PY_VERSION_HEX < 0x03000000 #if PY_VERSION_HEX < 0x03000000

View file

@ -841,6 +841,7 @@ PyString_AsStringAndSize(register PyObject *obj,
#include "stringlib/count.h" #include "stringlib/count.h"
#include "stringlib/find.h" #include "stringlib/find.h"
#include "stringlib/partition.h" #include "stringlib/partition.h"
#include "stringlib/split.h"
#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
#include "stringlib/localeutil.h" #include "stringlib/localeutil.h"
@ -1425,145 +1426,6 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3) #define STRIPNAME(i) (stripformat[i]+3)
/* Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length) \
(target[offset] == pattern[0] && \
target[offset+length-1] == pattern[length-1] && \
!memcmp(target+offset+1, pattern+1, length-2) )
/* Overallocate the initial list to reduce the number of reallocs for small
split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
resizes, to sizes 4, 8, then 16. Most observed string splits are for human
text (roughly 11 words per line) and field delimited data (usually 1-10
fields). For large strings the split algorithms are bandwidth limited
so increasing the preallocation likely will not improve things.*/
#define MAX_PREALLOC 12
/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
(maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
#define SPLIT_APPEND(data, left, right) \
str = PyString_FromStringAndSize((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str);
#define SPLIT_ADD(data, left, right) { \
str = PyString_FromStringAndSize((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (count < MAX_PREALLOC) { \
PyList_SET_ITEM(list, count, str); \
} else { \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str); \
} \
count++; }
/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
#define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
Py_LOCAL_INLINE(PyObject *)
split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
const char *s = PyString_AS_STRING(self);
Py_ssize_t i, j, count=0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL)
return NULL;
i = j = 0;
while (maxsplit-- > 0) {
SKIP_SPACE(s, i, len);
if (i==len) break;
j = i; i++;
SKIP_NONSPACE(s, i, len);
if (j == 0 && i == len && PyString_CheckExact(self)) {
/* No whitespace in self, so just use it as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
break;
}
SPLIT_ADD(s, j, i);
}
if (i < len) {
/* Only occurs when maxsplit was reached */
/* Skip any remaining whitespace and copy to end of string */
SKIP_SPACE(s, i, len);
if (i != len)
SPLIT_ADD(s, i, len);
}
FIX_PREALLOC_SIZE(list);
return list;
onError:
Py_DECREF(list);
return NULL;
}
Py_LOCAL_INLINE(PyObject *)
split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
const char *s = PyString_AS_STRING(self);
register Py_ssize_t i, j, count=0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
i = j = 0;
while ((j < len) && (maxcount-- > 0)) {
for(; j<len; j++) {
/* I found that using memchr makes no difference */
if (s[j] == ch) {
SPLIT_ADD(s, i, j);
i = j = j + 1;
break;
}
}
}
if (i == 0 && count == 0 && PyString_CheckExact(self)) {
/* ch not in self, so just use self as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
}
else if (i <= len) {
SPLIT_ADD(s, i, len);
}
FIX_PREALLOC_SIZE(list);
return list;
onError:
Py_DECREF(list);
return NULL;
}
PyDoc_STRVAR(split__doc__, PyDoc_STRVAR(split__doc__,
"S.split([sep [,maxsplit]]) -> list of strings\n\ "S.split([sep [,maxsplit]]) -> list of strings\n\
\n\ \n\
@ -1576,17 +1438,17 @@ from the result.");
static PyObject * static PyObject *
string_split(PyStringObject *self, PyObject *args) string_split(PyStringObject *self, PyObject *args)
{ {
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j, pos; Py_ssize_t len = PyString_GET_SIZE(self), n;
Py_ssize_t maxsplit = -1, count=0; Py_ssize_t maxsplit = -1;
const char *s = PyString_AS_STRING(self), *sub; const char *s = PyString_AS_STRING(self), *sub;
PyObject *list, *str, *subobj = Py_None; PyObject *subobj = Py_None;
if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
return NULL; return NULL;
if (maxsplit < 0) if (maxsplit < 0)
maxsplit = PY_SSIZE_T_MAX; maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None) if (subobj == Py_None)
return split_whitespace(self, len, maxsplit); return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
if (PyString_Check(subobj)) { if (PyString_Check(subobj)) {
sub = PyString_AS_STRING(subobj); sub = PyString_AS_STRING(subobj);
n = PyString_GET_SIZE(subobj); n = PyString_GET_SIZE(subobj);
@ -1598,33 +1460,7 @@ string_split(PyStringObject *self, PyObject *args)
else if (PyObject_AsCharBuffer(subobj, &sub, &n)) else if (PyObject_AsCharBuffer(subobj, &sub, &n))
return NULL; return NULL;
if (n == 0) { return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL;
}
else if (n == 1)
return split_char(self, len, sub[0], maxsplit);
list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL)
return NULL;
i = j = 0;
while (maxsplit-- > 0) {
pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
if (pos < 0)
break;
j = i + pos;
SPLIT_ADD(s, i, j);
i = j + n;
}
SPLIT_ADD(s, i, len);
FIX_PREALLOC_SIZE(list);
return list;
onError:
Py_DECREF(list);
return NULL;
} }
PyDoc_STRVAR(partition__doc__, PyDoc_STRVAR(partition__doc__,
@ -1689,90 +1525,6 @@ string_rpartition(PyStringObject *self, PyObject *sep_obj)
); );
} }
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
const char *s = PyString_AS_STRING(self);
Py_ssize_t i, j, count=0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL)
return NULL;
i = j = len-1;
while (maxsplit-- > 0) {
RSKIP_SPACE(s, i);
if (i<0) break;
j = i; i--;
RSKIP_NONSPACE(s, i);
if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
/* No whitespace in self, so just use it as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
break;
}
SPLIT_ADD(s, i + 1, j + 1);
}
if (i >= 0) {
/* Only occurs when maxsplit was reached */
/* Skip any remaining whitespace and copy to beginning of string */
RSKIP_SPACE(s, i);
if (i >= 0)
SPLIT_ADD(s, 0, i + 1);
}
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
}
Py_LOCAL_INLINE(PyObject *)
rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
const char *s = PyString_AS_STRING(self);
register Py_ssize_t i, j, count=0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
i = j = len - 1;
while ((i >= 0) && (maxcount-- > 0)) {
for (; i >= 0; i--) {
if (s[i] == ch) {
SPLIT_ADD(s, i + 1, j + 1);
j = i = i - 1;
break;
}
}
}
if (i < 0 && count == 0 && PyString_CheckExact(self)) {
/* ch not in self, so just use self as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
}
else if (j >= -1) {
SPLIT_ADD(s, 0, j + 1);
}
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
}
PyDoc_STRVAR(rsplit__doc__, PyDoc_STRVAR(rsplit__doc__,
"S.rsplit([sep [,maxsplit]]) -> list of strings\n\ "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
\n\ \n\
@ -1785,17 +1537,17 @@ is a separator.");
static PyObject * static PyObject *
string_rsplit(PyStringObject *self, PyObject *args) string_rsplit(PyStringObject *self, PyObject *args)
{ {
Py_ssize_t len = PyString_GET_SIZE(self), n, j, pos; Py_ssize_t len = PyString_GET_SIZE(self), n;
Py_ssize_t maxsplit = -1, count=0; Py_ssize_t maxsplit = -1;
const char *s = PyString_AS_STRING(self), *sub; const char *s = PyString_AS_STRING(self), *sub;
PyObject *list, *str, *subobj = Py_None; PyObject *subobj = Py_None;
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
return NULL; return NULL;
if (maxsplit < 0) if (maxsplit < 0)
maxsplit = PY_SSIZE_T_MAX; maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None) if (subobj == Py_None)
return rsplit_whitespace(self, len, maxsplit); return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
if (PyString_Check(subobj)) { if (PyString_Check(subobj)) {
sub = PyString_AS_STRING(subobj); sub = PyString_AS_STRING(subobj);
n = PyString_GET_SIZE(subobj); n = PyString_GET_SIZE(subobj);
@ -1807,35 +1559,7 @@ string_rsplit(PyStringObject *self, PyObject *args)
else if (PyObject_AsCharBuffer(subobj, &sub, &n)) else if (PyObject_AsCharBuffer(subobj, &sub, &n))
return NULL; return NULL;
if (n == 0) { return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL;
}
else if (n == 1)
return rsplit_char(self, len, sub[0], maxsplit);
list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL)
return NULL;
j = len;
while (maxsplit-- > 0) {
pos = fastsearch(s, j, sub, n, FAST_RSEARCH);
if (pos < 0)
break;
SPLIT_ADD(s, pos + n, j);
j = pos;
}
SPLIT_ADD(s, 0, j);
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
} }
@ -1950,19 +1674,19 @@ _PyString_Join(PyObject *sep, PyObject *x)
return string_join((PyStringObject *)sep, x); return string_join((PyStringObject *)sep, x);
} }
Py_LOCAL_INLINE(void) /* helper macro to fixup start/end slice values */
string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len) #define ADJUST_INDICES(start, end, len) \
{ if (end > len) \
if (*end > len) end = len; \
*end = len; else if (end < 0) { \
else if (*end < 0) end += len; \
*end += len; if (end < 0) \
if (*end < 0) end = 0; \
*end = 0; } \
if (*start < 0) if (start < 0) { \
*start += len; start += len; \
if (*start < 0) if (start < 0) \
*start = 0; start = 0; \
} }
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
@ -2417,10 +2141,10 @@ string_count(PyStringObject *self, PyObject *args)
else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
return NULL; return NULL;
string_adjust_indices(&start, &end, PyString_GET_SIZE(self)); ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
return PyInt_FromSsize_t( return PyInt_FromSsize_t(
stringlib_count(str + start, end - start, sub, sub_len) stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
); );
} }
@ -2583,9 +2307,6 @@ string_translate(PyStringObject *self, PyObject *args)
} }
#define FORWARD 1
#define REVERSE -1
/* find and count characters and substrings */ /* find and count characters and substrings */
#define findchar(target, target_len, c) \ #define findchar(target, target_len, c) \
@ -2621,93 +2342,6 @@ countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
return count; return count;
} }
Py_LOCAL(Py_ssize_t)
findstring(const char *target, Py_ssize_t target_len,
const char *pattern, Py_ssize_t pattern_len,
Py_ssize_t start,
Py_ssize_t end,
int direction)
{
if (start < 0) {
start += target_len;
if (start < 0)
start = 0;
}
if (end > target_len) {
end = target_len;
} else if (end < 0) {
end += target_len;
if (end < 0)
end = 0;
}
/* zero-length substrings always match at the first attempt */
if (pattern_len == 0)
return (direction > 0) ? start : end;
end -= pattern_len;
if (direction < 0) {
for (; end >= start; end--)
if (Py_STRING_MATCH(target, end, pattern, pattern_len))
return end;
} else {
for (; start <= end; start++)
if (Py_STRING_MATCH(target, start, pattern, pattern_len))
return start;
}
return -1;
}
Py_LOCAL_INLINE(Py_ssize_t)
countstring(const char *target, Py_ssize_t target_len,
const char *pattern, Py_ssize_t pattern_len,
Py_ssize_t start,
Py_ssize_t end,
int direction, Py_ssize_t maxcount)
{
Py_ssize_t count=0;
if (start < 0) {
start += target_len;
if (start < 0)
start = 0;
}
if (end > target_len) {
end = target_len;
} else if (end < 0) {
end += target_len;
if (end < 0)
end = 0;
}
/* zero-length substrings match everywhere */
if (pattern_len == 0 || maxcount == 0) {
if (target_len+1 < maxcount)
return target_len+1;
return maxcount;
}
end -= pattern_len;
if (direction < 0) {
for (; (end >= start); end--)
if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
count++;
if (--maxcount <= 0) break;
end -= pattern_len-1;
}
} else {
for (; (start <= end); start++)
if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
count++;
if (--maxcount <= 0)
break;
start += pattern_len-1;
}
}
return count;
}
/* Algorithms for different cases of string replacement */ /* Algorithms for different cases of string replacement */
@ -2828,9 +2462,8 @@ replace_delete_substring(PyStringObject *self,
self_len = PyString_GET_SIZE(self); self_len = PyString_GET_SIZE(self);
self_s = PyString_AS_STRING(self); self_s = PyString_AS_STRING(self);
count = countstring(self_s, self_len, count = stringlib_count(self_s, self_len,
from_s, from_len, from_s, from_len,
0, self_len, 1,
maxcount); maxcount);
if (count == 0) { if (count == 0) {
@ -2850,9 +2483,9 @@ replace_delete_substring(PyStringObject *self,
start = self_s; start = self_s;
end = self_s + self_len; end = self_s + self_len;
while (count-- > 0) { while (count-- > 0) {
offset = findstring(start, end-start, offset = stringlib_find(start, end-start,
from_s, from_len, from_s, from_len,
0, end-start, FORWARD); 0);
if (offset == -1) if (offset == -1)
break; break;
next = start + offset; next = start + offset;
@ -2928,9 +2561,9 @@ replace_substring_in_place(PyStringObject *self,
self_s = PyString_AS_STRING(self); self_s = PyString_AS_STRING(self);
self_len = PyString_GET_SIZE(self); self_len = PyString_GET_SIZE(self);
offset = findstring(self_s, self_len, offset = stringlib_find(self_s, self_len,
from_s, from_len, from_s, from_len,
0, self_len, FORWARD); 0);
if (offset == -1) { if (offset == -1) {
/* No matches; return the original string */ /* No matches; return the original string */
return return_self(self); return return_self(self);
@ -2950,9 +2583,9 @@ replace_substring_in_place(PyStringObject *self,
end = result_s + self_len; end = result_s + self_len;
while ( --maxcount > 0) { while ( --maxcount > 0) {
offset = findstring(start, end-start, offset = stringlib_find(start, end-start,
from_s, from_len, from_s, from_len,
0, end-start, FORWARD); 0);
if (offset==-1) if (offset==-1)
break; break;
Py_MEMCPY(start+offset, to_s, from_len); Py_MEMCPY(start+offset, to_s, from_len);
@ -3044,9 +2677,10 @@ replace_substring(PyStringObject *self,
self_s = PyString_AS_STRING(self); self_s = PyString_AS_STRING(self);
self_len = PyString_GET_SIZE(self); self_len = PyString_GET_SIZE(self);
count = countstring(self_s, self_len, count = stringlib_count(self_s, self_len,
from_s, from_len, from_s, from_len,
0, self_len, FORWARD, maxcount); maxcount);
if (count == 0) { if (count == 0) {
/* no matches, return unchanged */ /* no matches, return unchanged */
return return_self(self); return return_self(self);
@ -3073,9 +2707,9 @@ replace_substring(PyStringObject *self,
start = self_s; start = self_s;
end = self_s + self_len; end = self_s + self_len;
while (count-- > 0) { while (count-- > 0) {
offset = findstring(start, end-start, offset = stringlib_find(start, end-start,
from_s, from_len, from_s, from_len,
0, end-start, FORWARD); 0);
if (offset == -1) if (offset == -1)
break; break;
next = start+offset; next = start+offset;
@ -3245,7 +2879,7 @@ _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
return -1; return -1;
str = PyString_AS_STRING(self); str = PyString_AS_STRING(self);
string_adjust_indices(&start, &end, len); ADJUST_INDICES(start, end, len);
if (direction < 0) { if (direction < 0) {
/* startswith */ /* startswith */
@ -3913,62 +3547,15 @@ is given and true.");
static PyObject* static PyObject*
string_splitlines(PyStringObject *self, PyObject *args) string_splitlines(PyStringObject *self, PyObject *args)
{ {
register Py_ssize_t i;
register Py_ssize_t j;
Py_ssize_t len;
int keepends = 0; int keepends = 0;
PyObject *list;
PyObject *str;
char *data;
if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
return NULL; return NULL;
data = PyString_AS_STRING(self); return stringlib_splitlines(
len = PyString_GET_SIZE(self); (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
keepends
/* This does not use the preallocated list because splitlines is );
usually run with hundreds of newlines. The overhead of
switching between PyList_SET_ITEM and append causes about a
2-3% slowdown for that common case. A smarter implementation
could move the if check out, so the SET_ITEMs are done first
and the appends only done when the prealloc buffer is full.
That's too much work for little gain.*/
list = PyList_New(0);
if (!list)
goto onError;
for (i = j = 0; i < len; ) {
Py_ssize_t eol;
/* Find a line and append it */
while (i < len && data[i] != '\n' && data[i] != '\r')
i++;
/* Skip the line break reading CRLF as one line break */
eol = i;
if (i < len) {
if (data[i] == '\r' && i + 1 < len &&
data[i+1] == '\n')
i += 2;
else
i++;
if (keepends)
eol = i;
}
SPLIT_APPEND(data, j, eol);
j = i;
}
if (j < len) {
SPLIT_APPEND(data, j, len);
}
return list;
onError:
Py_XDECREF(list);
return NULL;
} }
PyDoc_STRVAR(sizeof__doc__, PyDoc_STRVAR(sizeof__doc__,
@ -3982,11 +3569,6 @@ string_sizeof(PyStringObject *v)
return PyInt_FromSsize_t(res); return PyInt_FromSsize_t(res);
} }
#undef SPLIT_APPEND
#undef SPLIT_ADD
#undef MAX_PREALLOC
#undef PREALLOC_SIZE
static PyObject * static PyObject *
string_getnewargs(PyStringObject *v) string_getnewargs(PyStringObject *v)
{ {

View file

@ -194,7 +194,8 @@ PyUnicode_GetMax(void)
static BLOOM_MASK bloom_linebreak; static BLOOM_MASK bloom_linebreak;
/* Bloom-filter helpers: each character is folded onto one of LONG_BIT bit
   positions (ch modulo LONG_BIT).

   BLOOM_ADD(mask, ch) sets the bit for `ch` in the lvalue `mask`.
   BLOOM(mask, ch)     is non-zero when the bit for `ch` is set in `mask`.

   The shifted constant must be `1UL`, not `1`: the shift count ranges up to
   LONG_BIT - 1, and shifting a plain int by the width of int or more is
   undefined behaviour (and could never reach the upper bits of the mask).
   NOTE(review): assumes BLOOM_MASK is an unsigned long-sized type; its
   definition is outside this view -- confirm. */
#define BLOOM_ADD(mask, ch) ((mask) |= (1UL << ((ch) & (LONG_BIT - 1))))
#define BLOOM(mask, ch)     ((mask) &  (1UL << ((ch) & (LONG_BIT - 1))))
#define BLOOM_LINEBREAK(ch) \ #define BLOOM_LINEBREAK(ch) \
((ch) < 128U ? ascii_linebreak[(ch)] : \ ((ch) < 128U ? ascii_linebreak[(ch)] : \
@ -209,7 +210,7 @@ Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
mask = 0; mask = 0;
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
mask |= (1 << (ptr[i] & 0x1F)); BLOOM_ADD(mask, ptr[i]);
return mask; return mask;
} }
@ -5245,27 +5246,27 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
/* --- Helpers ------------------------------------------------------------ */ /* --- Helpers ------------------------------------------------------------ */
#include "stringlib/unicodedefs.h" #include "stringlib/unicodedefs.h"
#define FROM_UNICODE
#include "stringlib/fastsearch.h" #include "stringlib/fastsearch.h"
#include "stringlib/count.h" #include "stringlib/count.h"
#include "stringlib/find.h" #include "stringlib/find.h"
#include "stringlib/partition.h" #include "stringlib/partition.h"
#include "stringlib/split.h"
/* Helper macro to fix up start/end slice values in place.

   `start` and `end` must be modifiable lvalues; `len` is the sequence
   length and is evaluated more than once.

     - end greater than len is clamped to len;
     - a negative end has len added to it and is then clamped to 0;
     - a negative start has len added to it and is then clamped to 0.

   A start larger than len is left as it is; callers compare start
   against end themselves.

   The body is wrapped in do { ... } while (0) so that the macro expands to
   exactly one statement: without the wrapper, `if (cond) ADJUST_INDICES(...);`
   would guard only the `end` half, and adding an `else` would not compile. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Py_ssize_t PyUnicode_Count(PyObject *str, Py_ssize_t PyUnicode_Count(PyObject *str,
PyObject *substr, PyObject *substr,
@ -5285,10 +5286,10 @@ Py_ssize_t PyUnicode_Count(PyObject *str,
return -1; return -1;
} }
FIX_START_END(str_obj); ADJUST_INDICES(start, end, str_obj->length);
result = stringlib_count( result = stringlib_count(
str_obj->str + start, end - start, sub_obj->str, sub_obj->length str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
PY_SSIZE_T_MAX
); );
Py_DECREF(sub_obj); Py_DECREF(sub_obj);
@ -5343,8 +5344,7 @@ int tailmatch(PyUnicodeObject *self,
if (substring->length == 0) if (substring->length == 0)
return 1; return 1;
FIX_START_END(self); ADJUST_INDICES(start, end, self->length);
end -= substring->length; end -= substring->length;
if (end < start) if (end < start)
return 0; return 0;
@ -5721,305 +5721,40 @@ PyUnicodeObject *pad(PyUnicodeObject *self,
return u; return u;
} }
/* Split `string` into lines via the shared stringlib_splitlines() helper.

   `string` is first coerced with PyUnicode_FromObject(); if that fails,
   NULL is returned.  The coerced object is handed to the helper together
   with its Py_UNICODE buffer, its size and the `keepends` flag, and is
   released again before returning.  The helper's result (possibly NULL)
   is passed straight back to the caller. */
PyObject *PyUnicode_Splitlines(PyObject *string, int keepends)
{
    PyObject *lines;
    PyObject *ustr = PyUnicode_FromObject(string);

    if (ustr == NULL) {
        return NULL;
    }

    lines = stringlib_splitlines(ustr,
                                 PyUnicode_AS_UNICODE(ustr),
                                 PyUnicode_GET_SIZE(ustr),
                                 keepends);

    /* Drop the reference obtained from PyUnicode_FromObject(). */
    Py_DECREF(ustr);
    return lines;
}
static
PyObject *split_char(PyUnicodeObject *self,
PyObject *list,
Py_UNICODE ch,
Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
Py_ssize_t len = self->length;
PyObject *str;
register const Py_UNICODE *buf = self->str;
for (i = j = 0; i < len; ) {
if (buf[i] == ch) {
if (maxcount-- <= 0)
break;
SPLIT_APPEND(buf, j, i);
i = j = i + 1;
} else
i++;
}
if (j <= len) {
SPLIT_APPEND(buf, j, len);
}
return list;
onError:
Py_DECREF(list);
return NULL;
}
static
PyObject *split_substring(PyUnicodeObject *self,
PyObject *list,
PyUnicodeObject *substring,
Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
Py_ssize_t len = self->length;
Py_ssize_t sublen = substring->length;
PyObject *str;
for (i = j = 0; i <= len - sublen; ) {
if (Py_UNICODE_MATCH(self, i, substring)) {
if (maxcount-- <= 0)
break;
SPLIT_APPEND(self->str, j, i);
i = j = i + sublen;
} else
i++;
}
if (j <= len) {
SPLIT_APPEND(self->str, j, len);
}
return list;
onError:
Py_DECREF(list);
return NULL;
}
static
PyObject *rsplit_whitespace(PyUnicodeObject *self,
PyObject *list,
Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
Py_ssize_t len = self->length;
PyObject *str;
register const Py_UNICODE *buf = self->str;
for (i = j = len - 1; i >= 0; ) {
/* find a token */
while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
i--;
j = i;
while (i >= 0 && !Py_UNICODE_ISSPACE(buf[i]))
i--;
if (j > i) {
if (maxcount-- <= 0)
break;
SPLIT_APPEND(buf, i + 1, j + 1);
while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
i--;
j = i;
}
}
if (j >= 0) {
SPLIT_APPEND(buf, 0, j + 1);
}
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
}
static
PyObject *rsplit_char(PyUnicodeObject *self,
PyObject *list,
Py_UNICODE ch,
Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
Py_ssize_t len = self->length;
PyObject *str;
register const Py_UNICODE *buf = self->str;
for (i = j = len - 1; i >= 0; ) {
if (buf[i] == ch) {
if (maxcount-- <= 0)
break;
SPLIT_APPEND(buf, i + 1, j + 1);
j = i = i - 1;
} else
i--;
}
if (j >= -1) {
SPLIT_APPEND(buf, 0, j + 1);
}
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
}
static
PyObject *rsplit_substring(PyUnicodeObject *self,
PyObject *list,
PyUnicodeObject *substring,
Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
Py_ssize_t len = self->length;
Py_ssize_t sublen = substring->length;
PyObject *str;
for (i = len - sublen, j = len; i >= 0; ) {
if (Py_UNICODE_MATCH(self, i, substring)) {
if (maxcount-- <= 0)
break;
SPLIT_APPEND(self->str, i + sublen, j);
j = i;
i -= sublen;
} else
i--;
}
if (j >= 0) {
SPLIT_APPEND(self->str, 0, j);
}
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
}
#undef SPLIT_APPEND
static static
PyObject *split(PyUnicodeObject *self, PyObject *split(PyUnicodeObject *self,
PyUnicodeObject *substring, PyUnicodeObject *substring,
Py_ssize_t maxcount) Py_ssize_t maxcount)
{ {
PyObject *list;
if (maxcount < 0) if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX; maxcount = PY_SSIZE_T_MAX;
list = PyList_New(0);
if (!list)
return NULL;
if (substring == NULL) if (substring == NULL)
return split_whitespace(self,list,maxcount); return stringlib_split_whitespace(
(PyObject*) self, self->str, self->length, maxcount
);
else if (substring->length == 1) return stringlib_split(
return split_char(self,list,substring->str[0],maxcount); (PyObject*) self, self->str, self->length,
substring->str, substring->length,
else if (substring->length == 0) { maxcount
Py_DECREF(list); );
PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL;
}
else
return split_substring(self,list,substring,maxcount);
} }
static static
@ -6027,28 +5762,19 @@ PyObject *rsplit(PyUnicodeObject *self,
PyUnicodeObject *substring, PyUnicodeObject *substring,
Py_ssize_t maxcount) Py_ssize_t maxcount)
{ {
PyObject *list;
if (maxcount < 0) if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX; maxcount = PY_SSIZE_T_MAX;
list = PyList_New(0);
if (!list)
return NULL;
if (substring == NULL) if (substring == NULL)
return rsplit_whitespace(self,list,maxcount); return stringlib_rsplit_whitespace(
(PyObject*) self, self->str, self->length, maxcount
);
else if (substring->length == 1) return stringlib_rsplit(
return rsplit_char(self,list,substring->str[0],maxcount); (PyObject*) self, self->str, self->length,
substring->str, substring->length,
else if (substring->length == 0) { maxcount
Py_DECREF(list); );
PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL;
}
else
return rsplit_substring(self,list,substring,maxcount);
} }
static static
@ -6061,9 +5787,13 @@ PyObject *replace(PyUnicodeObject *self,
if (maxcount < 0) if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX; maxcount = PY_SSIZE_T_MAX;
else if (maxcount == 0 || self->length == 0)
goto nothing;
if (str1->length == str2->length) { if (str1->length == str2->length) {
/* same length */ /* same length */
if (str1->length == 0)
goto nothing;
Py_ssize_t i; Py_ssize_t i;
if (str1->length == 1) { if (str1->length == 1) {
/* replace characters */ /* replace characters */
@ -6083,8 +5813,8 @@ PyObject *replace(PyUnicodeObject *self,
u->str[i] = u2; u->str[i] = u2;
} }
} else { } else {
i = fastsearch( i = stringlib_find(
self->str, self->length, str1->str, str1->length, FAST_SEARCH self->str, self->length, str1->str, str1->length, 0
); );
if (i < 0) if (i < 0)
goto nothing; goto nothing;
@ -6092,14 +5822,20 @@ PyObject *replace(PyUnicodeObject *self,
if (!u) if (!u)
return NULL; return NULL;
Py_UNICODE_COPY(u->str, self->str, self->length); Py_UNICODE_COPY(u->str, self->str, self->length);
while (i <= self->length - str1->length)
if (Py_UNICODE_MATCH(self, i, str1)) { /* change everything in-place, starting with this one */
if (--maxcount < 0) Py_UNICODE_COPY(u->str+i, str2->str, str2->length);
i += str1->length;
while ( --maxcount > 0) {
i = stringlib_find(self->str+i, self->length-i,
str1->str, str1->length,
i);
if (i == -1)
break; break;
Py_UNICODE_COPY(u->str+i, str2->str, str2->length); Py_UNICODE_COPY(u->str+i, str2->str, str2->length);
i += str1->length; i += str1->length;
} else }
i++;
} }
} else { } else {
@ -6108,9 +5844,8 @@ PyObject *replace(PyUnicodeObject *self,
Py_UNICODE *p; Py_UNICODE *p;
/* replace strings */ /* replace strings */
n = stringlib_count(self->str, self->length, str1->str, str1->length); n = stringlib_count(self->str, self->length, str1->str, str1->length,
if (n > maxcount) maxcount);
n = maxcount;
if (n == 0) if (n == 0)
goto nothing; goto nothing;
/* new_size = self->length + n * (str2->length - str1->length)); */ /* new_size = self->length + n * (str2->length - str1->length)); */
@ -6140,15 +5875,12 @@ PyObject *replace(PyUnicodeObject *self,
if (str1->length > 0) { if (str1->length > 0) {
while (n-- > 0) { while (n-- > 0) {
/* look for next match */ /* look for next match */
j = i; j = stringlib_find(self->str+i, self->length-i,
while (j <= e) { str1->str, str1->length,
if (Py_UNICODE_MATCH(self, j, str1)) i);
break; if (j == -1)
j++;
}
if (j > i) {
if (j > e)
break; break;
else if (j > i) {
/* copy unchanged part [i:j] */ /* copy unchanged part [i:j] */
Py_UNICODE_COPY(p, self->str+i, j-i); Py_UNICODE_COPY(p, self->str+i, j-i);
p += j - i; p += j - i;
@ -6585,11 +6317,11 @@ unicode_count(PyUnicodeObject *self, PyObject *args)
if (substring == NULL) if (substring == NULL)
return NULL; return NULL;
FIX_START_END(self); ADJUST_INDICES(start, end, self->length);
result = PyInt_FromSsize_t( result = PyInt_FromSsize_t(
stringlib_count(self->str + start, end - start, stringlib_count(self->str + start, end - start,
substring->str, substring->length) substring->str, substring->length,
PY_SSIZE_T_MAX)
); );
Py_DECREF(substring); Py_DECREF(substring);
@ -9132,11 +8864,3 @@ _PyUnicode_Fini(void)
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

View file

@ -1538,6 +1538,10 @@
RelativePath="..\..\Objects\sliceobject.c" RelativePath="..\..\Objects\sliceobject.c"
> >
</File> </File>
<File
RelativePath="..\..\Objects\stringlib\split.h"
>
</File>
<File <File
RelativePath="..\..\Objects\structseq.c" RelativePath="..\..\Objects\structseq.c"
> >

View file

@ -1538,6 +1538,10 @@
RelativePath="..\Objects\sliceobject.c" RelativePath="..\Objects\sliceobject.c"
> >
</File> </File>
<File
RelativePath="..\Objects\stringlib\split.h"
>
</File>
<File <File
RelativePath="..\Objects\structseq.c" RelativePath="..\Objects\structseq.c"
> >