mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
Merged revisions 77461 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r77461 | antoine.pitrou | 2010-01-13 08:55:48 +0100 (mer., 13 janv. 2010) | 5 lines Issue #7622: Improve the split(), rsplit(), splitlines() and replace() methods of bytes, bytearray and unicode objects by using a common implementation based on stringlib's fast search. Patch by Florent Xicluna. ........
This commit is contained in:
parent
5efea0430c
commit
f2c5484f9e
16 changed files with 1123 additions and 1418 deletions
|
@ -1,13 +1,6 @@
|
|||
/* NOTE: this API is -ONLY- for use with single byte character strings. */
|
||||
/* Do not use it with Unicode. */
|
||||
|
||||
#include "bytes_methods.h"
|
||||
|
||||
#ifndef STRINGLIB_MUTABLE
|
||||
#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
|
||||
#define STRINGLIB_MUTABLE 0
|
||||
#endif
|
||||
|
||||
/* the more complicated methods. parts of these should be pulled out into the
|
||||
shared code in bytes_methods.c to cut down on duplicate code bloat. */
|
||||
|
||||
|
@ -269,87 +262,3 @@ stringlib_zfill(PyObject *self, PyObject *args)
|
|||
|
||||
return (PyObject*) s;
|
||||
}
|
||||
|
||||
|
||||
#define _STRINGLIB_SPLIT_APPEND(data, left, right) \
|
||||
str = STRINGLIB_NEW((data) + (left), \
|
||||
(right) - (left)); \
|
||||
if (str == NULL) \
|
||||
goto onError; \
|
||||
if (PyList_Append(list, str)) { \
|
||||
Py_DECREF(str); \
|
||||
goto onError; \
|
||||
} \
|
||||
else \
|
||||
Py_DECREF(str);
|
||||
|
||||
PyDoc_STRVAR(splitlines__doc__,
|
||||
"B.splitlines([keepends]) -> list of lines\n\
|
||||
\n\
|
||||
Return a list of the lines in B, breaking at line boundaries.\n\
|
||||
Line breaks are not included in the resulting list unless keepends\n\
|
||||
is given and true.");
|
||||
|
||||
static PyObject*
|
||||
stringlib_splitlines(PyObject *self, PyObject *args)
|
||||
{
|
||||
register Py_ssize_t i;
|
||||
register Py_ssize_t j;
|
||||
Py_ssize_t len;
|
||||
int keepends = 0;
|
||||
PyObject *list;
|
||||
PyObject *str;
|
||||
char *data;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
|
||||
return NULL;
|
||||
|
||||
data = STRINGLIB_STR(self);
|
||||
len = STRINGLIB_LEN(self);
|
||||
|
||||
/* This does not use the preallocated list because splitlines is
|
||||
usually run with hundreds of newlines. The overhead of
|
||||
switching between PyList_SET_ITEM and append causes about a
|
||||
2-3% slowdown for that common case. A smarter implementation
|
||||
could move the if check out, so the SET_ITEMs are done first
|
||||
and the appends only done when the prealloc buffer is full.
|
||||
That's too much work for little gain.*/
|
||||
|
||||
list = PyList_New(0);
|
||||
if (!list)
|
||||
goto onError;
|
||||
|
||||
for (i = j = 0; i < len; ) {
|
||||
Py_ssize_t eol;
|
||||
|
||||
/* Find a line and append it */
|
||||
while (i < len && data[i] != '\n' && data[i] != '\r')
|
||||
i++;
|
||||
|
||||
/* Skip the line break reading CRLF as one line break */
|
||||
eol = i;
|
||||
if (i < len) {
|
||||
if (data[i] == '\r' && i + 1 < len &&
|
||||
data[i+1] == '\n')
|
||||
i += 2;
|
||||
else
|
||||
i++;
|
||||
if (keepends)
|
||||
eol = i;
|
||||
}
|
||||
_STRINGLIB_SPLIT_APPEND(data, j, eol);
|
||||
j = i;
|
||||
}
|
||||
if (j < len) {
|
||||
_STRINGLIB_SPLIT_APPEND(data, j, len);
|
||||
}
|
||||
|
||||
return list;
|
||||
|
||||
onError:
|
||||
Py_XDECREF(list);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#undef _STRINGLIB_SPLIT_APPEND
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue