mirror of
https://github.com/python/cpython.git
synced 2025-09-16 21:56:14 +00:00
needforspeed: new replace implementation by Andrew Dalke. replace is
now about 3x faster on my machine, for the replace tests from stringbench.
This commit is contained in:
parent
0c71f88fc9
commit
e68955cf32
1 changed files with 612 additions and 189 deletions
|
@ -2379,174 +2379,622 @@ string_translate(PyStringObject *self, PyObject *args)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* What follows is used for implementing replace(). Perry Stoll. */
|
#define FORWARD 1
|
||||||
|
#define REVERSE -1
|
||||||
|
|
||||||
/*
|
/* find and count characters and substrings */
|
||||||
mymemfind
|
|
||||||
|
|
||||||
strstr replacement for arbitrary blocks of memory.
|
/* Don't call if length < 2 */
|
||||||
|
#define Py_STRING_MATCH(target, offset, pattern, length) \
|
||||||
|
(target[offset] == pattern[0] && \
|
||||||
|
target[offset+length-1] == pattern[length-1] && \
|
||||||
|
!memcmp(target+offset+1, pattern+1, length-2) )
|
||||||
|
|
||||||
Locates the first occurrence in the memory pointed to by MEM of the
|
#define findchar(target, target_len, c) \
|
||||||
contents of memory pointed to by PAT. Returns the index into MEM if
|
((char *)memchr((const void *)(target), c, target_len))
|
||||||
found, or -1 if not found. If len of PAT is greater than length of
|
|
||||||
MEM, the function returns -1.
|
/* String ops must return a string. */
|
||||||
*/
|
/* If the object is subclass of string, create a copy */
|
||||||
static Py_ssize_t
|
static PyStringObject *
|
||||||
mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
|
return_self(PyStringObject *self)
|
||||||
{
|
{
|
||||||
register Py_ssize_t ii;
|
if (PyString_CheckExact(self)) {
|
||||||
|
Py_INCREF(self);
|
||||||
|
return self;
|
||||||
|
}
|
||||||
|
return (PyStringObject *)PyString_FromStringAndSize(
|
||||||
|
PyString_AS_STRING(self),
|
||||||
|
PyString_GET_SIZE(self));
|
||||||
|
}
|
||||||
|
|
||||||
/* pattern can not occur in the last pat_len-1 chars */
|
static Py_ssize_t
|
||||||
len -= pat_len;
|
countchar(char *target, int target_len, char c)
|
||||||
|
{
|
||||||
|
Py_ssize_t count=0;
|
||||||
|
char *start=target;
|
||||||
|
char *end=target+target_len;
|
||||||
|
|
||||||
for (ii = 0; ii <= len; ii++) {
|
while ( (start=findchar(start, end-start, c)) != NULL ) {
|
||||||
if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
|
count++;
|
||||||
return ii;
|
start += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
findstring(char *target, Py_ssize_t target_len,
|
||||||
|
char *pattern, Py_ssize_t pattern_len,
|
||||||
|
Py_ssize_t start,
|
||||||
|
Py_ssize_t end,
|
||||||
|
int direction)
|
||||||
|
{
|
||||||
|
if (start < 0) {
|
||||||
|
start += target_len;
|
||||||
|
if (start < 0)
|
||||||
|
start = 0;
|
||||||
|
}
|
||||||
|
if (end > target_len) {
|
||||||
|
end = target_len;
|
||||||
|
} else if (end < 0) {
|
||||||
|
end += target_len;
|
||||||
|
if (end < 0)
|
||||||
|
end = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* zero-length substrings always match at the first attempt */
|
||||||
|
if (pattern_len == 0)
|
||||||
|
return (direction > 0) ? start : end;
|
||||||
|
|
||||||
|
end -= pattern_len;
|
||||||
|
|
||||||
|
if (direction < 0) {
|
||||||
|
for (; end >= start; end--)
|
||||||
|
if (Py_STRING_MATCH(target, end, pattern, pattern_len))
|
||||||
|
return end;
|
||||||
|
} else {
|
||||||
|
for (; start <= end; start++)
|
||||||
|
if (Py_STRING_MATCH(target, start, pattern, pattern_len))
|
||||||
|
return start;
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
Py_ssize_t
|
||||||
mymemcnt
|
countstring(char *target, Py_ssize_t target_len,
|
||||||
|
char *pattern, Py_ssize_t pattern_len,
|
||||||
Return the number of distinct times PAT is found in MEM.
|
Py_ssize_t start,
|
||||||
meaning mem=1111 and pat==11 returns 2.
|
Py_ssize_t end,
|
||||||
mem=11111 and pat==11 also return 2.
|
int direction)
|
||||||
*/
|
|
||||||
static Py_ssize_t
|
|
||||||
mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
|
|
||||||
{
|
{
|
||||||
register Py_ssize_t offset = 0;
|
Py_ssize_t count=0;
|
||||||
Py_ssize_t nfound = 0;
|
|
||||||
|
|
||||||
while (len >= 0) {
|
if (start < 0) {
|
||||||
offset = mymemfind(mem, len, pat, pat_len);
|
start += target_len;
|
||||||
|
if (start < 0)
|
||||||
|
start = 0;
|
||||||
|
}
|
||||||
|
if (end > target_len) {
|
||||||
|
end = target_len;
|
||||||
|
} else if (end < 0) {
|
||||||
|
end += target_len;
|
||||||
|
if (end < 0)
|
||||||
|
end = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* zero-length substrings match everywhere */
|
||||||
|
if (pattern_len == 0)
|
||||||
|
return target_len+1;
|
||||||
|
|
||||||
|
end -= pattern_len;
|
||||||
|
|
||||||
|
if (direction < 0) {
|
||||||
|
for (; end >= start; end--)
|
||||||
|
if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
|
||||||
|
count++;
|
||||||
|
end -= pattern_len-1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (; start <= end; start++)
|
||||||
|
if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
|
||||||
|
count++;
|
||||||
|
start += pattern_len-1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Algorithms for different cases of string replacement */
|
||||||
|
|
||||||
|
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
|
||||||
|
static PyStringObject *
|
||||||
|
replace_interleave(PyStringObject *self,
|
||||||
|
PyStringObject *to,
|
||||||
|
Py_ssize_t maxcount)
|
||||||
|
{
|
||||||
|
char *self_s, *to_s, *result_s;
|
||||||
|
Py_ssize_t self_len, to_len, result_len;
|
||||||
|
Py_ssize_t count, i, product;
|
||||||
|
PyStringObject *result;
|
||||||
|
|
||||||
|
self_len = PyString_GET_SIZE(self);
|
||||||
|
to_len = PyString_GET_SIZE(to);
|
||||||
|
|
||||||
|
/* 1 at the end plus 1 after every character */
|
||||||
|
count = self_len+1;
|
||||||
|
if (maxcount < count)
|
||||||
|
count = maxcount;
|
||||||
|
|
||||||
|
/* Check for overflow */
|
||||||
|
/* result_len = count * to_len + self_len; */
|
||||||
|
product = count * to_len;
|
||||||
|
if (product / to_len != count) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"replace string is too long");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
result_len = product + self_len;
|
||||||
|
if (result_len < 0) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"replace string is too long");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (! (result = (PyStringObject *)
|
||||||
|
PyString_FromStringAndSize(NULL, result_len)) )
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
self_s = PyString_AS_STRING(self);
|
||||||
|
to_s = PyString_AS_STRING(to);
|
||||||
|
to_len = PyString_GET_SIZE(to);
|
||||||
|
result_s = PyString_AS_STRING(result);
|
||||||
|
|
||||||
|
/* TODO: special case single character, which doesn't need memcpy */
|
||||||
|
|
||||||
|
/* Lay the first one down (guaranteed this will occur) */
|
||||||
|
memcpy(result_s, to_s, to_len);
|
||||||
|
result_s += to_len;
|
||||||
|
count -= 1;
|
||||||
|
|
||||||
|
for (i=0; i<count; i++) {
|
||||||
|
*result_s++ = *self_s++;
|
||||||
|
memcpy(result_s, to_s, to_len);
|
||||||
|
result_s += to_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy the rest of the original string */
|
||||||
|
memcpy(result_s, self_s, self_len-i);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Special case for deleting a single character */
|
||||||
|
/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
|
||||||
|
static PyStringObject *
|
||||||
|
replace_delete_single_character(PyStringObject *self,
|
||||||
|
char from_c, Py_ssize_t maxcount)
|
||||||
|
{
|
||||||
|
char *self_s, *result_s;
|
||||||
|
char *start, *next, *end;
|
||||||
|
Py_ssize_t self_len, result_len;
|
||||||
|
Py_ssize_t count;
|
||||||
|
PyStringObject *result;
|
||||||
|
|
||||||
|
self_len = PyString_GET_SIZE(self);
|
||||||
|
self_s = PyString_AS_STRING(self);
|
||||||
|
|
||||||
|
count = countchar(self_s, self_len, from_c);
|
||||||
|
if (count == 0) {
|
||||||
|
return return_self(self);
|
||||||
|
}
|
||||||
|
if (count > maxcount)
|
||||||
|
count = maxcount;
|
||||||
|
|
||||||
|
result_len = self_len - count; /* from_len == 1 */
|
||||||
|
assert(result_len>=0);
|
||||||
|
|
||||||
|
if ( (result = (PyStringObject *)
|
||||||
|
PyString_FromStringAndSize(NULL, result_len)) == NULL)
|
||||||
|
return NULL;
|
||||||
|
result_s = PyString_AS_STRING(result);
|
||||||
|
|
||||||
|
start = self_s;
|
||||||
|
end = self_s + self_len;
|
||||||
|
while (count-- > 0) {
|
||||||
|
next = findchar(start, end-start, from_c);
|
||||||
|
if (next == NULL)
|
||||||
|
break;
|
||||||
|
memcpy(result_s, start, next-start);
|
||||||
|
result_s += (next-start);
|
||||||
|
start = next+1;
|
||||||
|
}
|
||||||
|
memcpy(result_s, start, end-start);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
|
||||||
|
|
||||||
|
static PyStringObject *
|
||||||
|
replace_delete_substring(PyStringObject *self, PyStringObject *from,
|
||||||
|
Py_ssize_t maxcount) {
|
||||||
|
char *self_s, *from_s, *result_s;
|
||||||
|
char *start, *next, *end;
|
||||||
|
Py_ssize_t self_len, from_len, result_len;
|
||||||
|
Py_ssize_t count, offset;
|
||||||
|
PyStringObject *result;
|
||||||
|
|
||||||
|
self_len = PyString_GET_SIZE(self);
|
||||||
|
self_s = PyString_AS_STRING(self);
|
||||||
|
from_len = PyString_GET_SIZE(from);
|
||||||
|
from_s = PyString_AS_STRING(from);
|
||||||
|
|
||||||
|
count = countstring(self_s, self_len,
|
||||||
|
from_s, from_len,
|
||||||
|
0, self_len, 1);
|
||||||
|
|
||||||
|
if (count > maxcount)
|
||||||
|
count = maxcount;
|
||||||
|
|
||||||
|
if (count == 0) {
|
||||||
|
/* no matches */
|
||||||
|
return return_self(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
result_len = self_len - (count * from_len);
|
||||||
|
assert (result_len>=0);
|
||||||
|
|
||||||
|
if ( (result = (PyStringObject *)
|
||||||
|
PyString_FromStringAndSize(NULL, result_len)) == NULL )
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
result_s = PyString_AS_STRING(result);
|
||||||
|
|
||||||
|
start = self_s;
|
||||||
|
end = self_s + self_len;
|
||||||
|
while (count-- > 0) {
|
||||||
|
offset = findstring(start, end-start,
|
||||||
|
from_s, from_len,
|
||||||
|
0, end-start, FORWARD);
|
||||||
if (offset == -1)
|
if (offset == -1)
|
||||||
break;
|
break;
|
||||||
mem += offset + pat_len;
|
next = start + offset;
|
||||||
len -= offset + pat_len;
|
|
||||||
nfound++;
|
memcpy(result_s, start, next-start);
|
||||||
|
|
||||||
|
result_s += (next-start);
|
||||||
|
start = next+from_len;
|
||||||
}
|
}
|
||||||
return nfound;
|
memcpy(result_s, start, end-start);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
|
||||||
mymemreplace
|
static PyStringObject *
|
||||||
|
replace_single_character_in_place(PyStringObject *self,
|
||||||
Return a string in which all occurrences of PAT in memory STR are
|
char from_c, char to_c,
|
||||||
replaced with SUB.
|
Py_ssize_t maxcount)
|
||||||
|
|
||||||
If length of PAT is less than length of STR or there are no occurrences
|
|
||||||
of PAT in STR, then the original string is returned. Otherwise, a new
|
|
||||||
string is allocated here and returned.
|
|
||||||
|
|
||||||
on return, out_len is:
|
|
||||||
the length of output string, or
|
|
||||||
-1 if the input string is returned, or
|
|
||||||
unchanged if an error occurs (no memory).
|
|
||||||
|
|
||||||
return value is:
|
|
||||||
the new string allocated locally, or
|
|
||||||
NULL if an error occurred.
|
|
||||||
*/
|
|
||||||
static char *
|
|
||||||
mymemreplace(const char *str, Py_ssize_t len, /* input string */
|
|
||||||
const char *pat, Py_ssize_t pat_len, /* pattern string to find */
|
|
||||||
const char *sub, Py_ssize_t sub_len, /* substitution string */
|
|
||||||
Py_ssize_t count, /* number of replacements */
|
|
||||||
Py_ssize_t *out_len)
|
|
||||||
{
|
{
|
||||||
char *out_s;
|
char *self_s, *result_s, *start, *end, *next;
|
||||||
char *new_s;
|
Py_ssize_t self_len;
|
||||||
Py_ssize_t nfound, offset, new_len;
|
PyStringObject *result;
|
||||||
Py_ssize_t product, delta;
|
|
||||||
|
/* The result string will be the same size */
|
||||||
if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
|
self_s = PyString_AS_STRING(self);
|
||||||
goto return_same;
|
self_len = PyString_GET_SIZE(self);
|
||||||
|
|
||||||
/* find length of output string */
|
next = findchar(self_s, self_len, from_c);
|
||||||
nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
|
|
||||||
if (count < 0)
|
if (next == NULL) {
|
||||||
count = PY_SSIZE_T_MAX;
|
/* No matches; return the original string */
|
||||||
else if (nfound > count)
|
return return_self(self);
|
||||||
nfound = count;
|
|
||||||
if (nfound == 0)
|
|
||||||
goto return_same;
|
|
||||||
|
|
||||||
delta = (sub_len - pat_len);
|
|
||||||
if (delta == 0) {
|
|
||||||
new_len = len;
|
|
||||||
} else {
|
|
||||||
product = nfound * (sub_len - pat_len);
|
|
||||||
if ((product / (sub_len - pat_len)) != nfound) {
|
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
|
||||||
"replace string is too long");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
new_len = len + product;
|
|
||||||
if (new_len < 0) {
|
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
|
||||||
"replace string is too long");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (new_len == 0) {
|
|
||||||
/* Have to allocate something for the caller to free(). */
|
|
||||||
out_s = (char *)PyMem_MALLOC(1);
|
|
||||||
if (out_s == NULL)
|
|
||||||
return NULL;
|
|
||||||
out_s[0] = '\0';
|
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
assert(new_len > 0);
|
/* Need to make a new string */
|
||||||
new_s = (char *)PyMem_MALLOC(new_len);
|
result = (PyStringObject *) PyString_FromStringAndSize(self_s, self_len);
|
||||||
if (new_s == NULL)
|
if (result == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
out_s = new_s;
|
result_s = PyString_AS_STRING(result);
|
||||||
|
|
||||||
if (pat_len > 0) {
|
/* change everything in-place, starting with this one */
|
||||||
for (; nfound > 0; --nfound) {
|
start = result_s + (next-self_s);
|
||||||
/* find index of next instance of pattern */
|
*start = to_c;
|
||||||
offset = mymemfind(str, len, pat, pat_len);
|
start++;
|
||||||
if (offset == -1)
|
end = result_s + self_len;
|
||||||
break;
|
|
||||||
|
while (--maxcount > 0) {
|
||||||
/* copy non matching part of input string */
|
next = findchar(start, end-start, from_c);
|
||||||
memcpy(new_s, str, offset);
|
if (next == NULL)
|
||||||
str += offset + pat_len;
|
break;
|
||||||
len -= offset + pat_len;
|
*next = to_c;
|
||||||
|
start = next+1;
|
||||||
/* copy substitute into the output string */
|
|
||||||
new_s += offset;
|
|
||||||
memcpy(new_s, sub, sub_len);
|
|
||||||
new_s += sub_len;
|
|
||||||
}
|
|
||||||
/* copy any remaining values into output string */
|
|
||||||
if (len > 0)
|
|
||||||
memcpy(new_s, str, len);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
for (;;++str, --len) {
|
|
||||||
memcpy(new_s, sub, sub_len);
|
|
||||||
new_s += sub_len;
|
|
||||||
if (--nfound <= 0) {
|
|
||||||
memcpy(new_s, str, len);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
*new_s++ = *str;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
*out_len = new_len;
|
|
||||||
return out_s;
|
return result;
|
||||||
|
|
||||||
return_same:
|
|
||||||
*out_len = -1;
|
|
||||||
return (char *)str; /* cast away const */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
|
||||||
|
static PyStringObject *
|
||||||
|
replace_substring_in_place(PyStringObject *self,
|
||||||
|
PyStringObject *from,
|
||||||
|
PyStringObject *to,
|
||||||
|
Py_ssize_t maxcount)
|
||||||
|
{
|
||||||
|
char *result_s, *start, *end;
|
||||||
|
char *self_s, *from_s, *to_s;
|
||||||
|
Py_ssize_t self_len, from_len, offset;
|
||||||
|
PyStringObject *result;
|
||||||
|
|
||||||
|
/* The result string will be the same size */
|
||||||
|
|
||||||
|
self_s = PyString_AS_STRING(self);
|
||||||
|
self_len = PyString_GET_SIZE(self);
|
||||||
|
|
||||||
|
from_s = PyString_AS_STRING(from);
|
||||||
|
from_len = PyString_GET_SIZE(from);
|
||||||
|
to_s = PyString_AS_STRING(to);
|
||||||
|
|
||||||
|
offset = findstring(self_s, self_len,
|
||||||
|
from_s, from_len,
|
||||||
|
0, self_len, FORWARD);
|
||||||
|
|
||||||
|
if (offset == -1) {
|
||||||
|
/* No matches; return the original string */
|
||||||
|
return return_self(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Need to make a new string */
|
||||||
|
result = (PyStringObject *) PyString_FromStringAndSize(self_s, self_len);
|
||||||
|
if (result == NULL)
|
||||||
|
return NULL;
|
||||||
|
result_s = PyString_AS_STRING(result);
|
||||||
|
|
||||||
|
/* change everything in-place, starting with this one */
|
||||||
|
start = result_s + offset;
|
||||||
|
memcpy(start, to_s, from_len);
|
||||||
|
start += from_len;
|
||||||
|
end = result_s + self_len;
|
||||||
|
|
||||||
|
while ( --maxcount > 0) {
|
||||||
|
offset = findstring(start, end-start,
|
||||||
|
from_s, from_len,
|
||||||
|
0, end-start, FORWARD);
|
||||||
|
if (offset==-1)
|
||||||
|
break;
|
||||||
|
memcpy(start+offset, to_s, from_len);
|
||||||
|
start += offset+from_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
|
||||||
|
static PyStringObject *
|
||||||
|
replace_single_character(PyStringObject *self,
|
||||||
|
char from_c,
|
||||||
|
PyStringObject *to,
|
||||||
|
Py_ssize_t maxcount)
|
||||||
|
{
|
||||||
|
char *self_s, *to_s, *result_s;
|
||||||
|
char *start, *next, *end;
|
||||||
|
Py_ssize_t self_len, to_len, result_len;
|
||||||
|
Py_ssize_t count, product;
|
||||||
|
PyStringObject *result;
|
||||||
|
|
||||||
|
self_s = PyString_AS_STRING(self);
|
||||||
|
self_len = PyString_GET_SIZE(self);
|
||||||
|
|
||||||
|
count = countchar(self_s, self_len, from_c);
|
||||||
|
if (count > maxcount)
|
||||||
|
count = maxcount;
|
||||||
|
|
||||||
|
if (count == 0) {
|
||||||
|
/* no matches, return unchanged */
|
||||||
|
return return_self(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
to_s = PyString_AS_STRING(to);
|
||||||
|
to_len = PyString_GET_SIZE(to);
|
||||||
|
|
||||||
|
/* use the difference between current and new, hence the "-1" */
|
||||||
|
/* result_len = self_len + count * (to_len-1) */
|
||||||
|
product = count * (to_len-1);
|
||||||
|
if (product / (to_len-1) != count) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
result_len = self_len + product;
|
||||||
|
if (result_len < 0) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( (result = (PyStringObject *)
|
||||||
|
PyString_FromStringAndSize(NULL, result_len)) == NULL)
|
||||||
|
return NULL;
|
||||||
|
result_s = PyString_AS_STRING(result);
|
||||||
|
|
||||||
|
start = self_s;
|
||||||
|
end = self_s + self_len;
|
||||||
|
while (count-- > 0) {
|
||||||
|
next = findchar(start, end-start, from_c);
|
||||||
|
if (next == NULL)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (next == start) {
|
||||||
|
/* replace with the 'to' */
|
||||||
|
memcpy(result_s, to_s, to_len);
|
||||||
|
result_s += to_len;
|
||||||
|
start += 1;
|
||||||
|
} else {
|
||||||
|
/* copy the unchanged old then the 'to' */
|
||||||
|
memcpy(result_s, start, next-start);
|
||||||
|
result_s += (next-start);
|
||||||
|
memcpy(result_s, to_s, to_len);
|
||||||
|
result_s += to_len;
|
||||||
|
start = next+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Copy the remainder of the remaining string */
|
||||||
|
memcpy(result_s, start, end-start);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
|
||||||
|
static PyStringObject *
|
||||||
|
replace_substring(PyStringObject *self,
|
||||||
|
PyStringObject *from,
|
||||||
|
PyStringObject *to,
|
||||||
|
Py_ssize_t maxcount) {
|
||||||
|
char *self_s, *from_s, *to_s, *result_s;
|
||||||
|
char *start, *next, *end;
|
||||||
|
Py_ssize_t self_len, from_len, to_len, result_len;
|
||||||
|
Py_ssize_t count, offset, product;
|
||||||
|
PyStringObject *result;
|
||||||
|
|
||||||
|
self_s = PyString_AS_STRING(self);
|
||||||
|
self_len = PyString_GET_SIZE(self);
|
||||||
|
from_s = PyString_AS_STRING(from);
|
||||||
|
from_len = PyString_GET_SIZE(from);
|
||||||
|
|
||||||
|
count = countstring(self_s, self_len,
|
||||||
|
from_s, from_len,
|
||||||
|
0, self_len, FORWARD);
|
||||||
|
if (count > maxcount)
|
||||||
|
count = maxcount;
|
||||||
|
|
||||||
|
if (count == 0) {
|
||||||
|
/* no matches, return unchanged */
|
||||||
|
return return_self(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
to_s = PyString_AS_STRING(to);
|
||||||
|
to_len = PyString_GET_SIZE(to);
|
||||||
|
|
||||||
|
/* Check for overflow */
|
||||||
|
/* result_len = self_len + count * (to_len-from_len) */
|
||||||
|
product = count * (to_len-from_len);
|
||||||
|
if (product / (to_len-from_len) != count) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
result_len = self_len + product;
|
||||||
|
if (result_len < 0) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( (result = (PyStringObject *)
|
||||||
|
PyString_FromStringAndSize(NULL, result_len)) == NULL)
|
||||||
|
return NULL;
|
||||||
|
result_s = PyString_AS_STRING(result);
|
||||||
|
|
||||||
|
start = self_s;
|
||||||
|
end = self_s + self_len;
|
||||||
|
while (count-- > 0) {
|
||||||
|
offset = findstring(start, end-start,
|
||||||
|
from_s, from_len,
|
||||||
|
0, end-start, FORWARD);
|
||||||
|
if (offset == -1)
|
||||||
|
break;
|
||||||
|
next = start+offset;
|
||||||
|
if (next == start) {
|
||||||
|
/* replace with the 'to' */
|
||||||
|
memcpy(result_s, to_s, to_len);
|
||||||
|
result_s += to_len;
|
||||||
|
start += from_len;
|
||||||
|
} else {
|
||||||
|
/* copy the unchanged old then the 'to' */
|
||||||
|
memcpy(result_s, start, next-start);
|
||||||
|
result_s += (next-start);
|
||||||
|
memcpy(result_s, to_s, to_len);
|
||||||
|
result_s += to_len;
|
||||||
|
start = next+from_len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Copy the remainder of the remaining string */
|
||||||
|
memcpy(result_s, start, end-start);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyStringObject *
|
||||||
|
replace(PyStringObject *self,
|
||||||
|
PyStringObject *from,
|
||||||
|
PyStringObject *to,
|
||||||
|
Py_ssize_t maxcount)
|
||||||
|
{
|
||||||
|
Py_ssize_t from_len, to_len;
|
||||||
|
|
||||||
|
if (maxcount < 0) {
|
||||||
|
maxcount = PY_SSIZE_T_MAX;
|
||||||
|
} else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
|
||||||
|
/* nothing to do; return the original string */
|
||||||
|
return return_self(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
from_len = PyString_GET_SIZE(from);
|
||||||
|
to_len = PyString_GET_SIZE(to);
|
||||||
|
|
||||||
|
if (maxcount == 0 ||
|
||||||
|
(from_len == 0 && to_len == 0)) {
|
||||||
|
/* nothing to do; return the original string */
|
||||||
|
return return_self(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle zero-length special cases */
|
||||||
|
|
||||||
|
if (from_len == 0) {
|
||||||
|
/* insert the 'to' string everywhere. */
|
||||||
|
/* >>> "Python".replace("", ".") */
|
||||||
|
/* '.P.y.t.h.o.n.' */
|
||||||
|
return replace_interleave(self, to, maxcount);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Except for "".replace("", "A") == "A" there is no way beyond this */
|
||||||
|
/* point for an empty self string to generate a non-empty string */
|
||||||
|
/* Special case so the remaining code always gets a non-empty string */
|
||||||
|
if (PyString_GET_SIZE(self) == 0) {
|
||||||
|
return return_self(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (to_len == 0) {
|
||||||
|
/* delete all occurances of 'from' string */
|
||||||
|
if (from_len == 1) {
|
||||||
|
return replace_delete_single_character(
|
||||||
|
self, PyString_AS_STRING(from)[0], maxcount);
|
||||||
|
} else {
|
||||||
|
return replace_delete_substring(self, from, maxcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle special case where both strings have the same length */
|
||||||
|
|
||||||
|
if (from_len == to_len) {
|
||||||
|
if (from_len == 1) {
|
||||||
|
return replace_single_character_in_place(
|
||||||
|
self,
|
||||||
|
PyString_AS_STRING(from)[0],
|
||||||
|
PyString_AS_STRING(to)[0],
|
||||||
|
maxcount);
|
||||||
|
} else {
|
||||||
|
return replace_substring_in_place(
|
||||||
|
self, from, to, maxcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Otherwise use the more generic algorithms */
|
||||||
|
if (from_len == 1) {
|
||||||
|
return replace_single_character(self, PyString_AS_STRING(from)[0],
|
||||||
|
to, maxcount);
|
||||||
|
} else {
|
||||||
|
/* len('from')>=2, len('to')>=1 */
|
||||||
|
return replace_substring(self, from, to, maxcount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(replace__doc__,
|
PyDoc_STRVAR(replace__doc__,
|
||||||
"S.replace (old, new[, count]) -> string\n\
|
"S.replace (old, new[, count]) -> string\n\
|
||||||
|
@ -2558,67 +3006,42 @@ given, only the first count occurrences are replaced.");
|
||||||
static PyObject *
|
static PyObject *
|
||||||
string_replace(PyStringObject *self, PyObject *args)
|
string_replace(PyStringObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
const char *str = PyString_AS_STRING(self), *sub, *repl;
|
|
||||||
char *new_s;
|
|
||||||
const Py_ssize_t len = PyString_GET_SIZE(self);
|
|
||||||
Py_ssize_t sub_len, repl_len, out_len;
|
|
||||||
Py_ssize_t count = -1;
|
Py_ssize_t count = -1;
|
||||||
PyObject *newobj;
|
PyObject *from, *to;
|
||||||
PyObject *subobj, *replobj;
|
char *tmp_s;
|
||||||
|
Py_ssize_t tmp_len;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "OO|n:replace",
|
if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
|
||||||
&subobj, &replobj, &count))
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (PyString_Check(subobj)) {
|
if (PyString_Check(from)) {
|
||||||
sub = PyString_AS_STRING(subobj);
|
/* Can this be made a '!check' after the Unicode check? */
|
||||||
sub_len = PyString_GET_SIZE(subobj);
|
|
||||||
}
|
}
|
||||||
#ifdef Py_USING_UNICODE
|
#ifdef Py_USING_UNICODE
|
||||||
else if (PyUnicode_Check(subobj))
|
if (PyUnicode_Check(from))
|
||||||
return PyUnicode_Replace((PyObject *)self,
|
return PyUnicode_Replace((PyObject *)self,
|
||||||
subobj, replobj, count);
|
from, to, count);
|
||||||
#endif
|
#endif
|
||||||
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
|
else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (PyString_Check(replobj)) {
|
if (PyString_Check(to)) {
|
||||||
repl = PyString_AS_STRING(replobj);
|
/* Can this be made a '!check' after the Unicode check? */
|
||||||
repl_len = PyString_GET_SIZE(replobj);
|
|
||||||
}
|
}
|
||||||
#ifdef Py_USING_UNICODE
|
#ifdef Py_USING_UNICODE
|
||||||
else if (PyUnicode_Check(replobj))
|
else if (PyUnicode_Check(to))
|
||||||
return PyUnicode_Replace((PyObject *)self,
|
return PyUnicode_Replace((PyObject *)self,
|
||||||
subobj, replobj, count);
|
from, to, count);
|
||||||
#endif
|
#endif
|
||||||
else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
|
else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
|
return (PyObject *)replace((PyStringObject *) self,
|
||||||
if (new_s == NULL) {
|
(PyStringObject *) from,
|
||||||
if (!PyErr_Occurred())
|
(PyStringObject *) to, count);
|
||||||
PyErr_NoMemory();
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
if (out_len == -1) {
|
|
||||||
if (PyString_CheckExact(self)) {
|
|
||||||
/* we're returning another reference to self */
|
|
||||||
newobj = (PyObject*)self;
|
|
||||||
Py_INCREF(newobj);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
newobj = PyString_FromStringAndSize(str, len);
|
|
||||||
if (newobj == NULL)
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
newobj = PyString_FromStringAndSize(new_s, out_len);
|
|
||||||
PyMem_FREE(new_s);
|
|
||||||
}
|
|
||||||
return newobj;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** End DALKE **/
|
||||||
|
|
||||||
PyDoc_STRVAR(startswith__doc__,
|
PyDoc_STRVAR(startswith__doc__,
|
||||||
"S.startswith(prefix[, start[, end]]) -> bool\n\
|
"S.startswith(prefix[, start[, end]]) -> bool\n\
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue