mirror of
https://github.com/python/cpython.git
synced 2025-07-31 23:23:11 +00:00
needforspeed: new replace implementation by Andrew Dalke. replace is
now about 3x faster on my machine, for the replace tests from string-bench.
This commit is contained in:
parent
0c71f88fc9
commit
e68955cf32
1 changed files with 612 additions and 189 deletions
|
@ -2379,174 +2379,622 @@ string_translate(PyStringObject *self, PyObject *args)
|
|||
}
|
||||
|
||||
|
||||
/* What follows is used for implementing replace(). Perry Stoll. */
|
||||
#define FORWARD 1
|
||||
#define REVERSE -1
|
||||
|
||||
/*
|
||||
mymemfind
|
||||
/* find and count characters and substrings */
|
||||
|
||||
strstr replacement for arbitrary blocks of memory.
|
||||
/* Evaluates to non-zero when the `length` bytes of `pattern` occur in
   `target` starting at index `offset`.  The first and last bytes are
   compared directly before memcmp() checks the bytes in between.

   Don't call if length < 2: the memcmp() size is length-2.

   Every argument is parenthesized so that expression arguments
   (e.g. `buf + 1` or `flag ? 2 : 3`) expand correctly.  Arguments are
   evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
|
||||
|
||||
Locates the first occurrence in the memory pointed to by MEM of the
|
||||
contents of memory pointed to by PAT. Returns the index into MEM if
|
||||
found, or -1 if not found. If len of PAT is greater than length of
|
||||
MEM, the function returns -1.
|
||||
*/
|
||||
static Py_ssize_t
|
||||
mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
|
||||
/* Search the first `target_len` bytes of `target` for the byte `c` using
   memchr().  Evaluates to a `char *` pointing at the first such byte, or
   NULL when there is none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
|
||||
|
||||
/* String ops must return a string. */
|
||||
/* If the object is subclass of string, create a copy */
|
||||
static PyStringObject *
|
||||
return_self(PyStringObject *self)
|
||||
{
|
||||
register Py_ssize_t ii;
|
||||
if (PyString_CheckExact(self)) {
|
||||
Py_INCREF(self);
|
||||
return self;
|
||||
}
|
||||
return (PyStringObject *)PyString_FromStringAndSize(
|
||||
PyString_AS_STRING(self),
|
||||
PyString_GET_SIZE(self));
|
||||
}
|
||||
|
||||
/* pattern can not occur in the last pat_len-1 chars */
|
||||
len -= pat_len;
|
||||
static Py_ssize_t
|
||||
countchar(char *target, int target_len, char c)
|
||||
{
|
||||
Py_ssize_t count=0;
|
||||
char *start=target;
|
||||
char *end=target+target_len;
|
||||
|
||||
for (ii = 0; ii <= len; ii++) {
|
||||
if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
|
||||
return ii;
|
||||
}
|
||||
while ( (start=findchar(start, end-start, c)) != NULL ) {
|
||||
count++;
|
||||
start += 1;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
findstring(char *target, Py_ssize_t target_len,
|
||||
char *pattern, Py_ssize_t pattern_len,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end,
|
||||
int direction)
|
||||
{
|
||||
if (start < 0) {
|
||||
start += target_len;
|
||||
if (start < 0)
|
||||
start = 0;
|
||||
}
|
||||
if (end > target_len) {
|
||||
end = target_len;
|
||||
} else if (end < 0) {
|
||||
end += target_len;
|
||||
if (end < 0)
|
||||
end = 0;
|
||||
}
|
||||
|
||||
/* zero-length substrings always match at the first attempt */
|
||||
if (pattern_len == 0)
|
||||
return (direction > 0) ? start : end;
|
||||
|
||||
end -= pattern_len;
|
||||
|
||||
if (direction < 0) {
|
||||
for (; end >= start; end--)
|
||||
if (Py_STRING_MATCH(target, end, pattern, pattern_len))
|
||||
return end;
|
||||
} else {
|
||||
for (; start <= end; start++)
|
||||
if (Py_STRING_MATCH(target, start, pattern, pattern_len))
|
||||
return start;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
mymemcnt
|
||||
|
||||
Return the number of distinct times PAT is found in MEM.
|
||||
meaning mem=1111 and pat==11 returns 2.
|
||||
mem=11111 and pat==11 also return 2.
|
||||
*/
|
||||
static Py_ssize_t
|
||||
mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
|
||||
Py_ssize_t
|
||||
countstring(char *target, Py_ssize_t target_len,
|
||||
char *pattern, Py_ssize_t pattern_len,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end,
|
||||
int direction)
|
||||
{
|
||||
register Py_ssize_t offset = 0;
|
||||
Py_ssize_t nfound = 0;
|
||||
Py_ssize_t count=0;
|
||||
|
||||
while (len >= 0) {
|
||||
offset = mymemfind(mem, len, pat, pat_len);
|
||||
if (start < 0) {
|
||||
start += target_len;
|
||||
if (start < 0)
|
||||
start = 0;
|
||||
}
|
||||
if (end > target_len) {
|
||||
end = target_len;
|
||||
} else if (end < 0) {
|
||||
end += target_len;
|
||||
if (end < 0)
|
||||
end = 0;
|
||||
}
|
||||
|
||||
/* zero-length substrings match everywhere */
|
||||
if (pattern_len == 0)
|
||||
return target_len+1;
|
||||
|
||||
end -= pattern_len;
|
||||
|
||||
if (direction < 0) {
|
||||
for (; end >= start; end--)
|
||||
if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
|
||||
count++;
|
||||
end -= pattern_len-1;
|
||||
}
|
||||
} else {
|
||||
for (; start <= end; start++)
|
||||
if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
|
||||
count++;
|
||||
start += pattern_len-1;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
/* Algorithms for different cases of string replacement */
|
||||
|
||||
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
|
||||
static PyStringObject *
|
||||
replace_interleave(PyStringObject *self,
|
||||
PyStringObject *to,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
char *self_s, *to_s, *result_s;
|
||||
Py_ssize_t self_len, to_len, result_len;
|
||||
Py_ssize_t count, i, product;
|
||||
PyStringObject *result;
|
||||
|
||||
self_len = PyString_GET_SIZE(self);
|
||||
to_len = PyString_GET_SIZE(to);
|
||||
|
||||
/* 1 at the end plus 1 after every character */
|
||||
count = self_len+1;
|
||||
if (maxcount < count)
|
||||
count = maxcount;
|
||||
|
||||
/* Check for overflow */
|
||||
/* result_len = count * to_len + self_len; */
|
||||
product = count * to_len;
|
||||
if (product / to_len != count) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"replace string is too long");
|
||||
return NULL;
|
||||
}
|
||||
result_len = product + self_len;
|
||||
if (result_len < 0) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"replace string is too long");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (! (result = (PyStringObject *)
|
||||
PyString_FromStringAndSize(NULL, result_len)) )
|
||||
return NULL;
|
||||
|
||||
self_s = PyString_AS_STRING(self);
|
||||
to_s = PyString_AS_STRING(to);
|
||||
to_len = PyString_GET_SIZE(to);
|
||||
result_s = PyString_AS_STRING(result);
|
||||
|
||||
/* TODO: special case single character, which doesn't need memcpy */
|
||||
|
||||
/* Lay the first one down (guaranteed this will occur) */
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
count -= 1;
|
||||
|
||||
for (i=0; i<count; i++) {
|
||||
*result_s++ = *self_s++;
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
}
|
||||
|
||||
/* Copy the rest of the original string */
|
||||
memcpy(result_s, self_s, self_len-i);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Special case for deleting a single character */
|
||||
/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
|
||||
static PyStringObject *
|
||||
replace_delete_single_character(PyStringObject *self,
|
||||
char from_c, Py_ssize_t maxcount)
|
||||
{
|
||||
char *self_s, *result_s;
|
||||
char *start, *next, *end;
|
||||
Py_ssize_t self_len, result_len;
|
||||
Py_ssize_t count;
|
||||
PyStringObject *result;
|
||||
|
||||
self_len = PyString_GET_SIZE(self);
|
||||
self_s = PyString_AS_STRING(self);
|
||||
|
||||
count = countchar(self_s, self_len, from_c);
|
||||
if (count == 0) {
|
||||
return return_self(self);
|
||||
}
|
||||
if (count > maxcount)
|
||||
count = maxcount;
|
||||
|
||||
result_len = self_len - count; /* from_len == 1 */
|
||||
assert(result_len>=0);
|
||||
|
||||
if ( (result = (PyStringObject *)
|
||||
PyString_FromStringAndSize(NULL, result_len)) == NULL)
|
||||
return NULL;
|
||||
result_s = PyString_AS_STRING(result);
|
||||
|
||||
start = self_s;
|
||||
end = self_s + self_len;
|
||||
while (count-- > 0) {
|
||||
next = findchar(start, end-start, from_c);
|
||||
if (next == NULL)
|
||||
break;
|
||||
memcpy(result_s, start, next-start);
|
||||
result_s += (next-start);
|
||||
start = next+1;
|
||||
}
|
||||
memcpy(result_s, start, end-start);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
|
||||
|
||||
static PyStringObject *
|
||||
replace_delete_substring(PyStringObject *self, PyStringObject *from,
|
||||
Py_ssize_t maxcount) {
|
||||
char *self_s, *from_s, *result_s;
|
||||
char *start, *next, *end;
|
||||
Py_ssize_t self_len, from_len, result_len;
|
||||
Py_ssize_t count, offset;
|
||||
PyStringObject *result;
|
||||
|
||||
self_len = PyString_GET_SIZE(self);
|
||||
self_s = PyString_AS_STRING(self);
|
||||
from_len = PyString_GET_SIZE(from);
|
||||
from_s = PyString_AS_STRING(from);
|
||||
|
||||
count = countstring(self_s, self_len,
|
||||
from_s, from_len,
|
||||
0, self_len, 1);
|
||||
|
||||
if (count > maxcount)
|
||||
count = maxcount;
|
||||
|
||||
if (count == 0) {
|
||||
/* no matches */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
result_len = self_len - (count * from_len);
|
||||
assert (result_len>=0);
|
||||
|
||||
if ( (result = (PyStringObject *)
|
||||
PyString_FromStringAndSize(NULL, result_len)) == NULL )
|
||||
return NULL;
|
||||
|
||||
result_s = PyString_AS_STRING(result);
|
||||
|
||||
start = self_s;
|
||||
end = self_s + self_len;
|
||||
while (count-- > 0) {
|
||||
offset = findstring(start, end-start,
|
||||
from_s, from_len,
|
||||
0, end-start, FORWARD);
|
||||
if (offset == -1)
|
||||
break;
|
||||
mem += offset + pat_len;
|
||||
len -= offset + pat_len;
|
||||
nfound++;
|
||||
next = start + offset;
|
||||
|
||||
memcpy(result_s, start, next-start);
|
||||
|
||||
result_s += (next-start);
|
||||
start = next+from_len;
|
||||
}
|
||||
return nfound;
|
||||
memcpy(result_s, start, end-start);
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
mymemreplace
|
||||
|
||||
Return a string in which all occurrences of PAT in memory STR are
|
||||
replaced with SUB.
|
||||
|
||||
If length of PAT is less than length of STR or there are no occurrences
|
||||
of PAT in STR, then the original string is returned. Otherwise, a new
|
||||
string is allocated here and returned.
|
||||
|
||||
on return, out_len is:
|
||||
the length of output string, or
|
||||
-1 if the input string is returned, or
|
||||
unchanged if an error occurs (no memory).
|
||||
|
||||
return value is:
|
||||
the new string allocated locally, or
|
||||
NULL if an error occurred.
|
||||
*/
|
||||
static char *
|
||||
mymemreplace(const char *str, Py_ssize_t len, /* input string */
|
||||
const char *pat, Py_ssize_t pat_len, /* pattern string to find */
|
||||
const char *sub, Py_ssize_t sub_len, /* substitution string */
|
||||
Py_ssize_t count, /* number of replacements */
|
||||
Py_ssize_t *out_len)
|
||||
/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
|
||||
static PyStringObject *
|
||||
replace_single_character_in_place(PyStringObject *self,
|
||||
char from_c, char to_c,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
char *out_s;
|
||||
char *new_s;
|
||||
Py_ssize_t nfound, offset, new_len;
|
||||
Py_ssize_t product, delta;
|
||||
|
||||
if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
|
||||
goto return_same;
|
||||
|
||||
/* find length of output string */
|
||||
nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
|
||||
if (count < 0)
|
||||
count = PY_SSIZE_T_MAX;
|
||||
else if (nfound > count)
|
||||
nfound = count;
|
||||
if (nfound == 0)
|
||||
goto return_same;
|
||||
|
||||
delta = (sub_len - pat_len);
|
||||
if (delta == 0) {
|
||||
new_len = len;
|
||||
} else {
|
||||
product = nfound * (sub_len - pat_len);
|
||||
if ((product / (sub_len - pat_len)) != nfound) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"replace string is too long");
|
||||
return NULL;
|
||||
}
|
||||
new_len = len + product;
|
||||
if (new_len < 0) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"replace string is too long");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (new_len == 0) {
|
||||
/* Have to allocate something for the caller to free(). */
|
||||
out_s = (char *)PyMem_MALLOC(1);
|
||||
if (out_s == NULL)
|
||||
return NULL;
|
||||
out_s[0] = '\0';
|
||||
char *self_s, *result_s, *start, *end, *next;
|
||||
Py_ssize_t self_len;
|
||||
PyStringObject *result;
|
||||
|
||||
/* The result string will be the same size */
|
||||
self_s = PyString_AS_STRING(self);
|
||||
self_len = PyString_GET_SIZE(self);
|
||||
|
||||
next = findchar(self_s, self_len, from_c);
|
||||
|
||||
if (next == NULL) {
|
||||
/* No matches; return the original string */
|
||||
return return_self(self);
|
||||
}
|
||||
else {
|
||||
assert(new_len > 0);
|
||||
new_s = (char *)PyMem_MALLOC(new_len);
|
||||
if (new_s == NULL)
|
||||
return NULL;
|
||||
out_s = new_s;
|
||||
|
||||
if (pat_len > 0) {
|
||||
for (; nfound > 0; --nfound) {
|
||||
/* find index of next instance of pattern */
|
||||
offset = mymemfind(str, len, pat, pat_len);
|
||||
if (offset == -1)
|
||||
break;
|
||||
|
||||
/* copy non matching part of input string */
|
||||
memcpy(new_s, str, offset);
|
||||
str += offset + pat_len;
|
||||
len -= offset + pat_len;
|
||||
|
||||
/* copy substitute into the output string */
|
||||
new_s += offset;
|
||||
memcpy(new_s, sub, sub_len);
|
||||
new_s += sub_len;
|
||||
}
|
||||
/* copy any remaining values into output string */
|
||||
if (len > 0)
|
||||
memcpy(new_s, str, len);
|
||||
}
|
||||
else {
|
||||
for (;;++str, --len) {
|
||||
memcpy(new_s, sub, sub_len);
|
||||
new_s += sub_len;
|
||||
if (--nfound <= 0) {
|
||||
memcpy(new_s, str, len);
|
||||
break;
|
||||
}
|
||||
*new_s++ = *str;
|
||||
}
|
||||
}
|
||||
|
||||
/* Need to make a new string */
|
||||
result = (PyStringObject *) PyString_FromStringAndSize(self_s, self_len);
|
||||
if (result == NULL)
|
||||
return NULL;
|
||||
result_s = PyString_AS_STRING(result);
|
||||
|
||||
/* change everything in-place, starting with this one */
|
||||
start = result_s + (next-self_s);
|
||||
*start = to_c;
|
||||
start++;
|
||||
end = result_s + self_len;
|
||||
|
||||
while (--maxcount > 0) {
|
||||
next = findchar(start, end-start, from_c);
|
||||
if (next == NULL)
|
||||
break;
|
||||
*next = to_c;
|
||||
start = next+1;
|
||||
}
|
||||
*out_len = new_len;
|
||||
return out_s;
|
||||
|
||||
return_same:
|
||||
*out_len = -1;
|
||||
return (char *)str; /* cast away const */
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
|
||||
static PyStringObject *
|
||||
replace_substring_in_place(PyStringObject *self,
|
||||
PyStringObject *from,
|
||||
PyStringObject *to,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
char *result_s, *start, *end;
|
||||
char *self_s, *from_s, *to_s;
|
||||
Py_ssize_t self_len, from_len, offset;
|
||||
PyStringObject *result;
|
||||
|
||||
/* The result string will be the same size */
|
||||
|
||||
self_s = PyString_AS_STRING(self);
|
||||
self_len = PyString_GET_SIZE(self);
|
||||
|
||||
from_s = PyString_AS_STRING(from);
|
||||
from_len = PyString_GET_SIZE(from);
|
||||
to_s = PyString_AS_STRING(to);
|
||||
|
||||
offset = findstring(self_s, self_len,
|
||||
from_s, from_len,
|
||||
0, self_len, FORWARD);
|
||||
|
||||
if (offset == -1) {
|
||||
/* No matches; return the original string */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
/* Need to make a new string */
|
||||
result = (PyStringObject *) PyString_FromStringAndSize(self_s, self_len);
|
||||
if (result == NULL)
|
||||
return NULL;
|
||||
result_s = PyString_AS_STRING(result);
|
||||
|
||||
/* change everything in-place, starting with this one */
|
||||
start = result_s + offset;
|
||||
memcpy(start, to_s, from_len);
|
||||
start += from_len;
|
||||
end = result_s + self_len;
|
||||
|
||||
while ( --maxcount > 0) {
|
||||
offset = findstring(start, end-start,
|
||||
from_s, from_len,
|
||||
0, end-start, FORWARD);
|
||||
if (offset==-1)
|
||||
break;
|
||||
memcpy(start+offset, to_s, from_len);
|
||||
start += offset+from_len;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
|
||||
static PyStringObject *
|
||||
replace_single_character(PyStringObject *self,
|
||||
char from_c,
|
||||
PyStringObject *to,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
char *self_s, *to_s, *result_s;
|
||||
char *start, *next, *end;
|
||||
Py_ssize_t self_len, to_len, result_len;
|
||||
Py_ssize_t count, product;
|
||||
PyStringObject *result;
|
||||
|
||||
self_s = PyString_AS_STRING(self);
|
||||
self_len = PyString_GET_SIZE(self);
|
||||
|
||||
count = countchar(self_s, self_len, from_c);
|
||||
if (count > maxcount)
|
||||
count = maxcount;
|
||||
|
||||
if (count == 0) {
|
||||
/* no matches, return unchanged */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
to_s = PyString_AS_STRING(to);
|
||||
to_len = PyString_GET_SIZE(to);
|
||||
|
||||
/* use the difference between current and new, hence the "-1" */
|
||||
/* result_len = self_len + count * (to_len-1) */
|
||||
product = count * (to_len-1);
|
||||
if (product / (to_len-1) != count) {
|
||||
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
|
||||
return NULL;
|
||||
}
|
||||
result_len = self_len + product;
|
||||
if (result_len < 0) {
|
||||
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ( (result = (PyStringObject *)
|
||||
PyString_FromStringAndSize(NULL, result_len)) == NULL)
|
||||
return NULL;
|
||||
result_s = PyString_AS_STRING(result);
|
||||
|
||||
start = self_s;
|
||||
end = self_s + self_len;
|
||||
while (count-- > 0) {
|
||||
next = findchar(start, end-start, from_c);
|
||||
if (next == NULL)
|
||||
break;
|
||||
|
||||
if (next == start) {
|
||||
/* replace with the 'to' */
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
start += 1;
|
||||
} else {
|
||||
/* copy the unchanged old then the 'to' */
|
||||
memcpy(result_s, start, next-start);
|
||||
result_s += (next-start);
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
start = next+1;
|
||||
}
|
||||
}
|
||||
/* Copy the remainder of the remaining string */
|
||||
memcpy(result_s, start, end-start);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
|
||||
static PyStringObject *
|
||||
replace_substring(PyStringObject *self,
|
||||
PyStringObject *from,
|
||||
PyStringObject *to,
|
||||
Py_ssize_t maxcount) {
|
||||
char *self_s, *from_s, *to_s, *result_s;
|
||||
char *start, *next, *end;
|
||||
Py_ssize_t self_len, from_len, to_len, result_len;
|
||||
Py_ssize_t count, offset, product;
|
||||
PyStringObject *result;
|
||||
|
||||
self_s = PyString_AS_STRING(self);
|
||||
self_len = PyString_GET_SIZE(self);
|
||||
from_s = PyString_AS_STRING(from);
|
||||
from_len = PyString_GET_SIZE(from);
|
||||
|
||||
count = countstring(self_s, self_len,
|
||||
from_s, from_len,
|
||||
0, self_len, FORWARD);
|
||||
if (count > maxcount)
|
||||
count = maxcount;
|
||||
|
||||
if (count == 0) {
|
||||
/* no matches, return unchanged */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
to_s = PyString_AS_STRING(to);
|
||||
to_len = PyString_GET_SIZE(to);
|
||||
|
||||
/* Check for overflow */
|
||||
/* result_len = self_len + count * (to_len-from_len) */
|
||||
product = count * (to_len-from_len);
|
||||
if (product / (to_len-from_len) != count) {
|
||||
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
|
||||
return NULL;
|
||||
}
|
||||
result_len = self_len + product;
|
||||
if (result_len < 0) {
|
||||
PyErr_SetString(PyExc_OverflowError, "replace string is too long");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ( (result = (PyStringObject *)
|
||||
PyString_FromStringAndSize(NULL, result_len)) == NULL)
|
||||
return NULL;
|
||||
result_s = PyString_AS_STRING(result);
|
||||
|
||||
start = self_s;
|
||||
end = self_s + self_len;
|
||||
while (count-- > 0) {
|
||||
offset = findstring(start, end-start,
|
||||
from_s, from_len,
|
||||
0, end-start, FORWARD);
|
||||
if (offset == -1)
|
||||
break;
|
||||
next = start+offset;
|
||||
if (next == start) {
|
||||
/* replace with the 'to' */
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
start += from_len;
|
||||
} else {
|
||||
/* copy the unchanged old then the 'to' */
|
||||
memcpy(result_s, start, next-start);
|
||||
result_s += (next-start);
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
start = next+from_len;
|
||||
}
|
||||
}
|
||||
/* Copy the remainder of the remaining string */
|
||||
memcpy(result_s, start, end-start);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static PyStringObject *
|
||||
replace(PyStringObject *self,
|
||||
PyStringObject *from,
|
||||
PyStringObject *to,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
Py_ssize_t from_len, to_len;
|
||||
|
||||
if (maxcount < 0) {
|
||||
maxcount = PY_SSIZE_T_MAX;
|
||||
} else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
|
||||
/* nothing to do; return the original string */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
from_len = PyString_GET_SIZE(from);
|
||||
to_len = PyString_GET_SIZE(to);
|
||||
|
||||
if (maxcount == 0 ||
|
||||
(from_len == 0 && to_len == 0)) {
|
||||
/* nothing to do; return the original string */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
/* Handle zero-length special cases */
|
||||
|
||||
if (from_len == 0) {
|
||||
/* insert the 'to' string everywhere. */
|
||||
/* >>> "Python".replace("", ".") */
|
||||
/* '.P.y.t.h.o.n.' */
|
||||
return replace_interleave(self, to, maxcount);
|
||||
}
|
||||
|
||||
/* Except for "".replace("", "A") == "A" there is no way beyond this */
|
||||
/* point for an empty self string to generate a non-empty string */
|
||||
/* Special case so the remaining code always gets a non-empty string */
|
||||
if (PyString_GET_SIZE(self) == 0) {
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
if (to_len == 0) {
|
||||
/* delete all occurances of 'from' string */
|
||||
if (from_len == 1) {
|
||||
return replace_delete_single_character(
|
||||
self, PyString_AS_STRING(from)[0], maxcount);
|
||||
} else {
|
||||
return replace_delete_substring(self, from, maxcount);
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle special case where both strings have the same length */
|
||||
|
||||
if (from_len == to_len) {
|
||||
if (from_len == 1) {
|
||||
return replace_single_character_in_place(
|
||||
self,
|
||||
PyString_AS_STRING(from)[0],
|
||||
PyString_AS_STRING(to)[0],
|
||||
maxcount);
|
||||
} else {
|
||||
return replace_substring_in_place(
|
||||
self, from, to, maxcount);
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise use the more generic algorithms */
|
||||
if (from_len == 1) {
|
||||
return replace_single_character(self, PyString_AS_STRING(from)[0],
|
||||
to, maxcount);
|
||||
} else {
|
||||
/* len('from')>=2, len('to')>=1 */
|
||||
return replace_substring(self, from, to, maxcount);
|
||||
}
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(replace__doc__,
|
||||
"S.replace (old, new[, count]) -> string\n\
|
||||
|
@ -2558,67 +3006,42 @@ given, only the first count occurrences are replaced.");
|
|||
static PyObject *
|
||||
string_replace(PyStringObject *self, PyObject *args)
|
||||
{
|
||||
const char *str = PyString_AS_STRING(self), *sub, *repl;
|
||||
char *new_s;
|
||||
const Py_ssize_t len = PyString_GET_SIZE(self);
|
||||
Py_ssize_t sub_len, repl_len, out_len;
|
||||
Py_ssize_t count = -1;
|
||||
PyObject *newobj;
|
||||
PyObject *subobj, *replobj;
|
||||
PyObject *from, *to;
|
||||
char *tmp_s;
|
||||
Py_ssize_t tmp_len;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "OO|n:replace",
|
||||
&subobj, &replobj, &count))
|
||||
if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
|
||||
return NULL;
|
||||
|
||||
if (PyString_Check(subobj)) {
|
||||
sub = PyString_AS_STRING(subobj);
|
||||
sub_len = PyString_GET_SIZE(subobj);
|
||||
if (PyString_Check(from)) {
|
||||
/* Can this be made a '!check' after the Unicode check? */
|
||||
}
|
||||
#ifdef Py_USING_UNICODE
|
||||
else if (PyUnicode_Check(subobj))
|
||||
if (PyUnicode_Check(from))
|
||||
return PyUnicode_Replace((PyObject *)self,
|
||||
subobj, replobj, count);
|
||||
from, to, count);
|
||||
#endif
|
||||
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
|
||||
else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
|
||||
return NULL;
|
||||
|
||||
if (PyString_Check(replobj)) {
|
||||
repl = PyString_AS_STRING(replobj);
|
||||
repl_len = PyString_GET_SIZE(replobj);
|
||||
if (PyString_Check(to)) {
|
||||
/* Can this be made a '!check' after the Unicode check? */
|
||||
}
|
||||
#ifdef Py_USING_UNICODE
|
||||
else if (PyUnicode_Check(replobj))
|
||||
else if (PyUnicode_Check(to))
|
||||
return PyUnicode_Replace((PyObject *)self,
|
||||
subobj, replobj, count);
|
||||
from, to, count);
|
||||
#endif
|
||||
else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
|
||||
else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
|
||||
return NULL;
|
||||
|
||||
new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
|
||||
if (new_s == NULL) {
|
||||
if (!PyErr_Occurred())
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
if (out_len == -1) {
|
||||
if (PyString_CheckExact(self)) {
|
||||
/* we're returning another reference to self */
|
||||
newobj = (PyObject*)self;
|
||||
Py_INCREF(newobj);
|
||||
}
|
||||
else {
|
||||
newobj = PyString_FromStringAndSize(str, len);
|
||||
if (newobj == NULL)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
newobj = PyString_FromStringAndSize(new_s, out_len);
|
||||
PyMem_FREE(new_s);
|
||||
}
|
||||
return newobj;
|
||||
return (PyObject *)replace((PyStringObject *) self,
|
||||
(PyStringObject *) from,
|
||||
(PyStringObject *) to, count);
|
||||
}
|
||||
|
||||
/** End DALKE **/
|
||||
|
||||
PyDoc_STRVAR(startswith__doc__,
|
||||
"S.startswith(prefix[, start[, end]]) -> bool\n\
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue