mirror of
https://github.com/python/cpython.git
synced 2025-09-01 22:47:59 +00:00
Issue #24821: Refactor STRINGLIB(fastsearch_memchr_1char) and split it on
STRINGLIB(find_char) and STRINGLIB(rfind_char) that can be used independedly without special preconditions.
This commit is contained in:
parent
0304729ec4
commit
413fdcea21
4 changed files with 124 additions and 103 deletions
|
@ -1159,16 +1159,15 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
|
||||||
ADJUST_INDICES(start, end, len);
|
ADJUST_INDICES(start, end, len);
|
||||||
if (end - start < sub_len)
|
if (end - start < sub_len)
|
||||||
res = -1;
|
res = -1;
|
||||||
else if (sub_len == 1
|
else if (sub_len == 1) {
|
||||||
#ifndef HAVE_MEMRCHR
|
if (dir > 0)
|
||||||
&& dir > 0
|
res = stringlib_find_char(
|
||||||
#endif
|
PyByteArray_AS_STRING(self) + start, end - start,
|
||||||
) {
|
*sub);
|
||||||
unsigned char needle = *sub;
|
else
|
||||||
int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
|
res = stringlib_rfind_char(
|
||||||
res = stringlib_fastsearch_memchr_1char(
|
PyByteArray_AS_STRING(self) + start, end - start,
|
||||||
PyByteArray_AS_STRING(self) + start, end - start,
|
*sub);
|
||||||
needle, needle, mode);
|
|
||||||
if (res >= 0)
|
if (res >= 0)
|
||||||
res += start;
|
res += start;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1937,16 +1937,15 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
|
||||||
ADJUST_INDICES(start, end, len);
|
ADJUST_INDICES(start, end, len);
|
||||||
if (end - start < sub_len)
|
if (end - start < sub_len)
|
||||||
res = -1;
|
res = -1;
|
||||||
else if (sub_len == 1
|
else if (sub_len == 1) {
|
||||||
#ifndef HAVE_MEMRCHR
|
if (dir > 0)
|
||||||
&& dir > 0
|
res = stringlib_find_char(
|
||||||
#endif
|
PyBytes_AS_STRING(self) + start, end - start,
|
||||||
) {
|
*sub);
|
||||||
unsigned char needle = *sub;
|
else
|
||||||
int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
|
res = stringlib_rfind_char(
|
||||||
res = stringlib_fastsearch_memchr_1char(
|
PyBytes_AS_STRING(self) + start, end - start,
|
||||||
PyBytes_AS_STRING(self) + start, end - start,
|
*sub);
|
||||||
needle, needle, mode);
|
|
||||||
if (res >= 0)
|
if (res >= 0)
|
||||||
res += start;
|
res += start;
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,52 +32,98 @@
|
||||||
#define STRINGLIB_BLOOM(mask, ch) \
|
#define STRINGLIB_BLOOM(mask, ch) \
|
||||||
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
|
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
|
||||||
|
|
||||||
|
Py_LOCAL_INLINE(Py_ssize_t)
|
||||||
|
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
|
||||||
|
{
|
||||||
|
const STRINGLIB_CHAR *p, *e;
|
||||||
|
|
||||||
|
p = s;
|
||||||
|
e = s + n;
|
||||||
|
if (n > 10) {
|
||||||
|
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||||
|
p = memchr(s, ch, n);
|
||||||
|
if (p != NULL)
|
||||||
|
return (p - s);
|
||||||
|
return -1;
|
||||||
|
#else
|
||||||
|
/* use memchr if we can choose a needle without two many likely
|
||||||
|
false positives */
|
||||||
|
unsigned char needle = ch & 0xff;
|
||||||
|
/* If looking for a multiple of 256, we'd have too
|
||||||
|
many false positives looking for the '\0' byte in UCS2
|
||||||
|
and UCS4 representations. */
|
||||||
|
if (needle != 0) {
|
||||||
|
while (p < e) {
|
||||||
|
void *candidate = memchr(p, needle,
|
||||||
|
(e - p) * sizeof(STRINGLIB_CHAR));
|
||||||
|
if (candidate == NULL)
|
||||||
|
return -1;
|
||||||
|
p = (const STRINGLIB_CHAR *)
|
||||||
|
_Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
|
||||||
|
if (*p == ch)
|
||||||
|
return (p - s);
|
||||||
|
/* False positive */
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
while (p < e) {
|
||||||
|
if (*p == ch)
|
||||||
|
return (p - s);
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
Py_LOCAL_INLINE(Py_ssize_t)
|
Py_LOCAL_INLINE(Py_ssize_t)
|
||||||
STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
|
||||||
STRINGLIB_CHAR ch, unsigned char needle,
|
|
||||||
int mode)
|
|
||||||
{
|
{
|
||||||
if (mode == FAST_SEARCH) {
|
const STRINGLIB_CHAR *p;
|
||||||
const STRINGLIB_CHAR *ptr = s;
|
|
||||||
const STRINGLIB_CHAR *e = s + n;
|
|
||||||
while (ptr < e) {
|
|
||||||
void *candidate = memchr((const void *) ptr, needle, (e - ptr) * sizeof(STRINGLIB_CHAR));
|
|
||||||
if (candidate == NULL)
|
|
||||||
return -1;
|
|
||||||
ptr = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
|
|
||||||
if (sizeof(STRINGLIB_CHAR) == 1 || *ptr == ch)
|
|
||||||
return (ptr - s);
|
|
||||||
/* False positive */
|
|
||||||
ptr++;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
#ifdef HAVE_MEMRCHR
|
#ifdef HAVE_MEMRCHR
|
||||||
/* memrchr() is a GNU extension, available since glibc 2.1.91.
|
/* memrchr() is a GNU extension, available since glibc 2.1.91.
|
||||||
it doesn't seem as optimized as memchr(), but is still quite
|
it doesn't seem as optimized as memchr(), but is still quite
|
||||||
faster than our hand-written loop in FASTSEARCH below */
|
faster than our hand-written loop below */
|
||||||
else if (mode == FAST_RSEARCH) {
|
|
||||||
while (n > 0) {
|
|
||||||
const STRINGLIB_CHAR *found;
|
|
||||||
void *candidate = memrchr((const void *) s, needle, n * sizeof(STRINGLIB_CHAR));
|
|
||||||
if (candidate == NULL)
|
|
||||||
return -1;
|
|
||||||
found = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
|
|
||||||
n = found - s;
|
|
||||||
if (sizeof(STRINGLIB_CHAR) == 1 || *found == ch)
|
|
||||||
return n;
|
|
||||||
/* False positive */
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
else {
|
|
||||||
assert(0); /* Should never get here */
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef DO_MEMCHR
|
if (n > 10) {
|
||||||
|
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||||
|
p = memrchr(s, ch, n);
|
||||||
|
if (p != NULL)
|
||||||
|
return (p - s);
|
||||||
|
return -1;
|
||||||
|
#else
|
||||||
|
/* use memrchr if we can choose a needle without two many likely
|
||||||
|
false positives */
|
||||||
|
unsigned char needle = ch & 0xff;
|
||||||
|
/* If looking for a multiple of 256, we'd have too
|
||||||
|
many false positives looking for the '\0' byte in UCS2
|
||||||
|
and UCS4 representations. */
|
||||||
|
if (needle != 0) {
|
||||||
|
while (n > 0) {
|
||||||
|
void *candidate = memrchr(s, needle,
|
||||||
|
n * sizeof(STRINGLIB_CHAR));
|
||||||
|
if (candidate == NULL)
|
||||||
|
return -1;
|
||||||
|
p = (const STRINGLIB_CHAR *)
|
||||||
|
_Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
|
||||||
|
n = p - s;
|
||||||
|
if (*p == ch)
|
||||||
|
return n;
|
||||||
|
/* False positive */
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif /* HAVE_MEMRCHR */
|
||||||
|
p = s + n;
|
||||||
|
while (p > s) {
|
||||||
|
p--;
|
||||||
|
if (*p == ch)
|
||||||
|
return (p - s);
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_LOCAL_INLINE(Py_ssize_t)
|
Py_LOCAL_INLINE(Py_ssize_t)
|
||||||
|
@ -99,25 +145,11 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
||||||
if (m <= 0)
|
if (m <= 0)
|
||||||
return -1;
|
return -1;
|
||||||
/* use special case for 1-character strings */
|
/* use special case for 1-character strings */
|
||||||
if (n > 10 && (mode == FAST_SEARCH
|
if (mode == FAST_SEARCH)
|
||||||
#ifdef HAVE_MEMRCHR
|
return STRINGLIB(find_char)(s, n, p[0]);
|
||||||
|| mode == FAST_RSEARCH
|
else if (mode == FAST_RSEARCH)
|
||||||
#endif
|
return STRINGLIB(rfind_char)(s, n, p[0]);
|
||||||
)) {
|
else { /* FAST_COUNT */
|
||||||
/* use memchr if we can choose a needle without two many likely
|
|
||||||
false positives */
|
|
||||||
unsigned char needle;
|
|
||||||
needle = p[0] & 0xff;
|
|
||||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
|
||||||
/* If looking for a multiple of 256, we'd have too
|
|
||||||
many false positives looking for the '\0' byte in UCS2
|
|
||||||
and UCS4 representations. */
|
|
||||||
if (needle != 0)
|
|
||||||
#endif
|
|
||||||
return STRINGLIB(fastsearch_memchr_1char)
|
|
||||||
(s, n, p[0], needle, mode);
|
|
||||||
}
|
|
||||||
if (mode == FAST_COUNT) {
|
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
if (s[i] == p[0]) {
|
if (s[i] == p[0]) {
|
||||||
count++;
|
count++;
|
||||||
|
@ -125,14 +157,6 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
||||||
return maxcount;
|
return maxcount;
|
||||||
}
|
}
|
||||||
return count;
|
return count;
|
||||||
} else if (mode == FAST_SEARCH) {
|
|
||||||
for (i = 0; i < n; i++)
|
|
||||||
if (s[i] == p[0])
|
|
||||||
return i;
|
|
||||||
} else { /* FAST_RSEARCH */
|
|
||||||
for (i = n - 1; i > -1; i--)
|
|
||||||
if (s[i] == p[0])
|
|
||||||
return i;
|
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -811,27 +811,26 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind,
|
||||||
Py_ssize_t size, Py_UCS4 ch,
|
Py_ssize_t size, Py_UCS4 ch,
|
||||||
int direction)
|
int direction)
|
||||||
{
|
{
|
||||||
int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH;
|
|
||||||
|
|
||||||
switch (kind) {
|
switch (kind) {
|
||||||
case PyUnicode_1BYTE_KIND:
|
case PyUnicode_1BYTE_KIND:
|
||||||
{
|
if ((Py_UCS1) ch != ch)
|
||||||
Py_UCS1 ch1 = (Py_UCS1) ch;
|
return -1;
|
||||||
if (ch1 == ch)
|
if (direction > 0)
|
||||||
return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode);
|
return ucs1lib_find_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
|
||||||
else
|
else
|
||||||
return -1;
|
return ucs1lib_rfind_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
|
||||||
}
|
|
||||||
case PyUnicode_2BYTE_KIND:
|
case PyUnicode_2BYTE_KIND:
|
||||||
{
|
if ((Py_UCS2) ch != ch)
|
||||||
Py_UCS2 ch2 = (Py_UCS2) ch;
|
return -1;
|
||||||
if (ch2 == ch)
|
if (direction > 0)
|
||||||
return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode);
|
return ucs2lib_find_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
|
||||||
else
|
else
|
||||||
return -1;
|
return ucs2lib_rfind_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
|
||||||
}
|
|
||||||
case PyUnicode_4BYTE_KIND:
|
case PyUnicode_4BYTE_KIND:
|
||||||
return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode);
|
if (direction > 0)
|
||||||
|
return ucs4lib_find_char((Py_UCS4 *) s, size, ch);
|
||||||
|
else
|
||||||
|
return ucs4lib_rfind_char((Py_UCS4 *) s, size, ch);
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
return -1;
|
return -1;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue