Issue #24821: Refactor STRINGLIB(fastsearch_memchr_1char) and split it into

STRINGLIB(find_char) and STRINGLIB(rfind_char) that can be used independently
without special preconditions.
This commit is contained in:
Serhiy Storchaka 2015-11-14 15:42:17 +02:00
parent 0304729ec4
commit 413fdcea21
4 changed files with 124 additions and 103 deletions

View file

@ -1159,16 +1159,15 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
ADJUST_INDICES(start, end, len); ADJUST_INDICES(start, end, len);
if (end - start < sub_len) if (end - start < sub_len)
res = -1; res = -1;
else if (sub_len == 1 else if (sub_len == 1) {
#ifndef HAVE_MEMRCHR if (dir > 0)
&& dir > 0 res = stringlib_find_char(
#endif
) {
unsigned char needle = *sub;
int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
res = stringlib_fastsearch_memchr_1char(
PyByteArray_AS_STRING(self) + start, end - start, PyByteArray_AS_STRING(self) + start, end - start,
needle, needle, mode); *sub);
else
res = stringlib_rfind_char(
PyByteArray_AS_STRING(self) + start, end - start,
*sub);
if (res >= 0) if (res >= 0)
res += start; res += start;
} }

View file

@ -1937,16 +1937,15 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
ADJUST_INDICES(start, end, len); ADJUST_INDICES(start, end, len);
if (end - start < sub_len) if (end - start < sub_len)
res = -1; res = -1;
else if (sub_len == 1 else if (sub_len == 1) {
#ifndef HAVE_MEMRCHR if (dir > 0)
&& dir > 0 res = stringlib_find_char(
#endif
) {
unsigned char needle = *sub;
int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
res = stringlib_fastsearch_memchr_1char(
PyBytes_AS_STRING(self) + start, end - start, PyBytes_AS_STRING(self) + start, end - start,
needle, needle, mode); *sub);
else
res = stringlib_rfind_char(
PyBytes_AS_STRING(self) + start, end - start,
*sub);
if (res >= 0) if (res >= 0)
res += start; res += start;
} }

View file

@ -32,52 +32,98 @@
#define STRINGLIB_BLOOM(mask, ch) \ #define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
STRINGLIB_CHAR ch, unsigned char needle,
int mode)
{ {
if (mode == FAST_SEARCH) { const STRINGLIB_CHAR *p, *e;
const STRINGLIB_CHAR *ptr = s;
const STRINGLIB_CHAR *e = s + n; p = s;
while (ptr < e) { e = s + n;
void *candidate = memchr((const void *) ptr, needle, (e - ptr) * sizeof(STRINGLIB_CHAR)); if (n > 10) {
#if STRINGLIB_SIZEOF_CHAR == 1
p = memchr(s, ch, n);
if (p != NULL)
return (p - s);
return -1;
#else
/* use memchr if we can choose a needle without too many likely
false positives */
unsigned char needle = ch & 0xff;
/* If looking for a multiple of 256, we'd have too
many false positives looking for the '\0' byte in UCS2
and UCS4 representations. */
if (needle != 0) {
while (p < e) {
void *candidate = memchr(p, needle,
(e - p) * sizeof(STRINGLIB_CHAR));
if (candidate == NULL) if (candidate == NULL)
return -1; return -1;
ptr = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); p = (const STRINGLIB_CHAR *)
if (sizeof(STRINGLIB_CHAR) == 1 || *ptr == ch) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
return (ptr - s); if (*p == ch)
return (p - s);
/* False positive */ /* False positive */
ptr++; p++;
} }
return -1; return -1;
} }
#endif
}
while (p < e) {
if (*p == ch)
return (p - s);
p++;
}
return -1;
}
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
{
const STRINGLIB_CHAR *p;
#ifdef HAVE_MEMRCHR #ifdef HAVE_MEMRCHR
/* memrchr() is a GNU extension, available since glibc 2.1.91. /* memrchr() is a GNU extension, available since glibc 2.1.91.
it doesn't seem as optimized as memchr(), but is still quite it doesn't seem as optimized as memchr(), but is still quite
faster than our hand-written loop in FASTSEARCH below */ faster than our hand-written loop below */
else if (mode == FAST_RSEARCH) {
if (n > 10) {
#if STRINGLIB_SIZEOF_CHAR == 1
p = memrchr(s, ch, n);
if (p != NULL)
return (p - s);
return -1;
#else
/* use memrchr if we can choose a needle without too many likely
false positives */
unsigned char needle = ch & 0xff;
/* If looking for a multiple of 256, we'd have too
many false positives looking for the '\0' byte in UCS2
and UCS4 representations. */
if (needle != 0) {
while (n > 0) { while (n > 0) {
const STRINGLIB_CHAR *found; void *candidate = memrchr(s, needle,
void *candidate = memrchr((const void *) s, needle, n * sizeof(STRINGLIB_CHAR)); n * sizeof(STRINGLIB_CHAR));
if (candidate == NULL) if (candidate == NULL)
return -1; return -1;
found = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); p = (const STRINGLIB_CHAR *)
n = found - s; _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
if (sizeof(STRINGLIB_CHAR) == 1 || *found == ch) n = p - s;
if (*p == ch)
return n; return n;
/* False positive */ /* False positive */
} }
return -1; return -1;
} }
#endif #endif
else {
assert(0); /* Should never get here */
return 0;
} }
#endif /* HAVE_MEMRCHR */
#undef DO_MEMCHR p = s + n;
while (p > s) {
p--;
if (*p == ch)
return (p - s);
}
return -1;
} }
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
@ -99,25 +145,11 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (m <= 0) if (m <= 0)
return -1; return -1;
/* use special case for 1-character strings */ /* use special case for 1-character strings */
if (n > 10 && (mode == FAST_SEARCH if (mode == FAST_SEARCH)
#ifdef HAVE_MEMRCHR return STRINGLIB(find_char)(s, n, p[0]);
|| mode == FAST_RSEARCH else if (mode == FAST_RSEARCH)
#endif return STRINGLIB(rfind_char)(s, n, p[0]);
)) { else { /* FAST_COUNT */
/* use memchr if we can choose a needle without too many likely
false positives */
unsigned char needle;
needle = p[0] & 0xff;
#if STRINGLIB_SIZEOF_CHAR > 1
/* If looking for a multiple of 256, we'd have too
many false positives looking for the '\0' byte in UCS2
and UCS4 representations. */
if (needle != 0)
#endif
return STRINGLIB(fastsearch_memchr_1char)
(s, n, p[0], needle, mode);
}
if (mode == FAST_COUNT) {
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
if (s[i] == p[0]) { if (s[i] == p[0]) {
count++; count++;
@ -125,14 +157,6 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
return maxcount; return maxcount;
} }
return count; return count;
} else if (mode == FAST_SEARCH) {
for (i = 0; i < n; i++)
if (s[i] == p[0])
return i;
} else { /* FAST_RSEARCH */
for (i = n - 1; i > -1; i--)
if (s[i] == p[0])
return i;
} }
return -1; return -1;
} }

View file

@ -811,27 +811,26 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind,
Py_ssize_t size, Py_UCS4 ch, Py_ssize_t size, Py_UCS4 ch,
int direction) int direction)
{ {
int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH;
switch (kind) { switch (kind) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
{ if ((Py_UCS1) ch != ch)
Py_UCS1 ch1 = (Py_UCS1) ch;
if (ch1 == ch)
return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode);
else
return -1; return -1;
} if (direction > 0)
return ucs1lib_find_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
else
return ucs1lib_rfind_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
case PyUnicode_2BYTE_KIND: case PyUnicode_2BYTE_KIND:
{ if ((Py_UCS2) ch != ch)
Py_UCS2 ch2 = (Py_UCS2) ch;
if (ch2 == ch)
return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode);
else
return -1; return -1;
} if (direction > 0)
return ucs2lib_find_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
else
return ucs2lib_rfind_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
case PyUnicode_4BYTE_KIND: case PyUnicode_4BYTE_KIND:
return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode); if (direction > 0)
return ucs4lib_find_char((Py_UCS4 *) s, size, ch);
else
return ucs4lib_rfind_char((Py_UCS4 *) s, size, ch);
default: default:
assert(0); assert(0);
return -1; return -1;