Issue #24821: Refactor STRINGLIB(fastsearch_memchr_1char) and split it into

STRINGLIB(find_char) and STRINGLIB(rfind_char), which can be used independently without special preconditions.
2025-11-03 03:22:27 +00:00 · 2015-11-14 15:42:17 +02:00 · 2015-11-14 15:42:17 +02:00 · 413fdcea21
commit 413fdcea21
parent 0304729ec4
4 changed files with 124 additions and 103 deletions
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@ -1159,16 +1159,15 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
    ADJUST_INDICES(start, end, len);
    if (end - start < sub_len)
        res = -1;
-    else if (sub_len == 1
+    else if (sub_len == 1) {
-#ifndef HAVE_MEMRCHR
+        if (dir > 0)
-            && dir > 0
+            res = stringlib_find_char(
-#endif
+                PyByteArray_AS_STRING(self) + start, end - start,
-    ) {
+                *sub);
-        unsigned char needle = *sub;
+        else
-        int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
+            res = stringlib_rfind_char(
-        res = stringlib_fastsearch_memchr_1char(
+                PyByteArray_AS_STRING(self) + start, end - start,
-            PyByteArray_AS_STRING(self) + start, end - start,
+                *sub);
            needle, needle, mode);
        if (res >= 0)
            res += start;
    }
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@ -1937,16 +1937,15 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
    ADJUST_INDICES(start, end, len);
    if (end - start < sub_len)
        res = -1;
-    else if (sub_len == 1
+    else if (sub_len == 1) {
-#ifndef HAVE_MEMRCHR
+        if (dir > 0)
-            && dir > 0
+            res = stringlib_find_char(
-#endif
+                PyBytes_AS_STRING(self) + start, end - start,
-    ) {
+                *sub);
-        unsigned char needle = *sub;
+        else
-        int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
+            res = stringlib_rfind_char(
-        res = stringlib_fastsearch_memchr_1char(
+                PyBytes_AS_STRING(self) + start, end - start,
-            PyBytes_AS_STRING(self) + start, end - start,
+                *sub);
            needle, needle, mode);
        if (res >= 0)
            res += start;
    }
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@ -32,52 +32,98 @@
 #define STRINGLIB_BLOOM(mask, ch)     \
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
 /* Find the first occurrence of the character `ch` in the `n` characters
    starting at `s`.

    Returns the zero-based index of the first match, or -1 if `ch` does not
    occur in s[0..n).

    Strategy:
      - For n <= 10 a plain forward loop is used (presumably because the
        memchr() call is not worth its overhead on such short inputs).
      - For longer inputs with 1-byte characters, memchr() is called directly.
      - For longer inputs with wider characters, memchr() searches the raw
        bytes for the low byte of `ch`; each hit is then checked against the
        full character.  If the low byte is 0 this is skipped and the plain
        loop is used instead. */
 Py_LOCAL_INLINE(Py_ssize_t)
 STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
 {
    const STRINGLIB_CHAR *p, *e;
    p = s;
    e = s + n;      /* one past the last character to examine */
    if (n > 10) {
 #if STRINGLIB_SIZEOF_CHAR == 1
        /* Characters are single bytes: memchr() gives the answer directly. */
        p = memchr(s, ch, n);
        if (p != NULL)
            return (p - s);
        return -1;
 #else
        /* use memchr if we can choose a needle without too many likely
           false positives */
        unsigned char needle = ch & 0xff;
        /* If looking for a multiple of 256, we'd have too
           many false positives looking for the '\0' byte in UCS2
           and UCS4 representations. */
        if (needle != 0) {
            while (p < e) {
                /* Search the remaining characters as a flat byte buffer. */
                void *candidate = memchr(p, needle,
                                         (e - p) * sizeof(STRINGLIB_CHAR));
                if (candidate == NULL)
                    return -1;
                /* The byte hit may lie anywhere inside a character;
                   _Py_ALIGN_DOWN is expected to round it down to the start
                   of that character so the whole value can be compared. */
                p = (const STRINGLIB_CHAR *)
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
                if (*p == ch)
                    return (p - s);
                /* False positive */
                p++;    /* resume the search at the next character */
            }
            return -1;
        }
        /* needle == 0: fall through to the plain loop with p still at s. */
 #endif
    }
    /* Plain forward scan: short inputs, or wide characters whose low byte
       is 0. */
    while (p < e) {
        if (*p == ch)
            return (p - s);
        p++;
    }
    return -1;
 }
 Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
+STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
                                   STRINGLIB_CHAR ch, unsigned char needle,
                                   int mode)
 {
-    if (mode == FAST_SEARCH) {
+    const STRINGLIB_CHAR *p;
        const STRINGLIB_CHAR *ptr = s;
        const STRINGLIB_CHAR *e = s + n;
        while (ptr < e) {
            void *candidate = memchr((const void *) ptr, needle, (e - ptr) * sizeof(STRINGLIB_CHAR));
            if (candidate == NULL)
                return -1;
            ptr = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
            if (sizeof(STRINGLIB_CHAR) == 1 || *ptr == ch)
                return (ptr - s);
            /* False positive */
            ptr++;
        }
        return -1;
    }
 #ifdef HAVE_MEMRCHR
    /* memrchr() is a GNU extension, available since glibc 2.1.91.
       it doesn't seem as optimized as memchr(), but is still quite
-       faster than our hand-written loop in FASTSEARCH below */
+       faster than our hand-written loop below */
    else if (mode == FAST_RSEARCH) {
        while (n > 0) {
            const STRINGLIB_CHAR *found;
            void *candidate = memrchr((const void *) s, needle, n * sizeof(STRINGLIB_CHAR));
            if (candidate == NULL)
                return -1;
            found = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
            n = found - s;
            if (sizeof(STRINGLIB_CHAR) == 1 || *found == ch)
                return n;
            /* False positive */
        }
        return -1;
    }
 #endif
    else {
        assert(0); /* Should never get here */
        return 0;
    }
-#undef DO_MEMCHR
+    if (n > 10) {
 #if STRINGLIB_SIZEOF_CHAR == 1
        p = memrchr(s, ch, n);
        if (p != NULL)
            return (p - s);
        return -1;
 #else
        /* use memrchr if we can choose a needle without too many likely
           false positives */
        unsigned char needle = ch & 0xff;
        /* If looking for a multiple of 256, we'd have too
           many false positives looking for the '\0' byte in UCS2
           and UCS4 representations. */
        if (needle != 0) {
            while (n > 0) {
                void *candidate = memrchr(s, needle,
                                          n * sizeof(STRINGLIB_CHAR));
                if (candidate == NULL)
                    return -1;
                p = (const STRINGLIB_CHAR *)
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
                n = p - s;
                if (*p == ch)
                    return n;
                /* False positive */
            }
            return -1;
        }
 #endif
    }
 #endif  /* HAVE_MEMRCHR */
    p = s + n;
    while (p > s) {
        p--;
        if (*p == ch)
            return (p - s);
    }
    return -1;
 }
 Py_LOCAL_INLINE(Py_ssize_t)
@ -99,25 +145,11 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
        if (m <= 0)
            return -1;
        /* use special case for 1-character strings */
-        if (n > 10 && (mode == FAST_SEARCH
+        if (mode == FAST_SEARCH)
-#ifdef HAVE_MEMRCHR
+            return STRINGLIB(find_char)(s, n, p[0]);
-                    || mode == FAST_RSEARCH
+        else if (mode == FAST_RSEARCH)
-#endif
+            return STRINGLIB(rfind_char)(s, n, p[0]);
-                    )) {
+        else {  /* FAST_COUNT */
            /* use memchr if we can choose a needle without too many likely
               false positives */
            unsigned char needle;
            needle = p[0] & 0xff;
 #if STRINGLIB_SIZEOF_CHAR > 1
            /* If looking for a multiple of 256, we'd have too
               many false positives looking for the '\0' byte in UCS2
               and UCS4 representations. */
            if (needle != 0)
 #endif
                return STRINGLIB(fastsearch_memchr_1char)
                       (s, n, p[0], needle, mode);
        }
        if (mode == FAST_COUNT) {
            for (i = 0; i < n; i++)
                if (s[i] == p[0]) {
                    count++;
@ -125,14 +157,6 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
                        return maxcount;
                }
            return count;
        } else if (mode == FAST_SEARCH) {
            for (i = 0; i < n; i++)
                if (s[i] == p[0])
                    return i;
        } else {    /* FAST_RSEARCH */
            for (i = n - 1; i > -1; i--)
                if (s[i] == p[0])
                    return i;
        }
        return -1;
    }
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -811,27 +811,26 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind,
                                     Py_ssize_t size, Py_UCS4 ch,
                                     int direction)
 {
    int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH;
    switch (kind) {
    case PyUnicode_1BYTE_KIND:
-        {
+        if ((Py_UCS1) ch != ch)
-            Py_UCS1 ch1 = (Py_UCS1) ch;
+            return -1;
-            if (ch1 == ch)
+        if (direction > 0)
-                return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode);
+            return ucs1lib_find_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
-            else
+        else
-                return -1;
+            return ucs1lib_rfind_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
        }
    case PyUnicode_2BYTE_KIND:
-        {
+        if ((Py_UCS2) ch != ch)
-            Py_UCS2 ch2 = (Py_UCS2) ch;
+            return -1;
-            if (ch2 == ch)
+        if (direction > 0)
-                return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode);
+            return ucs2lib_find_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
-            else
+        else
-                return -1;
+            return ucs2lib_rfind_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
        }
    case PyUnicode_4BYTE_KIND:
-        return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode);
+        if (direction > 0)
            return ucs4lib_find_char((Py_UCS4 *) s, size, ch);
        else
            return ucs4lib_rfind_char((Py_UCS4 *) s, size, ch);
    default:
        assert(0);
        return -1;