needforspeed: stringlib refactoring (in progress)

2025-09-11 11:17:16 +00:00 · 2006-05-26 17:04:58 +00:00 · 2006-05-26 17:04:58 +00:00 · a50d201bd9
commit a50d201bd9
parent 877ab9bc23
4 changed files with 111 additions and 179 deletions
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@ -765,102 +765,17 @@ PyString_AsStringAndSize(register PyObject *obj,
 }

 /* -------------------------------------------------------------------- */
-/* Helpers */
+/* stringlib components */

-#define USE_FAST /* experimental fast search implementation */
+#define USE_FAST

-/* XXX - this code is copied from unicodeobject.c.  we really should
-   refactor the core implementations (see _sre.c for how this can be
-   done), but that'll have to wait -- fredrik */
+#ifdef USE_FAST

-/* fast search/count implementation, based on a mix between boyer-
-   moore and horspool, with a few more bells and whistles on the top.
-   for some more background, see: http://effbot.org/stringlib */
+#define STRINGLIB_CHAR char

-/* note: fastsearch may access s[n], which isn't a problem when using
-   Python's ordinary string types, but may cause problems if you're
-   using this code in other contexts.  also, the count mode returns -1
-   if there cannot possibly be a match in the target string, and 0 if
-   it has actually checked for matches, but didn't find any.  callers
-   beware! */
+#include "stringlib/fastsearch.h"

-#define FAST_COUNT 0
-#define FAST_SEARCH 1
-
-Py_LOCAL(Py_ssize_t)
-fastsearch(const char* s, Py_ssize_t n, const char* p, Py_ssize_t m, int mode)
-{
-	long mask;
-	Py_ssize_t skip, count = 0;
-	Py_ssize_t i, j, mlast, w;
-
-	w = n - m;
-
-	if (w < 0)
-		return -1;
-
-	/* look for special cases */
-	if (m <= 1) {
-		if (m <= 0)
-			return -1;
-		/* use special case for 1-character strings */
-		if (mode == FAST_COUNT) {
-			for (i = 0; i < n; i++)
-				if (s[i] == p[0])
-					count++;
-			return count;
-		} else {
-			for (i = 0; i < n; i++)
-				if (s[i] == p[0])
-					return i;
-		}
-		return -1;
-	}
-
-	mlast = m - 1;
-
-	/* create compressed boyer-moore delta 1 table */
-	skip = mlast - 1;
-	/* process pattern[:-1] */
-	for (mask = i = 0; i < mlast; i++) {
-		mask |= (1 << (p[i] & 0x1F));
-		if (p[i] == p[mlast])
-			skip = mlast - i - 1;
-	}
-	/* process pattern[-1] outside the loop */
-	mask |= (1 << (p[mlast] & 0x1F));
-
-	for (i = 0; i <= w; i++) {
-		/* note: using mlast in the skip path slows things down on x86 */
-		if (s[i+m-1] == p[m-1]) {
-			/* candidate match */
-			for (j = 0; j < mlast; j++)
-				if (s[i+j] != p[j])
-					break;
-			if (j == mlast) {
-				/* got a match! */
-				if (mode != FAST_COUNT)
-					return i;
-				count++;
-				i = i + mlast;
-				continue;
-			}
-			/* miss: check if next character is part of pattern */
-			if (!(mask & (1 << (s[i+m] & 0x1F))))
-				i = i + m;
-			else
-				i = i + skip;
-		} else {
-			/* skip: check if next character is part of pattern */
-			if (!(mask & (1 << (s[i+m] & 0x1F))))
-				i = i + m;
-		}
-	}
-
-	if (mode != FAST_COUNT)
-		return -1;
-	return count;
-}
+#endif

 /* -------------------------------------------------------------------- */
 /* Methods */
@ -2416,7 +2331,7 @@ string_count(PyStringObject *self, PyObject *args)
 #else
 	r = 0;
 	while (i < m) {
-		const char *t
+		const char *t;
 		if (!memcmp(s+i, sub, n)) {
 			r++;
 			i += n;