* Beef up testing of str.__contains__() and str.find().

* Speed up "x in y" where x has more than one character. The existing code made excessive calls to the expensive memcmp() function. The new code uses memchr() to rapidly find a start point for memcmp(). In addition to knowing that the first character is a match, the new code also checks that the last character is a match. This significantly reduces the incidence of false starts (saving memcmp() calls and making quadratic behavior less likely). Improves the timings on:

      python -m timeit -r7 -s"x='a'*1000" "'ab' in x"
      python -m timeit -r7 -s"x='a'*1000" "'bc' in x"

  Once this code has proven itself, string_find_internal() should refer to it rather than running its own version. Also, something similar may apply to unicode objects.
2025-11-20 02:50:14 +00:00 · 2005-02-20 04:07:08 +00:00 · 2005-02-20 04:07:08 +00:00 · 7cbf1bcb3e
commit 7cbf1bcb3e
parent 54c273c703
2 changed files with 50 additions and 13 deletions
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -122,6 +122,30 @@ class CommonTest(unittest.TestCase):
        self.checkraises(TypeError, 'hello', 'find')
        self.checkraises(TypeError, 'hello', 'find', 42)

+        # For a variety of combinations,
+        #    verify that str.find() matches __contains__
+        #    and that the found substring is really at that location
+        charset = ['', 'a', 'b', 'c']
+        digits = 5
+        base = len(charset)
+        teststrings = set()
+        for i in xrange(base ** digits):
+            entry = []
+            for j in xrange(digits):
+                i, m = divmod(i, base)
+                entry.append(charset[m])
+            teststrings.add(''.join(entry))
+        for i in teststrings:
+            i = self.fixtype(i)
+            for j in teststrings:
+                loc = i.find(j)
+                r1 = (loc != -1)
+                r2 = j in i
+                if r1 != r2:
+                    self.assertEqual(r1, r2)
+                if loc != -1:
+                    self.assertEqual(i[loc:loc+len(j)], j)
+
    def test_rfind(self):
        self.checkequal(9,  'abcdefghiabc', 'rfind', 'abc')
        self.checkequal(12, 'abcdefghiabc', 'rfind', '')