gh-94808: improve comments and coverage of fastsearch.h (GH-96760)

This commit is contained in:
Dennis Sweeney 2022-09-13 14:25:10 -04:00 committed by GitHub
parent 4995f5f9a0
commit 69d9a08099
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 54 additions and 5 deletions

View file

@@ -18,7 +18,8 @@
algorithm, which has worst-case O(n) runtime and best-case O(n/k).
Also compute a table of shifts to achieve O(n/k) in more cases,
and often (data dependent) deduce larger shifts than pure C&P can
-deduce. */
+deduce. See stringlib_find_two_way_notes.txt in this folder for a
+detailed explanation. */
#define FAST_COUNT 0
#define FAST_SEARCH 1
@@ -398,7 +399,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
if (window_last >= haystack_end) {
return -1;
}
-LOG("Horspool skip");
+LOG("Horspool skip\n");
}
no_shift:
window = window_last - len_needle + 1;
@@ -457,7 +458,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
if (window_last >= haystack_end) {
return -1;
}
-LOG("Horspool skip");
+LOG("Horspool skip\n");
}
window = window_last - len_needle + 1;
assert((window[len_needle - 1] & TABLE_MASK) ==

View file

@@ -239,7 +239,7 @@ We cut as AA + bAAbAAbA, and then the algorithm runs as follows:
~~ AA != bA at the cut
bbbAbbAAbAAbAAbbbAAbAAbAAbAA
AAbAAbAAbA
-^^^^X 7-3=4 match, and the 5th misses.
+^^^^X 7-3=4 match, and the 5th misses.
bbbAbbAAbAAbAAbbbAAbAAbAAbAA
AAbAAbAAbA
~ A != b at the cut
@@ -395,7 +395,7 @@ of their proof goes something like this (this is far from complete):
needle == (a + w) + (w + b), meaning there's a bad equality
w == w, it's impossible for w + b to be bigger than both
b and w + w + b, so this can't happen. We thus have all of
-the ineuqalities with no question marks.
+the inequalities with no question marks.
* By maximality, the right part is not a substring of the left
part. Thus, we have all of the inequalities involving no
left-side question marks.