mirror of
https://github.com/python/cpython.git
synced 2025-12-04 00:30:19 +00:00
bpo-30340: Enhanced regular expressions optimization. (#1542)
This increased the performance of matching some patterns up to 25 times.
This commit is contained in:
parent
cbddf58c79
commit
821a9d146b
4 changed files with 95 additions and 54 deletions
|
|
@@ -20,6 +20,7 @@ _LITERAL_CODES = {LITERAL, NOT_LITERAL}
|
|||
_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
|
||||
_SUCCESS_CODES = {SUCCESS, FAILURE}
|
||||
_ASSERT_CODES = {ASSERT, ASSERT_NOT}
|
||||
_UNIT_CODES = _LITERAL_CODES | {ANY, IN}
|
||||
|
||||
# Sets of lowercase characters which have the same uppercase.
|
||||
_equivalences = (
|
||||
|
|
@@ -125,7 +126,7 @@ def _compile(code, pattern, flags):
|
|||
elif op in REPEATING_CODES:
|
||||
if flags & SRE_FLAG_TEMPLATE:
|
||||
raise error("internal: unsupported template operator %r" % (op,))
|
||||
elif _simple(av) and op is not REPEAT:
|
||||
if _simple(av[2]):
|
||||
if op is MAX_REPEAT:
|
||||
emit(REPEAT_ONE)
|
||||
else:
|
||||
|
|
@@ -404,10 +405,14 @@ def _bytes_to_codes(b):
|
|||
assert len(a) * a.itemsize == len(b)
|
||||
return a.tolist()
|
||||
|
||||
def _simple(av):
|
||||
# check if av is a "simple" operator
|
||||
lo, hi = av[2].getwidth()
|
||||
return lo == hi == 1 and av[2][0][0] != SUBPATTERN
|
||||
def _simple(p):
|
||||
# check if this subpattern is a "simple" operator
|
||||
if len(p) != 1:
|
||||
return False
|
||||
op, av = p[0]
|
||||
if op is SUBPATTERN:
|
||||
return av[0] is None and _simple(av[-1])
|
||||
return op in _UNIT_CODES
|
||||
|
||||
def _generate_overlap_table(prefix):
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue