mirror of
https://github.com/python/cpython.git
synced 2025-08-31 05:58:33 +00:00
bpo-433030: Add support of atomic grouping in regular expressions (GH-31982)
* Atomic grouping: (?>...). * Possessive quantifiers: x++, x*+, x?+, x{m,n}+. Equivalent to (?>x+), (?>x*), (?>x?), (?>x{m,n}). Co-authored-by: Jeffrey C. Jacobs <timehorse@users.sourceforge.net>
This commit is contained in:
parent
2bde6827ea
commit
345b390ed6
11 changed files with 593 additions and 92 deletions
|
@ -17,11 +17,16 @@ from sre_constants import *
|
|||
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
|
||||
|
||||
_LITERAL_CODES = {LITERAL, NOT_LITERAL}
|
||||
_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
|
||||
_SUCCESS_CODES = {SUCCESS, FAILURE}
|
||||
_ASSERT_CODES = {ASSERT, ASSERT_NOT}
|
||||
_UNIT_CODES = _LITERAL_CODES | {ANY, IN}
|
||||
|
||||
_REPEATING_CODES = {
|
||||
MIN_REPEAT: (REPEAT, MIN_UNTIL, MIN_REPEAT_ONE),
|
||||
MAX_REPEAT: (REPEAT, MAX_UNTIL, REPEAT_ONE),
|
||||
POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
|
||||
}
|
||||
|
||||
# Sets of lowercase characters which have the same uppercase.
|
||||
_equivalences = (
|
||||
# LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I
|
||||
|
@ -138,10 +143,7 @@ def _compile(code, pattern, flags):
|
|||
if flags & SRE_FLAG_TEMPLATE:
|
||||
raise error("internal: unsupported template operator %r" % (op,))
|
||||
if _simple(av[2]):
|
||||
if op is MAX_REPEAT:
|
||||
emit(REPEAT_ONE)
|
||||
else:
|
||||
emit(MIN_REPEAT_ONE)
|
||||
emit(REPEATING_CODES[op][2])
|
||||
skip = _len(code); emit(0)
|
||||
emit(av[0])
|
||||
emit(av[1])
|
||||
|
@ -149,16 +151,13 @@ def _compile(code, pattern, flags):
|
|||
emit(SUCCESS)
|
||||
code[skip] = _len(code) - skip
|
||||
else:
|
||||
emit(REPEAT)
|
||||
emit(REPEATING_CODES[op][0])
|
||||
skip = _len(code); emit(0)
|
||||
emit(av[0])
|
||||
emit(av[1])
|
||||
_compile(code, av[2], flags)
|
||||
code[skip] = _len(code) - skip
|
||||
if op is MAX_REPEAT:
|
||||
emit(MAX_UNTIL)
|
||||
else:
|
||||
emit(MIN_UNTIL)
|
||||
emit(REPEATING_CODES[op][1])
|
||||
elif op is SUBPATTERN:
|
||||
group, add_flags, del_flags, p = av
|
||||
if group:
|
||||
|
@ -169,6 +168,17 @@ def _compile(code, pattern, flags):
|
|||
if group:
|
||||
emit(MARK)
|
||||
emit((group-1)*2+1)
|
||||
elif op is ATOMIC_GROUP:
|
||||
# Atomic Groups are handled by starting with an Atomic
|
||||
# Group op code, then putting in the atomic group pattern
|
||||
# and finally a success op code to tell any repeat
|
||||
# operations within the Atomic Group to stop eating and
|
||||
# pop their stack if they reach it
|
||||
emit(ATOMIC_GROUP)
|
||||
skip = _len(code); emit(0)
|
||||
_compile(code, av, flags)
|
||||
emit(SUCCESS)
|
||||
code[skip] = _len(code) - skip
|
||||
elif op in SUCCESS_CODES:
|
||||
emit(op)
|
||||
elif op in ASSERT_CODES:
|
||||
|
@ -709,7 +719,8 @@ def dis(code):
|
|||
else:
|
||||
print_(FAILURE)
|
||||
i += 1
|
||||
elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE):
|
||||
elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
|
||||
POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
|
||||
skip, min, max = code[i: i+3]
|
||||
if max == MAXREPEAT:
|
||||
max = 'MAXREPEAT'
|
||||
|
@ -725,6 +736,11 @@ def dis(code):
|
|||
print_(op, skip, arg, to=i+skip)
|
||||
dis_(i+2, i+skip)
|
||||
i += skip
|
||||
elif op is ATOMIC_GROUP:
|
||||
skip = code[i]
|
||||
print_(op, skip, to=i+skip)
|
||||
dis_(i+1, i+skip)
|
||||
i += skip
|
||||
elif op is INFO:
|
||||
skip, flags, min, max = code[i: i+4]
|
||||
if max == MAXREPEAT:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue