gh-109747: Improve errors for unsupported look-behind patterns (GH-109859)

Now re.error is raised instead of OverflowError or RuntimeError when the
width of a look-behind pattern is too large.

The limit is increased to 2**32-1 (was 2**31-1).
This commit is contained in:
Serhiy Storchaka 2023-10-14 09:13:02 +03:00 committed by GitHub
parent ca0f3d858d
commit e2b3d831fd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 46 additions and 13 deletions

View file

@@ -67,6 +67,10 @@ FLAGS = {
TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE
GLOBAL_FLAGS = SRE_FLAG_DEBUG
# Maximal value returned by SubPattern.getwidth().
# Must be larger than MAXREPEAT, MAXCODE and sys.maxsize.
MAXWIDTH = 1 << 64
class State:
# keeps track of state for parsing
def __init__(self):
@@ -177,7 +181,7 @@ class SubPattern:
lo = hi = 0
for op, av in self.data:
if op is BRANCH:
i = MAXREPEAT - 1
i = MAXWIDTH
j = 0
for av in av[1]:
l, h = av.getwidth()
@@ -196,7 +200,10 @@ class SubPattern:
elif op in _REPEATCODES:
i, j = av[2].getwidth()
lo = lo + i * av[0]
hi = hi + j * av[1]
if av[1] == MAXREPEAT and j:
hi = MAXWIDTH
else:
hi = hi + j * av[1]
elif op in _UNITCODES:
lo = lo + 1
hi = hi + 1
@@ -216,7 +223,7 @@ class SubPattern:
hi = hi + j
elif op is SUCCESS:
break
self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT)
self.width = min(lo, MAXWIDTH), min(hi, MAXWIDTH)
return self.width
class Tokenizer: