gh-109747: Improve errors for unsupported look-behind patterns (GH-109859)

Now re.error is raised instead of OverflowError or RuntimeError for
too large width of look-behind pattern.

The limit is increased to 2**32-1 (was 2**31-1).
This commit is contained in:
Serhiy Storchaka 2023-10-14 09:13:02 +03:00 committed by GitHub
parent ca0f3d858d
commit e2b3d831fd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 46 additions and 13 deletions

View file

@ -1861,6 +1861,29 @@ class ReTests(unittest.TestCase):
self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
def test_look_behind_overflow(self):
string = "x" * 2_500_000
p1 = r"(?<=((.{%d}){%d}){%d})"
p2 = r"(?<!((.{%d}){%d}){%d})"
# Test that the templates are valid and look-behind with width 2**21
# (larger than sys.maxunicode) are supported.
self.assertEqual(re.search(p1 % (2**7, 2**7, 2**7), string).span(),
(2**21, 2**21))
self.assertEqual(re.search(p2 % (2**7, 2**7, 2**7), string).span(),
(0, 0))
# Test that 2**22 is accepted as a repetition number and look-behind
# width.
re.compile(p1 % (2**22, 1, 1))
re.compile(p1 % (1, 2**22, 1))
re.compile(p1 % (1, 1, 2**22))
re.compile(p2 % (2**22, 1, 1))
re.compile(p2 % (1, 2**22, 1))
re.compile(p2 % (1, 1, 2**22))
# But 2**66 is too large for look-behind width.
errmsg = "looks too much behind"
self.assertRaisesRegex(re.error, errmsg, re.compile, p1 % (2**22, 2**22, 2**22))
self.assertRaisesRegex(re.error, errmsg, re.compile, p2 % (2**22, 2**22, 2**22))
def test_backref_group_name_in_exception(self):
# Issue 17341: Poor error message when compiling invalid regex
self.checkPatternError('(?P=<foo>)',