[3.12] gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612) (#108003)

Restore the global Input Stream pointer after trying to match a sub-pattern.

.
(cherry picked from commit abd9cc52d9)

Co-authored-by: SKO <41810398+uyw4687@users.noreply.github.com>
This commit is contained in:
Serhiy Storchaka 2023-08-16 13:00:55 +03:00 committed by GitHub
parent 00bfed7cba
commit bd2ef82a50
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 9 deletions

View file

@ -100,13 +100,6 @@ def _compile(code, pattern, flags):
emit(ANY_ALL)
else:
emit(ANY)
elif op is POSSESSIVE_REPEAT:
# gh-106052: Possessive quantifiers do not work when the
# subpattern contains backtracking, i.e. "(?:ab?c)*+".
# Implement it as equivalent greedy qualifier in atomic group.
p = [(MAX_REPEAT, av)]
p = [(ATOMIC_GROUP, p)]
_compile(code, p, flags)
elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE:
raise error("internal: unsupported template operator %r" % (op,))

View file

@ -2366,6 +2366,16 @@ class ReTests(unittest.TestCase):
self.assertFalse(template_re1.match('nope'))
def test_bug_gh106052(self):
# gh-100061
self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0))
self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0))
self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2))
# gh-106052
self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2))
self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2))
self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2))
@ -2471,7 +2481,6 @@ ATOMIC_GROUP
17: SUCCESS
''')
@unittest.expectedFailure # gh-106052
def test_possesive_repeat_one(self):
self.assertEqual(get_debug_out(r'a?+'), '''\
POSSESSIVE_REPEAT 0 1
@ -2484,7 +2493,6 @@ POSSESSIVE_REPEAT 0 1
12: SUCCESS
''')
@unittest.expectedFailure # gh-106052
def test_possesive_repeat(self):
self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\
POSSESSIVE_REPEAT 0 1

View file

@ -0,0 +1,2 @@
Fix a bug that causes wrong matches for regular expressions with possessive
quantifiers.

View file

@ -1334,6 +1334,10 @@ dispatch:
MARK_POP(ctx->lastmark);
LASTMARK_RESTORE();
/* Restore the global Input Stream pointer
since it can change after jumps. */
state->ptr = ptr;
/* We have sufficient matches, so exit loop. */
break;
}