mirror of
https://github.com/python/cpython.git
synced 2025-08-31 14:07:50 +00:00
Applied patch #725106, by Greg Chapman, fixing capturing groups
within repeats of alternatives. The only change to the original
patch was to convert the tests to the new test_re.py file.

This patch fixes cases like:

>>> re.match('((a)|b)*', 'abc').groups()
('b', '')

Which is wrong (it's impossible to match the empty string),
and incompatible with other regex systems, like the following
examples show:

% perl -e '"abc" =~ /^((a)|b)*/; print "$1 $2\n";'
b a

% echo "abc" | sed -r -e "s/^((a)|b)*/\1 \2|/"
b a|c
This commit is contained in:
parent
9dcbbea878
commit
c34f2555bd
2 changed files with 29 additions and 0 deletions
|
@@ -276,6 +276,25 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
|
self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
|
||||||
('a:', 'a'))
|
('a:', 'a'))
|
||||||
|
|
||||||
|
def test_bug_725106(self):
|
||||||
|
# capturing groups in alternatives in repeats
|
||||||
|
self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
|
||||||
|
('b', 'a'))
|
||||||
|
self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
|
||||||
|
('c', 'b'))
|
||||||
|
self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
|
||||||
|
('b', None))
|
||||||
|
self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
|
||||||
|
('b', None))
|
||||||
|
self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
|
||||||
|
('b', 'a'))
|
||||||
|
self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
|
||||||
|
('c', 'b'))
|
||||||
|
self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
|
||||||
|
('b', None))
|
||||||
|
self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
|
||||||
|
('b', None))
|
||||||
|
|
||||||
def test_finditer(self):
|
def test_finditer(self):
|
||||||
iter = re.finditer(r":+", "a:b::c:::d")
|
iter = re.finditer(r":+", "a:b::c:::d")
|
||||||
self.assertEqual([item.group(0) for item in iter],
|
self.assertEqual([item.group(0) for item in iter],
|
||||||
|
|
|
@@ -947,10 +947,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
if (pattern[1] == SRE_OP_IN &&
|
if (pattern[1] == SRE_OP_IN &&
|
||||||
(ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
|
(ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
|
||||||
continue;
|
continue;
|
||||||
|
if (state->repeat) {
|
||||||
|
i = mark_save(state, 0, lastmark);
|
||||||
|
if (i < 0)
|
||||||
|
return i;
|
||||||
|
}
|
||||||
state->ptr = ptr;
|
state->ptr = ptr;
|
||||||
i = SRE_MATCH(state, pattern + 1, level + 1);
|
i = SRE_MATCH(state, pattern + 1, level + 1);
|
||||||
if (i)
|
if (i)
|
||||||
return i;
|
return i;
|
||||||
|
if (state->repeat) {
|
||||||
|
i = mark_restore(state, 0, lastmark);
|
||||||
|
if (i < 0)
|
||||||
|
return i;
|
||||||
|
}
|
||||||
LASTMARK_RESTORE();
|
LASTMARK_RESTORE();
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue