mirror of
https://github.com/python/cpython.git
synced 2025-12-01 23:32:16 +00:00
bpo-34294: re module, fix wrong capturing groups in rare cases. (GH-11546)
Capturing groups need to be reset between two SRE(match) calls in loops; this fixes wrong capturing groups in rare cases. Also adds a missing index entry in re.rst.
This commit is contained in:
parent
02c04f26df
commit
4a7f44a2ed
5 changed files with 49 additions and 1 deletions
|
|
@ -371,6 +371,8 @@ The special characters are:
|
||||||
``(?#...)``
|
``(?#...)``
|
||||||
A comment; the contents of the parentheses are simply ignored.
|
A comment; the contents of the parentheses are simply ignored.
|
||||||
|
|
||||||
|
.. index:: single: (?=; in regular expressions
|
||||||
|
|
||||||
``(?=...)``
|
``(?=...)``
|
||||||
Matches if ``...`` matches next, but doesn't consume any of the string. This is
|
Matches if ``...`` matches next, but doesn't consume any of the string. This is
|
||||||
called a :dfn:`lookahead assertion`. For example, ``Isaac (?=Asimov)`` will match
|
called a :dfn:`lookahead assertion`. For example, ``Isaac (?=Asimov)`` will match
|
||||||
|
|
|
||||||
|
|
@ -2067,6 +2067,40 @@ ELSE
|
||||||
self.assertEqual(m.group(), b'xyz')
|
self.assertEqual(m.group(), b'xyz')
|
||||||
self.assertEqual(m2.group(), b'')
|
self.assertEqual(m2.group(), b'')
|
||||||
|
|
||||||
|
def test_bug_34294(self):
|
||||||
|
# Issue 34294: wrong capturing groups
|
||||||
|
|
||||||
|
# exists since Python 2
|
||||||
|
s = "a\tx"
|
||||||
|
p = r"\b(?=(\t)|(x))x"
|
||||||
|
self.assertEqual(re.search(p, s).groups(), (None, 'x'))
|
||||||
|
|
||||||
|
# introduced in Python 3.7.0
|
||||||
|
s = "ab"
|
||||||
|
p = r"(?=(.)(.)?)"
|
||||||
|
self.assertEqual(re.findall(p, s),
|
||||||
|
[('a', 'b'), ('b', '')])
|
||||||
|
self.assertEqual([m.groups() for m in re.finditer(p, s)],
|
||||||
|
[('a', 'b'), ('b', None)])
|
||||||
|
|
||||||
|
# test-cases provided by issue34294, introduced in Python 3.7.0
|
||||||
|
p = r"(?=<(?P<tag>\w+)/?>(?:(?P<text>.+?)</(?P=tag)>)?)"
|
||||||
|
s = "<test><foo2/></test>"
|
||||||
|
self.assertEqual(re.findall(p, s),
|
||||||
|
[('test', '<foo2/>'), ('foo2', '')])
|
||||||
|
self.assertEqual([m.groupdict() for m in re.finditer(p, s)],
|
||||||
|
[{'tag': 'test', 'text': '<foo2/>'},
|
||||||
|
{'tag': 'foo2', 'text': None}])
|
||||||
|
s = "<test>Hello</test><foo/>"
|
||||||
|
self.assertEqual([m.groupdict() for m in re.finditer(p, s)],
|
||||||
|
[{'tag': 'test', 'text': 'Hello'},
|
||||||
|
{'tag': 'foo', 'text': None}])
|
||||||
|
s = "<test>Hello</test><foo/><foo/>"
|
||||||
|
self.assertEqual([m.groupdict() for m in re.finditer(p, s)],
|
||||||
|
[{'tag': 'test', 'text': 'Hello'},
|
||||||
|
{'tag': 'foo', 'text': None},
|
||||||
|
{'tag': 'foo', 'text': None}])
|
||||||
|
|
||||||
|
|
||||||
class PatternReprTests(unittest.TestCase):
|
class PatternReprTests(unittest.TestCase):
|
||||||
def check(self, pattern, expected):
|
def check(self, pattern, expected):
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
Fix wrong capturing groups in rare cases in the :mod:`re` module. :func:`re.search`,
|
||||||
|
:func:`re.findall`, :func:`re.sub` and other functions that scan through
|
||||||
|
a string looking for a match should reset capturing groups between two match
|
||||||
|
attempts. Patch by Ma Lin.
|
||||||
|
|
@ -340,7 +340,7 @@ _sre_unicode_tolower_impl(PyObject *module, int character)
|
||||||
LOCAL(void)
|
LOCAL(void)
|
||||||
state_reset(SRE_STATE* state)
|
state_reset(SRE_STATE* state)
|
||||||
{
|
{
|
||||||
/* FIXME: dynamic! */
|
/* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
|
||||||
/*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
|
/*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
|
||||||
|
|
||||||
state->lastmark = -1;
|
state->lastmark = -1;
|
||||||
|
|
|
||||||
|
|
@ -1363,6 +1363,10 @@ exit:
|
||||||
return ret; /* should never get here */
|
return ret; /* should never get here */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Capturing groups must be reset between two SRE(match) calls in loops. */
|
||||||
|
#define RESET_CAPTURE_GROUP() \
|
||||||
|
do { state->lastmark = state->lastindex = -1; } while (0)
|
||||||
|
|
||||||
LOCAL(Py_ssize_t)
|
LOCAL(Py_ssize_t)
|
||||||
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
{
|
{
|
||||||
|
|
@ -1440,6 +1444,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
if (status != 0)
|
if (status != 0)
|
||||||
return status;
|
return status;
|
||||||
++ptr;
|
++ptr;
|
||||||
|
RESET_CAPTURE_GROUP();
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -1487,6 +1492,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
/* close but no cigar -- try again */
|
/* close but no cigar -- try again */
|
||||||
if (++ptr >= end)
|
if (++ptr >= end)
|
||||||
return 0;
|
return 0;
|
||||||
|
RESET_CAPTURE_GROUP();
|
||||||
}
|
}
|
||||||
i = overlap[i];
|
i = overlap[i];
|
||||||
} while (i != 0);
|
} while (i != 0);
|
||||||
|
|
@ -1510,6 +1516,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
if (status != 0)
|
if (status != 0)
|
||||||
break;
|
break;
|
||||||
ptr++;
|
ptr++;
|
||||||
|
RESET_CAPTURE_GROUP();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* general case */
|
/* general case */
|
||||||
|
|
@ -1520,6 +1527,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
state->must_advance = 0;
|
state->must_advance = 0;
|
||||||
while (status == 0 && ptr < end) {
|
while (status == 0 && ptr < end) {
|
||||||
ptr++;
|
ptr++;
|
||||||
|
RESET_CAPTURE_GROUP();
|
||||||
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
|
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
|
||||||
state->start = state->ptr = ptr;
|
state->start = state->ptr = ptr;
|
||||||
status = SRE(match)(state, pattern, 0);
|
status = SRE(match)(state, pattern, 0);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue