mirror of
https://github.com/python/cpython.git
synced 2025-10-09 16:34:44 +00:00
I've applied a modified version of Greg Chapman's patch. I've included the fixes without introducing the reorganization mentioned, for the sake of stability. Also, the second fix mentioned in the patch doesn't fix the mentioned problem anymore, because of the change introduced by patch #720991 (by Greg as well). The new fix wasn't complicated though, and is included as well. As a note, it seems that there are other places that require the "protection" of LASTMARK_SAVE()/LASTMARK_RESTORE(), and are just waiting for someone to find how to break them. Particularly, I believe that every recursion of SRE_MATCH() should be protected by these macros. I won't do that right now since I'm not completely sure about this, and we don't have much time for testing until the next release.
This commit is contained in:
parent
48f3dcc93e
commit
be733ee7fb
3 changed files with 45 additions and 24 deletions
|
@ -655,6 +655,10 @@ xyzabc
|
||||||
(r'^a*?$', 'foo', FAIL),
|
(r'^a*?$', 'foo', FAIL),
|
||||||
# bug 470582: nested groups problem
|
# bug 470582: nested groups problem
|
||||||
(r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
|
(r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
|
||||||
|
# another minimizing repeat problem (capturing groups in assertions)
|
||||||
|
('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
|
||||||
|
('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
|
||||||
|
('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
|
||||||
]
|
]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -78,10 +78,12 @@ test(r"""sre.match(r'(a)|(b)', 'b').start(1)""", -1)
|
||||||
test(r"""sre.match(r'(a)|(b)', 'b').end(1)""", -1)
|
test(r"""sre.match(r'(a)|(b)', 'b').end(1)""", -1)
|
||||||
test(r"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1))
|
test(r"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1))
|
||||||
|
|
||||||
# bug described in patch 527371
|
# bug described in patches 527371/672491
|
||||||
test(r"""sre.match(r'(a)?a','a').lastindex""", None)
|
test(r"""sre.match(r'(a)?a','a').lastindex""", None)
|
||||||
test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1)
|
test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1)
|
||||||
test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a')
|
test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a')
|
||||||
|
test(r"""sre.match("(?P<a>a(b))", "ab").lastgroup""", 'a')
|
||||||
|
test(r"""sre.match("((a))", "a").lastindex""", 1)
|
||||||
|
|
||||||
# bug 545855 -- This pattern failed to cause a compile error as it
|
# bug 545855 -- This pattern failed to cause a compile error as it
|
||||||
# should, instead provoking a TypeError.
|
# should, instead provoking a TypeError.
|
||||||
|
|
|
@ -337,19 +337,6 @@ mark_restore(SRE_STATE* state, int lo, int hi)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
lastmark_restore(SRE_STATE *state, int lastmark)
|
|
||||||
{
|
|
||||||
if (state->lastmark > lastmark) {
|
|
||||||
memset(
|
|
||||||
state->mark + lastmark + 1, 0,
|
|
||||||
(state->lastmark - lastmark) * sizeof(void*)
|
|
||||||
);
|
|
||||||
state->lastmark = lastmark;
|
|
||||||
state->lastindex = (lastmark == 0) ? -1 : (lastmark-1)/2+1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* generate 8-bit version */
|
/* generate 8-bit version */
|
||||||
|
|
||||||
#define SRE_CHAR unsigned char
|
#define SRE_CHAR unsigned char
|
||||||
|
@ -690,6 +677,22 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* macros to preserve lastmark in case of backtracking */
|
||||||
|
#define LASTMARK_SAVE() \
|
||||||
|
do { \
|
||||||
|
lastmark = state->lastmark; \
|
||||||
|
lastindex = state->lastindex; \
|
||||||
|
} while (0)
|
||||||
|
#define LASTMARK_RESTORE() \
|
||||||
|
do { \
|
||||||
|
if (state->lastmark > lastmark) { \
|
||||||
|
memset(state->mark + lastmark + 1, 0, \
|
||||||
|
(state->lastmark - lastmark) * sizeof(void*)); \
|
||||||
|
state->lastmark = lastmark; \
|
||||||
|
state->lastindex = lastindex; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
LOCAL(int)
|
LOCAL(int)
|
||||||
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
{
|
{
|
||||||
|
@ -700,7 +703,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
SRE_CHAR* ptr = state->ptr;
|
SRE_CHAR* ptr = state->ptr;
|
||||||
int i, count;
|
int i, count;
|
||||||
SRE_REPEAT* rp;
|
SRE_REPEAT* rp;
|
||||||
int lastmark;
|
int lastmark, lastindex;
|
||||||
SRE_CODE chr;
|
SRE_CODE chr;
|
||||||
|
|
||||||
SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
|
SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
|
||||||
|
@ -927,7 +930,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
/* alternation */
|
/* alternation */
|
||||||
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
|
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
|
||||||
TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
|
TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
|
||||||
lastmark = state->lastmark;
|
LASTMARK_SAVE();
|
||||||
for (; pattern[0]; pattern += pattern[0]) {
|
for (; pattern[0]; pattern += pattern[0]) {
|
||||||
if (pattern[1] == SRE_OP_LITERAL &&
|
if (pattern[1] == SRE_OP_LITERAL &&
|
||||||
(ptr >= end || (SRE_CODE) *ptr != pattern[2]))
|
(ptr >= end || (SRE_CODE) *ptr != pattern[2]))
|
||||||
|
@ -939,7 +942,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
i = SRE_MATCH(state, pattern + 1, level + 1);
|
i = SRE_MATCH(state, pattern + 1, level + 1);
|
||||||
if (i)
|
if (i)
|
||||||
return i;
|
return i;
|
||||||
lastmark_restore(state, lastmark);
|
LASTMARK_RESTORE();
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
@ -979,8 +982,11 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
/* tail is empty. we're finished */
|
/* tail is empty. we're finished */
|
||||||
state->ptr = ptr;
|
state->ptr = ptr;
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
} else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
|
LASTMARK_SAVE();
|
||||||
|
|
||||||
|
if (pattern[pattern[0]] == SRE_OP_LITERAL) {
|
||||||
/* tail starts with a literal. skip positions where
|
/* tail starts with a literal. skip positions where
|
||||||
the rest of the pattern cannot possibly match */
|
the rest of the pattern cannot possibly match */
|
||||||
chr = pattern[pattern[0]+1];
|
chr = pattern[pattern[0]+1];
|
||||||
|
@ -998,11 +1004,11 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
return i;
|
return i;
|
||||||
ptr--;
|
ptr--;
|
||||||
count--;
|
count--;
|
||||||
|
LASTMARK_RESTORE();
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
/* general case */
|
/* general case */
|
||||||
lastmark = state->lastmark;
|
|
||||||
while (count >= (int) pattern[1]) {
|
while (count >= (int) pattern[1]) {
|
||||||
state->ptr = ptr;
|
state->ptr = ptr;
|
||||||
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
|
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
|
||||||
|
@ -1010,7 +1016,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
return i;
|
return i;
|
||||||
ptr--;
|
ptr--;
|
||||||
count--;
|
count--;
|
||||||
lastmark_restore(state, lastmark);
|
LASTMARK_RESTORE();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1055,7 +1061,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
/* general case */
|
/* general case */
|
||||||
int matchmax = ((int)pattern[2] == 65535);
|
int matchmax = ((int)pattern[2] == 65535);
|
||||||
int c;
|
int c;
|
||||||
lastmark = state->lastmark;
|
LASTMARK_SAVE();
|
||||||
while (matchmax || count <= (int) pattern[2]) {
|
while (matchmax || count <= (int) pattern[2]) {
|
||||||
state->ptr = ptr;
|
state->ptr = ptr;
|
||||||
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
|
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
|
||||||
|
@ -1065,13 +1071,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
c = SRE_COUNT(state, pattern+3, 1, level+1);
|
c = SRE_COUNT(state, pattern+3, 1, level+1);
|
||||||
if (c < 0)
|
if (c < 0)
|
||||||
return c;
|
return c;
|
||||||
|
LASTMARK_RESTORE();
|
||||||
if (c == 0)
|
if (c == 0)
|
||||||
break;
|
break;
|
||||||
assert(c == 1);
|
assert(c == 1);
|
||||||
ptr++;
|
ptr++;
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
lastmark_restore(state, lastmark);
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
@ -1113,6 +1119,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
|
|
||||||
TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
|
TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
|
||||||
|
|
||||||
|
LASTMARK_SAVE();
|
||||||
|
|
||||||
if (count < rp->pattern[1]) {
|
if (count < rp->pattern[1]) {
|
||||||
/* not enough matches */
|
/* not enough matches */
|
||||||
rp->count = count;
|
rp->count = count;
|
||||||
|
@ -1122,6 +1130,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
return i;
|
return i;
|
||||||
rp->count = count - 1;
|
rp->count = count - 1;
|
||||||
state->ptr = ptr;
|
state->ptr = ptr;
|
||||||
|
LASTMARK_RESTORE();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1129,7 +1138,6 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
/* we may have enough matches, but if we can
|
/* we may have enough matches, but if we can
|
||||||
match another item, do so */
|
match another item, do so */
|
||||||
rp->count = count;
|
rp->count = count;
|
||||||
lastmark = state->lastmark;
|
|
||||||
i = mark_save(state, 0, lastmark);
|
i = mark_save(state, 0, lastmark);
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
return i;
|
return i;
|
||||||
|
@ -1138,7 +1146,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
if (i)
|
if (i)
|
||||||
return i;
|
return i;
|
||||||
i = mark_restore(state, 0, lastmark);
|
i = mark_restore(state, 0, lastmark);
|
||||||
state->lastmark = lastmark;
|
LASTMARK_RESTORE();
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
return i;
|
return i;
|
||||||
rp->count = count - 1;
|
rp->count = count - 1;
|
||||||
|
@ -1182,6 +1190,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LASTMARK_SAVE();
|
||||||
|
|
||||||
/* see if the tail matches */
|
/* see if the tail matches */
|
||||||
state->repeat = rp->prev;
|
state->repeat = rp->prev;
|
||||||
i = SRE_MATCH(state, pattern, level + 1);
|
i = SRE_MATCH(state, pattern, level + 1);
|
||||||
|
@ -1191,6 +1201,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
state->ptr = ptr;
|
state->ptr = ptr;
|
||||||
state->repeat = rp;
|
state->repeat = rp;
|
||||||
|
|
||||||
|
LASTMARK_RESTORE();
|
||||||
|
|
||||||
if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
|
if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
@ -3084,3 +3096,6 @@ PyMODINIT_FUNC init_sre(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* !defined(SRE_RECURSIVE) */
|
#endif /* !defined(SRE_RECURSIVE) */
|
||||||
|
|
||||||
|
/* vim:ts=4:sw=4:et
|
||||||
|
*/
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue