Issue #16688: Fix backreferences making case-insensitive regexes fail on non-ASCII strings.

Patch by Matthew Barnett.
This commit is contained in:
Serhiy Storchaka 2012-12-29 23:41:08 +02:00
commit ac1069387e
4 changed files with 15 additions and 5 deletions

View file

@ -968,6 +968,11 @@ class ReTests(unittest.TestCase):
self.assertEqual(r, s) self.assertEqual(r, s)
self.assertEqual(n, size + 1) self.assertEqual(n, size + 1)
def test_bug_16688(self):
# Issue 16688: Backreferences make case-insensitive regex fail on
# non-ASCII strings.
self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
def run_re_tests(): def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR

View file

@ -70,6 +70,7 @@ Anton Barkovsky
Nick Barnes Nick Barnes
Quentin Barnes Quentin Barnes
David Barnett David Barnett
Matthew Barnett
Richard Barran Richard Barran
Cesar Eduardo Barros Cesar Eduardo Barros
Des Barry Des Barry

View file

@ -200,6 +200,9 @@ Core and Builtins
Library Library
------- -------
- Issue #16688: Fix backreferences making case-insensitive regexes fail on
non-ASCII strings. Patch by Matthew Barnett.
- Issue #16486: Make aifc files work with 'with' as context managers. - Issue #16486: Make aifc files work with 'with' as context managers.
- Issue #16485: Fix file descriptor not being closed if file header patching - Issue #16485: Fix file descriptor not being closed if file header patching

View file

@ -492,7 +492,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Py_ssize_t i; Py_ssize_t i;
/* adjust end */ /* adjust end */
if (maxcount < end - ptr && maxcount != 65535) if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
end = ptr + maxcount*state->charsize; end = ptr + maxcount*state->charsize;
switch (pattern[0]) { switch (pattern[0]) {
@ -583,7 +583,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
Py_ssize_t i; Py_ssize_t i;
/* check minimal length */ /* check minimal length */
if (pattern[3] && (end - ptr) < pattern[3]) if (pattern[3] && (end - ptr)/state->charsize < pattern[3])
return 0; return 0;
/* check known prefix */ /* check known prefix */
@ -801,7 +801,7 @@ entrance:
/* <INFO> <1=skip> <2=flags> <3=min> ... */ /* <INFO> <1=skip> <2=flags> <3=min> ... */
if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) { if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
TRACE(("reject (got %d chars, need %d)\n", TRACE(("reject (got %d chars, need %d)\n",
(end - ctx->ptr), ctx->pattern[3])); (end - ctx->ptr)/state->charsize, ctx->pattern[3]));
RETURN_FAILURE; RETURN_FAILURE;
} }
ctx->pattern += ctx->pattern[1] + 1; ctx->pattern += ctx->pattern[1] + 1;
@ -1329,9 +1329,10 @@ entrance:
RETURN_FAILURE; RETURN_FAILURE;
while (p < e) { while (p < e) {
if (ctx->ptr >= end || if (ctx->ptr >= end ||
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p)) state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
state->lower(SRE_CHARGET(state, p, 0)))
RETURN_FAILURE; RETURN_FAILURE;
p++; p += state->charsize;
ctx->ptr += state->charsize; ctx->ptr += state->charsize;
} }
} }