Issue #16688: Fix backreferences causing case-insensitive regexes to fail on non-ASCII strings.

Patch by Matthew Barnett.
This commit is contained in:
Serhiy Storchaka 2012-12-29 23:38:48 +02:00
parent 2d8298dcd2
commit c1b59d4552
4 changed files with 15 additions and 5 deletions

View file

@ -968,6 +968,11 @@ class ReTests(unittest.TestCase):
self.assertEqual(r, s) self.assertEqual(r, s)
self.assertEqual(n, size + 1) self.assertEqual(n, size + 1)
def test_bug_16688(self):
# Issue 16688: Backreferences make case-insensitive regex fail on
# non-ASCII strings.
# The case-insensitive backreference \1 must still match the second "a"
# even though the subject string also contains the non-ASCII U+0100.
self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
# A bounded repeat applied to a string of two U+0100 characters must
# match exactly those two characters, i.e. span (0, 2).
self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
def run_re_tests(): def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR

View file

@ -70,6 +70,7 @@ Anton Barkovsky
Nick Barnes Nick Barnes
Quentin Barnes Quentin Barnes
David Barnett David Barnett
Matthew Barnett
Richard Barran Richard Barran
Cesar Eduardo Barros Cesar Eduardo Barros
Des Barry Des Barry

View file

@ -124,6 +124,9 @@ Core and Builtins
Library Library
------- -------
- Issue #16688: Fix backreferences causing case-insensitive regexes to fail on
non-ASCII strings. Patch by Matthew Barnett.
- Issue #16485: Fix file descriptor not being closed if file header patching - Issue #16485: Fix file descriptor not being closed if file header patching
fails on closing of aifc file. fails on closing of aifc file.

View file

@ -492,7 +492,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Py_ssize_t i; Py_ssize_t i;
/* adjust end */ /* adjust end */
if (maxcount < end - ptr && maxcount != 65535) if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
end = ptr + maxcount*state->charsize; end = ptr + maxcount*state->charsize;
switch (pattern[0]) { switch (pattern[0]) {
@ -583,7 +583,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
Py_ssize_t i; Py_ssize_t i;
/* check minimal length */ /* check minimal length */
if (pattern[3] && (end - ptr) < pattern[3]) if (pattern[3] && (end - ptr)/state->charsize < pattern[3])
return 0; return 0;
/* check known prefix */ /* check known prefix */
@ -801,7 +801,7 @@ entrance:
/* <INFO> <1=skip> <2=flags> <3=min> ... */ /* <INFO> <1=skip> <2=flags> <3=min> ... */
if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) { if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
TRACE(("reject (got %d chars, need %d)\n", TRACE(("reject (got %d chars, need %d)\n",
(end - ctx->ptr), ctx->pattern[3])); (end - ctx->ptr)/state->charsize, ctx->pattern[3]));
RETURN_FAILURE; RETURN_FAILURE;
} }
ctx->pattern += ctx->pattern[1] + 1; ctx->pattern += ctx->pattern[1] + 1;
@ -1329,9 +1329,10 @@ entrance:
RETURN_FAILURE; RETURN_FAILURE;
while (p < e) { while (p < e) {
if (ctx->ptr >= end || if (ctx->ptr >= end ||
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p)) state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
state->lower(SRE_CHARGET(state, p, 0)))
RETURN_FAILURE; RETURN_FAILURE;
p++; p += state->charsize;
ctx->ptr += state->charsize; ctx->ptr += state->charsize;
} }
} }