mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
#3231: re.compile fails with some bytes patterns
This commit is contained in:
parent
943f33912c
commit
22628c4d6a
3 changed files with 26 additions and 27 deletions
|
@ -200,7 +200,7 @@ class Tokenizer:
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise error("bogus escape (end of line)")
|
raise error("bogus escape (end of line)")
|
||||||
if isinstance(self.string, bytes):
|
if isinstance(self.string, bytes):
|
||||||
char = chr(c)
|
c = chr(c)
|
||||||
char = char + c
|
char = char + c
|
||||||
self.index = self.index + len(char)
|
self.index = self.index + len(char)
|
||||||
self.next = char
|
self.next = char
|
||||||
|
|
|
@ -661,12 +661,8 @@ xyzabc
|
||||||
('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
|
('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
|
||||||
]
|
]
|
||||||
|
|
||||||
try:
|
u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
|
||||||
u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
|
tests.extend([
|
||||||
except SyntaxError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
tests.extend([
|
|
||||||
# bug 410271: \b broken under locales
|
# bug 410271: \b broken under locales
|
||||||
(r'\b.\b', 'a', SUCCEED, 'found', 'a'),
|
(r'\b.\b', 'a', SUCCEED, 'found', 'a'),
|
||||||
(r'(?u)\b.\b', u, SUCCEED, 'found', u),
|
(r'(?u)\b.\b', u, SUCCEED, 'found', u),
|
||||||
|
|
|
@ -732,23 +732,25 @@ def run_re_tests():
|
||||||
else:
|
else:
|
||||||
print('=== Failed incorrectly', t)
|
print('=== Failed incorrectly', t)
|
||||||
|
|
||||||
# Try the match on a unicode string, and check that it
|
# Try the match with both pattern and string converted to
|
||||||
# still succeeds.
|
# bytes, and check that it still succeeds.
|
||||||
try:
|
try:
|
||||||
result = obj.search(str(s, "latin-1"))
|
bpat = bytes(pattern, "ascii")
|
||||||
if result is None:
|
bs = bytes(s, "ascii")
|
||||||
print('=== Fails on unicode match', t)
|
except UnicodeEncodeError:
|
||||||
except NameError:
|
# skip non-ascii tests
|
||||||
continue # 1.5.2
|
pass
|
||||||
except TypeError:
|
else:
|
||||||
continue # unicode test case
|
try:
|
||||||
|
bpat = re.compile(bpat)
|
||||||
# Try the match on a unicode pattern, and check that it
|
except Exception:
|
||||||
# still succeeds.
|
print('=== Fails on bytes pattern compile', t)
|
||||||
obj=re.compile(str(pattern, "latin-1"))
|
if verbose:
|
||||||
result = obj.search(s)
|
traceback.print_exc(file=sys.stdout)
|
||||||
if result is None:
|
else:
|
||||||
print('=== Fails on unicode pattern match', t)
|
bytes_result = bpat.search(bs)
|
||||||
|
if bytes_result is None:
|
||||||
|
print('=== Fails on bytes pattern match', t)
|
||||||
|
|
||||||
# Try the match with the search area limited to the extent
|
# Try the match with the search area limited to the extent
|
||||||
# of the match and see if it still succeeds. \B will
|
# of the match and see if it still succeeds. \B will
|
||||||
|
@ -771,10 +773,11 @@ def run_re_tests():
|
||||||
|
|
||||||
# Try the match with LOCALE enabled, and check that it
|
# Try the match with LOCALE enabled, and check that it
|
||||||
# still succeeds.
|
# still succeeds.
|
||||||
obj = re.compile(pattern, re.LOCALE)
|
if '(?u)' not in pattern:
|
||||||
result = obj.search(s)
|
obj = re.compile(pattern, re.LOCALE)
|
||||||
if result is None:
|
result = obj.search(s)
|
||||||
print('=== Fails on locale-sensitive match', t)
|
if result is None:
|
||||||
|
print('=== Fails on locale-sensitive match', t)
|
||||||
|
|
||||||
# Try the match with UNICODE locale enabled, and check
|
# Try the match with UNICODE locale enabled, and check
|
||||||
# that it still succeeds.
|
# that it still succeeds.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue