bpo-29995: re.escape() now escapes only special characters. (#1007)

This commit is contained in:
Serhiy Storchaka 2017-04-13 21:06:43 +03:00 committed by GitHub
parent a6e395dffa
commit 5908300e4b
6 changed files with 40 additions and 51 deletions

View file

@@ -241,39 +241,21 @@ def template(pattern, flags=0):
"Compile a template pattern, returning a pattern object"
return _compile(pattern, flags|T)
# Translation table for escape(): maps the code point of every character that
# must be backslash-escaped to its escaped form.  Besides the usual regex
# metacharacters it deliberately includes:
#   closing ')', '}' and ']'
#   '-' (a range in character set)
#   '#' (comment) and WHITESPACE (ignored) in verbose mode
# Iterating the bytes literal yields integers, which is the key type that
# str.translate() looks up.
_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.# \t\n\r\v\f'}

def escape(pattern):
    """
    Escape special characters in a string.

    Only the characters listed in _special_chars_map are prefixed with a
    backslash; every other character (including non-ASCII ones and NUL) is
    returned unchanged.

    pattern may be a str or a bytes-like object.  A str argument returns a
    str; anything else is decoded with str(pattern, 'latin1') and the result
    is returned as bytes.
    """
    if isinstance(pattern, str):
        return pattern.translate(_special_chars_map)
    else:
        # Latin-1 maps each byte value 0-255 to the code point of the same
        # value and back again, so the decode/encode round trip is lossless
        # and lets the bytes case reuse the str translation table.
        pattern = str(pattern, 'latin1')
        return pattern.translate(_special_chars_map).encode('latin1')
# --------------------------------------------------------------------
# internals