mirror of
https://github.com/python/cpython.git
synced 2025-08-30 13:38:43 +00:00
- restored 1.5.2 compatibility (sorry, eric)
- removed __all__ cruft from internal modules (sorry, skip)
- don't assume ASCII for string escapes (sorry, per)
This commit is contained in:
parent
ae7636753e
commit
f2989b22ff
5 changed files with 39 additions and 31 deletions
14
Lib/sre.py
14
Lib/sre.py
|
@ -17,9 +17,14 @@
|
||||||
import sre_compile
|
import sre_compile
|
||||||
import sre_parse
|
import sre_parse
|
||||||
|
|
||||||
__all__ = ["match","search","sub","subn","split","findall","compile",
|
# public symbols
|
||||||
"purge","template","escape","I","L","M","S","X","U","IGNORECASE",
|
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
|
||||||
"LOCALE","MULTILINE","DOTALL","VERBOSE","UNICODE","error"]
|
"compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
|
||||||
|
"U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
|
||||||
|
"UNICODE", "error" ]
|
||||||
|
|
||||||
|
# this module works under 1.5.2 and later. don't use string methods
|
||||||
|
import string
|
||||||
|
|
||||||
# flags
|
# flags
|
||||||
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
|
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
|
||||||
|
@ -88,7 +93,6 @@ def purge():
|
||||||
|
|
||||||
def template(pattern, flags=0):
|
def template(pattern, flags=0):
|
||||||
"Compile a template pattern, returning a pattern object"
|
"Compile a template pattern, returning a pattern object"
|
||||||
|
|
||||||
return _compile(pattern, flags|T)
|
return _compile(pattern, flags|T)
|
||||||
|
|
||||||
def escape(pattern):
|
def escape(pattern):
|
||||||
|
@ -111,7 +115,7 @@ _MAXCACHE = 100
|
||||||
|
|
||||||
def _join(seq, sep):
|
def _join(seq, sep):
|
||||||
# internal: join into string having the same type as sep
|
# internal: join into string having the same type as sep
|
||||||
return sep[:0].join(seq)
|
return string.join(seq, sep[:0])
|
||||||
|
|
||||||
def _compile(*key):
|
def _compile(*key):
|
||||||
# internal: compile pattern
|
# internal: compile pattern
|
||||||
|
|
|
@ -12,8 +12,6 @@ import _sre
|
||||||
|
|
||||||
from sre_constants import *
|
from sre_constants import *
|
||||||
|
|
||||||
__all__ = ["compile"]
|
|
||||||
|
|
||||||
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
|
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
|
||||||
|
|
||||||
MAXCODE = 65535
|
MAXCODE = 65535
|
||||||
|
|
|
@ -195,11 +195,12 @@ SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
|
||||||
SRE_INFO_CHARSET = 4 # pattern starts with character from given set
|
SRE_INFO_CHARSET = 4 # pattern starts with character from given set
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
import string
|
||||||
def dump(f, d, prefix):
|
def dump(f, d, prefix):
|
||||||
items = d.items()
|
items = d.items()
|
||||||
items.sort(lambda a, b: cmp(a[1], b[1]))
|
items.sort(lambda a, b: cmp(a[1], b[1]))
|
||||||
for k, v in items:
|
for k, v in items:
|
||||||
f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
|
f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
|
||||||
f = open("sre_constants.h", "w")
|
f = open("sre_constants.h", "w")
|
||||||
f.write("""\
|
f.write("""\
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -10,13 +10,11 @@
|
||||||
|
|
||||||
# XXX: show string offset and offending character for all errors
|
# XXX: show string offset and offending character for all errors
|
||||||
|
|
||||||
import sys
|
# this module works under 1.5.2 and later. don't use string methods
|
||||||
|
import string, sys
|
||||||
|
|
||||||
from sre_constants import *
|
from sre_constants import *
|
||||||
|
|
||||||
__all__ = ["Pattern","SubPattern","Tokenizer","parse","parse_template",
|
|
||||||
"expand_template"]
|
|
||||||
|
|
||||||
SPECIAL_CHARS = ".\\[{()*+?^$|"
|
SPECIAL_CHARS = ".\\[{()*+?^$|"
|
||||||
REPEAT_CHARS = "*+?{"
|
REPEAT_CHARS = "*+?{"
|
||||||
|
|
||||||
|
@ -28,13 +26,13 @@ HEXDIGITS = tuple("0123456789abcdefABCDEF")
|
||||||
WHITESPACE = tuple(" \t\n\r\v\f")
|
WHITESPACE = tuple(" \t\n\r\v\f")
|
||||||
|
|
||||||
ESCAPES = {
|
ESCAPES = {
|
||||||
r"\a": (LITERAL, 7),
|
r"\a": (LITERAL, ord("\a")),
|
||||||
r"\b": (LITERAL, 8),
|
r"\b": (LITERAL, ord("\b")),
|
||||||
r"\f": (LITERAL, 12),
|
r"\f": (LITERAL, ord("\f")),
|
||||||
r"\n": (LITERAL, 10),
|
r"\n": (LITERAL, ord("\n")),
|
||||||
r"\r": (LITERAL, 13),
|
r"\r": (LITERAL, ord("\r")),
|
||||||
r"\t": (LITERAL, 9),
|
r"\t": (LITERAL, ord("\t")),
|
||||||
r"\v": (LITERAL, 11),
|
r"\v": (LITERAL, ord("\v")),
|
||||||
r"\\": (LITERAL, ord("\\"))
|
r"\\": (LITERAL, ord("\\"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,6 +61,13 @@ FLAGS = {
|
||||||
"u": SRE_FLAG_UNICODE,
|
"u": SRE_FLAG_UNICODE,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# figure out best way to convert hex/octal numbers to integers
|
||||||
|
try:
|
||||||
|
int("10", 8)
|
||||||
|
atoi = int # 2.0 and later
|
||||||
|
except TypeError:
|
||||||
|
atoi = string.atoi # 1.5.2
|
||||||
|
|
||||||
class Pattern:
|
class Pattern:
|
||||||
# master pattern object. keeps track of global attributes
|
# master pattern object. keeps track of global attributes
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -219,7 +224,7 @@ def isname(name):
|
||||||
def _group(escape, groups):
|
def _group(escape, groups):
|
||||||
# check if the escape string represents a valid group
|
# check if the escape string represents a valid group
|
||||||
try:
|
try:
|
||||||
gid = int(escape[1:])
|
gid = atoi(escape[1:])
|
||||||
if gid and gid < groups:
|
if gid and gid < groups:
|
||||||
return gid
|
return gid
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -242,13 +247,13 @@ def _class_escape(source, escape):
|
||||||
escape = escape[2:]
|
escape = escape[2:]
|
||||||
if len(escape) != 2:
|
if len(escape) != 2:
|
||||||
raise error, "bogus escape: %s" % repr("\\" + escape)
|
raise error, "bogus escape: %s" % repr("\\" + escape)
|
||||||
return LITERAL, int(escape, 16) & 0xff
|
return LITERAL, atoi(escape, 16) & 0xff
|
||||||
elif str(escape[1:2]) in OCTDIGITS:
|
elif str(escape[1:2]) in OCTDIGITS:
|
||||||
# octal escape (up to three digits)
|
# octal escape (up to three digits)
|
||||||
while source.next in OCTDIGITS and len(escape) < 5:
|
while source.next in OCTDIGITS and len(escape) < 5:
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
escape = escape[1:]
|
escape = escape[1:]
|
||||||
return LITERAL, int(escape, 8) & 0xff
|
return LITERAL, atoi(escape, 8) & 0xff
|
||||||
if len(escape) == 2:
|
if len(escape) == 2:
|
||||||
return LITERAL, ord(escape[1])
|
return LITERAL, ord(escape[1])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -270,12 +275,12 @@ def _escape(source, escape, state):
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
if len(escape) != 4:
|
if len(escape) != 4:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
return LITERAL, int(escape[2:], 16) & 0xff
|
return LITERAL, atoi(escape[2:], 16) & 0xff
|
||||||
elif escape[1:2] == "0":
|
elif escape[1:2] == "0":
|
||||||
# octal escape
|
# octal escape
|
||||||
while source.next in OCTDIGITS and len(escape) < 4:
|
while source.next in OCTDIGITS and len(escape) < 4:
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
return LITERAL, int(escape[1:], 8) & 0xff
|
return LITERAL, atoi(escape[1:], 8) & 0xff
|
||||||
elif escape[1:2] in DIGITS:
|
elif escape[1:2] in DIGITS:
|
||||||
# octal escape *or* decimal group reference (sigh)
|
# octal escape *or* decimal group reference (sigh)
|
||||||
here = source.tell()
|
here = source.tell()
|
||||||
|
@ -285,7 +290,7 @@ def _escape(source, escape, state):
|
||||||
source.next in OCTDIGITS):
|
source.next in OCTDIGITS):
|
||||||
# got three octal digits; this is an octal escape
|
# got three octal digits; this is an octal escape
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
return LITERAL, int(escape[1:], 8) & 0xff
|
return LITERAL, atoi(escape[1:], 8) & 0xff
|
||||||
# got at least one decimal digit; this is a group reference
|
# got at least one decimal digit; this is a group reference
|
||||||
group = _group(escape, state.groups)
|
group = _group(escape, state.groups)
|
||||||
if group:
|
if group:
|
||||||
|
@ -459,9 +464,9 @@ def _parse(source, state):
|
||||||
source.seek(here)
|
source.seek(here)
|
||||||
continue
|
continue
|
||||||
if lo:
|
if lo:
|
||||||
min = int(lo)
|
min = atoi(lo)
|
||||||
if hi:
|
if hi:
|
||||||
max = int(hi)
|
max = atoi(hi)
|
||||||
if max < min:
|
if max < min:
|
||||||
raise error, "bad repeat interval"
|
raise error, "bad repeat interval"
|
||||||
else:
|
else:
|
||||||
|
@ -649,7 +654,7 @@ def parse_template(source, pattern):
|
||||||
if not name:
|
if not name:
|
||||||
raise error, "bad group name"
|
raise error, "bad group name"
|
||||||
try:
|
try:
|
||||||
index = int(name)
|
index = atoi(name)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
if not isname(name):
|
if not isname(name):
|
||||||
raise error, "bad character in group name"
|
raise error, "bad character in group name"
|
||||||
|
@ -673,7 +678,7 @@ def parse_template(source, pattern):
|
||||||
break
|
break
|
||||||
if not code:
|
if not code:
|
||||||
this = this[1:]
|
this = this[1:]
|
||||||
code = LITERAL, int(this[-6:], 8) & 0xff
|
code = LITERAL, atoi(this[-6:], 8) & 0xff
|
||||||
a(code)
|
a(code)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
|
@ -702,4 +707,4 @@ def expand_template(template, match):
|
||||||
if s is None:
|
if s is None:
|
||||||
raise error, "empty group"
|
raise error, "empty group"
|
||||||
a(s)
|
a(s)
|
||||||
return sep.join(p)
|
return string.join(p, sep)
|
||||||
|
|
|
@ -8,7 +8,7 @@ sys.path=['.']+sys.path
|
||||||
|
|
||||||
from test_support import verbose, TestFailed
|
from test_support import verbose, TestFailed
|
||||||
import sre
|
import sre
|
||||||
import sys, os, traceback
|
import sys, os, string, traceback
|
||||||
|
|
||||||
#
|
#
|
||||||
# test support
|
# test support
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue