mirror of
https://github.com/python/cpython.git
synced 2025-10-17 12:18:23 +00:00
Simple optimizations:
* pre-build a single identity function for the fixup function
* pre-build membership tests in dictionaries instead of in-line tuples
* assign len() to a local variable
* assign append() methods to a local variable
* use xrange() instead of range()
* replace "x<<1" with "x+x"
This commit is contained in:
parent
707483fdef
commit
01c9f8c35f
1 changed file with 69 additions and 47 deletions
|
@ -21,11 +21,25 @@ if _sre.CODESIZE == 2:
|
||||||
else:
|
else:
|
||||||
MAXCODE = 0xFFFFFFFFL
|
MAXCODE = 0xFFFFFFFFL
|
||||||
|
|
||||||
|
def _identityfunction(x):
|
||||||
|
return x
|
||||||
|
|
||||||
|
# use xrange if available
|
||||||
|
try:
|
||||||
|
xrange
|
||||||
|
except NameError:
|
||||||
|
xrange = range
|
||||||
|
|
||||||
def _compile(code, pattern, flags):
|
def _compile(code, pattern, flags):
|
||||||
# internal: compile a (sub)pattern
|
# internal: compile a (sub)pattern
|
||||||
emit = code.append
|
emit = code.append
|
||||||
|
_len = len
|
||||||
|
LITERAL_CODES = {LITERAL:1, NOT_LITERAL:1}
|
||||||
|
REPEATING_CODES = {REPEAT:1, MIN_REPEAT:1, MAX_REPEAT:1}
|
||||||
|
SUCCESS_CODES = {SUCCESS:1, FAILURE:1}
|
||||||
|
ASSERT_CODES = {ASSERT:1, ASSERT_NOT:1}
|
||||||
for op, av in pattern:
|
for op, av in pattern:
|
||||||
if op in (LITERAL, NOT_LITERAL):
|
if op in LITERAL_CODES:
|
||||||
if flags & SRE_FLAG_IGNORECASE:
|
if flags & SRE_FLAG_IGNORECASE:
|
||||||
emit(OPCODES[OP_IGNORE[op]])
|
emit(OPCODES[OP_IGNORE[op]])
|
||||||
emit(_sre.getlower(av, flags))
|
emit(_sre.getlower(av, flags))
|
||||||
|
@ -39,44 +53,44 @@ def _compile(code, pattern, flags):
|
||||||
return _sre.getlower(literal, flags)
|
return _sre.getlower(literal, flags)
|
||||||
else:
|
else:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
fixup = lambda x: x
|
fixup = _identityfunction
|
||||||
skip = len(code); emit(0)
|
skip = _len(code); emit(0)
|
||||||
_compile_charset(av, flags, code, fixup)
|
_compile_charset(av, flags, code, fixup)
|
||||||
code[skip] = len(code) - skip
|
code[skip] = _len(code) - skip
|
||||||
elif op is ANY:
|
elif op is ANY:
|
||||||
if flags & SRE_FLAG_DOTALL:
|
if flags & SRE_FLAG_DOTALL:
|
||||||
emit(OPCODES[ANY_ALL])
|
emit(OPCODES[ANY_ALL])
|
||||||
else:
|
else:
|
||||||
emit(OPCODES[ANY])
|
emit(OPCODES[ANY])
|
||||||
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
|
elif op in REPEATING_CODES:
|
||||||
if flags & SRE_FLAG_TEMPLATE:
|
if flags & SRE_FLAG_TEMPLATE:
|
||||||
raise error, "internal: unsupported template operator"
|
raise error, "internal: unsupported template operator"
|
||||||
emit(OPCODES[REPEAT])
|
emit(OPCODES[REPEAT])
|
||||||
skip = len(code); emit(0)
|
skip = _len(code); emit(0)
|
||||||
emit(av[0])
|
emit(av[0])
|
||||||
emit(av[1])
|
emit(av[1])
|
||||||
_compile(code, av[2], flags)
|
_compile(code, av[2], flags)
|
||||||
emit(OPCODES[SUCCESS])
|
emit(OPCODES[SUCCESS])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = _len(code) - skip
|
||||||
elif _simple(av) and op != REPEAT:
|
elif _simple(av) and op is not REPEAT:
|
||||||
if op == MAX_REPEAT:
|
if op is MAX_REPEAT:
|
||||||
emit(OPCODES[REPEAT_ONE])
|
emit(OPCODES[REPEAT_ONE])
|
||||||
else:
|
else:
|
||||||
emit(OPCODES[MIN_REPEAT_ONE])
|
emit(OPCODES[MIN_REPEAT_ONE])
|
||||||
skip = len(code); emit(0)
|
skip = _len(code); emit(0)
|
||||||
emit(av[0])
|
emit(av[0])
|
||||||
emit(av[1])
|
emit(av[1])
|
||||||
_compile(code, av[2], flags)
|
_compile(code, av[2], flags)
|
||||||
emit(OPCODES[SUCCESS])
|
emit(OPCODES[SUCCESS])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = _len(code) - skip
|
||||||
else:
|
else:
|
||||||
emit(OPCODES[REPEAT])
|
emit(OPCODES[REPEAT])
|
||||||
skip = len(code); emit(0)
|
skip = _len(code); emit(0)
|
||||||
emit(av[0])
|
emit(av[0])
|
||||||
emit(av[1])
|
emit(av[1])
|
||||||
_compile(code, av[2], flags)
|
_compile(code, av[2], flags)
|
||||||
code[skip] = len(code) - skip
|
code[skip] = _len(code) - skip
|
||||||
if op == MAX_REPEAT:
|
if op is MAX_REPEAT:
|
||||||
emit(OPCODES[MAX_UNTIL])
|
emit(OPCODES[MAX_UNTIL])
|
||||||
else:
|
else:
|
||||||
emit(OPCODES[MIN_UNTIL])
|
emit(OPCODES[MIN_UNTIL])
|
||||||
|
@ -89,11 +103,11 @@ def _compile(code, pattern, flags):
|
||||||
if av[0]:
|
if av[0]:
|
||||||
emit(OPCODES[MARK])
|
emit(OPCODES[MARK])
|
||||||
emit((av[0]-1)*2+1)
|
emit((av[0]-1)*2+1)
|
||||||
elif op in (SUCCESS, FAILURE):
|
elif op in SUCCESS_CODES:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
elif op in (ASSERT, ASSERT_NOT):
|
elif op in ASSERT_CODES:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
skip = len(code); emit(0)
|
skip = _len(code); emit(0)
|
||||||
if av[0] >= 0:
|
if av[0] >= 0:
|
||||||
emit(0) # look ahead
|
emit(0) # look ahead
|
||||||
else:
|
else:
|
||||||
|
@ -103,13 +117,13 @@ def _compile(code, pattern, flags):
|
||||||
emit(lo) # look behind
|
emit(lo) # look behind
|
||||||
_compile(code, av[1], flags)
|
_compile(code, av[1], flags)
|
||||||
emit(OPCODES[SUCCESS])
|
emit(OPCODES[SUCCESS])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = _len(code) - skip
|
||||||
elif op is CALL:
|
elif op is CALL:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
skip = len(code); emit(0)
|
skip = _len(code); emit(0)
|
||||||
_compile(code, av, flags)
|
_compile(code, av, flags)
|
||||||
emit(OPCODES[SUCCESS])
|
emit(OPCODES[SUCCESS])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = _len(code) - skip
|
||||||
elif op is AT:
|
elif op is AT:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
if flags & SRE_FLAG_MULTILINE:
|
if flags & SRE_FLAG_MULTILINE:
|
||||||
|
@ -122,16 +136,17 @@ def _compile(code, pattern, flags):
|
||||||
elif op is BRANCH:
|
elif op is BRANCH:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
tail = []
|
tail = []
|
||||||
|
tailappend = tail.append
|
||||||
for av in av[1]:
|
for av in av[1]:
|
||||||
skip = len(code); emit(0)
|
skip = _len(code); emit(0)
|
||||||
# _compile_info(code, av, flags)
|
# _compile_info(code, av, flags)
|
||||||
_compile(code, av, flags)
|
_compile(code, av, flags)
|
||||||
emit(OPCODES[JUMP])
|
emit(OPCODES[JUMP])
|
||||||
tail.append(len(code)); emit(0)
|
tailappend(_len(code)); emit(0)
|
||||||
code[skip] = len(code) - skip
|
code[skip] = _len(code) - skip
|
||||||
emit(0) # end of branch
|
emit(0) # end of branch
|
||||||
for tail in tail:
|
for tail in tail:
|
||||||
code[tail] = len(code) - tail
|
code[tail] = _len(code) - tail
|
||||||
elif op is CATEGORY:
|
elif op is CATEGORY:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
if flags & SRE_FLAG_LOCALE:
|
if flags & SRE_FLAG_LOCALE:
|
||||||
|
@ -148,16 +163,16 @@ def _compile(code, pattern, flags):
|
||||||
elif op is GROUPREF_EXISTS:
|
elif op is GROUPREF_EXISTS:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
emit((av[0]-1)*2)
|
emit((av[0]-1)*2)
|
||||||
skipyes = len(code); emit(0)
|
skipyes = _len(code); emit(0)
|
||||||
_compile(code, av[1], flags)
|
_compile(code, av[1], flags)
|
||||||
if av[2]:
|
if av[2]:
|
||||||
emit(OPCODES[JUMP])
|
emit(OPCODES[JUMP])
|
||||||
skipno = len(code); emit(0)
|
skipno = _len(code); emit(0)
|
||||||
code[skipyes] = len(code) - skipyes + 1
|
code[skipyes] = _len(code) - skipyes + 1
|
||||||
_compile(code, av[2], flags)
|
_compile(code, av[2], flags)
|
||||||
code[skipno] = len(code) - skipno
|
code[skipno] = _len(code) - skipno
|
||||||
else:
|
else:
|
||||||
code[skipyes] = len(code) - skipyes + 1
|
code[skipyes] = _len(code) - skipyes + 1
|
||||||
else:
|
else:
|
||||||
raise ValueError, ("unsupported operand type", op)
|
raise ValueError, ("unsupported operand type", op)
|
||||||
|
|
||||||
|
@ -165,7 +180,7 @@ def _compile_charset(charset, flags, code, fixup=None):
|
||||||
# compile charset subprogram
|
# compile charset subprogram
|
||||||
emit = code.append
|
emit = code.append
|
||||||
if fixup is None:
|
if fixup is None:
|
||||||
fixup = lambda x: x
|
fixup = _identityfunction
|
||||||
for op, av in _optimize_charset(charset, fixup):
|
for op, av in _optimize_charset(charset, fixup):
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
if op is NEGATE:
|
if op is NEGATE:
|
||||||
|
@ -193,11 +208,12 @@ def _compile_charset(charset, flags, code, fixup=None):
|
||||||
def _optimize_charset(charset, fixup):
|
def _optimize_charset(charset, fixup):
|
||||||
# internal: optimize character set
|
# internal: optimize character set
|
||||||
out = []
|
out = []
|
||||||
|
outappend = out.append
|
||||||
charmap = [False]*256
|
charmap = [False]*256
|
||||||
try:
|
try:
|
||||||
for op, av in charset:
|
for op, av in charset:
|
||||||
if op is NEGATE:
|
if op is NEGATE:
|
||||||
out.append((op, av))
|
outappend((op, av))
|
||||||
elif op is LITERAL:
|
elif op is LITERAL:
|
||||||
charmap[fixup(av)] = True
|
charmap[fixup(av)] = True
|
||||||
elif op is RANGE:
|
elif op is RANGE:
|
||||||
|
@ -212,35 +228,37 @@ def _optimize_charset(charset, fixup):
|
||||||
# compress character map
|
# compress character map
|
||||||
i = p = n = 0
|
i = p = n = 0
|
||||||
runs = []
|
runs = []
|
||||||
|
runsappend = runs.append
|
||||||
for c in charmap:
|
for c in charmap:
|
||||||
if c:
|
if c:
|
||||||
if n == 0:
|
if n == 0:
|
||||||
p = i
|
p = i
|
||||||
n = n + 1
|
n = n + 1
|
||||||
elif n:
|
elif n:
|
||||||
runs.append((p, n))
|
runsappend((p, n))
|
||||||
n = 0
|
n = 0
|
||||||
i = i + 1
|
i = i + 1
|
||||||
if n:
|
if n:
|
||||||
runs.append((p, n))
|
runsappend((p, n))
|
||||||
if len(runs) <= 2:
|
if len(runs) <= 2:
|
||||||
# use literal/range
|
# use literal/range
|
||||||
for p, n in runs:
|
for p, n in runs:
|
||||||
if n == 1:
|
if n == 1:
|
||||||
out.append((LITERAL, p))
|
outappend((LITERAL, p))
|
||||||
else:
|
else:
|
||||||
out.append((RANGE, (p, p+n-1)))
|
outappend((RANGE, (p, p+n-1)))
|
||||||
if len(out) < len(charset):
|
if len(out) < len(charset):
|
||||||
return out
|
return out
|
||||||
else:
|
else:
|
||||||
# use bitmap
|
# use bitmap
|
||||||
data = _mk_bitmap(charmap)
|
data = _mk_bitmap(charmap)
|
||||||
out.append((CHARSET, data))
|
outappend((CHARSET, data))
|
||||||
return out
|
return out
|
||||||
return charset
|
return charset
|
||||||
|
|
||||||
def _mk_bitmap(bits):
|
def _mk_bitmap(bits):
|
||||||
data = []
|
data = []
|
||||||
|
dataappend = data.append
|
||||||
if _sre.CODESIZE == 2:
|
if _sre.CODESIZE == 2:
|
||||||
start = (1, 0)
|
start = (1, 0)
|
||||||
else:
|
else:
|
||||||
|
@ -249,9 +267,9 @@ def _mk_bitmap(bits):
|
||||||
for c in bits:
|
for c in bits:
|
||||||
if c:
|
if c:
|
||||||
v = v + m
|
v = v + m
|
||||||
m = m << 1
|
m = m + m
|
||||||
if m > MAXCODE:
|
if m > MAXCODE:
|
||||||
data.append(v)
|
dataappend(v)
|
||||||
m, v = start
|
m, v = start
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
@ -295,7 +313,7 @@ def _optimize_unicode(charset, fixup):
|
||||||
elif op is LITERAL:
|
elif op is LITERAL:
|
||||||
charmap[fixup(av)] = True
|
charmap[fixup(av)] = True
|
||||||
elif op is RANGE:
|
elif op is RANGE:
|
||||||
for i in range(fixup(av[0]), fixup(av[1])+1):
|
for i in xrange(fixup(av[0]), fixup(av[1])+1):
|
||||||
charmap[i] = True
|
charmap[i] = True
|
||||||
elif op is CATEGORY:
|
elif op is CATEGORY:
|
||||||
# XXX: could expand category
|
# XXX: could expand category
|
||||||
|
@ -307,13 +325,13 @@ def _optimize_unicode(charset, fixup):
|
||||||
if sys.maxunicode != 65535:
|
if sys.maxunicode != 65535:
|
||||||
# XXX: negation does not work with big charsets
|
# XXX: negation does not work with big charsets
|
||||||
return charset
|
return charset
|
||||||
for i in range(65536):
|
for i in xrange(65536):
|
||||||
charmap[i] = not charmap[i]
|
charmap[i] = not charmap[i]
|
||||||
comps = {}
|
comps = {}
|
||||||
mapping = [0]*256
|
mapping = [0]*256
|
||||||
block = 0
|
block = 0
|
||||||
data = []
|
data = []
|
||||||
for i in range(256):
|
for i in xrange(256):
|
||||||
chunk = tuple(charmap[i*256:(i+1)*256])
|
chunk = tuple(charmap[i*256:(i+1)*256])
|
||||||
new = comps.setdefault(chunk, block)
|
new = comps.setdefault(chunk, block)
|
||||||
mapping[i] = new
|
mapping[i] = new
|
||||||
|
@ -348,19 +366,21 @@ def _compile_info(code, pattern, flags):
|
||||||
return # not worth it
|
return # not worth it
|
||||||
# look for a literal prefix
|
# look for a literal prefix
|
||||||
prefix = []
|
prefix = []
|
||||||
|
prefixappend = prefix.append
|
||||||
prefix_skip = 0
|
prefix_skip = 0
|
||||||
charset = [] # not used
|
charset = [] # not used
|
||||||
|
charsetappend = charset.append
|
||||||
if not (flags & SRE_FLAG_IGNORECASE):
|
if not (flags & SRE_FLAG_IGNORECASE):
|
||||||
# look for literal prefix
|
# look for literal prefix
|
||||||
for op, av in pattern.data:
|
for op, av in pattern.data:
|
||||||
if op is LITERAL:
|
if op is LITERAL:
|
||||||
if len(prefix) == prefix_skip:
|
if len(prefix) == prefix_skip:
|
||||||
prefix_skip = prefix_skip + 1
|
prefix_skip = prefix_skip + 1
|
||||||
prefix.append(av)
|
prefixappend(av)
|
||||||
elif op is SUBPATTERN and len(av[1]) == 1:
|
elif op is SUBPATTERN and len(av[1]) == 1:
|
||||||
op, av = av[1][0]
|
op, av = av[1][0]
|
||||||
if op is LITERAL:
|
if op is LITERAL:
|
||||||
prefix.append(av)
|
prefixappend(av)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
@ -371,27 +391,29 @@ def _compile_info(code, pattern, flags):
|
||||||
if op is SUBPATTERN and av[1]:
|
if op is SUBPATTERN and av[1]:
|
||||||
op, av = av[1][0]
|
op, av = av[1][0]
|
||||||
if op is LITERAL:
|
if op is LITERAL:
|
||||||
charset.append((op, av))
|
charsetappend((op, av))
|
||||||
elif op is BRANCH:
|
elif op is BRANCH:
|
||||||
c = []
|
c = []
|
||||||
|
cappend = c.append
|
||||||
for p in av[1]:
|
for p in av[1]:
|
||||||
if not p:
|
if not p:
|
||||||
break
|
break
|
||||||
op, av = p[0]
|
op, av = p[0]
|
||||||
if op is LITERAL:
|
if op is LITERAL:
|
||||||
c.append((op, av))
|
cappend((op, av))
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
charset = c
|
charset = c
|
||||||
elif op is BRANCH:
|
elif op is BRANCH:
|
||||||
c = []
|
c = []
|
||||||
|
cappend = c.append
|
||||||
for p in av[1]:
|
for p in av[1]:
|
||||||
if not p:
|
if not p:
|
||||||
break
|
break
|
||||||
op, av = p[0]
|
op, av = p[0]
|
||||||
if op is LITERAL:
|
if op is LITERAL:
|
||||||
c.append((op, av))
|
cappend((op, av))
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
@ -432,7 +454,7 @@ def _compile_info(code, pattern, flags):
|
||||||
code.extend(prefix)
|
code.extend(prefix)
|
||||||
# generate overlap table
|
# generate overlap table
|
||||||
table = [-1] + ([0]*len(prefix))
|
table = [-1] + ([0]*len(prefix))
|
||||||
for i in range(len(prefix)):
|
for i in xrange(len(prefix)):
|
||||||
table[i+1] = table[i]+1
|
table[i+1] = table[i]+1
|
||||||
while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
|
while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
|
||||||
table[i+1] = table[table[i+1]-1]+1
|
table[i+1] = table[table[i+1]-1]+1
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue