Issue #13150: The tokenize module doesn't compile large regular expressions at startup anymore.

Instead, the re module's standard caching does its work.
Antoine Pitrou 2011-10-11 15:45:56 +02:00
parent 699cd9f7f1
commit 10a99b024d
2 changed files with 19 additions and 19 deletions
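
Why this works: re.compile() consults the re module's internal pattern
cache before building anything, so recompiling the same pattern string is
a cheap dictionary lookup after the first call. A minimal sketch of the
behaviour the patch relies on (the object-identity check is a CPython
implementation detail, not a documented guarantee):

    import re

    def _compile(expr):
        # Same helper as in tokenize.py: re.compile() checks the
        # re module's internal cache before compiling.
        return re.compile(expr, re.UNICODE)

    p1 = _compile(r"[A-Za-z_]\w*")
    p2 = _compile(r"[A-Za-z_]\w*")
    assert p1 is p2   # CPython hands back the cached pattern object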

Lib/tokenize.py

@@ -114,19 +114,17 @@ PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
 def _compile(expr):
     return re.compile(expr, re.UNICODE)
 
-tokenprog, pseudoprog, single3prog, double3prog = map(
-    _compile, (Token, PseudoToken, Single3, Double3))
-endprogs = {"'": _compile(Single), '"': _compile(Double),
-            "'''": single3prog, '"""': double3prog,
-            "r'''": single3prog, 'r"""': double3prog,
-            "b'''": single3prog, 'b"""': double3prog,
-            "br'''": single3prog, 'br"""': double3prog,
-            "R'''": single3prog, 'R"""': double3prog,
-            "B'''": single3prog, 'B"""': double3prog,
-            "bR'''": single3prog, 'bR"""': double3prog,
-            "Br'''": single3prog, 'Br"""': double3prog,
-            "BR'''": single3prog, 'BR"""': double3prog,
-            'r': None, 'R': None, 'b': None, 'B': None}
+endpats = {"'": Single, '"': Double,
+           "'''": Single3, '"""': Double3,
+           "r'''": Single3, 'r"""': Double3,
+           "b'''": Single3, 'b"""': Double3,
+           "br'''": Single3, 'br"""': Double3,
+           "R'''": Single3, 'R"""': Double3,
+           "B'''": Single3, 'B"""': Double3,
+           "bR'''": Single3, 'bR"""': Double3,
+           "Br'''": Single3, 'Br"""': Double3,
+           "BR'''": Single3, 'BR"""': Double3,
+           'r': None, 'R': None, 'b': None, 'B': None}
 
 triple_quoted = {}
 for t in ("'''", '"""',
@@ -143,8 +141,6 @@ for t in ("'", '"',
           "bR'", 'bR"', "BR'", 'BR"' ):
     single_quoted[t] = t
 
-del _compile
-
 tabsize = 8
 
 class TokenError(Exception): pass
@@ -466,7 +462,7 @@ def _tokenize(readline, encoding):
             continued = 0
 
         while pos < max:
-            pseudomatch = pseudoprog.match(line, pos)
+            pseudomatch = _compile(PseudoToken).match(line, pos)
             if pseudomatch:                                # scan for tokens
                 start, end = pseudomatch.span(1)
                 spos, epos, pos = (lnum, start), (lnum, end), end
@@ -482,7 +478,7 @@ def _tokenize(readline, encoding):
                     assert not token.endswith("\n")
                     yield TokenInfo(COMMENT, token, spos, epos, line)
                 elif token in triple_quoted:
-                    endprog = endprogs[token]
+                    endprog = _compile(endpats[token])
                     endmatch = endprog.match(line, pos)
                     if endmatch:                           # all on one line
                         pos = endmatch.end(0)
@@ -498,8 +494,9 @@ def _tokenize(readline, encoding):
                       token[:3] in single_quoted:
                     if token[-1] == '\n':                  # continued string
                         strstart = (lnum, start)
-                        endprog = (endprogs[initial] or endprogs[token[1]] or
-                                   endprogs[token[2]])
+                        endprog = _compile(endpats[initial] or
+                                           endpats[token[1]] or
+                                           endpats[token[2]])
                         contstr, needcont = line[start:], 1
                         contline = line
                         break
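
Taken together, the tokenize.py hunks swap module-level precompilation for
compile-on-first-use: endpats holds plain pattern strings, and each call
site wraps its lookup in _compile(). Reduced to a standalone sketch (the
names and patterns here are hypothetical, not from the patch):

    import re

    # Pattern strings only; nothing is compiled at import time.
    RAW_PATTERNS = {"name": r"[A-Za-z_]\w*", "number": r"\d+"}

    def match_token(kind, text, pos=0):
        # Compilation is deferred to the call site; re.compile() hits
        # the re module's cache on every call after the first.
        return re.compile(RAW_PATTERNS[kind]).match(text, pos)

    print(match_token("name", "spam = 1").group())   # -> 'spam'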

Misc/NEWS

@@ -303,6 +303,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #13150: The tokenize module doesn't compile large regular expressions
+  at startup anymore.
+
 - Issue #11171: Fix distutils.sysconfig.get_makefile_filename when Python was
   configured with different prefix and exec-prefix.
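
The module's public behaviour is unchanged by the patch; a quick sanity
check (any Python 3 that includes this commit) that exercises the deferred
compilation paths, including the triple-quoted end-pattern lookup:

    import io
    import tokenize

    source = b"s = r'''text'''\n"
    for tok in tokenize.tokenize(io.BytesIO(source).readline):
        print(tok.type, tok.string)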