Issue #13150: The tokenize module doesn't compile large regular expressions at startup anymore.

Instead, the re module's standard caching does its work.
Antoine Pitrou 2011-10-11 15:45:56 +02:00
parent 699cd9f7f1
commit 10a99b024d
2 changed files with 19 additions and 19 deletions
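
Why this works: re.compile() consults the re module's internal pattern
cache before building anything, so recompiling the same pattern string is
a cheap dictionary lookup after the first call. A minimal sketch of the
behaviour the patch relies on (the object-identity check is a CPython
implementation detail, not a documented guarantee):

    import re

    def _compile(expr):
        # Same helper as in tokenize.py: re.compile() checks the
        # re module's internal cache before compiling.
        return re.compile(expr, re.UNICODE)

    p1 = _compile(r"[A-Za-z_]\w*")
    p2 = _compile(r"[A-Za-z_]\w*")
    assert p1 is p2   # CPython hands back the cached pattern object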

Lib/tokenize.py

@@ -114,19 +114,17 @@ PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
 def _compile(expr):
     return re.compile(expr, re.UNICODE)
 
-tokenprog, pseudoprog, single3prog, double3prog = map(
-    _compile, (Token, PseudoToken, Single3, Double3))
-endprogs = {"'": _compile(Single), '"': _compile(Double),
-            "'''": single3prog, '"""': double3prog,
-            "r'''": single3prog, 'r"""': double3prog,
-            "b'''": single3prog, 'b"""': double3prog,
-            "br'''": single3prog, 'br"""': double3prog,
-            "R'''": single3prog, 'R"""': double3prog,
-            "B'''": single3prog, 'B"""': double3prog,
-            "bR'''": single3prog, 'bR"""': double3prog,
-            "Br'''": single3prog, 'Br"""': double3prog,
-            "BR'''": single3prog, 'BR"""': double3prog,
-            'r': None, 'R': None, 'b': None, 'B': None}
+endpats = {"'": Single, '"': Double,
+           "'''": Single3, '"""': Double3,
+           "r'''": Single3, 'r"""': Double3,
+           "b'''": Single3, 'b"""': Double3,
+           "br'''": Single3, 'br"""': Double3,
+           "R'''": Single3, 'R"""': Double3,
+           "B'''": Single3, 'B"""': Double3,
+           "bR'''": Single3, 'bR"""': Double3,
+           "Br'''": Single3, 'Br"""': Double3,
+           "BR'''": Single3, 'BR"""': Double3,
+           'r': None, 'R': None, 'b': None, 'B': None}
 
 triple_quoted = {}
 for t in ("'''", '"""',
@@ -143,8 +141,6 @@ for t in ("'", '"',
           "bR'", 'bR"', "BR'", 'BR"' ):
     single_quoted[t] = t
 
-del _compile
-
 tabsize = 8
 
 class TokenError(Exception): pass
@@ -466,7 +462,7 @@ def _tokenize(readline, encoding):
             continued = 0
 
         while pos < max:
-            pseudomatch = pseudoprog.match(line, pos)
+            pseudomatch = _compile(PseudoToken).match(line, pos)
             if pseudomatch:                                # scan for tokens
                 start, end = pseudomatch.span(1)
                 spos, epos, pos = (lnum, start), (lnum, end), end
@@ -482,7 +478,7 @@ def _tokenize(readline, encoding):
                     assert not token.endswith("\n")
                     yield TokenInfo(COMMENT, token, spos, epos, line)
                 elif token in triple_quoted:
-                    endprog = endprogs[token]
+                    endprog = _compile(endpats[token])
                     endmatch = endprog.match(line, pos)
                     if endmatch:                           # all on one line
                         pos = endmatch.end(0)
@@ -498,8 +494,9 @@ def _tokenize(readline, encoding):
                       token[:3] in single_quoted:
                     if token[-1] == '\n':                  # continued string
                         strstart = (lnum, start)
-                        endprog = (endprogs[initial] or endprogs[token[1]] or
-                                   endprogs[token[2]])
+                        endprog = _compile(endpats[initial] or
+                                           endpats[token[1]] or
+                                           endpats[token[2]])
                         contstr, needcont = line[start:], 1
                         contline = line
                         break
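
Taken together, the tokenize.py hunks swap module-level precompilation for
compile-on-first-use: endpats holds plain pattern strings, and each call
site wraps its lookup in _compile(). Reduced to a standalone sketch (the
names and patterns here are hypothetical, not from the patch):

    import re

    # Pattern strings only; nothing is compiled at import time.
    RAW_PATTERNS = {"name": r"[A-Za-z_]\w*", "number": r"\d+"}

    def match_token(kind, text, pos=0):
        # Compilation is deferred to the call site; re.compile() hits
        # the re module's cache on every call after the first.
        return re.compile(RAW_PATTERNS[kind]).match(text, pos)

    print(match_token("name", "spam = 1").group())   # -> 'spam'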

Misc/NEWS

@@ -303,6 +303,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #13150: The tokenize module doesn't compile large regular expressions
+  at startup anymore.
+
 - Issue #11171: Fix distutils.sysconfig.get_makefile_filename when Python was
   configured with different prefix and exec-prefix.
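
The module's public behaviour is unchanged by the patch; a quick sanity
check (any Python 3 that includes this commit) that exercises the deferred
compilation paths, including the triple-quoted end-pattern lookup:

    import io
    import tokenize

    source = b"s = r'''text'''\n"
    for tok in tokenize.tokenize(io.BytesIO(source).readline):
        print(tok.type, tok.string)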