bpo-25324: copy tok_name before changing it (#1608)

* add test to check if we're modifying token * copy list so import tokenize doesn't have side effects on token * shorten line * add tokenize tokens to token.h to get them to show up in token * move ERRORTOKEN back to its previous location, and fix nitpick * copy comments from token.h automatically * fix whitespace and make more pythonic * change to fix comments from @haypo * update token.rst and Misc/NEWS * change wording * some more wording changes
2025-07-07 19:35:27 +00:00 · 2017-05-31 15:00:21 +01:00 · 2017-05-31 15:00:21 +01:00 · fc354f0785
commit fc354f0785
parent 85aba238e4
7 changed files with 52 additions and 21 deletions
--- a/Lib/token.py
+++ b/Lib/token.py
@ -63,11 +63,17 @@ AT = 49
 ATEQUAL = 50
 RARROW = 51
 ELLIPSIS = 52
+# Don't forget to update the table _PyParser_TokenNames in tokenizer.c!
 OP = 53
 AWAIT = 54
 ASYNC = 55
 ERRORTOKEN = 56
-N_TOKENS = 57
+# These aren't used by the C tokenizer but are needed for tokenize.py
+COMMENT = 57
+NL = 58
+ENCODING = 59
+N_TOKENS = 60
+# Special definitions for cooperation with parser
 NT_OFFSET = 256
 #--end constants--

@ -102,15 +108,26 @@ def _main():
    with fp:
        lines = fp.read().split("\n")
    prog = re.compile(
-        "#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
+        r"#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
        re.IGNORECASE)
+    comment_regex = re.compile(
+        r"^\s*/\*\s*(.+?)\s*\*/\s*$",
+        re.IGNORECASE)
+
    tokens = {}
+    prev_val = None
    for line in lines:
        match = prog.match(line)
        if match:
            name, val = match.group(1, 2)
            val = int(val)
-            tokens[val] = name          # reverse so we can sort them...
+            tokens[val] = {'token': name}          # reverse so we can sort them...
+            prev_val = val
+        else:
+            comment_match = comment_regex.match(line)
+            if comment_match and prev_val is not None:
+                comment = comment_match.group(1)
+                tokens[prev_val]['comment'] = comment
    keys = sorted(tokens.keys())
    # load the output skeleton from the target:
    try:
@ -127,8 +144,10 @@ def _main():
        sys.stderr.write("target does not contain format markers")
        sys.exit(3)
    lines = []
-    for val in keys:
-        lines.append("%s = %d" % (tokens[val], val))
+    for key in keys:
+        lines.append("%s = %d" % (tokens[key]["token"], key))
+        if "comment" in tokens[key]:
+            lines.append("# %s" % tokens[key]["comment"])
    format[start:end] = lines
    try:
        fp = open(outFileName, 'w')