bpo-30455: Generate all token-related code and docs from Grammar/Tokens. (GH-10370)

"Include/token.h", "Lib/token.py" (containing now some data moved from
"Lib/tokenize.py") and new files "Parser/token.c" (containing the code
moved from "Parser/tokenizer.c") and "Doc/library/token-list.inc" (included
in "Doc/library/token.rst") are now generated from "Grammar/Tokens" by
"Tools/scripts/generate_token.py". The script overwrites files only if
needed and can be used on the read-only sources tree.

"Lib/symbol.py" is now generated by "Tools/scripts/generate_symbol_py.py"
instead of been executable itself.

Added new make targets "regen-token" and "regen-symbol", which are now
dependencies of "regen-all".

The documentation now contains the strings of the operator and punctuation tokens.
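
For reference, each line in "Grammar/Tokens" gives a token name, optionally
followed by the token's literal string (for example, LBRACE is paired with
'{'). Below is a minimal sketch of parsing that format; it is illustrative
only, and the parse_tokens helper is an assumption, not the actual code in
"Tools/scripts/generate_token.py":

import re

def parse_tokens(path):
    """Yield (name, string) pairs from a Grammar/Tokens-style file."""
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            if not line or line.startswith('#'):
                continue  # skip blank lines and comments
            match = re.match(r"(\w+)(?:\s+'([^']+)')?$", line)
            if match:
                # string is None for tokens without a fixed spelling
                # (NAME, NUMBER, ...)
                yield match.group(1), match.group(2)

For example, list(parse_tokens("Grammar/Tokens")) would start with entries
like ('ENDMARKER', None) and later contain ('LBRACE', '{'). Regeneration of
the actual outputs is driven by the new make targets, e.g. "make regen-token".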
Serhiy Storchaka, 2018-12-22 11:18:40 +02:00 (committed by GitHub)
commit 8ac658114d, parent c1b4b0f616
18 changed files with 940 additions and 462 deletions

Lib/token.py (generated): 134 lines changed

@@ -1,15 +1,8 @@
-"""Token constants (from "token.h")."""
+"""Token constants."""
+# Auto-generated by Tools/scripts/generate_token.py
 
 __all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
 
-#  This file is automatically generated; please don't muck it up!
-#
-#  To update the symbols in this file, 'cd' to the top directory of
-#  the python source tree after building the interpreter and run:
-#
-#    ./python Lib/token.py
-#--start constants--
 ENDMARKER = 0
 NAME = 1
 NUMBER = 2
@@ -63,23 +56,70 @@ AT = 49
 ATEQUAL = 50
 RARROW = 51
 ELLIPSIS = 52
-# Don't forget to update the table _PyParser_TokenNames in tokenizer.c!
 OP = 53
+ERRORTOKEN = 54
 # These aren't used by the C tokenizer but are needed for tokenize.py
-ERRORTOKEN = 54
 COMMENT = 55
 NL = 56
 ENCODING = 57
 N_TOKENS = 58
 # Special definitions for cooperation with parser
 NT_OFFSET = 256
-#--end constants--
 
 tok_name = {value: name
             for name, value in globals().items()
             if isinstance(value, int) and not name.startswith('_')}
 __all__.extend(tok_name.values())
 
+EXACT_TOKEN_TYPES = {
+    '!=': NOTEQUAL,
+    '%': PERCENT,
+    '%=': PERCENTEQUAL,
+    '&': AMPER,
+    '&=': AMPEREQUAL,
+    '(': LPAR,
+    ')': RPAR,
+    '*': STAR,
+    '**': DOUBLESTAR,
+    '**=': DOUBLESTAREQUAL,
+    '*=': STAREQUAL,
+    '+': PLUS,
+    '+=': PLUSEQUAL,
+    ',': COMMA,
+    '-': MINUS,
+    '-=': MINEQUAL,
+    '->': RARROW,
+    '.': DOT,
+    '...': ELLIPSIS,
+    '/': SLASH,
+    '//': DOUBLESLASH,
+    '//=': DOUBLESLASHEQUAL,
+    '/=': SLASHEQUAL,
+    ':': COLON,
+    ';': SEMI,
+    '<': LESS,
+    '<<': LEFTSHIFT,
+    '<<=': LEFTSHIFTEQUAL,
+    '<=': LESSEQUAL,
+    '=': EQUAL,
+    '==': EQEQUAL,
+    '>': GREATER,
+    '>=': GREATEREQUAL,
+    '>>': RIGHTSHIFT,
+    '>>=': RIGHTSHIFTEQUAL,
+    '@': AT,
+    '@=': ATEQUAL,
+    '[': LSQB,
+    ']': RSQB,
+    '^': CIRCUMFLEX,
+    '^=': CIRCUMFLEXEQUAL,
+    '{': LBRACE,
+    '|': VBAR,
+    '|=': VBAREQUAL,
+    '}': RBRACE,
+    '~': TILDE,
+}
+
 
 def ISTERMINAL(x):
     return x < NT_OFFSET
@@ -88,73 +128,3 @@ def ISNONTERMINAL(x):
 
 def ISEOF(x):
     return x == ENDMARKER
-
-
-def _main():
-    import re
-    import sys
-    args = sys.argv[1:]
-    inFileName = args and args[0] or "Include/token.h"
-    outFileName = "Lib/token.py"
-    if len(args) > 1:
-        outFileName = args[1]
-    try:
-        fp = open(inFileName)
-    except OSError as err:
-        sys.stdout.write("I/O error: %s\n" % str(err))
-        sys.exit(1)
-    with fp:
-        lines = fp.read().split("\n")
-    prog = re.compile(
-        r"#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
-        re.IGNORECASE)
-    comment_regex = re.compile(
-        r"^\s*/\*\s*(.+?)\s*\*/\s*$",
-        re.IGNORECASE)
-
-    tokens = {}
-    prev_val = None
-    for line in lines:
-        match = prog.match(line)
-        if match:
-            name, val = match.group(1, 2)
-            val = int(val)
-            tokens[val] = {'token': name}  # reverse so we can sort them...
-            prev_val = val
-        else:
-            comment_match = comment_regex.match(line)
-            if comment_match and prev_val is not None:
-                comment = comment_match.group(1)
-                tokens[prev_val]['comment'] = comment
-    keys = sorted(tokens.keys())
-    # load the output skeleton from the target:
-    try:
-        fp = open(outFileName)
-    except OSError as err:
-        sys.stderr.write("I/O error: %s\n" % str(err))
-        sys.exit(2)
-    with fp:
-        format = fp.read().split("\n")
-    try:
-        start = format.index("#--start constants--") + 1
-        end = format.index("#--end constants--")
-    except ValueError:
-        sys.stderr.write("target does not contain format markers")
-        sys.exit(3)
-    lines = []
-    for key in keys:
-        lines.append("%s = %d" % (tokens[key]["token"], key))
-        if "comment" in tokens[key]:
-            lines.append("# %s" % tokens[key]["comment"])
-    format[start:end] = lines
-    try:
-        fp = open(outFileName, 'w')
-    except OSError as err:
-        sys.stderr.write("I/O error: %s\n" % str(err))
-        sys.exit(4)
-    with fp:
-        fp.write("\n".join(format))
-
-
-if __name__ == "__main__":
-    _main()
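
After this change the token data is simply importable rather than produced by
running Lib/token.py on Include/token.h. A quick usage sketch against the
generated module (outputs assume CPython 3.8, where this commit landed):

import token

print(token.tok_name[token.RARROW])                   # 'RARROW'
print(token.EXACT_TOKEN_TYPES['->'] == token.RARROW)  # True; exact type for an OP string
print(token.ISTERMINAL(token.NAME))                   # True; terminals are < NT_OFFSET
print(token.ISNONTERMINAL(token.NT_OFFSET))           # True; grammar symbols are >= NT_OFFSET
print(token.ISEOF(token.ENDMARKER))                   # True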