bpo-30455: Generate all token related code and docs from Grammar/Tokens. (GH-10370)
"Include/token.h", "Lib/token.py" (containing now some data moved from "Lib/tokenize.py") and new files "Parser/token.c" (containing the code moved from "Parser/tokenizer.c") and "Doc/library/token-list.inc" (included in "Doc/library/token.rst") are now generated from "Grammar/Tokens" by "Tools/scripts/generate_token.py". The script overwrites files only if needed and can be used on the read-only sources tree. "Lib/symbol.py" is now generated by "Tools/scripts/generate_symbol_py.py" instead of been executable itself. Added new make targets "regen-token" and "regen-symbol" which are now dependencies of "regen-all". The documentation contains now strings for operators and punctuation tokens.
parent c1b4b0f616
commit 8ac658114d
18 changed files with 940 additions and 462 deletions
Lib/token.py (generated), 134 changed lines; removed and added lines appear interleaved below, without +/- markers
@@ -1,15 +1,8 @@
"""Token constants (from "token.h")."""
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

# This file is automatically generated; please don't muck it up!
#
# To update the symbols in this file, 'cd' to the top directory of
# the python source tree after building the interpreter and run:
#
#    ./python Lib/token.py

#--start constants--
ENDMARKER = 0
NAME = 1
NUMBER = 2

@@ -63,23 +56,70 @@ AT = 49
ATEQUAL = 50
RARROW = 51
ELLIPSIS = 52
# Don't forget to update the table _PyParser_TokenNames in tokenizer.c!
OP = 53
ERRORTOKEN = 54
# These aren't used by the C tokenizer but are needed for tokenize.py
ERRORTOKEN = 54
COMMENT = 55
NL = 56
ENCODING = 57
N_TOKENS = 58
# Special definitions for cooperation with parser
NT_OFFSET = 256
#--end constants--

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
    '!=': NOTEQUAL,
    '%': PERCENT,
    '%=': PERCENTEQUAL,
    '&': AMPER,
    '&=': AMPEREQUAL,
    '(': LPAR,
    ')': RPAR,
    '*': STAR,
    '**': DOUBLESTAR,
    '**=': DOUBLESTAREQUAL,
    '*=': STAREQUAL,
    '+': PLUS,
    '+=': PLUSEQUAL,
    ',': COMMA,
    '-': MINUS,
    '-=': MINEQUAL,
    '->': RARROW,
    '.': DOT,
    '...': ELLIPSIS,
    '/': SLASH,
    '//': DOUBLESLASH,
    '//=': DOUBLESLASHEQUAL,
    '/=': SLASHEQUAL,
    ':': COLON,
    ';': SEMI,
    '<': LESS,
    '<<': LEFTSHIFT,
    '<<=': LEFTSHIFTEQUAL,
    '<=': LESSEQUAL,
    '=': EQUAL,
    '==': EQEQUAL,
    '>': GREATER,
    '>=': GREATEREQUAL,
    '>>': RIGHTSHIFT,
    '>>=': RIGHTSHIFTEQUAL,
    '@': AT,
    '@=': ATEQUAL,
    '[': LSQB,
    ']': RSQB,
    '^': CIRCUMFLEX,
    '^=': CIRCUMFLEXEQUAL,
    '{': LBRACE,
    '|': VBAR,
    '|=': VBAREQUAL,
    '}': RBRACE,
    '~': TILDE,
}

def ISTERMINAL(x):
    return x < NT_OFFSET

@@ -88,73 +128,3 @@ def ISNONTERMINAL(x):

def ISEOF(x):
    return x == ENDMARKER


def _main():
    import re
    import sys
    args = sys.argv[1:]
    inFileName = args and args[0] or "Include/token.h"
    outFileName = "Lib/token.py"
    if len(args) > 1:
        outFileName = args[1]
    try:
        fp = open(inFileName)
    except OSError as err:
        sys.stdout.write("I/O error: %s\n" % str(err))
        sys.exit(1)
    with fp:
        lines = fp.read().split("\n")
    prog = re.compile(
        r"#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
        re.IGNORECASE)
    comment_regex = re.compile(
        r"^\s*/\*\s*(.+?)\s*\*/\s*$",
        re.IGNORECASE)

    tokens = {}
    prev_val = None
    for line in lines:
        match = prog.match(line)
        if match:
            name, val = match.group(1, 2)
            val = int(val)
            tokens[val] = {'token': name}  # reverse so we can sort them...
            prev_val = val
        else:
            comment_match = comment_regex.match(line)
            if comment_match and prev_val is not None:
                comment = comment_match.group(1)
                tokens[prev_val]['comment'] = comment
    keys = sorted(tokens.keys())
    # load the output skeleton from the target:
    try:
        fp = open(outFileName)
    except OSError as err:
        sys.stderr.write("I/O error: %s\n" % str(err))
        sys.exit(2)
    with fp:
        format = fp.read().split("\n")
    try:
        start = format.index("#--start constants--") + 1
        end = format.index("#--end constants--")
    except ValueError:
        sys.stderr.write("target does not contain format markers")
        sys.exit(3)
    lines = []
    for key in keys:
        lines.append("%s = %d" % (tokens[key]["token"], key))
        if "comment" in tokens[key]:
            lines.append("# %s" % tokens[key]["comment"])
    format[start:end] = lines
    try:
        fp = open(outFileName, 'w')
    except OSError as err:
        sys.stderr.write("I/O error: %s\n" % str(err))
        sys.exit(4)
    with fp:
        fp.write("\n".join(format))


if __name__ == "__main__":
    _main()
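For context on why EXACT_TOKEN_TYPES now lives in token.py: tokenize uses it to resolve the exact type of OP tokens. A small illustration using existing stdlib APIs (tokenize.generate_tokens and TokenInfo.exact_type); the one-line source string is made up for the example:

import io
import token
import tokenize

# Tokenize a snippet and report the exact operator type; the lookup behind
# exact_type is the EXACT_TOKEN_TYPES table shown in the diff above.
for tok in tokenize.generate_tokens(io.StringIO("x //= 2\n").readline):
    if tok.type == token.OP:
        print(tok.string, token.tok_name[tok.exact_type])
# expected output: "//= DOUBLESLASHEQUAL"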