bpo-30455: Generate all token related code and docs from Grammar/Tokens. (GH-10370)
"Include/token.h", "Lib/token.py" (containing now some data moved from "Lib/tokenize.py") and new files "Parser/token.c" (containing the code moved from "Parser/tokenizer.c") and "Doc/library/token-list.inc" (included in "Doc/library/token.rst") are now generated from "Grammar/Tokens" by "Tools/scripts/generate_token.py". The script overwrites files only if needed and can be used on the read-only sources tree. "Lib/symbol.py" is now generated by "Tools/scripts/generate_symbol_py.py" instead of been executable itself. Added new make targets "regen-token" and "regen-symbol" which are now dependencies of "regen-all". The documentation contains now strings for operators and punctuation tokens.
parent c1b4b0f616
commit 8ac658114d
18 changed files with 940 additions and 462 deletions
Tools/scripts/generate_token.py (new normal file, 268 lines)

@@ -0,0 +1,268 @@
#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#    Doc/library/token-list.inc
#    Include/token.h
#    Parser/token.c
#    Lib/token.py


NT_OFFSET = 256

def load_tokens(path):
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

def make_c(infile, outfile='Parser/token.c'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])
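A rough sketch (not part of the commit) of what the regenerated "Lib/token.py" exposes, based on token_py_template above; the exact numeric values depend on "Grammar/Tokens":

    # Sketch: consuming the generated token module.
    import token

    print(token.tok_name[token.NAME])       # maps the numeric constant back to 'NAME'
    print(token.ISTERMINAL(token.NAME))     # True: terminal tokens are below NT_OFFSET
    print(token.ISEOF(token.ENDMARKER))     # True

    # EXACT_TOKEN_TYPES holds the operator/punctuation strings (the data moved
    # here from Lib/tokenize.py), mapping each string to its token constant.
    print(token.EXACT_TOKEN_TYPES['!='] == token.NOTEQUAL)   # True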