cpython/Tools/scripts/generate_token.py
Greg Price fa3a38d81f Mark files as executable that are meant as scripts. (GH-15354)
This is the converse of GH-15353 -- in addition to plenty of
scripts in the tree that are marked with the executable bit
(and so can be directly executed), there are a few that have
a leading `#!` which could let them be executed, but it doesn't
do anything because they don't have the executable bit set.

Here's a command which finds such files and marks them.  The
first line finds files in the tree with a `#!` line *anywhere*;
the next-to-last step checks that the *first* line is actually of
that form.  In between we filter out files that already have the
bit set, and some files that are meant as fragments to be
consumed by one or another kind of preprocessor.

    $ git grep -l '^#!' \
      | grep -vxFf <( \
          git ls-files --stage \
          | perl -lane 'print $F[3] if (!/^100644/)' \
        ) \
      | grep -ve '\.in$' -e '^Doc/includes/' \
      | while read f; do
          head -c2 "$f" | grep -qxF '#!' \
          && chmod a+x "$f"; \
        done
2019-09-09 07:16:33 -07:00

268 lines
6.6 KiB
Python
Executable file

#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
# Doc/library/token-list.inc
# Include/token.h
# Parser/token.c
# Lib/token.py
# Value written out as NT_OFFSET in the generated header and Python module.
# The ISTERMINAL / ISNONTERMINAL helpers in those templates compare a token
# number against it (terminal: x < NT_OFFSET, non-terminal: x >= NT_OFFSET).
NT_OFFSET = 256
def load_tokens(path):
    """Parse a token definition file.

    Each non-empty line (after removing ``#`` comments) has the form
    ``NAME`` or ``NAME 'literal'``.  A token's numeric value is its
    zero-based position among the token lines of the file.

    Parameters:
        path: path of the token definition file to read.

    Returns:
        A tuple ``(tok_names, ERRORTOKEN, string_to_tok)`` where
        ``tok_names`` is the list of token names in file order,
        ``ERRORTOKEN`` is the value of the token named ``ERRORTOKEN``
        (``None`` if the file does not define it) and ``string_to_tok``
        maps each quoted literal to the value of its token.

    Raises:
        OSError: if the file cannot be opened.
        ValueError, SyntaxError: if the second field of a line is not a
            valid Python literal.
    """
    # Imported locally because the module has no top-level import section.
    import ast

    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    # Explicit encoding so the result does not depend on the locale.
    with open(path, encoding='utf-8') as fp:
        for line in fp:
            # Strip comments: keep only the text before the first '#'.
            line = line.partition('#')[0].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            if len(fields) > 1:
                # The literal is written as a quoted Python string.
                # literal_eval decodes it without executing arbitrary
                # expressions, unlike eval().
                string = ast.literal_eval(fields[1])
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
def update_file(file, content):
    """Write *content* to *file* unless the file already holds exactly that.

    Parameters:
        file: path of the file to refresh.
        content: the full text the file should contain.

    Returns:
        True when the file was (re)written, False when it was left alone
        because its current text already equals *content*.
    """
    try:
        with open(file, 'r') as existing:
            up_to_date = existing.read() == content
    except (OSError, ValueError):
        # Missing or unreadable file: fall through and write it.
        up_to_date = False
    if up_to_date:
        return False
    with open(file, 'w') as target:
        target.write(content)
    return True
# %-format template for the generated C header (see make_h).
# Placeholders, in order:
#   %s - the block of "#define NAME value" lines
#   %d - N_TOKENS, the total number of token names
#   %d - NT_OFFSET
token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */
/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif
#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */
%s\
#define N_TOKENS %d
#define NT_OFFSET %d
/* Special definitions for cooperation with parser */
#define ISTERMINAL(x) ((x) < NT_OFFSET)
#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
#define ISEOF(x) ((x) == ENDMARKER)
PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);
#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""
def make_h(infile, outfile='Include/token.h'):
    """Regenerate the C header *outfile* from the token list in *infile*.

    Only the tokens up to and including ERRORTOKEN get a ``#define``;
    N_TOKENS still counts every token name in the file.  A message is
    printed when the output file was actually rewritten.
    """
    tok_names, error_index, _ = load_tokens(infile)
    define_block = ''.join(
        "#define %-15s %d\n" % (name, value)
        for value, name in enumerate(tok_names[:error_index + 1])
    )
    header_text = token_h_template % (define_block, len(tok_names), NT_OFFSET)
    if update_file(outfile, header_text):
        print("%s regenerated from %s" % (outfile, infile))
# %-format template for the generated C source (see make_c).
# Placeholders, in order:
#   %s - the quoted entries of the _PyParser_TokenNames array
#   %s - switch statement body for PyToken_OneChar
#   %s - switch statement body for PyToken_TwoChars
#   %s - switch statement body for PyToken_ThreeChars
# Each lookup function falls back to returning OP when no case matched.
token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */
#include "Python.h"
#include "token.h"
/* Token names */
const char * const _PyParser_TokenNames[] = {
%s\
};
/* Return the token corresponding to a single character */
int
PyToken_OneChar(int c1)
{
%s\
return OP;
}
int
PyToken_TwoChars(int c1, int c2)
{
%s\
return OP;
}
int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
return OP;
}
"""
def generate_chars_to_token(mapping, n=1):
    """Render a nested character mapping as C ``switch`` statements.

    Parameters:
        mapping: dict keyed by single characters.  A value is either a
            token name (rendered as ``return NAME;``) or another dict of
            the same shape, rendered as a nested switch on the next
            character followed by ``break;``.
        n: 1-based position of the character being switched on; selects
            the C variable ``c<n>`` and the indentation depth.

    Returns:
        The generated C text as a single string.
    """
    pad = ' ' * n
    pieces = [pad + 'switch (c%d) {\n' % (n,)]
    for char in sorted(mapping):
        target = mapping[char]
        if not isinstance(target, dict):
            # Leaf: this character completes a token.
            pieces.append(pad + "case '%s': return %s;\n" % (char, target))
            continue
        # Inner node: dispatch on the following character.
        pieces.append(pad + "case '%s':\n" % (char,))
        pieces.append(generate_chars_to_token(target, n + 1))
        pieces.append(pad + ' break;\n')
    pieces.append(pad + '}\n')
    return ''.join(pieces)
def make_c(infile, outfile='Parser/token.c'):
    """Regenerate the C source *outfile* from the token list in *infile*.

    Builds one character trie per literal length (1, 2 and 3), renders
    each as a switch statement, and emits the token name table.  Names
    of tokens at or after ERRORTOKEN are wrapped in angle brackets.  A
    message is printed when the output file was actually rewritten.
    """
    tok_names, error_index, string_to_tok = load_tokens(infile)
    # '<>' is emitted as a second spelling of the '!=' token.
    string_to_tok['<>'] = string_to_tok['!=']

    # tries[length] maps first char -> ... -> last char -> token name.
    tries = {}
    for literal, tok_value in string_to_tok.items():
        assert 1 <= len(literal) <= 3
        tok_name = tok_names[tok_value]
        node = tries.setdefault(len(literal), {})
        for ch in literal[:-1]:
            node = node.setdefault(ch, {})
        node[literal[-1]] = tok_name

    table_rows = [
        ' "%s",\n' % (('<%s>' % name) if value >= error_index else name)
        for value, name in enumerate(tok_names)
    ]
    table_rows.append(' "<N_TOKENS>",\n')

    one_char = generate_chars_to_token(tries[1])
    two_chars = generate_chars_to_token(tries[2])
    three_chars = generate_chars_to_token(tries[3])
    source_text = token_c_template % (
        ''.join(table_rows), one_char, two_chars, three_chars)
    if update_file(outfile, source_text):
        print("%s regenerated from %s" % (outfile, infile))
# %-format template for the generated reST include file (see make_rst).
# The single %s placeholder receives the per-token ".. data::" entries.
token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS
.. data:: NT_OFFSET
"""
def make_rst(infile, outfile='Doc/library/token-list.inc'):
    """Regenerate the reST token list *outfile* from *infile*.

    Emits one ``.. data::`` entry per token up to and including
    ERRORTOKEN; tokens that have a literal spelling also get a line
    stating it.  A message is printed when the output file was actually
    rewritten.
    """
    tok_names, error_index, string_to_tok = load_tokens(infile)
    # Invert the literal -> value mapping to look up spellings by value.
    spelling_of = {value: s for s, value in string_to_tok.items()}

    entries = []
    for value, name in enumerate(tok_names[:error_index + 1]):
        entries.append('.. data:: %s' % (name,))
        if value in spelling_of:
            entries.extend([
                '',
                ' Token value for ``"%s"``.' % spelling_of[value],
            ])
        entries.append('')

    rst_text = token_inc_template % '\n'.join(entries)
    if update_file(outfile, rst_text):
        print("%s regenerated from %s" % (outfile, infile))
# %-format template for the generated Python token module (see make_py).
# Placeholders, in order:
#   %s - the "NAME = value" constant assignments
#   %d - N_TOKENS, the total number of token names
#   %d - NT_OFFSET
#   %s - the entries of the EXACT_TOKEN_TYPES dict literal
# The bodies of the generated functions must be indented, otherwise the
# rendered module is not valid Python.
token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py
__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d
tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())
EXACT_TOKEN_TYPES = {
%s
}
def ISTERMINAL(x):
    return x < NT_OFFSET
def ISNONTERMINAL(x):
    return x >= NT_OFFSET
def ISEOF(x):
    return x == ENDMARKER
'''
def make_py(infile, outfile='Lib/token.py'):
    """Regenerate the Python token module *outfile* from *infile*.

    Emits a ``NAME = value`` constant for every token, with an
    explanatory comment line inserted at index ERRORTOKEN of that list
    (immediately before the ERRORTOKEN assignment), plus the
    EXACT_TOKEN_TYPES entries sorted by literal.  A message is printed
    when the output file was actually rewritten.
    """
    tok_names, error_index, string_to_tok = load_tokens(infile)

    assignments = [
        '%s = %d' % (name, value) for value, name in enumerate(tok_names)
    ]
    assignments.insert(
        error_index,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    exact_entries = [
        ' %r: %s,' % (literal, tok_names[value])
        for literal, value in sorted(string_to_tok.items())
    ]

    module_text = token_py_template % (
        '\n'.join(assignments),
        len(tok_names),
        NT_OFFSET,
        '\n'.join(exact_entries),
    )
    if update_file(outfile, module_text):
        print("%s regenerated from %s" % (outfile, infile))
def main(op, infile='Grammar/Tokens', *args):
    """Dispatch to the module-level generator function named ``make_<op>``.

    Parameters:
        op: suffix selecting the generator (looked up as ``'make_' + op``
            in this module's globals).
        infile: token definition file passed to the generator.
        *args: extra positional arguments forwarded unchanged.

    Raises:
        KeyError: if no global named ``make_<op>`` exists.
    """
    generator = globals()['make_' + op]
    generator(infile, *args)


if __name__ == '__main__':
    import sys

    # Command line: <op> [infile [extra args...]]
    cli_args = sys.argv[1:]
    main(*cli_args)