Mirror of https://github.com/python/cpython.git, synced 2025-07-07 19:35:27 +00:00
bpo-30455: Generate all token related code and docs from Grammar/Tokens. (GH-10370)
"Include/token.h", "Lib/token.py" (containing now some data moved from "Lib/tokenize.py") and new files "Parser/token.c" (containing the code moved from "Parser/tokenizer.c") and "Doc/library/token-list.inc" (included in "Doc/library/token.rst") are now generated from "Grammar/Tokens" by "Tools/scripts/generate_token.py". The script overwrites files only if needed and can be used on the read-only sources tree. "Lib/symbol.py" is now generated by "Tools/scripts/generate_symbol_py.py" instead of been executable itself. Added new make targets "regen-token" and "regen-symbol" which are now dependencies of "regen-all". The documentation contains now strings for operators and punctuation tokens.
parent c1b4b0f616
commit 8ac658114d
18 changed files with 940 additions and 462 deletions
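The "overwrites files only when needed" behavior described above can be sketched in a few lines; update_file() below is a minimal illustrative helper written for this note, not a verbatim copy of the code in Tools/scripts/generate_token.py. The idea is to compare the freshly generated text against what is already on disk and skip the write when nothing changed, so timestamps stay untouched and an up-to-date read-only tree never needs write access.

def update_file(path, new_text):
    # Hypothetical sketch of the "write only if changed" idiom.
    try:
        with open(path, 'r', encoding='utf-8') as f:
            if f.read() == new_text:
                return False        # already up to date: no write, mtime preserved
    except FileNotFoundError:
        pass                        # first generation: fall through and write
    with open(path, 'w', encoding='utf-8') as f:
        f.write(new_text)
    return True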
Lib/tokenize.py

@@ -32,6 +32,7 @@ import itertools as _itertools
 import re
 import sys
 from token import *
+from token import EXACT_TOKEN_TYPES
 
 cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
@@ -41,55 +42,6 @@ __all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
                            "untokenize", "TokenInfo"]
 del token
 
-EXACT_TOKEN_TYPES = {
-    '(': LPAR,
-    ')': RPAR,
-    '[': LSQB,
-    ']': RSQB,
-    ':': COLON,
-    ',': COMMA,
-    ';': SEMI,
-    '+': PLUS,
-    '-': MINUS,
-    '*': STAR,
-    '/': SLASH,
-    '|': VBAR,
-    '&': AMPER,
-    '<': LESS,
-    '>': GREATER,
-    '=': EQUAL,
-    '.': DOT,
-    '%': PERCENT,
-    '{': LBRACE,
-    '}': RBRACE,
-    '==': EQEQUAL,
-    '!=': NOTEQUAL,
-    '<=': LESSEQUAL,
-    '>=': GREATEREQUAL,
-    '~': TILDE,
-    '^': CIRCUMFLEX,
-    '<<': LEFTSHIFT,
-    '>>': RIGHTSHIFT,
-    '**': DOUBLESTAR,
-    '+=': PLUSEQUAL,
-    '-=': MINEQUAL,
-    '*=': STAREQUAL,
-    '/=': SLASHEQUAL,
-    '%=': PERCENTEQUAL,
-    '&=': AMPEREQUAL,
-    '|=': VBAREQUAL,
-    '^=': CIRCUMFLEXEQUAL,
-    '<<=': LEFTSHIFTEQUAL,
-    '>>=': RIGHTSHIFTEQUAL,
-    '**=': DOUBLESTAREQUAL,
-    '//': DOUBLESLASH,
-    '//=': DOUBLESLASHEQUAL,
-    '...': ELLIPSIS,
-    '->': RARROW,
-    '@': AT,
-    '@=': ATEQUAL,
-}
-
 class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
     def __repr__(self):
         annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
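The dict removed in this hunk is what backs TokenInfo.exact_type, the property that refines a generic OP token into its specific operator type; only the dict's home changes here, not that behavior. A small self-contained demonstration:

import io
import tokenize

# TokenInfo.exact_type consults EXACT_TOKEN_TYPES to map an OP token's
# string ('**=' below) to its specific token type.
source = b"x **= 2\n"
for tok in tokenize.tokenize(io.BytesIO(source).readline):
    if tok.type == tokenize.OP:
        print(tok.string, tokenize.tok_name[tok.exact_type])  # **= DOUBLESTAREQUAL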
@@ -163,17 +115,11 @@ Triple = group(StringPrefix + "'''", StringPrefix + '"""')
 String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
 
-# Because of leftmost-then-longest match semantics, be sure to put the
-# longest operators first (e.g., if = came before ==, == would get
-# recognized as two instances of =).
-Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
-                 r"//=?", r"->",
-                 r"[+\-*/%&@|^=<>]=?",
-                 r"~")
-
-Bracket = '[][(){}]'
-Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
-Funny = group(Operator, Bracket, Special)
+# Sorting in reverse order puts the long operators before their prefixes.
+# Otherwise if = came before ==, == would get recognized as two instances
+# of =.
+Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True)))
+Funny = group(r'\r?\n', Special)
 
 PlainToken = group(Number, Funny, String, Name)
 Token = Ignore + PlainToken
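The comment added in this hunk is easy to verify in isolation: in reverse lexicographic order every string sorts ahead of its own prefixes, so in the resulting alternation the regex engine tries '**=' before '**' before '*', and the leftmost alternative that matches is also the longest. A sketch using a small sample of operators (not the full token set):

import re

# Reverse lexicographic order puts each operator ahead of its prefixes,
# reproducing the ordering trick used for Special above.
ops = ['*', '**', '**=', '=', '==']
pattern = '|'.join(map(re.escape, sorted(ops, reverse=True)))
print(pattern)                             # ==|=|\*\*=|\*\*|\*
print(re.match(pattern, '**= 2').group())  # '**=' rather than '*'
print(re.match(pattern, '== y').group())   # '==' rather than '='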