mirror of
https://github.com/python/cpython.git
synced 2025-08-31 05:58:33 +00:00
gh-104504: Run mypy on cases_generator in CI (and blacken the code) (gh-108090)
Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
This commit is contained in:
parent
fd19509220
commit
28cab71f95
11 changed files with 313 additions and 194 deletions
|
@ -4,132 +4,221 @@
|
|||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from collections.abc import Iterator
|
||||
|
||||
def choice(*opts: str) -> str:
    """Build a regex alternation from the given sub-patterns.

    Each pattern is wrapped in its own parenthesised group and the groups
    are joined with ``|``.  Alternatives are tried left to right, so the
    order of *opts* is significant.  With no arguments the result is the
    empty string.
    """
    return "|".join(f"({opt})" for opt in opts)
|
||||
|
||||
|
||||
# Regexes
#
# Every ALL-CAPS name below is first bound to the regex source that matches
# the operator or delimiter.  After the `operators` table has been built from
# these names, each name is rebound to its own spelling (PLUS == "PLUS") so it
# can be used as a token kind.

# Longer operators must go before shorter ones.

PLUSPLUS = r"\+\+"
MINUSMINUS = r"--"

# ->
ARROW = r"->"
ELLIPSIS = r"\.\.\."

# Assignment operators
TIMESEQUAL = r"\*="
DIVEQUAL = r"/="
MODEQUAL = r"%="
PLUSEQUAL = r"\+="
MINUSEQUAL = r"-="
LSHIFTEQUAL = r"<<="
RSHIFTEQUAL = r">>="
ANDEQUAL = r"&="
OREQUAL = r"\|="
XOREQUAL = r"\^="

# Operators
PLUS = r"\+"
MINUS = r"-"
TIMES = r"\*"
DIVIDE = r"/"
MOD = r"%"
NOT = r"~"
XOR = r"\^"
LOR = r"\|\|"
LAND = r"&&"
LSHIFT = r"<<"
RSHIFT = r">>"
LE = r"<="
GE = r">="
EQ = r"=="
NE = r"!="
LT = r"<"
GT = r">"
LNOT = r"!"
OR = r"\|"
AND = r"&"
EQUALS = r"="

# ?
CONDOP = r"\?"

# Delimiters
LPAREN = r"\("
RPAREN = r"\)"
LBRACKET = r"\["
RBRACKET = r"\]"
LBRACE = r"\{"
RBRACE = r"\}"
COMMA = r","
PERIOD = r"\."
SEMI = r";"
COLON = r":"
BACKSLASH = r"\\"

# Collect every module-level name that is entirely upper-case, in definition
# order, mapping it to its regex source.  This must run before any other
# ALL-CAPS name is defined in the module.
operators = {op: pattern for op, pattern in globals().items() if op == op.upper()}
# Rebind each operator name to its own spelling so it doubles as a token kind.
for op in operators:
    globals()[op] = op
# Literal operator text -> token kind.  Stripping the backslashes from the
# regex source yields the literal text, except for BACKSLASH itself, whose
# pattern collapses to "" and is therefore mapped explicitly to "\\".
opmap = {pattern.replace("\\", "") or "\\": op for op, pattern in operators.items()}
|
||||
|
||||
# Macros
macro = r"# *(ifdef|ifndef|undef|define|error|endif|if|else|include|#)"
MACRO = "MACRO"

# Identifiers
id_re = r"[a-zA-Z_][0-9a-zA-Z_]*"
IDENTIFIER = "IDENTIFIER"

# Integer literals.  NOTE: `hex` shadows the builtin of the same name; the
# name is left unchanged because it is part of this module's namespace.
suffix = r"([uU]?[lL]?[lL]?)"
octal = r"0[0-7]+" + suffix
hex = r"0[xX][0-9a-fA-F]+"
decimal_digits = r"(0|[1-9][0-9]*)"
decimal = decimal_digits + suffix

# Floating-point literals.  NOTE: `float` shadows the builtin of the same
# name; kept for the same reason as `hex` above.
exponent = r"""([eE][-+]?[0-9]+)"""
fraction = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
float = "((((" + fraction + ")" + exponent + "?)|([0-9]+" + exponent + "))[FfLl]?)"

number_re = choice(octal, hex, float, decimal)
NUMBER = "NUMBER"

# String and character literals
simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
decimal_escape = r"""(\d+)"""
hex_escape = r"""(x[0-9a-fA-F]+)"""
escape_sequence = (
    r"""(\\(""" + simple_escape + "|" + decimal_escape + "|" + hex_escape + "))"
)
string_char = r"""([^"\\\n]|""" + escape_sequence + ")"
str_re = '"' + string_char + '*"'
STRING = "STRING"
char = r"\'.\'"  # TODO: escape sequence
CHARACTER = "CHARACTER"

# Comments: `// ...` to end of line, or `/* ... */`
comment_re = r"//.*|/\*([^*]|\*[^/])*\*/"
COMMENT = "COMMENT"

newline = r"\n"
invalid = (
    r"\S"  # A single non-space character that's not caught by any of the other patterns
)
# One combined pattern for all token kinds.  Alternatives are tried in the
# order listed, so `invalid` must stay last as the catch-all.
matcher = re.compile(
    choice(
        id_re,
        number_re,
        str_re,
        char,
        newline,
        macro,
        comment_re,
        *operators.values(),
        invalid,
    )
)
letter = re.compile(r"[a-zA-Z_]")
|
||||
|
||||
|
||||
# Keyword token kinds.  Each name is assigned explicitly (rather than being
# injected through globals()) so that static type checkers can see it.
AUTO = "AUTO"
BREAK = "BREAK"
CASE = "CASE"
CHAR = "CHAR"
CONST = "CONST"
CONTINUE = "CONTINUE"
DEFAULT = "DEFAULT"
DO = "DO"
DOUBLE = "DOUBLE"
ELSE = "ELSE"
ENUM = "ENUM"
EXTERN = "EXTERN"
FLOAT = "FLOAT"
FOR = "FOR"
GOTO = "GOTO"
IF = "IF"
INLINE = "INLINE"
INT = "INT"
LONG = "LONG"
OVERRIDE = "OVERRIDE"
REGISTER = "REGISTER"
OFFSETOF = "OFFSETOF"
RESTRICT = "RESTRICT"
RETURN = "RETURN"
SHORT = "SHORT"
SIGNED = "SIGNED"
SIZEOF = "SIZEOF"
STATIC = "STATIC"
STRUCT = "STRUCT"
SWITCH = "SWITCH"
TYPEDEF = "TYPEDEF"
UNION = "UNION"
UNSIGNED = "UNSIGNED"
VOID = "VOID"
VOLATILE = "VOLATILE"
WHILE = "WHILE"

# All keyword kinds, in the same order as the assignments above.
kwds = [
    AUTO,
    BREAK,
    CASE,
    CHAR,
    CONST,
    CONTINUE,
    DEFAULT,
    DO,
    DOUBLE,
    ELSE,
    ENUM,
    EXTERN,
    FLOAT,
    FOR,
    GOTO,
    IF,
    INLINE,
    INT,
    LONG,
    OVERRIDE,
    REGISTER,
    OFFSETOF,
    RESTRICT,
    RETURN,
    SHORT,
    SIGNED,
    SIZEOF,
    STATIC,
    STRUCT,
    SWITCH,
    TYPEDEF,
    UNION,
    UNSIGNED,
    VOID,
    VOLATILE,
    WHILE,
]
# Lower-case source spelling -> keyword token kind.
keywords = {name.lower(): name for name in kwds}

__all__ = []
__all__.extend(kwds)
|
||||
|
||||
|
||||
def make_syntax_error(
|
||||
message: str, filename: str, line: int, column: int, line_text: str,
|
||||
message: str,
|
||||
filename: str | None,
|
||||
line: int,
|
||||
column: int,
|
||||
line_text: str,
|
||||
) -> SyntaxError:
|
||||
return SyntaxError(message, (filename, line, column, line_text))
|
||||
|
||||
|
@ -142,30 +231,30 @@ class Token:
|
|||
end: tuple[int, int]
|
||||
|
||||
@property
def line(self) -> int:
    """Line number where the token begins (first item of ``self.begin``)."""
    return self.begin[0]
|
||||
|
||||
@property
def column(self) -> int:
    """Column where the token begins (second item of ``self.begin``)."""
    return self.begin[1]
|
||||
|
||||
@property
def end_line(self) -> int:
    """Line number where the token ends (first item of ``self.end``)."""
    return self.end[0]
|
||||
|
||||
@property
def end_column(self) -> int:
    """Column where the token ends (second item of ``self.end``)."""
    return self.end[1]
|
||||
|
||||
@property
def width(self) -> int:
    """End column minus begin column.

    Only the column components are compared; the line components of
    ``self.begin`` and ``self.end`` are ignored.
    """
    return self.end[1] - self.begin[1]
|
||||
|
||||
def replaceText(self, txt: str) -> "Token":
    """Return a new Token with the same kind, begin and end but text *txt*.

    This token is left unmodified.
    """
    assert isinstance(txt, str)
    return Token(self.kind, txt, self.begin, self.end)
|
||||
|
||||
def __repr__(self):
|
||||
def __repr__(self) -> str:
|
||||
b0, b1 = self.begin
|
||||
e0, e1 = self.end
|
||||
if b0 == e0:
|
||||
|
@ -174,7 +263,7 @@ class Token:
|
|||
return f"{self.kind}({self.text!r}, {b0}:{b1}, {e0}:{e1})"
|
||||
|
||||
|
||||
def tokenize(src, line=1, filename=None):
|
||||
def tokenize(src: str, line: int = 1, filename: str | None = None) -> Iterator[Token]:
|
||||
linestart = -1
|
||||
for m in matcher.finditer(src):
|
||||
start, end = m.span()
|
||||
|
@ -183,73 +272,75 @@ def tokenize(src, line=1, filename=None):
|
|||
kind = keywords[text]
|
||||
elif letter.match(text):
|
||||
kind = IDENTIFIER
|
||||
elif text == '...':
|
||||
elif text == "...":
|
||||
kind = ELLIPSIS
|
||||
elif text == '.':
|
||||
elif text == ".":
|
||||
kind = PERIOD
|
||||
elif text[0] in '0123456789.':
|
||||
elif text[0] in "0123456789.":
|
||||
kind = NUMBER
|
||||
elif text[0] == '"':
|
||||
kind = STRING
|
||||
elif text in opmap:
|
||||
kind = opmap[text]
|
||||
elif text == '\n':
|
||||
elif text == "\n":
|
||||
linestart = start
|
||||
line += 1
|
||||
kind = '\n'
|
||||
kind = "\n"
|
||||
elif text[0] == "'":
|
||||
kind = CHARACTER
|
||||
elif text[0] == '#':
|
||||
elif text[0] == "#":
|
||||
kind = MACRO
|
||||
elif text[0] == '/' and text[1] in '/*':
|
||||
elif text[0] == "/" and text[1] in "/*":
|
||||
kind = COMMENT
|
||||
else:
|
||||
lineend = src.find("\n", start)
|
||||
if lineend == -1:
|
||||
lineend = len(src)
|
||||
raise make_syntax_error(f"Bad token: {text}",
|
||||
filename, line, start-linestart+1, src[linestart:lineend])
|
||||
raise make_syntax_error(
|
||||
f"Bad token: {text}",
|
||||
filename,
|
||||
line,
|
||||
start - linestart + 1,
|
||||
src[linestart:lineend],
|
||||
)
|
||||
if kind == COMMENT:
|
||||
begin = line, start-linestart
|
||||
newlines = text.count('\n')
|
||||
begin = line, start - linestart
|
||||
newlines = text.count("\n")
|
||||
if newlines:
|
||||
linestart = start + text.rfind('\n')
|
||||
linestart = start + text.rfind("\n")
|
||||
line += newlines
|
||||
else:
|
||||
begin = line, start-linestart
|
||||
begin = line, start - linestart
|
||||
if kind != "\n":
|
||||
yield Token(kind, text, begin, (line, start-linestart+len(text)))
|
||||
|
||||
|
||||
__all__ = []
|
||||
__all__.extend([kind for kind in globals() if kind.upper() == kind])
|
||||
yield Token(kind, text, begin, (line, start - linestart + len(text)))
|
||||
|
||||
|
||||
def to_text(tkns: list["Token"], dedent: int = 0) -> str:
    """Rebuild source text from tokens, preserving their relative layout.

    Newlines and spaces are inserted from each token's ``begin`` position so
    that tokens land on the same relative lines and columns they came from.
    Output starts at the line of the first token.

    *dedent* shifts the column origin: each line is assumed to start at
    column ``1 + dedent``, so a positive value removes leading spaces and a
    negative value adds them.  For a negative *dedent*, the continuation
    lines of a multi-line COMMENT token are indented by the same amount.

    Returns the empty string when *tkns* is empty.
    """
    res: list[str] = []
    line, col = -1, 1 + dedent
    for tkn in tkns:
        if line == -1:
            # First token: start on its line without emitting newlines.
            line, _ = tkn.begin
        tok_line, tok_col = tkn.begin
        # Emit one newline per line skipped, resetting the column origin.
        while tok_line > line:
            line += 1
            res.append("\n")
            col = 1 + dedent
        # A negative count yields "", so a token never moves left of `col`.
        res.append(" " * (tok_col - col))
        text = tkn.text
        if dedent != 0 and tkn.kind == "COMMENT" and "\n" in text:
            if dedent < 0:
                text = text.replace("\n", "\n" + " " * -dedent)
            # TODO: dedent > 0
        res.append(text)
        line, col = tkn.end
    return "".join(res)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
filename = sys.argv[1]
|
||||
if filename == "-c":
|
||||
src = sys.argv[2]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue