gh-104504: Run mypy on cases_generator in CI (and blacken the code) (gh-108090)

Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
This commit is contained in:
Dong-hee Na 2023-08-18 22:42:45 +09:00 committed by GitHub
parent fd19509220
commit 28cab71f95
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 313 additions and 194 deletions

View file

@ -4,132 +4,221 @@
import re
from dataclasses import dataclass
from collections.abc import Iterator
def choice(*opts: str) -> str:
    """Build a regex alternation in which every option is its own group.

    Each pattern in *opts* is wrapped in parentheses and the results are
    joined with ``|``, in the order given.  With no options the result is
    the empty string.
    """
    return "|".join(f"({opt})" for opt in opts)
# Regexes
#
# Each operator/delimiter name below is first bound to the regex that matches
# it.  Once `operators` has been built (further down), the same name is
# rebound to its token kind, which is simply the name itself as a string.

# Longer operators must go before shorter ones.
PLUSPLUS = r"\+\+"
MINUSMINUS = r"--"

# ->
ARROW = r"->"
ELLIPSIS = r"\.\.\."

# Assignment operators
TIMESEQUAL = r"\*="
DIVEQUAL = r"/="
MODEQUAL = r"%="
PLUSEQUAL = r"\+="
MINUSEQUAL = r"-="
LSHIFTEQUAL = r"<<="
RSHIFTEQUAL = r">>="
ANDEQUAL = r"&="
OREQUAL = r"\|="
XOREQUAL = r"\^="

# Operators
PLUS = r"\+"
MINUS = r"-"
TIMES = r"\*"
DIVIDE = r"/"
MOD = r"%"
NOT = r"~"
XOR = r"\^"
LOR = r"\|\|"
LAND = r"&&"
LSHIFT = r"<<"
RSHIFT = r">>"
LE = r"<="
GE = r">="
EQ = r"=="
NE = r"!="
LT = r"<"
GT = r">"
LNOT = r"!"
OR = r"\|"
AND = r"&"
EQUALS = r"="

# ?
CONDOP = r"\?"

# Delimiters
LPAREN = r"\("
RPAREN = r"\)"
LBRACKET = r"\["
RBRACKET = r"\]"
LBRACE = r"\{"
RBRACE = r"\}"
COMMA = r","
PERIOD = r"\."
SEMI = r";"
COLON = r":"
BACKSLASH = r"\\"

# Snapshot name -> regex for every module global defined so far whose name
# contains no lowercase letters.  The dict keeps definition order, which is
# what puts longer operators ahead of their shorter prefixes.
operators = {op: pattern for op, pattern in globals().items() if op == op.upper()}
# Rebind each operator name to its token kind (the name itself).
for op in operators:
    globals()[op] = op
# Map literal operator text to its kind by stripping the regex escapes.  The
# backslash pattern strips down to "", so it is keyed by a single backslash.
opmap = {pattern.replace("\\", "") or "\\": op for op, pattern in operators.items()}
# Macros
macro = r"# *(ifdef|ifndef|undef|define|error|endif|if|else|include|#)"
MACRO = "MACRO"

id_re = r"[a-zA-Z_][0-9a-zA-Z_]*"
IDENTIFIER = "IDENTIFIER"

# Numbers
# NOTE: `hex` and `float` shadow the builtins of the same name from here on.
suffix = r"([uU]?[lL]?[lL]?)"
octal = r"0[0-7]+" + suffix
hex = r"0[xX][0-9a-fA-F]+"
decimal_digits = r"(0|[1-9][0-9]*)"
decimal = decimal_digits + suffix

exponent = r"""([eE][-+]?[0-9]+)"""
fraction = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
float = "((((" + fraction + ")" + exponent + "?)|([0-9]+" + exponent + "))[FfLl]?)"

number_re = choice(octal, hex, float, decimal)
NUMBER = "NUMBER"

# Strings and character literals
simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
decimal_escape = r"""(\d+)"""
hex_escape = r"""(x[0-9a-fA-F]+)"""
escape_sequence = (
    r"""(\\(""" + simple_escape + "|" + decimal_escape + "|" + hex_escape + "))"
)
string_char = r"""([^"\\\n]|""" + escape_sequence + ")"
str_re = '"' + string_char + '*"'
STRING = "STRING"
char = r"\'.\'"  # TODO: escape sequence
CHARACTER = "CHARACTER"

# Comments: `//` to end of line, or `/* ... */`
comment_re = r"//.*|/\*([^*]|\*[^/])*\*/"
COMMENT = "COMMENT"

newline = r"\n"
invalid = (
    r"\S"  # A single non-space character that's not caught by any of the other patterns
)

# One combined pattern for the whole lexer.  Alternatives are listed in
# priority order; `invalid` comes last so it only matches what nothing else did.
matcher = re.compile(
    choice(
        id_re,
        number_re,
        str_re,
        char,
        newline,
        macro,
        comment_re,
        *operators.values(),
        invalid,
    )
)
# Matches a leading identifier character; used to recognise identifier tokens.
letter = re.compile(r"[a-zA-Z_]")
# Keyword token kinds.  Each constant's value is its own name.
AUTO = "AUTO"
BREAK = "BREAK"
CASE = "CASE"
CHAR = "CHAR"
CONST = "CONST"
CONTINUE = "CONTINUE"
DEFAULT = "DEFAULT"
DO = "DO"
DOUBLE = "DOUBLE"
ELSE = "ELSE"
ENUM = "ENUM"
EXTERN = "EXTERN"
FLOAT = "FLOAT"
FOR = "FOR"
GOTO = "GOTO"
IF = "IF"
INLINE = "INLINE"
INT = "INT"
LONG = "LONG"
OVERRIDE = "OVERRIDE"
REGISTER = "REGISTER"
OFFSETOF = "OFFSETOF"
RESTRICT = "RESTRICT"
RETURN = "RETURN"
SHORT = "SHORT"
SIGNED = "SIGNED"
SIZEOF = "SIZEOF"
STATIC = "STATIC"
STRUCT = "STRUCT"
SWITCH = "SWITCH"
TYPEDEF = "TYPEDEF"
UNION = "UNION"
UNSIGNED = "UNSIGNED"
VOID = "VOID"
VOLATILE = "VOLATILE"
WHILE = "WHILE"

# Every keyword kind, in declaration order.
kwds = [
    AUTO,
    BREAK,
    CASE,
    CHAR,
    CONST,
    CONTINUE,
    DEFAULT,
    DO,
    DOUBLE,
    ELSE,
    ENUM,
    EXTERN,
    FLOAT,
    FOR,
    GOTO,
    IF,
    INLINE,
    INT,
    LONG,
    OVERRIDE,
    REGISTER,
    OFFSETOF,
    RESTRICT,
    RETURN,
    SHORT,
    SIGNED,
    SIZEOF,
    STATIC,
    STRUCT,
    SWITCH,
    TYPEDEF,
    UNION,
    UNSIGNED,
    VOID,
    VOLATILE,
    WHILE,
]

# Lowercase spelling -> keyword kind.
keywords = {kind.lower(): kind for kind in kwds}

# Only the keyword kinds are exported by `from <module> import *`.
__all__ = list(kwds)
def make_syntax_error(
    message: str,
    filename: str | None,
    line: int,
    column: int,
    line_text: str,
) -> SyntaxError:
    """Build (but do not raise) a SyntaxError that points at a source location.

    *message* becomes the error message; *filename*, *line*, *column* and
    *line_text* are passed to SyntaxError as its location details tuple.
    *filename* may be None when the source has no file name.
    """
    return SyntaxError(message, (filename, line, column, line_text))
@ -142,30 +231,30 @@ class Token:
end: tuple[int, int]
@property
def line(self) -> int:
    """Line on which the token begins (first element of ``begin``)."""
    return self.begin[0]
@property
def column(self) -> int:
    """Column at which the token begins (second element of ``begin``)."""
    return self.begin[1]
@property
def end_line(self) -> int:
    """Line on which the token ends (first element of ``end``)."""
    return self.end[0]
@property
def end_column(self) -> int:
    """Column at which the token ends (second element of ``end``)."""
    return self.end[1]
@property
def width(self) -> int:
    """Difference between the end column and the begin column.

    Only the column components are compared; the line components of
    ``begin`` and ``end`` are not taken into account.
    """
    return self.end[1] - self.begin[1]
def replaceText(self, txt: str) -> "Token":
    """Return a new Token with the same kind, begin and end but text *txt*.

    The receiver is left unchanged.  *txt* must be a str (checked with an
    assert).
    """
    assert isinstance(txt, str)
    return Token(self.kind, txt, self.begin, self.end)
def __repr__(self):
def __repr__(self) -> str:
b0, b1 = self.begin
e0, e1 = self.end
if b0 == e0:
@ -174,7 +263,7 @@ class Token:
return f"{self.kind}({self.text!r}, {b0}:{b1}, {e0}:{e1})"
def tokenize(src, line=1, filename=None):
def tokenize(src: str, line: int = 1, filename: str | None = None) -> Iterator[Token]:
linestart = -1
for m in matcher.finditer(src):
start, end = m.span()
@ -183,73 +272,75 @@ def tokenize(src, line=1, filename=None):
kind = keywords[text]
elif letter.match(text):
kind = IDENTIFIER
elif text == '...':
elif text == "...":
kind = ELLIPSIS
elif text == '.':
elif text == ".":
kind = PERIOD
elif text[0] in '0123456789.':
elif text[0] in "0123456789.":
kind = NUMBER
elif text[0] == '"':
kind = STRING
elif text in opmap:
kind = opmap[text]
elif text == '\n':
elif text == "\n":
linestart = start
line += 1
kind = '\n'
kind = "\n"
elif text[0] == "'":
kind = CHARACTER
elif text[0] == '#':
elif text[0] == "#":
kind = MACRO
elif text[0] == '/' and text[1] in '/*':
elif text[0] == "/" and text[1] in "/*":
kind = COMMENT
else:
lineend = src.find("\n", start)
if lineend == -1:
lineend = len(src)
raise make_syntax_error(f"Bad token: {text}",
filename, line, start-linestart+1, src[linestart:lineend])
raise make_syntax_error(
f"Bad token: {text}",
filename,
line,
start - linestart + 1,
src[linestart:lineend],
)
if kind == COMMENT:
begin = line, start-linestart
newlines = text.count('\n')
begin = line, start - linestart
newlines = text.count("\n")
if newlines:
linestart = start + text.rfind('\n')
linestart = start + text.rfind("\n")
line += newlines
else:
begin = line, start-linestart
begin = line, start - linestart
if kind != "\n":
yield Token(kind, text, begin, (line, start-linestart+len(text)))
__all__ = []
__all__.extend([kind for kind in globals() if kind.upper() == kind])
yield Token(kind, text, begin, (line, start - linestart + len(text)))
def to_text(tkns: list[Token], dedent: int = 0) -> str:
    """Rebuild source text from tokens, using their recorded positions.

    Tokens are emitted in the order given.  A newline is inserted for every
    line advanced between consecutive tokens, and spaces pad each token out
    to its begin column.  The starting column of every line is ``1 + dedent``,
    so a positive *dedent* removes that much leading indentation and a
    negative one adds it.

    For a negative *dedent*, the continuation lines of a multi-line COMMENT
    token are indented by ``-dedent`` extra spaces.  For a positive *dedent*
    they are left as they are (see the TODO below).

    Returns the empty string when *tkns* is empty.
    """
    res: list[str] = []
    line, col = -1, 1 + dedent
    for tkn in tkns:
        if line == -1:
            # First token: start on its line so no leading newlines are emitted.
            line, _ = tkn.begin
        tok_line, tok_col = tkn.begin
        # Tokens are expected in source order (tok_line >= line); not checked.
        while tok_line > line:
            line += 1
            res.append("\n")
            col = 1 + dedent
        res.append(" " * (tok_col - col))
        text = tkn.text
        if dedent != 0 and tkn.kind == "COMMENT" and "\n" in text:
            if dedent < 0:
                text = text.replace("\n", "\n" + " " * -dedent)
            # TODO: dedent > 0
        res.append(text)
        line, col = tkn.end
    return "".join(res)
if __name__ == "__main__":
import sys
filename = sys.argv[1]
if filename == "-c":
src = sys.argv[2]