GH-111485: Separate out parsing, analysis and code-gen phases of tier 1 code generator (GH-112299)

This commit is contained in:
Mark Shannon 2023-12-07 12:49:40 +00:00 committed by GitHub
parent 3d712a9f4c
commit b449415b2f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 1675 additions and 526 deletions

View file

@ -112,7 +112,7 @@ STRING = "STRING"
char = r"\'.\'" # TODO: escape sequence
CHARACTER = "CHARACTER"
comment_re = r"//.*|/\*([^*]|\*[^/])*\*/"
comment_re = r"(//.*)|/\*([^*]|\*[^/])*\*/"
COMMENT = "COMMENT"
newline = r"\n"
@ -234,6 +234,7 @@ def make_syntax_error(
@dataclass(slots=True)
class Token:
filename: str
kind: str
text: str
begin: tuple[int, int]
@ -261,7 +262,7 @@ class Token:
def replaceText(self, txt: str) -> "Token":
assert isinstance(txt, str)
return Token(self.kind, txt, self.begin, self.end)
return Token(self.filename, self.kind, txt, self.begin, self.end)
def __repr__(self) -> str:
b0, b1 = self.begin
@ -272,7 +273,7 @@ class Token:
return f"{self.kind}({self.text!r}, {b0}:{b1}, {e0}:{e1})"
def tokenize(src: str, line: int = 1, filename: str | None = None) -> Iterator[Token]:
def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
linestart = -1
for m in matcher.finditer(src):
start, end = m.span()
@ -323,7 +324,7 @@ def tokenize(src: str, line: int = 1, filename: str | None = None) -> Iterator[T
else:
begin = line, start - linestart
if kind != "\n":
yield Token(kind, text, begin, (line, start - linestart + len(text)))
yield Token(filename, kind, text, begin, (line, start - linestart + len(text)))
def to_text(tkns: list[Token], dedent: int = 0) -> str: