mirror of
https://github.com/python/cpython.git
synced 2025-08-31 14:07:50 +00:00
GH-98831: "Generate" the interpreter (#98830)
The switch cases (really TARGET(opcode) macros) have been moved from ceval.c to generated_cases.c.h. That file is generated from instruction definitions in bytecodes.c (which impersonates a C file so the C code it contains can be edited without custom support in e.g. VS Code). The code generator lives in Tools/cases_generator (it has a README.md explaining how it works). The DSL used to describe the instructions is a work in progress, described in https://github.com/faster-cpython/ideas/blob/main/3.12/interpreter_definition.md. This is surely a work-in-progress. An easy next step could be auto-generating super-instructions. **IMPORTANT: Merge Conflicts** If you get a merge conflict for instruction implementations in ceval.c, your best bet is to port your changes to bytecodes.c. That file looks almost the same as the original cases, except instead of `TARGET(NAME)` it uses `inst(NAME)`, and the trailing `DISPATCH()` call is omitted (the code generator adds it automatically).
This commit is contained in:
parent
2cfcaf5af6
commit
41bc101dd6
13 changed files with 8961 additions and 3851 deletions
104
Tools/cases_generator/plexer.py
Normal file
104
Tools/cases_generator/plexer.py
Normal file
|
@ -0,0 +1,104 @@
|
|||
import lexer as lx
|
||||
Token = lx.Token
|
||||
|
||||
|
||||
class PLexer:
    """Cursor over the list of tokens that `lx.tokenize()` yields for a source string.

    The whole input is tokenized up front; `self.pos` is an index into
    `self.tokens`.  Unless `raw=True` is passed, the reading methods skip
    tokens whose kind is "COMMENT".
    """

    def __init__(self, src: str, filename: str|None = None):
        """Tokenize `src` and place the cursor on the first token.

        `filename` is forwarded to the tokenizer and reused when syntax
        errors are constructed.
        """
        self.src = src
        self.filename = filename
        self.tokens = list(lx.tokenize(self.src, filename=filename))
        self.pos = 0

    def getpos(self) -> int:
        """Return the current position (an index into the token list)."""
        return self.pos

    def eof(self) -> bool:
        """Return True if the cursor is at or past the end of the token list."""
        return self.pos >= len(self.tokens)

    def setpos(self, pos: int) -> None:
        """Reset the position to `pos`, typically a value saved from getpos()."""
        assert 0 <= pos <= len(self.tokens), (pos, len(self.tokens))
        self.pos = pos

    def backup(self) -> None:
        """Move the position back by exactly one token (comments included)."""
        assert self.pos > 0
        self.pos -= 1

    def next(self, raw: bool = False) -> Token | None:
        """Return the next token and advance past it; None if at EOF.

        Comment tokens are skipped (and consumed) unless `raw` is true.
        """
        # TODO: Return synthetic EOF token instead of None?
        while self.pos < len(self.tokens):
            tok = self.tokens[self.pos]
            self.pos += 1
            if raw or tok.kind != "COMMENT":
                return tok
        return None

    def peek(self, raw: bool = False) -> Token | None:
        """Return the next token without advancing the position; None if at EOF."""
        # Save and restore the exact position rather than calling backup():
        # next() may have skipped several comments, or (at EOF) not moved at
        # all, so stepping back by one would leave the cursor in the wrong place.
        saved = self.pos
        tok = self.next(raw=raw)
        self.pos = saved
        return tok

    def maybe(self, kind: str, raw: bool = False) -> Token | None:
        """Return the next token, without advancing, if its kind is `kind`; else None."""
        tok = self.peek(raw=raw)
        if tok and tok.kind == kind:
            return tok
        return None

    def expect(self, kind: str) -> Token | None:
        """Return the next token and advance if its kind is `kind`.

        On a mismatch (or at EOF) return None and leave the position where
        it was before the call.
        """
        saved = self.pos
        tkn = self.next()
        if tkn is not None and tkn.kind == kind:
            return tkn
        # Undo everything next() consumed, including any skipped comments.
        self.pos = saved
        return None

    def require(self, kind: str) -> Token:
        """Return the next token and advance, requiring its kind to be `kind`.

        Raises the exception built by make_syntax_error() when the next
        token has a different kind or the input is exhausted.
        """
        tkn = self.next()
        if tkn is not None and tkn.kind == kind:
            return tkn
        raise self.make_syntax_error(f"Expected {kind!r} but got {tkn and tkn.text!r}", tkn)

    def extract_line(self, lineno: int) -> str:
        """Return source line `lineno` (1-based), or "" if it is out of range."""
        lines = self.src.splitlines()
        # Reject non-positive numbers too: lines[lineno - 1] would otherwise
        # wrap around and return a line counted from the end.
        if not 1 <= lineno <= len(lines):
            return ""
        return lines[lineno - 1]

    def make_syntax_error(self, message: str, tkn: Token|None = None) -> SyntaxError:
        """Construct (but do not raise) a syntax error for `message`.

        The location comes from `tkn`; when it is None the next token is
        used, falling back to the last token at EOF.  With an empty token
        list that fallback raises IndexError.
        """
        if tkn is None:
            tkn = self.peek()
        if tkn is None:
            tkn = self.tokens[-1]
        return lx.make_syntax_error(message,
            self.filename, tkn.line, tkn.column, self.extract_line(tkn.line))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Choose the input: nothing -> built-in sample; "-c SOURCE" -> literal
    # source text; anything else -> path of a file to read.
    if not sys.argv[1:]:
        filename = None
        src = "if (x) { x.foo; // comment\n}"
    elif sys.argv[1] == "-c" and sys.argv[2:]:
        filename = None
        src = sys.argv[2]
    else:
        filename = sys.argv[1]
        with open(filename) as f:
            src = f.read()

    # Dump every token (comments included): its repr clipped/padded to 40
    # columns, followed by the text the lexer module renders for it.
    p = PLexer(src, filename)
    while not p.eof():
        tok = p.next(raw=True)
        print(f"{repr(tok):40.40} {lx.to_text([tok]).rstrip()}")
|
Loading…
Add table
Add a link
Reference in a new issue