GH-98831: Add macro and op and their implementation to DSL (#99495)

Newly supported interpreter definition syntax:
- `op(NAME, (input_stack_effects -- output_stack_effects)) { ... }`
- `macro(NAME) = OP1 + OP2;`

Also includes some other miscellaneous improvements:
- Convert `WITH_EXCEPT_START` to use stack effects
- Fix lexer to balk at unrecognized characters, e.g. `@`
- Fix moved output names; support object pointers in cache
- Introduce `error()` method to print errors
- Introduce `read_u16(p)` as equivalent to `*p`

Co-authored-by: Brandt Bucher <brandtbucher@gmail.com>
This commit is contained in:
Guido van Rossum 2022-11-22 16:04:57 -08:00 committed by GitHub
parent f1a4a6a587
commit 8f18ac04d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 325 additions and 135 deletions

View file

@ -293,6 +293,12 @@ write_obj(uint16_t *p, PyObject *val)
memcpy(p, &val, sizeof(val)); memcpy(p, &val, sizeof(val));
} }
static inline uint16_t
read_u16(uint16_t *p)
{
return *p;
}
static inline uint32_t static inline uint32_t
read_u32(uint16_t *p) read_u32(uint16_t *p)
{ {

View file

@ -70,6 +70,8 @@ do { \
#define DISPATCH_SAME_OPARG() ((void)0) #define DISPATCH_SAME_OPARG() ((void)0)
#define inst(name, ...) case name: #define inst(name, ...) case name:
#define op(name, ...) /* NAME is ignored */
#define macro(name) static int MACRO_##name
#define super(name) static int SUPER_##name #define super(name) static int SUPER_##name
#define family(name, ...) static int family_##name #define family(name, ...) static int family_##name
@ -80,6 +82,7 @@ do { \
static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub; static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub;
static PyObject *container, *start, *stop, *v, *lhs, *rhs; static PyObject *container, *start, *stop, *v, *lhs, *rhs;
static PyObject *list, *tuple, *dict; static PyObject *list, *tuple, *dict;
static PyObject *exit_func, *lasti, *val;
static PyObject * static PyObject *
dummy_func( dummy_func(
@ -156,10 +159,7 @@ dummy_func(
res = NULL; res = NULL;
} }
inst(END_FOR, (value1, value2 --)) { macro(END_FOR) = POP_TOP + POP_TOP;
Py_DECREF(value1);
Py_DECREF(value2);
}
inst(UNARY_POSITIVE, (value -- res)) { inst(UNARY_POSITIVE, (value -- res)) {
res = PyNumber_Positive(value); res = PyNumber_Positive(value);
@ -2725,33 +2725,27 @@ dummy_func(
PUSH(res); PUSH(res);
} }
// stack effect: ( -- __0) inst(WITH_EXCEPT_START, (exit_func, lasti, unused, val -- exit_func, lasti, unused, val, res)) {
inst(WITH_EXCEPT_START) {
/* At the top of the stack are 4 values: /* At the top of the stack are 4 values:
- TOP = exc_info() - val: TOP = exc_info()
- SECOND = previous exception - unused: SECOND = previous exception
- THIRD: lasti of exception in exc_info() - lasti: THIRD = lasti of exception in exc_info()
- FOURTH: the context.__exit__ bound method - exit_func: FOURTH = the context.__exit__ bound method
We call FOURTH(type(TOP), TOP, GetTraceback(TOP)). We call FOURTH(type(TOP), TOP, GetTraceback(TOP)).
Then we push the __exit__ return value. Then we push the __exit__ return value.
*/ */
PyObject *exit_func; PyObject *exc, *tb;
PyObject *exc, *val, *tb, *res;
val = TOP();
assert(val && PyExceptionInstance_Check(val)); assert(val && PyExceptionInstance_Check(val));
exc = PyExceptionInstance_Class(val); exc = PyExceptionInstance_Class(val);
tb = PyException_GetTraceback(val); tb = PyException_GetTraceback(val);
Py_XDECREF(tb); Py_XDECREF(tb);
assert(PyLong_Check(PEEK(3))); assert(PyLong_Check(lasti));
exit_func = PEEK(4); (void)lasti; // Shut up compiler warning if asserts are off
PyObject *stack[4] = {NULL, exc, val, tb}; PyObject *stack[4] = {NULL, exc, val, tb};
res = PyObject_Vectorcall(exit_func, stack + 1, res = PyObject_Vectorcall(exit_func, stack + 1,
3 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); 3 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
if (res == NULL) ERROR_IF(res == NULL, error);
goto error;
PUSH(res);
} }
// stack effect: ( -- __0) // stack effect: ( -- __0)

View file

@ -78,15 +78,6 @@
DISPATCH(); DISPATCH();
} }
TARGET(END_FOR) {
PyObject *value2 = PEEK(1);
PyObject *value1 = PEEK(2);
Py_DECREF(value1);
Py_DECREF(value2);
STACK_SHRINK(2);
DISPATCH();
}
TARGET(UNARY_POSITIVE) { TARGET(UNARY_POSITIVE) {
PyObject *value = PEEK(1); PyObject *value = PEEK(1);
PyObject *res; PyObject *res;
@ -446,7 +437,7 @@
TARGET(BINARY_SUBSCR_GETITEM) { TARGET(BINARY_SUBSCR_GETITEM) {
uint32_t type_version = read_u32(next_instr + 1); uint32_t type_version = read_u32(next_instr + 1);
uint16_t func_version = *(next_instr + 3); uint16_t func_version = read_u16(next_instr + 3);
PyObject *sub = PEEK(1); PyObject *sub = PEEK(1);
PyObject *container = PEEK(2); PyObject *container = PEEK(2);
PyTypeObject *tp = Py_TYPE(container); PyTypeObject *tp = Py_TYPE(container);
@ -2754,31 +2745,32 @@
} }
TARGET(WITH_EXCEPT_START) { TARGET(WITH_EXCEPT_START) {
PyObject *val = PEEK(1);
PyObject *lasti = PEEK(3);
PyObject *exit_func = PEEK(4);
PyObject *res;
/* At the top of the stack are 4 values: /* At the top of the stack are 4 values:
- TOP = exc_info() - val: TOP = exc_info()
- SECOND = previous exception - unused: SECOND = previous exception
- THIRD: lasti of exception in exc_info() - lasti: THIRD = lasti of exception in exc_info()
- FOURTH: the context.__exit__ bound method - exit_func: FOURTH = the context.__exit__ bound method
We call FOURTH(type(TOP), TOP, GetTraceback(TOP)). We call FOURTH(type(TOP), TOP, GetTraceback(TOP)).
Then we push the __exit__ return value. Then we push the __exit__ return value.
*/ */
PyObject *exit_func; PyObject *exc, *tb;
PyObject *exc, *val, *tb, *res;
val = TOP();
assert(val && PyExceptionInstance_Check(val)); assert(val && PyExceptionInstance_Check(val));
exc = PyExceptionInstance_Class(val); exc = PyExceptionInstance_Class(val);
tb = PyException_GetTraceback(val); tb = PyException_GetTraceback(val);
Py_XDECREF(tb); Py_XDECREF(tb);
assert(PyLong_Check(PEEK(3))); assert(PyLong_Check(lasti));
exit_func = PEEK(4); (void)lasti; // Shut up compiler warning if asserts are off
PyObject *stack[4] = {NULL, exc, val, tb}; PyObject *stack[4] = {NULL, exc, val, tb};
res = PyObject_Vectorcall(exit_func, stack + 1, res = PyObject_Vectorcall(exit_func, stack + 1,
3 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); 3 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
if (res == NULL) if (res == NULL) goto error;
goto error; STACK_GROW(1);
POKE(1, res);
PUSH(res);
DISPATCH(); DISPATCH();
} }
@ -3711,13 +3703,14 @@
} }
TARGET(LOAD_FAST__LOAD_FAST) { TARGET(LOAD_FAST__LOAD_FAST) {
PyObject *_tmp_1;
PyObject *_tmp_2;
{ {
PyObject *value; PyObject *value;
value = GETLOCAL(oparg); value = GETLOCAL(oparg);
assert(value != NULL); assert(value != NULL);
Py_INCREF(value); Py_INCREF(value);
STACK_GROW(1); _tmp_1 = value;
POKE(1, value);
} }
NEXTOPARG(); NEXTOPARG();
next_instr++; next_instr++;
@ -3726,20 +3719,23 @@
value = GETLOCAL(oparg); value = GETLOCAL(oparg);
assert(value != NULL); assert(value != NULL);
Py_INCREF(value); Py_INCREF(value);
STACK_GROW(1); _tmp_2 = value;
POKE(1, value);
} }
STACK_GROW(2);
POKE(1, _tmp_2);
POKE(2, _tmp_1);
DISPATCH(); DISPATCH();
} }
TARGET(LOAD_FAST__LOAD_CONST) { TARGET(LOAD_FAST__LOAD_CONST) {
PyObject *_tmp_1;
PyObject *_tmp_2;
{ {
PyObject *value; PyObject *value;
value = GETLOCAL(oparg); value = GETLOCAL(oparg);
assert(value != NULL); assert(value != NULL);
Py_INCREF(value); Py_INCREF(value);
STACK_GROW(1); _tmp_1 = value;
POKE(1, value);
} }
NEXTOPARG(); NEXTOPARG();
next_instr++; next_instr++;
@ -3747,17 +3743,19 @@
PyObject *value; PyObject *value;
value = GETITEM(consts, oparg); value = GETITEM(consts, oparg);
Py_INCREF(value); Py_INCREF(value);
STACK_GROW(1); _tmp_2 = value;
POKE(1, value);
} }
STACK_GROW(2);
POKE(1, _tmp_2);
POKE(2, _tmp_1);
DISPATCH(); DISPATCH();
} }
TARGET(STORE_FAST__LOAD_FAST) { TARGET(STORE_FAST__LOAD_FAST) {
PyObject *_tmp_1 = PEEK(1);
{ {
PyObject *value = PEEK(1); PyObject *value = _tmp_1;
SETLOCAL(oparg, value); SETLOCAL(oparg, value);
STACK_SHRINK(1);
} }
NEXTOPARG(); NEXTOPARG();
next_instr++; next_instr++;
@ -3766,35 +3764,37 @@
value = GETLOCAL(oparg); value = GETLOCAL(oparg);
assert(value != NULL); assert(value != NULL);
Py_INCREF(value); Py_INCREF(value);
STACK_GROW(1); _tmp_1 = value;
POKE(1, value);
} }
POKE(1, _tmp_1);
DISPATCH(); DISPATCH();
} }
TARGET(STORE_FAST__STORE_FAST) { TARGET(STORE_FAST__STORE_FAST) {
PyObject *_tmp_1 = PEEK(2);
PyObject *_tmp_2 = PEEK(1);
{ {
PyObject *value = PEEK(1); PyObject *value = _tmp_2;
SETLOCAL(oparg, value); SETLOCAL(oparg, value);
STACK_SHRINK(1);
} }
NEXTOPARG(); NEXTOPARG();
next_instr++; next_instr++;
{ {
PyObject *value = PEEK(1); PyObject *value = _tmp_1;
SETLOCAL(oparg, value); SETLOCAL(oparg, value);
STACK_SHRINK(1);
} }
STACK_SHRINK(2);
DISPATCH(); DISPATCH();
} }
TARGET(LOAD_CONST__LOAD_FAST) { TARGET(LOAD_CONST__LOAD_FAST) {
PyObject *_tmp_1;
PyObject *_tmp_2;
{ {
PyObject *value; PyObject *value;
value = GETITEM(consts, oparg); value = GETITEM(consts, oparg);
Py_INCREF(value); Py_INCREF(value);
STACK_GROW(1); _tmp_1 = value;
POKE(1, value);
} }
NEXTOPARG(); NEXTOPARG();
next_instr++; next_instr++;
@ -3803,8 +3803,25 @@
value = GETLOCAL(oparg); value = GETLOCAL(oparg);
assert(value != NULL); assert(value != NULL);
Py_INCREF(value); Py_INCREF(value);
STACK_GROW(1); _tmp_2 = value;
POKE(1, value);
} }
STACK_GROW(2);
POKE(1, _tmp_2);
POKE(2, _tmp_1);
DISPATCH();
}
TARGET(END_FOR) {
PyObject *_tmp_1 = PEEK(2);
PyObject *_tmp_2 = PEEK(1);
{
PyObject *value = _tmp_2;
Py_DECREF(value);
}
{
PyObject *value = _tmp_1;
Py_DECREF(value);
}
STACK_SHRINK(2);
DISPATCH(); DISPATCH();
} }

View file

@ -2,9 +2,9 @@
What's currently here: What's currently here:
- lexer.py: lexer for C, originally written by Mark Shannon - `lexer.py`: lexer for C, originally written by Mark Shannon
- plexer.py: OO interface on top of lexer.py; main class: `PLexer` - `plexer.py`: OO interface on top of lexer.py; main class: `PLexer`
- parser.py: Parser for instruction definition DSL; main class `Parser` - `parser.py`: Parser for instruction definition DSL; main class `Parser`
- `generate_cases.py`: driver script to read `Python/bytecodes.c` and - `generate_cases.py`: driver script to read `Python/bytecodes.c` and
write `Python/generated_cases.c.h` write `Python/generated_cases.c.h`

View file

@ -5,6 +5,8 @@ Writes the cases to generated_cases.c.h, which is #included in ceval.c.
""" """
import argparse import argparse
import contextlib
import dataclasses
import os import os
import re import re
import sys import sys
@ -17,6 +19,8 @@ DEFAULT_OUTPUT = "Python/generated_cases.c.h"
BEGIN_MARKER = "// BEGIN BYTECODES //" BEGIN_MARKER = "// BEGIN BYTECODES //"
END_MARKER = "// END BYTECODES //" END_MARKER = "// END BYTECODES //"
RE_PREDICTED = r"(?s)(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);" RE_PREDICTED = r"(?s)(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);"
UNUSED = "unused"
BITS_PER_CODE_UNIT = 16
arg_parser = argparse.ArgumentParser() arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("-i", "--input", type=str, default=DEFAULT_INPUT) arg_parser.add_argument("-i", "--input", type=str, default=DEFAULT_INPUT)
@ -51,9 +55,7 @@ class Instruction(parser.InstDef):
] ]
self.output_effects = self.outputs # For consistency/completeness self.output_effects = self.outputs # For consistency/completeness
def write( def write(self, f: typing.TextIO, indent: str, dedent: int = 0) -> None:
self, f: typing.TextIO, indent: str, dedent: int = 0
) -> None:
"""Write one instruction, sans prologue and epilogue.""" """Write one instruction, sans prologue and epilogue."""
if dedent < 0: if dedent < 0:
indent += " " * -dedent # DO WE NEED THIS? indent += " " * -dedent # DO WE NEED THIS?
@ -70,25 +72,33 @@ class Instruction(parser.InstDef):
# Write cache effect variable declarations # Write cache effect variable declarations
cache_offset = 0 cache_offset = 0
for ceffect in self.cache_effects: for ceffect in self.cache_effects:
if ceffect.name != "unused": if ceffect.name != UNUSED:
# TODO: if name is 'descr' use PyObject *descr = read_obj(...) bits = ceffect.size * BITS_PER_CODE_UNIT
bits = ceffect.size * 16 if bits == 64:
f.write(f"{indent} uint{bits}_t {ceffect.name} = ") # NOTE: We assume that 64-bit data in the cache
if ceffect.size == 1: # is always an object pointer.
f.write(f"*(next_instr + {cache_offset});\n") # If this becomes false, we need a way to specify
# syntactically what type the cache data is.
f.write(
f"{indent} PyObject *{ceffect.name} = "
f"read_obj(next_instr + {cache_offset});\n"
)
else: else:
f.write(f"read_u{bits}(next_instr + {cache_offset});\n") f.write(f"{indent} uint{bits}_t {ceffect.name} = "
f"read_u{bits}(next_instr + {cache_offset});\n")
cache_offset += ceffect.size cache_offset += ceffect.size
assert cache_offset == self.cache_offset assert cache_offset == self.cache_offset
# Write input stack effect variable declarations and initializations # Write input stack effect variable declarations and initializations
for i, seffect in enumerate(reversed(self.input_effects), 1): for i, seffect in enumerate(reversed(self.input_effects), 1):
if seffect.name != "unused": if seffect.name != UNUSED:
f.write(f"{indent} PyObject *{seffect.name} = PEEK({i});\n") f.write(f"{indent} PyObject *{seffect.name} = PEEK({i});\n")
# Write output stack effect variable declarations # Write output stack effect variable declarations
input_names = {seffect.name for seffect in self.input_effects}
input_names.add(UNUSED)
for seffect in self.output_effects: for seffect in self.output_effects:
if seffect.name != "unused": if seffect.name not in input_names:
f.write(f"{indent} PyObject *{seffect.name};\n") f.write(f"{indent} PyObject *{seffect.name};\n")
self.write_body(f, indent, dedent) self.write_body(f, indent, dedent)
@ -105,21 +115,22 @@ class Instruction(parser.InstDef):
f.write(f"{indent} STACK_SHRINK({-diff});\n") f.write(f"{indent} STACK_SHRINK({-diff});\n")
# Write output stack effect assignments # Write output stack effect assignments
input_names = [seffect.name for seffect in self.input_effects] unmoved_names = {UNUSED}
for i, output in enumerate(reversed(self.output_effects), 1): for ieffect, oeffect in zip(self.input_effects, self.output_effects):
if output.name not in input_names and output.name != "unused": if ieffect.name == oeffect.name:
f.write(f"{indent} POKE({i}, {output.name});\n") unmoved_names.add(ieffect.name)
for i, seffect in enumerate(reversed(self.output_effects)):
if seffect.name not in unmoved_names:
f.write(f"{indent} POKE({i+1}, {seffect.name});\n")
# Write cache effect # Write cache effect
if self.cache_offset: if self.cache_offset:
f.write(f"{indent} next_instr += {self.cache_offset};\n") f.write(f"{indent} next_instr += {self.cache_offset};\n")
def write_body( def write_body(self, f: typing.TextIO, ndent: str, dedent: int) -> None:
self, f: typing.TextIO, ndent: str, dedent: int
) -> None:
"""Write the instruction body.""" """Write the instruction body."""
# Get lines of text with proper dedelt # Get lines of text with proper dedent
blocklines = self.block.to_text(dedent=dedent).splitlines(True) blocklines = self.block.to_text(dedent=dedent).splitlines(True)
# Remove blank lines from both ends # Remove blank lines from both ends
@ -146,6 +157,13 @@ class Instruction(parser.InstDef):
# The code block is responsible for DECREF()ing them. # The code block is responsible for DECREF()ing them.
# NOTE: If the label doesn't exist, just add it to ceval.c. # NOTE: If the label doesn't exist, just add it to ceval.c.
ninputs = len(self.input_effects) ninputs = len(self.input_effects)
# Don't pop common input/output effects at the bottom!
# These aren't DECREF'ed so they can stay.
for ieff, oeff in zip(self.input_effects, self.output_effects):
if ieff.name == oeff.name:
ninputs -= 1
else:
break
if ninputs: if ninputs:
f.write(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n") f.write(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n")
else: else:
@ -154,6 +172,84 @@ class Instruction(parser.InstDef):
f.write(line) f.write(line)
@dataclasses.dataclass
class SuperComponent:
instr: Instruction
input_mapping: dict[str, parser.StackEffect]
output_mapping: dict[str, parser.StackEffect]
class SuperInstruction(parser.Super):
stack: list[str]
initial_sp: int
final_sp: int
parts: list[SuperComponent]
def __init__(self, sup: parser.Super):
super().__init__(sup.kind, sup.name, sup.ops)
self.context = sup.context
def analyze(self, a: "Analyzer") -> None:
components = self.check_components(a)
self.stack, self.initial_sp = self.super_macro_analysis(a, components)
sp = self.initial_sp
self.parts = []
for instr in components:
input_mapping = {}
for ieffect in reversed(instr.input_effects):
sp -= 1
if ieffect.name != UNUSED:
input_mapping[self.stack[sp]] = ieffect
output_mapping = {}
for oeffect in instr.output_effects:
if oeffect.name != UNUSED:
output_mapping[self.stack[sp]] = oeffect
sp += 1
self.parts.append(SuperComponent(instr, input_mapping, output_mapping))
self.final_sp = sp
def check_components(self, a: "Analyzer") -> list[Instruction]:
components: list[Instruction] = []
if not self.ops:
a.error(f"{self.kind.capitalize()}-instruction has no operands", self)
for name in self.ops:
if name not in a.instrs:
a.error(f"Unknown instruction {name!r}", self)
else:
instr = a.instrs[name]
if self.kind == "super" and instr.kind != "inst":
a.error(f"Super-instruction operand {instr.name} must be inst, not op", instr)
components.append(instr)
return components
def super_macro_analysis(
self, a: "Analyzer", components: list[Instruction]
) -> tuple[list[str], int]:
"""Analyze a super-instruction or macro.
Print an error if there's a cache effect (which we don't support yet).
Return the list of variable names and the initial stack pointer.
"""
lowest = current = highest = 0
for instr in components:
if instr.cache_effects:
a.error(
f"Super-instruction {self.name!r} has cache effects in {instr.name!r}",
instr,
)
current -= len(instr.input_effects)
lowest = min(lowest, current)
current += len(instr.output_effects)
highest = max(highest, current)
# At this point, 'current' is the net stack effect,
# and 'lowest' and 'highest' are the extremes.
# Note that 'lowest' may be negative.
stack = [f"_tmp_{i+1}" for i in range(highest - lowest)]
return stack, -lowest
class Analyzer: class Analyzer:
"""Parse input, analyze it, and write to output.""" """Parse input, analyze it, and write to output."""
@ -161,14 +257,26 @@ class Analyzer:
src: str src: str
errors: int = 0 errors: int = 0
def error(self, msg: str, node: parser.Node) -> None:
lineno = 0
if context := node.context:
# Use line number of first non-comment in the node
for token in context.owner.tokens[context.begin : context.end]:
lineno = token.line
if token.kind != "COMMENT":
break
print(f"{self.filename}:{lineno}: {msg}", file=sys.stderr)
self.errors += 1
def __init__(self, filename: str): def __init__(self, filename: str):
"""Read the input file.""" """Read the input file."""
self.filename = filename self.filename = filename
with open(filename) as f: with open(filename) as f:
self.src = f.read() self.src = f.read()
instrs: dict[str, Instruction] instrs: dict[str, Instruction] # Includes ops
supers: dict[str, parser.Super] supers: dict[str, parser.Super] # Includes macros
super_instrs: dict[str, SuperInstruction]
families: dict[str, parser.Family] families: dict[str, parser.Family]
def parse(self) -> None: def parse(self) -> None:
@ -180,7 +288,9 @@ class Analyzer:
if tkn.text == BEGIN_MARKER: if tkn.text == BEGIN_MARKER:
break break
else: else:
raise psr.make_syntax_error(f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}") raise psr.make_syntax_error(
f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}"
)
# Parse until end marker # Parse until end marker
self.instrs = {} self.instrs = {}
@ -198,7 +308,7 @@ class Analyzer:
print( print(
f"Read {len(self.instrs)} instructions, " f"Read {len(self.instrs)} instructions, "
f"{len(self.supers)} supers, " f"{len(self.supers)} supers/macros, "
f"and {len(self.families)} families from {self.filename}", f"and {len(self.families)} families from {self.filename}",
file=sys.stderr, file=sys.stderr,
) )
@ -211,6 +321,7 @@ class Analyzer:
self.find_predictions() self.find_predictions()
self.map_families() self.map_families()
self.check_families() self.check_families()
self.analyze_supers()
def find_predictions(self) -> None: def find_predictions(self) -> None:
"""Find the instructions that need PREDICTED() labels.""" """Find the instructions that need PREDICTED() labels."""
@ -219,11 +330,10 @@ class Analyzer:
if target_instr := self.instrs.get(target): if target_instr := self.instrs.get(target):
target_instr.predicted = True target_instr.predicted = True
else: else:
print( self.error(
f"Unknown instruction {target!r} predicted in {instr.name!r}", f"Unknown instruction {target!r} predicted in {instr.name!r}",
file=sys.stderr, instr, # TODO: Use better location
) )
self.errors += 1
def map_families(self) -> None: def map_families(self) -> None:
"""Make instruction names back to their family, if they have one.""" """Make instruction names back to their family, if they have one."""
@ -232,11 +342,10 @@ class Analyzer:
if member_instr := self.instrs.get(member): if member_instr := self.instrs.get(member):
member_instr.family = family member_instr.family = family
else: else:
print( self.error(
f"Unknown instruction {member!r} referenced in family {family.name!r}", f"Unknown instruction {member!r} referenced in family {family.name!r}",
file=sys.stderr, family,
) )
self.errors += 1
def check_families(self) -> None: def check_families(self) -> None:
"""Check each family: """Check each family:
@ -247,13 +356,11 @@ class Analyzer:
""" """
for family in self.families.values(): for family in self.families.values():
if len(family.members) < 2: if len(family.members) < 2:
print(f"Family {family.name!r} has insufficient members") self.error(f"Family {family.name!r} has insufficient members", family)
self.errors += 1
members = [member for member in family.members if member in self.instrs] members = [member for member in family.members if member in self.instrs]
if members != family.members: if members != family.members:
unknown = set(family.members) - set(members) unknown = set(family.members) - set(members)
print(f"Family {family.name!r} has unknown members: {unknown}") self.error(f"Family {family.name!r} has unknown members: {unknown}", family)
self.errors += 1
if len(members) < 2: if len(members) < 2:
continue continue
head = self.instrs[members[0]] head = self.instrs[members[0]]
@ -266,18 +373,21 @@ class Analyzer:
i = len(instr.input_effects) i = len(instr.input_effects)
o = len(instr.output_effects) o = len(instr.output_effects)
if (c, i, o) != (cache, input, output): if (c, i, o) != (cache, input, output):
self.errors += 1 self.error(
print(
f"Family {family.name!r} has inconsistent " f"Family {family.name!r} has inconsistent "
f"(cache, inputs, outputs) effects:", f"(cache, inputs, outputs) effects:\n"
file=sys.stderr,
)
print(
f" {family.members[0]} = {(cache, input, output)}; " f" {family.members[0]} = {(cache, input, output)}; "
f"{member} = {(c, i, o)}", f"{member} = {(c, i, o)}",
file=sys.stderr, family,
) )
self.errors += 1
def analyze_supers(self) -> None:
"""Analyze each super instruction."""
self.super_instrs = {}
for name, sup in self.supers.items():
dup = SuperInstruction(sup)
dup.analyze(self)
self.super_instrs[name] = dup
def write_instructions(self, filename: str) -> None: def write_instructions(self, filename: str) -> None:
"""Write instructions to output file.""" """Write instructions to output file."""
@ -289,7 +399,11 @@ class Analyzer:
f.write(f"// Do not edit!\n") f.write(f"// Do not edit!\n")
# Write regular instructions # Write regular instructions
n_instrs = 0
for name, instr in self.instrs.items(): for name, instr in self.instrs.items():
if instr.kind != "inst":
continue # ops are not real instructions
n_instrs += 1
f.write(f"\n{indent}TARGET({name}) {{\n") f.write(f"\n{indent}TARGET({name}) {{\n")
if instr.predicted: if instr.predicted:
f.write(f"{indent} PREDICTED({name});\n") f.write(f"{indent} PREDICTED({name});\n")
@ -298,26 +412,75 @@ class Analyzer:
f.write(f"{indent} DISPATCH();\n") f.write(f"{indent} DISPATCH();\n")
f.write(f"{indent}}}\n") f.write(f"{indent}}}\n")
# Write super-instructions # Write super-instructions and macros
for name, sup in self.supers.items(): n_supers = 0
components = [self.instrs[name] for name in sup.ops] n_macros = 0
f.write(f"\n{indent}TARGET({sup.name}) {{\n") for sup in self.super_instrs.values():
for i, instr in enumerate(components): if sup.kind == "super":
if i > 0: n_supers += 1
f.write(f"{indent} NEXTOPARG();\n") elif sup.kind == "macro":
f.write(f"{indent} next_instr++;\n") n_macros += 1
f.write(f"{indent} {{\n") self.write_super_macro(f, sup, indent)
instr.write(f, indent, dedent=-4)
f.write(f" {indent}}}\n")
f.write(f"{indent} DISPATCH();\n")
f.write(f"{indent}}}\n")
print( print(
f"Wrote {len(self.instrs)} instructions and " f"Wrote {n_instrs} instructions, {n_supers} supers, "
f"{len(self.supers)} super-instructions to {filename}", f"and {n_macros} macros to {filename}",
file=sys.stderr, file=sys.stderr,
) )
def write_super_macro(
self, f: typing.TextIO, sup: SuperInstruction, indent: str = ""
) -> None:
# TODO: Make write() and block() methods of some Formatter class
def write(arg: str) -> None:
if arg:
f.write(f"{indent}{arg}\n")
else:
f.write("\n")
@contextlib.contextmanager
def block(head: str):
if head:
write(head + " {")
else:
write("{")
nonlocal indent
indent += " "
yield
indent = indent[:-4]
write("}")
write("")
with block(f"TARGET({sup.name})"):
for i, var in enumerate(sup.stack):
if i < sup.initial_sp:
write(f"PyObject *{var} = PEEK({sup.initial_sp - i});")
else:
write(f"PyObject *{var};")
for i, comp in enumerate(sup.parts):
if i > 0 and sup.kind == "super":
write("NEXTOPARG();")
write("next_instr++;")
with block(""):
for var, ieffect in comp.input_mapping.items():
write(f"PyObject *{ieffect.name} = {var};")
for oeffect in comp.output_mapping.values():
write(f"PyObject *{oeffect.name};")
comp.instr.write_body(f, indent, dedent=-4)
for var, oeffect in comp.output_mapping.items():
write(f"{var} = {oeffect.name};")
if sup.final_sp > sup.initial_sp:
write(f"STACK_GROW({sup.final_sp - sup.initial_sp});")
elif sup.final_sp < sup.initial_sp:
write(f"STACK_SHRINK({sup.initial_sp - sup.final_sp});")
for i, var in enumerate(reversed(sup.stack[:sup.final_sp]), 1):
write(f"POKE({i}, {var});")
write("DISPATCH();")
def always_exits(block: parser.Block) -> bool: def always_exits(block: parser.Block) -> bool:
"""Determine whether a block always ends in a return/goto/etc.""" """Determine whether a block always ends in a return/goto/etc."""

View file

@ -112,7 +112,8 @@ comment_re = r'//.*|/\*([^*]|\*[^/])*\*/'
COMMENT = 'COMMENT' COMMENT = 'COMMENT'
newline = r"\n" newline = r"\n"
matcher = re.compile(choice(id_re, number_re, str_re, char, newline, macro, comment_re, *operators.values())) invalid = r"\S" # A single non-space character that's not caught by any of the other patterns
matcher = re.compile(choice(id_re, number_re, str_re, char, newline, macro, comment_re, *operators.values(), invalid))
letter = re.compile(r'[a-zA-Z_]') letter = re.compile(r'[a-zA-Z_]')
kwds = ( kwds = (
@ -177,7 +178,6 @@ class Token:
def tokenize(src, line=1, filename=None): def tokenize(src, line=1, filename=None):
linestart = -1 linestart = -1
# TODO: finditer() skips over unrecognized characters, e.g. '@'
for m in matcher.finditer(src): for m in matcher.finditer(src):
start, end = m.span() start, end = m.span()
text = m.group(0) text = m.group(0)

View file

@ -1,7 +1,7 @@
"""Parser for bytecodes.inst.""" """Parser for bytecodes.inst."""
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import NamedTuple, Callable, TypeVar from typing import NamedTuple, Callable, TypeVar, Literal
import lexer as lx import lexer as lx
from plexer import PLexer from plexer import PLexer
@ -74,6 +74,7 @@ OutputEffect = StackEffect
@dataclass @dataclass
class InstHeader(Node): class InstHeader(Node):
kind: Literal["inst", "op"]
name: str name: str
inputs: list[InputEffect] inputs: list[InputEffect]
outputs: list[OutputEffect] outputs: list[OutputEffect]
@ -81,9 +82,14 @@ class InstHeader(Node):
@dataclass @dataclass
class InstDef(Node): class InstDef(Node):
# TODO: Merge InstHeader and InstDef
header: InstHeader header: InstHeader
block: Block block: Block
@property
def kind(self) -> str:
return self.header.kind
@property @property
def name(self) -> str: def name(self) -> str:
return self.header.name return self.header.name
@ -93,12 +99,13 @@ class InstDef(Node):
return self.header.inputs return self.header.inputs
@property @property
def outputs(self) -> list[StackEffect]: def outputs(self) -> list[OutputEffect]:
return self.header.outputs return self.header.outputs
@dataclass @dataclass
class Super(Node): class Super(Node):
kind: Literal["macro", "super"]
name: str name: str
ops: list[str] ops: list[str]
@ -122,10 +129,12 @@ class Parser(PLexer):
@contextual @contextual
def inst_header(self) -> InstHeader | None: def inst_header(self) -> InstHeader | None:
# inst(NAME) | inst(NAME, (inputs -- outputs)) # inst(NAME)
# | inst(NAME, (inputs -- outputs))
# | op(NAME, (inputs -- outputs))
# TODO: Error out when there is something unexpected. # TODO: Error out when there is something unexpected.
# TODO: Make INST a keyword in the lexer. # TODO: Make INST a keyword in the lexer.
if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "inst": if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("inst", "op"):
if (self.expect(lx.LPAREN) if (self.expect(lx.LPAREN)
and (tkn := self.expect(lx.IDENTIFIER))): and (tkn := self.expect(lx.IDENTIFIER))):
name = tkn.text name = tkn.text
@ -134,9 +143,10 @@ class Parser(PLexer):
if self.expect(lx.RPAREN): if self.expect(lx.RPAREN):
if ((tkn := self.peek()) if ((tkn := self.peek())
and tkn.kind == lx.LBRACE): and tkn.kind == lx.LBRACE):
return InstHeader(name, inp, outp) return InstHeader(kind, name, inp, outp)
elif self.expect(lx.RPAREN): elif self.expect(lx.RPAREN) and kind == "inst":
return InstHeader(name, [], []) # No legacy stack effect if kind is "op".
return InstHeader(kind, name, [], [])
return None return None
def stack_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]: def stack_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:
@ -200,13 +210,13 @@ class Parser(PLexer):
@contextual @contextual
def super_def(self) -> Super | None: def super_def(self) -> Super | None:
if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "super": if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("super", "macro"):
if self.expect(lx.LPAREN): if self.expect(lx.LPAREN):
if (tkn := self.expect(lx.IDENTIFIER)): if (tkn := self.expect(lx.IDENTIFIER)):
if self.expect(lx.RPAREN): if self.expect(lx.RPAREN):
if self.expect(lx.EQUALS): if self.expect(lx.EQUALS):
if ops := self.ops(): if ops := self.ops():
res = Super(tkn.text, ops) res = Super(kind, tkn.text, ops)
return res return res
def ops(self) -> list[str] | None: def ops(self) -> list[str] | None:
@ -278,7 +288,7 @@ if __name__ == "__main__":
filename = sys.argv[1] filename = sys.argv[1]
if filename == "-c" and sys.argv[2:]: if filename == "-c" and sys.argv[2:]:
src = sys.argv[2] src = sys.argv[2]
filename = None filename = "<string>"
else: else:
with open(filename) as f: with open(filename) as f:
src = f.read() src = f.read()
@ -287,7 +297,7 @@ if __name__ == "__main__":
end = srclines.index("// END BYTECODES //") end = srclines.index("// END BYTECODES //")
src = "\n".join(srclines[begin+1 : end]) src = "\n".join(srclines[begin+1 : end])
else: else:
filename = None filename = "<default>"
src = "if (x) { x.foo; // comment\n}" src = "if (x) { x.foo; // comment\n}"
parser = Parser(src, filename) parser = Parser(src, filename)
x = parser.inst_def() or parser.super_def() or parser.family_def() x = parser.inst_def() or parser.super_def() or parser.family_def()