GH-98831: Update generate_cases.py: register inst, opcode_metadata.h (#100735)

(These aren't used yet, but may be coming soon,
and it's easier to keep this tool the same between branches.)

Added a sanity check for all this to compile.c.

Co-authored-by: Irit Katriel <iritkatriel@yahoo.com>
This commit is contained in:
Guido van Rossum 2023-01-05 13:01:07 -08:00 committed by GitHub
parent 28187141cc
commit 14b7f00fdf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 456 additions and 37 deletions

View file

@ -21,6 +21,9 @@ DEFAULT_INPUT = os.path.relpath(
DEFAULT_OUTPUT = os.path.relpath(
os.path.join(os.path.dirname(__file__), "../../Python/generated_cases.c.h")
)
DEFAULT_METADATA_OUTPUT = os.path.relpath(
os.path.join(os.path.dirname(__file__), "../../Python/opcode_metadata.h")
)
BEGIN_MARKER = "// BEGIN BYTECODES //"
END_MARKER = "// END BYTECODES //"
RE_PREDICTED = r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*$"
@ -37,6 +40,12 @@ arg_parser.add_argument(
arg_parser.add_argument(
"-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)
arg_parser.add_argument(
"-m",
"--metadata",
action="store_true",
help=f"Generate metadata instead, changes output default to {DEFAULT_METADATA_OUTPUT}",
)
class Formatter:
@ -96,6 +105,8 @@ class Formatter:
cast = self.cast(dst, src)
if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
self.emit(f"POKE({m.group(1)}, {cast}{src.name});")
elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name):
self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
else:
self.emit(f"{dst.name} = {cast}{src.name};")
@ -109,7 +120,8 @@ class Instruction:
# Parts of the underlying instruction definition
inst: parser.InstDef
kind: typing.Literal["inst", "op"]
register: bool
kind: typing.Literal["inst", "op", "legacy"] # Legacy means no (input -- output)
name: str
block: parser.Block
block_text: list[str] # Block.text, less curlies, less PREDICT() calls
@ -121,6 +133,9 @@ class Instruction:
cache_effects: list[parser.CacheEffect]
input_effects: list[StackEffect]
output_effects: list[StackEffect]
# Parallel to input_effects
input_registers: list[str] = dataclasses.field(repr=False)
output_registers: list[str] = dataclasses.field(repr=False)
# Set later
family: parser.Family | None = None
@ -129,6 +144,7 @@ class Instruction:
def __init__(self, inst: parser.InstDef):
self.inst = inst
self.register = inst.register
self.kind = inst.kind
self.name = inst.name
self.block = inst.block
@ -150,9 +166,24 @@ class Instruction:
break
self.unmoved_names = frozenset(unmoved_names)
def analyze_registers(self, a: "Analyzer") -> None:
regs = iter(("REG(oparg1)", "REG(oparg2)", "REG(oparg3)"))
try:
self.input_registers = [
next(regs) for ieff in self.input_effects if ieff.name != UNUSED
]
self.output_registers = [
next(regs) for oeff in self.output_effects if oeff.name != UNUSED
]
except StopIteration: # Running out of registers
a.error(
f"Instruction {self.name} has too many register effects", node=self.inst
)
def write(self, out: Formatter) -> None:
"""Write one instruction, sans prologue and epilogue."""
# Write a static assertion that a family's cache size is correct
if family := self.family:
if self.name == family.members[0]:
if cache_size := family.size:
@ -161,10 +192,16 @@ class Instruction:
f'{self.cache_offset}, "incorrect cache size");'
)
# Write input stack effect variable declarations and initializations
for i, ieffect in enumerate(reversed(self.input_effects), 1):
src = StackEffect(f"PEEK({i})", "")
out.declare(ieffect, src)
if not self.register:
# Write input stack effect variable declarations and initializations
for i, ieffect in enumerate(reversed(self.input_effects), 1):
src = StackEffect(f"PEEK({i})", "")
out.declare(ieffect, src)
else:
# Write input register variable declarations and initializations
for ieffect, reg in zip(self.input_effects, self.input_registers):
src = StackEffect(reg, "")
out.declare(ieffect, src)
# Write output stack effect variable declarations
input_names = {ieffect.name for ieffect in self.input_effects}
@ -172,20 +209,28 @@ class Instruction:
if oeffect.name not in input_names:
out.declare(oeffect, None)
# out.emit(f"JUMPBY(OPSIZE({self.inst.name}) - 1);")
self.write_body(out, 0)
# Skip the rest if the block always exits
if self.always_exits:
return
# Write net stack growth/shrinkage
diff = len(self.output_effects) - len(self.input_effects)
out.stack_adjust(diff)
if not self.register:
# Write net stack growth/shrinkage
diff = len(self.output_effects) - len(self.input_effects)
out.stack_adjust(diff)
# Write output stack effect assignments
for i, oeffect in enumerate(reversed(self.output_effects), 1):
if oeffect.name not in self.unmoved_names:
dst = StackEffect(f"PEEK({i})", "")
# Write output stack effect assignments
for i, oeffect in enumerate(reversed(self.output_effects), 1):
if oeffect.name not in self.unmoved_names:
dst = StackEffect(f"PEEK({i})", "")
out.assign(dst, oeffect)
else:
# Write output register assignments
for oeffect, reg in zip(self.output_effects, self.output_registers):
dst = StackEffect(reg, "")
out.assign(dst, oeffect)
# Write cache effect
@ -209,7 +254,9 @@ class Instruction:
else:
typ = f"uint{bits}_t "
func = f"read_u{bits}"
out.emit(f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);")
out.emit(
f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);"
)
cache_offset += ceffect.size
assert cache_offset == self.cache_offset + cache_adjust
@ -222,14 +269,17 @@ class Instruction:
# ERROR_IF() must pop the inputs from the stack.
# The code block is responsible for DECREF()ing them.
# NOTE: If the label doesn't exist, just add it to ceval.c.
ninputs = len(self.input_effects)
# Don't pop common input/output effects at the bottom!
# These aren't DECREF'ed so they can stay.
for ieff, oeff in zip(self.input_effects, self.output_effects):
if ieff.name == oeff.name:
ninputs -= 1
else:
break
if not self.register:
ninputs = len(self.input_effects)
# Don't pop common input/output effects at the bottom!
# These aren't DECREF'ed so they can stay.
for ieff, oeff in zip(self.input_effects, self.output_effects):
if ieff.name == oeff.name:
ninputs -= 1
else:
break
else:
ninputs = 0
if ninputs:
out.write_raw(
f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n"
@ -237,10 +287,11 @@ class Instruction:
else:
out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*$", line):
space = m.group(1)
for ieff in self.input_effects:
if ieff.name not in self.unmoved_names:
out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n")
if not self.register:
space = m.group(1)
for ieff in self.input_effects:
if ieff.name not in self.unmoved_names:
out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n")
else:
out.write_raw(extra + line)
@ -392,6 +443,7 @@ class Analyzer:
self.find_predictions()
self.map_families()
self.check_families()
self.analyze_register_instrs()
self.analyze_supers_and_macros()
def find_predictions(self) -> None:
@ -458,6 +510,11 @@ class Analyzer:
family,
)
def analyze_register_instrs(self) -> None:
for instr in self.instrs.values():
if instr.register:
instr.analyze_registers(self)
def analyze_supers_and_macros(self) -> None:
"""Analyze each super- and macro instruction."""
self.super_instrs = {}
@ -563,6 +620,129 @@ class Analyzer:
]
return stack, -lowest
def write_metadata(self) -> None:
"""Write instruction metadata to output file."""
with open(self.output_filename, "w") as f:
# Write provenance header
f.write(
f"// This file is generated by {os.path.relpath(__file__)} --metadata\n"
)
f.write(f"// from {os.path.relpath(self.filename)}\n")
f.write(f"// Do not edit!\n")
# Create formatter; the rest of the code uses this
self.out = Formatter(f, 0)
# Write variable definition
self.out.emit("enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };")
self.out.emit("static const struct {")
with self.out.indent():
self.out.emit("short n_popped;")
self.out.emit("short n_pushed;")
self.out.emit("enum Direction dir_op1;")
self.out.emit("enum Direction dir_op2;")
self.out.emit("enum Direction dir_op3;")
self.out.emit("bool valid_entry;")
self.out.emit("char instr_format[10];")
self.out.emit("} _PyOpcode_opcode_metadata[256] = {")
# Write metadata for each instruction
for thing in self.everything:
match thing:
case parser.InstDef():
if thing.kind != "op":
self.write_metadata_for_inst(self.instrs[thing.name])
case parser.Super():
self.write_metadata_for_super(self.super_instrs[thing.name])
case parser.Macro():
self.write_metadata_for_macro(self.macro_instrs[thing.name])
case _:
typing.assert_never(thing)
# Write end of array
self.out.emit("};")
def get_format(self, thing: Instruction | SuperInstruction | MacroInstruction) -> str:
"""Get the format string for a single instruction."""
def instr_format(instr: Instruction) -> str:
if instr.register:
fmt = "IBBB"
else:
fmt = "IB"
cache = "C"
for ce in instr.cache_effects:
for _ in range(ce.size):
fmt += cache
cache = "0"
return fmt
match thing:
case Instruction():
format = instr_format(thing)
case SuperInstruction():
format = ""
for part in thing.parts:
format += instr_format(part.instr)
case MacroInstruction():
# Macros don't support register instructions yet
format = "IB"
cache = "C"
for part in thing.parts:
if isinstance(part, parser.CacheEffect):
for _ in range(part.size):
format += cache
cache = "0"
else:
assert isinstance(part, Component)
for ce in part.instr.cache_effects:
for _ in range(ce.size):
format += cache
cache = "0"
case _:
typing.assert_never(thing)
assert len(format) < 10 # Else update the size of instr_format above
return format
def write_metadata_for_inst(self, instr: Instruction) -> None:
"""Write metadata for a single instruction."""
dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
if instr.kind == "legacy":
n_popped = n_pushed = -1
assert not instr.register
else:
n_popped = len(instr.input_effects)
n_pushed = len(instr.output_effects)
if instr.register:
directions: list[str] = []
directions.extend("DIR_READ" for _ in instr.input_effects)
directions.extend("DIR_WRITE" for _ in instr.output_effects)
directions.extend("DIR_NONE" for _ in range(3))
dir_op1, dir_op2, dir_op3 = directions[:3]
format = self.get_format(instr)
self.out.emit(
f' [{instr.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
)
def write_metadata_for_super(self, sup: SuperInstruction) -> None:
"""Write metadata for a super-instruction."""
n_popped = sum(len(comp.instr.input_effects) for comp in sup.parts)
n_pushed = sum(len(comp.instr.output_effects) for comp in sup.parts)
dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
format = self.get_format(sup)
self.out.emit(
f' [{sup.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
)
def write_metadata_for_macro(self, mac: MacroInstruction) -> None:
"""Write metadata for a macro-instruction."""
parts = [comp for comp in mac.parts if isinstance(comp, Component)]
n_popped = sum(len(comp.instr.input_effects) for comp in parts)
n_pushed = sum(len(comp.instr.output_effects) for comp in parts)
dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
format = self.get_format(mac)
self.out.emit(
f' [{mac.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
)
def write_instructions(self) -> None:
"""Write instructions to output file."""
with open(self.output_filename, "w") as f:
@ -571,7 +751,7 @@ class Analyzer:
f.write(f"// from {os.path.relpath(self.filename)}\n")
f.write(f"// Do not edit!\n")
# Create formatter; the rest of the code uses this.
# Create formatter; the rest of the code uses this
self.out = Formatter(f, 8)
# Write and count instructions of all kinds
@ -581,7 +761,7 @@ class Analyzer:
for thing in self.everything:
match thing:
case parser.InstDef():
if thing.kind == "inst":
if thing.kind != "op":
n_instrs += 1
self.write_instr(self.instrs[thing.name])
case parser.Super():
@ -616,9 +796,13 @@ class Analyzer:
with self.wrap_super_or_macro(sup):
first = True
for comp in sup.parts:
if not first:
if first:
pass
# self.out.emit("JUMPBY(OPSIZE(opcode) - 1);")
else:
self.out.emit("NEXTOPARG();")
self.out.emit("JUMPBY(1);")
# self.out.emit("JUMPBY(OPSIZE(opcode));")
first = False
comp.write_body(self.out, 0)
if comp.instr.cache_offset:
@ -711,12 +895,18 @@ def always_exits(lines: list[str]) -> bool:
def main():
"""Parse command line, parse input, analyze, write output."""
args = arg_parser.parse_args() # Prints message and sys.exit(2) on error
if args.metadata:
if args.output == DEFAULT_OUTPUT:
args.output = DEFAULT_METADATA_OUTPUT
a = Analyzer(args.input, args.output) # Raises OSError if input unreadable
a.parse() # Raises SyntaxError on failure
a.analyze() # Prints messages and sets a.errors on failure
if a.errors:
sys.exit(f"Found {a.errors} errors")
a.write_instructions() # Raises OSError if output can't be written
if args.metadata:
a.write_metadata()
else:
a.write_instructions() # Raises OSError if output can't be written
if __name__ == "__main__":