mirror of https://github.com/python/cpython.git (synced 2025-10-09 16:34:44 +00:00)
GH-98831: Update generate_cases.py: register inst, opcode_metadata.h (#100735)
(These aren't used yet, but may be coming soon, and it's easier to keep this tool the same between branches.) Added a sanity check for all this to compile.c.

Co-authored-by: Irit Katriel <iritkatriel@yahoo.com>
parent 28187141cc
commit 14b7f00fdf
6 changed files with 456 additions and 37 deletions
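In short, generate_cases.py gains a -m/--metadata flag that writes an instruction-metadata table (defaulting to Python/opcode_metadata.h instead of the generated cases file) and learns about register instructions. As a reading aid, the standalone Python sketch below (not part of the commit; names are illustrative) mirrors the instr_format() helper added further down, which builds the per-instruction format string: "I" for the instruction word, "B" for an oparg byte, "C" for the first cache entry, "0" for the remaining cache entries, and three oparg bytes for register instructions.

# Illustrative sketch only, mirroring the instr_format() helper in the diff below.
def sketch_instr_format(register: bool, cache_sizes: list[int]) -> str:
    fmt = "IBBB" if register else "IB"  # register instructions carry three opargs
    cache = "C"
    for size in cache_sizes:
        for _ in range(size):
            fmt += cache
            cache = "0"  # only the first cache entry is marked "C"
    return fmt

assert sketch_instr_format(False, []) == "IB"         # plain instruction, no cache
assert sketch_instr_format(False, [1, 2]) == "IBC00"  # three cache entries in total
assert sketch_instr_format(True, []) == "IBBB"        # register instruction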
@@ -21,6 +21,9 @@ DEFAULT_INPUT = os.path.relpath(
DEFAULT_OUTPUT = os.path.relpath(
    os.path.join(os.path.dirname(__file__), "../../Python/generated_cases.c.h")
)
DEFAULT_METADATA_OUTPUT = os.path.relpath(
    os.path.join(os.path.dirname(__file__), "../../Python/opcode_metadata.h")
)
BEGIN_MARKER = "// BEGIN BYTECODES //"
END_MARKER = "// END BYTECODES //"
RE_PREDICTED = r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*$"

@@ -37,6 +40,12 @@ arg_parser.add_argument(
arg_parser.add_argument(
    "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)
arg_parser.add_argument(
    "-m",
    "--metadata",
    action="store_true",
    help=f"Generate metadata instead, changes output default to {DEFAULT_METADATA_OUTPUT}",
)


class Formatter:

@@ -96,6 +105,8 @@ class Formatter:
        cast = self.cast(dst, src)
        if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
            self.emit(f"POKE({m.group(1)}, {cast}{src.name});")
        elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name):
            self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
        else:
            self.emit(f"{dst.name} = {cast}{src.name};")

@@ -109,7 +120,8 @@ class Instruction:

    # Parts of the underlying instruction definition
    inst: parser.InstDef
    kind: typing.Literal["inst", "op"]
    register: bool
    kind: typing.Literal["inst", "op", "legacy"]  # Legacy means no (input -- output)
    name: str
    block: parser.Block
    block_text: list[str]  # Block.text, less curlies, less PREDICT() calls

@@ -121,6 +133,9 @@ class Instruction:
    cache_effects: list[parser.CacheEffect]
    input_effects: list[StackEffect]
    output_effects: list[StackEffect]
    # Parallel to input_effects
    input_registers: list[str] = dataclasses.field(repr=False)
    output_registers: list[str] = dataclasses.field(repr=False)

    # Set later
    family: parser.Family | None = None

@@ -129,6 +144,7 @@ class Instruction:

    def __init__(self, inst: parser.InstDef):
        self.inst = inst
        self.register = inst.register
        self.kind = inst.kind
        self.name = inst.name
        self.block = inst.block

@@ -150,9 +166,24 @@ class Instruction:
                break
        self.unmoved_names = frozenset(unmoved_names)

    def analyze_registers(self, a: "Analyzer") -> None:
        regs = iter(("REG(oparg1)", "REG(oparg2)", "REG(oparg3)"))
        try:
            self.input_registers = [
                next(regs) for ieff in self.input_effects if ieff.name != UNUSED
            ]
            self.output_registers = [
                next(regs) for oeff in self.output_effects if oeff.name != UNUSED
            ]
        except StopIteration:  # Running out of registers
            a.error(
                f"Instruction {self.name} has too many register effects", node=self.inst
            )

    def write(self, out: Formatter) -> None:
        """Write one instruction, sans prologue and epilogue."""
        # Write a static assertion that a family's cache size is correct

        if family := self.family:
            if self.name == family.members[0]:
                if cache_size := family.size:
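(Aside, not part of the diff: analyze_registers() above hands out at most three REG(opargN) slots, inputs first, then outputs, skipping effects named UNUSED. A tiny illustration with hypothetical effect names:)

regs = iter(("REG(oparg1)", "REG(oparg2)", "REG(oparg3)"))
input_names = ["left", "right"]   # hypothetical input stack effects
output_names = ["res"]            # hypothetical output stack effect
input_registers = [next(regs) for _ in input_names]
output_registers = [next(regs) for _ in output_names]
print(input_registers, output_registers)
# ['REG(oparg1)', 'REG(oparg2)'] ['REG(oparg3)']
# A fourth named effect would exhaust the iterator (StopIteration), which the
# analyzer reports as "... has too many register effects".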
@@ -161,10 +192,16 @@ class Instruction:
                    f'{self.cache_offset}, "incorrect cache size");'
                )

        # Write input stack effect variable declarations and initializations
        for i, ieffect in enumerate(reversed(self.input_effects), 1):
            src = StackEffect(f"PEEK({i})", "")
            out.declare(ieffect, src)
        if not self.register:
            # Write input stack effect variable declarations and initializations
            for i, ieffect in enumerate(reversed(self.input_effects), 1):
                src = StackEffect(f"PEEK({i})", "")
                out.declare(ieffect, src)
        else:
            # Write input register variable declarations and initializations
            for ieffect, reg in zip(self.input_effects, self.input_registers):
                src = StackEffect(reg, "")
                out.declare(ieffect, src)

        # Write output stack effect variable declarations
        input_names = {ieffect.name for ieffect in self.input_effects}

@@ -172,20 +209,28 @@ class Instruction:
            if oeffect.name not in input_names:
                out.declare(oeffect, None)

        # out.emit(f"JUMPBY(OPSIZE({self.inst.name}) - 1);")

        self.write_body(out, 0)

        # Skip the rest if the block always exits
        if self.always_exits:
            return

        # Write net stack growth/shrinkage
        diff = len(self.output_effects) - len(self.input_effects)
        out.stack_adjust(diff)
        if not self.register:
            # Write net stack growth/shrinkage
            diff = len(self.output_effects) - len(self.input_effects)
            out.stack_adjust(diff)

        # Write output stack effect assignments
        for i, oeffect in enumerate(reversed(self.output_effects), 1):
            if oeffect.name not in self.unmoved_names:
                dst = StackEffect(f"PEEK({i})", "")
            # Write output stack effect assignments
            for i, oeffect in enumerate(reversed(self.output_effects), 1):
                if oeffect.name not in self.unmoved_names:
                    dst = StackEffect(f"PEEK({i})", "")
                    out.assign(dst, oeffect)
        else:
            # Write output register assignments
            for oeffect, reg in zip(self.output_effects, self.output_registers):
                dst = StackEffect(reg, "")
                out.assign(dst, oeffect)

        # Write cache effect

@@ -209,7 +254,9 @@ class Instruction:
            else:
                typ = f"uint{bits}_t "
                func = f"read_u{bits}"
                out.emit(f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);")
                out.emit(
                    f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);"
                )
            cache_offset += ceffect.size
        assert cache_offset == self.cache_offset + cache_adjust

@@ -222,14 +269,17 @@ class Instruction:
                # ERROR_IF() must pop the inputs from the stack.
                # The code block is responsible for DECREF()ing them.
                # NOTE: If the label doesn't exist, just add it to ceval.c.
                ninputs = len(self.input_effects)
                # Don't pop common input/output effects at the bottom!
                # These aren't DECREF'ed so they can stay.
                for ieff, oeff in zip(self.input_effects, self.output_effects):
                    if ieff.name == oeff.name:
                        ninputs -= 1
                    else:
                        break
                if not self.register:
                    ninputs = len(self.input_effects)
                    # Don't pop common input/output effects at the bottom!
                    # These aren't DECREF'ed so they can stay.
                    for ieff, oeff in zip(self.input_effects, self.output_effects):
                        if ieff.name == oeff.name:
                            ninputs -= 1
                        else:
                            break
                else:
                    ninputs = 0
                if ninputs:
                    out.write_raw(
                        f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n"

@@ -237,10 +287,11 @@ class Instruction:
                else:
                    out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
            elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*$", line):
                space = m.group(1)
                for ieff in self.input_effects:
                    if ieff.name not in self.unmoved_names:
                        out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n")
                if not self.register:
                    space = m.group(1)
                    for ieff in self.input_effects:
                        if ieff.name not in self.unmoved_names:
                            out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n")
            else:
                out.write_raw(extra + line)

@@ -392,6 +443,7 @@ class Analyzer:
        self.find_predictions()
        self.map_families()
        self.check_families()
        self.analyze_register_instrs()
        self.analyze_supers_and_macros()

    def find_predictions(self) -> None:

@@ -458,6 +510,11 @@ class Analyzer:
                    family,
                )

    def analyze_register_instrs(self) -> None:
        for instr in self.instrs.values():
            if instr.register:
                instr.analyze_registers(self)

    def analyze_supers_and_macros(self) -> None:
        """Analyze each super- and macro instruction."""
        self.super_instrs = {}

@@ -563,6 +620,129 @@ class Analyzer:
        ]
        return stack, -lowest

    def write_metadata(self) -> None:
        """Write instruction metadata to output file."""
        with open(self.output_filename, "w") as f:
            # Write provenance header
            f.write(
                f"// This file is generated by {os.path.relpath(__file__)} --metadata\n"
            )
            f.write(f"// from {os.path.relpath(self.filename)}\n")
            f.write(f"// Do not edit!\n")

            # Create formatter; the rest of the code uses this
            self.out = Formatter(f, 0)

            # Write variable definition
            self.out.emit("enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };")
            self.out.emit("static const struct {")
            with self.out.indent():
                self.out.emit("short n_popped;")
                self.out.emit("short n_pushed;")
                self.out.emit("enum Direction dir_op1;")
                self.out.emit("enum Direction dir_op2;")
                self.out.emit("enum Direction dir_op3;")
                self.out.emit("bool valid_entry;")
                self.out.emit("char instr_format[10];")
            self.out.emit("} _PyOpcode_opcode_metadata[256] = {")

            # Write metadata for each instruction
            for thing in self.everything:
                match thing:
                    case parser.InstDef():
                        if thing.kind != "op":
                            self.write_metadata_for_inst(self.instrs[thing.name])
                    case parser.Super():
                        self.write_metadata_for_super(self.super_instrs[thing.name])
                    case parser.Macro():
                        self.write_metadata_for_macro(self.macro_instrs[thing.name])
                    case _:
                        typing.assert_never(thing)

            # Write end of array
            self.out.emit("};")

    def get_format(self, thing: Instruction | SuperInstruction | MacroInstruction) -> str:
        """Get the format string for a single instruction."""
        def instr_format(instr: Instruction) -> str:
            if instr.register:
                fmt = "IBBB"
            else:
                fmt = "IB"
            cache = "C"
            for ce in instr.cache_effects:
                for _ in range(ce.size):
                    fmt += cache
                    cache = "0"
            return fmt
        match thing:
            case Instruction():
                format = instr_format(thing)
            case SuperInstruction():
                format = ""
                for part in thing.parts:
                    format += instr_format(part.instr)
            case MacroInstruction():
                # Macros don't support register instructions yet
                format = "IB"
                cache = "C"
                for part in thing.parts:
                    if isinstance(part, parser.CacheEffect):
                        for _ in range(part.size):
                            format += cache
                            cache = "0"
                    else:
                        assert isinstance(part, Component)
                        for ce in part.instr.cache_effects:
                            for _ in range(ce.size):
                                format += cache
                                cache = "0"
            case _:
                typing.assert_never(thing)
        assert len(format) < 10  # Else update the size of instr_format above
        return format

    def write_metadata_for_inst(self, instr: Instruction) -> None:
        """Write metadata for a single instruction."""
        dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
        if instr.kind == "legacy":
            n_popped = n_pushed = -1
            assert not instr.register
        else:
            n_popped = len(instr.input_effects)
            n_pushed = len(instr.output_effects)
            if instr.register:
                directions: list[str] = []
                directions.extend("DIR_READ" for _ in instr.input_effects)
                directions.extend("DIR_WRITE" for _ in instr.output_effects)
                directions.extend("DIR_NONE" for _ in range(3))
                dir_op1, dir_op2, dir_op3 = directions[:3]
        format = self.get_format(instr)
        self.out.emit(
            f' [{instr.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
        )

    def write_metadata_for_super(self, sup: SuperInstruction) -> None:
        """Write metadata for a super-instruction."""
        n_popped = sum(len(comp.instr.input_effects) for comp in sup.parts)
        n_pushed = sum(len(comp.instr.output_effects) for comp in sup.parts)
        dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
        format = self.get_format(sup)
        self.out.emit(
            f' [{sup.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
        )

    def write_metadata_for_macro(self, mac: MacroInstruction) -> None:
        """Write metadata for a macro-instruction."""
        parts = [comp for comp in mac.parts if isinstance(comp, Component)]
        n_popped = sum(len(comp.instr.input_effects) for comp in parts)
        n_pushed = sum(len(comp.instr.output_effects) for comp in parts)
        dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
        format = self.get_format(mac)
        self.out.emit(
            f' [{mac.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
        )

    def write_instructions(self) -> None:
        """Write instructions to output file."""
        with open(self.output_filename, "w") as f:
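(Aside, not part of the diff: pieced together from the emit() calls in the hunk above, the generated header can be expected to look roughly as follows. This is a sketch only; paths are shown repo-relative, the NOP row is a hypothetical example, and the real entries come from the bytecode definitions the generator reads.)

# Rough, hypothetical shape of the generated header, held in a Python string
# so this sketch stays in the same language as the tool:
EXPECTED_METADATA_SHAPE = """\
// This file is generated by Tools/cases_generator/generate_cases.py --metadata
// from Python/bytecodes.c
// Do not edit!
enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };
static const struct {
    short n_popped;
    short n_pushed;
    enum Direction dir_op1;
    enum Direction dir_op2;
    enum Direction dir_op3;
    bool valid_entry;
    char instr_format[10];
} _PyOpcode_opcode_metadata[256] = {
    [NOP] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
    // ... one row per defined instruction, super, and macro ...
};
"""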
@@ -571,7 +751,7 @@ class Analyzer:
            f.write(f"// from {os.path.relpath(self.filename)}\n")
            f.write(f"// Do not edit!\n")

            # Create formatter; the rest of the code uses this.
            # Create formatter; the rest of the code uses this
            self.out = Formatter(f, 8)

            # Write and count instructions of all kinds

@@ -581,7 +761,7 @@ class Analyzer:
            for thing in self.everything:
                match thing:
                    case parser.InstDef():
                        if thing.kind == "inst":
                        if thing.kind != "op":
                            n_instrs += 1
                            self.write_instr(self.instrs[thing.name])
                    case parser.Super():

@@ -616,9 +796,13 @@ class Analyzer:
        with self.wrap_super_or_macro(sup):
            first = True
            for comp in sup.parts:
                if not first:
                if first:
                    pass
                    # self.out.emit("JUMPBY(OPSIZE(opcode) - 1);")
                else:
                    self.out.emit("NEXTOPARG();")
                    self.out.emit("JUMPBY(1);")
                    # self.out.emit("JUMPBY(OPSIZE(opcode));")
                first = False
                comp.write_body(self.out, 0)
                if comp.instr.cache_offset:

@@ -711,12 +895,18 @@ def always_exits(lines: list[str]) -> bool:
def main():
    """Parse command line, parse input, analyze, write output."""
    args = arg_parser.parse_args()  # Prints message and sys.exit(2) on error
    if args.metadata:
        if args.output == DEFAULT_OUTPUT:
            args.output = DEFAULT_METADATA_OUTPUT
    a = Analyzer(args.input, args.output)  # Raises OSError if input unreadable
    a.parse()  # Raises SyntaxError on failure
    a.analyze()  # Prints messages and sets a.errors on failure
    if a.errors:
        sys.exit(f"Found {a.errors} errors")
    a.write_instructions()  # Raises OSError if output can't be written
    if args.metadata:
        a.write_metadata()
    else:
        a.write_instructions()  # Raises OSError if output can't be written


if __name__ == "__main__":
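(Aside, not part of the diff: a self-contained sketch of the flag handling that the final hunk adds to main(). Passing -m/--metadata flips the default output from the generated cases file to the metadata header; the paths below are illustrative constants, not imported from the tool.)

import argparse

DEFAULT_OUTPUT = "Python/generated_cases.c.h"
DEFAULT_METADATA_OUTPUT = "Python/opcode_metadata.h"

parser = argparse.ArgumentParser()
parser.add_argument("-o", "--output", type=str, default=DEFAULT_OUTPUT)
parser.add_argument("-m", "--metadata", action="store_true")

args = parser.parse_args(["-m"])  # simulate: generate_cases.py -m
if args.metadata and args.output == DEFAULT_OUTPUT:
    args.output = DEFAULT_METADATA_OUTPUT
print(args.output)  # -> Python/opcode_metadata.h
# The analyzer then writes the metadata table (write_metadata) instead of the
# instruction cases (write_instructions).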