mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
gh-105481: generate op IDs from bytecode.c instead of hard coding them in opcode.py (#107971)
This commit is contained in:
parent
e88eb3775e
commit
665a4391e1
21 changed files with 1593 additions and 1521 deletions
|
@ -5,6 +5,7 @@ Writes the cases to generated_cases.c.h, which is #included in ceval.c.
|
|||
|
||||
import argparse
|
||||
import contextlib
|
||||
import itertools
|
||||
import os
|
||||
import posixpath
|
||||
import sys
|
||||
|
@ -36,6 +37,12 @@ THIS = os.path.relpath(__file__, ROOT).replace(os.path.sep, posixpath.sep)
|
|||
|
||||
# Default input/output locations for the generator, expressed relative to
# the current working directory (ROOT is the repo root, defined above).
DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
DEFAULT_OPCODE_IDS_H_OUTPUT = os.path.relpath(
    os.path.join(ROOT, "Include/opcode_ids.h"))
DEFAULT_OPCODE_TARGETS_H_OUTPUT = os.path.relpath(
    os.path.join(ROOT, "Python/opcode_targets.h"))
DEFAULT_METADATA_OUTPUT = os.path.relpath(
    os.path.join(ROOT, "Include/internal/pycore_opcode_metadata.h"))
|
||||
|
@ -86,6 +93,20 @@ arg_parser = argparse.ArgumentParser(
|
|||
arg_parser.add_argument(
|
||||
"-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"-n",
|
||||
"--opcode_ids_h",
|
||||
type=str,
|
||||
help="Header file with opcode number definitions",
|
||||
default=DEFAULT_OPCODE_IDS_H_OUTPUT,
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"-t",
|
||||
"--opcode_targets_h",
|
||||
type=str,
|
||||
help="File with opcode targets for computed gotos",
|
||||
default=DEFAULT_OPCODE_TARGETS_H_OUTPUT,
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"-m",
|
||||
"--metadata",
|
||||
|
@ -225,6 +246,129 @@ class Generator(Analyzer):
|
|||
self.out.write_raw(self.from_source_files())
|
||||
self.out.write_raw(f"{self.out.comment} Do not edit!\n")
|
||||
|
||||
def assign_opcode_ids(self):
    """Assign numeric IDs to all opcodes.

    Populates ``self.opmap`` (name -> opcode number) and ``self.markers``
    (marker name -> opcode number, for HAVE_ARGUMENT and
    MIN_INSTRUMENTED_OPCODE).

    Layout of the ID space:
      * 0 is CACHE, 17 is RESERVED, 166 is RESUME (all anchored);
      * regular opcodes fill the remaining slots below 255, argument-less
        opcodes first so HAVE_ARGUMENT marks the boundary;
      * instrumented opcodes occupy the top of the valid range, ending
        at 254 (255 stays reserved);
      * pseudo opcodes live at 256 and above, outside the valid range.
    """

    ops: list[tuple[bool, str]] = []  # (has_arg, name) for each opcode
    instrumented_ops: list[str] = []

    # Collect every real instruction (macros included); "op" entries are
    # building blocks only and get no opcode of their own.
    for instr in itertools.chain(
        [instr for instr in self.instrs.values() if instr.kind != "op"],
        self.macro_instrs.values()):

        name = instr.name
        if name.startswith('INSTRUMENTED_'):
            instrumented_ops.append(name)
        else:
            ops.append((instr.instr_flags.HAS_ARG_FLAG, name))

    # Special case: this instruction is implemented in ceval.c
    # rather than bytecodes.c, so we need to add it explicitly
    # here (at least until we add something to bytecodes.c to
    # declare external instructions).
    instrumented_ops.append('INSTRUMENTED_LINE')

    # assert lists are unique
    assert len(set(ops)) == len(ops)
    assert len(set(instrumented_ops)) == len(instrumented_ops)

    opname: list[str | None] = [None] * 512
    opmap: dict[str, int] = {}
    markers: dict[str, int] = {}

    def map_op(op, name):
        # Record a single (opcode, name) pairing; both directions must
        # still be unassigned.
        assert op < len(opname)
        assert opname[op] is None
        assert name not in opmap
        opname[op] = name
        opmap[name] = op


    # 0 is reserved for cache entries. This helps debugging.
    map_op(0, 'CACHE')

    # 17 is reserved as it is the initial value for the specializing counter.
    # This helps catch cases where we attempt to execute a cache.
    map_op(17, 'RESERVED')

    # 166 is RESUME - it is hard coded as such in Tools/build/deepfreeze.py
    map_op(166, 'RESUME')

    next_opcode = 1

    # Sorting on (has_arg, name) places all argument-less opcodes first,
    # so the first has_arg opcode assigned defines HAVE_ARGUMENT.
    for has_arg, name in sorted(ops):
        if name in opmap:
            continue  # an anchored name, like CACHE
        while opname[next_opcode] is not None:
            next_opcode += 1
        assert next_opcode < 255
        map_op(next_opcode, name)

        if has_arg and 'HAVE_ARGUMENT' not in markers:
            markers['HAVE_ARGUMENT'] = next_opcode

    # Instrumented opcodes are at the end of the valid range
    min_instrumented = 254 - (len(instrumented_ops) - 1)
    assert next_opcode <= min_instrumented
    markers['MIN_INSTRUMENTED_OPCODE'] = min_instrumented
    for i, op in enumerate(instrumented_ops):
        map_op(min_instrumented + i, op)

    # Pseudo opcodes are after the valid range
    for i, op in enumerate(sorted(self.pseudos)):
        map_op(256 + i, op)

    assert 255 not in opmap  # 255 is reserved
    self.opmap = opmap
    self.markers = markers
|
||||
|
||||
def write_opcode_ids(self, opcode_ids_h_filename, opcode_targets_filename):
    """Write the header file that defines the opcode IDs, and the
    opcode_targets table used for computed-goto dispatch.

    Requires assign_opcode_ids() to have populated self.opmap and
    self.markers beforehand.
    """

    with open(opcode_ids_h_filename, "w") as f:
        # Create formatter
        self.out = Formatter(f, 0)

        self.write_provenance_header()

        self.out.emit("")
        self.out.emit("#ifndef Py_OPCODE_IDS_H")
        self.out.emit("#define Py_OPCODE_IDS_H")
        self.out.emit("#ifdef __cplusplus")
        self.out.emit("extern \"C\" {")
        self.out.emit("#endif")
        self.out.emit("")
        self.out.emit("/* Instruction opcodes for compiled code */")

        def define(name, opcode):
            # Keep the numbers in a fixed column for readability.
            self.out.emit(f"#define {name:<38} {opcode:>3}")

        all_pairs = []
        # the second item in the tuple sorts the markers before the ops
        all_pairs.extend((i, 1, name) for (name, i) in self.markers.items())
        all_pairs.extend((i, 2, name) for (name, i) in self.opmap.items())
        for i, _, name in sorted(all_pairs):
            assert name is not None
            define(name, i)

        self.out.emit("")
        self.out.emit("#ifdef __cplusplus")
        self.out.emit("}")
        self.out.emit("#endif")
        self.out.emit("#endif /* !Py_OPCODE_IDS_H */")

    with open(opcode_targets_filename, "w") as f:
        # Create formatter
        self.out = Formatter(f, 0)

        with self.out.block("static void *opcode_targets[256] =", ";"):
            # Every unassigned slot dispatches to _unknown_opcode.
            targets = ["_unknown_opcode"] * 256
            for name, op in self.opmap.items():
                if op < 256:  # pseudo opcodes (>= 256) have no target
                    targets[op] = f"TARGET_{name}"
            f.write(",\n".join([f"    &&{s}" for s in targets]))
|
||||
|
||||
|
||||
def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> None:
|
||||
"""Write instruction metadata to output file."""
|
||||
|
||||
|
@ -378,12 +522,46 @@ class Generator(Analyzer):
|
|||
):
|
||||
self.write_uop_items(lambda name, counter: f'[{name}] = "{name}",')
|
||||
|
||||
with self.metadata_item(
|
||||
f"const char *const _PyOpcode_OpName[{1 + max(self.opmap.values())}]", "=", ";"
|
||||
):
|
||||
for name in self.opmap:
|
||||
self.out.emit(f'[{name}] = "{name}",')
|
||||
|
||||
deoptcodes = {}
|
||||
for name, op in self.opmap.items():
|
||||
if op < 256:
|
||||
deoptcodes[name] = name
|
||||
for name, family in self.families.items():
|
||||
for m in family.members:
|
||||
deoptcodes[m] = name
|
||||
# special case:
|
||||
deoptcodes['BINARY_OP_INPLACE_ADD_UNICODE'] = 'BINARY_OP'
|
||||
|
||||
with self.metadata_item(
|
||||
f"const uint8_t _PyOpcode_Deopt[256]", "=", ";"
|
||||
):
|
||||
for opt, deopt in sorted(deoptcodes.items()):
|
||||
self.out.emit(f"[{opt}] = {deopt},")
|
||||
|
||||
self.out.emit("")
|
||||
self.out.emit("#define EXTRA_CASES \\")
|
||||
valid_opcodes = set(self.opmap.values())
|
||||
with self.out.indent():
|
||||
for op in range(256):
|
||||
if op not in valid_opcodes:
|
||||
self.out.emit(f"case {op}: \\")
|
||||
self.out.emit(" ;\n")
|
||||
|
||||
with open(pymetadata_filename, "w") as f:
|
||||
# Create formatter
|
||||
self.out = Formatter(f, 0, comment="#")
|
||||
|
||||
self.write_provenance_header()
|
||||
|
||||
# emit specializations
|
||||
specialized_ops = set()
|
||||
|
||||
self.out.emit("")
|
||||
self.out.emit("_specializations = {")
|
||||
for name, family in self.families.items():
|
||||
|
@ -392,6 +570,7 @@ class Generator(Analyzer):
|
|||
with self.out.indent():
|
||||
for m in family.members:
|
||||
self.out.emit(f'"{m}",')
|
||||
specialized_ops.update(family.members)
|
||||
self.out.emit(f"],")
|
||||
self.out.emit("}")
|
||||
|
||||
|
@ -402,14 +581,26 @@ class Generator(Analyzer):
|
|||
'_specializations["BINARY_OP"].append('
|
||||
'"BINARY_OP_INPLACE_ADD_UNICODE")'
|
||||
)
|
||||
specialized_ops.add("BINARY_OP_INPLACE_ADD_UNICODE")
|
||||
|
||||
# Make list of specialized instructions
|
||||
ops = sorted((id, name) for (name, id) in self.opmap.items())
|
||||
# emit specialized opmap
|
||||
self.out.emit("")
|
||||
self.out.emit(
|
||||
"_specialized_instructions = ["
|
||||
"opcode for family in _specializations.values() for opcode in family"
|
||||
"]"
|
||||
)
|
||||
with self.out.block("_specialized_opmap ="):
|
||||
for op, name in ops:
|
||||
if name in specialized_ops:
|
||||
self.out.emit(f"'{name}': {op},")
|
||||
|
||||
# emit opmap
|
||||
self.out.emit("")
|
||||
with self.out.block("opmap ="):
|
||||
for op, name in ops:
|
||||
if name not in specialized_ops:
|
||||
self.out.emit(f"'{name}': {op},")
|
||||
|
||||
for name in ['MIN_INSTRUMENTED_OPCODE', 'HAVE_ARGUMENT']:
|
||||
self.out.emit(f"{name} = {self.markers[name]}")
|
||||
|
||||
|
||||
def write_pseudo_instrs(self) -> None:
|
||||
"""Write the IS_PSEUDO_INSTR macro"""
|
||||
|
@ -683,6 +874,9 @@ def main():
|
|||
|
||||
# These raise OSError if output can't be written
|
||||
a.write_instructions(args.output, args.emit_line_directives)
|
||||
|
||||
a.assign_opcode_ids()
|
||||
a.write_opcode_ids(args.opcode_ids_h, args.opcode_targets_h)
|
||||
a.write_metadata(args.metadata, args.pymetadata)
|
||||
a.write_executor_instructions(args.executor_cases, args.emit_line_directives)
|
||||
a.write_abstract_interpreter_instructions(args.abstract_interpreter_cases,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue