Thoroughly refactor the cases generator (#107151)

This mostly extracts a whole bunch of stuff out of generate_cases.py into separate files, but there are a few other things going on here.

- analysis.py: `Analyzer` etc.
- instructions.py: `Instruction` etc.
- flags.py: `InstructionFlags`, `variable_used`, `variable_used_unspecialized`
- formatting.py: `Formatter` etc.
- Rename parser.py to parsing.py, to avoid conflict with stdlib parser.py
- Blackify most things
- Fix most mypy errors
- Remove output filenames from Generator state, add them to `write_instructions()` etc.
- Fix unit tests
2025-11-01 18:51:43 +00:00 · 2023-07-24 09:38:23 -07:00 · 2023-07-24 09:38:23 -07:00 · 032f480909
commit 032f480909
parent ff5f94b72c
7 changed files with 1304 additions and 1169 deletions
--- a/Lib/test/test_generated_cases.py
+++ b/Lib/test/test_generated_cases.py
@ -6,8 +6,10 @@ from test import test_tools
 test_tools.skip_if_missing('cases_generator')
 with test_tools.imports_under_tool('cases_generator'):
    import analysis
    import formatting
    import generate_cases
-    from parser import StackEffect
+    from parsing import StackEffect
 class TestEffects(unittest.TestCase):
@ -27,37 +29,37 @@ class TestEffects(unittest.TestCase):
            StackEffect("q", "", "", ""),
            StackEffect("r", "", "", ""),
        ]
-        self.assertEqual(generate_cases.effect_size(x), (1, ""))
+        self.assertEqual(formatting.effect_size(x), (1, ""))
-        self.assertEqual(generate_cases.effect_size(y), (0, "oparg"))
+        self.assertEqual(formatting.effect_size(y), (0, "oparg"))
-        self.assertEqual(generate_cases.effect_size(z), (0, "oparg*2"))
+        self.assertEqual(formatting.effect_size(z), (0, "oparg*2"))
        self.assertEqual(
-            generate_cases.list_effect_size(input_effects),
+            formatting.list_effect_size(input_effects),
            (1, "oparg + oparg*2"),
        )
        self.assertEqual(
-            generate_cases.list_effect_size(output_effects),
+            formatting.list_effect_size(output_effects),
            (2, "oparg*4"),
        )
        self.assertEqual(
-            generate_cases.list_effect_size(other_effects),
+            formatting.list_effect_size(other_effects),
            (2, "(oparg<<1)"),
        )
        self.assertEqual(
-            generate_cases.string_effect_size(
+            formatting.string_effect_size(
-                generate_cases.list_effect_size(input_effects),
+                formatting.list_effect_size(input_effects),
            ), "1 + oparg + oparg*2",
        )
        self.assertEqual(
-            generate_cases.string_effect_size(
+            formatting.string_effect_size(
-                generate_cases.list_effect_size(output_effects),
+                formatting.list_effect_size(output_effects),
            ),
            "2 + oparg*4",
        )
        self.assertEqual(
-            generate_cases.string_effect_size(
+            formatting.string_effect_size(
-                generate_cases.list_effect_size(other_effects),
+                formatting.list_effect_size(other_effects),
            ),
            "2 + (oparg<<1)",
        )
@ -90,23 +92,17 @@ class TestGeneratedCases(unittest.TestCase):
    def run_cases_test(self, input: str, expected: str):
        with open(self.temp_input_filename, "w+") as temp_input:
-            temp_input.write(generate_cases.BEGIN_MARKER)
+            temp_input.write(analysis.BEGIN_MARKER)
            temp_input.write(input)
-            temp_input.write(generate_cases.END_MARKER)
+            temp_input.write(analysis.END_MARKER)
            temp_input.flush()
-        a = generate_cases.Analyzer(
+        a = generate_cases.Generator([self.temp_input_filename])
            [self.temp_input_filename],
            self.temp_output_filename,
            self.temp_metadata_filename,
            self.temp_pymetadata_filename,
            self.temp_executor_filename,
        )
        a.parse()
        a.analyze()
        if a.errors:
            raise RuntimeError(f"Found {a.errors} errors")
-        a.write_instructions()
+        a.write_instructions(self.temp_output_filename, False)
        with open(self.temp_output_filename) as temp_output:
            lines = temp_output.readlines()
--- a/Tools/cases_generator/analysis.py
+++ b/Tools/cases_generator/analysis.py
@ -0,0 +1,412 @@
 import re
 import sys
 import typing
 from flags import InstructionFlags, variable_used
 from formatting import prettify_filename, UNUSED
 from instructions import (
    ActiveCacheEffect,
    Component,
    Instruction,
    InstructionOrCacheEffect,
    MacroInstruction,
    MacroParts,
    OverriddenInstructionPlaceHolder,
    PseudoInstruction,
    StackEffectMapping,
 )
 import parsing
 from parsing import StackEffect
 BEGIN_MARKER = "// BEGIN BYTECODES //"
 END_MARKER = "// END BYTECODES //"
 RESERVED_WORDS = {
    "co_consts": "Use FRAME_CO_CONSTS.",
    "co_names": "Use FRAME_CO_NAMES.",
 }
 RE_PREDICTED = r"^\s*(?:GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*(?://.*)?$"
 class Analyzer:
    """Parse input, analyze it, and write to output."""
    input_filenames: list[str]
    errors: int = 0
    def __init__(self, input_filenames: list[str]) -> None:
        """Remember which files to read; this constructor opens nothing itself."""
        self.input_filenames = input_filenames
    def error(self, msg: str, node: parsing.Node) -> None:
        """Print `msg` to stderr as 'file:line: msg' and bump the error count.

        The location comes from `node.context` when present: the line of the
        first non-comment token in the node, falling back to the last token
        when every token is a comment, and to 0 when the node has no tokens.
        Without a context the location is '<unknown file>:0'.
        """
        context = node.context
        if context:
            filename = context.owner.filename
            span = context.owner.tokens[context.begin : context.end]
            fallback = span[-1] if span else None
            anchor = next((tok for tok in span if tok.kind != "COMMENT"), fallback)
            lineno = 0 if anchor is None else anchor.line
        else:
            filename, lineno = "<unknown file>", 0
        print(f"{filename}:{lineno}: {msg}", file=sys.stderr)
        self.errors += 1
    everything: list[
        parsing.InstDef
        | parsing.Macro
        | parsing.Pseudo
        | OverriddenInstructionPlaceHolder
    ]
    instrs: dict[str, Instruction]  # Includes ops
    macros: dict[str, parsing.Macro]
    macro_instrs: dict[str, MacroInstruction]
    families: dict[str, parsing.Family]
    pseudos: dict[str, parsing.Pseudo]
    pseudo_instrs: dict[str, PseudoInstruction]
    def parse(self) -> None:
        """Read every input file and collect its definitions.

        Resets the per-run tables, feeds each file to parse_file() (which
        restricts the parser to the region between the begin and end
        markers), then prints a one-line summary to stderr.
        """
        self.everything = []
        self.instrs, self.macros = {}, {}
        self.families, self.pseudos = {}, {}

        # Position in self.everything of each instruction, shared across
        # files so a later file can override an earlier definition.
        index_by_name: dict[str, int] = {}
        for input_filename in self.input_filenames:
            self.parse_file(input_filename, index_by_name)

        files = " + ".join(self.input_filenames)
        summary = (
            f"Read {len(self.instrs)} instructions/ops, "
            f"{len(self.macros)} macros, {len(self.pseudos)} pseudos, "
            f"and {len(self.families)} families from {files}"
        )
        print(summary, file=sys.stderr)
    def parse_file(self, filename: str, instrs_idx: dict[str, int]) -> None:
        with open(filename) as file:
            src = file.read()
        psr = parsing.Parser(src, filename=prettify_filename(filename))
        # Skip until begin marker
        while tkn := psr.next(raw=True):
            if tkn.text == BEGIN_MARKER:
                break
        else:
            raise psr.make_syntax_error(
                f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}"
            )
        start = psr.getpos()
        # Find end marker, then delete everything after it
        while tkn := psr.next(raw=True):
            if tkn.text == END_MARKER:
                break
        del psr.tokens[psr.getpos() - 1 :]
        # Parse from start
        psr.setpos(start)
        thing: parsing.Node | None
        thing_first_token = psr.peek()
        while thing := psr.definition():
            thing = typing.cast(
                parsing.InstDef | parsing.Macro | parsing.Pseudo | parsing.Family, thing
            )
            if ws := [w for w in RESERVED_WORDS if variable_used(thing, w)]:
                self.error(
                    f"'{ws[0]}' is a reserved word. {RESERVED_WORDS[ws[0]]}", thing
                )
            match thing:
                case parsing.InstDef(name=name):
                    if name in self.instrs:
                        if not thing.override:
                            raise psr.make_syntax_error(
                                f"Duplicate definition of '{name}' @ {thing.context} "
                                f"previous definition @ {self.instrs[name].inst.context}",
                                thing_first_token,
                            )
                        self.everything[
                            instrs_idx[name]
                        ] = OverriddenInstructionPlaceHolder(name=name)
                    if name not in self.instrs and thing.override:
                        raise psr.make_syntax_error(
                            f"Definition of '{name}' @ {thing.context} is supposed to be "
                            "an override but no previous definition exists.",
                            thing_first_token,
                        )
                    self.instrs[name] = Instruction(thing)
                    instrs_idx[name] = len(self.everything)
                    self.everything.append(thing)
                case parsing.Macro(name):
                    self.macros[name] = thing
                    self.everything.append(thing)
                case parsing.Family(name):
                    self.families[name] = thing
                case parsing.Pseudo(name):
                    self.pseudos[name] = thing
                    self.everything.append(thing)
                case _:
                    typing.assert_never(thing)
        if not psr.eof():
            raise psr.make_syntax_error(f"Extra stuff at the end of {filename}")
    def analyze(self) -> None:
        """Analyze the parsed inputs.

        The checks in find_predictions(), map_families() and check_families()
        report problems through self.error(), which counts them in
        self.errors. Nothing here raises SystemExit, so callers should
        inspect self.errors after this returns.
        """
        # Must run first: it builds self.macro_instrs, which the three
        # following steps all consult.
        self.analyze_macros_and_pseudos()
        self.find_predictions()
        self.map_families()
        self.check_families()
    def find_predictions(self) -> None:
        """Mark every instruction or macro that is the target of a prediction.

        A target is any name matched by RE_PREDICTED in an instruction's
        block text; unknown targets are reported as errors.
        """
        for instr in self.instrs.values():
            targets: set[str] = {
                m.group(1)
                for line in instr.block_text
                if (m := re.match(RE_PREDICTED, line))
            }
            for target in targets:
                target_instr = self.instrs.get(target)
                if target_instr:
                    target_instr.predicted = True
                    continue
                target_macro = self.macro_instrs.get(target)
                if target_macro:
                    target_macro.predicted = True
                    continue
                self.error(
                    f"Unknown instruction {target!r} predicted in {instr.name!r}",
                    instr.inst,  # TODO: Use better location
                )
    def map_families(self) -> None:
        """Point each family member (and the head) back at its family.

        Reports an error when an instruction already belongs to a different
        family, or when a name is neither an instruction nor a macro.
        """
        for family in self.families.values():
            for member in [family.name, *family.members]:
                member_instr = self.instrs.get(member)
                if not member_instr:
                    # Macros are accepted as members but carry no back-link.
                    if not self.macro_instrs.get(member):
                        self.error(
                            f"Unknown instruction {member!r} referenced in family {family.name!r}",
                            family,
                        )
                    continue
                if member_instr.family is None or member_instr.family is family:
                    member_instr.family = family
                else:
                    self.error(
                        f"Instruction {member} is a member of multiple families "
                        f"({member_instr.family.name}, {family.name}).",
                        family,
                    )
    def check_families(self) -> None:
        """Check each family:

        - Must have at least 2 members (including head)
        - Head and all members must be known instructions
        - Head and all members must have the same cache, input and output effects

        Every violation is reported through self.error(). A family whose
        head is unknown is reported and then skipped for the effects
        comparison, since there is nothing to compare its members against.
        """
        for family in self.families.values():
            head_known = (
                family.name in self.macro_instrs or family.name in self.instrs
            )
            if not head_known:
                self.error(
                    f"Family {family.name!r} has unknown instruction {family.name!r}",
                    family,
                )
            members = [
                member
                for member in family.members
                if member in self.instrs or member in self.macro_instrs
            ]
            if members != family.members:
                unknown = set(family.members) - set(members)
                self.error(
                    f"Family {family.name!r} has unknown members: {unknown}", family
                )
            if not head_known:
                # effect_counts() fails hard on an unknown name; the problem
                # has already been reported above, so keep going.
                continue
            expected_effects = self.effect_counts(family.name)
            for member in members:
                member_effects = self.effect_counts(member)
                if member_effects != expected_effects:
                    self.error(
                        f"Family {family.name!r} has inconsistent "
                        f"(cache, input, output) effects:\n"
                        f"  {family.name} = {expected_effects}; "
                        f"{member} = {member_effects}",
                        family,
                    )
    def effect_counts(self, name: str) -> tuple[int, int, int]:
        """Return (cache, input, output) effect counts for `name`.

        For a plain instruction these are its cache offset and the lengths
        of its input and output stack effects. For a macro the stack counts
        are accumulated over its Component parts, cancelling what one
        component pops against what earlier components pushed.

        Raises AssertionError if `name` is neither an instruction nor a
        macro (raised explicitly so the check survives `python -O`).
        """
        if instr := self.instrs.get(name):
            return (
                instr.cache_offset,
                len(instr.input_effects),
                len(instr.output_effects),
            )
        if mac := self.macro_instrs.get(name):
            n_inputs, n_outputs = 0, 0
            for part in mac.parts:
                if isinstance(part, Component):
                    # A component may pop what the previous component pushed,
                    # so we offset the input/output counts by that.
                    delta_i = len(part.instr.input_effects)
                    delta_o = len(part.instr.output_effects)
                    offset = min(delta_i, n_outputs)
                    n_inputs += delta_i - offset
                    n_outputs += delta_o - offset
            return mac.cache_offset, n_inputs, n_outputs
        raise AssertionError(f"Unknown instruction {name!r}")
    def analyze_macros_and_pseudos(self) -> None:
        """Build self.macro_instrs and self.pseudo_instrs from the parsed tables."""
        self.macro_instrs, self.pseudo_instrs = {}, {}
        # update() consumes the generator pair by pair, so entries are added
        # one at a time, macros first and then pseudos.
        self.macro_instrs.update(
            (macro_name, self.analyze_macro(macro_def))
            for macro_name, macro_def in self.macros.items()
        )
        self.pseudo_instrs.update(
            (pseudo_name, self.analyze_pseudo(pseudo_def))
            for pseudo_name, pseudo_def in self.pseudos.items()
        )
    def analyze_macro(self, macro: parsing.Macro) -> MacroInstruction:
        """Turn a parsed macro into a MacroInstruction.

        Walks the macro's components in order, threading the stack pointer
        and cache offset through each instruction, collecting the parts and
        merging the instruction flags.
        """
        components = self.check_macro_components(macro)
        stack, initial_sp = self.stack_analysis(components)
        sp = initial_sp
        parts: MacroParts = []
        flags = InstructionFlags.newEmpty()
        cache_offset = 0
        for component in components:
            if isinstance(component, parsing.CacheEffect):
                parts.append(component)
                cache_offset += component.size
            elif isinstance(component, Instruction):
                part, sp, cache_offset = self.analyze_instruction(
                    component, stack, sp, cache_offset
                )
                parts.append(part)
                flags.add(component.instr_flags)
            else:
                typing.assert_never(component)
        # "IB" plus, when there are cache entries, "C" padded with "0"s
        # up to the total cache size.
        instr_format = "IB"
        if cache_offset:
            instr_format += "C" + "0" * (cache_offset - 1)
        return MacroInstruction(
            macro.name,
            stack,
            initial_sp,
            sp,
            instr_format,
            flags,
            macro,
            parts,
            cache_offset,
        )
    def analyze_pseudo(self, pseudo: parsing.Pseudo) -> PseudoInstruction:
        """Turn a parsed pseudo into a PseudoInstruction.

        Asserts that the pseudo has at least one target and that all targets
        share one instruction format and one flags bitmap; the format and
        flags of the result are taken from the targets.
        """
        targets = [self.instrs[target_name] for target_name in pseudo.targets]
        assert targets
        # Make sure the targets have the same fmt
        fmts = list({target.instr_fmt for target in targets})
        assert len(fmts) == 1
        flag_bitmaps = {target.instr_flags.bitmap() for target in targets}
        assert len(flag_bitmaps) == 1
        first_target = targets[0]
        return PseudoInstruction(pseudo.name, targets, fmts[0], first_target.instr_flags)
    def analyze_instruction(
        self, instr: Instruction, stack: list[StackEffect], sp: int, offset: int
    ) -> tuple[Component, int, int]:
        """Bind one macro component to stack slots and cache offsets.

        Inputs are matched to stack slots going downwards from `sp` (last
        input first), outputs going upwards from the resulting position.
        Cache effects not named UNUSED are recorded with their offset.

        Returns the Component plus the updated stack pointer and cache offset.
        """
        n_inputs = len(instr.input_effects)
        input_mapping: StackEffectMapping = [
            (stack[sp - depth], ieffect)
            for depth, ieffect in enumerate(reversed(instr.input_effects), start=1)
        ]
        sp -= n_inputs

        output_mapping: StackEffectMapping = [
            (stack[sp + height], oeffect)
            for height, oeffect in enumerate(instr.output_effects)
        ]
        sp += len(instr.output_effects)

        active_effects: list[ActiveCacheEffect] = []
        for cache_effect in instr.cache_effects:
            if cache_effect.name != UNUSED:
                active_effects.append(ActiveCacheEffect(cache_effect, offset))
            offset += cache_effect.size

        component = Component(instr, input_mapping, output_mapping, active_effects)
        return component, sp, offset
    def check_macro_components(
        self, macro: parsing.Macro
    ) -> list[InstructionOrCacheEffect]:
        components: list[InstructionOrCacheEffect] = []
        for uop in macro.uops:
            match uop:
                case parsing.OpName(name):
                    if name not in self.instrs:
                        self.error(f"Unknown instruction {name!r}", macro)
                    components.append(self.instrs[name])
                case parsing.CacheEffect():
                    components.append(uop)
                case _:
                    typing.assert_never(uop)
        return components
    def stack_analysis(
        self, components: typing.Iterable[InstructionOrCacheEffect]
    ) -> tuple[list[StackEffect], int]:
        """Analyze the stack usage of a macro's components.

        Cache effects are ignored. Instructions with variable-sized effects,
        conditional inputs, or conditional outputs anywhere but in the last
        instruction are reported through self.error().

        Return the list of variables (as StackEffects named `_tmp_N`) and
        the initial stack pointer, i.e. how far below the starting position
        the components reach.
        """
        # NOTE(review): `components` is iterated twice below, so a one-shot
        # iterator would leave the second loop empty; the caller visible in
        # this file passes a list.
        lowest = current = highest = 0
        conditions: dict[int, str] = {}  # Indexed by 'current'.
        # First pass: find the last instruction, the only one allowed to
        # have conditional outputs.
        last_instr: Instruction | None = None
        for thing in components:
            if isinstance(thing, Instruction):
                last_instr = thing
        # Second pass: validate each instruction and track the stack level.
        for thing in components:
            match thing:
                case Instruction() as instr:
                    if any(
                        eff.size for eff in instr.input_effects + instr.output_effects
                    ):
                        # TODO: Eventually this will be needed, at least for macros.
                        self.error(
                            f"Instruction {instr.name!r} has variable-sized stack effect, "
                            "which are not supported in macro instructions",
                            instr.inst,  # TODO: Pass name+location of macro
                        )
                    if any(eff.cond for eff in instr.input_effects):
                        self.error(
                            f"Instruction {instr.name!r} has conditional input stack effect, "
                            "which are not supported in macro instructions",
                            instr.inst,  # TODO: Pass name+location of macro
                        )
                    if (
                        any(eff.cond for eff in instr.output_effects)
                        and instr is not last_instr
                    ):
                        self.error(
                            f"Instruction {instr.name!r} has conditional output stack effect, "
                            "but is not the last instruction in a macro",
                            instr.inst,  # TODO: Pass name+location of macro
                        )
                    # Pop all inputs, remember the low-water mark, then push
                    # the outputs one by one and remember the high-water mark.
                    current -= len(instr.input_effects)
                    lowest = min(lowest, current)
                    for eff in instr.output_effects:
                        if eff.cond:
                            # Keyed by the level the conditional output lands on
                            conditions[current] = eff.cond
                        current += 1
                    highest = max(highest, current)
                case parsing.CacheEffect():
                    pass
                case _:
                    typing.assert_never(thing)
        # At this point, 'current' is the net stack effect,
        # and 'lowest' and 'highest' are the extremes.
        # Note that 'lowest' may be negative.
        # One temporary per level between the extremes; `i` counts down from
        # highest - lowest to 1, and level `highest - i` picks up any
        # condition recorded for it above.
        stack = [
            StackEffect(f"_tmp_{i}", "", conditions.get(highest - i, ""))
            for i in reversed(range(1, highest - lowest + 1))
        ]
        return stack, -lowest
--- a/Tools/cases_generator/flags.py
+++ b/Tools/cases_generator/flags.py
@ -0,0 +1,102 @@
 import dataclasses
 from formatting import Formatter
 import lexer as lx
 import parsing
@dataclasses.dataclass
class InstructionFlags:
    """Boolean properties of an instruction, with a bitmask encoding.

    Each field is one flag. The bit for a flag is determined by its
    position in the field order (first field is bit 0).
    """

    HAS_ARG_FLAG: bool
    HAS_CONST_FLAG: bool
    HAS_NAME_FLAG: bool
    HAS_JUMP_FLAG: bool
    HAS_FREE_FLAG: bool
    HAS_LOCAL_FLAG: bool

    def __post_init__(self):
        """Compute the flag-name -> bit-value table for this instance."""
        self.bitmask = {}
        for position, flag_name in enumerate(self.names()):
            self.bitmask[flag_name] = 1 << position

    @staticmethod
    def fromInstruction(instr: parsing.Node):
        """Derive the flags from the identifiers used in `instr`."""
        has_free = any(
            variable_used(instr, cell_api)
            for cell_api in ("PyCell_New", "PyCell_GET", "PyCell_SET")
        )
        touches_locals = variable_used(instr, "GETLOCAL") or variable_used(
            instr, "SETLOCAL"
        )
        return InstructionFlags(
            HAS_ARG_FLAG=variable_used(instr, "oparg"),
            HAS_CONST_FLAG=variable_used(instr, "FRAME_CO_CONSTS"),
            HAS_NAME_FLAG=variable_used(instr, "FRAME_CO_NAMES"),
            HAS_JUMP_FLAG=variable_used(instr, "JUMPBY"),
            HAS_FREE_FLAG=has_free,
            # The free flag takes precedence over the local flag
            HAS_LOCAL_FLAG=touches_locals and not has_free,
        )

    @staticmethod
    def newEmpty():
        """Return an instance with every flag cleared."""
        return InstructionFlags(False, False, False, False, False, False)

    def add(self, other: "InstructionFlags") -> None:
        """Turn on, in place, every flag that is set in `other`."""
        for flag_name, flag_value in dataclasses.asdict(other).items():
            if flag_value:
                setattr(self, flag_name, flag_value)

    def names(self, value=None):
        """Return all flag names, or only those whose value equals `value`."""
        as_dict = dataclasses.asdict(self)
        if value is None:
            return as_dict.keys()
        return [n for n, v in as_dict.items() if v == value]

    def bitmap(self) -> int:
        """Return the OR of the bits of all flags that are set."""
        bits = 0
        for flag_name in self.names():
            if getattr(self, flag_name):
                bits |= self.bitmask[flag_name]
        return bits

    @classmethod
    def emit_macros(cls, out: Formatter):
        """Emit the flag #defines and the OPCODE_HAS_* accessor macros."""
        bitmask = cls.newEmpty().bitmask
        for name, value in bitmask.items():
            out.emit(f"#define {name} ({value})")

        suffix_len = len("_FLAG")
        for name in bitmask:
            out.emit(
                f"#define OPCODE_{name[:-suffix_len]}(OP) "
                f"(_PyOpcode_opcode_metadata[OP].flags & ({name}))"
            )
 def variable_used(node: parsing.Node, name: str) -> bool:
    """Return True if `name` appears as an IDENTIFIER token in `node`."""
    for token in node.tokens:
        if token.kind == "IDENTIFIER" and token.text == name:
            return True
    return False
 def variable_used_unspecialized(node: parsing.Node, name: str) -> bool:
    """Like variable_used(), but skips #if ENABLE_SPECIALIZATION blocks.

    Tokens from an `#if ENABLE_SPECIALIZATION` up to the next `#else` or
    `#endif` are ignored when looking for `name`.
    """
    all_tokens = node.tokens
    skipping = False
    for idx, token in enumerate(all_tokens):
        if token.kind == "MACRO":
            directive = "".join(token.text.split())
            # TODO: Handle nested #if
            if directive == "#if":
                has_next = idx + 1 < len(all_tokens)
                if has_next and all_tokens[idx + 1].text == "ENABLE_SPECIALIZATION":
                    skipping = True
            elif directive in ("#else", "#endif"):
                skipping = False
        if skipping:
            continue
        if token.kind == "IDENTIFIER" and token.text == name:
            return True
    return False
--- a/Tools/cases_generator/formatting.py
+++ b/Tools/cases_generator/formatting.py
@ -0,0 +1,188 @@
 import contextlib
 import re
 import typing
 from parsing import StackEffect
 UNUSED = "unused"
 class Formatter:
    """Wraps an output stream with the ability to indent etc."""
    stream: typing.TextIO
    prefix: str
    emit_line_directives: bool = False
    lineno: int  # Next line number, 1-based
    filename: str  # Slightly improved stream.filename
    nominal_lineno: int
    nominal_filename: str
    def __init__(
        self,
        stream: typing.TextIO,
        indent: int,
        emit_line_directives: bool = False,
        comment: str = "//",
    ) -> None:
        """Wrap `stream`, starting with `indent` spaces of indentation.

        Both the real and the nominal position start at line 1 of the
        stream's prettified name.
        """
        self.stream = stream
        self.prefix = " " * indent
        self.emit_line_directives = emit_line_directives
        self.comment = comment
        pretty_name = prettify_filename(self.stream.name)
        self.filename = self.nominal_filename = pretty_name
        self.lineno = self.nominal_lineno = 1
    def write_raw(self, s: str) -> None:
        """Write `s` verbatim and advance both line counters by its newlines."""
        self.stream.write(s)
        added_lines = s.count("\n")
        self.lineno = self.lineno + added_lines
        self.nominal_lineno = self.nominal_lineno + added_lines
    def emit(self, arg: str) -> None:
        """Write `arg` as one indented line; an empty `arg` gives a bare newline."""
        line = f"{self.prefix}{arg}\n" if arg else "\n"
        self.write_raw(line)
    def set_lineno(self, lineno: int, filename: str) -> None:
        """Emit a #line directive if the nominal position needs to change.

        Does nothing unless line directives are enabled and the requested
        position differs from the current nominal one.
        """
        if not self.emit_line_directives:
            return
        if lineno == self.nominal_lineno and filename == self.nominal_filename:
            return
        self.emit(f'#line {lineno} "{filename}"')
        # Assigned after emit(), which itself advances nominal_lineno.
        self.nominal_lineno = lineno
        self.nominal_filename = filename
    def reset_lineno(self) -> None:
        """Point the nominal position back at the real output position."""
        in_sync = (
            self.lineno == self.nominal_lineno
            and self.filename == self.nominal_filename
        )
        if not in_sync:
            # +1 accounts for the line taken up by the directive itself
            self.set_lineno(self.lineno + 1, self.filename)
    @contextlib.contextmanager
    def indent(self):
        """Context manager that indents emitted lines by four more spaces.

        The previous indentation is restored when the `with` body exits,
        including when it exits through an exception.
        """
        self.prefix += "    "
        try:
            yield
        finally:
            # Without try/finally an exception raised inside the `with`
            # body would leave the extra indentation in place for good.
            self.prefix = self.prefix[:-4]
    @contextlib.contextmanager
    def block(self, head: str, tail: str = ""):
        """Context manager emitting `head {`, an indented body, then `}tail`.

        With an empty `head` the opening line is just `{`.
        """
        opening = head + " {" if head else "{"
        self.emit(opening)
        with self.indent():
            yield
        closing = "}" + tail
        self.emit(closing)
    def stack_adjust(
        self,
        input_effects: list[StackEffect],
        output_effects: list[StackEffect],
    ):
        """Emit the STACK_SHRINK/STACK_GROW calls for the given effects.

        Symbolic sizes are emitted only when input and output differ; the
        numeric sizes are emitted as a single net shrink or grow.
        """
        shrink, isym = list_effect_size(input_effects)
        grow, osym = list_effect_size(output_effects)
        symbolic_differs = isym != osym
        net = grow - shrink
        if isym and symbolic_differs:
            self.emit(f"STACK_SHRINK({isym});")
        if net < 0:
            self.emit(f"STACK_SHRINK({-net});")
        elif net > 0:
            self.emit(f"STACK_GROW({net});")
        if osym and symbolic_differs:
            self.emit(f"STACK_GROW({osym});")
    def declare(self, dst: StackEffect, src: StackEffect | None):
        """Emit a C declaration for `dst`, optionally initialized from `src`.

        Nothing is emitted for unused variables or for a condition of "0".
        Without `src`, a conditional variable is initialized to NULL and an
        unconditional one is left uninitialized.
        """
        if dst.name == UNUSED or dst.cond == "0":
            return
        typ = f"{dst.type}" if dst.type else "PyObject *"
        if src:
            init = f" = {self.cast(dst, src)}{src.name}"
        else:
            init = " = NULL" if dst.cond else ""
        # Pointer types already end in '*', so no separating space is needed
        sepa = "" if typ.endswith("*") else " "
        self.emit(f"{typ}{sepa}{dst.name}{init};")
    def assign(self, dst: StackEffect, src: StackEffect):
        """Emit the C statement that stores `src` into `dst`.

        Unused and sized (array) sources are skipped. Register destinations
        of the form REG(opargN) use Py_XSETREF. Otherwise a plain assignment
        is emitted, dropped when the condition is "0" and wrapped in an
        `if` for any condition other than "1".
        """
        # Don't write sized arrays -- it's up to the user code.
        if src.name == UNUSED or src.size:
            return
        cast = self.cast(dst, src)
        if re.match(r"^REG\(oparg(\d+)\)$", dst.name):
            self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
            return
        if src.cond == "0":
            # It will not be executed
            return
        stmt = f"{dst.name} = {cast}{src.name};"
        if src.cond and src.cond != "1":
            stmt = f"if ({src.cond}) {{ {stmt} }}"
        self.emit(stmt)
    def cast(self, dst: StackEffect, src: StackEffect) -> str:
        """Return a C cast to `dst`'s type, or "" when the types already match."""
        if src.type == dst.type:
            return ""
        return f"({dst.type or 'PyObject *'})"
 def prettify_filename(filename: str) -> str:
    """Return `filename` with forward slashes and without './' or '.new'.

    Backslashes become '/', one leading './' is dropped and one trailing
    '.new' is dropped.
    """
    # Make filename more user-friendly and less platform-specific,
    # it is only used for error reporting at this point.
    pretty = filename.replace("\\", "/")
    pretty = pretty.removeprefix("./")
    return pretty.removesuffix(".new")
 def list_effect_size(effects: list[StackEffect]) -> tuple[int, str]:
    """Return the combined (numeric, symbolic) size of several stack effects.

    Numeric parts are summed; non-empty symbolic parts are parenthesized
    where needed and joined with ' + '.
    """
    sizes = [effect_size(effect) for effect in effects]
    numeric = sum(diff for diff, _ in sizes)
    symbolic = [maybe_parenthesize(sym) for _, sym in sizes if sym]
    return numeric, " + ".join(symbolic)
 def effect_size(effect: StackEffect) -> tuple[int, str]:
    """Return the 'size' impact of a stack effect.

    The result is a tuple (numeric, symbolic):
    - numeric is the statically known size as an int
    - symbolic is a string for a variable size (e.g. 'oparg*2')

    At most one of the two is non-zero / non-empty. An array effect gives
    (0, size); a condition of "0" or "1" gives that number; any other
    condition gives (0, 'cond ? 1 : 0'); a plain effect gives (1, "").
    """
    size, cond = effect.size, effect.cond
    if size:
        assert not cond, "Array effects cannot have a condition"
        return 0, size
    if not cond:
        return 1, ""
    if cond in ("0", "1"):
        return int(cond), ""
    return 0, f"{maybe_parenthesize(cond)} ? 1 : 0"
 def maybe_parenthesize(sym: str) -> str:
    """Wrap `sym` in parentheses unless it is operator-free.

    Strings made up only of whitespace, word characters and '*' are
    returned unchanged; '*' is tolerated because it is common and harmless
    where the symbolic size is used.
    """
    is_simple = re.match(r"^[\s\w*]+$", sym) is not None
    return sym if is_simple else f"({sym})"
 def string_effect_size(arg: tuple[int, str]) -> str:
    """Render a (numeric, symbolic) size pair as a single expression string.

    Gives 'N + sym' when both parts are present, the symbolic part alone
    when the number is zero, and the number alone otherwise.
    """
    numeric, symbolic = arg
    if not symbolic:
        return str(numeric)
    if numeric:
        return f"{numeric} + {symbolic}"
    return symbolic
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
--- a/Tools/cases_generator/instructions.py
+++ b/Tools/cases_generator/instructions.py
@ -0,0 +1,424 @@
 import dataclasses
 import re
 import typing
 from flags import InstructionFlags, variable_used_unspecialized
 from formatting import (
    Formatter,
    UNUSED,
    string_effect_size,
    list_effect_size,
    maybe_parenthesize,
 )
 import lexer as lx
 import parsing
 from parsing import StackEffect
 BITS_PER_CODE_UNIT = 16
@dataclasses.dataclass
 class ActiveCacheEffect:
    """Wraps a CacheEffect that is actually used, in context."""
    # The parsed cache effect being wrapped.
    effect: parsing.CacheEffect
    # Position of the effect within the instruction's cache.
    # NOTE(review): presumably counted in cache entries (the running sum
    # of preceding cache effect sizes) -- confirm against the code that
    # constructs these.
    offset: int
 FORBIDDEN_NAMES_IN_UOPS = (
    "resume_with_error",
    "kwnames",
    "next_instr",
    "oparg1",  # Proxy for super-instructions like LOAD_FAST_LOAD_FAST
    "JUMPBY",
    "DISPATCH",
    "INSTRUMENTED_JUMP",
    "throwflag",
    "exception_unwind",
    "import_from",
    "import_name",
    "_PyObject_CallNoArgs",  # Proxy for BEFORE_WITH
 )
 # Interpreter tiers
 TIER_ONE: typing.Final = 1  # Specializing adaptive interpreter (PEP 659)
 TIER_TWO: typing.Final = 2  # Experimental tracing interpreter
 Tiers: typing.TypeAlias = typing.Literal[1, 2]
@dataclasses.dataclass
class Instruction:
    """One instruction (or op) definition, analyzed and ready to be emitted.

    The constructor splits the definition's inputs into cache effects and
    stack effects, determines which leading stack entries keep their place,
    and computes the format string and flags.  write() and write_body()
    emit the corresponding C code through a Formatter.
    """

    # Parts of the underlying instruction definition
    inst: parsing.InstDef
    kind: typing.Literal["inst", "op"]
    name: str
    block: parsing.Block
    # Lines of Block.text as returned by extract_block_text(): without the
    # enclosing braces and without a trailing CHECK_EVAL_BREAKER() line
    block_text: list[str]
    block_line: int  # Line number of block_text[0] in the original code

    # Computed by constructor
    always_exits: bool  # Result of always_exits(block_text)
    cache_offset: int  # Total size of all cache effects
    cache_effects: list[parsing.CacheEffect]
    input_effects: list[StackEffect]
    output_effects: list[StackEffect]
    # Names shared by the leading run of inputs and outputs
    unmoved_names: frozenset[str]
    # "IB" or "IX", plus "C" and "0" padding when there are cache entries
    instr_fmt: str
    instr_flags: InstructionFlags
    active_caches: list[ActiveCacheEffect]  # Cache effects not named UNUSED

    # Set later
    family: parsing.Family | None = None
    predicted: bool = False

    def __init__(self, inst: parsing.InstDef):
        """Derive every computed attribute from the parsed definition *inst*."""
        self.inst = inst
        self.kind = inst.kind
        self.name = inst.name
        self.block = inst.block
        # check_eval_breaker is not a declared field but is set here too.
        text, has_eval_breaker, first_line = extract_block_text(self.block)
        self.block_text = text
        self.check_eval_breaker = has_eval_breaker
        self.block_line = first_line
        self.always_exits = always_exits(self.block_text)

        # The inputs mix cache effects and stack effects; separate them.
        self.cache_effects = []
        self.input_effects = []
        for effect in inst.inputs:
            if isinstance(effect, parsing.CacheEffect):
                self.cache_effects.append(effect)
            if isinstance(effect, StackEffect):
                self.input_effects.append(effect)
        self.cache_offset = sum(cache.size for cache in self.cache_effects)
        self.output_effects = inst.outputs  # For consistency/completeness

        # Collect names for as long as input and output agree pairwise.
        shared_prefix: set[str] = set()
        for before, after in zip(self.input_effects, self.output_effects):
            if before.name != after.name:
                break
            shared_prefix.add(before.name)
        self.unmoved_names = frozenset(shared_prefix)

        self.instr_flags = InstructionFlags.fromInstruction(inst)

        # Record each used cache effect together with its running offset.
        self.active_caches = []
        position = 0
        for cache in self.cache_effects:
            if cache.name != UNUSED:
                self.active_caches.append(ActiveCacheEffect(cache, position))
            position += cache.size

        fmt = "IB" if self.instr_flags.HAS_ARG_FLAG else "IX"
        if position:
            fmt += "C" + "0" * (position - 1)
        self.instr_fmt = fmt

    def is_viable_uop(self) -> bool:
        """Whether this instruction is viable as a uop."""
        # Debugging aid: rebind dprint to print (for instance only when
        # self.name.startswith("CALL")) to see why instructions are skipped.
        dprint: typing.Callable[..., None] = lambda *args, **kwargs: None

        if self.name == "EXIT_TRACE":
            return True  # This has 'return frame' but it's okay
        if self.always_exits:
            dprint(f"Skipping {self.name} because it always exits")
            return False
        if len(self.active_caches) > 1:
            # More than one used cache entry is not supported
            return False

        # NOTE: To disallow unspecialized uops, use variable_used()
        # instead of variable_used_unspecialized() here.
        offenders = [
            name
            for name in FORBIDDEN_NAMES_IN_UOPS
            if variable_used_unspecialized(self.inst, name)
        ]
        for name in offenders:
            dprint(f"Skipping {self.name} because it uses {name}")
        return not offenders

    def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:
        """Write one instruction, sans prologue and epilogue."""
        # The head of a family checks the family's declared cache size.
        family = self.family
        if family and self.name == family.name:
            expected_size = family.size
            if expected_size:
                out.emit(
                    f"static_assert({expected_size} == "
                    f'{self.cache_offset}, "incorrect cache size");'
                )

        # Declare and initialize the inputs, walking down from the stack top.
        inputs_top_down = list(reversed(self.input_effects))
        for depth, ieffect in enumerate(inputs_top_down, start=1):
            where = maybe_parenthesize(
                string_effect_size(list_effect_size(list(inputs_top_down[:depth])))
            )
            if ieffect.size:
                expr, ctype = f"(stack_pointer - {where})", "PyObject **"
            elif ieffect.cond:
                expr = f"({ieffect.cond}) ? stack_pointer[-{where}] : NULL"
                ctype = ""
            else:
                expr, ctype = f"stack_pointer[-{where}]", ""
            out.declare(ieffect, StackEffect(expr, ctype))

        # Declare outputs that do not reuse an input's name.
        total_in = string_effect_size(list_effect_size(self.input_effects))
        consumed = {ieffect.name for ieffect in self.input_effects}
        for index, oeffect in enumerate(self.output_effects):
            if oeffect.name in consumed:
                continue
            if not oeffect.size:
                out.declare(oeffect, None)
                continue
            # Array outputs point into the stack: start at stack_pointer,
            # back off by the inputs, advance past the preceding outputs.
            below = string_effect_size(
                list_effect_size(list(self.output_effects[:index]))
            )
            base = "stack_pointer"
            if total_in != below:
                if total_in != "0":
                    base += f" - ({total_in})"
                if below != "0":
                    base += f" + {below}"
            out.declare(oeffect, StackEffect(base, "PyObject **"))

        self.write_body(out, 0, self.active_caches, tier=tier)

        # Nothing after the body is written if the block always exits
        if self.always_exits:
            return

        # Net stack growth/shrinkage
        out.stack_adjust(list(self.input_effects), list(self.output_effects))

        # Store the outputs, again walking down from the stack top.
        outputs_top_down = list(reversed(self.output_effects))
        for depth, oeffect in enumerate(outputs_top_down, start=1):
            if oeffect.name in self.unmoved_names:
                continue
            where = maybe_parenthesize(
                string_effect_size(list_effect_size(list(outputs_top_down[:depth])))
            )
            if oeffect.size:
                dst = StackEffect(f"stack_pointer - {where}", "PyObject **")
            else:
                dst = StackEffect(f"stack_pointer[-{where}]", "")
            out.assign(dst, oeffect)

        # Only tier one advances next_instr past the cache entries
        if tier == TIER_ONE and self.cache_offset:
            out.emit(f"next_instr += {self.cache_offset};")

    def write_body(
        self,
        out: Formatter,
        dedent: int,
        active_caches: list[ActiveCacheEffect],
        tier: Tiers = TIER_ONE,
    ) -> None:
        """Write the instruction body.

        *dedent* must be zero or negative; every body line is prefixed
        with -dedent spaces.  *active_caches* lists the cache effects to
        declare before the body.
        """
        # Declare and initialize one variable per active cache effect
        for active in active_caches:
            cache = active.effect
            width = cache.size * BITS_PER_CODE_UNIT
            if width == 64:
                # NOTE: We assume that 64-bit data in the cache
                # is always an object pointer.
                # If this becomes false, we need a way to specify
                # syntactically what type the cache data is.
                ctype, reader = "PyObject *", "read_obj"
            else:
                ctype, reader = f"uint{width}_t ", f"read_u{width}"
            if tier == TIER_ONE:
                value = f"{reader}(&next_instr[{active.offset}].cache)"
            else:
                value = f"({ctype.strip()})operand"
            out.emit(f"{ctype}{cache.name} = {value};")

        # Copy the body, expanding ERROR_IF() and DECREF_INPUTS()
        assert dedent <= 0
        pad = " " * -dedent
        skipped = self.unmoved_names | frozenset({UNUSED, "null"})
        context = self.block.context
        assert context is not None and context.owner is not None
        filename = context.owner.filename
        for lineno, line in enumerate(self.block_text, start=self.block_line):
            out.set_lineno(lineno, filename)
            error_if = re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$", line)
            if error_if:
                space, cond, label = error_if.groups()
                self._write_error_if(out, pad + space, cond, label)
                continue
            decref = re.match(r"(\s*)DECREF_INPUTS\(\);\s*(?://.*)?$", line)
            if decref:
                out.reset_lineno()
                self._write_decref_inputs(out, pad + decref.group(1), skipped)
                continue
            out.write_raw(pad + line)
        out.reset_lineno()

    def _write_error_if(
        self, out: Formatter, space: str, cond: str, label: str
    ) -> None:
        """Expand one ERROR_IF(cond, label) line into a conditional goto."""
        # ERROR_IF() must pop the inputs from the stack.
        # The code block is responsible for DECREF()ing them.
        # NOTE: If the label doesn't exist, just add it to ceval.c.
        # Effects common to the bottom of inputs and outputs are not
        # popped: these aren't DECREF'ed so they can stay.
        inputs = self.input_effects
        outputs = self.output_effects
        shared = 0
        while (
            shared < len(inputs)
            and shared < len(outputs)
            and inputs[shared] == outputs[shared]
        ):
            shared += 1
        ninputs, symbolic = list_effect_size(list(inputs[shared:]))
        if ninputs:
            label = f"pop_{ninputs}_{label}"
        if symbolic:
            out.write_raw(
                f"{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n"
            )
        else:
            out.write_raw(f"{space}if ({cond}) goto {label};\n")

    def _write_decref_inputs(
        self, out: Formatter, space: str, names_to_skip: frozenset[str]
    ) -> None:
        """Expand one DECREF_INPUTS() line into a DECREF per input."""
        for ieff in self.input_effects:
            if ieff.name in names_to_skip:
                continue
            if ieff.size:
                # Array input: release every element
                for text in (
                    f"{space}for (int _i = {ieff.size}; --_i >= 0;) {{\n",
                    f"{space}    Py_DECREF({ieff.name}[_i]);\n",
                    f"{space}}}\n",
                ):
                    out.write_raw(text)
            else:
                # Conditional inputs may be NULL, hence XDECREF
                decref = "XDECREF" if ieff.cond else "DECREF"
                out.write_raw(f"{space}Py_{decref}({ieff.name});\n")
 # Either an Instruction or a bare parsing.CacheEffect.
 InstructionOrCacheEffect = Instruction | parsing.CacheEffect
 # Pairs of stack effects.  Component.write_body() passes each pair to
 # Formatter.declare() (input mapping) or Formatter.assign() (output
 # mapping), with the first element as the source/destination variable.
 StackEffectMapping = list[tuple[StackEffect, StackEffect]]
@dataclasses.dataclass
class Component:
    """An instruction together with the stack mappings used to emit it.

    write_body() emits the instruction's body inside its own block,
    declaring the mapped inputs first and assigning the mapped outputs
    afterwards.
    """

    instr: Instruction
    input_mapping: StackEffectMapping  # (source variable, input effect) pairs
    output_mapping: StackEffectMapping  # (destination variable, output effect) pairs
    active_caches: list[ActiveCacheEffect]

    def write_body(self, out: Formatter) -> None:
        """Emit declarations, the instruction body and output assignments."""
        with out.block(""):
            # Inputs are declared from their mapped source variables.
            declared = {effect.name for _, effect in self.input_mapping}
            for source, effect in self.input_mapping:
                out.declare(effect, source)

            # Outputs get a bare declaration unless an input already has
            # the same name.
            for _, effect in self.output_mapping:
                if effect.name in declared:
                    continue
                out.declare(effect, None)

            # dedent=-4 makes write_body() prefix each body line with
            # four spaces.
            self.instr.write_body(out, -4, self.active_caches)

            for destination, effect in self.output_mapping:
                out.assign(destination, effect)
 # The parts of a macro: Components and bare cache effects
 # (the type of MacroInstruction.parts).
 MacroParts = list[Component | parsing.CacheEffect]
@dataclasses.dataclass
 class MacroInstruction:
    """A macro instruction.

    Plain data holder; the fields are filled in by code outside this module.
    """
    name: str
    stack: list[StackEffect]
    # NOTE(review): initial_sp/final_sp look like stack-pointer positions
    # relative to `stack` before and after the macro; confirm with the code
    # that builds MacroInstruction.
    initial_sp: int
    final_sp: int
    instr_fmt: str  # Same kind of field as Instruction.instr_fmt
    instr_flags: InstructionFlags
    macro: parsing.Macro  # The parsed macro definition
    parts: MacroParts  # Components and bare cache effects
    cache_offset: int
    predicted: bool = False
@dataclasses.dataclass
 class PseudoInstruction:
    """A pseudo instruction.

    Plain data holder with a name, a list of target instructions, and the
    same format/flags fields as Instruction.
    """
    name: str
    # NOTE(review): presumably the real instructions this pseudo
    # instruction can stand for; confirm with the code that builds it.
    targets: list[Instruction]
    instr_fmt: str
    instr_flags: InstructionFlags
@dataclasses.dataclass
 class OverriddenInstructionPlaceHolder:
    # Carries only a name.
    # NOTE(review): presumably stands in for an instruction definition that
    # a later `override` definition replaced; confirm with the analyzer.
    name: str
 # Any of the three instruction classes defined above
 # (OverriddenInstructionPlaceHolder is not included).
 AnyInstruction = Instruction | MacroInstruction | PseudoInstruction
 def extract_block_text(block: parsing.Block) -> tuple[list[str], bool, int]:
     """Return the lines inside a block's braces, with bookkeeping.

     The result is a triple (lines, check_eval_breaker, first_line):
     - lines: the block's text lines (line endings kept) without the
       opening and closing brace lines, without blank lines after the
       opening trimming or before the closing brace, and without a
       final "CHECK_EVAL_BREAKER();" line;
     - check_eval_breaker: whether such a final line was removed;
     - first_line: the block's starting line number advanced past the
       leading blank lines and the opening brace line.

     Raises AssertionError if, after trimming blank lines, the text does
     not begin with a "{" line and end with a "}" line.
     """
     text_lines = block.text.splitlines(True)
     opening: lx.Token = block.tokens[0]  # IndexError means the context is broken
     first_line = opening.begin[0]

     # Find the span that remains after trimming blank lines at both ends
     start, stop = 0, len(text_lines)
     while start < stop and not text_lines[start].strip():
         start += 1
     while stop > start and not text_lines[stop - 1].strip():
         stop -= 1
     body = text_lines[start:stop]
     first_line += start

     # Drop the brace lines that must now be first and last
     assert body and body[0].strip() == "{"
     assert body and body[-1].strip() == "}"
     body = body[1:-1]
     first_line += 1

     # Blank lines just before the closing brace go too
     while body and not body[-1].strip():
         body.pop()

     # Separate CHECK_EVAL_BREAKER() macro from end
     check_eval_breaker = bool(body) and body[-1].strip() == "CHECK_EVAL_BREAKER();"
     if check_eval_breaker:
         body.pop()

     return body, check_eval_breaker, first_line
 def always_exits(lines: list[str]) -> bool:
     """Report whether the last line of *lines* is an unconditional exit.

     Only the final line is examined, after stripping trailing
     whitespace.  It counts as an exit when it is indented by exactly
     twelve spaces and then starts with one of the recognized prefixes
     (goto, return, DISPATCH, GO_TO_, Py_UNREACHABLE(), ERROR_IF(true, ).
     An empty list yields False.
     """
     if not lines:
         return False
     indent = " " * 12
     last = lines[-1].rstrip()
     # Indent must match exactly (TODO: Do something better)
     if not last.startswith(indent):
         return False
     exit_prefixes = (
         "goto ",
         "return ",
         "DISPATCH",
         "GO_TO_",
         "Py_UNREACHABLE()",
         "ERROR_IF(true, ",
     )
     return last[len(indent) :].startswith(exit_prefixes)
--- a/Tools/cases_generator/parsing.py
+++ b/Tools/cases_generator/parsing.py
@ -1,7 +1,7 @@
 """Parser for bytecodes.inst."""
 from dataclasses import dataclass, field
-from typing import NamedTuple, Callable, TypeVar, Literal
+from typing import NamedTuple, Callable, TypeVar, Literal, cast
 import lexer as lx
 from plexer import PLexer
@ -19,7 +19,7 @@ def contextual(func: Callable[[P], N | None]) -> Callable[[P], N | None]:
        res = func(self)
        if res is None:
            self.setpos(begin)
-            return
+            return None
        end = self.getpos()
        res.context = Context(begin, end, self)
        return res
@ -147,6 +147,7 @@ class Parser(PLexer):
            return family
        if pseudo := self.pseudo_def():
            return pseudo
        return None
    @contextual
    def inst_def(self) -> InstDef | None:
@ -166,7 +167,8 @@ class Parser(PLexer):
        # TODO: Make INST a keyword in the lexer.
        override = bool(self.expect(lx.OVERRIDE))
        register = bool(self.expect(lx.REGISTER))
-        if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("inst", "op"):
+        if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text in ("inst", "op"):
            kind = cast(Literal["inst", "op"], tkn.text)
            if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)):
                name = tkn.text
                if self.expect(lx.COMMA):
@ -190,6 +192,7 @@ class Parser(PLexer):
        # input (',' input)*
        here = self.getpos()
        if inp := self.input():
            inp = cast(InputEffect, inp)
            near = self.getpos()
            if self.expect(lx.COMMA):
                if rest := self.inputs():
@ -232,6 +235,7 @@ class Parser(PLexer):
                    raise self.make_syntax_error(f"Expected integer, got {num!r}")
                else:
                    return CacheEffect(tkn.text, size)
        return None
    @contextual
    def stack_effect(self) -> StackEffect | None:
@ -258,6 +262,7 @@ class Parser(PLexer):
                type_text = "PyObject **"
                size_text = size.text.strip()
            return StackEffect(tkn.text, type_text, cond_text, size_text)
        return None
    @contextual
    def expression(self) -> Expression | None:
@ -288,6 +293,7 @@ class Parser(PLexer):
    def op(self) -> OpName | None:
        """Return an OpName for the next identifier token, or None."""
        tkn = self.expect(lx.IDENTIFIER)
        return OpName(tkn.text) if tkn else None
    @contextual
    def macro_def(self) -> Macro | None:
@ -300,16 +306,20 @@ class Parser(PLexer):
                                self.require(lx.SEMI)
                                res = Macro(tkn.text, uops)
                                return res
        return None
    def uops(self) -> list[UOp] | None:
        """Parse one uop followed by any number of '+' uop pairs.

        Returns the list of uops, or None when no uop is found at the
        current position.  Raises a syntax error when a '+' is not
        followed by a uop.
        """
        head = self.uop()
        if not head:
            return None
        collected = [cast(UOp, head)]
        while self.expect(lx.PLUS):
            following = self.uop()
            if not following:
                raise self.make_syntax_error("Expected op name or cache effect")
            collected.append(cast(UOp, following))
        return collected
    @contextual
    def uop(self) -> UOp | None:
@ -327,6 +337,7 @@ class Parser(PLexer):
                raise self.make_syntax_error("Expected integer")
            else:
                return OpName(tkn.text)
        return None
    @contextual
    def family_def(self) -> Family | None:
@ -385,6 +396,7 @@ class Parser(PLexer):
    def block(self) -> Block | None:
        """Return a new Block if c_blob() yields a non-empty result, else None."""
        return Block() if self.c_blob() else None
    def c_blob(self) -> list[lx.Token]:
        tokens: list[lx.Token] = []