GH-98831: Implement array support in cases generator (#100912)

You can now write things like this: ``` inst(BUILD_STRING, (pieces[oparg] -- str)) { ... } inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) { ... } ``` Note that array output effects are only partially supported (they must be named `unused` or correspond to an input effect).
2025-08-19 00:00:48 +00:00 · 2023-01-17 15:59:19 -08:00 · 2023-01-17 15:59:19 -08:00 · 80e3e3423c
commit 80e3e3423c
parent f34176b77f
6 changed files with 398 additions and 177 deletions
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@ -15,14 +15,14 @@ import typing
 import parser
 from parser import StackEffect

-DEFAULT_INPUT = os.path.relpath(
-    os.path.join(os.path.dirname(__file__), "../../Python/bytecodes.c")
-)
-DEFAULT_OUTPUT = os.path.relpath(
-    os.path.join(os.path.dirname(__file__), "../../Python/generated_cases.c.h")
-)
+HERE = os.path.dirname(__file__)
+ROOT = os.path.join(HERE, "../..")
+THIS = os.path.relpath(__file__, ROOT)
+
+DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
+DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
 DEFAULT_METADATA_OUTPUT = os.path.relpath(
-    os.path.join(os.path.dirname(__file__), "../../Python/opcode_metadata.h")
+    os.path.join(ROOT, "Python/opcode_metadata.h")
 )
 BEGIN_MARKER = "// BEGIN BYTECODES //"
 END_MARKER = "// END BYTECODES //"
@ -48,6 +48,55 @@ arg_parser.add_argument(
 )


+def effect_size(effect: StackEffect) -> tuple[int, str]:
+    """Return the 'size' impact of a stack effect.
+
+    Returns a tuple (numeric, symbolic) where:
+
+    - numeric is an int giving the statically analyzable size of the effect
+    - symbolic is a string representing a variable effect (e.g. 'oparg*2')
+
+    At most one of these will be non-zero / non-empty.
+    """
+    if effect.size:
+        return 0, effect.size
+    else:
+        return 1, ""
+
+
+def maybe_parenthesize(sym: str) -> str:
+    """Add parentheses around a string if it contains an operator.
+
+    An exception is made for '*' which is common and harmless
+    in the context where the symbolic size is used.
+    """
+    if re.match(r"^[\s\w*]+$", sym):
+        return sym
+    else:
+        return f"({sym})"
+
+
+def list_effect_size(effects: list[StackEffect]) -> tuple[int, str]:
+    numeric = 0
+    symbolic: list[str] = []
+    for effect in effects:
+        diff, sym = effect_size(effect)
+        numeric += diff
+        if sym:
+            symbolic.append(maybe_parenthesize(sym))
+    return numeric, " + ".join(symbolic)
+
+
+def string_effect_size(arg: tuple[int, str]) -> str:
+    numeric, symbolic = arg
+    if numeric and symbolic:
+        return f"{numeric} + {symbolic}"
+    elif symbolic:
+        return symbolic
+    else:
+        return str(numeric)
+
+
 class Formatter:
    """Wraps an output stream with the ability to indent etc."""

@ -83,28 +132,43 @@ class Formatter:
            yield
        self.emit("}")

-    def stack_adjust(self, diff: int):
+    def stack_adjust(self, diff: int, input_effects: list[StackEffect], output_effects: list[StackEffect]):
+        # TODO: Get rid of 'diff' parameter
+        shrink, isym = list_effect_size(input_effects)
+        grow, osym = list_effect_size(output_effects)
+        diff += grow - shrink
+        if isym and isym != osym:
+            self.emit(f"STACK_SHRINK({isym});")
+        if diff < 0:
+            self.emit(f"STACK_SHRINK({-diff});")
        if diff > 0:
            self.emit(f"STACK_GROW({diff});")
-        elif diff < 0:
-            self.emit(f"STACK_SHRINK({-diff});")
+        if osym and osym != isym:
+            self.emit(f"STACK_GROW({osym});")

    def declare(self, dst: StackEffect, src: StackEffect | None):
        if dst.name == UNUSED:
            return
-        typ = f"{dst.type} " if dst.type else "PyObject *"
+        typ = f"{dst.type}" if dst.type else "PyObject *"
        init = ""
        if src:
            cast = self.cast(dst, src)
            init = f" = {cast}{src.name}"
-        self.emit(f"{typ}{dst.name}{init};")
+        sepa = "" if typ.endswith("*") else " "
+        self.emit(f"{typ}{sepa}{dst.name}{init};")

    def assign(self, dst: StackEffect, src: StackEffect):
        if src.name == UNUSED:
            return
        cast = self.cast(dst, src)
-        if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
+        if m := re.match(r"^PEEK\((.*)\)$", dst.name):
            self.emit(f"POKE({m.group(1)}, {cast}{src.name});")
+        elif m := re.match(r"^&PEEK\(.*\)$", dst.name):
+            # NOTE: MOVE_ITEMS() does not actually exist.
+            # The only supported output array forms are:
+            # - unused[...]
+            # - X[...] where X[...] matches an input array exactly
+            self.emit(f"MOVE_ITEMS({dst.name}, {src.name}, {src.size});")
        elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name):
            self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
        else:
@ -172,7 +236,7 @@ class Instruction:
            num_dummies = (num_regs // 2) * 2 + 1 - num_regs
            fmt = "I" + "B"*num_regs + "X"*num_dummies
        else:
-            if variable_used(inst.block, "oparg"):
+            if variable_used(inst, "oparg"):
                fmt = "IB"
            else:
                fmt = "IX"
@ -200,7 +264,6 @@ class Instruction:
    def write(self, out: Formatter) -> None:
        """Write one instruction, sans prologue and epilogue."""
        # Write a static assertion that a family's cache size is correct
-
        if family := self.family:
            if self.name == family.members[0]:
                if cache_size := family.size:
@ -211,8 +274,13 @@ class Instruction:

        if not self.register:
            # Write input stack effect variable declarations and initializations
-            for i, ieffect in enumerate(reversed(self.input_effects), 1):
-                src = StackEffect(f"PEEK({i})", "")
+            ieffects = list(reversed(self.input_effects))
+            for i, ieffect in enumerate(ieffects):
+                isize = string_effect_size(list_effect_size(ieffects[:i+1]))
+                if ieffect.size:
+                    src = StackEffect(f"&PEEK({isize})", "PyObject **")
+                else:
+                    src = StackEffect(f"PEEK({isize})", "")
                out.declare(ieffect, src)
        else:
            # Write input register variable declarations and initializations
@ -236,14 +304,19 @@ class Instruction:

        if not self.register:
            # Write net stack growth/shrinkage
-            diff = len(self.output_effects) - len(self.input_effects)
-            out.stack_adjust(diff)
+            out.stack_adjust(0, self.input_effects, self.output_effects)

            # Write output stack effect assignments
-            for i, oeffect in enumerate(reversed(self.output_effects), 1):
-                if oeffect.name not in self.unmoved_names:
-                    dst = StackEffect(f"PEEK({i})", "")
-                    out.assign(dst, oeffect)
+            oeffects = list(reversed(self.output_effects))
+            for i, oeffect in enumerate(oeffects):
+                if oeffect.name in self.unmoved_names:
+                    continue
+                osize = string_effect_size(list_effect_size(oeffects[:i+1]))
+                if oeffect.size:
+                    dst = StackEffect(f"&PEEK({osize})", "PyObject **")
+                else:
+                    dst = StackEffect(f"PEEK({osize})", "")
+                out.assign(dst, oeffect)
        else:
            # Write output register assignments
            for oeffect, reg in zip(self.output_effects, self.output_registers):
@ -287,19 +360,21 @@ class Instruction:
                # The code block is responsible for DECREF()ing them.
                # NOTE: If the label doesn't exist, just add it to ceval.c.
                if not self.register:
-                    ninputs = len(self.input_effects)
                    # Don't pop common input/output effects at the bottom!
                    # These aren't DECREF'ed so they can stay.
-                    for ieff, oeff in zip(self.input_effects, self.output_effects):
-                        if ieff.name == oeff.name:
-                            ninputs -= 1
-                        else:
-                            break
+                    ieffs = list(self.input_effects)
+                    oeffs = list(self.output_effects)
+                    while ieffs and oeffs and ieffs[0] == oeffs[0]:
+                        ieffs.pop(0)
+                        oeffs.pop(0)
+                    ninputs, symbolic = list_effect_size(ieffs)
+                    if ninputs:
+                        label = f"pop_{ninputs}_{label}"
                else:
-                    ninputs = 0
-                if ninputs:
+                    symbolic = ""
+                if symbolic:
                    out.write_raw(
-                        f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n"
+                        f"{extra}{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n"
                    )
                else:
                    out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
@ -635,6 +710,13 @@ class Analyzer:
        for thing in components:
            match thing:
                case Instruction() as instr:
+                    if any(eff.size for eff in instr.input_effects + instr.output_effects):
+                        # TODO: Eventually this will be needed, at least for macros.
+                        self.error(
+                            f"Instruction {instr.name!r} has variable-sized stack effect, "
+                            "which are not supported in super- or macro instructions",
+                            instr.inst,  # TODO: Pass name+location of super/macro
+                        )
                    current -= len(instr.input_effects)
                    lowest = min(lowest, current)
                    current += len(instr.output_effects)
@ -673,10 +755,8 @@ class Analyzer:

        with open(self.output_filename, "w") as f:
            # Write provenance header
-            f.write(
-                f"// This file is generated by {os.path.relpath(__file__)} --metadata\n"
-            )
-            f.write(f"// from {os.path.relpath(self.filename)}\n")
+            f.write(f"// This file is generated by {THIS} --metadata\n")
+            f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
            f.write(f"// Do not edit!\n")

            # Create formatter; the rest of the code uses this
@ -719,8 +799,11 @@ class Analyzer:
            n_popped = n_pushed = -1
            assert not instr.register
        else:
-            n_popped = len(instr.input_effects)
-            n_pushed = len(instr.output_effects)
+            n_popped, sym_popped = list_effect_size(instr.input_effects)
+            n_pushed, sym_pushed = list_effect_size(instr.output_effects)
+            if sym_popped or sym_pushed:
+                # TODO: Record symbolic effects (how?)
+                n_popped = n_pushed = -1
            if instr.register:
                directions: list[str] = []
                directions.extend("DIR_READ" for _ in instr.input_effects)
@ -754,8 +837,8 @@ class Analyzer:
        """Write instructions to output file."""
        with open(self.output_filename, "w") as f:
            # Write provenance header
-            f.write(f"// This file is generated by {os.path.relpath(__file__)}\n")
-            f.write(f"// from {os.path.relpath(self.filename)}\n")
+            f.write(f"// This file is generated by {THIS}\n")
+            f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
            f.write(f"// Do not edit!\n")

            # Create formatter; the rest of the code uses this
@ -848,7 +931,9 @@ class Analyzer:

            yield

-            self.out.stack_adjust(up.final_sp - up.initial_sp)
+            # TODO: Use slices of up.stack instead of numeric values
+            self.out.stack_adjust(up.final_sp - up.initial_sp, [], [])
+
            for i, var in enumerate(reversed(up.stack[: up.final_sp]), 1):
                dst = StackEffect(f"PEEK({i})", "")
                self.out.assign(dst, var)
@ -899,9 +984,9 @@ def always_exits(lines: list[str]) -> bool:
    )


-def variable_used(block: parser.Block, name: str) -> bool:
-    """Determine whether a variable with a given name is used in a block."""
-    return any(token.kind == "IDENTIFIER" and token.text == name for token in block.tokens)
+def variable_used(node: parser.Node, name: str) -> bool:
+    """Determine whether a variable with a given name is used in a node."""
+    return any(token.kind == "IDENTIFIER" and token.text == name for token in node.tokens)


 def main():