GH-98831: Simple input-output stack effects for bytecodes.c (#99120)

2025-08-03 08:34:29 +00:00 · 2022-11-08 08:22:56 -08:00 · 2022-11-08 08:22:56 -08:00 · f1a654648b
commit f1a654648b
parent c7065ce019
5 changed files with 384 additions and 406 deletions
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@ -5,10 +5,10 @@
 # TODO: Reuse C generation framework from deepfreeze.py?

 import argparse
-import io
 import os
 import re
 import sys
+from typing import TextIO, cast

 import parser
 from parser import InstDef  # TODO: Use parser.InstDef
@ -20,13 +20,13 @@ arg_parser.add_argument("-c", "--compare", action="store_true")
 arg_parser.add_argument("-q", "--quiet", action="store_true")


-def eopen(filename: str, mode: str = "r"):
+def eopen(filename: str, mode: str = "r") -> TextIO:
    if filename == "-":
        if "r" in mode:
            return sys.stdin
        else:
            return sys.stdout
-    return open(filename, mode)
+    return cast(TextIO, open(filename, mode))


 def parse_cases(
@ -67,42 +67,72 @@ def always_exits(block: parser.Block) -> bool:
    return line.startswith(("goto ", "return ", "DISPATCH", "GO_TO_", "Py_UNREACHABLE()"))


-def write_cases(f: io.TextIOBase, instrs: list[InstDef], supers: list[parser.Super]):
-    predictions = set()
-    for inst in instrs:
-        for target in re.findall(r"(?:PREDICT|GO_TO_INSTRUCTION)\((\w+)\)", inst.block.text):
+def write_instr(instr: InstDef, predictions: set[str], indent: str, f: TextIO, dedent: int = 0):
+    assert instr.block
+    if dedent < 0:
+        indent += " " * -dedent
+    # TODO: Is it better to count forward or backward?
+    for i, input in enumerate(reversed(instr.inputs), 1):
+        f.write(f"{indent}    PyObject *{input} = PEEK({i});\n")
+    for output in instr.outputs:
+        if output not in instr.inputs:
+            f.write(f"{indent}    PyObject *{output};\n")
+    assert instr.block is not None
+    blocklines = instr.block.to_text(dedent=dedent).splitlines(True)
+    # Remove blank lines from ends
+    while blocklines and not blocklines[0].strip():
+        blocklines.pop(0)
+    while blocklines and not blocklines[-1].strip():
+        blocklines.pop()
+    # Remove leading '{' and trailing '}'
+    assert blocklines and blocklines[0].strip() == "{"
+    assert blocklines and blocklines[-1].strip() == "}"
+    blocklines.pop()
+    blocklines.pop(0)
+    # Remove trailing blank lines
+    while blocklines and not blocklines[-1].strip():
+        blocklines.pop()
+    # Write the body
+    ninputs = len(instr.inputs or ())
+    for line in blocklines:
+        if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line):
+            space, cond, label = m.groups()
+            # ERROR_IF() must remove the inputs from the stack.
+            # The code block is responsible for DECREF()ing them.
+            if ninputs:
+                f.write(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n")
+            else:
+                f.write(f"{space}if ({cond}) goto {label};\n")
+        else:
+            f.write(line)
+    noutputs = len(instr.outputs or ())
+    diff = noutputs - ninputs
+    if diff > 0:
+        f.write(f"{indent}    STACK_GROW({diff});\n")
+    elif diff < 0:
+        f.write(f"{indent}    STACK_SHRINK({-diff});\n")
+    for i, output in enumerate(reversed(instr.outputs or ()), 1):
+        if output not in (instr.inputs or ()):
+            f.write(f"{indent}    POKE({i}, {output});\n")
+    assert instr.block
+
+def write_cases(f: TextIO, instrs: list[InstDef], supers: list[parser.Super]):
+    predictions: set[str] = set()
+    for instr in instrs:
+        assert isinstance(instr, InstDef)
+        assert instr.block is not None
+        for target in re.findall(r"(?:PREDICT|GO_TO_INSTRUCTION)\((\w+)\)", instr.block.text):
            predictions.add(target)
    indent = "        "
    f.write(f"// This file is generated by {os.path.relpath(__file__)}\n")
-    f.write("// Do not edit!\n")
+    f.write(f"// Do not edit!\n")
    instr_index: dict[str, InstDef] = {}
    for instr in instrs:
-        assert isinstance(instr, InstDef)
        instr_index[instr.name] = instr
        f.write(f"\n{indent}TARGET({instr.name}) {{\n")
        if instr.name in predictions:
            f.write(f"{indent}    PREDICTED({instr.name});\n")
-        # input = ", ".join(instr.inputs)
-        # output = ", ".join(instr.outputs)
-        # f.write(f"{indent}    // {input} -- {output}\n")
-        assert instr.block
-        blocklines = instr.block.text.splitlines(True)
-        # Remove blank lines from ends
-        while blocklines and not blocklines[0].strip():
-            blocklines.pop(0)
-        while blocklines and not blocklines[-1].strip():
-            blocklines.pop()
-        # Remove leading '{' and trailing '}'
-        assert blocklines and blocklines[0].strip() == "{"
-        assert blocklines and blocklines[-1].strip() == "}"
-        blocklines.pop()
-        blocklines.pop(0)
-        # Remove trailing blank lines
-        while blocklines and not blocklines[-1].strip():
-            blocklines.pop()
-        # Write the body
-        for line in blocklines:
-            f.write(line)
+        write_instr(instr, predictions, indent, f)
        assert instr.block
        if not always_exits(instr.block):
            f.write(f"{indent}    DISPATCH();\n")
@ -114,14 +144,13 @@ def write_cases(f: io.TextIOBase, instrs: list[InstDef], supers: list[parser.Sup
        components = [instr_index[name] for name in sup.ops]
        f.write(f"\n{indent}TARGET({sup.name}) {{\n")
        for i, instr in enumerate(components):
+            assert instr.block
            if i > 0:
                f.write(f"{indent}    NEXTOPARG();\n")
                f.write(f"{indent}    next_instr++;\n")
-            text = instr.block.to_text(-4)
-            textlines = text.splitlines(True)
-            textlines = [line for line in textlines if not line.strip().startswith("PREDICTED(")]
-            text = "".join(textlines)
-            f.write(f"{indent}    {text.strip()}\n")
+            f.write(f"{indent}    {{\n")
+            write_instr(instr, predictions, indent, f, dedent=-4)
+            f.write(f"    {indent}}}\n")
        f.write(f"{indent}    DISPATCH();\n")
        f.write(f"{indent}}}\n")

--- a/Tools/cases_generator/parser.py
+++ b/Tools/cases_generator/parser.py
@ -57,11 +57,28 @@ class Block(Node):


@dataclass
-class InstDef(Node):
+class InstHeader(Node):
    name: str
-    inputs: list[str] | None
-    outputs: list[str] | None
-    block: Block | None
+    inputs: list[str]
+    outputs: list[str]
+
+
+@dataclass
+class InstDef(Node):
+    header: InstHeader
+    block: Block
+
+    @property
+    def name(self):
+        return self.header.name
+
+    @property
+    def inputs(self):
+        return self.header.inputs
+
+    @property
+    def outputs(self):
+        return self.header.outputs


@dataclass
@ -82,30 +99,42 @@ class Parser(PLexer):
    def inst_def(self) -> InstDef | None:
        if header := self.inst_header():
            if block := self.block():
-                header.block = block
-                return header
+                return InstDef(header, block)
            raise self.make_syntax_error("Expected block")
        return None

    @contextual
-    def inst_header(self):
+    def inst_header(self) -> InstHeader | None:
        # inst(NAME) | inst(NAME, (inputs -- outputs))
        # TODO: Error out when there is something unexpected.
-        # TODO: Make INST a keyword in the lexer.
+        # TODO: Make INST a keyword in the lexer.
        if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "inst":
            if (self.expect(lx.LPAREN)
                    and (tkn := self.expect(lx.IDENTIFIER))):
                name = tkn.text
                if self.expect(lx.COMMA):
                    inp, outp = self.stack_effect()
-                    if (self.expect(lx.RPAREN)
-                            and self.peek().kind == lx.LBRACE):
-                        return InstDef(name, inp, outp, [])
+                    if self.expect(lx.RPAREN):
+                        if ((tkn := self.peek())
+                                and tkn.kind == lx.LBRACE):
+                            self.check_overlaps(inp, outp)
+                            return InstHeader(name, inp, outp)
                elif self.expect(lx.RPAREN):
-                    return InstDef(name, None, None, [])
+                    return InstHeader(name, [], [])
        return None

-    def stack_effect(self):
+    def check_overlaps(self, inp: list[str], outp: list[str]):
+        for i, name in enumerate(inp):
+            try:
+                j = outp.index(name)
+            except ValueError:
+                continue
+            else:
+                if i != j:
+                    raise self.make_syntax_error(
+                        f"Input {name!r} at pos {i} repeated in output at different pos {j}")
+
+    def stack_effect(self) -> tuple[list[str], list[str]]:
        # '(' [inputs] '--' [outputs] ')'
        if self.expect(lx.LPAREN):
            inp = self.inputs() or []
@ -115,7 +144,7 @@ class Parser(PLexer):
                    return inp, outp
        raise self.make_syntax_error("Expected stack effect")

-    def inputs(self):
+    def inputs(self) -> list[str] | None:
        # input (, input)*
        here = self.getpos()
        if inp := self.input():
@ -128,7 +157,7 @@ class Parser(PLexer):
        self.setpos(here)
        return None

-    def input(self):
+    def input(self) -> str | None:
        # IDENTIFIER
        if (tkn := self.expect(lx.IDENTIFIER)):
            if self.expect(lx.LBRACKET):
@ -148,7 +177,7 @@ class Parser(PLexer):
            return "??"
        return None

-    def outputs(self):
+    def outputs(self) -> list[str] | None:
        # output (, output)*
        here = self.getpos()
        if outp := self.output():
@ -161,7 +190,7 @@ class Parser(PLexer):
        self.setpos(here)
        return None

-    def output(self):
+    def output(self) -> str | None:
        return self.input()  # TODO: They're not quite the same.

    @contextual
@ -176,7 +205,6 @@ class Parser(PLexer):
                                return res

    def ops(self) -> list[str] | None:
-        here = self.getpos()
        if tkn := self.expect(lx.IDENTIFIER):
            ops = [tkn.text]
            while self.expect(lx.PLUS):
@ -197,7 +225,7 @@ class Parser(PLexer):
                                    return Family(tkn.text, members)
        return None

-    def members(self):
+    def members(self) -> list[str] | None:
        here = self.getpos()
        if tkn := self.expect(lx.IDENTIFIER):
            near = self.getpos()
@ -214,8 +242,8 @@ class Parser(PLexer):
        tokens = self.c_blob()
        return Block(tokens)

-    def c_blob(self):
-        tokens = []
+    def c_blob(self) -> list[lx.Token]:
+        tokens: list[lx.Token] = []
        level = 0
        while tkn := self.next(raw=True):
            if tkn.kind in (lx.LBRACE, lx.LPAREN, lx.LBRACKET):