gh-102654: Insert #line directives in generated_cases.c.h (#102669)

This behavior is optional, because in some extreme cases it
may just make debugging harder. The tool defaults it to off,
but it is on in Makefile.pre.in.

Also note that this makes diffs to generated_cases.c.h noisier,
since whenever you insert or delete a line in bytecodes.c,
all subsequent #line directives will change.
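
To make the effect concrete, here is a minimal, self-contained C example of what a #line directive does (not taken from the commit; the file name and line number below are hypothetical). The compiler, and therefore the debugger, attributes the lines that follow the directive to the named file, starting at the given number, which is how generated_cases.c.h can point back at Python/bytecodes.c:

    #include <stdio.h>

    int main(void)
    {
        /* Reported with this file's own name and line number. */
        printf("%s:%d\n", __FILE__, __LINE__);

    /* The directive below redirects attribution for everything that follows;
       the file name and line number are hypothetical stand-ins for what the
       generator emits. */
    #line 100 "bytecodes.c"
        printf("%s:%d\n", __FILE__, __LINE__);  /* prints bytecodes.c:100 */
        return 0;
    }

In the generated file, Formatter.set_lineno() emits such a directive whenever the next output line should be attributed to a different place in bytecodes.c, and reset_lineno() switches attribution back to the generated file itself. Since the directives encode absolute line numbers, inserting or deleting a single line in bytecodes.c shifts every directive after it, which is the source of the noisier diffs mentioned above.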
Guido van Rossum, 2023-03-15 08:37:36 -07:00, committed by GitHub
parent 61b9ff35cb
commit 70185de1ab
3 changed files with 530 additions and 16 deletions

Makefile.pre.in

@@ -1485,6 +1485,7 @@ regen-cases:
         PYTHONPATH=$(srcdir)/Tools/cases_generator \
         $(PYTHON_FOR_REGEN) \
         $(srcdir)/Tools/cases_generator/generate_cases.py \
+        --emit-line-directives \
         -o $(srcdir)/Python/generated_cases.c.h.new \
         -m $(srcdir)/Python/opcode_metadata.h.new \
         $(srcdir)/Python/bytecodes.c

Python/generated_cases.c.h: diff not shown (file too large).

Tools/cases_generator/generate_cases.py

@@ -13,6 +13,7 @@ import re
 import sys
 import typing
 
+import lexer as lx
 import parser
 from parser import StackEffect
 
@@ -43,6 +44,9 @@ arg_parser.add_argument(
 arg_parser.add_argument(
     "-m", "--metadata", type=str, help="Generated metadata", default=DEFAULT_METADATA_OUTPUT
 )
+arg_parser.add_argument(
+    "-l", "--emit-line-directives", help="Emit #line directives", action="store_true"
+)
 arg_parser.add_argument(
     "input", nargs=argparse.REMAINDER, help="Instruction definition file(s)"
 )
@@ -105,13 +109,34 @@ class Formatter:
 
     stream: typing.TextIO
     prefix: str
+    emit_line_directives: bool = False
+    lineno: int  # Next line number, 1-based
+    filename: str  # Slightly improved stream.filename
+    nominal_lineno: int
+    nominal_filename: str
 
-    def __init__(self, stream: typing.TextIO, indent: int) -> None:
+    def __init__(
+        self, stream: typing.TextIO, indent: int, emit_line_directives: bool = False
+    ) -> None:
         self.stream = stream
         self.prefix = " " * indent
+        self.emit_line_directives = emit_line_directives
+        self.lineno = 1
+        # Make filename more user-friendly and less platform-specific
+        filename = self.stream.name.replace("\\", "/")
+        if filename.startswith("./"):
+            filename = filename[2:]
+        if filename.endswith(".new"):
+            filename = filename[:-4]
+        self.filename = filename
+        self.nominal_lineno = 1
+        self.nominal_filename = filename
 
     def write_raw(self, s: str) -> None:
         self.stream.write(s)
+        newlines = s.count("\n")
+        self.lineno += newlines
+        self.nominal_lineno += newlines
 
     def emit(self, arg: str) -> None:
         if arg:
@@ -119,6 +144,17 @@ class Formatter:
         else:
             self.write_raw("\n")
 
+    def set_lineno(self, lineno: int, filename: str) -> None:
+        if self.emit_line_directives:
+            if lineno != self.nominal_lineno or filename != self.nominal_filename:
+                self.emit(f'#line {lineno} "{filename}"')
+                self.nominal_lineno = lineno
+                self.nominal_filename = filename
+
+    def reset_lineno(self) -> None:
+        if self.lineno != self.nominal_lineno or self.filename != self.nominal_filename:
+            self.set_lineno(self.lineno + 1, self.filename)
+
     @contextlib.contextmanager
     def indent(self):
         self.prefix += " "
@@ -199,6 +235,7 @@ class Instruction:
     block: parser.Block
     block_text: list[str]  # Block.text, less curlies, less PREDICT() calls
     predictions: list[str]  # Prediction targets (instruction names)
+    block_line: int  # First line of block in original code
 
     # Computed by constructor
    always_exits: bool
@@ -223,7 +260,7 @@ class Instruction:
         self.kind = inst.kind
         self.name = inst.name
         self.block = inst.block
-        self.block_text, self.check_eval_breaker, self.predictions = \
+        self.block_text, self.check_eval_breaker, self.predictions, self.block_line = \
             extract_block_text(self.block)
         self.always_exits = always_exits(self.block_text)
         self.cache_effects = [
@@ -388,7 +425,13 @@ class Instruction:
         assert dedent <= 0
         extra = " " * -dedent
         names_to_skip = self.unmoved_names | frozenset({UNUSED, "null"})
+        offset = 0
+        context = self.block.context
+        assert context != None
+        filename = context.owner.filename
         for line in self.block_text:
+            out.set_lineno(self.block_line + offset, filename)
+            offset += 1
             if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$", line):
                 space, cond, label = m.groups()
                 space = extra + space
@@ -416,6 +459,7 @@
                     out.write_raw(f"{space}if ({cond}) goto {label};\n")
             elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*(?://.*)?$", line):
                 if not self.register:
+                    out.reset_lineno()
                     space = extra + m.group(1)
                     for ieff in self.input_effects:
                         if ieff.name in names_to_skip:
@@ -431,6 +475,7 @@
                             out.write_raw(f"{space}Py_{decref}({ieff.name});\n")
             else:
                 out.write_raw(extra + line)
+        out.reset_lineno()
 
 
 InstructionOrCacheEffect = Instruction | parser.CacheEffect
@@ -501,6 +546,7 @@ class Analyzer:
     output_filename: str
     metadata_filename: str
     errors: int = 0
+    emit_line_directives: bool = False
 
     def __init__(self, input_filenames: list[str], output_filename: str, metadata_filename: str):
         """Read the input file."""
@@ -561,6 +607,10 @@ class Analyzer:
         with open(filename) as file:
             src = file.read()
 
+        # Make filename more user-friendly and less platform-specific
+        filename = filename.replace("\\", "/")
+        if filename.startswith("./"):
+            filename = filename[2:]
         psr = parser.Parser(src, filename=filename)
 
         # Skip until begin marker
@@ -972,14 +1022,15 @@ class Analyzer:
         format_enums = [INSTR_FMT_PREFIX + format for format in sorted(all_formats)]
 
         with open(self.metadata_filename, "w") as f:
-            # Write provenance header
-            f.write(f"// This file is generated by {THIS}\n")
-            f.write(self.from_source_files())
-            f.write(f"// Do not edit!\n")
-
-            # Create formatter; the rest of the code uses this
+            # Create formatter
             self.out = Formatter(f, 0)
 
+            # Write provenance header
+            self.out.write_raw(f"// This file is generated by {THIS}\n")
+            self.out.write_raw(self.from_source_files())
+            self.out.write_raw(f"// Do not edit!\n")
+
             self.write_stack_effect_functions()
 
             # Write type definitions
@@ -1053,13 +1104,13 @@ class Analyzer:
     def write_instructions(self) -> None:
         """Write instructions to output file."""
         with open(self.output_filename, "w") as f:
-            # Write provenance header
-            f.write(f"// This file is generated by {THIS}\n")
-            f.write(self.from_source_files())
-            f.write(f"// Do not edit!\n")
-
-            # Create formatter; the rest of the code uses this
-            self.out = Formatter(f, 8)
+            # Create formatter
+            self.out = Formatter(f, 8, self.emit_line_directives)
+
+            # Write provenance header
+            self.out.write_raw(f"// This file is generated by {THIS}\n")
+            self.out.write_raw(self.from_source_files())
+            self.out.write_raw(f"// Do not edit!\n")
 
             # Write and count instructions of all kinds
             n_instrs = 0
@@ -1179,13 +1230,16 @@
             self.out.emit(f"DISPATCH();")
 
 
-def extract_block_text(block: parser.Block) -> tuple[list[str], bool, list[str]]:
+def extract_block_text(block: parser.Block) -> tuple[list[str], bool, list[str], int]:
     # Get lines of text with proper dedent
     blocklines = block.text.splitlines(True)
+    first_token: lx.Token = block.tokens[0]  # IndexError means the context is broken
+    block_line = first_token.begin[0]
 
     # Remove blank lines from both ends
     while blocklines and not blocklines[0].strip():
         blocklines.pop(0)
+        block_line += 1
 
     while blocklines and not blocklines[-1].strip():
         blocklines.pop()
@@ -1194,6 +1248,7 @@ def extract_block_text(block: parser.Block) -> tuple[list[str], bool, list[str]]
     assert blocklines and blocklines[-1].strip() == "}"
     blocklines.pop()
     blocklines.pop(0)
+    block_line += 1
 
     # Remove trailing blank lines
     while blocklines and not blocklines[-1].strip():
@@ -1213,7 +1268,7 @@ def extract_block_text(block: parser.Block) -> tuple[list[str], bool, list[str]]
         predictions.insert(0, m.group(1))
         blocklines.pop()
 
-    return blocklines, check_eval_breaker, predictions
+    return blocklines, check_eval_breaker, predictions, block_line
 
 
 def always_exits(lines: list[str]) -> bool:
@@ -1250,6 +1305,8 @@ def main():
     if len(args.input) == 0:
         args.input.append(DEFAULT_INPUT)
     a = Analyzer(args.input, args.output, args.metadata)  # Raises OSError if input unreadable
+    if args.emit_line_directives:
+        a.emit_line_directives = True
     a.parse()  # Raises SyntaxError on failure
     a.analyze()  # Prints messages and sets a.errors on failure
     if a.errors: