GH-98831: Implement array support in cases generator (#100912)

You can now write things like this:
```
inst(BUILD_STRING, (pieces[oparg] -- str)) { ... }
inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) { ... }
```
Note that array output effects are only partially supported (they must be named `unused` or correspond to an input effect).
This commit is contained in:
Guido van Rossum 2023-01-17 15:59:19 -08:00 committed by GitHub
parent f34176b77f
commit 80e3e3423c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 398 additions and 177 deletions

View file

@ -15,14 +15,14 @@ import typing
import parser
from parser import StackEffect
DEFAULT_INPUT = os.path.relpath(
os.path.join(os.path.dirname(__file__), "../../Python/bytecodes.c")
)
DEFAULT_OUTPUT = os.path.relpath(
os.path.join(os.path.dirname(__file__), "../../Python/generated_cases.c.h")
)
HERE = os.path.dirname(__file__)
ROOT = os.path.join(HERE, "../..")
THIS = os.path.relpath(__file__, ROOT)
DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
DEFAULT_METADATA_OUTPUT = os.path.relpath(
os.path.join(os.path.dirname(__file__), "../../Python/opcode_metadata.h")
os.path.join(ROOT, "Python/opcode_metadata.h")
)
BEGIN_MARKER = "// BEGIN BYTECODES //"
END_MARKER = "// END BYTECODES //"
@ -48,6 +48,55 @@ arg_parser.add_argument(
)
def effect_size(effect: StackEffect) -> tuple[int, str]:
"""Return the 'size' impact of a stack effect.
Returns a tuple (numeric, symbolic) where:
- numeric is an int giving the statically analyzable size of the effect
- symbolic is a string representing a variable effect (e.g. 'oparg*2')
At most one of these will be non-zero / non-empty.
"""
if effect.size:
return 0, effect.size
else:
return 1, ""
def maybe_parenthesize(sym: str) -> str:
"""Add parentheses around a string if it contains an operator.
An exception is made for '*' which is common and harmless
in the context where the symbolic size is used.
"""
if re.match(r"^[\s\w*]+$", sym):
return sym
else:
return f"({sym})"
def list_effect_size(effects: list[StackEffect]) -> tuple[int, str]:
numeric = 0
symbolic: list[str] = []
for effect in effects:
diff, sym = effect_size(effect)
numeric += diff
if sym:
symbolic.append(maybe_parenthesize(sym))
return numeric, " + ".join(symbolic)
def string_effect_size(arg: tuple[int, str]) -> str:
numeric, symbolic = arg
if numeric and symbolic:
return f"{numeric} + {symbolic}"
elif symbolic:
return symbolic
else:
return str(numeric)
class Formatter:
"""Wraps an output stream with the ability to indent etc."""
@ -83,28 +132,43 @@ class Formatter:
yield
self.emit("}")
def stack_adjust(self, diff: int):
def stack_adjust(self, diff: int, input_effects: list[StackEffect], output_effects: list[StackEffect]):
# TODO: Get rid of 'diff' parameter
shrink, isym = list_effect_size(input_effects)
grow, osym = list_effect_size(output_effects)
diff += grow - shrink
if isym and isym != osym:
self.emit(f"STACK_SHRINK({isym});")
if diff < 0:
self.emit(f"STACK_SHRINK({-diff});")
if diff > 0:
self.emit(f"STACK_GROW({diff});")
elif diff < 0:
self.emit(f"STACK_SHRINK({-diff});")
if osym and osym != isym:
self.emit(f"STACK_GROW({osym});")
def declare(self, dst: StackEffect, src: StackEffect | None):
if dst.name == UNUSED:
return
typ = f"{dst.type} " if dst.type else "PyObject *"
typ = f"{dst.type}" if dst.type else "PyObject *"
init = ""
if src:
cast = self.cast(dst, src)
init = f" = {cast}{src.name}"
self.emit(f"{typ}{dst.name}{init};")
sepa = "" if typ.endswith("*") else " "
self.emit(f"{typ}{sepa}{dst.name}{init};")
def assign(self, dst: StackEffect, src: StackEffect):
if src.name == UNUSED:
return
cast = self.cast(dst, src)
if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
if m := re.match(r"^PEEK\((.*)\)$", dst.name):
self.emit(f"POKE({m.group(1)}, {cast}{src.name});")
elif m := re.match(r"^&PEEK\(.*\)$", dst.name):
# NOTE: MOVE_ITEMS() does not actually exist.
# The only supported output array forms are:
# - unused[...]
# - X[...] where X[...] matches an input array exactly
self.emit(f"MOVE_ITEMS({dst.name}, {src.name}, {src.size});")
elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name):
self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
else:
@ -172,7 +236,7 @@ class Instruction:
num_dummies = (num_regs // 2) * 2 + 1 - num_regs
fmt = "I" + "B"*num_regs + "X"*num_dummies
else:
if variable_used(inst.block, "oparg"):
if variable_used(inst, "oparg"):
fmt = "IB"
else:
fmt = "IX"
@ -200,7 +264,6 @@ class Instruction:
def write(self, out: Formatter) -> None:
"""Write one instruction, sans prologue and epilogue."""
# Write a static assertion that a family's cache size is correct
if family := self.family:
if self.name == family.members[0]:
if cache_size := family.size:
@ -211,8 +274,13 @@ class Instruction:
if not self.register:
# Write input stack effect variable declarations and initializations
for i, ieffect in enumerate(reversed(self.input_effects), 1):
src = StackEffect(f"PEEK({i})", "")
ieffects = list(reversed(self.input_effects))
for i, ieffect in enumerate(ieffects):
isize = string_effect_size(list_effect_size(ieffects[:i+1]))
if ieffect.size:
src = StackEffect(f"&PEEK({isize})", "PyObject **")
else:
src = StackEffect(f"PEEK({isize})", "")
out.declare(ieffect, src)
else:
# Write input register variable declarations and initializations
@ -236,14 +304,19 @@ class Instruction:
if not self.register:
# Write net stack growth/shrinkage
diff = len(self.output_effects) - len(self.input_effects)
out.stack_adjust(diff)
out.stack_adjust(0, self.input_effects, self.output_effects)
# Write output stack effect assignments
for i, oeffect in enumerate(reversed(self.output_effects), 1):
if oeffect.name not in self.unmoved_names:
dst = StackEffect(f"PEEK({i})", "")
out.assign(dst, oeffect)
oeffects = list(reversed(self.output_effects))
for i, oeffect in enumerate(oeffects):
if oeffect.name in self.unmoved_names:
continue
osize = string_effect_size(list_effect_size(oeffects[:i+1]))
if oeffect.size:
dst = StackEffect(f"&PEEK({osize})", "PyObject **")
else:
dst = StackEffect(f"PEEK({osize})", "")
out.assign(dst, oeffect)
else:
# Write output register assignments
for oeffect, reg in zip(self.output_effects, self.output_registers):
@ -287,19 +360,21 @@ class Instruction:
# The code block is responsible for DECREF()ing them.
# NOTE: If the label doesn't exist, just add it to ceval.c.
if not self.register:
ninputs = len(self.input_effects)
# Don't pop common input/output effects at the bottom!
# These aren't DECREF'ed so they can stay.
for ieff, oeff in zip(self.input_effects, self.output_effects):
if ieff.name == oeff.name:
ninputs -= 1
else:
break
ieffs = list(self.input_effects)
oeffs = list(self.output_effects)
while ieffs and oeffs and ieffs[0] == oeffs[0]:
ieffs.pop(0)
oeffs.pop(0)
ninputs, symbolic = list_effect_size(ieffs)
if ninputs:
label = f"pop_{ninputs}_{label}"
else:
ninputs = 0
if ninputs:
symbolic = ""
if symbolic:
out.write_raw(
f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n"
f"{extra}{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n"
)
else:
out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
@ -635,6 +710,13 @@ class Analyzer:
for thing in components:
match thing:
case Instruction() as instr:
if any(eff.size for eff in instr.input_effects + instr.output_effects):
# TODO: Eventually this will be needed, at least for macros.
self.error(
f"Instruction {instr.name!r} has variable-sized stack effect, "
"which are not supported in super- or macro instructions",
instr.inst, # TODO: Pass name+location of super/macro
)
current -= len(instr.input_effects)
lowest = min(lowest, current)
current += len(instr.output_effects)
@ -673,10 +755,8 @@ class Analyzer:
with open(self.output_filename, "w") as f:
# Write provenance header
f.write(
f"// This file is generated by {os.path.relpath(__file__)} --metadata\n"
)
f.write(f"// from {os.path.relpath(self.filename)}\n")
f.write(f"// This file is generated by {THIS} --metadata\n")
f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
f.write(f"// Do not edit!\n")
# Create formatter; the rest of the code uses this
@ -719,8 +799,11 @@ class Analyzer:
n_popped = n_pushed = -1
assert not instr.register
else:
n_popped = len(instr.input_effects)
n_pushed = len(instr.output_effects)
n_popped, sym_popped = list_effect_size(instr.input_effects)
n_pushed, sym_pushed = list_effect_size(instr.output_effects)
if sym_popped or sym_pushed:
# TODO: Record symbolic effects (how?)
n_popped = n_pushed = -1
if instr.register:
directions: list[str] = []
directions.extend("DIR_READ" for _ in instr.input_effects)
@ -754,8 +837,8 @@ class Analyzer:
"""Write instructions to output file."""
with open(self.output_filename, "w") as f:
# Write provenance header
f.write(f"// This file is generated by {os.path.relpath(__file__)}\n")
f.write(f"// from {os.path.relpath(self.filename)}\n")
f.write(f"// This file is generated by {THIS}\n")
f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
f.write(f"// Do not edit!\n")
# Create formatter; the rest of the code uses this
@ -848,7 +931,9 @@ class Analyzer:
yield
self.out.stack_adjust(up.final_sp - up.initial_sp)
# TODO: Use slices of up.stack instead of numeric values
self.out.stack_adjust(up.final_sp - up.initial_sp, [], [])
for i, var in enumerate(reversed(up.stack[: up.final_sp]), 1):
dst = StackEffect(f"PEEK({i})", "")
self.out.assign(dst, var)
@ -899,9 +984,9 @@ def always_exits(lines: list[str]) -> bool:
)
def variable_used(block: parser.Block, name: str) -> bool:
"""Determine whether a variable with a given name is used in a block."""
return any(token.kind == "IDENTIFIER" and token.text == name for token in block.tokens)
def variable_used(node: parser.Node, name: str) -> bool:
"""Determine whether a variable with a given name is used in a node."""
return any(token.kind == "IDENTIFIER" and token.text == name for token in node.tokens)
def main():