mirror of
https://github.com/python/cpython.git
synced 2025-12-15 21:44:50 +00:00
gh-106581: Split CALL_PY_EXACT_ARGS into uops (#107760)
* Split `CALL_PY_EXACT_ARGS` into uops. This is only the first step for doing `CALL` in Tier 2. The next step involves tracing into the called code object and back. After that we'll have to do the remaining `CALL` specializations. Finally we'll have to deal with `KW_NAMES`. Note: this moves setting `frame->return_offset` directly in front of `DISPATCH_INLINED()`, to make it easier to move it into `_PUSH_FRAME`.
This commit is contained in:
parent
665a4391e1
commit
dc8fdf5fd5
14 changed files with 412 additions and 116 deletions
|
|
@ -92,7 +92,7 @@ def variable_used_unspecialized(node: parsing.Node, name: str) -> bool:
|
|||
if text == "#if":
|
||||
if (
|
||||
i + 1 < len(node.tokens)
|
||||
and node.tokens[i + 1].text == "ENABLE_SPECIALIZATION"
|
||||
and node.tokens[i + 1].text in ("ENABLE_SPECIALIZATION", "TIER_ONE")
|
||||
):
|
||||
skipping = True
|
||||
elif text in ("#else", "#endif"):
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ from instructions import (
|
|||
PseudoInstruction,
|
||||
StackEffect,
|
||||
OverriddenInstructionPlaceHolder,
|
||||
TIER_ONE,
|
||||
TIER_TWO,
|
||||
)
|
||||
import parsing
|
||||
|
|
@ -65,6 +66,7 @@ OPARG_SIZES = {
|
|||
"OPARG_CACHE_4": 4,
|
||||
"OPARG_TOP": 5,
|
||||
"OPARG_BOTTOM": 6,
|
||||
"OPARG_SAVE_IP": 7,
|
||||
}
|
||||
|
||||
INSTR_FMT_PREFIX = "INSTR_FMT_"
|
||||
|
|
@ -501,7 +503,9 @@ class Generator(Analyzer):
|
|||
if instr.kind == "inst" and instr.is_viable_uop():
|
||||
# Construct a dummy Component -- input/output mappings are not used
|
||||
part = Component(instr, instr.active_caches)
|
||||
self.write_macro_expansions(instr.name, [part])
|
||||
self.write_macro_expansions(
|
||||
instr.name, [part], instr.cache_offset
|
||||
)
|
||||
elif instr.kind == "inst" and variable_used(
|
||||
instr.inst, "oparg1"
|
||||
):
|
||||
|
|
@ -511,7 +515,9 @@ class Generator(Analyzer):
|
|||
self.write_super_expansions(instr.name)
|
||||
case parsing.Macro():
|
||||
mac = self.macro_instrs[thing.name]
|
||||
self.write_macro_expansions(mac.name, mac.parts)
|
||||
self.write_macro_expansions(
|
||||
mac.name, mac.parts, mac.cache_offset
|
||||
)
|
||||
case parsing.Pseudo():
|
||||
pass
|
||||
case _:
|
||||
|
|
@ -630,7 +636,9 @@ class Generator(Analyzer):
|
|||
if instr.kind == "op" and instr.is_viable_uop():
|
||||
add(instr.name)
|
||||
|
||||
def write_macro_expansions(self, name: str, parts: MacroParts) -> None:
|
||||
def write_macro_expansions(
|
||||
self, name: str, parts: MacroParts, cache_offset: int
|
||||
) -> None:
|
||||
"""Write the macro expansions for a macro-instruction."""
|
||||
# TODO: Refactor to share code with write_body(), is_viable_uop(), etc.
|
||||
offset = 0 # Cache effect offset
|
||||
|
|
@ -650,7 +658,10 @@ class Generator(Analyzer):
|
|||
)
|
||||
return
|
||||
if not part.active_caches:
|
||||
size, offset = OPARG_SIZES["OPARG_FULL"], 0
|
||||
if part.instr.name == "SAVE_IP":
|
||||
size, offset = OPARG_SIZES["OPARG_SAVE_IP"], cache_offset
|
||||
else:
|
||||
size, offset = OPARG_SIZES["OPARG_FULL"], 0
|
||||
else:
|
||||
# If this assert triggers, is_viable_uop() lied
|
||||
assert len(part.active_caches) == 1, (name, part.instr.name)
|
||||
|
|
@ -753,7 +764,9 @@ class Generator(Analyzer):
|
|||
case parsing.Macro():
|
||||
n_macros += 1
|
||||
mac = self.macro_instrs[thing.name]
|
||||
stacking.write_macro_instr(mac, self.out, self.families.get(mac.name))
|
||||
stacking.write_macro_instr(
|
||||
mac, self.out, self.families.get(mac.name)
|
||||
)
|
||||
# self.write_macro(self.macro_instrs[thing.name])
|
||||
case parsing.Pseudo():
|
||||
pass
|
||||
|
|
@ -789,7 +802,9 @@ class Generator(Analyzer):
|
|||
n_instrs += 1
|
||||
self.out.emit("")
|
||||
with self.out.block(f"case {thing.name}:"):
|
||||
instr.write(self.out, tier=TIER_TWO)
|
||||
stacking.write_single_instr(
|
||||
instr, self.out, tier=TIER_TWO
|
||||
)
|
||||
if instr.check_eval_breaker:
|
||||
self.out.emit("CHECK_EVAL_BREAKER();")
|
||||
self.out.emit("break;")
|
||||
|
|
@ -851,8 +866,13 @@ class Generator(Analyzer):
|
|||
with self.out.block(f"TARGET({name})"):
|
||||
if instr.predicted:
|
||||
self.out.emit(f"PREDICTED({name});")
|
||||
instr.write(self.out)
|
||||
self.out.static_assert_family_size(
|
||||
instr.name, instr.family, instr.cache_offset
|
||||
)
|
||||
stacking.write_single_instr(instr, self.out, tier=TIER_ONE)
|
||||
if not instr.always_exits:
|
||||
if instr.cache_offset:
|
||||
self.out.emit(f"next_instr += {instr.cache_offset};")
|
||||
if instr.check_eval_breaker:
|
||||
self.out.emit("CHECK_EVAL_BREAKER();")
|
||||
self.out.emit(f"DISPATCH();")
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ class Instruction:
|
|||
block_line: int # First line of block in original code
|
||||
|
||||
# Computed by constructor
|
||||
always_exits: bool
|
||||
always_exits: str # If the block always exits, its last line; else ""
|
||||
has_deopt: bool
|
||||
cache_offset: int
|
||||
cache_effects: list[parsing.CacheEffect]
|
||||
|
|
@ -120,13 +120,13 @@ class Instruction:
|
|||
def is_viable_uop(self) -> bool:
|
||||
"""Whether this instruction is viable as a uop."""
|
||||
dprint: typing.Callable[..., None] = lambda *args, **kwargs: None
|
||||
# if self.name.startswith("CALL"):
|
||||
# dprint = print
|
||||
if "FRAME" in self.name:
|
||||
dprint = print
|
||||
|
||||
if self.name == "EXIT_TRACE":
|
||||
return True # This has 'return frame' but it's okay
|
||||
if self.always_exits:
|
||||
dprint(f"Skipping {self.name} because it always exits")
|
||||
dprint(f"Skipping {self.name} because it always exits: {self.always_exits}")
|
||||
return False
|
||||
if len(self.active_caches) > 1:
|
||||
# print(f"Skipping {self.name} because it has >1 cache entries")
|
||||
|
|
@ -140,23 +140,6 @@ class Instruction:
|
|||
res = False
|
||||
return res
|
||||
|
||||
def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:
|
||||
"""Write one instruction, sans prologue and epilogue."""
|
||||
|
||||
# Write a static assertion that a family's cache size is correct
|
||||
out.static_assert_family_size(self.name, self.family, self.cache_offset)
|
||||
|
||||
# Write input stack effect variable declarations and initializations
|
||||
stacking.write_single_instr(self, out, tier)
|
||||
|
||||
# Skip the rest if the block always exits
|
||||
if self.always_exits:
|
||||
return
|
||||
|
||||
# Write cache effect
|
||||
if tier == TIER_ONE and self.cache_offset:
|
||||
out.emit(f"next_instr += {self.cache_offset};")
|
||||
|
||||
def write_body(
|
||||
self,
|
||||
out: Formatter,
|
||||
|
|
@ -341,16 +324,16 @@ def extract_block_text(block: parsing.Block) -> tuple[list[str], bool, int]:
|
|||
return blocklines, check_eval_breaker, block_line
|
||||
|
||||
|
||||
def always_exits(lines: list[str]) -> bool:
|
||||
def always_exits(lines: list[str]) -> str:
|
||||
"""Determine whether a block always ends in a return/goto/etc."""
|
||||
if not lines:
|
||||
return False
|
||||
return ""
|
||||
line = lines[-1].rstrip()
|
||||
# Indent must match exactly (TODO: Do something better)
|
||||
if line[:12] != " " * 12:
|
||||
return False
|
||||
return ""
|
||||
line = line[12:]
|
||||
return line.startswith(
|
||||
if line.startswith(
|
||||
(
|
||||
"goto ",
|
||||
"return ",
|
||||
|
|
@ -359,4 +342,6 @@ def always_exits(lines: list[str]) -> bool:
|
|||
"Py_UNREACHABLE()",
|
||||
"ERROR_IF(true, ",
|
||||
)
|
||||
)
|
||||
):
|
||||
return line
|
||||
return ""
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import dataclasses
|
||||
import typing
|
||||
|
||||
from flags import variable_used_unspecialized
|
||||
from formatting import (
|
||||
Formatter,
|
||||
UNUSED,
|
||||
|
|
@ -146,6 +147,8 @@ class EffectManager:
|
|||
# Track offsets from stack pointer
|
||||
min_offset: StackOffset
|
||||
final_offset: StackOffset
|
||||
# Link to previous manager
|
||||
pred: "EffectManager | None" = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -167,7 +170,8 @@ class EffectManager:
|
|||
self.pokes.append(StackItem(offset=self.final_offset.clone(), effect=eff))
|
||||
self.final_offset.higher(eff)
|
||||
|
||||
if pred:
|
||||
self.pred = pred
|
||||
while pred:
|
||||
# Replace push(x) + pop(y) with copy(x, y).
|
||||
# Check that the sources and destinations are disjoint.
|
||||
sources: set[str] = set()
|
||||
|
|
@ -192,6 +196,11 @@ class EffectManager:
|
|||
sources,
|
||||
destinations,
|
||||
)
|
||||
# See if we can get more copies from an earlier predecessor.
|
||||
if self.peeks and not pred.pokes and not pred.peeks:
|
||||
pred = pred.pred
|
||||
else:
|
||||
pred = None # Break
|
||||
|
||||
def adjust_deeper(self, eff: StackEffect) -> None:
|
||||
for peek in self.peeks:
|
||||
|
|
@ -295,6 +304,7 @@ def write_single_instr(
|
|||
[Component(instr, instr.active_caches)],
|
||||
out,
|
||||
tier,
|
||||
0,
|
||||
)
|
||||
except AssertionError as err:
|
||||
raise AssertionError(f"Error writing instruction {instr.name}") from err
|
||||
|
|
@ -303,37 +313,32 @@ def write_single_instr(
|
|||
def write_macro_instr(
|
||||
mac: MacroInstruction, out: Formatter, family: Family | None
|
||||
) -> None:
|
||||
parts = [part for part in mac.parts if isinstance(part, Component)]
|
||||
|
||||
cache_adjust = 0
|
||||
for part in mac.parts:
|
||||
match part:
|
||||
case CacheEffect(size=size):
|
||||
cache_adjust += size
|
||||
case Component(instr=instr):
|
||||
cache_adjust += instr.cache_offset
|
||||
case _:
|
||||
typing.assert_never(part)
|
||||
|
||||
parts = [
|
||||
part
|
||||
for part in mac.parts
|
||||
if isinstance(part, Component) and part.instr.name != "SAVE_IP"
|
||||
]
|
||||
out.emit("")
|
||||
with out.block(f"TARGET({mac.name})"):
|
||||
if mac.predicted:
|
||||
out.emit(f"PREDICTED({mac.name});")
|
||||
out.static_assert_family_size(mac.name, family, cache_adjust)
|
||||
out.static_assert_family_size(mac.name, family, mac.cache_offset)
|
||||
try:
|
||||
write_components(parts, out, TIER_ONE)
|
||||
next_instr_is_set = write_components(parts, out, TIER_ONE, mac.cache_offset)
|
||||
except AssertionError as err:
|
||||
raise AssertionError(f"Error writing macro {mac.name}") from err
|
||||
if cache_adjust:
|
||||
out.emit(f"next_instr += {cache_adjust};")
|
||||
out.emit("DISPATCH();")
|
||||
if not parts[-1].instr.always_exits and not next_instr_is_set:
|
||||
if mac.cache_offset:
|
||||
out.emit(f"next_instr += {mac.cache_offset};")
|
||||
out.emit("DISPATCH();")
|
||||
|
||||
|
||||
def write_components(
|
||||
parts: list[Component],
|
||||
out: Formatter,
|
||||
tier: Tiers,
|
||||
) -> None:
|
||||
cache_offset: int,
|
||||
) -> bool:
|
||||
managers = get_managers(parts)
|
||||
|
||||
all_vars: dict[str, StackEffect] = {}
|
||||
|
|
@ -354,6 +359,7 @@ def write_components(
|
|||
for name, eff in all_vars.items():
|
||||
out.declare(eff, None)
|
||||
|
||||
next_instr_is_set = False
|
||||
for mgr in managers:
|
||||
if len(parts) > 1:
|
||||
out.emit(f"// {mgr.instr.name}")
|
||||
|
|
@ -374,13 +380,25 @@ def write_components(
|
|||
poke.as_stack_effect(lax=True),
|
||||
)
|
||||
|
||||
if mgr.instr.name == "_PUSH_FRAME":
|
||||
# Adjust stack to min_offset (input effects materialized)
|
||||
out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high)
|
||||
# Use clone() since adjust_inverse() mutates final_offset
|
||||
mgr.adjust_inverse(mgr.final_offset.clone())
|
||||
|
||||
if mgr.instr.name == "SAVE_CURRENT_IP":
|
||||
next_instr_is_set = True
|
||||
if cache_offset:
|
||||
out.emit(f"next_instr += {cache_offset};")
|
||||
|
||||
if len(parts) == 1:
|
||||
mgr.instr.write_body(out, 0, mgr.active_caches, tier)
|
||||
else:
|
||||
with out.block(""):
|
||||
mgr.instr.write_body(out, -4, mgr.active_caches, tier)
|
||||
|
||||
if mgr is managers[-1]:
|
||||
if mgr is managers[-1] and not next_instr_is_set:
|
||||
# TODO: Explain why this adjustment is needed.
|
||||
out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high)
|
||||
# Use clone() since adjust_inverse() mutates final_offset
|
||||
mgr.adjust_inverse(mgr.final_offset.clone())
|
||||
|
|
@ -392,6 +410,8 @@ def write_components(
|
|||
poke.effect,
|
||||
)
|
||||
|
||||
return next_instr_is_set
|
||||
|
||||
|
||||
def write_single_instr_for_abstract_interp(
|
||||
instr: Instruction, out: Formatter
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue