mirror of
https://github.com/python/cpython.git
synced 2025-09-18 22:50:26 +00:00
gh-104584: Emit macro expansions to opcode_metadata.h (#106163)
This produces longer traces (superblocks?). Also improved debug output (uop names are now printed instead of numeric opcodes). This would be simpler if the numeric opcode values were generated by generate_cases.py, but that's another project. Refactored some code in generate_cases.py so the essential algorithm for cache effects is only run once. (Deciding which effects are used and what the total cache size is, regardless of what's used.)
This commit is contained in:
parent
c283a0cff5
commit
11731434df
4 changed files with 153 additions and 78 deletions
|
@ -2817,10 +2817,10 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
|
||||||
oparg = (int)operand;
|
oparg = (int)operand;
|
||||||
#ifdef LLTRACE
|
#ifdef LLTRACE
|
||||||
if (lltrace >= 3) {
|
if (lltrace >= 3) {
|
||||||
const char *opname = opcode < 256 ? _PyOpcode_OpName[opcode] : "";
|
const char *opname = opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode];
|
||||||
int stack_level = (int)(stack_pointer - _PyFrame_Stackbase(frame));
|
int stack_level = (int)(stack_pointer - _PyFrame_Stackbase(frame));
|
||||||
fprintf(stderr, " uop %s %d, operand %" PRIu64 ", stack_level %d\n",
|
fprintf(stderr, " uop %s, operand %" PRIu64 ", stack_level %d\n",
|
||||||
opname, opcode, operand, stack_level);
|
opname, operand, stack_level);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
pc++;
|
pc++;
|
||||||
|
|
|
@ -913,6 +913,9 @@ struct opcode_macro_expansion {
|
||||||
#ifndef NEED_OPCODE_METADATA
|
#ifndef NEED_OPCODE_METADATA
|
||||||
extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];
|
extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];
|
||||||
extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];
|
extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];
|
||||||
|
#ifdef Py_DEBUG
|
||||||
|
extern const char * const _PyOpcode_uop_name[512];
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {
|
const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {
|
||||||
[NOP] = { true, INSTR_FMT_IX, 0 },
|
[NOP] = { true, INSTR_FMT_IX, 0 },
|
||||||
|
@ -1131,10 +1134,18 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
|
||||||
[STORE_FAST] = { .nuops = 1, .uops = { { STORE_FAST, 0, 0 } } },
|
[STORE_FAST] = { .nuops = 1, .uops = { { STORE_FAST, 0, 0 } } },
|
||||||
[POP_TOP] = { .nuops = 1, .uops = { { POP_TOP, 0, 0 } } },
|
[POP_TOP] = { .nuops = 1, .uops = { { POP_TOP, 0, 0 } } },
|
||||||
[PUSH_NULL] = { .nuops = 1, .uops = { { PUSH_NULL, 0, 0 } } },
|
[PUSH_NULL] = { .nuops = 1, .uops = { { PUSH_NULL, 0, 0 } } },
|
||||||
|
[END_FOR] = { .nuops = 2, .uops = { { POP_TOP, 0, 0 }, { POP_TOP, 0, 0 } } },
|
||||||
[END_SEND] = { .nuops = 1, .uops = { { END_SEND, 0, 0 } } },
|
[END_SEND] = { .nuops = 1, .uops = { { END_SEND, 0, 0 } } },
|
||||||
[UNARY_NEGATIVE] = { .nuops = 1, .uops = { { UNARY_NEGATIVE, 0, 0 } } },
|
[UNARY_NEGATIVE] = { .nuops = 1, .uops = { { UNARY_NEGATIVE, 0, 0 } } },
|
||||||
[UNARY_NOT] = { .nuops = 1, .uops = { { UNARY_NOT, 0, 0 } } },
|
[UNARY_NOT] = { .nuops = 1, .uops = { { UNARY_NOT, 0, 0 } } },
|
||||||
[UNARY_INVERT] = { .nuops = 1, .uops = { { UNARY_INVERT, 0, 0 } } },
|
[UNARY_INVERT] = { .nuops = 1, .uops = { { UNARY_INVERT, 0, 0 } } },
|
||||||
|
[BINARY_OP_MULTIPLY_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_MULTIPLY_INT, 0, 0 } } },
|
||||||
|
[BINARY_OP_ADD_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_ADD_INT, 0, 0 } } },
|
||||||
|
[BINARY_OP_SUBTRACT_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_SUBTRACT_INT, 0, 0 } } },
|
||||||
|
[BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, 0, 0 } } },
|
||||||
|
[BINARY_OP_ADD_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_ADD_FLOAT, 0, 0 } } },
|
||||||
|
[BINARY_OP_SUBTRACT_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_SUBTRACT_FLOAT, 0, 0 } } },
|
||||||
|
[BINARY_OP_ADD_UNICODE] = { .nuops = 2, .uops = { { _GUARD_BOTH_UNICODE, 0, 0 }, { _BINARY_OP_ADD_UNICODE, 0, 0 } } },
|
||||||
[BINARY_SLICE] = { .nuops = 1, .uops = { { BINARY_SLICE, 0, 0 } } },
|
[BINARY_SLICE] = { .nuops = 1, .uops = { { BINARY_SLICE, 0, 0 } } },
|
||||||
[STORE_SLICE] = { .nuops = 1, .uops = { { STORE_SLICE, 0, 0 } } },
|
[STORE_SLICE] = { .nuops = 1, .uops = { { STORE_SLICE, 0, 0 } } },
|
||||||
[BINARY_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_LIST_INT, 0, 0 } } },
|
[BINARY_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_LIST_INT, 0, 0 } } },
|
||||||
|
@ -1162,6 +1173,9 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
|
||||||
[DELETE_ATTR] = { .nuops = 1, .uops = { { DELETE_ATTR, 0, 0 } } },
|
[DELETE_ATTR] = { .nuops = 1, .uops = { { DELETE_ATTR, 0, 0 } } },
|
||||||
[STORE_GLOBAL] = { .nuops = 1, .uops = { { STORE_GLOBAL, 0, 0 } } },
|
[STORE_GLOBAL] = { .nuops = 1, .uops = { { STORE_GLOBAL, 0, 0 } } },
|
||||||
[DELETE_GLOBAL] = { .nuops = 1, .uops = { { DELETE_GLOBAL, 0, 0 } } },
|
[DELETE_GLOBAL] = { .nuops = 1, .uops = { { DELETE_GLOBAL, 0, 0 } } },
|
||||||
|
[LOAD_LOCALS] = { .nuops = 1, .uops = { { _LOAD_LOCALS, 0, 0 } } },
|
||||||
|
[LOAD_NAME] = { .nuops = 2, .uops = { { _LOAD_LOCALS, 0, 0 }, { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } },
|
||||||
|
[LOAD_FROM_DICT_OR_GLOBALS] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } },
|
||||||
[DELETE_DEREF] = { .nuops = 1, .uops = { { DELETE_DEREF, 0, 0 } } },
|
[DELETE_DEREF] = { .nuops = 1, .uops = { { DELETE_DEREF, 0, 0 } } },
|
||||||
[LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { LOAD_FROM_DICT_OR_DEREF, 0, 0 } } },
|
[LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { LOAD_FROM_DICT_OR_DEREF, 0, 0 } } },
|
||||||
[LOAD_DEREF] = { .nuops = 1, .uops = { { LOAD_DEREF, 0, 0 } } },
|
[LOAD_DEREF] = { .nuops = 1, .uops = { { LOAD_DEREF, 0, 0 } } },
|
||||||
|
@ -1207,4 +1221,22 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
|
||||||
[COPY] = { .nuops = 1, .uops = { { COPY, 0, 0 } } },
|
[COPY] = { .nuops = 1, .uops = { { COPY, 0, 0 } } },
|
||||||
[SWAP] = { .nuops = 1, .uops = { { SWAP, 0, 0 } } },
|
[SWAP] = { .nuops = 1, .uops = { { SWAP, 0, 0 } } },
|
||||||
};
|
};
|
||||||
|
#ifdef Py_DEBUG
|
||||||
|
const char * const _PyOpcode_uop_name[512] = {
|
||||||
|
[300] = "EXIT_TRACE",
|
||||||
|
[301] = "SET_IP",
|
||||||
|
[302] = "_GUARD_BOTH_INT",
|
||||||
|
[303] = "_BINARY_OP_MULTIPLY_INT",
|
||||||
|
[304] = "_BINARY_OP_ADD_INT",
|
||||||
|
[305] = "_BINARY_OP_SUBTRACT_INT",
|
||||||
|
[306] = "_GUARD_BOTH_FLOAT",
|
||||||
|
[307] = "_BINARY_OP_MULTIPLY_FLOAT",
|
||||||
|
[308] = "_BINARY_OP_ADD_FLOAT",
|
||||||
|
[309] = "_BINARY_OP_SUBTRACT_FLOAT",
|
||||||
|
[310] = "_GUARD_BOTH_UNICODE",
|
||||||
|
[311] = "_BINARY_OP_ADD_UNICODE",
|
||||||
|
[312] = "_LOAD_LOCALS",
|
||||||
|
[313] = "_LOAD_FROM_DICT_OR_GLOBALS",
|
||||||
|
};
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -325,8 +325,8 @@ translate_bytecode_to_trace(
|
||||||
}
|
}
|
||||||
#define ADD_TO_TRACE(OPCODE, OPERAND) \
|
#define ADD_TO_TRACE(OPCODE, OPERAND) \
|
||||||
if (lltrace >= 2) { \
|
if (lltrace >= 2) { \
|
||||||
const char *opname = (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : ""; \
|
const char *opname = (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)]; \
|
||||||
fprintf(stderr, " ADD_TO_TRACE(%s %d, %" PRIu64 ")\n", opname, (OPCODE), (uint64_t)(OPERAND)); \
|
fprintf(stderr, " ADD_TO_TRACE(%s, %" PRIu64 ")\n", opname, (uint64_t)(OPERAND)); \
|
||||||
} \
|
} \
|
||||||
trace[trace_length].opcode = (OPCODE); \
|
trace[trace_length].opcode = (OPCODE); \
|
||||||
trace[trace_length].operand = (OPERAND); \
|
trace[trace_length].operand = (OPERAND); \
|
||||||
|
@ -474,6 +474,8 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
|
||||||
}
|
}
|
||||||
opt->optimize = uop_optimize;
|
opt->optimize = uop_optimize;
|
||||||
opt->resume_threshold = UINT16_MAX;
|
opt->resume_threshold = UINT16_MAX;
|
||||||
opt->backedge_threshold = 0;
|
// Need at least 3 iterations to settle specializations.
|
||||||
|
// A few lower bits of the counter are reserved for other flags.
|
||||||
|
opt->backedge_threshold = 3 << OPTIMIZER_BITS_IN_COUNTER;
|
||||||
return (PyObject *)opt;
|
return (PyObject *)opt;
|
||||||
}
|
}
|
||||||
|
|
|
@ -300,6 +300,13 @@ class InstructionFlags:
|
||||||
f"(_PyOpcode_opcode_metadata[(OP)].flags & ({name}))")
|
f"(_PyOpcode_opcode_metadata[(OP)].flags & ({name}))")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
|
||||||
|
class ActiveCacheEffect:
|
||||||
|
"""Wraps a CacheEffect that is actually used, in context."""
|
||||||
|
effect: parser.CacheEffect
|
||||||
|
offset: int
|
||||||
|
|
||||||
|
|
||||||
FORBIDDEN_NAMES_IN_UOPS = (
|
FORBIDDEN_NAMES_IN_UOPS = (
|
||||||
"resume_with_error", # Proxy for "goto", which isn't an IDENTIFIER
|
"resume_with_error", # Proxy for "goto", which isn't an IDENTIFIER
|
||||||
"unbound_local_error",
|
"unbound_local_error",
|
||||||
|
@ -344,6 +351,7 @@ class Instruction:
|
||||||
unmoved_names: frozenset[str]
|
unmoved_names: frozenset[str]
|
||||||
instr_fmt: str
|
instr_fmt: str
|
||||||
instr_flags: InstructionFlags
|
instr_flags: InstructionFlags
|
||||||
|
active_caches: list[ActiveCacheEffect]
|
||||||
|
|
||||||
# Set later
|
# Set later
|
||||||
family: parser.Family | None = None
|
family: parser.Family | None = None
|
||||||
|
@ -375,15 +383,19 @@ class Instruction:
|
||||||
|
|
||||||
self.instr_flags = InstructionFlags.fromInstruction(inst)
|
self.instr_flags = InstructionFlags.fromInstruction(inst)
|
||||||
|
|
||||||
|
self.active_caches = []
|
||||||
|
offset = 0
|
||||||
|
for effect in self.cache_effects:
|
||||||
|
if effect.name != UNUSED:
|
||||||
|
self.active_caches.append(ActiveCacheEffect(effect, offset))
|
||||||
|
offset += effect.size
|
||||||
|
|
||||||
if self.instr_flags.HAS_ARG_FLAG:
|
if self.instr_flags.HAS_ARG_FLAG:
|
||||||
fmt = "IB"
|
fmt = "IB"
|
||||||
else:
|
else:
|
||||||
fmt = "IX"
|
fmt = "IX"
|
||||||
cache = "C"
|
if offset:
|
||||||
for ce in self.cache_effects:
|
fmt += "C" + "0"*(offset-1)
|
||||||
for _ in range(ce.size):
|
|
||||||
fmt += cache
|
|
||||||
cache = "0"
|
|
||||||
self.instr_fmt = fmt
|
self.instr_fmt = fmt
|
||||||
|
|
||||||
def is_viable_uop(self) -> bool:
|
def is_viable_uop(self) -> bool:
|
||||||
|
@ -392,18 +404,11 @@ class Instruction:
|
||||||
return False
|
return False
|
||||||
if self.instr_flags.HAS_ARG_FLAG:
|
if self.instr_flags.HAS_ARG_FLAG:
|
||||||
# If the instruction uses oparg, it cannot use any caches
|
# If the instruction uses oparg, it cannot use any caches
|
||||||
for c in self.cache_effects:
|
if self.active_caches:
|
||||||
if c.name != UNUSED:
|
return False
|
||||||
return False
|
|
||||||
else:
|
else:
|
||||||
# If it doesn't use oparg, it can have one cache entry
|
# If it doesn't use oparg, it can have one cache entry
|
||||||
caches: list[parser.CacheEffect] = []
|
if len(self.active_caches) > 1:
|
||||||
cache_offset = 0
|
|
||||||
for c in self.cache_effects:
|
|
||||||
if c.name != UNUSED:
|
|
||||||
caches.append(c)
|
|
||||||
cache_offset += c.size
|
|
||||||
if len(caches) > 1:
|
|
||||||
return False
|
return False
|
||||||
for forbidden in FORBIDDEN_NAMES_IN_UOPS:
|
for forbidden in FORBIDDEN_NAMES_IN_UOPS:
|
||||||
# TODO: Don't check in '#ifdef ENABLE_SPECIALIZATION' regions
|
# TODO: Don't check in '#ifdef ENABLE_SPECIALIZATION' regions
|
||||||
|
@ -458,7 +463,7 @@ class Instruction:
|
||||||
|
|
||||||
# out.emit(f"next_instr += OPSIZE({self.inst.name}) - 1;")
|
# out.emit(f"next_instr += OPSIZE({self.inst.name}) - 1;")
|
||||||
|
|
||||||
self.write_body(out, 0, tier=tier)
|
self.write_body(out, 0, self.active_caches, tier=tier)
|
||||||
|
|
||||||
# Skip the rest if the block always exits
|
# Skip the rest if the block always exits
|
||||||
if self.always_exits:
|
if self.always_exits:
|
||||||
|
@ -492,33 +497,30 @@ class Instruction:
|
||||||
self,
|
self,
|
||||||
out: Formatter,
|
out: Formatter,
|
||||||
dedent: int,
|
dedent: int,
|
||||||
cache_adjust: int = 0,
|
active_caches: list[ActiveCacheEffect],
|
||||||
tier: Tiers = TIER_ONE,
|
tier: Tiers = TIER_ONE,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Write the instruction body."""
|
"""Write the instruction body."""
|
||||||
# Write cache effect variable declarations and initializations
|
# Write cache effect variable declarations and initializations
|
||||||
cache_offset = cache_adjust
|
for active in active_caches:
|
||||||
for ceffect in self.cache_effects:
|
ceffect = active.effect
|
||||||
if ceffect.name != UNUSED:
|
bits = ceffect.size * BITS_PER_CODE_UNIT
|
||||||
bits = ceffect.size * BITS_PER_CODE_UNIT
|
if bits == 64:
|
||||||
if bits == 64:
|
# NOTE: We assume that 64-bit data in the cache
|
||||||
# NOTE: We assume that 64-bit data in the cache
|
# is always an object pointer.
|
||||||
# is always an object pointer.
|
# If this becomes false, we need a way to specify
|
||||||
# If this becomes false, we need a way to specify
|
# syntactically what type the cache data is.
|
||||||
# syntactically what type the cache data is.
|
typ = "PyObject *"
|
||||||
typ = "PyObject *"
|
func = "read_obj"
|
||||||
func = "read_obj"
|
else:
|
||||||
else:
|
typ = f"uint{bits}_t "
|
||||||
typ = f"uint{bits}_t "
|
func = f"read_u{bits}"
|
||||||
func = f"read_u{bits}"
|
if tier == TIER_ONE:
|
||||||
if tier == TIER_ONE:
|
out.emit(
|
||||||
out.emit(
|
f"{typ}{ceffect.name} = {func}(&next_instr[{active.offset}].cache);"
|
||||||
f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);"
|
)
|
||||||
)
|
else:
|
||||||
else:
|
out.emit(f"{typ}{ceffect.name} = operand;")
|
||||||
out.emit(f"{typ}{ceffect.name} = operand;")
|
|
||||||
cache_offset += ceffect.size
|
|
||||||
assert cache_offset == self.cache_offset + cache_adjust
|
|
||||||
|
|
||||||
# Write the body, substituting a goto for ERROR_IF() and other stuff
|
# Write the body, substituting a goto for ERROR_IF() and other stuff
|
||||||
assert dedent <= 0
|
assert dedent <= 0
|
||||||
|
@ -583,8 +585,9 @@ class Component:
|
||||||
instr: Instruction
|
instr: Instruction
|
||||||
input_mapping: StackEffectMapping
|
input_mapping: StackEffectMapping
|
||||||
output_mapping: StackEffectMapping
|
output_mapping: StackEffectMapping
|
||||||
|
active_caches: list[ActiveCacheEffect]
|
||||||
|
|
||||||
def write_body(self, out: Formatter, cache_adjust: int) -> None:
|
def write_body(self, out: Formatter) -> None:
|
||||||
with out.block(""):
|
with out.block(""):
|
||||||
input_names = {ieffect.name for _, ieffect in self.input_mapping}
|
input_names = {ieffect.name for _, ieffect in self.input_mapping}
|
||||||
for var, ieffect in self.input_mapping:
|
for var, ieffect in self.input_mapping:
|
||||||
|
@ -593,7 +596,7 @@ class Component:
|
||||||
if oeffect.name not in input_names:
|
if oeffect.name not in input_names:
|
||||||
out.declare(oeffect, None)
|
out.declare(oeffect, None)
|
||||||
|
|
||||||
self.instr.write_body(out, dedent=-4, cache_adjust=cache_adjust)
|
self.instr.write_body(out, -4, self.active_caches)
|
||||||
|
|
||||||
for var, oeffect in self.output_mapping:
|
for var, oeffect in self.output_mapping:
|
||||||
out.assign(var, oeffect)
|
out.assign(var, oeffect)
|
||||||
|
@ -611,6 +614,7 @@ class MacroInstruction:
|
||||||
instr_flags: InstructionFlags
|
instr_flags: InstructionFlags
|
||||||
macro: parser.Macro
|
macro: parser.Macro
|
||||||
parts: list[Component | parser.CacheEffect]
|
parts: list[Component | parser.CacheEffect]
|
||||||
|
cache_offset: int
|
||||||
predicted: bool = False
|
predicted: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@ -873,11 +877,11 @@ class Analyzer:
|
||||||
cache = instr.cache_offset
|
cache = instr.cache_offset
|
||||||
input = len(instr.input_effects)
|
input = len(instr.input_effects)
|
||||||
output = len(instr.output_effects)
|
output = len(instr.output_effects)
|
||||||
elif macro := self.macro_instrs.get(name):
|
elif mac := self.macro_instrs.get(name):
|
||||||
cache, input, output = 0, 0, 0
|
cache = mac.cache_offset
|
||||||
for part in macro.parts:
|
input, output = 0, 0
|
||||||
|
for part in mac.parts:
|
||||||
if isinstance(part, Component):
|
if isinstance(part, Component):
|
||||||
cache += part.instr.cache_offset
|
|
||||||
# A component may pop what the previous component pushed,
|
# A component may pop what the previous component pushed,
|
||||||
# so we offset the input/output counts by that.
|
# so we offset the input/output counts by that.
|
||||||
delta_i = len(part.instr.input_effects)
|
delta_i = len(part.instr.input_effects)
|
||||||
|
@ -885,9 +889,6 @@ class Analyzer:
|
||||||
offset = min(delta_i, output)
|
offset = min(delta_i, output)
|
||||||
input += delta_i - offset
|
input += delta_i - offset
|
||||||
output += delta_o - offset
|
output += delta_o - offset
|
||||||
else:
|
|
||||||
assert isinstance(part, parser.CacheEffect), part
|
|
||||||
cache += part.size
|
|
||||||
else:
|
else:
|
||||||
assert False, f"Unknown instruction {name!r}"
|
assert False, f"Unknown instruction {name!r}"
|
||||||
return cache, input, output
|
return cache, input, output
|
||||||
|
@ -906,29 +907,25 @@ class Analyzer:
|
||||||
stack, initial_sp = self.stack_analysis(components)
|
stack, initial_sp = self.stack_analysis(components)
|
||||||
sp = initial_sp
|
sp = initial_sp
|
||||||
parts: list[Component | parser.CacheEffect] = []
|
parts: list[Component | parser.CacheEffect] = []
|
||||||
format = "IB"
|
|
||||||
flags = InstructionFlags.newEmpty()
|
flags = InstructionFlags.newEmpty()
|
||||||
cache = "C"
|
offset = 0
|
||||||
for component in components:
|
for component in components:
|
||||||
match component:
|
match component:
|
||||||
case parser.CacheEffect() as ceffect:
|
case parser.CacheEffect() as ceffect:
|
||||||
parts.append(ceffect)
|
parts.append(ceffect)
|
||||||
for _ in range(ceffect.size):
|
offset += ceffect.size
|
||||||
format += cache
|
|
||||||
cache = "0"
|
|
||||||
case Instruction() as instr:
|
case Instruction() as instr:
|
||||||
part, sp = self.analyze_instruction(instr, stack, sp)
|
part, sp, offset = self.analyze_instruction(instr, stack, sp, offset)
|
||||||
parts.append(part)
|
parts.append(part)
|
||||||
for ce in instr.cache_effects:
|
|
||||||
for _ in range(ce.size):
|
|
||||||
format += cache
|
|
||||||
cache = "0"
|
|
||||||
flags.add(instr.instr_flags)
|
flags.add(instr.instr_flags)
|
||||||
case _:
|
case _:
|
||||||
typing.assert_never(component)
|
typing.assert_never(component)
|
||||||
final_sp = sp
|
final_sp = sp
|
||||||
|
format = "IB"
|
||||||
|
if offset:
|
||||||
|
format += "C" + "0"*(offset-1)
|
||||||
return MacroInstruction(
|
return MacroInstruction(
|
||||||
macro.name, stack, initial_sp, final_sp, format, flags, macro, parts
|
macro.name, stack, initial_sp, final_sp, format, flags, macro, parts, offset
|
||||||
)
|
)
|
||||||
|
|
||||||
def analyze_pseudo(self, pseudo: parser.Pseudo) -> PseudoInstruction:
|
def analyze_pseudo(self, pseudo: parser.Pseudo) -> PseudoInstruction:
|
||||||
|
@ -941,8 +938,8 @@ class Analyzer:
|
||||||
return PseudoInstruction(pseudo.name, targets, fmts[0], targets[0].instr_flags)
|
return PseudoInstruction(pseudo.name, targets, fmts[0], targets[0].instr_flags)
|
||||||
|
|
||||||
def analyze_instruction(
|
def analyze_instruction(
|
||||||
self, instr: Instruction, stack: list[StackEffect], sp: int
|
self, instr: Instruction, stack: list[StackEffect], sp: int, offset: int
|
||||||
) -> tuple[Component, int]:
|
) -> tuple[Component, int, int]:
|
||||||
input_mapping: StackEffectMapping = []
|
input_mapping: StackEffectMapping = []
|
||||||
for ieffect in reversed(instr.input_effects):
|
for ieffect in reversed(instr.input_effects):
|
||||||
sp -= 1
|
sp -= 1
|
||||||
|
@ -951,7 +948,12 @@ class Analyzer:
|
||||||
for oeffect in instr.output_effects:
|
for oeffect in instr.output_effects:
|
||||||
output_mapping.append((stack[sp], oeffect))
|
output_mapping.append((stack[sp], oeffect))
|
||||||
sp += 1
|
sp += 1
|
||||||
return Component(instr, input_mapping, output_mapping), sp
|
active_effects: list[ActiveCacheEffect] = []
|
||||||
|
for ceffect in instr.cache_effects:
|
||||||
|
if ceffect.name != UNUSED:
|
||||||
|
active_effects.append(ActiveCacheEffect(ceffect, offset))
|
||||||
|
offset += ceffect.size
|
||||||
|
return Component(instr, input_mapping, output_mapping, active_effects), sp, offset
|
||||||
|
|
||||||
def check_macro_components(
|
def check_macro_components(
|
||||||
self, macro: parser.Macro
|
self, macro: parser.Macro
|
||||||
|
@ -1030,7 +1032,7 @@ class Analyzer:
|
||||||
|
|
||||||
def get_stack_effect_info(
|
def get_stack_effect_info(
|
||||||
self, thing: parser.InstDef | parser.Macro | parser.Pseudo
|
self, thing: parser.InstDef | parser.Macro | parser.Pseudo
|
||||||
) -> tuple[AnyInstruction | None, str, str]:
|
) -> tuple[AnyInstruction | None, str | None, str | None]:
|
||||||
def effect_str(effects: list[StackEffect]) -> str:
|
def effect_str(effects: list[StackEffect]) -> str:
|
||||||
n_effect, sym_effect = list_effect_size(effects)
|
n_effect, sym_effect = list_effect_size(effects)
|
||||||
if sym_effect:
|
if sym_effect:
|
||||||
|
@ -1108,6 +1110,7 @@ class Analyzer:
|
||||||
continue
|
continue
|
||||||
instr, popped, pushed = self.get_stack_effect_info(thing)
|
instr, popped, pushed = self.get_stack_effect_info(thing)
|
||||||
if instr is not None:
|
if instr is not None:
|
||||||
|
assert popped is not None and pushed is not None
|
||||||
popped_data.append((instr, popped))
|
popped_data.append((instr, popped))
|
||||||
pushed_data.append((instr, pushed))
|
pushed_data.append((instr, pushed))
|
||||||
|
|
||||||
|
@ -1182,7 +1185,8 @@ class Analyzer:
|
||||||
|
|
||||||
self.write_pseudo_instrs()
|
self.write_pseudo_instrs()
|
||||||
|
|
||||||
self.write_uop_defines()
|
self.out.emit("")
|
||||||
|
self.write_uop_items(lambda name, counter: f"#define {name} {counter}")
|
||||||
|
|
||||||
self.write_stack_effect_functions()
|
self.write_stack_effect_functions()
|
||||||
|
|
||||||
|
@ -1213,6 +1217,9 @@ class Analyzer:
|
||||||
self.out.emit("#ifndef NEED_OPCODE_METADATA")
|
self.out.emit("#ifndef NEED_OPCODE_METADATA")
|
||||||
self.out.emit("extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];")
|
self.out.emit("extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];")
|
||||||
self.out.emit("extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];")
|
self.out.emit("extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];")
|
||||||
|
self.out.emit("#ifdef Py_DEBUG")
|
||||||
|
self.out.emit("extern const char * const _PyOpcode_uop_name[512];")
|
||||||
|
self.out.emit("#endif")
|
||||||
self.out.emit("#else")
|
self.out.emit("#else")
|
||||||
|
|
||||||
self.out.emit("const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {")
|
self.out.emit("const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {")
|
||||||
|
@ -1246,19 +1253,27 @@ class Analyzer:
|
||||||
pass
|
pass
|
||||||
case parser.InstDef(name=name):
|
case parser.InstDef(name=name):
|
||||||
instr = self.instrs[name]
|
instr = self.instrs[name]
|
||||||
|
# Since an 'op' is not a bytecode, it has no expansion
|
||||||
if instr.kind != "op" and instr.is_viable_uop():
|
if instr.kind != "op" and instr.is_viable_uop():
|
||||||
|
# Double check there aren't any used cache effects.
|
||||||
|
# If this fails, see write_macro_expansions().
|
||||||
|
assert not instr.active_caches, (instr.name, instr.cache_effects)
|
||||||
self.out.emit(
|
self.out.emit(
|
||||||
f"[{name}] = "
|
f"[{name}] = "
|
||||||
f"{{ .nuops = 1, .uops = {{ {{ {name}, 0, 0 }} }} }},"
|
f"{{ .nuops = 1, .uops = {{ {{ {name}, 0, 0 }} }} }},"
|
||||||
)
|
)
|
||||||
case parser.Macro():
|
case parser.Macro():
|
||||||
# TODO: emit expansion if all parts are viable uops
|
self.write_macro_expansions(self.macro_instrs[thing.name])
|
||||||
pass
|
|
||||||
case parser.Pseudo():
|
case parser.Pseudo():
|
||||||
pass
|
pass
|
||||||
case _:
|
case _:
|
||||||
typing.assert_never(thing)
|
typing.assert_never(thing)
|
||||||
|
|
||||||
|
self.out.emit("#ifdef Py_DEBUG")
|
||||||
|
with self.out.block("const char * const _PyOpcode_uop_name[512] =", ";"):
|
||||||
|
self.write_uop_items(lambda name, counter: f"[{counter}] = \"{name}\",")
|
||||||
|
self.out.emit("#endif")
|
||||||
|
|
||||||
self.out.emit("#endif")
|
self.out.emit("#endif")
|
||||||
|
|
||||||
with open(self.pymetadata_filename, "w") as f:
|
with open(self.pymetadata_filename, "w") as f:
|
||||||
|
@ -1300,13 +1315,12 @@ class Analyzer:
|
||||||
self.out.emit(f" ((OP) == {op}) || \\")
|
self.out.emit(f" ((OP) == {op}) || \\")
|
||||||
self.out.emit(f" 0")
|
self.out.emit(f" 0")
|
||||||
|
|
||||||
def write_uop_defines(self) -> None:
|
def write_uop_items(self, make_text: typing.Callable[[str, int], str]) -> None:
|
||||||
"""Write '#define XXX NNN' for each uop"""
|
"""Write one line per uop (e.g. '#define XXX NNN'), as formatted by make_text(name, counter)"""
|
||||||
self.out.emit("")
|
counter = 300 # TODO: Avoid collision with pseudo instructions
|
||||||
counter = 300
|
|
||||||
def add(name: str) -> None:
|
def add(name: str) -> None:
|
||||||
nonlocal counter
|
nonlocal counter
|
||||||
self.out.emit(f"#define {name} {counter}")
|
self.out.emit(make_text(name, counter))
|
||||||
counter += 1
|
counter += 1
|
||||||
add("EXIT_TRACE")
|
add("EXIT_TRACE")
|
||||||
add("SET_IP")
|
add("SET_IP")
|
||||||
|
@ -1314,6 +1328,32 @@ class Analyzer:
|
||||||
if instr.kind == "op" and instr.is_viable_uop():
|
if instr.kind == "op" and instr.is_viable_uop():
|
||||||
add(instr.name)
|
add(instr.name)
|
||||||
|
|
||||||
|
def write_macro_expansions(self, mac: MacroInstruction) -> None:
|
||||||
|
"""Write the macro expansions for a macro-instruction."""
|
||||||
|
# TODO: Refactor to share code with write_body(), is_viable_uop(), etc.
|
||||||
|
offset = 0 # Cache effect offset
|
||||||
|
expansions: list[tuple[str, int, int]] = [] # [(name, size, offset), ...]
|
||||||
|
for part in mac.parts:
|
||||||
|
if isinstance(part, Component):
|
||||||
|
# All component instructions must be viable uops
|
||||||
|
if not part.instr.is_viable_uop():
|
||||||
|
print(f"NOTE: Part {part.instr.name} of {mac.name} is not a viable uop")
|
||||||
|
return
|
||||||
|
if part.instr.instr_flags.HAS_ARG_FLAG or not part.active_caches:
|
||||||
|
size, offset = 0, 0
|
||||||
|
else:
|
||||||
|
# If this assert triggers, is_viable_uop() lied
|
||||||
|
assert len(part.active_caches) == 1, (mac.name, part.instr.name)
|
||||||
|
cache = part.active_caches[0]
|
||||||
|
size, offset = cache.effect.size, cache.offset
|
||||||
|
expansions.append((part.instr.name, size, offset))
|
||||||
|
assert len(expansions) > 0, f"Macro {mac.name} has empty expansion?!"
|
||||||
|
pieces = [f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions]
|
||||||
|
self.out.emit(
|
||||||
|
f"[{mac.name}] = "
|
||||||
|
f"{{ .nuops = {len(expansions)}, .uops = {{ {', '.join(pieces)} }} }},"
|
||||||
|
)
|
||||||
|
|
||||||
def emit_metadata_entry(
|
def emit_metadata_entry(
|
||||||
self, name: str, fmt: str, flags: InstructionFlags
|
self, name: str, fmt: str, flags: InstructionFlags
|
||||||
) -> None:
|
) -> None:
|
||||||
|
@ -1379,6 +1419,7 @@ class Analyzer:
|
||||||
for thing in self.everything:
|
for thing in self.everything:
|
||||||
match thing:
|
match thing:
|
||||||
case OverriddenInstructionPlaceHolder():
|
case OverriddenInstructionPlaceHolder():
|
||||||
|
# TODO: Is this helpful?
|
||||||
self.write_overridden_instr_place_holder(thing)
|
self.write_overridden_instr_place_holder(thing)
|
||||||
case parser.InstDef():
|
case parser.InstDef():
|
||||||
instr = self.instrs[thing.name]
|
instr = self.instrs[thing.name]
|
||||||
|
@ -1388,7 +1429,7 @@ class Analyzer:
|
||||||
instr.write(self.out, tier=TIER_TWO)
|
instr.write(self.out, tier=TIER_TWO)
|
||||||
self.out.emit("break;")
|
self.out.emit("break;")
|
||||||
case parser.Macro():
|
case parser.Macro():
|
||||||
pass # TODO
|
pass
|
||||||
case parser.Pseudo():
|
case parser.Pseudo():
|
||||||
pass
|
pass
|
||||||
case _:
|
case _:
|
||||||
|
@ -1429,7 +1470,7 @@ class Analyzer:
|
||||||
cache_adjust += size
|
cache_adjust += size
|
||||||
case Component() as comp:
|
case Component() as comp:
|
||||||
last_instr = comp.instr
|
last_instr = comp.instr
|
||||||
comp.write_body(self.out, cache_adjust)
|
comp.write_body(self.out)
|
||||||
cache_adjust += comp.instr.cache_offset
|
cache_adjust += comp.instr.cache_offset
|
||||||
|
|
||||||
if cache_adjust:
|
if cache_adjust:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue