gh-104584: Emit macro expansions to opcode_metadata.h (#106163)

This produces longer traces (superblocks?).

Also improved debug output (uop names are now printed instead of numeric opcodes). This would be simpler if the numeric opcode values were generated by generate_cases.py, but that's another project.

Refactored some code in generate_cases.py so the essential algorithm for cache effects is only run once. (Deciding which effects are used and what the total cache size is, regardless of what's used.)
This commit is contained in:
Guido van Rossum 2023-06-28 11:28:07 -07:00 committed by GitHub
parent c283a0cff5
commit 11731434df
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 153 additions and 78 deletions

View file

@ -325,8 +325,8 @@ translate_bytecode_to_trace(
}
#define ADD_TO_TRACE(OPCODE, OPERAND) \
if (lltrace >= 2) { \
const char *opname = (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : ""; \
fprintf(stderr, " ADD_TO_TRACE(%s %d, %" PRIu64 ")\n", opname, (OPCODE), (uint64_t)(OPERAND)); \
const char *opname = (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)]; \
fprintf(stderr, " ADD_TO_TRACE(%s, %" PRIu64 ")\n", opname, (uint64_t)(OPERAND)); \
} \
trace[trace_length].opcode = (OPCODE); \
trace[trace_length].operand = (OPERAND); \
@ -474,6 +474,8 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
}
opt->optimize = uop_optimize;
opt->resume_threshold = UINT16_MAX;
opt->backedge_threshold = 0;
// Need at least 3 iterations to settle specializations.
// A few lower bits of the counter are reserved for other flags.
opt->backedge_threshold = 3 << OPTIMIZER_BITS_IN_COUNTER;
return (PyObject *)opt;
}