gh-111786: Use separate opcode vars for Tier 1 and Tier 2 (#112289)

This makes the interpreter on Windows about 3% faster on the pyperformance benchmarks.
This commit is contained in:
Michael Droettboom 2023-11-20 18:13:44 -05:00 committed by GitHub
parent 8deb8bc2e5
commit 6a00a58f60
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -689,7 +689,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
#ifdef Py_STATS #ifdef Py_STATS
int lastopcode = 0; int lastopcode = 0;
#endif #endif
int opcode; /* Current opcode */ uint8_t opcode; /* Current opcode */
int oparg; /* Current opcode argument, if any */ int oparg; /* Current opcode argument, if any */
#ifdef LLTRACE #ifdef LLTRACE
int lltrace = 0; int lltrace = 0;
@@ -776,9 +776,7 @@ resume_frame:
/* Start instructions */ /* Start instructions */
#if !USE_COMPUTED_GOTOS #if !USE_COMPUTED_GOTOS
dispatch_opcode: dispatch_opcode:
// Cast to an 8-bit value to improve the code generated by MSVC switch (opcode)
// (in combination with the EXTRA_CASES macro).
switch ((uint8_t)opcode)
#endif #endif
{ {
@@ -822,7 +820,7 @@ resume_frame:
#if USE_COMPUTED_GOTOS #if USE_COMPUTED_GOTOS
_unknown_opcode: _unknown_opcode:
#else #else
EXTRA_CASES // From pycore_opcode.h, a 'case' for each unused opcode EXTRA_CASES // From pycore_opcode_metadata.h, a 'case' for each unused opcode
#endif #endif
/* Tell C compilers not to hold the opcode variable in the loop. /* Tell C compilers not to hold the opcode variable in the loop.
next_instr points the current instruction without TARGET(). */ next_instr points the current instruction without TARGET(). */
@@ -994,28 +992,29 @@ enter_tier_two:
OPT_STAT_INC(traces_executed); OPT_STAT_INC(traces_executed);
_PyUOpInstruction *next_uop = current_executor->trace; _PyUOpInstruction *next_uop = current_executor->trace;
uint16_t uopcode;
#ifdef Py_STATS #ifdef Py_STATS
uint64_t trace_uop_execution_counter = 0; uint64_t trace_uop_execution_counter = 0;
#endif #endif
for (;;) { for (;;) {
opcode = next_uop->opcode; uopcode = next_uop->opcode;
DPRINTF(3, DPRINTF(3,
"%4d: uop %s, oparg %d, operand %" PRIu64 ", target %d, stack_level %d\n", "%4d: uop %s, oparg %d, operand %" PRIu64 ", target %d, stack_level %d\n",
(int)(next_uop - current_executor->trace), (int)(next_uop - current_executor->trace),
_PyUopName(opcode), _PyUopName(uopcode),
next_uop->oparg, next_uop->oparg,
next_uop->operand, next_uop->operand,
next_uop->target, next_uop->target,
(int)(stack_pointer - _PyFrame_Stackbase(frame))); (int)(stack_pointer - _PyFrame_Stackbase(frame)));
next_uop++; next_uop++;
OPT_STAT_INC(uops_executed); OPT_STAT_INC(uops_executed);
UOP_STAT_INC(opcode, execution_count); UOP_STAT_INC(uopcode, execution_count);
#ifdef Py_STATS #ifdef Py_STATS
trace_uop_execution_counter++; trace_uop_execution_counter++;
#endif #endif
switch (opcode) { switch (uopcode) {
#include "executor_cases.c.h" #include "executor_cases.c.h"
@@ -1053,7 +1052,7 @@ pop_1_error_tier_two:
STACK_SHRINK(1); STACK_SHRINK(1);
error_tier_two: error_tier_two:
DPRINTF(2, "Error: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n", DPRINTF(2, "Error: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n",
opcode, _PyUopName(opcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target, uopcode, _PyUopName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
(int)(next_uop - current_executor->trace - 1)); (int)(next_uop - current_executor->trace - 1));
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->return_offset = 0; // Don't leave this random frame->return_offset = 0; // Don't leave this random
@@ -1066,10 +1065,10 @@ deoptimize:
// On DEOPT_IF we just repeat the last instruction. // On DEOPT_IF we just repeat the last instruction.
// This presumes nothing was popped from the stack (nor pushed). // This presumes nothing was popped from the stack (nor pushed).
DPRINTF(2, "DEOPT: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n", DPRINTF(2, "DEOPT: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n",
opcode, _PyUopName(opcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target, uopcode, _PyUopName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
(int)(next_uop - current_executor->trace - 1)); (int)(next_uop - current_executor->trace - 1));
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
UOP_STAT_INC(opcode, miss); UOP_STAT_INC(uopcode, miss);
frame->return_offset = 0; // Dispatch to frame->instr_ptr frame->return_offset = 0; // Dispatch to frame->instr_ptr
_PyFrame_SetStackPointer(frame, stack_pointer); _PyFrame_SetStackPointer(frame, stack_pointer);
frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));