gh-112287: Speed up Tier 2 (uop) interpreter a little (#112286)

This makes the Tier 2 interpreter a little faster.
I calculated by about 3%,
though I hesitate to claim an exact number.

This starts by doubling the trace size limit (to 512),
making it more likely that loops fit in a trace.

The rest of the approach is to only load
`oparg` and `operand` in cases that use them.
The code generator know when these are used.

For `oparg`, it will conditionally emit
```
oparg = CURRENT_OPARG();
```
at the top of the case block.
(The `oparg` variable may be referenced multiple times
by the instructions code block, so it must be in a variable.)

For `operand`, it will use `CURRENT_OPERAND()` directly
instead of referencing the `operand` variable,
which no longer exists.
(There is only one place where this will be used.)
This commit is contained in:
Guido van Rossum 2023-11-20 11:25:32 -08:00 committed by GitHub
parent c4c63211e8
commit 8deb8bc2e5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 131 additions and 33 deletions

View file

@ -994,21 +994,18 @@ enter_tier_two:
OPT_STAT_INC(traces_executed);
_PyUOpInstruction *next_uop = current_executor->trace;
uint64_t operand;
#ifdef Py_STATS
uint64_t trace_uop_execution_counter = 0;
#endif
for (;;) {
opcode = next_uop->opcode;
oparg = next_uop->oparg;
operand = next_uop->operand;
DPRINTF(3,
"%4d: uop %s, oparg %d, operand %" PRIu64 ", target %d, stack_level %d\n",
(int)(next_uop - current_executor->trace),
_PyUopName(opcode),
oparg,
operand,
next_uop->oparg,
next_uop->operand,
next_uop->target,
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
next_uop++;
@ -1025,8 +1022,9 @@ enter_tier_two:
default:
#ifdef Py_DEBUG
{
fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 "\n",
opcode, oparg, operand);
fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 " @ %d\n",
opcode, next_uop[-1].oparg, next_uop[-1].operand,
(int)(next_uop - current_executor->trace - 1));
Py_FatalError("Unknown uop");
}
#else
@ -1055,7 +1053,7 @@ pop_1_error_tier_two:
STACK_SHRINK(1);
error_tier_two:
DPRINTF(2, "Error: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n",
opcode, _PyUopName(opcode), oparg, operand, next_uop[-1].target,
opcode, _PyUopName(opcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
(int)(next_uop - current_executor->trace - 1));
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->return_offset = 0; // Don't leave this random
@ -1068,7 +1066,7 @@ deoptimize:
// On DEOPT_IF we just repeat the last instruction.
// This presumes nothing was popped from the stack (nor pushed).
DPRINTF(2, "DEOPT: [Uop %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d]\n",
opcode, _PyUopName(opcode), oparg, operand, next_uop[-1].target,
opcode, _PyUopName(opcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
(int)(next_uop - current_executor->trace - 1));
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
UOP_STAT_INC(opcode, miss);