Mirror of https://github.com/python/cpython.git, synced 2025-08-02 16:13:13 +00:00
GH-113853: Guarantee forward progress in executors (GH-113854)
parent 0d8fec79ca
commit 55824d01f8

5 changed files with 120 additions and 73 deletions
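In outline: _PyOptimizer_BackEdge(frame, src, dest, ...) used to start optimizing at the backward jump's destination to guarantee forward progress. The renamed _PyOptimizer_Optimize(frame, start, ...) anchors at the instruction itself, and translate_bytecode_to_trace now guarantees progress explicitly with a progress_needed flag: an initial JUMP_BACKWARD is followed to its target before tracing starts, and any other first instruction is projected in its non-deoptimizing form. A minimal toy model of the re-anchoring rule (illustrative types and opcodes, not CPython's):

#include <stdio.h>

/* Toy model of the re-anchoring rule: if the first instruction of a
 * candidate trace is a backward jump, follow it once and re-anchor the
 * trace at its target, so the resulting executor always covers at
 * least one real instruction before it can loop back. */
typedef struct { int opcode; int oparg; } Instr;
enum { LOAD_FAST, STORE_FAST, JUMP_BACKWARD };

static int trace_anchor(const Instr *code, int start) {
    if (code[start].opcode == JUMP_BACKWARD) {
        /* oparg counts code units back from the next instruction */
        return start + 1 - code[start].oparg;
    }
    return start;
}

int main(void) {
    Instr loop[] = {
        {LOAD_FAST, 0}, {STORE_FAST, 1}, {JUMP_BACKWARD, 3},
    };
    /* Tracing is triggered at the jump (index 2) but anchors at 0. */
    printf("anchor = %d\n", trace_anchor(loop, 2));
    return 0;
}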
Python/bytecodes.c

@@ -2325,12 +2325,18 @@ dummy_func(
             // Double-check that the opcode isn't instrumented or something:
             if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
                 OPT_STAT_INC(attempts);
-                int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
+                _Py_CODEUNIT *start = this_instr;
+                /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
+                while (oparg > 255) {
+                    oparg >>= 8;
+                    start--;
+                }
+                int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
                 ERROR_IF(optimized < 0, error);
                 if (optimized) {
                     // Rewind and enter the executor:
-                    assert(this_instr->op.code == ENTER_EXECUTOR);
-                    next_instr = this_instr;
+                    assert(start->op.code == ENTER_EXECUTOR);
+                    next_instr = start;
                     this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
                 }
                 else {
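The back-up loop relies on an encoding invariant: each EXTENDED_ARG prefix contributes one byte of the operand, so an oparg above 255 implies one prefix code unit per extra byte sitting immediately before this_instr. A standalone sketch of that arithmetic (toy indices, not real _Py_CODEUNIT pointers):

#include <stdio.h>
#include <stdint.h>

/* Sketch of the back-up loop: shift one operand byte off per step and
 * step back one code unit, landing on the first EXTENDED_ARG prefix. */
int main(void) {
    uint32_t oparg = 0x012345;  /* operand that needed two EXTENDED_ARG prefixes */
    int index = 7;              /* pretend this_instr sits at code unit 7 */
    while (oparg > 255) {
        oparg >>= 8;
        index--;                /* one code unit back per prefix byte */
    }
    printf("instruction starts at code unit %d\n", index);  /* prints 5 */
    return 0;
}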
@@ -2370,10 +2376,12 @@ dummy_func(
             GOTO_TIER_TWO();
         }
         else {
-            code->co_executors->executors[oparg & 255] = NULL;
+            /* ENTER_EXECUTOR will be the first code unit of the instruction */
+            assert(oparg < 256);
+            code->co_executors->executors[oparg] = NULL;
             opcode = this_instr->op.code = executor->vm_data.opcode;
             this_instr->op.arg = executor->vm_data.oparg;
-            oparg = (oparg & (~255)) | executor->vm_data.oparg;
+            oparg = executor->vm_data.oparg;
             Py_DECREF(executor);
             next_instr = this_instr;
             DISPATCH_GOTO();
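When ENTER_EXECUTOR finds its executor invalid it tears itself down by restoring the opcode and oparg stashed in vm_data. Because the optimizer now anchors at the first code unit of an instruction, the ENTER_EXECUTOR oparg is a bare executor index that fits in one byte, hence the new assert(oparg < 256) and the dropped & 255 / ~255 masking. A toy model of the teardown (simplified types, not CPython's):

#include <stdio.h>

/* Toy model of executor invalidation: the original opcode/oparg were
 * stashed in the executor when it was installed, so tearing it down
 * writes them back and clears the per-code-object table slot. */
typedef struct { int opcode; int oparg; } CodeUnit;
typedef struct { int saved_opcode; int saved_oparg; } Executor;

static void invalidate(CodeUnit *instr, Executor **table, int idx) {
    Executor *e = table[idx];
    table[idx] = NULL;                 /* like executors[oparg] = NULL */
    instr->opcode = e->saved_opcode;   /* restore original instruction */
    instr->oparg = e->saved_oparg;
}

int main(void) {
    Executor e = { .saved_opcode = 77, .saved_oparg = 3 };
    Executor *table[256] = { [5] = &e };
    CodeUnit instr = { .opcode = 255 /* pretend ENTER_EXECUTOR */, .oparg = 5 };
    invalidate(&instr, table, instr.oparg);
    printf("restored opcode=%d oparg=%d\n", instr.opcode, instr.oparg);
    return 0;
}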
Python/generated_cases.c.h (generated): 18 changed lines
@@ -2384,10 +2384,12 @@
             GOTO_TIER_TWO();
         }
         else {
-            code->co_executors->executors[oparg & 255] = NULL;
+            /* ENTER_EXECUTOR will be the first code unit of the instruction */
+            assert(oparg < 256);
+            code->co_executors->executors[oparg] = NULL;
             opcode = this_instr->op.code = executor->vm_data.opcode;
             this_instr->op.arg = executor->vm_data.oparg;
-            oparg = (oparg & (~255)) | executor->vm_data.oparg;
+            oparg = executor->vm_data.oparg;
             Py_DECREF(executor);
             next_instr = this_instr;
             DISPATCH_GOTO();
@@ -3289,12 +3291,18 @@
             // Double-check that the opcode isn't instrumented or something:
             if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
                 OPT_STAT_INC(attempts);
-                int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
+                _Py_CODEUNIT *start = this_instr;
+                /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
+                while (oparg > 255) {
+                    oparg >>= 8;
+                    start--;
+                }
+                int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
                 if (optimized < 0) goto error;
                 if (optimized) {
                     // Rewind and enter the executor:
-                    assert(this_instr->op.code == ENTER_EXECUTOR);
-                    next_instr = this_instr;
+                    assert(start->op.code == ENTER_EXECUTOR);
+                    next_instr = start;
                     this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
                 }
                 else {
Python/optimizer.c

@@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
 }

 int
-_PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer)
+_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer)
 {
-    assert(src->op.code == JUMP_BACKWARD);
     PyCodeObject *code = (PyCodeObject *)frame->f_executable;
     assert(PyCode_Check(code));
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (!has_space_for_executor(code, src)) {
+    if (!has_space_for_executor(code, start)) {
         return 0;
     }
     _PyOptimizerObject *opt = interp->optimizer;
     _PyExecutorObject *executor = NULL;
-    /* Start optimizing at the destination to guarantee forward progress */
-    int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
+    int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
     if (err <= 0) {
         assert(executor == NULL);
         return err;
     }
-    int index = get_index_for_executor(code, src);
+    int index = get_index_for_executor(code, start);
     if (index < 0) {
         /* Out of memory. Don't raise and assume that the
          * error will show up elsewhere.
@@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer)
         Py_DECREF(executor);
         return 0;
     }
-    insert_executor(code, src, index, executor);
+    insert_executor(code, start, index, executor);
     Py_DECREF(executor);
     return 1;
 }
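The renamed entry point keeps the three-way result contract that the ERROR_IF(optimized < 0, error) / if (optimized) callers above depend on: negative means a real error with an exception set, 0 declines quietly (no executor slot, out of memory, or no worthwhile trace), and 1 means an executor was inserted. A toy model of that contract (the stub and its inputs are invented for illustration):

#include <stdio.h>

/* Toy model of _PyOptimizer_Optimize's result contract:
 * -1 = error, 0 = declined quietly, 1 = executor inserted. */
static int optimize_stub(int has_space, int trace_length) {
    if (!has_space) {
        return 0;                 /* no executor slot: decline, no error */
    }
    if (trace_length < 0) {
        return -1;                /* stand-in for a real failure */
    }
    return trace_length >= 5;     /* 1 only for a worthwhile trace */
}

int main(void) {
    printf("%d %d %d\n",
           optimize_stub(0, 9),   /* 0: no slot */
           optimize_stub(1, 3),   /* 0: trace too short */
           optimize_stub(1, 9));  /* 1: success */
    return 0;
}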
@@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = {
 #define CONFIDENCE_RANGE 1000
 #define CONFIDENCE_CUTOFF 333

-/* Returns 1 on success,
- * 0 if it failed to produce a worthwhile trace,
- * and -1 on an error.
- */
-static int
-translate_bytecode_to_trace(
-    PyCodeObject *code,
-    _Py_CODEUNIT *instr,
-    _PyUOpInstruction *trace,
-    int buffer_size,
-    _PyBloomFilter *dependencies)
-{
-    PyCodeObject *initial_code = code;
-    _Py_BloomFilter_Add(dependencies, initial_code);
-    _Py_CODEUNIT *initial_instr = instr;
-    int trace_length = 0;
-    int max_length = buffer_size;
-    struct {
-        PyCodeObject *code;
-        _Py_CODEUNIT *instr;
-    } trace_stack[TRACE_STACK_SIZE];
-    int trace_stack_depth = 0;
-    int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
-
-#ifdef Py_DEBUG
-    char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
-    int lltrace = 0;
-    if (python_lltrace != NULL && *python_lltrace >= '0') {
-        lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
-    }
-#endif
-
 #ifdef Py_DEBUG
 #define DPRINTF(level, ...) \
     if (lltrace >= (level)) { printf(__VA_ARGS__); }
@@ -403,6 +369,39 @@ translate_bytecode_to_trace(
     code = trace_stack[trace_stack_depth].code; \
     instr = trace_stack[trace_stack_depth].instr;

+/* Returns 1 on success,
+ * 0 if it failed to produce a worthwhile trace,
+ * and -1 on an error.
+ */
+static int
+translate_bytecode_to_trace(
+    PyCodeObject *code,
+    _Py_CODEUNIT *instr,
+    _PyUOpInstruction *trace,
+    int buffer_size,
+    _PyBloomFilter *dependencies)
+{
+    bool progress_needed = true;
+    PyCodeObject *initial_code = code;
+    _Py_BloomFilter_Add(dependencies, initial_code);
+    _Py_CODEUNIT *initial_instr = instr;
+    int trace_length = 0;
+    int max_length = buffer_size;
+    struct {
+        PyCodeObject *code;
+        _Py_CODEUNIT *instr;
+    } trace_stack[TRACE_STACK_SIZE];
+    int trace_stack_depth = 0;
+    int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
+
+#ifdef Py_DEBUG
+    char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
+    int lltrace = 0;
+    if (python_lltrace != NULL && *python_lltrace >= '0') {
+        lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
+    }
+#endif
+
     DPRINTF(4,
             "Optimizing %s (%s:%d) at byte offset %d\n",
             PyUnicode_AsUTF8(code->co_qualname),
@@ -410,6 +409,7 @@ translate_bytecode_to_trace(
             code->co_firstlineno,
             2 * INSTR_IP(initial_instr, code));
+    uint32_t target = 0;

 top:  // Jump here after _PUSH_FRAME or likely branches
     for (;;) {
-        uint32_t target = INSTR_IP(instr, code);
+        target = INSTR_IP(instr, code);
@@ -421,6 +421,15 @@ top:  // Jump here after _PUSH_FRAME or likely branches
         uint32_t oparg = instr->op.arg;
         uint32_t extended = 0;

+        if (opcode == ENTER_EXECUTOR) {
+            assert(oparg < 256);
+            _PyExecutorObject *executor =
+                (_PyExecutorObject *)code->co_executors->executors[oparg];
+            opcode = executor->vm_data.opcode;
+            DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n",  _PyOpcode_OpName[opcode]);
+            oparg = executor->vm_data.oparg;
+        }
+
         if (opcode == EXTENDED_ARG) {
             instr++;
             extended = 1;
@@ -431,13 +440,23 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                 goto done;
             }
         }
         assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);

-        if (opcode == ENTER_EXECUTOR) {
-            _PyExecutorObject *executor =
-                (_PyExecutorObject *)code->co_executors->executors[oparg&255];
-            opcode = executor->vm_data.opcode;
-            DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n",  _PyOpcode_OpName[opcode]);
-            oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
-        }
+        /* Special case the first instruction,
+         * so that we can guarantee forward progress */
+        if (progress_needed) {
+            progress_needed = false;
+            if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
+                instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
+                initial_instr = instr;
+                continue;
+            }
+            else {
+                if (OPCODE_HAS_DEOPT(opcode)) {
+                    opcode = _PyOpcode_Deopt[opcode];
+                }
+                assert(!OPCODE_HAS_DEOPT(opcode));
+            }
+        }

         switch (opcode) {
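The progress_needed special case covers the two ways a trace's first instruction could make no progress: an initial JUMP_BACKWARD is followed immediately, re-anchoring initial_instr at its target, and any other first opcode is projected in its unspecialized form via _PyOpcode_Deopt, since a deopt exit on instruction zero would return to the interpreter having executed nothing. A toy illustration of the deopt half (the opcode names echo real CPython specializations, but the table here is invented):

#include <assert.h>
#include <stdio.h>

/* Toy version of "the first instruction must not be able to deopt".
 * A specialized opcode can fall back to its base form at runtime; on
 * the trace's first instruction that exit would mean zero progress,
 * so the translator projects the base opcode instead. */
enum { NOTHING, LOAD_ATTR, LOAD_ATTR_INSTANCE_VALUE, NUM_OPS };

static const int deopt_to[NUM_OPS] = {
    [LOAD_ATTR] = NOTHING,                   /* base form: cannot deopt */
    [LOAD_ATTR_INSTANCE_VALUE] = LOAD_ATTR,  /* specialization: can */
};

int main(void) {
    int opcode = LOAD_ATTR_INSTANCE_VALUE;
    if (deopt_to[opcode] != NOTHING) {       /* like OPCODE_HAS_DEOPT() */
        opcode = deopt_to[opcode];           /* like _PyOpcode_Deopt[] */
    }
    assert(deopt_to[opcode] == NOTHING);     /* now deopt-free */
    printf("first instruction projected as opcode %d\n", opcode);
    return 0;
}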
@@ -480,7 +499,9 @@ top:  // Jump here after _PUSH_FRAME or likely branches
             case JUMP_BACKWARD:
+            case JUMP_BACKWARD_NO_INTERRUPT:
             {
-                if (instr + 2 - oparg == initial_instr && code == initial_code) {
+                _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
+                if (target == initial_instr) {
                     /* We have looped round to the start */
                     RESERVE(1);
                     ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
                 }
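Both jump cases now compute the target the same way: instr + 1 + _PyOpcode_Caches[opcode] - oparg, i.e. the operand counts code units backward from the end of the instruction, which is the opcode word plus its inline cache entries. A sketch of that arithmetic (the cache count below is made up):

#include <stdio.h>
#include <stdint.h>

/* Sketch of the backward-jump arithmetic. The cache count here is
 * invented; CPython reads it from _PyOpcode_Caches[opcode]. */
enum { JUMP_BACKWARD };
static const int caches[] = { [JUMP_BACKWARD] = 1 };

int main(void) {
    uint16_t bytecode[16] = {0};      /* stand-in for _Py_CODEUNIT[] */
    uint16_t *instr = &bytecode[8];   /* the jump sits at code unit 8 */
    int oparg = 10;                   /* jump 10 code units backward */
    uint16_t *target = instr + 1 + caches[JUMP_BACKWARD] - oparg;
    printf("target is code unit %td\n", target - bytecode);  /* prints 0 */
    return 0;
}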
@@ -641,19 +662,7 @@ done:
     }
     assert(code == initial_code);
     // Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
-    if (trace_length > 4) {
-        ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
-        DPRINTF(1,
-                "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
-                PyUnicode_AsUTF8(code->co_qualname),
-                PyUnicode_AsUTF8(code->co_filename),
-                code->co_firstlineno,
-                2 * INSTR_IP(initial_instr, code),
-                trace_length);
-        OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
-        return 1;
-    }
-    else {
+    if (progress_needed || trace_length < 5) {
         OPT_STAT_INC(trace_too_short);
         DPRINTF(4,
                 "No trace for %s (%s:%d) at byte offset %d\n",
@@ -661,15 +670,25 @@ done:
                 PyUnicode_AsUTF8(code->co_filename),
                 code->co_firstlineno,
                 2 * INSTR_IP(initial_instr, code));
+        return 0;
     }
-    return 0;
+    ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+    DPRINTF(1,
+            "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
+            PyUnicode_AsUTF8(code->co_qualname),
+            PyUnicode_AsUTF8(code->co_filename),
+            code->co_firstlineno,
+            2 * INSTR_IP(initial_instr, code),
+            trace_length);
+    OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
+    return 1;
+}

 #undef RESERVE
 #undef RESERVE_RAW
 #undef INSTR_IP
 #undef ADD_TO_TRACE
 #undef DPRINTF
-}

 #define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31)))
 #define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
@@ -854,10 +873,20 @@ counter_optimize(
     int Py_UNUSED(curr_stackentries)
 )
 {
+    int oparg = instr->op.arg;
+    while (instr->op.code == EXTENDED_ARG) {
+        instr++;
+        oparg = (oparg << 8) | instr->op.arg;
+    }
+    if (instr->op.code != JUMP_BACKWARD) {
+        /* Counter optimizer can only handle backward edges */
+        return 0;
+    }
+    _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
     _PyUOpInstruction buffer[3] = {
         { .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
         { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
-        { .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - _PyCode_CODE(code)) }
+        { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
     };
     _PyBloomFilter empty;
     _Py_BloomFilter_Init(&empty);
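counter_optimize, the test optimizer, now performs the same decoding the interpreter does: it walks forward through any EXTENDED_ARG prefixes, rebuilding the full operand one byte at a time, and points its _EXIT_TRACE at the jump target rather than at the jump itself. A standalone sketch of the decode loop (toy code units, not CPython's):

#include <stdio.h>
#include <stdint.h>

/* Sketch of the forward EXTENDED_ARG decode: each prefix holds the
 * next-higher byte of the operand, so the full oparg is rebuilt by
 * shifting and OR-ing while walking to the real instruction. */
typedef struct { uint8_t opcode; uint8_t arg; } CodeUnit;
enum { EXTENDED_ARG = 1, JUMP_BACKWARD = 2 };

int main(void) {
    CodeUnit code[] = {
        { EXTENDED_ARG, 0x01 },
        { EXTENDED_ARG, 0x23 },
        { JUMP_BACKWARD, 0x45 },
    };
    const CodeUnit *instr = &code[0];
    uint32_t oparg = instr->arg;
    while (instr->opcode == EXTENDED_ARG) {
        instr++;
        oparg = (oparg << 8) | instr->arg;
    }
    printf("decoded oparg = 0x%06x\n", oparg);  /* prints 0x012345 */
    return 0;
}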