GH-104584: Assorted fixes for the optimizer API. (GH-105683)

* Add test for long loops

* Clear ENTER_EXECUTOR when deopting code objects.
This commit is contained in:
Mark Shannon 2023-06-19 10:32:20 +01:00 committed by GitHub
parent 4426279a43
commit 581619941e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 298 additions and 204 deletions

View file

@ -2347,17 +2347,66 @@ class Test_Pep523API(unittest.TestCase):
class TestOptimizerAPI(unittest.TestCase):
def test_counter_optimizer(self):
opt = _testinternalcapi.get_counter_optimizer()
self.assertEqual(opt.get_count(), 0)
try:
@contextlib.contextmanager
def temporary_optimizer(self, opt):
_testinternalcapi.set_optimizer(opt)
self.assertEqual(opt.get_count(), 0)
for _ in range(1000):
pass
self.assertEqual(opt.get_count(), 1000)
try:
yield
finally:
_testinternalcapi.set_optimizer(None)
@contextlib.contextmanager
def clear_executors(self, func):
# Context manager: runs the managed body, then resets func's code object
# on the way out, whether or not the body raised.
try:
yield
finally:
# Clear executors by swapping in a fresh copy of func's code object.
func.__code__ = func.__code__.replace()
def test_get_set_optimizer(self):
    """get_optimizer() reports what set_optimizer() installed, and None when nothing is installed."""
    self.assertIsNone(_testinternalcapi.get_optimizer())
    opt = _testinternalcapi.get_counter_optimizer()
    _testinternalcapi.set_optimizer(opt)
    try:
        self.assertEqual(_testinternalcapi.get_optimizer(), opt)
    finally:
        # Always uninstall, so a failed assertion above cannot leave the
        # counter optimizer active for the tests that run afterwards.
        _testinternalcapi.set_optimizer(None)
    self.assertIsNone(_testinternalcapi.get_optimizer())
def test_counter_optimizer(self):
# A plain 1000-iteration loop; the counter optimizer is expected to be
# hit once per iteration.
def loop():
for _ in range(1000):
pass
# Run several rounds, each with a brand-new counter optimizer, and reset
# loop's code object after every call via clear_executors().
for repeat in range(5):
opt = _testinternalcapi.get_counter_optimizer()
with self.temporary_optimizer(opt):
# Nothing has run under the new optimizer yet.
self.assertEqual(opt.get_count(), 0)
with self.clear_executors(loop):
loop()
self.assertEqual(opt.get_count(), 1000)
def test_long_loop(self):
"Check that we aren't confused by EXTENDED_ARG"
# No-op callee, used only to pad out the loop body below.
def nop():
pass
# 10 iterations, each making 56 calls to nop(). The long body is
# presumably what forces EXTENDED_ARG into the loop's bytecode (see the
# docstring), so keep it long when editing this test.
def long_loop():
for _ in range(10):
nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
opt = _testinternalcapi.get_counter_optimizer()
with self.temporary_optimizer(opt):
self.assertEqual(opt.get_count(), 0)
long_loop()
# The count is expected to equal the number of loop iterations.
self.assertEqual(opt.get_count(), 10)
if __name__ == "__main__":
unittest.main()

View file

@ -840,6 +840,15 @@ set_optimizer(PyObject *self, PyObject *opt)
Py_RETURN_NONE;
}
/* Test helper: expose PyUnstable_GetOptimizer() to Python.
 *
 * A NULL result from PyUnstable_GetOptimizer() is reported as None;
 * otherwise the pointer it returned is handed back to the caller as-is. */
static PyObject *
get_optimizer(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    PyObject *installed = (PyObject *)PyUnstable_GetOptimizer();
    if (installed != NULL) {
        return installed;
    }
    Py_RETURN_NONE;
}
static int _pending_callback(void *arg)
{
@ -982,6 +991,7 @@ static PyMethodDef module_functions[] = {
{"iframe_getcode", iframe_getcode, METH_O, NULL},
{"iframe_getline", iframe_getline, METH_O, NULL},
{"iframe_getlasti", iframe_getlasti, METH_O, NULL},
{"get_optimizer", get_optimizer, METH_NOARGS, NULL},
{"set_optimizer", set_optimizer, METH_O, NULL},
{"get_counter_optimizer", get_counter_optimizer, METH_NOARGS, NULL},
{"pending_threadfunc", _PyCFunction_CAST(pending_threadfunc),

View file

@ -1465,12 +1465,28 @@ PyCode_GetFreevars(PyCodeObject *code)
return _PyCode_GetFreevars(code);
}
/* Drop every executor held by `co` and free the executor array itself.
 *
 * `co->co_executors` must not be NULL on entry; it is NULL on return. */
static void
clear_executors(PyCodeObject *co)
{
    _PyExecutorArray *array = co->co_executors;
    for (int slot = 0; slot < array->size; slot++) {
        /* Py_CLEAR releases the reference and nulls the slot. */
        Py_CLEAR(array->executors[slot]);
    }
    PyMem_Free(array);
    co->co_executors = NULL;
}
static void
deopt_code(PyCodeObject *code, _Py_CODEUNIT *instructions)
{
Py_ssize_t len = Py_SIZE(code);
for (int i = 0; i < len; i++) {
int opcode = _Py_GetBaseOpcode(code, i);
if (opcode == ENTER_EXECUTOR) {
_PyExecutorObject *exec = code->co_executors->executors[instructions[i].op.arg];
opcode = exec->vm_data.opcode;
instructions[i].op.arg = exec->vm_data.oparg;
}
assert(opcode != ENTER_EXECUTOR);
int caches = _PyOpcode_Caches[opcode];
instructions[i].op.code = opcode;
for (int j = 1; j <= caches; j++) {
@ -1679,10 +1695,7 @@ code_dealloc(PyCodeObject *co)
PyMem_Free(co_extra);
}
if (co->co_executors != NULL) {
for (int i = 0; i < co->co_executors->size; i++) {
Py_CLEAR(co->co_executors->executors[i]);
}
PyMem_Free(co->co_executors);
clear_executors(co);
}
Py_XDECREF(co->co_consts);
@ -2278,6 +2291,9 @@ void
_PyStaticCode_Fini(PyCodeObject *co)
{
deopt_code(co, _PyCode_CODE(co));
if (co->co_executors != NULL) {
clear_executors(co);
}
PyMem_Free(co->co_extra);
if (co->_co_cached != NULL) {
Py_CLEAR(co->_co_cached->_co_code);

View file

@ -2157,6 +2157,7 @@ dummy_func(
frame = cframe.current_frame;
goto error;
}
assert(frame == cframe.current_frame);
here[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) -1);
goto resume_frame;
}
@ -2176,7 +2177,7 @@ dummy_func(
inst(ENTER_EXECUTOR, (--)) {
PyCodeObject *code = _PyFrame_GetCode(frame);
_PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg];
_PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255];
Py_INCREF(executor);
frame = executor->execute(executor, frame, stack_pointer);
if (frame == NULL) {

File diff suppressed because it is too large Load diff

View file

@ -9,20 +9,31 @@
#include <stdint.h>
#include <stddef.h>
/* Returns the index of the next space, or -1 if there is no
* more space. Doesn't set an exception. */
static int32_t
get_next_free_in_executor_array(PyCodeObject *code)
/* Report whether an executor can be attached at `instr`.
 *
 * True when `instr` is already ENTER_EXECUTOR, when the code object has
 * no executor array yet, or when the array holds fewer than 256 entries. */
static bool
has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
{
    if (instr->op.code == ENTER_EXECUTOR || code->co_executors == NULL) {
        return true;
    }
    return code->co_executors->size < 256;
}
static int32_t
get_index_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
{
if (instr->op.code == ENTER_EXECUTOR) {
return instr->op.arg;
}
_PyExecutorArray *old = code->co_executors;
int size = 0;
int capacity = 0;
if (old != NULL) {
size = old->size;
capacity = old->capacity;
if (capacity >= 256) {
return -1;
}
assert(size < 256);
}
assert(size <= capacity);
if (size == capacity) {
@ -40,46 +51,36 @@ get_next_free_in_executor_array(PyCodeObject *code)
code->co_executors = new;
}
assert(size < code->co_executors->capacity);
code->co_executors->size++;
return size;
}
/* Store `executor` in slot `index` of code->co_executors and make `instr`
 * dispatch to it.
 *
 * If `instr` is already ENTER_EXECUTOR, the executor currently in that slot
 * is replaced: the opcode/oparg it had saved are copied to the new executor,
 * its saved opcode is zeroed, and the array's reference to it is released.
 *
 * Otherwise `index` must equal the current array size (and be within
 * capacity): the instruction's present opcode/oparg are saved in the
 * executor, the instruction is rewritten to ENTER_EXECUTOR with `index` as
 * its oparg, and the array size grows by one.
 *
 * The array slot takes its own strong reference to `executor`; the caller's
 * reference is left untouched. */
static void
insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorObject *executor)
{
    /* Exactly one new reference, owned by the co_executors slot. Taking it
     * once here (rather than again in each branch) avoids leaking one. */
    Py_INCREF(executor);
    if (instr->op.code == ENTER_EXECUTOR) {
        assert(index == instr->op.arg);
        _PyExecutorObject *old = code->co_executors->executors[index];
        /* Carry the original instruction over to the replacement. */
        executor->vm_data.opcode = old->vm_data.opcode;
        executor->vm_data.oparg = old->vm_data.oparg;
        old->vm_data.opcode = 0;
        code->co_executors->executors[index] = executor;
        /* Release the slot's reference only after the slot was overwritten. */
        Py_DECREF(old);
    }
    else {
        assert(code->co_executors->size == index);
        assert(code->co_executors->capacity > index);
        /* Remember the instruction being overwritten. */
        executor->vm_data.opcode = instr->op.code;
        executor->vm_data.oparg = instr->op.arg;
        code->co_executors->executors[index] = executor;
        assert(index < 256);
        instr->op.code = ENTER_EXECUTOR;
        instr->op.arg = index;
        code->co_executors->size++;
    }
}
/* Pick the executor-array index to use for `instr`.
 *
 * An instruction that is already ENTER_EXECUTOR keeps the index stored in
 * its oparg; any other instruction gets whatever
 * get_next_free_in_executor_array() returns for `code`. */
static int
get_executor_index(PyCodeObject *code, _Py_CODEUNIT *instr)
{
    if (instr->op.code != ENTER_EXECUTOR) {
        return get_next_free_in_executor_array(code);
    }
    return instr->op.arg;
}
int
PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *new)
{
@ -87,7 +88,7 @@ PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutor
PyErr_Format(PyExc_ValueError, "No executor to replace");
return -1;
}
int index = get_executor_index(code, instr);
int index = instr->op.arg;
assert(index >= 0);
insert_executor(code, instr, index, new);
return 0;
@ -126,6 +127,8 @@ PyUnstable_GetOptimizer(void)
if (interp->optimizer == &_PyOptimizer_Default) {
return NULL;
}
assert(interp->optimizer_backedge_threshold == interp->optimizer->backedge_threshold);
assert(interp->optimizer_resume_threshold == interp->optimizer->resume_threshold);
Py_INCREF(interp->optimizer);
return interp->optimizer;
}
@ -151,23 +154,37 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
assert(PyCode_Check(code));
PyInterpreterState *interp = PyInterpreterState_Get();
int index = get_executor_index(code, src);
if (index < 0) {
_PyFrame_SetStackPointer(frame, stack_pointer);
return frame;
if (!has_space_for_executor(code, src)) {
goto jump_to_destination;
}
_PyOptimizerObject *opt = interp->optimizer;
_PyExecutorObject *executor;
_PyExecutorObject *executor = NULL;
int err = opt->optimize(opt, code, dest, &executor);
if (err <= 0) {
assert(executor == NULL);
if (err < 0) {
return NULL;
}
_PyFrame_SetStackPointer(frame, stack_pointer);
return frame;
goto jump_to_destination;
}
int index = get_index_for_executor(code, src);
if (index < 0) {
/* Out of memory. Don't raise and assume that the
* error will show up elsewhere.
*
* If an optimizer has already produced an executor,
* it might get confused by the executor disappearing,
* but there is not much we can do about that here. */
Py_DECREF(executor);
goto jump_to_destination;
}
insert_executor(code, src, index, executor);
assert(frame->prev_instr == src);
return executor->execute(executor, frame, stack_pointer);
jump_to_destination:
frame->prev_instr = dest - 1;
_PyFrame_SetStackPointer(frame, stack_pointer);
return frame;
}
/** Test support **/