bpo-44525: Split calls into PRECALL and CALL (GH-30011)

* Add 3 new opcodes for calls: PRECALL_METHOD, CALL_NO_KW, CALL_KW.

* Update specialization to handle new CALL opcodes.

* Specialize call to method descriptors.

* Remove old CALL opcodes: CALL_FUNCTION, CALL_METHOD, CALL_METHOD_KW, CALL_FUNCTION_KW.
This commit is contained in:
Mark Shannon 2021-12-14 18:22:44 +00:00 committed by GitHub
parent d60457a667
commit 9f8f45144b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 395 additions and 236 deletions

View file

@ -1673,6 +1673,22 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
CFrame cframe;
/* Variables used for making calls */
PyObject *kwnames;
int nargs;
/*
* It is only between a PRECALL_METHOD instruction and the following instruction,
* that these two values can be anything other than their defaults. */
int postcall_shrink = 1;
int extra_args = 0;
#define RESET_STACK_ADJUST_FOR_CALLS \
do { \
postcall_shrink = 1; \
extra_args = 0; \
} while (0)
#define STACK_ADJUST_IS_RESET \
(postcall_shrink == 1 && extra_args == 0)
/* WARNING: Because the CFrame lives on the C stack,
* but can be accessed from a heap allocated object (tstate)
* strict stack discipline must be maintained.
@ -1815,11 +1831,6 @@ check_eval_breaker:
switch (opcode) {
#endif
/* Variables used for making calls */
PyObject *kwnames;
int nargs;
int postcall_shrink;
/* BEWARE!
It is essential that any operation that fails must goto error
and that all operation that succeed call DISPATCH() ! */
@ -4319,7 +4330,7 @@ check_eval_breaker:
if (iter == NULL)
goto error;
PREDICT(FOR_ITER);
PREDICT(CALL_FUNCTION);
PREDICT(CALL_NO_KW);
DISPATCH();
}
@ -4661,7 +4672,7 @@ check_eval_breaker:
DISPATCH();
}
TARGET(CALL_METHOD) {
TARGET(PRECALL_METHOD) {
/* Designed to work in tamdem with LOAD_METHOD. */
/* `meth` is NULL when LOAD_METHOD thinks that it's not
a method call.
@ -4689,38 +4700,26 @@ check_eval_breaker:
make it accept the `self` as a first argument.
*/
int is_method = (PEEK(oparg + 2) != NULL);
oparg += is_method;
nargs = oparg;
kwnames = NULL;
extra_args = is_method;
postcall_shrink = 2-is_method;
goto call_function;
DISPATCH();
}
TARGET(CALL_METHOD_KW) {
/* Designed to work in tandem with LOAD_METHOD. Same as CALL_METHOD
but pops TOS to get a tuple of keyword names. */
TARGET(CALL_KW) {
kwnames = POP();
int is_method = (PEEK(oparg + 2) != NULL);
oparg += is_method;
oparg += extra_args;
extra_args = 0;
nargs = oparg - (int)PyTuple_GET_SIZE(kwnames);
postcall_shrink = 2-is_method;
goto call_function;
}
TARGET(CALL_FUNCTION_KW) {
kwnames = POP();
nargs = oparg - (int)PyTuple_GET_SIZE(kwnames);
postcall_shrink = 1;
goto call_function;
}
TARGET(CALL_FUNCTION) {
PREDICTED(CALL_FUNCTION);
STAT_INC(CALL_FUNCTION, unquickened);
TARGET(CALL_NO_KW) {
PyObject *function;
nargs = oparg;
PREDICTED(CALL_NO_KW);
STAT_INC(CALL_NO_KW, unquickened);
kwnames = NULL;
postcall_shrink = 1;
oparg += extra_args;
nargs = oparg;
call_function:
function = PEEK(oparg + 1);
if (Py_TYPE(function) == &PyMethod_Type) {
@ -4748,6 +4747,7 @@ check_eval_breaker:
stack_pointer, nargs, kwnames
);
STACK_SHRINK(postcall_shrink);
RESET_STACK_ADJUST_FOR_CALLS;
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
Py_XDECREF(kwnames);
@ -4780,6 +4780,7 @@ check_eval_breaker:
Py_DECREF(stack_pointer[i]);
}
STACK_SHRINK(postcall_shrink);
RESET_STACK_ADJUST_FOR_CALLS;
PUSH(res);
if (res == NULL) {
goto error;
@ -4788,43 +4789,46 @@ check_eval_breaker:
DISPATCH();
}
TARGET(CALL_FUNCTION_ADAPTIVE) {
TARGET(CALL_NO_KW_ADAPTIVE) {
SpecializedCacheEntry *cache = GET_CACHE();
nargs = cache->adaptive.original_oparg;
oparg = cache->adaptive.original_oparg;
if (cache->adaptive.counter == 0) {
next_instr--;
if (_Py_Specialize_CallFunction(
int nargs = oparg+extra_args;
if (_Py_Specialize_CallNoKw(
PEEK(nargs + 1), next_instr, nargs, cache, BUILTINS()) < 0) {
goto error;
}
DISPATCH();
}
else {
STAT_INC(CALL_FUNCTION, deferred);
STAT_INC(CALL_NO_KW, deferred);
cache->adaptive.counter--;
oparg = nargs;
kwnames = NULL;
postcall_shrink = 1;
oparg += extra_args;
nargs = oparg;
goto call_function;
}
}
TARGET(CALL_FUNCTION_PY_SIMPLE) {
TARGET(CALL_NO_KW_PY_SIMPLE) {
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
int argcount = cache0->original_oparg;
int argcount = cache0->original_oparg + extra_args;
DEOPT_IF(argcount != cache0->index, CALL_NO_KW);
_PyCallCache *cache1 = &caches[-1].call;
PyObject *callable = PEEK(argcount+1);
DEOPT_IF(!PyFunction_Check(callable), CALL_FUNCTION);
DEOPT_IF(!PyFunction_Check(callable), CALL_NO_KW);
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != cache1->func_version, CALL_FUNCTION);
DEOPT_IF(func->func_version != cache1->func_version, CALL_NO_KW);
/* PEP 523 */
DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION);
STAT_INC(CALL_FUNCTION, hit);
DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_NO_KW);
STAT_INC(CALL_NO_KW, hit);
PyCodeObject *code = (PyCodeObject *)func->func_code;
size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
InterpreterFrame *new_frame = _PyThreadState_BumpFramePointer(tstate, size);
if (new_frame == NULL) {
RESET_STACK_ADJUST_FOR_CALLS;
goto error;
}
_PyFrame_InitializeSpecials(new_frame, func,
@ -4842,7 +4846,8 @@ check_eval_breaker:
for (int i = argcount+deflen; i < code->co_nlocalsplus; i++) {
new_frame->localsplus[i] = NULL;
}
STACK_SHRINK(1);
STACK_SHRINK(postcall_shrink);
RESET_STACK_ADJUST_FOR_CALLS;
Py_DECREF(func);
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
@ -4851,14 +4856,15 @@ check_eval_breaker:
goto start_frame;
}
TARGET(CALL_FUNCTION_BUILTIN_O) {
TARGET(CALL_NO_KW_BUILTIN_O) {
assert(cframe.use_tracing == 0);
assert(STACK_ADJUST_IS_RESET);
/* Builtin METH_O functions */
PyObject *callable = SECOND();
DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION);
DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, CALL_FUNCTION);
STAT_INC(CALL_FUNCTION, hit);
DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_NO_KW);
DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, CALL_NO_KW);
STAT_INC(CALL_NO_KW, hit);
PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable);
// This is slower but CPython promises to check all non-vectorcall
@ -4881,18 +4887,19 @@ check_eval_breaker:
DISPATCH();
}
TARGET(CALL_FUNCTION_BUILTIN_FAST) {
TARGET(CALL_NO_KW_BUILTIN_FAST) {
assert(cframe.use_tracing == 0);
assert(STACK_ADJUST_IS_RESET);
/* Builtin METH_FASTCALL functions, without keywords */
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
int nargs = cache0->original_oparg;
PyObject **pfunc = &PEEK(nargs + 1);
PyObject *callable = *pfunc;
DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION);
DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_NO_KW);
DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL,
CALL_FUNCTION);
STAT_INC(CALL_FUNCTION, hit);
CALL_NO_KW);
STAT_INC(CALL_NO_KW, hit);
PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable);
/* res = func(self, args, nargs) */
@ -4919,16 +4926,17 @@ check_eval_breaker:
DISPATCH();
}
TARGET(CALL_FUNCTION_LEN) {
TARGET(CALL_NO_KW_LEN) {
assert(cframe.use_tracing == 0);
assert(STACK_ADJUST_IS_RESET);
/* len(o) */
SpecializedCacheEntry *caches = GET_CACHE();
assert(caches[0].adaptive.original_oparg == 1);
_PyObjectCache *cache1 = &caches[-1].obj;
PyObject *callable = SECOND();
DEOPT_IF(callable != cache1->obj, CALL_FUNCTION);
STAT_INC(CALL_FUNCTION, hit);
DEOPT_IF(callable != cache1->obj, CALL_NO_KW);
STAT_INC(CALL_NO_KW, hit);
Py_ssize_t len_i = PyObject_Length(TOP());
if (len_i < 0) {
@ -4947,16 +4955,17 @@ check_eval_breaker:
DISPATCH();
}
TARGET(CALL_FUNCTION_ISINSTANCE) {
TARGET(CALL_NO_KW_ISINSTANCE) {
assert(cframe.use_tracing == 0);
assert(STACK_ADJUST_IS_RESET);
/* isinstance(o, o2) */
SpecializedCacheEntry *caches = GET_CACHE();
assert(caches[0].adaptive.original_oparg == 2);
_PyObjectCache *cache1 = &caches[-1].obj;
PyObject *callable = THIRD();
DEOPT_IF(callable != cache1->obj, CALL_FUNCTION);
STAT_INC(CALL_FUNCTION, hit);
DEOPT_IF(callable != cache1->obj, CALL_NO_KW);
STAT_INC(CALL_NO_KW, hit);
int retval = PyObject_IsInstance(SECOND(), TOP());
if (retval < 0) {
@ -4976,6 +4985,97 @@ check_eval_breaker:
DISPATCH();
}
TARGET(CALL_NO_KW_LIST_APPEND) {
assert(_Py_OPCODE(next_instr[-2]) == PRECALL_METHOD);
assert(GET_CACHE()->adaptive.original_oparg == 1);
DEOPT_IF(extra_args == 0, CALL_NO_KW);
PyObject *list = SECOND();
DEOPT_IF(!PyList_CheckExact(list), CALL_NO_KW);
STAT_INC(CALL_NO_KW, hit);
assert(extra_args == 1);
extra_args = 0;
assert(STACK_ADJUST_IS_RESET);
PyObject *arg = TOP();
int err = PyList_Append(list, arg);
if (err) {
goto error;
}
PyObject *callable = THIRD();
Py_DECREF(arg);
Py_DECREF(list);
Py_INCREF(Py_None);
STACK_SHRINK(2);
SET_TOP(Py_None);
Py_DECREF(callable);
DISPATCH();
}
TARGET(CALL_NO_KW_METHOD_DESCRIPTOR_O) {
assert(_Py_OPCODE(next_instr[-2]) == PRECALL_METHOD);
assert(GET_CACHE()->adaptive.original_oparg == 1);
DEOPT_IF(extra_args == 0, CALL_NO_KW);
assert(extra_args == 1);
PyObject *callable = THIRD();
DEOPT_IF(!Py_IS_TYPE(callable, &PyMethodDescr_Type), CALL_NO_KW);
DEOPT_IF(((PyMethodDescrObject *)callable)->d_method->ml_flags != METH_O, CALL_NO_KW);
STAT_INC(CALL_NO_KW, hit);
assert(extra_args == 1);
extra_args = 0;
assert(STACK_ADJUST_IS_RESET);
PyCFunction cfunc = ((PyMethodDescrObject *)callable)->d_method->ml_meth;
// This is slower but CPython promises to check all non-vectorcall
// function calls.
if (_Py_EnterRecursiveCall(tstate, " while calling a Python object")) {
goto error;
}
PyObject *arg = POP();
PyObject *self = POP();
PyObject *res = cfunc(self, arg);
_Py_LeaveRecursiveCall(tstate);
assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
Py_DECREF(self);
Py_DECREF(arg);
SET_TOP(res);
Py_DECREF(callable);
if (res == NULL) {
goto error;
}
DISPATCH();
}
TARGET(CALL_NO_KW_METHOD_DESCRIPTOR_FAST) {
assert(_Py_OPCODE(next_instr[-2]) == PRECALL_METHOD);
/* Builtin METH_FASTCALL methods, without keywords */
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
DEOPT_IF(extra_args == 0, CALL_NO_KW);
assert(extra_args == 1);
int nargs = cache0->original_oparg;
PyObject *callable = PEEK(nargs + 2);
DEOPT_IF(!Py_IS_TYPE(callable, &PyMethodDescr_Type), CALL_NO_KW);
PyMethodDef *meth = ((PyMethodDescrObject *)callable)->d_method;
DEOPT_IF(meth->ml_flags != METH_FASTCALL, CALL_NO_KW);
STAT_INC(CALL_NO_KW, hit);
assert(extra_args == 1);
extra_args = 0;
assert(STACK_ADJUST_IS_RESET);
_PyCFunctionFast cfunc = (_PyCFunctionFast)(void(*)(void))meth->ml_meth;
PyObject *self = PEEK(nargs+1);
PyObject *res = cfunc(self, &PEEK(nargs), nargs);
assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
/* Clear the stack of the arguments. */
STACK_SHRINK(nargs+1);
for (int i = 0; i <= nargs; i++) {
Py_DECREF(stack_pointer[i]);
}
SET_TOP(res);
Py_DECREF(callable);
if (res == NULL) {
goto error;
}
DISPATCH();
}
TARGET(CALL_FUNCTION_EX) {
PREDICTED(CALL_FUNCTION_EX);
PyObject *func, *callargs, *kwargs = NULL, *result;
@ -5289,7 +5389,7 @@ MISS_WITH_CACHE(LOAD_ATTR)
MISS_WITH_CACHE(STORE_ATTR)
MISS_WITH_CACHE(LOAD_GLOBAL)
MISS_WITH_CACHE(LOAD_METHOD)
MISS_WITH_CACHE(CALL_FUNCTION)
MISS_WITH_CACHE(CALL_NO_KW)
MISS_WITH_CACHE(BINARY_OP)
MISS_WITH_CACHE(COMPARE_OP)
MISS_WITH_CACHE(BINARY_SUBSCR)
@ -7509,7 +7609,7 @@ format_awaitable_error(PyThreadState *tstate, PyTypeObject *type, int prevprevop
"that does not implement __await__: %.100s",
type->tp_name);
}
else if (prevopcode == WITH_EXCEPT_START || (prevopcode == CALL_FUNCTION && prevprevopcode == DUP_TOP)) {
else if (prevopcode == WITH_EXCEPT_START || (prevopcode == CALL_NO_KW && prevprevopcode == DUP_TOP)) {
_PyErr_Format(tstate, PyExc_TypeError,
"'async with' received an object from __aexit__ "
"that does not implement __await__: %.100s",