GH-118095: Use broader specializations of CALL in tier 1, for better tier 2 support of calls. (GH-118322)

* Add CALL_PY_GENERAL, CALL_BOUND_METHOD_GENERAL and CALL_NON_PY_GENERAL specializations.

* Remove CALL_PY_WITH_DEFAULTS specialization

* Use CALL_NON_PY_GENERAL in more cases when otherwise failing to specialize
This commit is contained in:
Mark Shannon 2024-05-04 12:11:11 +01:00 committed by GitHub
parent 00da0afa0d
commit 1ab6356ebe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 862 additions and 447 deletions

View file

@ -3042,7 +3042,6 @@ dummy_func(
family(CALL, INLINE_CACHE_ENTRIES_CALL) = {
CALL_BOUND_METHOD_EXACT_ARGS,
CALL_PY_EXACT_ARGS,
CALL_PY_WITH_DEFAULTS,
CALL_TYPE_1,
CALL_STR_1,
CALL_TUPLE_1,
@ -3058,6 +3057,9 @@ dummy_func(
CALL_METHOD_DESCRIPTOR_NOARGS,
CALL_METHOD_DESCRIPTOR_FAST,
CALL_ALLOC_AND_ENTER_INIT,
CALL_PY_GENERAL,
CALL_BOUND_METHOD_GENERAL,
CALL_NON_PY_GENERAL,
};
specializing op(_SPECIALIZE_CALL, (counter/1, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
@ -3147,9 +3149,108 @@ dummy_func(
// Unspecialized CALL instruction: runs the specializing uop, skips two
// cache entries (unused/2), performs the generic _CALL and finishes with
// _CHECK_PERIODIC.
macro(CALL) = _SPECIALIZE_CALL + unused/2 + _CALL + _CHECK_PERIODIC;
// Create the interpreter frame for a call to a Python function.
// Inputs: the callable (asserted to be exactly a function), an optional
// self and oparg positional arguments. Output: the new frame, produced by
// _PyEvalFramePushAndInit(); a NULL result is reported via ERROR_NO_POP().
op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) {
    assert(Py_TYPE(callable) == &PyFunction_Type);
    // oparg does not include self; when self is present, step args back by
    // one slot so that it is passed as the first argument.
    int argcount = oparg;
    if (self_or_null != NULL) {
        args -= 1;
        argcount += 1;
    }
    PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(callable);
    // Code without CO_OPTIMIZED gets a new reference to the function's
    // globals as its locals; otherwise no locals object is passed.
    PyObject *locals = NULL;
    if ((code->co_flags & CO_OPTIMIZED) == 0) {
        locals = Py_NewRef(PyFunction_GET_GLOBALS(callable));
    }
    new_frame = _PyEvalFramePushAndInit(
        tstate, (PyFunctionObject *)callable, locals,
        args, argcount, NULL
    );
    // The frame has stolen all the arguments from the stack,
    // so there is no need to clean them up.
    SYNC_SP();
    if (new_frame == NULL) {
        ERROR_NO_POP();
    }
}
// Guard: exit unless callable is a Python function whose func_version
// matches the func_version stored in the inline cache. The stack is left
// unchanged.
op(_CHECK_FUNCTION_VERSION, (func_version/2, callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
    EXIT_IF(!PyFunction_Check(callable));
    EXIT_IF(((PyFunctionObject *)callable)->func_version != func_version);
}
// CALL specialization for Python functions: check the eval-frame hook and
// the cached function version, build the callee frame with
// _PY_FRAME_GENERAL, record the return offset and push the frame.
macro(CALL_PY_GENERAL) =
unused/1 + // Skip over the counter
_CHECK_PEP_523 +
_CHECK_FUNCTION_VERSION +
_PY_FRAME_GENERAL +
_SAVE_RETURN_OFFSET +
_PUSH_FRAME;
// Guard: exit unless callable is exactly a bound method (PyMethod_Type)
// wrapping a Python function whose func_version matches the cached
// func_version, and the self slot on the stack is NULL.
op(_CHECK_METHOD_VERSION, (func_version/2, callable, null, unused[oparg] -- callable, null, unused[oparg])) {
    EXIT_IF(Py_TYPE(callable) != &PyMethod_Type);
    PyMethodObject *bound = (PyMethodObject *)callable;
    EXIT_IF(!PyFunction_Check(bound->im_func));
    PyFunctionObject *target = (PyFunctionObject *)bound->im_func;
    EXIT_IF(target->func_version != func_version);
    EXIT_IF(null != NULL);
}
// Replace a bound method on the stack with its underlying function and
// its self object. New references are taken to im_func and im_self, and
// the reference to the bound method itself is dropped.
op(_EXPAND_METHOD, (callable, null, unused[oparg] -- method, self, unused[oparg])) {
    assert(null == NULL);
    assert(Py_TYPE(callable) == &PyMethod_Type);
    PyMethodObject *bound = (PyMethodObject *)callable;
    self = Py_NewRef(bound->im_self);
    // Patch stack as it is used by _PY_FRAME_GENERAL
    stack_pointer[-1 - oparg] = self;
    method = Py_NewRef(bound->im_func);
    assert(PyFunction_Check(method));
    Py_DECREF(callable);
}
// CALL specialization for bound methods wrapping a Python function:
// after the guards, _EXPAND_METHOD puts the function and self on the
// stack, then the call proceeds through _PY_FRAME_GENERAL,
// _SAVE_RETURN_OFFSET and _PUSH_FRAME as in CALL_PY_GENERAL.
macro(CALL_BOUND_METHOD_GENERAL) =
unused/1 + // Skip over the counter
_CHECK_PEP_523 +
_CHECK_METHOD_VERSION +
_EXPAND_METHOD +
_PY_FRAME_GENERAL +
_SAVE_RETURN_OFFSET +
_PUSH_FRAME;
// Guard: exit when callable is a Python function or exactly a bound
// method (PyMethod_Type). The stack is left unchanged.
op(_CHECK_IS_NOT_PY_CALLABLE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
    EXIT_IF(PyFunction_Check(callable) || Py_TYPE(callable) == &PyMethod_Type);
}
// Call a non-Python callable through PyObject_Vectorcall().
// Consumes the callable, the optional self and oparg arguments from the
// stack and produces the call result; a NULL result is routed to the
// error label by ERROR_IF.
op(_CALL_NON_PY_GENERAL, (callable, self_or_null, args[oparg] -- res)) {
#if TIER_ONE
    assert(opcode != INSTRUMENTED_CALL);
#endif
    // When self is present, step args back by one slot so that it is
    // passed as the first positional argument.
    int nargs = oparg;
    if (self_or_null != NULL) {
        args -= 1;
        nargs += 1;
    }
    /* Callable is not a normal Python function */
    size_t nargsf = nargs | PY_VECTORCALL_ARGUMENTS_OFFSET;
    res = PyObject_Vectorcall(callable, args, nargsf, NULL);
    // Exactly one of "result returned" and "exception set" must hold.
    assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
    // Release the callable, then the arguments in ascending order.
    Py_DECREF(callable);
    int i = 0;
    while (i < nargs) {
        Py_DECREF(args[i]);
        i++;
    }
    ERROR_IF(res == NULL, error);
}
// CALL specialization for callables that are neither Python functions
// nor bound methods: guard on the callable's type, call it through
// _CALL_NON_PY_GENERAL, then run _CHECK_PERIODIC.
macro(CALL_NON_PY_GENERAL) =
unused/1 + // Skip over the counter
// Two further cache entries are skipped.
// NOTE(review): presumably the slot other specializations use for func_version - confirm.
unused/2 +
_CHECK_IS_NOT_PY_CALLABLE +
_CALL_NON_PY_GENERAL +
_CHECK_PERIODIC;
// Guard: exit unless the self slot is NULL and callable is exactly a
// bound method (PyMethod_Type). The stack is left unchanged.
// Each condition is tested once, using EXIT_IF like the other guard uops
// in this section; the previous body repeated both checks with DEOPT_IF
// and EXIT_IF, which made the second pair unreachable.
op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) {
    EXIT_IF(null != NULL);
    EXIT_IF(Py_TYPE(callable) != &PyMethod_Type);
}
op(_INIT_CALL_BOUND_METHOD_EXACT_ARGS, (callable, unused, unused[oparg] -- func, self, unused[oparg])) {
@ -3227,40 +3328,6 @@ dummy_func(
_SAVE_RETURN_OFFSET +
_PUSH_FRAME;
// Specialized call of a Python function where fewer positional arguments
// than co_argcount may be supplied; the missing trailing parameters are
// filled from the function's defaults tuple. Deoptimizes when any guard
// below fails; otherwise builds the callee frame and dispatches into it.
inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
// NOTE(review): presumably a custom (PEP 523) frame evaluator - confirm.
DEOPT_IF(tstate->interp->eval_frame);
// oparg does not include self; when self is present, step args back by
// one slot so that it becomes the first argument.
int argcount = oparg;
if (self_or_null != NULL) {
args--;
argcount++;
}
DEOPT_IF(!PyFunction_Check(callable));
PyFunctionObject *func = (PyFunctionObject *)callable;
// The cached func_version must still match the function's version.
DEOPT_IF(func->func_version != func_version);
PyCodeObject *code = (PyCodeObject *)func->func_code;
assert(func->func_defaults);
assert(PyTuple_CheckExact(func->func_defaults));
int defcount = (int)PyTuple_GET_SIZE(func->func_defaults);
assert(defcount <= code->co_argcount);
// Number of leading parameters that have no default value.
int min_args = code->co_argcount - defcount;
// The supplied count must lie in [min_args, co_argcount].
DEOPT_IF(argcount > code->co_argcount);
DEOPT_IF(argcount < min_args);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
STAT_INC(CALL, hit);
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, code->co_argcount);
// Copy the supplied arguments into the new frame's locals as-is
// (no new references are taken here).
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
// Fill the remaining parameters with new references to their defaults;
// defaults are indexed relative to the first parameter that has one.
for (int i = argcount; i < code->co_argcount; i++) {
PyObject *def = PyTuple_GET_ITEM(func->func_defaults, i - min_args);
new_frame->localsplus[i] = Py_NewRef(def);
}
// Manipulate stack and cache directly since we leave using DISPATCH_INLINED().
// Drops the oparg arguments plus the callable and self_or_null slots.
STACK_SHRINK(oparg + 2);
frame->return_offset = (uint16_t)(next_instr - this_instr);
DISPATCH_INLINED(new_frame);
}
inst(CALL_TYPE_1, (unused/1, unused/2, callable, null, arg -- res)) {
assert(oparg == 1);
DEOPT_IF(null != NULL);