GH-118095: Use broader specializations of CALL in tier 1, for better tier 2 support of calls. (GH-118322)

* Add CALL_PY_GENERAL, CALL_BOUND_METHOD_GENERAL and call CALL_NON_PY_GENERAL specializations.

* Remove CALL_PY_WITH_DEFAULTS specialization

* Use CALL_NON_PY_GENERAL in more cases when otherwise failing to specialize
This commit is contained in:
Mark Shannon 2024-05-04 12:11:11 +01:00 committed by GitHub
parent 00da0afa0d
commit 1ab6356ebe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 862 additions and 447 deletions

View file

@ -3042,7 +3042,6 @@ dummy_func(
family(CALL, INLINE_CACHE_ENTRIES_CALL) = {
CALL_BOUND_METHOD_EXACT_ARGS,
CALL_PY_EXACT_ARGS,
CALL_PY_WITH_DEFAULTS,
CALL_TYPE_1,
CALL_STR_1,
CALL_TUPLE_1,
@ -3058,6 +3057,9 @@ dummy_func(
CALL_METHOD_DESCRIPTOR_NOARGS,
CALL_METHOD_DESCRIPTOR_FAST,
CALL_ALLOC_AND_ENTER_INIT,
CALL_PY_GENERAL,
CALL_BOUND_METHOD_GENERAL,
CALL_NON_PY_GENERAL,
};
specializing op(_SPECIALIZE_CALL, (counter/1, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
@ -3147,9 +3149,108 @@ dummy_func(
macro(CALL) = _SPECIALIZE_CALL + unused/2 + _CALL + _CHECK_PERIODIC;
op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) {
// oparg counts all of the args, but *not* self:
int total_args = oparg;
if (self_or_null != NULL) {
args--;
total_args++;
}
assert(Py_TYPE(callable) == &PyFunction_Type);
int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable));
new_frame = _PyEvalFramePushAndInit(
tstate, (PyFunctionObject *)callable, locals,
args, total_args, NULL
);
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
SYNC_SP();
if (new_frame == NULL) {
ERROR_NO_POP();
}
}
op(_CHECK_FUNCTION_VERSION, (func_version/2, callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
EXIT_IF(!PyFunction_Check(callable));
PyFunctionObject *func = (PyFunctionObject *)callable;
EXIT_IF(func->func_version != func_version);
}
macro(CALL_PY_GENERAL) =
unused/1 + // Skip over the counter
_CHECK_PEP_523 +
_CHECK_FUNCTION_VERSION +
_PY_FRAME_GENERAL +
_SAVE_RETURN_OFFSET +
_PUSH_FRAME;
op(_CHECK_METHOD_VERSION, (func_version/2, callable, null, unused[oparg] -- callable, null, unused[oparg])) {
EXIT_IF(Py_TYPE(callable) != &PyMethod_Type);
PyObject *func = ((PyMethodObject *)callable)->im_func;
EXIT_IF(!PyFunction_Check(func));
EXIT_IF(((PyFunctionObject *)func)->func_version != func_version);
EXIT_IF(null != NULL);
}
op(_EXPAND_METHOD, (callable, null, unused[oparg] -- method, self, unused[oparg])) {
assert(null == NULL);
assert(Py_TYPE(callable) == &PyMethod_Type);
self = ((PyMethodObject *)callable)->im_self;
Py_INCREF(self);
stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _PY_FRAME_GENERAL
method = ((PyMethodObject *)callable)->im_func;
assert(PyFunction_Check(method));
Py_INCREF(method);
Py_DECREF(callable);
}
macro(CALL_BOUND_METHOD_GENERAL) =
unused/1 + // Skip over the counter
_CHECK_PEP_523 +
_CHECK_METHOD_VERSION +
_EXPAND_METHOD +
_PY_FRAME_GENERAL +
_SAVE_RETURN_OFFSET +
_PUSH_FRAME;
op(_CHECK_IS_NOT_PY_CALLABLE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
EXIT_IF(PyFunction_Check(callable));
EXIT_IF(Py_TYPE(callable) == &PyMethod_Type);
}
op(_CALL_NON_PY_GENERAL, (callable, self_or_null, args[oparg] -- res)) {
#if TIER_ONE
assert(opcode != INSTRUMENTED_CALL);
#endif
int total_args = oparg;
if (self_or_null != NULL) {
args--;
total_args++;
}
/* Callable is not a normal Python function */
res = PyObject_Vectorcall(
callable, args,
total_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
NULL);
assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
Py_DECREF(callable);
for (int i = 0; i < total_args; i++) {
Py_DECREF(args[i]);
}
ERROR_IF(res == NULL, error);
}
macro(CALL_NON_PY_GENERAL) =
unused/1 + // Skip over the counter
unused/2 +
_CHECK_IS_NOT_PY_CALLABLE +
_CALL_NON_PY_GENERAL +
_CHECK_PERIODIC;
op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) {
DEOPT_IF(null != NULL);
DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type);
EXIT_IF(null != NULL);
EXIT_IF(Py_TYPE(callable) != &PyMethod_Type);
}
op(_INIT_CALL_BOUND_METHOD_EXACT_ARGS, (callable, unused, unused[oparg] -- func, self, unused[oparg])) {
@ -3227,40 +3328,6 @@ dummy_func(
_SAVE_RETURN_OFFSET +
_PUSH_FRAME;
inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
DEOPT_IF(tstate->interp->eval_frame);
int argcount = oparg;
if (self_or_null != NULL) {
args--;
argcount++;
}
DEOPT_IF(!PyFunction_Check(callable));
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != func_version);
PyCodeObject *code = (PyCodeObject *)func->func_code;
assert(func->func_defaults);
assert(PyTuple_CheckExact(func->func_defaults));
int defcount = (int)PyTuple_GET_SIZE(func->func_defaults);
assert(defcount <= code->co_argcount);
int min_args = code->co_argcount - defcount;
DEOPT_IF(argcount > code->co_argcount);
DEOPT_IF(argcount < min_args);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
STAT_INC(CALL, hit);
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, code->co_argcount);
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
for (int i = argcount; i < code->co_argcount; i++) {
PyObject *def = PyTuple_GET_ITEM(func->func_defaults, i - min_args);
new_frame->localsplus[i] = Py_NewRef(def);
}
// Manipulate stack and cache directly since we leave using DISPATCH_INLINED().
STACK_SHRINK(oparg + 2);
frame->return_offset = (uint16_t)(next_instr - this_instr);
DISPATCH_INLINED(new_frame);
}
inst(CALL_TYPE_1, (unused/1, unused/2, callable, null, arg -- res)) {
assert(oparg == 1);
DEOPT_IF(null != NULL);

View file

@ -247,10 +247,6 @@ static PyObject * import_name(PyThreadState *, _PyInterpreterFrame *,
static PyObject * import_from(PyThreadState *, PyObject *, PyObject *);
static int check_args_iterable(PyThreadState *, PyObject *func, PyObject *vararg);
static int get_exception_handler(PyCodeObject *, int, int*, int*, int*);
static _PyInterpreterFrame *
_PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, PyObject* const* args,
size_t argcount, PyObject *kwnames);
static _PyInterpreterFrame *
_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs);
@ -1716,7 +1712,7 @@ _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame)
}
/* Consumes references to func, locals and all the args */
static _PyInterpreterFrame *
_PyInterpreterFrame *
_PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, PyObject* const* args,
size_t argcount, PyObject *kwnames)
@ -1736,6 +1732,8 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
return frame;
fail:
/* Consume the references */
Py_DECREF(func);
Py_XDECREF(locals);
for (size_t i = 0; i < argcount; i++) {
Py_DECREF(args[i]);
}

View file

@ -3032,6 +3032,153 @@
break;
}
case _PY_FRAME_GENERAL: {
PyObject **args;
PyObject *self_or_null;
PyObject *callable;
_PyInterpreterFrame *new_frame;
oparg = CURRENT_OPARG();
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
// oparg counts all of the args, but *not* self:
int total_args = oparg;
if (self_or_null != NULL) {
args--;
total_args++;
}
assert(Py_TYPE(callable) == &PyFunction_Type);
int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable));
new_frame = _PyEvalFramePushAndInit(
tstate, (PyFunctionObject *)callable, locals,
args, total_args, NULL
);
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
stack_pointer += -2 - oparg;
if (new_frame == NULL) {
JUMP_TO_ERROR();
}
stack_pointer[0] = (PyObject *)new_frame;
stack_pointer += 1;
break;
}
case _CHECK_FUNCTION_VERSION: {
PyObject *callable;
oparg = CURRENT_OPARG();
callable = stack_pointer[-2 - oparg];
uint32_t func_version = (uint32_t)CURRENT_OPERAND();
if (!PyFunction_Check(callable)) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
PyFunctionObject *func = (PyFunctionObject *)callable;
if (func->func_version != func_version) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
break;
}
case _CHECK_METHOD_VERSION: {
PyObject *null;
PyObject *callable;
oparg = CURRENT_OPARG();
null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
uint32_t func_version = (uint32_t)CURRENT_OPERAND();
if (Py_TYPE(callable) != &PyMethod_Type) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
PyObject *func = ((PyMethodObject *)callable)->im_func;
if (!PyFunction_Check(func)) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
if (((PyFunctionObject *)func)->func_version != func_version) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
if (null != NULL) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
break;
}
case _EXPAND_METHOD: {
PyObject *null;
PyObject *callable;
PyObject *method;
PyObject *self;
oparg = CURRENT_OPARG();
null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
assert(null == NULL);
assert(Py_TYPE(callable) == &PyMethod_Type);
self = ((PyMethodObject *)callable)->im_self;
Py_INCREF(self);
stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _PY_FRAME_GENERAL
method = ((PyMethodObject *)callable)->im_func;
assert(PyFunction_Check(method));
Py_INCREF(method);
Py_DECREF(callable);
stack_pointer[-2 - oparg] = method;
stack_pointer[-1 - oparg] = self;
break;
}
case _CHECK_IS_NOT_PY_CALLABLE: {
PyObject *callable;
oparg = CURRENT_OPARG();
callable = stack_pointer[-2 - oparg];
if (PyFunction_Check(callable)) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
if (Py_TYPE(callable) == &PyMethod_Type) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
break;
}
case _CALL_NON_PY_GENERAL: {
PyObject **args;
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
oparg = CURRENT_OPARG();
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
#if TIER_ONE
assert(opcode != INSTRUMENTED_CALL);
#endif
int total_args = oparg;
if (self_or_null != NULL) {
args--;
total_args++;
}
/* Callable is not a normal Python function */
res = PyObject_Vectorcall(
callable, args,
total_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
NULL);
assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
Py_DECREF(callable);
for (int i = 0; i < total_args; i++) {
Py_DECREF(args[i]);
}
if (res == NULL) JUMP_TO_ERROR();
stack_pointer[-2 - oparg] = res;
stack_pointer += -1 - oparg;
break;
}
case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: {
PyObject *null;
PyObject *callable;
@ -3276,8 +3423,6 @@
break;
}
/* _CALL_PY_WITH_DEFAULTS is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
case _CALL_TYPE_1: {
PyObject *arg;
PyObject *null;

View file

@ -1002,6 +1002,97 @@
DISPATCH();
}
TARGET(CALL_BOUND_METHOD_GENERAL) {
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 4;
INSTRUCTION_STATS(CALL_BOUND_METHOD_GENERAL);
static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
PyObject *null;
PyObject *callable;
PyObject *method;
PyObject *self;
PyObject **args;
PyObject *self_or_null;
_PyInterpreterFrame *new_frame;
/* Skip 1 cache entry */
// _CHECK_PEP_523
{
DEOPT_IF(tstate->interp->eval_frame, CALL);
}
// _CHECK_METHOD_VERSION
null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
{
uint32_t func_version = read_u32(&this_instr[2].cache);
DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL);
PyObject *func = ((PyMethodObject *)callable)->im_func;
DEOPT_IF(!PyFunction_Check(func), CALL);
DEOPT_IF(((PyFunctionObject *)func)->func_version != func_version, CALL);
DEOPT_IF(null != NULL, CALL);
}
// _EXPAND_METHOD
{
assert(null == NULL);
assert(Py_TYPE(callable) == &PyMethod_Type);
self = ((PyMethodObject *)callable)->im_self;
Py_INCREF(self);
stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _PY_FRAME_GENERAL
method = ((PyMethodObject *)callable)->im_func;
assert(PyFunction_Check(method));
Py_INCREF(method);
Py_DECREF(callable);
}
// _PY_FRAME_GENERAL
args = &stack_pointer[-oparg];
self_or_null = self;
callable = method;
{
// oparg counts all of the args, but *not* self:
int total_args = oparg;
if (self_or_null != NULL) {
args--;
total_args++;
}
assert(Py_TYPE(callable) == &PyFunction_Type);
int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable));
new_frame = _PyEvalFramePushAndInit(
tstate, (PyFunctionObject *)callable, locals,
args, total_args, NULL
);
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
stack_pointer += -2 - oparg;
if (new_frame == NULL) {
goto error;
}
}
// _SAVE_RETURN_OFFSET
{
#if TIER_ONE
frame->return_offset = (uint16_t)(next_instr - this_instr);
#endif
#if TIER_TWO
frame->return_offset = oparg;
#endif
}
// _PUSH_FRAME
{
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
CALL_STAT_INC(inlined_py_calls);
frame = tstate->current_frame = new_frame;
tstate->py_recursion_remaining--;
LOAD_SP();
LOAD_IP(0);
LLTRACE_RESUME_FRAME();
}
DISPATCH();
}
TARGET(CALL_BUILTIN_CLASS) {
frame->instr_ptr = next_instr;
next_instr += 4;
@ -1713,6 +1804,56 @@
DISPATCH();
}
TARGET(CALL_NON_PY_GENERAL) {
frame->instr_ptr = next_instr;
next_instr += 4;
INSTRUCTION_STATS(CALL_NON_PY_GENERAL);
static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
PyObject *callable;
PyObject **args;
PyObject *self_or_null;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
// _CHECK_IS_NOT_PY_CALLABLE
callable = stack_pointer[-2 - oparg];
{
DEOPT_IF(PyFunction_Check(callable), CALL);
DEOPT_IF(Py_TYPE(callable) == &PyMethod_Type, CALL);
}
// _CALL_NON_PY_GENERAL
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
{
#if TIER_ONE
assert(opcode != INSTRUMENTED_CALL);
#endif
int total_args = oparg;
if (self_or_null != NULL) {
args--;
total_args++;
}
/* Callable is not a normal Python function */
res = PyObject_Vectorcall(
callable, args,
total_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
NULL);
assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
Py_DECREF(callable);
for (int i = 0; i < total_args; i++) {
Py_DECREF(args[i]);
}
if (res == NULL) { stack_pointer += -2 - oparg; goto error; }
}
// _CHECK_PERIODIC
{
}
stack_pointer[-2 - oparg] = res;
stack_pointer += -1 - oparg;
CHECK_EVAL_BREAKER();
DISPATCH();
}
TARGET(CALL_PY_EXACT_ARGS) {
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 4;
@ -1786,50 +1927,76 @@
DISPATCH();
}
TARGET(CALL_PY_WITH_DEFAULTS) {
TARGET(CALL_PY_GENERAL) {
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 4;
INSTRUCTION_STATS(CALL_PY_WITH_DEFAULTS);
INSTRUCTION_STATS(CALL_PY_GENERAL);
static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size");
PyObject *callable;
PyObject **args;
PyObject *self_or_null;
PyObject *callable;
_PyInterpreterFrame *new_frame;
/* Skip 1 cache entry */
// _CHECK_PEP_523
{
DEOPT_IF(tstate->interp->eval_frame, CALL);
}
// _CHECK_FUNCTION_VERSION
callable = stack_pointer[-2 - oparg];
{
uint32_t func_version = read_u32(&this_instr[2].cache);
DEOPT_IF(!PyFunction_Check(callable), CALL);
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != func_version, CALL);
}
// _PY_FRAME_GENERAL
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
uint32_t func_version = read_u32(&this_instr[2].cache);
DEOPT_IF(tstate->interp->eval_frame, CALL);
int argcount = oparg;
if (self_or_null != NULL) {
args--;
argcount++;
{
// oparg counts all of the args, but *not* self:
int total_args = oparg;
if (self_or_null != NULL) {
args--;
total_args++;
}
assert(Py_TYPE(callable) == &PyFunction_Type);
int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable));
new_frame = _PyEvalFramePushAndInit(
tstate, (PyFunctionObject *)callable, locals,
args, total_args, NULL
);
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
stack_pointer += -2 - oparg;
if (new_frame == NULL) {
goto error;
}
}
DEOPT_IF(!PyFunction_Check(callable), CALL);
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != func_version, CALL);
PyCodeObject *code = (PyCodeObject *)func->func_code;
assert(func->func_defaults);
assert(PyTuple_CheckExact(func->func_defaults));
int defcount = (int)PyTuple_GET_SIZE(func->func_defaults);
assert(defcount <= code->co_argcount);
int min_args = code->co_argcount - defcount;
DEOPT_IF(argcount > code->co_argcount, CALL);
DEOPT_IF(argcount < min_args, CALL);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
STAT_INC(CALL, hit);
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, code->co_argcount);
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
// _SAVE_RETURN_OFFSET
{
#if TIER_ONE
frame->return_offset = (uint16_t)(next_instr - this_instr);
#endif
#if TIER_TWO
frame->return_offset = oparg;
#endif
}
for (int i = argcount; i < code->co_argcount; i++) {
PyObject *def = PyTuple_GET_ITEM(func->func_defaults, i - min_args);
new_frame->localsplus[i] = Py_NewRef(def);
// _PUSH_FRAME
{
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
CALL_STAT_INC(inlined_py_calls);
frame = tstate->current_frame = new_frame;
tstate->py_recursion_remaining--;
LOAD_SP();
LOAD_IP(0);
LLTRACE_RESUME_FRAME();
}
// Manipulate stack and cache directly since we leave using DISPATCH_INLINED().
STACK_SHRINK(oparg + 2);
frame->return_offset = (uint16_t)(next_instr - this_instr);
DISPATCH_INLINED(new_frame);
DISPATCH();
}
TARGET(CALL_STR_1) {

View file

@ -163,6 +163,7 @@ static void *opcode_targets[256] = {
&&TARGET_BINARY_SUBSCR_TUPLE_INT,
&&TARGET_CALL_ALLOC_AND_ENTER_INIT,
&&TARGET_CALL_BOUND_METHOD_EXACT_ARGS,
&&TARGET_CALL_BOUND_METHOD_GENERAL,
&&TARGET_CALL_BUILTIN_CLASS,
&&TARGET_CALL_BUILTIN_FAST,
&&TARGET_CALL_BUILTIN_FAST_WITH_KEYWORDS,
@ -174,8 +175,9 @@ static void *opcode_targets[256] = {
&&TARGET_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS,
&&TARGET_CALL_METHOD_DESCRIPTOR_NOARGS,
&&TARGET_CALL_METHOD_DESCRIPTOR_O,
&&TARGET_CALL_NON_PY_GENERAL,
&&TARGET_CALL_PY_EXACT_ARGS,
&&TARGET_CALL_PY_WITH_DEFAULTS,
&&TARGET_CALL_PY_GENERAL,
&&TARGET_CALL_STR_1,
&&TARGET_CALL_TUPLE_1,
&&TARGET_CALL_TYPE_1,
@ -233,8 +235,6 @@ static void *opcode_targets[256] = {
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&TARGET_INSTRUMENTED_RESUME,
&&TARGET_INSTRUMENTED_END_FOR,
&&TARGET_INSTRUMENTED_END_SEND,

View file

@ -987,6 +987,7 @@ static void make_exit(_PyUOpInstruction *inst, int opcode, int target)
{
inst->opcode = opcode;
inst->oparg = 0;
inst->operand = 0;
inst->format = UOP_FORMAT_TARGET;
inst->target = target;
}

View file

@ -629,6 +629,15 @@ dummy_func(void) {
frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0));
}
op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) {
/* The _Py_UOpsAbstractFrame design assumes that we can copy arguments across directly */
(void)callable;
(void)self_or_null;
(void)args;
first_valid_check_stack = NULL;
goto done;
}
op(_POP_FRAME, (retval -- res)) {
SYNC_SP();
ctx->frame->stack_pointer = stack_pointer;
@ -718,7 +727,7 @@ dummy_func(void) {
if (first_valid_check_stack == NULL) {
first_valid_check_stack = corresponding_check_stack;
}
else {
else if (corresponding_check_stack) {
// delete all but the first valid _CHECK_STACK_SPACE
corresponding_check_stack->opcode = _NOP;
}

View file

@ -1559,6 +1559,58 @@
break;
}
case _PY_FRAME_GENERAL: {
_Py_UopsSymbol **args;
_Py_UopsSymbol *self_or_null;
_Py_UopsSymbol *callable;
_Py_UOpsAbstractFrame *new_frame;
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
/* The _Py_UOpsAbstractFrame design assumes that we can copy arguments across directly */
(void)callable;
(void)self_or_null;
(void)args;
first_valid_check_stack = NULL;
goto done;
stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame;
stack_pointer += -1 - oparg;
break;
}
case _CHECK_FUNCTION_VERSION: {
break;
}
case _CHECK_METHOD_VERSION: {
break;
}
case _EXPAND_METHOD: {
_Py_UopsSymbol *method;
_Py_UopsSymbol *self;
method = sym_new_not_null(ctx);
if (method == NULL) goto out_of_space;
self = sym_new_not_null(ctx);
if (self == NULL) goto out_of_space;
stack_pointer[-2 - oparg] = method;
stack_pointer[-1 - oparg] = self;
break;
}
case _CHECK_IS_NOT_PY_CALLABLE: {
break;
}
case _CALL_NON_PY_GENERAL: {
_Py_UopsSymbol *res;
res = sym_new_not_null(ctx);
if (res == NULL) goto out_of_space;
stack_pointer[-2 - oparg] = res;
stack_pointer += -1 - oparg;
break;
}
case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: {
_Py_UopsSymbol *null;
_Py_UopsSymbol *callable;
@ -1692,7 +1744,7 @@
if (first_valid_check_stack == NULL) {
first_valid_check_stack = corresponding_check_stack;
}
else {
else if (corresponding_check_stack) {
// delete all but the first valid _CHECK_STACK_SPACE
corresponding_check_stack->opcode = _NOP;
}
@ -1700,8 +1752,6 @@
break;
}
/* _CALL_PY_WITH_DEFAULTS is not a viable micro-op for tier 2 */
case _CALL_TYPE_1: {
_Py_UopsSymbol *res;
res = sym_new_not_null(ctx);

View file

@ -1789,8 +1789,7 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
return -1;
}
if (Py_TYPE(tp) != &PyType_Type) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_METACLASS);
return -1;
goto generic;
}
if (tp->tp_new == PyBaseObject_Type.tp_new) {
PyFunctionObject *init = get_init_for_simple_managed_python_class(tp);
@ -1807,59 +1806,12 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
_Py_SET_OPCODE(*instr, CALL_ALLOC_AND_ENTER_INIT);
return 0;
}
return -1;
}
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_CLASS_MUTABLE);
return -1;
generic:
instr->op.code = CALL_NON_PY_GENERAL;
return 0;
}
#ifdef Py_STATS
static int
builtin_call_fail_kind(int ml_flags)
{
switch (ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O |
METH_KEYWORDS | METH_METHOD)) {
case METH_VARARGS:
return SPEC_FAIL_CALL_CFUNC_VARARGS;
case METH_VARARGS | METH_KEYWORDS:
return SPEC_FAIL_CALL_CFUNC_VARARGS_KEYWORDS;
case METH_NOARGS:
return SPEC_FAIL_CALL_CFUNC_NOARGS;
case METH_METHOD | METH_FASTCALL | METH_KEYWORDS:
return SPEC_FAIL_CALL_CFUNC_METHOD_FASTCALL_KEYWORDS;
/* These cases should be optimized, but return "other" just in case */
case METH_O:
case METH_FASTCALL:
case METH_FASTCALL | METH_KEYWORDS:
return SPEC_FAIL_OTHER;
default:
return SPEC_FAIL_CALL_BAD_CALL_FLAGS;
}
}
static int
meth_descr_call_fail_kind(int ml_flags)
{
switch (ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O |
METH_KEYWORDS | METH_METHOD)) {
case METH_VARARGS:
return SPEC_FAIL_CALL_METH_DESCR_VARARGS;
case METH_VARARGS | METH_KEYWORDS:
return SPEC_FAIL_CALL_METH_DESCR_VARARGS_KEYWORDS;
case METH_METHOD | METH_FASTCALL | METH_KEYWORDS:
return SPEC_FAIL_CALL_METH_DESCR_METHOD_FASTCALL_KEYWORDS;
/* These cases should be optimized, but return "other" just in case */
case METH_NOARGS:
case METH_O:
case METH_FASTCALL:
case METH_FASTCALL | METH_KEYWORDS:
return SPEC_FAIL_OTHER;
default:
return SPEC_FAIL_CALL_BAD_CALL_FLAGS;
}
}
#endif // Py_STATS
static int
specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr,
int nargs)
@ -1901,8 +1853,8 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr,
return 0;
}
}
SPECIALIZATION_FAIL(CALL, meth_descr_call_fail_kind(descr->d_method->ml_flags));
return -1;
instr->op.code = CALL_NON_PY_GENERAL;
return 0;
}
static int
@ -1917,36 +1869,25 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs,
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_PEP_523);
return -1;
}
if (kind != SIMPLE_FUNCTION) {
SPECIALIZATION_FAIL(CALL, kind);
int argcount = -1;
if (kind == SPEC_FAIL_CODE_NOT_OPTIMIZED) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CODE_NOT_OPTIMIZED);
return -1;
}
int argcount = code->co_argcount;
int defcount = func->func_defaults == NULL ? 0 : (int)PyTuple_GET_SIZE(func->func_defaults);
int min_args = argcount-defcount;
// GH-105840: min_args is negative when somebody sets too many __defaults__!
if (min_args < 0 || nargs > argcount || nargs < min_args) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
return -1;
if (kind == SIMPLE_FUNCTION) {
argcount = code->co_argcount;
}
assert(nargs <= argcount && nargs >= min_args);
assert(min_args >= 0 && defcount >= 0);
assert(defcount == 0 || func->func_defaults != NULL);
int version = _PyFunction_GetVersionForCurrentState(func);
if (version == 0) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OUT_OF_VERSIONS);
return -1;
}
write_u32(cache->func_version, version);
if (argcount == nargs) {
if (argcount == nargs + bound_method) {
instr->op.code = bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS;
}
else if (bound_method) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_BOUND_METHOD);
return -1;
}
else {
instr->op.code = CALL_PY_WITH_DEFAULTS;
instr->op.code = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL;
}
return 0;
}
@ -1955,6 +1896,7 @@ static int
specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
{
if (PyCFunction_GET_FUNCTION(callable) == NULL) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OTHER);
return 1;
}
switch (PyCFunction_GET_FLAGS(callable) &
@ -1991,39 +1933,11 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
return 0;
}
default:
SPECIALIZATION_FAIL(CALL,
builtin_call_fail_kind(PyCFunction_GET_FLAGS(callable)));
return 1;
instr->op.code = CALL_NON_PY_GENERAL;
return 0;
}
}
#ifdef Py_STATS
static int
call_fail_kind(PyObject *callable)
{
assert(!PyCFunction_CheckExact(callable));
assert(!PyFunction_Check(callable));
assert(!PyType_Check(callable));
assert(!Py_IS_TYPE(callable, &PyMethodDescr_Type));
assert(!PyMethod_Check(callable));
if (PyInstanceMethod_Check(callable)) {
return SPEC_FAIL_CALL_INSTANCE_METHOD;
}
// builtin method
else if (PyCMethod_Check(callable)) {
return SPEC_FAIL_CALL_CMETHOD;
}
else if (Py_TYPE(callable) == &PyWrapperDescr_Type) {
return SPEC_FAIL_CALL_OPERATOR_WRAPPER;
}
else if (Py_TYPE(callable) == &_PyMethodWrapper_Type) {
return SPEC_FAIL_CALL_METHOD_WRAPPER;
}
return SPEC_FAIL_OTHER;
}
#endif // Py_STATS
void
_Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
{
@ -2047,7 +1961,7 @@ _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
else if (PyMethod_Check(callable)) {
PyObject *func = ((PyMethodObject *)callable)->im_func;
if (PyFunction_Check(func)) {
fail = specialize_py_call((PyFunctionObject *)func, instr, nargs+1, true);
fail = specialize_py_call((PyFunctionObject *)func, instr, nargs, true);
}
else {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_BOUND_METHOD);
@ -2055,8 +1969,8 @@ _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
}
}
else {
SPECIALIZATION_FAIL(CALL, call_fail_kind(callable));
fail = -1;
instr->op.code = CALL_NON_PY_GENERAL;
fail = 0;
}
if (fail) {
STAT_INC(CALL, failure);