gh-87729: add LOAD_SUPER_ATTR instruction for faster super() (#103497)

This speeds up `super()` (by around 85%, for a simple one-level
`super().meth()` microbenchmark) by avoiding allocation of a new
single-use `super()` object on each use.
This commit is contained in:
Carl Meyer 2023-04-24 16:22:14 -06:00 committed by GitHub
parent 22bed58e53
commit 0dc8b50d33
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 783 additions and 408 deletions

View file

@ -25,6 +25,7 @@
#include "pycore_sliceobject.h" // _PyBuildSlice_ConsumeRefs
#include "pycore_sysmodule.h" // _PySys_Audit()
#include "pycore_tuple.h" // _PyTuple_ITEMS()
#include "pycore_typeobject.h" // _PySuper_Lookup()
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
#include "pycore_dict.h"
@ -1553,6 +1554,36 @@ dummy_func(
PREDICT(JUMP_BACKWARD);
}
inst(LOAD_SUPER_ATTR, (global_super, class, self -- res2 if (oparg & 1), res)) {
PyObject *name = GETITEM(frame->f_code->co_names, oparg >> 2);
if (global_super == (PyObject *)&PySuper_Type && PyType_Check(class)) {
int method = 0;
Py_DECREF(global_super);
res = _PySuper_Lookup((PyTypeObject *)class, self, name, oparg & 1 ? &method : NULL);
Py_DECREF(class);
if (res == NULL) {
Py_DECREF(self);
ERROR_IF(true, error);
}
// Works with CALL, pushes two values: either `meth | self` or `NULL | meth`.
if (method) {
res2 = res;
res = self; // transfer ownership
} else {
res2 = NULL;
Py_DECREF(self);
}
} else {
PyObject *stack[] = {class, self};
PyObject *super = PyObject_Vectorcall(global_super, stack, oparg & 2, NULL);
DECREF_INPUTS();
ERROR_IF(super == NULL, error);
res = PyObject_GetAttr(super, name);
Py_DECREF(super);
ERROR_IF(res == NULL, error);
}
}
family(load_attr, INLINE_CACHE_ENTRIES_LOAD_ATTR) = {
LOAD_ATTR,
LOAD_ATTR_INSTANCE_VALUE,

View file

@ -21,6 +21,7 @@
#include "pycore_sliceobject.h" // _PyBuildSlice_ConsumeRefs
#include "pycore_sysmodule.h" // _PySys_Audit()
#include "pycore_tuple.h" // _PyTuple_ITEMS()
#include "pycore_typeobject.h" // _PySuper_Lookup()
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
#include "pycore_dict.h"

View file

@ -829,6 +829,10 @@ stack_effect(int opcode, int oparg, int jump)
case LOAD_METHOD:
return 1;
case LOAD_SUPER_METHOD:
case LOAD_ZERO_SUPER_METHOD:
case LOAD_ZERO_SUPER_ATTR:
return -1;
default:
return PY_INVALID_STACK_EFFECT;
}
@ -1047,6 +1051,24 @@ compiler_addop_name(struct compiler_unit *u, location loc,
arg <<= 1;
arg |= 1;
}
if (opcode == LOAD_SUPER_ATTR) {
arg <<= 2;
arg |= 2;
}
if (opcode == LOAD_SUPER_METHOD) {
opcode = LOAD_SUPER_ATTR;
arg <<= 2;
arg |= 3;
}
if (opcode == LOAD_ZERO_SUPER_ATTR) {
opcode = LOAD_SUPER_ATTR;
arg <<= 2;
}
if (opcode == LOAD_ZERO_SUPER_METHOD) {
opcode = LOAD_SUPER_ATTR;
arg <<= 2;
arg |= 1;
}
return codegen_addop_i(&u->u_instr_sequence, opcode, arg, loc);
}
@ -4230,6 +4252,89 @@ is_import_originated(struct compiler *c, expr_ty e)
return flags & DEF_IMPORT;
}
static int
can_optimize_super_call(struct compiler *c, expr_ty attr)
{
expr_ty e = attr->v.Attribute.value;
if (e->kind != Call_kind ||
e->v.Call.func->kind != Name_kind ||
!_PyUnicode_EqualToASCIIString(e->v.Call.func->v.Name.id, "super") ||
_PyUnicode_EqualToASCIIString(attr->v.Attribute.attr, "__class__") ||
asdl_seq_LEN(e->v.Call.keywords) != 0) {
return 0;
}
Py_ssize_t num_args = asdl_seq_LEN(e->v.Call.args);
PyObject *super_name = e->v.Call.func->v.Name.id;
// detect statically-visible shadowing of 'super' name
int scope = _PyST_GetScope(c->u->u_ste, super_name);
if (scope != GLOBAL_IMPLICIT) {
return 0;
}
scope = _PyST_GetScope(c->c_st->st_top, super_name);
if (scope != 0) {
return 0;
}
if (num_args == 2) {
for (Py_ssize_t i = 0; i < num_args; i++) {
expr_ty elt = asdl_seq_GET(e->v.Call.args, i);
if (elt->kind == Starred_kind) {
return 0;
}
}
// exactly two non-starred args; we can just load
// the provided args
return 1;
}
if (num_args != 0) {
return 0;
}
// we need the following for zero-arg super():
// enclosing function should have at least one argument
if (c->u->u_metadata.u_argcount == 0 &&
c->u->u_metadata.u_posonlyargcount == 0) {
return 0;
}
// __class__ cell should be available
if (get_ref_type(c, &_Py_ID(__class__)) == FREE) {
return 1;
}
return 0;
}
static int
load_args_for_super(struct compiler *c, expr_ty e) {
location loc = LOC(e);
// load super() global
PyObject *super_name = e->v.Call.func->v.Name.id;
RETURN_IF_ERROR(compiler_nameop(c, loc, super_name, Load));
if (asdl_seq_LEN(e->v.Call.args) == 2) {
VISIT(c, expr, asdl_seq_GET(e->v.Call.args, 0));
VISIT(c, expr, asdl_seq_GET(e->v.Call.args, 1));
return SUCCESS;
}
// load __class__ cell
PyObject *name = &_Py_ID(__class__);
assert(get_ref_type(c, name) == FREE);
RETURN_IF_ERROR(compiler_nameop(c, loc, name, Load));
// load self (first argument)
Py_ssize_t i = 0;
PyObject *key, *value;
if (!PyDict_Next(c->u->u_metadata.u_varnames, &i, &key, &value)) {
return ERROR;
}
RETURN_IF_ERROR(compiler_nameop(c, loc, key, Load));
return SUCCESS;
}
// If an attribute access spans multiple lines, update the current start
// location to point to the attribute name.
static location
@ -4297,11 +4402,21 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e)
return 0;
}
}
/* Alright, we can optimize the code. */
VISIT(c, expr, meth->v.Attribute.value);
location loc = LOC(meth);
loc = update_start_location_to_match_attr(c, loc, meth);
ADDOP_NAME(c, loc, LOAD_METHOD, meth->v.Attribute.attr, names);
if (can_optimize_super_call(c, meth)) {
RETURN_IF_ERROR(load_args_for_super(c, meth->v.Attribute.value));
int opcode = asdl_seq_LEN(meth->v.Attribute.value->v.Call.args) ?
LOAD_SUPER_METHOD : LOAD_ZERO_SUPER_METHOD;
ADDOP_NAME(c, loc, opcode, meth->v.Attribute.attr, names);
} else {
VISIT(c, expr, meth->v.Attribute.value);
loc = update_start_location_to_match_attr(c, loc, meth);
ADDOP_NAME(c, loc, LOAD_METHOD, meth->v.Attribute.attr, names);
}
VISIT_SEQ(c, expr, e->v.Call.args);
if (kwdsl) {
@ -5309,6 +5424,13 @@ compiler_visit_expr1(struct compiler *c, expr_ty e)
return compiler_formatted_value(c, e);
/* The following exprs can be assignment targets. */
case Attribute_kind:
if (e->v.Attribute.ctx == Load && can_optimize_super_call(c, e)) {
RETURN_IF_ERROR(load_args_for_super(c, e->v.Attribute.value));
int opcode = asdl_seq_LEN(e->v.Attribute.value->v.Call.args) ?
LOAD_SUPER_ATTR : LOAD_ZERO_SUPER_ATTR;
ADDOP_NAME(c, loc, opcode, e->v.Attribute.attr, names);
return SUCCESS;
}
VISIT(c, expr, e->v.Attribute.value);
loc = LOC(e);
loc = update_start_location_to_match_attr(c, loc, e);

File diff suppressed because it is too large Load diff

View file

@ -205,6 +205,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 1;
case MAP_ADD:
return 2;
case LOAD_SUPER_ATTR:
return 3;
case LOAD_ATTR:
return 1;
case LOAD_ATTR_INSTANCE_VALUE:
@ -589,6 +591,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 0;
case MAP_ADD:
return 0;
case LOAD_SUPER_ATTR:
return ((oparg & 1) ? 1 : 0) + 1;
case LOAD_ATTR:
return ((oparg & 1) ? 1 : 0) + 1;
case LOAD_ATTR_INSTANCE_VALUE:
@ -879,6 +883,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
[DICT_UPDATE] = { true, INSTR_FMT_IB },
[DICT_MERGE] = { true, INSTR_FMT_IB },
[MAP_ADD] = { true, INSTR_FMT_IB },
[LOAD_SUPER_ATTR] = { true, INSTR_FMT_IB },
[LOAD_ATTR] = { true, INSTR_FMT_IBC00000000 },
[LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000 },
[LOAD_ATTR_MODULE] = { true, INSTR_FMT_IBC00000000 },

View file

@ -140,9 +140,9 @@ static void *opcode_targets[256] = {
&&TARGET_STORE_DEREF,
&&TARGET_DELETE_DEREF,
&&TARGET_JUMP_BACKWARD,
&&TARGET_STORE_FAST__STORE_FAST,
&&TARGET_LOAD_SUPER_ATTR,
&&TARGET_CALL_FUNCTION_EX,
&&TARGET_STORE_SUBSCR_DICT,
&&TARGET_STORE_FAST__STORE_FAST,
&&TARGET_EXTENDED_ARG,
&&TARGET_LIST_APPEND,
&&TARGET_SET_ADD,
@ -152,15 +152,15 @@ static void *opcode_targets[256] = {
&&TARGET_YIELD_VALUE,
&&TARGET_RESUME,
&&TARGET_MATCH_CLASS,
&&TARGET_STORE_SUBSCR_DICT,
&&TARGET_STORE_SUBSCR_LIST_INT,
&&TARGET_UNPACK_SEQUENCE_LIST,
&&TARGET_FORMAT_VALUE,
&&TARGET_BUILD_CONST_KEY_MAP,
&&TARGET_BUILD_STRING,
&&TARGET_UNPACK_SEQUENCE_LIST,
&&TARGET_UNPACK_SEQUENCE_TUPLE,
&&TARGET_UNPACK_SEQUENCE_TWO_TUPLE,
&&TARGET_SEND_GEN,
&&_unknown_opcode,
&&TARGET_LIST_EXTEND,
&&TARGET_SET_UPDATE,
&&TARGET_DICT_MERGE,