gh-131738: optimize builtin any/all/tuple calls with a generator expression arg (#131737)

This commit is contained in:
Irit Katriel 2025-03-28 10:35:20 +00:00 committed by GitHub
parent 674dbf3b3a
commit 2c8f329dc6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 199 additions and 38 deletions

View file

@ -792,7 +792,9 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(aggregate_class));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alias));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(align));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(allow_code));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(any));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(append));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(arg));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(argdefs));

View file

@ -283,7 +283,9 @@ struct _Py_global_strings {
STRUCT_FOR_ID(aggregate_class)
STRUCT_FOR_ID(alias)
STRUCT_FOR_ID(align)
STRUCT_FOR_ID(all)
STRUCT_FOR_ID(allow_code)
STRUCT_FOR_ID(any)
STRUCT_FOR_ID(append)
STRUCT_FOR_ID(arg)
STRUCT_FOR_ID(argdefs)

View file

@ -9,6 +9,7 @@ extern "C" {
#include "pycore_ast_state.h" // struct ast_state
#include "pycore_llist.h" // struct llist_node
#include "pycore_opcode_utils.h" // NUM_COMMON_CONSTANTS
#include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR
#include "pycore_structs.h" // PyHamtObject
#include "pycore_tstate.h" // _PyThreadStateImpl
@ -912,6 +913,7 @@ struct _is {
struct ast_state ast;
struct types_state types;
struct callable_cache callable_cache;
PyObject *common_consts[NUM_COMMON_CONSTANTS];
bool jit;
struct _PyExecutorObject *executor_list_head;
size_t trace_run_counter;

View file

@ -272,6 +272,7 @@ Known values:
Python 3.14a6 3617 (Branch monitoring for async for loops)
Python 3.14a6 3618 (Add oparg to END_ASYNC_FOR)
Python 3.14a6 3619 (Renumber RESUME opcode from 149 to 128)
Python 3.14a6 3620 (Optimize bytecode for all/any/tuple called on a genexp)
Python 3.15 will start with 3650
@ -284,7 +285,7 @@ PC/launcher.c must also be updated.
*/
#define PYC_MAGIC_NUMBER 3619
#define PYC_MAGIC_NUMBER 3620
/* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes
(little-endian) and then appending b'\r\n'. */
#define PYC_MAGIC_NUMBER_TOKEN \

View file

@ -8,8 +8,6 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif
#include "opcode_ids.h"
#define MAX_REAL_OPCODE 254
#define IS_WITHIN_OPCODE_RANGE(opcode) \
@ -67,7 +65,10 @@ extern "C" {
/* Values used as the oparg for LOAD_COMMON_CONSTANT */
#define CONSTANT_ASSERTIONERROR 0
#define CONSTANT_NOTIMPLEMENTEDERROR 1
#define NUM_COMMON_CONSTANTS 2
#define CONSTANT_BUILTIN_TUPLE 2
#define CONSTANT_BUILTIN_ALL 3
#define CONSTANT_BUILTIN_ANY 4
#define NUM_COMMON_CONSTANTS 5
/* Values used in the oparg for RESUME */
#define RESUME_AT_FUNC_START 0

View file

@ -790,7 +790,9 @@ extern "C" {
INIT_ID(aggregate_class), \
INIT_ID(alias), \
INIT_ID(align), \
INIT_ID(all), \
INIT_ID(allow_code), \
INIT_ID(any), \
INIT_ID(append), \
INIT_ID(arg), \
INIT_ID(argdefs), \

View file

@ -920,10 +920,18 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(all);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(allow_code);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(any);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(append);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));

View file

@ -9,6 +9,7 @@ __all__ = ["cmp_op", "stack_effect", "hascompare", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG", "hasarg", "hasconst", "hasname",
"hasjump", "hasjrel", "hasjabs", "hasfree", "haslocal", "hasexc"]
import builtins
import _opcode
from _opcode import stack_effect
@ -38,7 +39,8 @@ hasexc = [op for op in opmap.values() if _opcode.has_exc(op)]
_intrinsic_1_descs = _opcode.get_intrinsic1_descs()
_intrinsic_2_descs = _opcode.get_intrinsic2_descs()
_special_method_names = _opcode.get_special_method_names()
_common_constants = [AssertionError, NotImplementedError]
_common_constants = [builtins.AssertionError, builtins.NotImplementedError,
builtins.tuple, builtins.all, builtins.any]
_nb_ops = _opcode.get_nb_ops()
hascompare = [opmap["COMPARE_OP"]]

View file

@ -225,6 +225,8 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase):
self.assertEqual(all(x > 42 for x in S), True)
S = [50, 40, 60]
self.assertEqual(all(x > 42 for x in S), False)
S = [50, 40, 60, TestFailingBool()]
self.assertEqual(all(x > 42 for x in S), False)
def test_any(self):
self.assertEqual(any([None, None, None]), False)
@ -238,9 +240,59 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase):
self.assertEqual(any([1, TestFailingBool()]), True) # Short-circuit
S = [40, 60, 30]
self.assertEqual(any(x > 42 for x in S), True)
S = [40, 60, 30, TestFailingBool()]
self.assertEqual(any(x > 42 for x in S), True)
S = [10, 20, 30]
self.assertEqual(any(x > 42 for x in S), False)
def test_all_any_tuple_optimization(self):
# Exercises all()/any()/tuple() called directly on a generator expression.
# Two things are checked: each helper holds exactly one nested code object
# in its co_consts, and rebinding the three names changes what the helpers
# return.
def f_all():
return all(x-2 for x in [1,2,3])
def f_any():
return any(x-1 for x in [1,2,3])
def f_tuple():
return tuple(2*x for x in [1,2,3])
funcs = [f_all, f_any, f_tuple]
for f in funcs:
# check that the generator code object is not duplicated
code_objs = [c for c in f.__code__.co_consts if isinstance(c, type(f.__code__))]
self.assertEqual(len(code_objs), 1)
# check that overriding the builtins works
# First shadow the names with module-level globals only; the helpers must
# return the replacement results rather than the builtin ones.
global all, any, tuple
saved = all, any, tuple
try:
all = lambda x : "all"
any = lambda x : "any"
tuple = lambda x : "tuple"
overridden_outputs = [f() for f in funcs]
finally:
# always put the original bindings back, even if a helper raised
all, any, tuple = saved
self.assertEqual(overridden_outputs, ['all', 'any', 'tuple'])
# Now repeat, overriding the builtins module as well
saved = all, any, tuple
try:
builtins.all = all = lambda x : "all"
builtins.any = any = lambda x : "any"
builtins.tuple = tuple = lambda x : "tuple"
overridden_outputs = [f() for f in funcs]
finally:
# restore both the module globals and the builtins module attributes
all, any, tuple = saved
builtins.all, builtins.any, builtins.tuple = saved
self.assertEqual(overridden_outputs, ['all', 'any', 'tuple'])
def test_ascii(self):
self.assertEqual(ascii(''), '\'\'')
self.assertEqual(ascii(0), '0')

View file

@ -0,0 +1 @@
Compiler emits optimized code for builtin any/all/tuple calls over a generator expression.

View file

@ -19,6 +19,8 @@
#include "pycore_warnings.h" // _PyErr_WarnUnawaitedCoroutine()
#include "opcode_ids.h" // RESUME, etc
// Forward declarations
static PyObject* gen_close(PyObject *, PyObject *);
static PyObject* async_gen_asend_new(PyAsyncGenObject *, PyObject *);

View file

@ -1402,16 +1402,8 @@ dummy_func(
inst(LOAD_COMMON_CONSTANT, ( -- value)) {
// Keep in sync with _common_constants in opcode.py
// If we ever have more than two constants, use a lookup table
PyObject *val;
if (oparg == CONSTANT_ASSERTIONERROR) {
val = PyExc_AssertionError;
}
else {
assert(oparg == CONSTANT_NOTIMPLEMENTEDERROR);
val = PyExc_NotImplementedError;
}
value = PyStackRef_FromPyObjectImmortal(val);
assert(oparg < NUM_COMMON_CONSTANTS);
value = PyStackRef_FromPyObjectNew(tstate->interp->common_consts[oparg]);
}
inst(LOAD_BUILD_CLASS, ( -- bc)) {

View file

@ -3820,6 +3820,92 @@ update_start_location_to_match_attr(compiler *c, location loc,
return loc;
}
// Try to compile `all(<genexp>)`, `any(<genexp>)` or `tuple(<genexp>)` as an
// inline loop over the generator instead of a regular call.
//
// The callee is expected to be on top of the stack already (the caller
// visits e->v.Call.func before calling this).  The emitted code compares it
// by identity with the matching LOAD_COMMON_CONSTANT entry; if it differs,
// control jumps to a label bound at the end of the code emitted here, so
// whatever the caller emits next (the normal call) runs instead.  The inline
// path finishes with a jump to `end`.
//
// Returns 1 if the guarded inline code was emitted and 0 if the call does
// not have a supported shape or name.  The ADDOP*/VISIT/label macros are
// assumed to return early with an error code on failure (the caller wraps
// this call in RETURN_IF_ERROR).
static int
maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
{
    expr_ty callee = e->v.Call.func;
    asdl_expr_seq *pos_args = e->v.Call.args;
    asdl_keyword_seq *kw_args = e->v.Call.keywords;

    // Only `name(<genexp>)` with no other arguments is a candidate.
    if (callee->kind != Name_kind ||
        asdl_seq_LEN(pos_args) != 1 ||
        asdl_seq_LEN(kw_args) != 0 ||
        asdl_seq_GET(pos_args, 0)->kind != GeneratorExp_kind)
    {
        return 0;
    }

    location loc = LOC(callee);
    NEW_JUMP_TARGET_LABEL(c, skip_optimization);

    int builtin_const = -1;         // oparg for LOAD_COMMON_CONSTANT
    int keep_looping_opcode = -1;   // all/any: jump that continues the loop
    PyObject *exhausted_res = NULL; // all/any: result when the loop runs out
    PyObject *early_exit_res = NULL;// all/any: result when the loop stops early

    PyObject *callee_name = callee->v.Name.id;
    if (_PyUnicode_EqualToASCIIString(callee_name, "all")) {
        builtin_const = CONSTANT_BUILTIN_ALL;
        keep_looping_opcode = POP_JUMP_IF_TRUE;
        exhausted_res = Py_True;
        early_exit_res = Py_False;
    }
    else if (_PyUnicode_EqualToASCIIString(callee_name, "any")) {
        builtin_const = CONSTANT_BUILTIN_ANY;
        keep_looping_opcode = POP_JUMP_IF_FALSE;
        exhausted_res = Py_False;
        early_exit_res = Py_True;
    }
    else if (_PyUnicode_EqualToASCIIString(callee_name, "tuple")) {
        builtin_const = CONSTANT_BUILTIN_TUPLE;
    }

    if (builtin_const == -1) {
        // Not one of the recognised names: nothing was emitted.
        USE_LABEL(c, skip_optimization);
        return 0;
    }

    const int building_tuple = (builtin_const == CONSTANT_BUILTIN_TUPLE);

    // Runtime guard: is the callee really the builtin we expect?
    ADDOP_I(c, loc, COPY, 1); // the function
    ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, builtin_const);
    ADDOP_COMPARE(c, loc, Is);
    ADDOP_JUMP(c, loc, POP_JUMP_IF_FALSE, skip_optimization);
    // Guard passed: the callee itself is no longer needed.
    ADDOP(c, loc, POP_TOP);

    if (building_tuple) {
        // Items are collected in a list and converted to a tuple at the end.
        ADDOP_I(c, loc, BUILD_LIST, 0);
    }
    expr_ty genexp = asdl_seq_GET(pos_args, 0);
    VISIT(c, expr, genexp);

    NEW_JUMP_TARGET_LABEL(c, loop);
    NEW_JUMP_TARGET_LABEL(c, cleanup);

    USE_LABEL(c, loop);
    ADDOP_JUMP(c, loc, FOR_ITER, cleanup);
    if (building_tuple) {
        ADDOP_I(c, loc, LIST_APPEND, 2);
        ADDOP_JUMP(c, loc, JUMP, loop);
    }
    else {
        ADDOP(c, loc, TO_BOOL);
        ADDOP_JUMP(c, loc, keep_looping_opcode, loop);
    }

    // Fall-through of the conditional jump above: an item decided the result
    // before the generator was exhausted.  (For tuple the loop body ends in
    // an unconditional jump, so this is emitted but not fallen into.)
    ADDOP(c, NO_LOCATION, POP_ITER);
    if (!building_tuple) {
        ADDOP_LOAD_CONST(c, loc, early_exit_res);
    }
    ADDOP_JUMP(c, loc, JUMP, end);

    // FOR_ITER target: the generator is exhausted.
    USE_LABEL(c, cleanup);
    ADDOP(c, NO_LOCATION, END_FOR);
    ADDOP(c, NO_LOCATION, POP_ITER);
    if (building_tuple) {
        ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_LIST_TO_TUPLE);
    }
    else {
        ADDOP_LOAD_CONST(c, loc, exhausted_res);
    }
    ADDOP_JUMP(c, loc, JUMP, end);

    // Landing point for a failed guard; the caller's normal call follows.
    USE_LABEL(c, skip_optimization);
    return 1;
}
// Return 1 if the method call was optimized, 0 if not, and -1 on error.
static int
maybe_optimize_method_call(compiler *c, expr_ty e)
@ -3926,14 +4012,18 @@ codegen_call(compiler *c, expr_ty e)
if (ret == 1) {
return SUCCESS;
}
NEW_JUMP_TARGET_LABEL(c, skip_normal_call);
RETURN_IF_ERROR(check_caller(c, e->v.Call.func));
VISIT(c, expr, e->v.Call.func);
RETURN_IF_ERROR(maybe_optimize_function_call(c, e, skip_normal_call));
location loc = LOC(e->v.Call.func);
ADDOP(c, loc, PUSH_NULL);
loc = LOC(e);
return codegen_call_helper(c, loc, 0,
e->v.Call.args,
e->v.Call.keywords);
ret = codegen_call_helper(c, loc, 0,
e->v.Call.args,
e->v.Call.keywords);
USE_LABEL(c, skip_normal_call);
return ret;
}
static int

View file

@ -1899,16 +1899,8 @@
_PyStackRef value;
oparg = CURRENT_OPARG();
// Keep in sync with _common_constants in opcode.py
// If we ever have more than two constants, use a lookup table
PyObject *val;
if (oparg == CONSTANT_ASSERTIONERROR) {
val = PyExc_AssertionError;
}
else {
assert(oparg == CONSTANT_NOTIMPLEMENTEDERROR);
val = PyExc_NotImplementedError;
}
value = PyStackRef_FromPyObjectImmortal(val);
assert(oparg < NUM_COMMON_CONSTANTS);
value = PyStackRef_FromPyObjectNew(tstate->interp->common_consts[oparg]);
stack_pointer[0] = value;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());

View file

@ -8723,16 +8723,8 @@
INSTRUCTION_STATS(LOAD_COMMON_CONSTANT);
_PyStackRef value;
// Keep in sync with _common_constants in opcode.py
// If we ever have more than two constants, use a lookup table
PyObject *val;
if (oparg == CONSTANT_ASSERTIONERROR) {
val = PyExc_AssertionError;
}
else {
assert(oparg == CONSTANT_NOTIMPLEMENTEDERROR);
val = PyExc_NotImplementedError;
}
value = PyStackRef_FromPyObjectImmortal(val);
assert(oparg < NUM_COMMON_CONSTANTS);
value = PyStackRef_FromPyObjectNew(tstate->interp->common_consts[oparg]);
stack_pointer[0] = value;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());

View file

@ -790,6 +790,26 @@ pycore_init_builtins(PyThreadState *tstate)
}
interp->callable_cache.len = len;
PyObject *all = PyDict_GetItemWithError(builtins_dict, &_Py_ID(all));
if (!all) {
goto error;
}
PyObject *any = PyDict_GetItemWithError(builtins_dict, &_Py_ID(any));
if (!any) {
goto error;
}
interp->common_consts[CONSTANT_ASSERTIONERROR] = PyExc_AssertionError;
interp->common_consts[CONSTANT_NOTIMPLEMENTEDERROR] = PyExc_NotImplementedError;
interp->common_consts[CONSTANT_BUILTIN_TUPLE] = (PyObject*)&PyTuple_Type;
interp->common_consts[CONSTANT_BUILTIN_ALL] = all;
interp->common_consts[CONSTANT_BUILTIN_ANY] = any;
for (int i=0; i < NUM_COMMON_CONSTANTS; i++) {
assert(interp->common_consts[i] != NULL);
}
PyObject *list_append = _PyType_Lookup(&PyList_Type, &_Py_ID(append));
if (list_append == NULL) {
goto error;