mirror of
https://github.com/python/cpython.git
synced 2025-11-17 01:25:57 +00:00
bpo-37358: Use vectorcall for functools.partial (GH-14284)
https://bugs.python.org/issue37358
This commit is contained in:
parent
dc3f99fa77
commit
ed184c06e2
2 changed files with 100 additions and 67 deletions
|
|
@@ -0,0 +1 @@
|
||||||
|
Optimized ``functools.partial`` by using vectorcall.
|
||||||
|
|
@@ -18,13 +18,15 @@ typedef struct {
|
||||||
PyObject *fn;
|
PyObject *fn;
|
||||||
PyObject *args;
|
PyObject *args;
|
||||||
PyObject *kw;
|
PyObject *kw;
|
||||||
PyObject *dict;
|
PyObject *dict; /* __dict__ */
|
||||||
PyObject *weakreflist; /* List of weak references */
|
PyObject *weakreflist; /* List of weak references */
|
||||||
int use_fastcall;
|
vectorcallfunc vectorcall;
|
||||||
} partialobject;
|
} partialobject;
|
||||||
|
|
||||||
static PyTypeObject partial_type;
|
static PyTypeObject partial_type;
|
||||||
|
|
||||||
|
static void partial_setvectorcall(partialobject *pto);
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
|
partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
|
||||||
{
|
{
|
||||||
|
|
@@ -107,8 +109,7 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
pto->use_fastcall = (_PyVectorcall_Function(func) != NULL);
|
partial_setvectorcall(pto);
|
||||||
|
|
||||||
return (PyObject *)pto;
|
return (PyObject *)pto;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -126,77 +127,107 @@ partial_dealloc(partialobject *pto)
|
||||||
Py_TYPE(pto)->tp_free(pto);
|
Py_TYPE(pto)->tp_free(pto);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
|
||||||
partial_fastcall(partialobject *pto, PyObject **args, Py_ssize_t nargs,
|
/* Merging keyword arguments using the vectorcall convention is messy, so
|
||||||
PyObject *kwargs)
|
* if we would need to do that, we stop using vectorcall and fall back
|
||||||
|
* to using partial_call() instead. */
|
||||||
|
_Py_NO_INLINE static PyObject *
|
||||||
|
partial_vectorcall_fallback(partialobject *pto, PyObject *const *args,
|
||||||
|
size_t nargsf, PyObject *kwnames)
|
||||||
{
|
{
|
||||||
|
pto->vectorcall = NULL;
|
||||||
|
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
|
||||||
|
return _PyObject_MakeTpCall((PyObject *)pto, args, nargs, kwnames);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
partial_vectorcall(partialobject *pto, PyObject *const *args,
|
||||||
|
size_t nargsf, PyObject *kwnames)
|
||||||
|
{
|
||||||
|
/* pto->kw is mutable, so need to check every time */
|
||||||
|
if (PyDict_GET_SIZE(pto->kw)) {
|
||||||
|
return partial_vectorcall_fallback(pto, args, nargsf, kwnames);
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
|
||||||
|
Py_ssize_t nargs_total = nargs;
|
||||||
|
if (kwnames != NULL) {
|
||||||
|
nargs_total += PyTuple_GET_SIZE(kwnames);
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject **pto_args = _PyTuple_ITEMS(pto->args);
|
||||||
|
Py_ssize_t pto_nargs = PyTuple_GET_SIZE(pto->args);
|
||||||
|
|
||||||
|
/* Fast path if we're called without arguments */
|
||||||
|
if (nargs_total == 0) {
|
||||||
|
return _PyObject_Vectorcall(pto->fn, pto_args, pto_nargs, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
|
||||||
|
* positional argument */
|
||||||
|
if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
|
||||||
|
PyObject **newargs = (PyObject **)args - 1;
|
||||||
|
PyObject *tmp = newargs[0];
|
||||||
|
newargs[0] = pto_args[0];
|
||||||
|
PyObject *ret = _PyObject_Vectorcall(pto->fn, newargs, nargs + 1, kwnames);
|
||||||
|
newargs[0] = tmp;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_ssize_t newnargs_total = pto_nargs + nargs_total;
|
||||||
|
|
||||||
PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
|
PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
|
||||||
PyObject *ret;
|
PyObject *ret;
|
||||||
PyObject **stack, **stack_buf = NULL;
|
PyObject **stack;
|
||||||
Py_ssize_t nargs2, pto_nargs;
|
|
||||||
|
|
||||||
pto_nargs = PyTuple_GET_SIZE(pto->args);
|
if (newnargs_total <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
|
||||||
nargs2 = pto_nargs + nargs;
|
|
||||||
|
|
||||||
if (pto_nargs == 0) {
|
|
||||||
stack = args;
|
|
||||||
}
|
|
||||||
else if (nargs == 0) {
|
|
||||||
stack = _PyTuple_ITEMS(pto->args);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (nargs2 <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
|
|
||||||
stack = small_stack;
|
stack = small_stack;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
stack_buf = PyMem_Malloc(nargs2 * sizeof(PyObject *));
|
stack = PyMem_Malloc(newnargs_total * sizeof(PyObject *));
|
||||||
if (stack_buf == NULL) {
|
if (stack == NULL) {
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
stack = stack_buf;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* use borrowed references */
|
/* Copy to new stack, using borrowed references */
|
||||||
memcpy(stack,
|
memcpy(stack, pto_args, pto_nargs * sizeof(PyObject*));
|
||||||
_PyTuple_ITEMS(pto->args),
|
memcpy(stack + pto_nargs, args, nargs_total * sizeof(PyObject*));
|
||||||
pto_nargs * sizeof(PyObject*));
|
|
||||||
memcpy(&stack[pto_nargs],
|
|
||||||
args,
|
|
||||||
nargs * sizeof(PyObject*));
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = _PyObject_FastCallDict(pto->fn, stack, nargs2, kwargs);
|
ret = _PyObject_Vectorcall(pto->fn, stack, pto_nargs + nargs, kwnames);
|
||||||
PyMem_Free(stack_buf);
|
if (stack != small_stack) {
|
||||||
|
PyMem_Free(stack);
|
||||||
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
/* Set pto->vectorcall depending on the parameters of the partial object */
|
||||||
partial_call_impl(partialobject *pto, PyObject *args, PyObject *kwargs)
|
static void
|
||||||
|
partial_setvectorcall(partialobject *pto)
|
||||||
{
|
{
|
||||||
PyObject *ret, *args2;
|
if (_PyVectorcall_Function(pto->fn) == NULL) {
|
||||||
|
/* Don't use vectorcall if the underlying function doesn't support it */
|
||||||
/* Note: tupleconcat() is optimized for empty tuples */
|
pto->vectorcall = NULL;
|
||||||
args2 = PySequence_Concat(pto->args, args);
|
|
||||||
if (args2 == NULL) {
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
assert(PyTuple_Check(args2));
|
/* We could have a special case if there are no arguments,
|
||||||
|
* but that is unlikely (why use partial without arguments?),
|
||||||
ret = PyObject_Call(pto->fn, args2, kwargs);
|
* so we don't optimize that */
|
||||||
Py_DECREF(args2);
|
else {
|
||||||
return ret;
|
pto->vectorcall = (vectorcallfunc)partial_vectorcall;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
|
partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
PyObject *kwargs2, *res;
|
|
||||||
|
|
||||||
assert(PyCallable_Check(pto->fn));
|
assert(PyCallable_Check(pto->fn));
|
||||||
assert(PyTuple_Check(pto->args));
|
assert(PyTuple_Check(pto->args));
|
||||||
assert(PyDict_Check(pto->kw));
|
assert(PyDict_Check(pto->kw));
|
||||||
|
|
||||||
|
/* Merge keywords */
|
||||||
|
PyObject *kwargs2;
|
||||||
if (PyDict_GET_SIZE(pto->kw) == 0) {
|
if (PyDict_GET_SIZE(pto->kw) == 0) {
|
||||||
/* kwargs can be NULL */
|
/* kwargs can be NULL */
|
||||||
kwargs2 = kwargs;
|
kwargs2 = kwargs;
|
||||||
|
|
@@ -219,16 +250,16 @@ partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Merge positional arguments */
|
||||||
|
/* Note: tupleconcat() is optimized for empty tuples */
|
||||||
|
PyObject *args2 = PySequence_Concat(pto->args, args);
|
||||||
|
if (args2 == NULL) {
|
||||||
|
Py_XDECREF(kwargs2);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
if (pto->use_fastcall) {
|
PyObject *res = PyObject_Call(pto->fn, args2, kwargs2);
|
||||||
res = partial_fastcall(pto,
|
Py_DECREF(args2);
|
||||||
_PyTuple_ITEMS(args),
|
|
||||||
PyTuple_GET_SIZE(args),
|
|
||||||
kwargs2);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
res = partial_call_impl(pto, args, kwargs2);
|
|
||||||
}
|
|
||||||
Py_XDECREF(kwargs2);
|
Py_XDECREF(kwargs2);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
@@ -365,11 +396,11 @@ partial_setstate(partialobject *pto, PyObject *state)
|
||||||
Py_INCREF(dict);
|
Py_INCREF(dict);
|
||||||
|
|
||||||
Py_INCREF(fn);
|
Py_INCREF(fn);
|
||||||
pto->use_fastcall = (_PyVectorcall_Function(fn) != NULL);
|
|
||||||
Py_SETREF(pto->fn, fn);
|
Py_SETREF(pto->fn, fn);
|
||||||
Py_SETREF(pto->args, fnargs);
|
Py_SETREF(pto->args, fnargs);
|
||||||
Py_SETREF(pto->kw, kw);
|
Py_SETREF(pto->kw, kw);
|
||||||
Py_XSETREF(pto->dict, dict);
|
Py_XSETREF(pto->dict, dict);
|
||||||
|
partial_setvectorcall(pto);
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -386,7 +417,7 @@ static PyTypeObject partial_type = {
|
||||||
0, /* tp_itemsize */
|
0, /* tp_itemsize */
|
||||||
/* methods */
|
/* methods */
|
||||||
(destructor)partial_dealloc, /* tp_dealloc */
|
(destructor)partial_dealloc, /* tp_dealloc */
|
||||||
0, /* tp_vectorcall_offset */
|
offsetof(partialobject, vectorcall),/* tp_vectorcall_offset */
|
||||||
0, /* tp_getattr */
|
0, /* tp_getattr */
|
||||||
0, /* tp_setattr */
|
0, /* tp_setattr */
|
||||||
0, /* tp_as_async */
|
0, /* tp_as_async */
|
||||||
|
|
@@ -401,7 +432,8 @@ static PyTypeObject partial_type = {
|
||||||
PyObject_GenericSetAttr, /* tp_setattro */
|
PyObject_GenericSetAttr, /* tp_setattro */
|
||||||
0, /* tp_as_buffer */
|
0, /* tp_as_buffer */
|
||||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
|
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
|
||||||
Py_TPFLAGS_BASETYPE, /* tp_flags */
|
Py_TPFLAGS_BASETYPE |
|
||||||
|
_Py_TPFLAGS_HAVE_VECTORCALL, /* tp_flags */
|
||||||
partial_doc, /* tp_doc */
|
partial_doc, /* tp_doc */
|
||||||
(traverseproc)partial_traverse, /* tp_traverse */
|
(traverseproc)partial_traverse, /* tp_traverse */
|
||||||
0, /* tp_clear */
|
0, /* tp_clear */
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue