bpo-37358: Use vectorcall for functools.partial (GH-14284)

https://bugs.python.org/issue37358
This commit is contained in:
Jeroen Demeyer 2019-07-13 16:39:18 +02:00 committed by Miss Islington (bot)
parent dc3f99fa77
commit ed184c06e2
2 changed files with 100 additions and 67 deletions

View file

@ -0,0 +1 @@
Optimized ``functools.partial`` by using vectorcall.

View file

@ -18,13 +18,15 @@ typedef struct {
PyObject *fn; PyObject *fn;
PyObject *args; PyObject *args;
PyObject *kw; PyObject *kw;
PyObject *dict; PyObject *dict; /* __dict__ */
PyObject *weakreflist; /* List of weak references */ PyObject *weakreflist; /* List of weak references */
int use_fastcall; vectorcallfunc vectorcall;
} partialobject; } partialobject;
static PyTypeObject partial_type; static PyTypeObject partial_type;
static void partial_setvectorcall(partialobject *pto);
static PyObject * static PyObject *
partial_new(PyTypeObject *type, PyObject *args, PyObject *kw) partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
{ {
@ -107,8 +109,7 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
return NULL; return NULL;
} }
pto->use_fastcall = (_PyVectorcall_Function(func) != NULL); partial_setvectorcall(pto);
return (PyObject *)pto; return (PyObject *)pto;
} }
@ -126,77 +127,107 @@ partial_dealloc(partialobject *pto)
Py_TYPE(pto)->tp_free(pto); Py_TYPE(pto)->tp_free(pto);
} }
static PyObject *
partial_fastcall(partialobject *pto, PyObject **args, Py_ssize_t nargs, /* Merging keyword arguments using the vectorcall convention is messy, so
PyObject *kwargs) * if we would need to do that, we stop using vectorcall and fall back
* to using partial_call() instead. */
_Py_NO_INLINE static PyObject *
partial_vectorcall_fallback(partialobject *pto, PyObject *const *args,
size_t nargsf, PyObject *kwnames)
{ {
pto->vectorcall = NULL;
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
return _PyObject_MakeTpCall((PyObject *)pto, args, nargs, kwnames);
}
static PyObject *
partial_vectorcall(partialobject *pto, PyObject *const *args,
size_t nargsf, PyObject *kwnames)
{
/* pto->kw is mutable, so need to check every time */
if (PyDict_GET_SIZE(pto->kw)) {
return partial_vectorcall_fallback(pto, args, nargsf, kwnames);
}
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
Py_ssize_t nargs_total = nargs;
if (kwnames != NULL) {
nargs_total += PyTuple_GET_SIZE(kwnames);
}
PyObject **pto_args = _PyTuple_ITEMS(pto->args);
Py_ssize_t pto_nargs = PyTuple_GET_SIZE(pto->args);
/* Fast path if we're called without arguments */
if (nargs_total == 0) {
return _PyObject_Vectorcall(pto->fn, pto_args, pto_nargs, NULL);
}
/* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
* positional argument */
if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
PyObject **newargs = (PyObject **)args - 1;
PyObject *tmp = newargs[0];
newargs[0] = pto_args[0];
PyObject *ret = _PyObject_Vectorcall(pto->fn, newargs, nargs + 1, kwnames);
newargs[0] = tmp;
return ret;
}
Py_ssize_t newnargs_total = pto_nargs + nargs_total;
PyObject *small_stack[_PY_FASTCALL_SMALL_STACK]; PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
PyObject *ret; PyObject *ret;
PyObject **stack, **stack_buf = NULL; PyObject **stack;
Py_ssize_t nargs2, pto_nargs;
pto_nargs = PyTuple_GET_SIZE(pto->args); if (newnargs_total <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
nargs2 = pto_nargs + nargs;
if (pto_nargs == 0) {
stack = args;
}
else if (nargs == 0) {
stack = _PyTuple_ITEMS(pto->args);
}
else {
if (nargs2 <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
stack = small_stack; stack = small_stack;
} }
else { else {
stack_buf = PyMem_Malloc(nargs2 * sizeof(PyObject *)); stack = PyMem_Malloc(newnargs_total * sizeof(PyObject *));
if (stack_buf == NULL) { if (stack == NULL) {
PyErr_NoMemory(); PyErr_NoMemory();
return NULL; return NULL;
} }
stack = stack_buf;
} }
/* use borrowed references */ /* Copy to new stack, using borrowed references */
memcpy(stack, memcpy(stack, pto_args, pto_nargs * sizeof(PyObject*));
_PyTuple_ITEMS(pto->args), memcpy(stack + pto_nargs, args, nargs_total * sizeof(PyObject*));
pto_nargs * sizeof(PyObject*));
memcpy(&stack[pto_nargs],
args,
nargs * sizeof(PyObject*));
}
ret = _PyObject_FastCallDict(pto->fn, stack, nargs2, kwargs); ret = _PyObject_Vectorcall(pto->fn, stack, pto_nargs + nargs, kwnames);
PyMem_Free(stack_buf); if (stack != small_stack) {
PyMem_Free(stack);
}
return ret; return ret;
} }
static PyObject * /* Set pto->vectorcall depending on the parameters of the partial object */
partial_call_impl(partialobject *pto, PyObject *args, PyObject *kwargs) static void
partial_setvectorcall(partialobject *pto)
{ {
PyObject *ret, *args2; if (_PyVectorcall_Function(pto->fn) == NULL) {
/* Don't use vectorcall if the underlying function doesn't support it */
/* Note: tupleconcat() is optimized for empty tuples */ pto->vectorcall = NULL;
args2 = PySequence_Concat(pto->args, args); }
if (args2 == NULL) { /* We could have a special case if there are no arguments,
return NULL; * but that is unlikely (why use partial without arguments?),
* so we don't optimize that */
else {
pto->vectorcall = (vectorcallfunc)partial_vectorcall;
} }
assert(PyTuple_Check(args2));
ret = PyObject_Call(pto->fn, args2, kwargs);
Py_DECREF(args2);
return ret;
} }
static PyObject * static PyObject *
partial_call(partialobject *pto, PyObject *args, PyObject *kwargs) partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
{ {
PyObject *kwargs2, *res; assert(PyCallable_Check(pto->fn));
assert(PyTuple_Check(pto->args));
assert (PyCallable_Check(pto->fn)); assert(PyDict_Check(pto->kw));
assert (PyTuple_Check(pto->args));
assert (PyDict_Check(pto->kw));
/* Merge keywords */
PyObject *kwargs2;
if (PyDict_GET_SIZE(pto->kw) == 0) { if (PyDict_GET_SIZE(pto->kw) == 0) {
/* kwargs can be NULL */ /* kwargs can be NULL */
kwargs2 = kwargs; kwargs2 = kwargs;
@ -219,16 +250,16 @@ partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
} }
} }
/* Merge positional arguments */
/* Note: tupleconcat() is optimized for empty tuples */
PyObject *args2 = PySequence_Concat(pto->args, args);
if (args2 == NULL) {
Py_XDECREF(kwargs2);
return NULL;
}
if (pto->use_fastcall) { PyObject *res = PyObject_Call(pto->fn, args2, kwargs2);
res = partial_fastcall(pto, Py_DECREF(args2);
_PyTuple_ITEMS(args),
PyTuple_GET_SIZE(args),
kwargs2);
}
else {
res = partial_call_impl(pto, args, kwargs2);
}
Py_XDECREF(kwargs2); Py_XDECREF(kwargs2);
return res; return res;
} }
@ -365,11 +396,11 @@ partial_setstate(partialobject *pto, PyObject *state)
Py_INCREF(dict); Py_INCREF(dict);
Py_INCREF(fn); Py_INCREF(fn);
pto->use_fastcall = (_PyVectorcall_Function(fn) != NULL);
Py_SETREF(pto->fn, fn); Py_SETREF(pto->fn, fn);
Py_SETREF(pto->args, fnargs); Py_SETREF(pto->args, fnargs);
Py_SETREF(pto->kw, kw); Py_SETREF(pto->kw, kw);
Py_XSETREF(pto->dict, dict); Py_XSETREF(pto->dict, dict);
partial_setvectorcall(pto);
Py_RETURN_NONE; Py_RETURN_NONE;
} }
@ -386,7 +417,7 @@ static PyTypeObject partial_type = {
0, /* tp_itemsize */ 0, /* tp_itemsize */
/* methods */ /* methods */
(destructor)partial_dealloc, /* tp_dealloc */ (destructor)partial_dealloc, /* tp_dealloc */
0, /* tp_vectorcall_offset */ offsetof(partialobject, vectorcall),/* tp_vectorcall_offset */
0, /* tp_getattr */ 0, /* tp_getattr */
0, /* tp_setattr */ 0, /* tp_setattr */
0, /* tp_as_async */ 0, /* tp_as_async */
@ -401,7 +432,8 @@ static PyTypeObject partial_type = {
PyObject_GenericSetAttr, /* tp_setattro */ PyObject_GenericSetAttr, /* tp_setattro */
0, /* tp_as_buffer */ 0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
Py_TPFLAGS_BASETYPE, /* tp_flags */ Py_TPFLAGS_BASETYPE |
_Py_TPFLAGS_HAVE_VECTORCALL, /* tp_flags */
partial_doc, /* tp_doc */ partial_doc, /* tp_doc */
(traverseproc)partial_traverse, /* tp_traverse */ (traverseproc)partial_traverse, /* tp_traverse */
0, /* tp_clear */ 0, /* tp_clear */