mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 19:34:08 +00:00 
			
		
		
		
	bpo-37358: Use vectorcall for functools.partial (GH-14284)
https://bugs.python.org/issue37358
This commit is contained in:
		
							parent
							
								
									dc3f99fa77
								
							
						
					
					
						commit
						ed184c06e2
					
				
					 2 changed files with 100 additions and 67 deletions
				
			
		| 
						 | 
					@ -0,0 +1 @@
 | 
				
			||||||
 | 
					Optimized ``functools.partial`` by using vectorcall.
 | 
				
			||||||
| 
						 | 
					@ -18,13 +18,15 @@ typedef struct {
 | 
				
			||||||
    PyObject *fn;
 | 
					    PyObject *fn;
 | 
				
			||||||
    PyObject *args;
 | 
					    PyObject *args;
 | 
				
			||||||
    PyObject *kw;
 | 
					    PyObject *kw;
 | 
				
			||||||
    PyObject *dict;
 | 
					    PyObject *dict;        /* __dict__ */
 | 
				
			||||||
    PyObject *weakreflist; /* List of weak references */
 | 
					    PyObject *weakreflist; /* List of weak references */
 | 
				
			||||||
    int use_fastcall;
 | 
					    vectorcallfunc vectorcall;
 | 
				
			||||||
} partialobject;
 | 
					} partialobject;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PyTypeObject partial_type;
 | 
					static PyTypeObject partial_type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void partial_setvectorcall(partialobject *pto);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PyObject *
 | 
					static PyObject *
 | 
				
			||||||
partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 | 
					partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -107,8 +109,7 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 | 
				
			||||||
        return NULL;
 | 
					        return NULL;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pto->use_fastcall = (_PyVectorcall_Function(func) != NULL);
 | 
					    partial_setvectorcall(pto);
 | 
				
			||||||
 | 
					 | 
				
			||||||
    return (PyObject *)pto;
 | 
					    return (PyObject *)pto;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -126,77 +127,107 @@ partial_dealloc(partialobject *pto)
 | 
				
			||||||
    Py_TYPE(pto)->tp_free(pto);
 | 
					    Py_TYPE(pto)->tp_free(pto);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PyObject *
 | 
					
 | 
				
			||||||
partial_fastcall(partialobject *pto, PyObject **args, Py_ssize_t nargs,
 | 
					/* Merging keyword arguments using the vectorcall convention is messy, so
 | 
				
			||||||
                 PyObject *kwargs)
 | 
					 * if we would need to do that, we stop using vectorcall and fall back
 | 
				
			||||||
 | 
					 * to using partial_call() instead. */
 | 
				
			||||||
 | 
					_Py_NO_INLINE static PyObject *
 | 
				
			||||||
 | 
					partial_vectorcall_fallback(partialobject *pto, PyObject *const *args,
 | 
				
			||||||
 | 
					                            size_t nargsf, PyObject *kwnames)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					    pto->vectorcall = NULL;
 | 
				
			||||||
 | 
					    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
 | 
				
			||||||
 | 
					    return _PyObject_MakeTpCall((PyObject *)pto, args, nargs, kwnames);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static PyObject *
 | 
				
			||||||
 | 
					partial_vectorcall(partialobject *pto, PyObject *const *args,
 | 
				
			||||||
 | 
					                   size_t nargsf, PyObject *kwnames)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    /* pto->kw is mutable, so need to check every time */
 | 
				
			||||||
 | 
					    if (PyDict_GET_SIZE(pto->kw)) {
 | 
				
			||||||
 | 
					        return partial_vectorcall_fallback(pto, args, nargsf, kwnames);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
 | 
				
			||||||
 | 
					    Py_ssize_t nargs_total = nargs;
 | 
				
			||||||
 | 
					    if (kwnames != NULL) {
 | 
				
			||||||
 | 
					        nargs_total += PyTuple_GET_SIZE(kwnames);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    PyObject **pto_args = _PyTuple_ITEMS(pto->args);
 | 
				
			||||||
 | 
					    Py_ssize_t pto_nargs = PyTuple_GET_SIZE(pto->args);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* Fast path if we're called without arguments */
 | 
				
			||||||
 | 
					    if (nargs_total == 0) {
 | 
				
			||||||
 | 
					        return _PyObject_Vectorcall(pto->fn, pto_args, pto_nargs, NULL);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
 | 
				
			||||||
 | 
					     * positional argument */
 | 
				
			||||||
 | 
					    if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
 | 
				
			||||||
 | 
					        PyObject **newargs = (PyObject **)args - 1;
 | 
				
			||||||
 | 
					        PyObject *tmp = newargs[0];
 | 
				
			||||||
 | 
					        newargs[0] = pto_args[0];
 | 
				
			||||||
 | 
					        PyObject *ret = _PyObject_Vectorcall(pto->fn, newargs, nargs + 1, kwnames);
 | 
				
			||||||
 | 
					        newargs[0] = tmp;
 | 
				
			||||||
 | 
					        return ret;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Py_ssize_t newnargs_total = pto_nargs + nargs_total;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
 | 
					    PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
 | 
				
			||||||
    PyObject *ret;
 | 
					    PyObject *ret;
 | 
				
			||||||
    PyObject **stack, **stack_buf = NULL;
 | 
					    PyObject **stack;
 | 
				
			||||||
    Py_ssize_t nargs2, pto_nargs;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pto_nargs = PyTuple_GET_SIZE(pto->args);
 | 
					    if (newnargs_total <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
 | 
				
			||||||
    nargs2 = pto_nargs + nargs;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (pto_nargs == 0) {
 | 
					 | 
				
			||||||
        stack = args;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    else if (nargs == 0) {
 | 
					 | 
				
			||||||
        stack = _PyTuple_ITEMS(pto->args);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    else {
 | 
					 | 
				
			||||||
        if (nargs2 <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
 | 
					 | 
				
			||||||
        stack = small_stack;
 | 
					        stack = small_stack;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    else {
 | 
					    else {
 | 
				
			||||||
            stack_buf = PyMem_Malloc(nargs2 * sizeof(PyObject *));
 | 
					        stack = PyMem_Malloc(newnargs_total * sizeof(PyObject *));
 | 
				
			||||||
            if (stack_buf == NULL) {
 | 
					        if (stack == NULL) {
 | 
				
			||||||
            PyErr_NoMemory();
 | 
					            PyErr_NoMemory();
 | 
				
			||||||
            return NULL;
 | 
					            return NULL;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
            stack = stack_buf;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        /* use borrowed references */
 | 
					    /* Copy to new stack, using borrowed references */
 | 
				
			||||||
        memcpy(stack,
 | 
					    memcpy(stack, pto_args, pto_nargs * sizeof(PyObject*));
 | 
				
			||||||
               _PyTuple_ITEMS(pto->args),
 | 
					    memcpy(stack + pto_nargs, args, nargs_total * sizeof(PyObject*));
 | 
				
			||||||
               pto_nargs * sizeof(PyObject*));
 | 
					 | 
				
			||||||
        memcpy(&stack[pto_nargs],
 | 
					 | 
				
			||||||
               args,
 | 
					 | 
				
			||||||
               nargs * sizeof(PyObject*));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ret = _PyObject_FastCallDict(pto->fn, stack, nargs2, kwargs);
 | 
					    ret = _PyObject_Vectorcall(pto->fn, stack, pto_nargs + nargs, kwnames);
 | 
				
			||||||
    PyMem_Free(stack_buf);
 | 
					    if (stack != small_stack) {
 | 
				
			||||||
 | 
					        PyMem_Free(stack);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
    return ret;
 | 
					    return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PyObject *
 | 
					/* Set pto->vectorcall depending on the parameters of the partial object */
 | 
				
			||||||
partial_call_impl(partialobject *pto, PyObject *args, PyObject *kwargs)
 | 
					static void
 | 
				
			||||||
 | 
					partial_setvectorcall(partialobject *pto)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    PyObject *ret, *args2;
 | 
					    if (_PyVectorcall_Function(pto->fn) == NULL) {
 | 
				
			||||||
 | 
					        /* Don't use vectorcall if the underlying function doesn't support it */
 | 
				
			||||||
    /* Note: tupleconcat() is optimized for empty tuples */
 | 
					        pto->vectorcall = NULL;
 | 
				
			||||||
    args2 = PySequence_Concat(pto->args, args);
 | 
					    }
 | 
				
			||||||
    if (args2 == NULL) {
 | 
					    /* We could have a special case if there are no arguments,
 | 
				
			||||||
        return NULL;
 | 
					     * but that is unlikely (why use partial without arguments?),
 | 
				
			||||||
 | 
					     * so we don't optimize that */
 | 
				
			||||||
 | 
					    else {
 | 
				
			||||||
 | 
					        pto->vectorcall = (vectorcallfunc)partial_vectorcall;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    assert(PyTuple_Check(args2));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    ret = PyObject_Call(pto->fn, args2, kwargs);
 | 
					 | 
				
			||||||
    Py_DECREF(args2);
 | 
					 | 
				
			||||||
    return ret;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PyObject *
 | 
					static PyObject *
 | 
				
			||||||
partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
 | 
					partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    PyObject *kwargs2, *res;
 | 
					    assert(PyCallable_Check(pto->fn));
 | 
				
			||||||
 | 
					    assert(PyTuple_Check(pto->args));
 | 
				
			||||||
    assert (PyCallable_Check(pto->fn));
 | 
					    assert(PyDict_Check(pto->kw));
 | 
				
			||||||
    assert (PyTuple_Check(pto->args));
 | 
					 | 
				
			||||||
    assert (PyDict_Check(pto->kw));
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* Merge keywords */
 | 
				
			||||||
 | 
					    PyObject *kwargs2;
 | 
				
			||||||
    if (PyDict_GET_SIZE(pto->kw) == 0) {
 | 
					    if (PyDict_GET_SIZE(pto->kw) == 0) {
 | 
				
			||||||
        /* kwargs can be NULL */
 | 
					        /* kwargs can be NULL */
 | 
				
			||||||
        kwargs2 = kwargs;
 | 
					        kwargs2 = kwargs;
 | 
				
			||||||
| 
						 | 
					@ -219,16 +250,16 @@ partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* Merge positional arguments */
 | 
				
			||||||
 | 
					    /* Note: tupleconcat() is optimized for empty tuples */
 | 
				
			||||||
 | 
					    PyObject *args2 = PySequence_Concat(pto->args, args);
 | 
				
			||||||
 | 
					    if (args2 == NULL) {
 | 
				
			||||||
 | 
					        Py_XDECREF(kwargs2);
 | 
				
			||||||
 | 
					        return NULL;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (pto->use_fastcall) {
 | 
					    PyObject *res = PyObject_Call(pto->fn, args2, kwargs2);
 | 
				
			||||||
        res = partial_fastcall(pto,
 | 
					    Py_DECREF(args2);
 | 
				
			||||||
                               _PyTuple_ITEMS(args),
 | 
					 | 
				
			||||||
                               PyTuple_GET_SIZE(args),
 | 
					 | 
				
			||||||
                               kwargs2);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    else {
 | 
					 | 
				
			||||||
        res = partial_call_impl(pto, args, kwargs2);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    Py_XDECREF(kwargs2);
 | 
					    Py_XDECREF(kwargs2);
 | 
				
			||||||
    return res;
 | 
					    return res;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -365,11 +396,11 @@ partial_setstate(partialobject *pto, PyObject *state)
 | 
				
			||||||
        Py_INCREF(dict);
 | 
					        Py_INCREF(dict);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Py_INCREF(fn);
 | 
					    Py_INCREF(fn);
 | 
				
			||||||
    pto->use_fastcall = (_PyVectorcall_Function(fn) != NULL);
 | 
					 | 
				
			||||||
    Py_SETREF(pto->fn, fn);
 | 
					    Py_SETREF(pto->fn, fn);
 | 
				
			||||||
    Py_SETREF(pto->args, fnargs);
 | 
					    Py_SETREF(pto->args, fnargs);
 | 
				
			||||||
    Py_SETREF(pto->kw, kw);
 | 
					    Py_SETREF(pto->kw, kw);
 | 
				
			||||||
    Py_XSETREF(pto->dict, dict);
 | 
					    Py_XSETREF(pto->dict, dict);
 | 
				
			||||||
 | 
					    partial_setvectorcall(pto);
 | 
				
			||||||
    Py_RETURN_NONE;
 | 
					    Py_RETURN_NONE;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -386,7 +417,7 @@ static PyTypeObject partial_type = {
 | 
				
			||||||
    0,                                  /* tp_itemsize */
 | 
					    0,                                  /* tp_itemsize */
 | 
				
			||||||
    /* methods */
 | 
					    /* methods */
 | 
				
			||||||
    (destructor)partial_dealloc,        /* tp_dealloc */
 | 
					    (destructor)partial_dealloc,        /* tp_dealloc */
 | 
				
			||||||
    0,                                  /* tp_vectorcall_offset */
 | 
					    offsetof(partialobject, vectorcall),/* tp_vectorcall_offset */
 | 
				
			||||||
    0,                                  /* tp_getattr */
 | 
					    0,                                  /* tp_getattr */
 | 
				
			||||||
    0,                                  /* tp_setattr */
 | 
					    0,                                  /* tp_setattr */
 | 
				
			||||||
    0,                                  /* tp_as_async */
 | 
					    0,                                  /* tp_as_async */
 | 
				
			||||||
| 
						 | 
					@ -401,7 +432,8 @@ static PyTypeObject partial_type = {
 | 
				
			||||||
    PyObject_GenericSetAttr,            /* tp_setattro */
 | 
					    PyObject_GenericSetAttr,            /* tp_setattro */
 | 
				
			||||||
    0,                                  /* tp_as_buffer */
 | 
					    0,                                  /* tp_as_buffer */
 | 
				
			||||||
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
 | 
					    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
 | 
				
			||||||
        Py_TPFLAGS_BASETYPE,            /* tp_flags */
 | 
					        Py_TPFLAGS_BASETYPE |
 | 
				
			||||||
 | 
					        _Py_TPFLAGS_HAVE_VECTORCALL,    /* tp_flags */
 | 
				
			||||||
    partial_doc,                        /* tp_doc */
 | 
					    partial_doc,                        /* tp_doc */
 | 
				
			||||||
    (traverseproc)partial_traverse,     /* tp_traverse */
 | 
					    (traverseproc)partial_traverse,     /* tp_traverse */
 | 
				
			||||||
    0,                                  /* tp_clear */
 | 
					    0,                                  /* tp_clear */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue