gh-106581: Project through calls (#108067)

This finishes the work begun in gh-107760. When, while projecting a superblock, we encounter a call to a short, simple function, the superblock will now enter the function using `_PUSH_FRAME`, continue through it, and leave it using `_POP_FRAME`, and then continue through the original code. Multiple frame pushes and pops are even possible. It is also possible to stop appending to the superblock in the middle of a called function, when running out of space or encountering an unsupported bytecode.
This commit is contained in:
Guido van Rossum 2023-08-17 11:29:58 -07:00 committed by GitHub
parent 292a22bdc2
commit 61c7249759
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 409 additions and 109 deletions

View file

@ -223,7 +223,73 @@ error:
return NULL;
}
uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
/*
Function versions
-----------------
Function versions are used to detect when a function object has been
updated, invalidating inline cache data used by the `CALL` bytecode
(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations).
They are also used by the Tier 2 superblock creation code to find
the function being called (and from there the code object).
How does a function's `func_version` field get initialized?
- `PyFunction_New` and friends initialize it to 0.
- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`.
- It is reset to 0 when various attributes like `__code__` are set.
- A new version is allocated by `_PyFunction_GetVersionForCurrentState`
when the specializer needs a version and the version is 0.
The latter allocates versions using a counter in the interpreter state;
when the counter wraps around to 0, no more versions are allocated.
There is one other special case: functions with a non-standard
`vectorcall` field are not given a version.
When the function version is 0, the `CALL` bytecode is not specialized.
Code object versions
--------------------
So where do code objects get their `co_version`? There is a single
static global counter, `_Py_next_func_version`. This is initialized in
the generated (!) file `Python/deepfreeze/deepfreeze.c`, to 1 plus the
number of deep-frozen function objects in that file.
(In `_bootstrap_python.c` and `freeze_module.c` it is initialized to 1.)
Code objects get a new `co_version` allocated from this counter upon
creation. Since code objects are nominally immutable, `co_version` can
not be invalidated. The only way it can be 0 is when 2**32 or more
code objects have been created during the process's lifetime.
(The counter isn't reset by `fork()`, extending the lifetime.)
*/
/* Assign `version` to `func` and keep the per-interpreter version cache
   consistent with that assignment.

   The cache holds plain (non-owning) pointers, indexed by
   `version % FUNC_VERSION_CACHE_SIZE`.  A `version` of 0 means "no
   version" and is never entered into the cache.

   Before the new version is stored, the slot belonging to the function's
   previous version is cleared if it still points at `func`.  Without
   this, resetting a versioned function to 0 (or giving it a different
   version) would leave the old slot pointing at it; func_dealloc() only
   clears the slot of the version the function has at deallocation time,
   so the old slot would dangle once the function is freed. */
void
_PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
{
    uint32_t old_version = func->func_version;
    if (old_version == 0 && version == 0) {
        // Nothing cached before, nothing to cache now.
        return;
    }

    PyInterpreterState *interp = _PyInterpreterState_GET();
    if (old_version != 0) {
        PyFunctionObject **old_slot =
            interp->func_state.func_version_cache
            + (old_version % FUNC_VERSION_CACHE_SIZE);
        // Only clear the slot if we still own it; another function whose
        // version maps to the same index may have replaced us.
        if (*old_slot == func) {
            *old_slot = NULL;
        }
    }

    func->func_version = version;
    if (version != 0) {
        interp->func_state.func_version_cache[
            version % FUNC_VERSION_CACHE_SIZE] = func;
    }
}
/* Find the function currently registered under `version` in the
   per-interpreter version cache.

   Returns a new strong reference to the function when the slot for
   `version` is occupied by a function whose `func_version` equals
   `version`; returns NULL otherwise (empty slot, a slot taken over by a
   function with a different version, or `version == 0`).  No exception
   is set when NULL is returned.

   Version 0 means "no version", so it never identifies a function: it is
   rejected up front rather than being allowed to match a cached function
   whose version has since been reset to 0.

   NOTE(review): because the result is a new reference (Py_NewRef), every
   caller must release it -- confirm at the call sites. */
PyFunctionObject *
_PyFunction_LookupByVersion(uint32_t version)
{
    if (version == 0) {
        return NULL;
    }
    PyInterpreterState *interp = _PyInterpreterState_GET();
    PyFunctionObject *candidate =
        interp->func_state.func_version_cache[
            version % FUNC_VERSION_CACHE_SIZE];
    // Different versions can share a slot, so verify the exact version.
    if (candidate == NULL || candidate->func_version != version) {
        return NULL;
    }
    return (PyFunctionObject *)Py_NewRef(candidate);
}
uint32_t
_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
{
if (func->func_version != 0) {
return func->func_version;
@ -236,7 +302,7 @@ uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
return 0;
}
uint32_t v = interp->func_state.next_version++;
func->func_version = v;
_PyFunction_SetVersion(func, v);
return v;
}
@ -851,6 +917,15 @@ func_dealloc(PyFunctionObject *op)
if (op->func_weakreflist != NULL) {
PyObject_ClearWeakRefs((PyObject *) op);
}
if (op->func_version != 0) {
PyInterpreterState *interp = _PyInterpreterState_GET();
PyFunctionObject **slot =
interp->func_state.func_version_cache
+ (op->func_version % FUNC_VERSION_CACHE_SIZE);
if (*slot == op) {
*slot = NULL;
}
}
(void)func_clear(op);
// These aren't cleared by func_clear().
Py_DECREF(op->func_code);