mirror of
https://github.com/python/cpython.git
synced 2025-12-10 02:50:09 +00:00
GH-96421: Insert shim frame on entry to interpreter (GH-96319)
* Adds EXIT_INTERPRETER instruction to exit PyEval_EvalDefault() * Simplifies RETURN_VALUE, YIELD_VALUE and RETURN_GENERATOR instructions as they no longer need to check for entry frames.
This commit is contained in:
parent
dbf2faf579
commit
1e197e63e2
24 changed files with 450 additions and 346 deletions
|
|
@ -2264,3 +2264,78 @@ _PyStaticCode_Init(PyCodeObject *co)
|
|||
_PyCode_Quicken(co);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define MAX_CODE_UNITS_PER_LOC_ENTRY 8
|
||||
|
||||
PyCodeObject *
|
||||
_Py_MakeShimCode(const _PyShimCodeDef *codedef)
|
||||
{
|
||||
PyObject *name = NULL;
|
||||
PyObject *co_code = NULL;
|
||||
PyObject *lines = NULL;
|
||||
PyCodeObject *codeobj = NULL;
|
||||
uint8_t *loc_table = NULL;
|
||||
|
||||
name = _PyUnicode_FromASCII(codedef->cname, strlen(codedef->cname));
|
||||
if (name == NULL) {
|
||||
goto cleanup;
|
||||
}
|
||||
co_code = PyBytes_FromStringAndSize(
|
||||
(const char *)codedef->code, codedef->codelen);
|
||||
if (co_code == NULL) {
|
||||
goto cleanup;
|
||||
}
|
||||
int code_units = codedef->codelen / sizeof(_Py_CODEUNIT);
|
||||
int loc_entries = (code_units + MAX_CODE_UNITS_PER_LOC_ENTRY - 1) /
|
||||
MAX_CODE_UNITS_PER_LOC_ENTRY;
|
||||
loc_table = PyMem_Malloc(loc_entries);
|
||||
if (loc_table == NULL) {
|
||||
PyErr_NoMemory();
|
||||
goto cleanup;
|
||||
}
|
||||
for (int i = 0; i < loc_entries-1; i++) {
|
||||
loc_table[i] = 0x80 | (PY_CODE_LOCATION_INFO_NONE << 3) | 7;
|
||||
code_units -= MAX_CODE_UNITS_PER_LOC_ENTRY;
|
||||
}
|
||||
assert(loc_entries > 0);
|
||||
assert(code_units > 0 && code_units <= MAX_CODE_UNITS_PER_LOC_ENTRY);
|
||||
loc_table[loc_entries-1] = 0x80 |
|
||||
(PY_CODE_LOCATION_INFO_NONE << 3) | (code_units-1);
|
||||
lines = PyBytes_FromStringAndSize((const char *)loc_table, loc_entries);
|
||||
PyMem_Free(loc_table);
|
||||
if (lines == NULL) {
|
||||
goto cleanup;
|
||||
}
|
||||
_Py_DECLARE_STR(shim_name, "<shim>");
|
||||
struct _PyCodeConstructor con = {
|
||||
.filename = &_Py_STR(shim_name),
|
||||
.name = name,
|
||||
.qualname = name,
|
||||
.flags = CO_NEWLOCALS | CO_OPTIMIZED,
|
||||
|
||||
.code = co_code,
|
||||
.firstlineno = 1,
|
||||
.linetable = lines,
|
||||
|
||||
.consts = (PyObject *)&_Py_SINGLETON(tuple_empty),
|
||||
.names = (PyObject *)&_Py_SINGLETON(tuple_empty),
|
||||
|
||||
.localsplusnames = (PyObject *)&_Py_SINGLETON(tuple_empty),
|
||||
.localspluskinds = (PyObject *)&_Py_SINGLETON(bytes_empty),
|
||||
|
||||
.argcount = 0,
|
||||
.posonlyargcount = 0,
|
||||
.kwonlyargcount = 0,
|
||||
|
||||
.stacksize = codedef->stacksize,
|
||||
|
||||
.exceptiontable = (PyObject *)&_Py_SINGLETON(bytes_empty),
|
||||
};
|
||||
|
||||
codeobj = _PyCode_New(&con);
|
||||
cleanup:
|
||||
Py_XDECREF(name);
|
||||
Py_XDECREF(co_code);
|
||||
Py_XDECREF(lines);
|
||||
return codeobj;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,10 +9,10 @@ results in poor locality of reference.
|
|||
|
||||
In 3.11, rather than have these frames scattered about memory,
|
||||
as happens for heap-allocated objects, frames are allocated
|
||||
contiguously in a per-thread stack.
|
||||
contiguously in a per-thread stack.
|
||||
This improves performance significantly for two reasons:
|
||||
* It reduces allocation overhead to a pointer comparison and increment.
|
||||
* Stack allocated data has the best possible locality and will always be in
|
||||
* Stack allocated data has the best possible locality and will always be in
|
||||
CPU cache.
|
||||
|
||||
Generators and coroutines still need heap-allocated activation records, but
|
||||
|
|
@ -63,7 +63,7 @@ We may implement this in the future.
|
|||
|
||||
> In a contiguous stack, we would need to save one fewer register, as the
|
||||
> top of the caller's activation record would be the same as the base of the
|
||||
> callee's. However, since some activation records are kept on the heap we
|
||||
> callee's. However, since some activation records are kept on the heap we
|
||||
> cannot do this.
|
||||
|
||||
### Generators and Coroutines
|
||||
|
|
@ -85,7 +85,7 @@ and builtins, than strong references to both globals and builtins.
|
|||
### Frame objects
|
||||
|
||||
When creating a backtrace or when calling `sys._getframe()`, the frame becomes
|
||||
visible to Python code. When this happens a new `PyFrameObject` is created
|
||||
visible to Python code. When this happens a new `PyFrameObject` is created
|
||||
and a strong reference to it placed in the `frame_obj` field of the specials
|
||||
section. The `frame_obj` field is initially `NULL`.
|
||||
|
||||
|
|
@ -104,7 +104,7 @@ Generator objects have a `_PyInterpreterFrame` embedded in them.
|
|||
This means that creating a generator requires only a single allocation,
|
||||
reducing allocation overhead and improving locality of reference.
|
||||
The embedded frame is linked into the per-thread frame stack when iterated or
|
||||
awaited.
|
||||
awaited.
|
||||
|
||||
If a frame object associated with a generator outlives the generator, then
|
||||
the embedded `_PyInterpreterFrame` is copied into the frame object.
|
||||
|
|
@ -119,4 +119,14 @@ Thus, some of the field names may be a bit misleading.
|
|||
|
||||
For example, the `f_globals` field has an `f_` prefix, implying it belongs to the
|
||||
`PyFrameObject` struct, although it belongs to the `_PyInterpreterFrame` struct.
|
||||
We may rationalize this naming scheme for 3.12.
|
||||
We may rationalize this naming scheme for 3.12.
|
||||
|
||||
|
||||
### Shim frames
|
||||
|
||||
On entry to `_PyEval_EvalFrameDefault()` a shim `_PyInterpreterFrame` is pushed.
|
||||
This frame is stored on the C stack, and popped when `_PyEval_EvalFrameDefault()`
|
||||
returns. This extra frame is inserted so that `RETURN_VALUE`, `YIELD_VALUE`, and
|
||||
`RETURN_GENERATOR` do not need to check whether the current frame is the entry frame.
|
||||
The shim frame points to a special code object containing the `INTERPRETER_EXIT`
|
||||
instruction, which cleans up the shim frame and returns.
|
||||
|
|
|
|||
|
|
@ -1329,15 +1329,15 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/* Report whether `frame` is an entry frame.
 *
 * `frame` must be non-NULL and its interpreter frame must not be incomplete
 * (both are asserted). Returns nonzero when the underlying interpreter frame
 * has a predecessor whose owner is FRAME_OWNED_BY_CSTACK, and zero otherwise,
 * including when there is no predecessor at all.
 */
int
_PyFrame_IsEntryFrame(PyFrameObject *frame)
{
    assert(frame != NULL);
    _PyInterpreterFrame *f = frame->f_frame;
    assert(!_PyFrame_IsIncomplete(f));
    /* Check `previous` first so a missing predecessor is never dereferenced. */
    return f->previous && f->previous->owner == FRAME_OWNED_BY_CSTACK;
}
|
||||
|
||||
|
||||
PyCodeObject *
|
||||
PyFrame_GetCode(PyFrameObject *frame)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -207,8 +207,6 @@ gen_send_ex2(PyGenObject *gen, PyObject *arg, PyObject **presult,
|
|||
Py_INCREF(result);
|
||||
_PyFrame_StackPush(frame, result);
|
||||
|
||||
frame->previous = tstate->cframe->current_frame;
|
||||
|
||||
_PyErr_StackItem *prev_exc_info = tstate->exc_info;
|
||||
gen->gi_exc_state.previous_item = prev_exc_info;
|
||||
tstate->exc_info = &gen->gi_exc_state;
|
||||
|
|
@ -223,14 +221,8 @@ gen_send_ex2(PyGenObject *gen, PyObject *arg, PyObject **presult,
|
|||
result = _PyEval_EvalFrame(tstate, frame, exc);
|
||||
assert(tstate->exc_info == prev_exc_info);
|
||||
assert(gen->gi_exc_state.previous_item == NULL);
|
||||
if (gen->gi_frame_state == FRAME_EXECUTING) {
|
||||
gen->gi_frame_state = FRAME_COMPLETED;
|
||||
}
|
||||
assert(tstate->cframe->current_frame == frame->previous);
|
||||
/* Don't keep the reference to previous any longer than necessary. It
|
||||
* may keep a chain of frames alive or it could create a reference
|
||||
* cycle. */
|
||||
frame->previous = NULL;
|
||||
assert(gen->gi_frame_state != FRAME_EXECUTING);
|
||||
assert(frame->previous == NULL);
|
||||
|
||||
/* If the generator just returned (as opposed to yielding), signal
|
||||
* that the generator is exhausted. */
|
||||
|
|
@ -255,8 +247,7 @@ gen_send_ex2(PyGenObject *gen, PyObject *arg, PyObject **presult,
|
|||
/* first clean reference cycle through stored exception traceback */
|
||||
_PyErr_ClearExcState(&gen->gi_exc_state);
|
||||
|
||||
gen->gi_frame_state = FRAME_CLEARED;
|
||||
_PyFrame_Clear(frame);
|
||||
assert(gen->gi_frame_state == FRAME_CLEARED);
|
||||
*presult = result;
|
||||
return result ? PYGEN_RETURN : PYGEN_ERROR;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue