GH-96421: Insert shim frame on entry to interpreter (GH-96319)

* Adds INTERPRETER_EXIT instruction to exit _PyEval_EvalFrameDefault()

* Simplifies RETURN_VALUE, YIELD_VALUE and RETURN_GENERATOR instructions as they no longer need to check for entry frames.
This commit is contained in:
Mark Shannon 2022-11-10 04:34:57 -08:00 committed by GitHub
parent dbf2faf579
commit 1e197e63e2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 450 additions and 346 deletions

View file

@ -2264,3 +2264,78 @@ _PyStaticCode_Init(PyCodeObject *co)
_PyCode_Quicken(co);
return 0;
}
#define MAX_CODE_UNITS_PER_LOC_ENTRY 8
PyCodeObject *
_Py_MakeShimCode(const _PyShimCodeDef *codedef)
{
PyObject *name = NULL;
PyObject *co_code = NULL;
PyObject *lines = NULL;
PyCodeObject *codeobj = NULL;
uint8_t *loc_table = NULL;
name = _PyUnicode_FromASCII(codedef->cname, strlen(codedef->cname));
if (name == NULL) {
goto cleanup;
}
co_code = PyBytes_FromStringAndSize(
(const char *)codedef->code, codedef->codelen);
if (co_code == NULL) {
goto cleanup;
}
int code_units = codedef->codelen / sizeof(_Py_CODEUNIT);
int loc_entries = (code_units + MAX_CODE_UNITS_PER_LOC_ENTRY - 1) /
MAX_CODE_UNITS_PER_LOC_ENTRY;
loc_table = PyMem_Malloc(loc_entries);
if (loc_table == NULL) {
PyErr_NoMemory();
goto cleanup;
}
for (int i = 0; i < loc_entries-1; i++) {
loc_table[i] = 0x80 | (PY_CODE_LOCATION_INFO_NONE << 3) | 7;
code_units -= MAX_CODE_UNITS_PER_LOC_ENTRY;
}
assert(loc_entries > 0);
assert(code_units > 0 && code_units <= MAX_CODE_UNITS_PER_LOC_ENTRY);
loc_table[loc_entries-1] = 0x80 |
(PY_CODE_LOCATION_INFO_NONE << 3) | (code_units-1);
lines = PyBytes_FromStringAndSize((const char *)loc_table, loc_entries);
PyMem_Free(loc_table);
if (lines == NULL) {
goto cleanup;
}
_Py_DECLARE_STR(shim_name, "<shim>");
struct _PyCodeConstructor con = {
.filename = &_Py_STR(shim_name),
.name = name,
.qualname = name,
.flags = CO_NEWLOCALS | CO_OPTIMIZED,
.code = co_code,
.firstlineno = 1,
.linetable = lines,
.consts = (PyObject *)&_Py_SINGLETON(tuple_empty),
.names = (PyObject *)&_Py_SINGLETON(tuple_empty),
.localsplusnames = (PyObject *)&_Py_SINGLETON(tuple_empty),
.localspluskinds = (PyObject *)&_Py_SINGLETON(bytes_empty),
.argcount = 0,
.posonlyargcount = 0,
.kwonlyargcount = 0,
.stacksize = codedef->stacksize,
.exceptiontable = (PyObject *)&_Py_SINGLETON(bytes_empty),
};
codeobj = _PyCode_New(&con);
cleanup:
Py_XDECREF(name);
Py_XDECREF(co_code);
Py_XDECREF(lines);
return codeobj;
}

View file

@ -9,10 +9,10 @@ results in poor locality of reference.
In 3.11, rather than have these frames scattered about memory,
as happens for heap-allocated objects, frames are allocated
contiguously in a per-thread stack.
contiguously in a per-thread stack.
This improves performance significantly for two reasons:
* It reduces allocation overhead to a pointer comparison and increment.
* Stack allocated data has the best possible locality and will always be in
* Stack allocated data has the best possible locality and will always be in
CPU cache.
Generators and coroutines still need heap allocated activation records, but
@ -63,7 +63,7 @@ We may implement this in the future.
> In a contiguous stack, we would need to save one fewer registers, as the
> top of the caller's activation record would be the same as the base of the
> callee's. However, since some activation records are kept on the heap we
> callee's. However, since some activation records are kept on the heap we
> cannot do this.
### Generators and Coroutines
@ -85,7 +85,7 @@ and builtins, than strong references to both globals and builtins.
### Frame objects
When creating a backtrace or when calling `sys._getframe()` the frame becomes
visible to Python code. When this happens a new `PyFrameObject` is created
visible to Python code. When this happens a new `PyFrameObject` is created
and a strong reference to it placed in the `frame_obj` field of the specials
section. The `frame_obj` field is initially `NULL`.
@ -104,7 +104,7 @@ Generator objects have a `_PyInterpreterFrame` embedded in them.
This means that creating a generator requires only a single allocation,
reducing allocation overhead and improving locality of reference.
The embedded frame is linked into the per-thread frame when iterated or
awaited.
awaited.
If a frame object associated with a generator outlives the generator, then
the embedded `_PyInterpreterFrame` is copied into the frame object.
@ -119,4 +119,14 @@ Thus, some of the field names may be a bit misleading.
For example the `f_globals` field has a `f_` prefix implying it belongs to the
`PyFrameObject` struct, although it belongs to the `_PyInterpreterFrame` struct.
We may rationalize this naming scheme for 3.12.
We may rationalize this naming scheme for 3.12.
### Shim frames
On entry to `_PyEval_EvalFrameDefault()` a shim `_PyInterpreterFrame` is pushed.
This frame is stored on the C stack, and popped when `_PyEval_EvalFrameDefault()`
returns. This extra frame is inserted so that `RETURN_VALUE`, `YIELD_VALUE`, and
`RETURN_GENERATOR` do not need to check whether the current frame is the entry frame.
The shim frame points to a special code object containing the `INTERPRETER_EXIT`
instruction which cleans up the shim frame and returns.

View file

@ -1329,15 +1329,15 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
}
}
/* Report whether `frame` is an entry frame (nonzero if it is).
 *
 * `frame` must not be NULL and its interpreter frame must not be
 * incomplete; both conditions are asserted.
 *
 * NOTE(review): this listing shows removed and added diff lines side by
 * side.  The older body returns the frame's `is_entry` flag; the newer body
 * instead checks whether the previous interpreter frame exists and is owned
 * by the C stack (FRAME_OWNED_BY_CSTACK). */
int _PyFrame_IsEntryFrame(PyFrameObject *frame)
int
_PyFrame_IsEntryFrame(PyFrameObject *frame)
{
assert(frame != NULL);
assert(!_PyFrame_IsIncomplete(frame->f_frame));
return frame->f_frame->is_entry;
_PyInterpreterFrame *f = frame->f_frame;
assert(!_PyFrame_IsIncomplete(f));
/* True only when a previous frame exists and it is owned by the C stack. */
return f->previous && f->previous->owner == FRAME_OWNED_BY_CSTACK;
}
PyCodeObject *
PyFrame_GetCode(PyFrameObject *frame)
{

View file

@ -207,8 +207,6 @@ gen_send_ex2(PyGenObject *gen, PyObject *arg, PyObject **presult,
Py_INCREF(result);
_PyFrame_StackPush(frame, result);
frame->previous = tstate->cframe->current_frame;
_PyErr_StackItem *prev_exc_info = tstate->exc_info;
gen->gi_exc_state.previous_item = prev_exc_info;
tstate->exc_info = &gen->gi_exc_state;
@ -223,14 +221,8 @@ gen_send_ex2(PyGenObject *gen, PyObject *arg, PyObject **presult,
result = _PyEval_EvalFrame(tstate, frame, exc);
assert(tstate->exc_info == prev_exc_info);
assert(gen->gi_exc_state.previous_item == NULL);
if (gen->gi_frame_state == FRAME_EXECUTING) {
gen->gi_frame_state = FRAME_COMPLETED;
}
assert(tstate->cframe->current_frame == frame->previous);
/* Don't keep the reference to previous any longer than necessary. It
* may keep a chain of frames alive or it could create a reference
* cycle. */
frame->previous = NULL;
assert(gen->gi_frame_state != FRAME_EXECUTING);
assert(frame->previous == NULL);
/* If the generator just returned (as opposed to yielding), signal
* that the generator is exhausted. */
@ -255,8 +247,7 @@ gen_send_ex2(PyGenObject *gen, PyObject *arg, PyObject **presult,
/* first clean reference cycle through stored exception traceback */
_PyErr_ClearExcState(&gen->gi_exc_state);
gen->gi_frame_state = FRAME_CLEARED;
_PyFrame_Clear(frame);
assert(gen->gi_frame_state == FRAME_CLEARED);
*presult = result;
return result ? PYGEN_RETURN : PYGEN_ERROR;
}