mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 11:49:12 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			570 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			570 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 | 
						|
Perf trampoline instrumentation
 | 
						|
===============================
 | 
						|
 | 
						|
This file contains instrumentation to allow to associate
 | 
						|
calls to the CPython eval loop back to the names of the Python
 | 
						|
functions and filename being executed.
 | 
						|
 | 
						|
Many native performance profilers like the Linux perf tools are
 | 
						|
only able to 'see' the C stack when sampling from the profiled
 | 
						|
process. This means that if we have the following python code:
 | 
						|
 | 
						|
    import time
 | 
						|
    def foo(n):
 | 
						|
        # Some CPU intensive code
 | 
						|
 | 
						|
    def bar(n):
 | 
						|
        foo(n)
 | 
						|
 | 
						|
    def baz(n):
 | 
						|
        bar(n)
 | 
						|
 | 
						|
    baz(10000000)
 | 
						|
 | 
						|
A performance profiler that is only able to see native frames will
 | 
						|
produce the following backtrace when sampling from foo():
 | 
						|
 | 
						|
    _PyEval_EvalFrameDefault -----> Evaluation frame of foo()
 | 
						|
    _PyEval_Vector
 | 
						|
    _PyFunction_Vectorcall
 | 
						|
    PyObject_Vectorcall
 | 
						|
    call_function
 | 
						|
 | 
						|
    _PyEval_EvalFrameDefault ------> Evaluation frame of bar()
 | 
						|
    _PyEval_EvalFrame
 | 
						|
    _PyEval_Vector
 | 
						|
    _PyFunction_Vectorcall
 | 
						|
    PyObject_Vectorcall
 | 
						|
    call_function
 | 
						|
 | 
						|
    _PyEval_EvalFrameDefault -------> Evaluation frame of baz()
 | 
						|
    _PyEval_EvalFrame
 | 
						|
    _PyEval_Vector
 | 
						|
    _PyFunction_Vectorcall
 | 
						|
    PyObject_Vectorcall
 | 
						|
    call_function
 | 
						|
 | 
						|
    ...
 | 
						|
 | 
						|
    Py_RunMain
 | 
						|
 | 
						|
Because the profiler is only able to see the native frames and the native
 | 
						|
function that runs the evaluation loop is the same (_PyEval_EvalFrameDefault)
 | 
						|
then the profiler and any reporter generated by it will not be able to
 | 
						|
associate the names of the Python functions and the filenames associated with
 | 
						|
those calls, rendering the results useless in the Python world.
 | 
						|
 | 
						|
To fix this problem, we introduce the concept of a trampoline frame. A
 | 
						|
trampoline frame is a piece of code that is unique per Python code object that
 | 
						|
is executed before entering the CPython eval loop. This piece of code just
 | 
						|
calls the original Python evaluation function (_PyEval_EvalFrameDefault) and
 | 
						|
forwards all the arguments received. In this way, when a profiler samples
 | 
						|
frames from the previous example it will see:
 | 
						|
 | 
						|
    _PyEval_EvalFrameDefault -----> Evaluation frame of foo()
 | 
						|
    [Jit compiled code 3]
 | 
						|
    _PyEval_Vector
 | 
						|
    _PyFunction_Vectorcall
 | 
						|
    PyObject_Vectorcall
 | 
						|
    call_function
 | 
						|
 | 
						|
    _PyEval_EvalFrameDefault ------> Evaluation frame of bar()
 | 
						|
    [Jit compiled code 2]
 | 
						|
    _PyEval_EvalFrame
 | 
						|
    _PyEval_Vector
 | 
						|
    _PyFunction_Vectorcall
 | 
						|
    PyObject_Vectorcall
 | 
						|
    call_function
 | 
						|
 | 
						|
    _PyEval_EvalFrameDefault -------> Evaluation frame of baz()
 | 
						|
    [Jit compiled code 1]
 | 
						|
    _PyEval_EvalFrame
 | 
						|
    _PyEval_Vector
 | 
						|
    _PyFunction_Vectorcall
 | 
						|
    PyObject_Vectorcall
 | 
						|
    call_function
 | 
						|
 | 
						|
    ...
 | 
						|
 | 
						|
    Py_RunMain
 | 
						|
 | 
						|
When we generate every unique copy of the trampoline (what here we called "[Jit
 | 
						|
compiled code N]") we write the relationship between the compiled code and the
 | 
						|
Python function that is associated with it. Every profiler requires this
 | 
						|
information in a different format. For example, the Linux "perf" profiler
 | 
						|
requires a file in "/tmp/perf-PID.map" (name and location not configurable)
 | 
						|
with the following format:
 | 
						|
 | 
						|
    <compiled code address> <compiled code size> <name of the compiled code>
 | 
						|
 | 
						|
If this file is available when "perf" generates reports, it will automatically
 | 
						|
associate every trampoline with the Python function that it is associated with
 | 
						|
allowing it to generate reports that include Python information. These reports
 | 
						|
then can also be filtered in a way that *only* Python information appears.
 | 
						|
 | 
						|
Notice that for this to work, there must be a unique copy of the trampoline
 | 
						|
per Python code object even if the code in the trampoline is the same. To
 | 
						|
achieve this we have an assembly template in Objects/asm_trampoline.S that is
 | 
						|
compiled into the Python executable/shared library. This template generates a
 | 
						|
symbol that marks the start of the assembly code and another that marks the end
 | 
						|
of the assembly code for the trampoline.  Then, every time we need a unique
 | 
						|
trampoline for a Python code object, we copy the assembly code into a mmaped
 | 
						|
area that has executable permissions and we return the start of that area as
 | 
						|
our trampoline function.
 | 
						|
 | 
						|
Asking for a mmap-ed memory area for every trampoline is very wasteful so we
 | 
						|
allocate big arenas of memory in a single mmap call, we populate the entire
 | 
						|
arena with copies of the trampoline (this allows us to not have to invalidate
 | 
						|
the icache for the instructions in the page) and then we return the next
 | 
						|
available chunk every time someone asks for a new trampoline. We keep a linked
 | 
						|
list of arenas in case the current memory arena is exhausted and another one is
 | 
						|
needed.
 | 
						|
 | 
						|
For the best results, Python should be compiled with
 | 
						|
CFLAGS="-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" as this allows
 | 
						|
profilers to unwind using only the frame pointer and not on DWARF debug
 | 
						|
information (note that as trampolines are dynamically generated there won't be
 | 
						|
any DWARF information available for them).
 | 
						|
*/
 | 
						|
 | 
						|
#include "Python.h"
 | 
						|
#include "pycore_ceval.h"         // _PyPerf_Callbacks
 | 
						|
#include "pycore_frame.h"
 | 
						|
#include "pycore_interp.h"
 | 
						|
 | 
						|
 | 
						|
#ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
						|
 | 
						|
#include <fcntl.h>
 | 
						|
#include <stdio.h>
 | 
						|
#include <stdlib.h>
 | 
						|
#include <sys/mman.h>             // mmap()
 | 
						|
#include <sys/types.h>
 | 
						|
#include <unistd.h>               // sysconf()
 | 
						|
#include <sys/time.h>           // gettimeofday()
 | 
						|
 | 
						|
 | 
						|
#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
 | 
						|
#define PY_HAVE_INVALIDATE_ICACHE
 | 
						|
 | 
						|
#if defined(__clang__) || defined(__GNUC__)
extern void __clear_cache(void *, void*);
#endif

/* Invalidate the instruction cache for the address range [begin, end).
 *
 * With clang or GCC this forwards to the compiler-provided __clear_cache();
 * with any other compiler it does nothing.
 */
static void invalidate_icache(char *begin, char *end) {
#if defined(__clang__) || defined(__GNUC__)
    // Plain call: ISO C forbids `return <expr>;` in a function returning void.
    __clear_cache(begin, end);
#else
    (void)begin;
    (void)end;
#endif
}
 | 
						|
#endif
 | 
						|
 | 
						|
/* The function pointer is passed as last argument. The other three arguments
 | 
						|
 * are passed in the same order as the function requires. This results in
 | 
						|
 * shorter, more efficient ASM code for trampoline.
 | 
						|
 */
 | 
						|
typedef PyObject *(*py_evaluator)(PyThreadState *, _PyInterpreterFrame *,
 | 
						|
                                  int throwflag);
 | 
						|
typedef PyObject *(*py_trampoline)(PyThreadState *, _PyInterpreterFrame *, int,
 | 
						|
                                   py_evaluator);
 | 
						|
 | 
						|
/* Symbols delimiting the assembly trampoline template. Only their addresses
 * are used: the template is the byte range between the two symbols. */
extern void *_Py_trampoline_func_start;  // Start of the template
extern void *_Py_trampoline_func_end;    // End of the template
 | 
						|
 | 
						|
/* A single mmap-ed arena that holds copies of the trampoline code. Arenas are
 * chained through `prev`, newest first. */
struct code_arena_st {
    /* Start of the memory arena */
    char *start_addr;
    /* Address of the current trampoline within the arena */
    char *current_addr;
    /* Size of the memory arena */
    size_t size;
    /* Remaining size of the memory arena */
    size_t size_left;
    /* Size of the code of every trampoline in the arena */
    size_t code_size;
    /* Pointer to the previous arena, or NULL if this is the first arena */
    struct code_arena_st *prev;
};

typedef struct code_arena_st code_arena_t;
typedef struct trampoline_api_st trampoline_api_t;
 | 
						|
 | 
						|
/* Which backend is currently recording trampoline information. */
enum perf_trampoline_type {
    PERF_TRAMPOLINE_UNSET        = 0,  /* no backend initialised */
    PERF_TRAMPOLINE_TYPE_MAP     = 1,  /* set by perf_map_init_state() */
    PERF_TRAMPOLINE_TYPE_JITDUMP = 2,
};
 | 
						|
 | 
						|
/* Short aliases for the perf trampoline fields stored in
 * _PyRuntime.ceval.perf. */
#define perf_status          _PyRuntime.ceval.perf.status
#define extra_code_index     _PyRuntime.ceval.perf.extra_code_index
#define perf_code_arena      _PyRuntime.ceval.perf.code_arena
#define trampoline_api       _PyRuntime.ceval.perf.trampoline_api
#define perf_map_file        _PyRuntime.ceval.perf.map_file
#define persist_after_fork   _PyRuntime.ceval.perf.persist_after_fork
#define perf_trampoline_type _PyRuntime.ceval.perf.perf_trampoline_type
 | 
						|
 | 
						|
static void
 | 
						|
perf_map_write_entry(void *state, const void *code_addr,
 | 
						|
                         unsigned int code_size, PyCodeObject *co)
 | 
						|
{
 | 
						|
    const char *entry = "";
 | 
						|
    if (co->co_qualname != NULL) {
 | 
						|
        entry = PyUnicode_AsUTF8(co->co_qualname);
 | 
						|
    }
 | 
						|
    const char *filename = "";
 | 
						|
    if (co->co_filename != NULL) {
 | 
						|
        filename = PyUnicode_AsUTF8(co->co_filename);
 | 
						|
    }
 | 
						|
    size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1;
 | 
						|
    char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size);
 | 
						|
    if (perf_map_entry == NULL) {
 | 
						|
        return;
 | 
						|
    }
 | 
						|
    snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename);
 | 
						|
    PyUnstable_WritePerfMapEntry(code_addr, code_size, perf_map_entry);
 | 
						|
    PyMem_RawFree(perf_map_entry);
 | 
						|
}
 | 
						|
 | 
						|
static void*
 | 
						|
perf_map_init_state(void)
 | 
						|
{
 | 
						|
    PyUnstable_PerfMapState_Init();
 | 
						|
    trampoline_api.code_padding = 0;
 | 
						|
    perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP;
 | 
						|
    return NULL;
 | 
						|
}
 | 
						|
 | 
						|
/* free_state callback of the perf map backend.
 *
 * Calls PyUnstable_PerfMapState_Fini(); the state argument is ignored.
 * Always returns 0.
 */
static int
perf_map_free_state(void *state)
{
    (void)state;
    PyUnstable_PerfMapState_Fini();
    return 0;
}
 | 
						|
 | 
						|
_PyPerf_Callbacks _Py_perfmap_callbacks = {
 | 
						|
    &perf_map_init_state,
 | 
						|
    &perf_map_write_entry,
 | 
						|
    &perf_map_free_state,
 | 
						|
};
 | 
						|
 | 
						|
 | 
						|
/* Round `value` up to the next multiple of `multiple`.
 *
 * Returns `value` unchanged when it is already a multiple, or when
 * `multiple` is 0 (which also avoids a division by zero).
 */
static size_t round_up(int64_t value, int64_t multiple) {
    if (multiple == 0) {
        return value;
    }

    int64_t excess = value % multiple;
    if (excess != 0) {
        // Advance by the distance to the next multiple.
        value += multiple - excess;
    }
    return value;
}
 | 
						|
 | 
						|
// TRAMPOLINE MANAGEMENT API
 | 
						|
 | 
						|
static int
 | 
						|
new_code_arena(void)
 | 
						|
{
 | 
						|
    // non-trivial programs typically need 64 to 256 kiB.
 | 
						|
    size_t mem_size = 4096 * 16;
 | 
						|
    assert(mem_size % sysconf(_SC_PAGESIZE) == 0);
 | 
						|
    char *memory =
 | 
						|
        mmap(NULL,  // address
 | 
						|
             mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
 | 
						|
             -1,  // fd (not used here)
 | 
						|
             0);  // offset (not used here)
 | 
						|
    if (memory == MAP_FAILED) {
 | 
						|
        PyErr_SetFromErrno(PyExc_OSError);
 | 
						|
        PyErr_FormatUnraisable("Failed to create new mmap for perf trampoline");
 | 
						|
        perf_status = PERF_STATUS_FAILED;
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
    void *start = &_Py_trampoline_func_start;
 | 
						|
    void *end = &_Py_trampoline_func_end;
 | 
						|
    size_t code_size = end - start;
 | 
						|
    size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16);
 | 
						|
    // TODO: Check the effect of alignment of the code chunks. Initial investigation
 | 
						|
    // showed that this has no effect on performance in x86-64 or aarch64 and the current
 | 
						|
    // version has the advantage that the unwinder in GDB can unwind across JIT-ed code.
 | 
						|
    //
 | 
						|
    // We should check the values in the future and see if there is a
 | 
						|
    // measurable performance improvement by rounding trampolines up to 32-bit
 | 
						|
    // or 64-bit alignment.
 | 
						|
 | 
						|
    size_t n_copies = mem_size / chunk_size;
 | 
						|
    for (size_t i = 0; i < n_copies; i++) {
 | 
						|
        memcpy(memory + i * chunk_size, start, code_size * sizeof(char));
 | 
						|
    }
 | 
						|
    // Some systems may prevent us from creating executable code on the fly.
 | 
						|
    int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC);
 | 
						|
    if (res == -1) {
 | 
						|
        PyErr_SetFromErrno(PyExc_OSError);
 | 
						|
        munmap(memory, mem_size);
 | 
						|
        PyErr_FormatUnraisable("Failed to set mmap for perf trampoline to "
 | 
						|
                               "PROT_READ | PROT_EXEC");
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
 | 
						|
#ifdef PY_HAVE_INVALIDATE_ICACHE
 | 
						|
    // Before the JIT can run a block of code that has been emitted it must invalidate
 | 
						|
    // the instruction cache on some platforms like arm and aarch64.
 | 
						|
    invalidate_icache(memory, memory + mem_size);
 | 
						|
#endif
 | 
						|
 | 
						|
    code_arena_t *new_arena = PyMem_RawCalloc(1, sizeof(code_arena_t));
 | 
						|
    if (new_arena == NULL) {
 | 
						|
        PyErr_NoMemory();
 | 
						|
        munmap(memory, mem_size);
 | 
						|
        PyErr_FormatUnraisable("Failed to allocate new code arena struct for perf trampoline");
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
 | 
						|
    new_arena->start_addr = memory;
 | 
						|
    new_arena->current_addr = memory;
 | 
						|
    new_arena->size = mem_size;
 | 
						|
    new_arena->size_left = mem_size;
 | 
						|
    new_arena->code_size = code_size;
 | 
						|
    new_arena->prev = perf_code_arena;
 | 
						|
    perf_code_arena = new_arena;
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
free_code_arenas(void)
 | 
						|
{
 | 
						|
    code_arena_t *cur = perf_code_arena;
 | 
						|
    code_arena_t *prev;
 | 
						|
    perf_code_arena = NULL;  // invalid static pointer
 | 
						|
    while (cur) {
 | 
						|
        munmap(cur->start_addr, cur->size);
 | 
						|
        prev = cur->prev;
 | 
						|
        PyMem_RawFree(cur);
 | 
						|
        cur = prev;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
static inline py_trampoline
 | 
						|
code_arena_new_code(code_arena_t *code_arena)
 | 
						|
{
 | 
						|
    py_trampoline trampoline = (py_trampoline)code_arena->current_addr;
 | 
						|
    size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16);
 | 
						|
    code_arena->size_left -= total_code_size;
 | 
						|
    code_arena->current_addr += total_code_size;
 | 
						|
    return trampoline;
 | 
						|
}
 | 
						|
 | 
						|
static inline py_trampoline
 | 
						|
compile_trampoline(void)
 | 
						|
{
 | 
						|
    size_t total_code_size = round_up(perf_code_arena->code_size + trampoline_api.code_padding, 16);
 | 
						|
    if ((perf_code_arena == NULL) ||
 | 
						|
        (perf_code_arena->size_left <= total_code_size)) {
 | 
						|
        if (new_code_arena() < 0) {
 | 
						|
            return NULL;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    assert(perf_code_arena->size_left <= perf_code_arena->size);
 | 
						|
    return code_arena_new_code(perf_code_arena);
 | 
						|
}
 | 
						|
 | 
						|
static PyObject *
 | 
						|
py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
 | 
						|
                        int throw)
 | 
						|
{
 | 
						|
    if (perf_status == PERF_STATUS_FAILED ||
 | 
						|
        perf_status == PERF_STATUS_NO_INIT) {
 | 
						|
        goto default_eval;
 | 
						|
    }
 | 
						|
    PyCodeObject *co = _PyFrame_GetCode(frame);
 | 
						|
    py_trampoline f = NULL;
 | 
						|
    assert(extra_code_index != -1);
 | 
						|
    int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
 | 
						|
    if (ret != 0 || f == NULL) {
 | 
						|
        // This is the first time we see this code object so we need
 | 
						|
        // to compile a trampoline for it.
 | 
						|
        py_trampoline new_trampoline = compile_trampoline();
 | 
						|
        if (new_trampoline == NULL) {
 | 
						|
            goto default_eval;
 | 
						|
        }
 | 
						|
        trampoline_api.write_state(trampoline_api.state, new_trampoline,
 | 
						|
                                   perf_code_arena->code_size, co);
 | 
						|
        _PyCode_SetExtra((PyObject *)co, extra_code_index,
 | 
						|
                         (void *)new_trampoline);
 | 
						|
        f = new_trampoline;
 | 
						|
    }
 | 
						|
    assert(f != NULL);
 | 
						|
    return f(ts, frame, throw, _PyEval_EvalFrameDefault);
 | 
						|
default_eval:
 | 
						|
    // Something failed, fall back to the default evaluator.
 | 
						|
    return _PyEval_EvalFrameDefault(ts, frame, throw);
 | 
						|
}
 | 
						|
#endif  // PY_HAVE_PERF_TRAMPOLINE
 | 
						|
 | 
						|
/* Make sure `co` has a trampoline attached.
 *
 * If the code object's extra slot already holds a trampoline, nothing is
 * done. Otherwise a trampoline is compiled, reported through
 * trampoline_api.write_state() and stored in the slot.
 *
 * Returns the result of _PyCode_SetExtra() when a trampoline was stored, and
 * 0 in every other case (including when compilation failed or when the perf
 * trampoline is not compiled in).
 */
int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    py_trampoline existing = NULL;
    assert(extra_code_index != -1);
    int lookup = _PyCode_GetExtra((PyObject *)co, extra_code_index,
                                  (void **)&existing);
    if (lookup == 0 && existing != NULL) {
        return 0;
    }

    py_trampoline fresh = compile_trampoline();
    if (fresh == NULL) {
        return 0;
    }
    trampoline_api.write_state(trampoline_api.state, fresh,
                               perf_code_arena->code_size, co);
    return _PyCode_SetExtra((PyObject *)co, extra_code_index, (void *)fresh);
#else
    return 0;
#endif // PY_HAVE_PERF_TRAMPOLINE
}
 | 
						|
 | 
						|
/* Return 1 if the current interpreter's eval_frame is the perf trampoline
 * evaluator, 0 otherwise (always 0 when the trampoline is not compiled in). */
int
_PyIsPerfTrampolineActive(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    PyInterpreterState *interp = _PyThreadState_GET()->interp;
    return interp->eval_frame == py_trampoline_evaluator;
#else
    return 0;
#endif
}
 | 
						|
 | 
						|
void
 | 
						|
_PyPerfTrampoline_GetCallbacks(_PyPerf_Callbacks *callbacks)
 | 
						|
{
 | 
						|
    if (callbacks == NULL) {
 | 
						|
        return;
 | 
						|
    }
 | 
						|
#ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
						|
    callbacks->init_state = trampoline_api.init_state;
 | 
						|
    callbacks->write_state = trampoline_api.write_state;
 | 
						|
    callbacks->free_state = trampoline_api.free_state;
 | 
						|
#endif
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
int
 | 
						|
_PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *callbacks)
 | 
						|
{
 | 
						|
    if (callbacks == NULL) {
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
#ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
						|
    if (trampoline_api.state) {
 | 
						|
        _PyPerfTrampoline_Fini();
 | 
						|
    }
 | 
						|
    trampoline_api.init_state = callbacks->init_state;
 | 
						|
    trampoline_api.write_state = callbacks->write_state;
 | 
						|
    trampoline_api.free_state = callbacks->free_state;
 | 
						|
    trampoline_api.state = NULL;
 | 
						|
#endif
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Activate (activate != 0) or deactivate (activate == 0) the perf trampoline
 * for the current interpreter.
 *
 * Fails with RuntimeError if a different custom eval_frame is installed.
 * Deactivating resets eval_frame to NULL and perf_status to
 * PERF_STATUS_NO_INIT. Activating installs py_trampoline_evaluator, requests
 * a code-extra index, initialises the backend state if it is not initialised
 * yet, and creates the first code arena.
 *
 * Returns 0 on success and -1 on failure. Does nothing and returns 0 when
 * the perf trampoline is not compiled in.
 */
int
_PyPerfTrampoline_Init(int activate)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    PyThreadState *tstate = _PyThreadState_GET();
    if (tstate->interp->eval_frame &&
        tstate->interp->eval_frame != py_trampoline_evaluator) {
        PyErr_SetString(PyExc_RuntimeError,
                        "Trampoline cannot be initialized as a custom eval "
                        "frame is already present");
        return -1;
    }
    if (!activate) {
        tstate->interp->eval_frame = NULL;
        perf_status = PERF_STATUS_NO_INIT;
    }
    else {
        tstate->interp->eval_frame = py_trampoline_evaluator;
        extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
        if (extra_code_index == -1) {
            return -1;
        }
        // Initialise the backend before building the arena: init_state() may
        // set trampoline_api.code_padding, which new_code_arena() uses to lay
        // out the trampoline chunks.
        if (trampoline_api.state == NULL && trampoline_api.init_state != NULL) {
            trampoline_api.state = trampoline_api.init_state();
        }
        if (new_code_arena() < 0) {
            return -1;
        }
        perf_status = PERF_STATUS_OK;
    }
#endif
    return 0;
}
 | 
						|
 | 
						|
/* Shut down the perf trampoline.
 *
 * Does nothing unless perf_status is PERF_STATUS_OK. Otherwise it uninstalls
 * py_trampoline_evaluator from the current interpreter (if installed),
 * releases the backend state through free_state(), and resets the trampoline
 * type, the code-extra index and perf_status. Code arenas are not freed here.
 *
 * Always returns 0.
 */
int
_PyPerfTrampoline_Fini(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    if (perf_status != PERF_STATUS_OK) {
        return 0;
    }
    PyThreadState *tstate = _PyThreadState_GET();
    if (tstate->interp->eval_frame == py_trampoline_evaluator) {
        tstate->interp->eval_frame = NULL;
    }
    // _PyPerfTrampoline_Init() accepts a NULL init_state, so the status can
    // be OK without any callbacks installed.
    if (trampoline_api.free_state != NULL) {
        trampoline_api.free_state(trampoline_api.state);
    }
    // Forget the released state so that a later _PyPerfTrampoline_Init()
    // calls init_state() again instead of reusing it.
    trampoline_api.state = NULL;
    perf_trampoline_type = PERF_TRAMPOLINE_UNSET;
    extra_code_index = -1;
    perf_status = PERF_STATUS_NO_INIT;
#endif
    return 0;
}
 | 
						|
 | 
						|
/* Release all trampoline code arenas (no-op when the perf trampoline is not
 * compiled in). */
void _PyPerfTrampoline_FreeArenas(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    free_code_arenas();
#endif
}
 | 
						|
 | 
						|
/* Store `enable` in the persist_after_fork setting and return the stored
 * value. Returns 0 when the perf trampoline is not compiled in. */
int
PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    persist_after_fork = enable;
    return persist_after_fork;
#else
    (void)enable;
    return 0;
#endif
}
 | 
						|
 | 
						|
PyStatus
 | 
						|
_PyPerfTrampoline_AfterFork_Child(void)
 | 
						|
{
 | 
						|
#ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
						|
    if (persist_after_fork) {
 | 
						|
        if (perf_trampoline_type != PERF_TRAMPOLINE_TYPE_MAP) {
 | 
						|
            return PyStatus_Error("Failed to copy perf map file as perf trampoline type is not type map.");
 | 
						|
        }
 | 
						|
        _PyPerfTrampoline_Fini();
 | 
						|
        char filename[256];
 | 
						|
        pid_t parent_pid = getppid();
 | 
						|
        snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid);
 | 
						|
        if (PyUnstable_CopyPerfMapFile(filename) != 0) {
 | 
						|
            return PyStatus_Error("Failed to copy perf map file.");
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        // Restart trampoline in file in child.
 | 
						|
        int was_active = _PyIsPerfTrampolineActive();
 | 
						|
        _PyPerfTrampoline_Fini();
 | 
						|
        if (was_active) {
 | 
						|
            _PyPerfTrampoline_Init(1);
 | 
						|
        }
 | 
						|
    }
 | 
						|
#endif
 | 
						|
    return PyStatus_Ok();
 | 
						|
}
 |