mirror of
https://github.com/python/cpython.git
synced 2025-08-25 03:04:55 +00:00
gh-118518: Allow perf to work without frame pointers (#112254)
This commit is contained in:
parent
999f0c5122
commit
1b22d801b8
19 changed files with 892 additions and 39 deletions
|
@ -143,6 +143,8 @@ any DWARF information available for them).
|
|||
#include <sys/mman.h> // mmap()
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h> // sysconf()
|
||||
#include <sys/time.h> // gettimeofday()
|
||||
|
||||
|
||||
#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
|
||||
#define PY_HAVE_INVALIDATE_ICACHE
|
||||
|
@ -187,12 +189,19 @@ struct code_arena_st {
|
|||
typedef struct code_arena_st code_arena_t;
|
||||
typedef struct trampoline_api_st trampoline_api_t;
|
||||
|
||||
enum perf_trampoline_type {
|
||||
PERF_TRAMPOLINE_UNSET = 0,
|
||||
PERF_TRAMPOLINE_TYPE_MAP = 1,
|
||||
PERF_TRAMPOLINE_TYPE_JITDUMP = 2,
|
||||
};
|
||||
|
||||
#define perf_status _PyRuntime.ceval.perf.status
|
||||
#define extra_code_index _PyRuntime.ceval.perf.extra_code_index
|
||||
#define perf_code_arena _PyRuntime.ceval.perf.code_arena
|
||||
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
|
||||
#define perf_map_file _PyRuntime.ceval.perf.map_file
|
||||
#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
|
||||
#define perf_trampoline_type _PyRuntime.ceval.perf.perf_trampoline_type
|
||||
|
||||
static void
|
||||
perf_map_write_entry(void *state, const void *code_addr,
|
||||
|
@ -220,6 +229,8 @@ static void*
|
|||
perf_map_init_state(void)
|
||||
{
|
||||
PyUnstable_PerfMapState_Init();
|
||||
trampoline_api.code_padding = 0;
|
||||
perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -236,6 +247,30 @@ _PyPerf_Callbacks _Py_perfmap_callbacks = {
|
|||
&perf_map_free_state,
|
||||
};
|
||||
|
||||
|
||||
static size_t round_up(int64_t value, int64_t multiple) {
|
||||
if (multiple == 0) {
|
||||
// Avoid division by zero
|
||||
return value;
|
||||
}
|
||||
|
||||
int64_t remainder = value % multiple;
|
||||
if (remainder == 0) {
|
||||
// Value is already a multiple of 'multiple'
|
||||
return value;
|
||||
}
|
||||
|
||||
// Calculate the difference to the next multiple
|
||||
int64_t difference = multiple - remainder;
|
||||
|
||||
// Add the difference to the value
|
||||
int64_t rounded_up_value = value + difference;
|
||||
|
||||
return rounded_up_value;
|
||||
}
|
||||
|
||||
// TRAMPOLINE MANAGEMENT API
|
||||
|
||||
static int
|
||||
new_code_arena(void)
|
||||
{
|
||||
|
@ -256,6 +291,7 @@ new_code_arena(void)
|
|||
void *start = &_Py_trampoline_func_start;
|
||||
void *end = &_Py_trampoline_func_end;
|
||||
size_t code_size = end - start;
|
||||
size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16);
|
||||
// TODO: Check the effect of alignment of the code chunks. Initial investigation
|
||||
// showed that this has no effect on performance in x86-64 or aarch64 and the current
|
||||
// version has the advantage that the unwinder in GDB can unwind across JIT-ed code.
|
||||
|
@ -264,9 +300,9 @@ new_code_arena(void)
|
|||
// measurable performance improvement by rounding trampolines up to 32-bit
|
||||
// or 64-bit alignment.
|
||||
|
||||
size_t n_copies = mem_size / code_size;
|
||||
size_t n_copies = mem_size / chunk_size;
|
||||
for (size_t i = 0; i < n_copies; i++) {
|
||||
memcpy(memory + i * code_size, start, code_size * sizeof(char));
|
||||
memcpy(memory + i * chunk_size, start, code_size * sizeof(char));
|
||||
}
|
||||
// Some systems may prevent us from creating executable code on the fly.
|
||||
int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC);
|
||||
|
@ -320,16 +356,18 @@ static inline py_trampoline
|
|||
code_arena_new_code(code_arena_t *code_arena)
|
||||
{
|
||||
py_trampoline trampoline = (py_trampoline)code_arena->current_addr;
|
||||
code_arena->size_left -= code_arena->code_size;
|
||||
code_arena->current_addr += code_arena->code_size;
|
||||
size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16);
|
||||
code_arena->size_left -= total_code_size;
|
||||
code_arena->current_addr += total_code_size;
|
||||
return trampoline;
|
||||
}
|
||||
|
||||
static inline py_trampoline
|
||||
compile_trampoline(void)
|
||||
{
|
||||
size_t total_code_size = round_up(perf_code_arena->code_size + trampoline_api.code_padding, 16);
|
||||
if ((perf_code_arena == NULL) ||
|
||||
(perf_code_arena->size_left <= perf_code_arena->code_size)) {
|
||||
(perf_code_arena->size_left <= total_code_size)) {
|
||||
if (new_code_arena() < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -480,6 +518,7 @@ _PyPerfTrampoline_Fini(void)
|
|||
}
|
||||
if (perf_status == PERF_STATUS_OK) {
|
||||
trampoline_api.free_state(trampoline_api.state);
|
||||
perf_trampoline_type = PERF_TRAMPOLINE_UNSET;
|
||||
}
|
||||
extra_code_index = -1;
|
||||
perf_status = PERF_STATUS_NO_INIT;
|
||||
|
@ -508,6 +547,9 @@ _PyPerfTrampoline_AfterFork_Child(void)
|
|||
{
|
||||
#ifdef PY_HAVE_PERF_TRAMPOLINE
|
||||
if (persist_after_fork) {
|
||||
if (perf_trampoline_type != PERF_TRAMPOLINE_TYPE_MAP) {
|
||||
return PyStatus_Error("Failed to copy perf map file as perf trampoline type is not type map.");
|
||||
}
|
||||
_PyPerfTrampoline_Fini();
|
||||
char filename[256];
|
||||
pid_t parent_pid = getppid();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue