gh-118518: Allow perf to work without frame pointers (#112254)

This commit is contained in:
Pablo Galindo Salgado 2024-05-05 03:07:29 +02:00 committed by GitHub
parent 999f0c5122
commit 1b22d801b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 892 additions and 39 deletions

View file

@ -143,6 +143,8 @@ any DWARF information available for them).
#include <sys/mman.h> // mmap()
#include <sys/types.h>
#include <unistd.h> // sysconf()
#include <sys/time.h> // gettimeofday()
#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
#define PY_HAVE_INVALIDATE_ICACHE
@ -187,12 +189,19 @@ struct code_arena_st {
typedef struct code_arena_st code_arena_t;
typedef struct trampoline_api_st trampoline_api_t;
enum perf_trampoline_type {
PERF_TRAMPOLINE_UNSET = 0,
PERF_TRAMPOLINE_TYPE_MAP = 1,
PERF_TRAMPOLINE_TYPE_JITDUMP = 2,
};
#define perf_status _PyRuntime.ceval.perf.status
#define extra_code_index _PyRuntime.ceval.perf.extra_code_index
#define perf_code_arena _PyRuntime.ceval.perf.code_arena
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
#define perf_map_file _PyRuntime.ceval.perf.map_file
#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
#define perf_trampoline_type _PyRuntime.ceval.perf.perf_trampoline_type
static void
perf_map_write_entry(void *state, const void *code_addr,
@ -220,6 +229,8 @@ static void*
perf_map_init_state(void)
{
PyUnstable_PerfMapState_Init();
trampoline_api.code_padding = 0;
perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP;
return NULL;
}
@ -236,6 +247,30 @@ _PyPerf_Callbacks _Py_perfmap_callbacks = {
&perf_map_free_state,
};
static size_t round_up(int64_t value, int64_t multiple) {
if (multiple == 0) {
// Avoid division by zero
return value;
}
int64_t remainder = value % multiple;
if (remainder == 0) {
// Value is already a multiple of 'multiple'
return value;
}
// Calculate the difference to the next multiple
int64_t difference = multiple - remainder;
// Add the difference to the value
int64_t rounded_up_value = value + difference;
return rounded_up_value;
}
// TRAMPOLINE MANAGEMENT API
static int
new_code_arena(void)
{
@ -256,6 +291,7 @@ new_code_arena(void)
void *start = &_Py_trampoline_func_start;
void *end = &_Py_trampoline_func_end;
size_t code_size = end - start;
size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16);
// TODO: Check the effect of alignment of the code chunks. Initial investigation
// showed that this has no effect on performance in x86-64 or aarch64 and the current
// version has the advantage that the unwinder in GDB can unwind across JIT-ed code.
@ -264,9 +300,9 @@ new_code_arena(void)
// measurable performance improvement by rounding trampolines up to 32-bit
// or 64-bit alignment.
size_t n_copies = mem_size / code_size;
size_t n_copies = mem_size / chunk_size;
for (size_t i = 0; i < n_copies; i++) {
memcpy(memory + i * code_size, start, code_size * sizeof(char));
memcpy(memory + i * chunk_size, start, code_size * sizeof(char));
}
// Some systems may prevent us from creating executable code on the fly.
int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC);
@ -320,16 +356,18 @@ static inline py_trampoline
code_arena_new_code(code_arena_t *code_arena)
{
py_trampoline trampoline = (py_trampoline)code_arena->current_addr;
code_arena->size_left -= code_arena->code_size;
code_arena->current_addr += code_arena->code_size;
size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16);
code_arena->size_left -= total_code_size;
code_arena->current_addr += total_code_size;
return trampoline;
}
static inline py_trampoline
compile_trampoline(void)
{
size_t total_code_size = round_up(perf_code_arena->code_size + trampoline_api.code_padding, 16);
if ((perf_code_arena == NULL) ||
(perf_code_arena->size_left <= perf_code_arena->code_size)) {
(perf_code_arena->size_left <= total_code_size)) {
if (new_code_arena() < 0) {
return NULL;
}
@ -480,6 +518,7 @@ _PyPerfTrampoline_Fini(void)
}
if (perf_status == PERF_STATUS_OK) {
trampoline_api.free_state(trampoline_api.state);
perf_trampoline_type = PERF_TRAMPOLINE_UNSET;
}
extra_code_index = -1;
perf_status = PERF_STATUS_NO_INIT;
@ -508,6 +547,9 @@ _PyPerfTrampoline_AfterFork_Child(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
if (persist_after_fork) {
if (perf_trampoline_type != PERF_TRAMPOLINE_TYPE_MAP) {
return PyStatus_Error("Failed to copy perf map file as perf trampoline type is not type map.");
}
_PyPerfTrampoline_Fini();
char filename[256];
pid_t parent_pid = getppid();