gh-136541: Fix several problems of perf trampolines in x86_64 and aarch64 (#136500)

This commit fixes the following problems:

* The x86_64 trampolines were not preserving frame pointers
* The hardcoded offsets to the code segment from the FDE only worked properly for x86_64
* The CIE data was not following conventions of aarch64
* The eh_frame for aarch64 was not fully correct
This commit is contained in:
Pablo Galindo Salgado 2025-07-11 14:32:35 +01:00 committed by GitHub
parent 7de8ea7be6
commit 236f733d8f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 148 additions and 41 deletions

View file

@ -230,6 +230,7 @@ perf_map_init_state(void)
{
PyUnstable_PerfMapState_Init();
trampoline_api.code_padding = 0;
trampoline_api.code_alignment = 32;
perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP;
return NULL;
}
@ -291,7 +292,9 @@ new_code_arena(void)
void *start = &_Py_trampoline_func_start;
void *end = &_Py_trampoline_func_end;
size_t code_size = end - start;
size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16);
size_t unaligned_size = code_size + trampoline_api.code_padding;
size_t chunk_size = round_up(unaligned_size, trampoline_api.code_alignment);
assert(chunk_size % trampoline_api.code_alignment == 0);
// TODO: Check the effect of alignment of the code chunks. Initial investigation
// showed that this has no effect on performance in x86-64 or aarch64 and the current
// version has the advantage that the unwinder in GDB can unwind across JIT-ed code.
@ -356,7 +359,9 @@ static inline py_trampoline
code_arena_new_code(code_arena_t *code_arena)
{
py_trampoline trampoline = (py_trampoline)code_arena->current_addr;
size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16);
size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding,
trampoline_api.code_alignment);
assert(total_code_size % trampoline_api.code_alignment == 0);
code_arena->size_left -= total_code_size;
code_arena->current_addr += total_code_size;
return trampoline;
@ -489,9 +494,6 @@ _PyPerfTrampoline_Init(int activate)
}
else {
_PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator);
if (new_code_arena() < 0) {
return -1;
}
extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
if (extra_code_index == -1) {
return -1;
@ -499,6 +501,9 @@ _PyPerfTrampoline_Init(int activate)
if (trampoline_api.state == NULL && trampoline_api.init_state != NULL) {
trampoline_api.state = trampoline_api.init_state();
}
if (new_code_arena() < 0) {
return -1;
}
perf_status = PERF_STATUS_OK;
}
#endif