mirror of
https://github.com/python/cpython.git
synced 2025-07-19 17:25:54 +00:00
gh-136541: Fix several problems of perf trampolines in x86_64 and aarch64 (#136500)
This commit fixes the following problems:
* The x86_64 trampolines were not preserving frame pointers
* The hardcoded offsets to the code segment from the FDE only worked properly for x86_64
* The CIE data was not following the conventions of aarch64
* The eh_frame for aarch64 was not fully correct
This commit is contained in:
parent
7de8ea7be6
commit
236f733d8f
5 changed files with 148 additions and 41 deletions
|
@ -230,6 +230,7 @@ perf_map_init_state(void)
|
|||
{
|
||||
PyUnstable_PerfMapState_Init();
|
||||
trampoline_api.code_padding = 0;
|
||||
trampoline_api.code_alignment = 32;
|
||||
perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -291,7 +292,9 @@ new_code_arena(void)
|
|||
void *start = &_Py_trampoline_func_start;
|
||||
void *end = &_Py_trampoline_func_end;
|
||||
size_t code_size = end - start;
|
||||
size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16);
|
||||
size_t unaligned_size = code_size + trampoline_api.code_padding;
|
||||
size_t chunk_size = round_up(unaligned_size, trampoline_api.code_alignment);
|
||||
assert(chunk_size % trampoline_api.code_alignment == 0);
|
||||
// TODO: Check the effect of alignment of the code chunks. Initial investigation
|
||||
// showed that this has no effect on performance in x86-64 or aarch64 and the current
|
||||
// version has the advantage that the unwinder in GDB can unwind across JIT-ed code.
|
||||
|
@ -356,7 +359,9 @@ static inline py_trampoline
|
|||
code_arena_new_code(code_arena_t *code_arena)
|
||||
{
|
||||
py_trampoline trampoline = (py_trampoline)code_arena->current_addr;
|
||||
size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16);
|
||||
size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding,
|
||||
trampoline_api.code_alignment);
|
||||
assert(total_code_size % trampoline_api.code_alignment == 0);
|
||||
code_arena->size_left -= total_code_size;
|
||||
code_arena->current_addr += total_code_size;
|
||||
return trampoline;
|
||||
|
@ -489,9 +494,6 @@ _PyPerfTrampoline_Init(int activate)
|
|||
}
|
||||
else {
|
||||
_PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator);
|
||||
if (new_code_arena() < 0) {
|
||||
return -1;
|
||||
}
|
||||
extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
|
||||
if (extra_code_index == -1) {
|
||||
return -1;
|
||||
|
@ -499,6 +501,9 @@ _PyPerfTrampoline_Init(int activate)
|
|||
if (trampoline_api.state == NULL && trampoline_api.init_state != NULL) {
|
||||
trampoline_api.state = trampoline_api.init_state();
|
||||
}
|
||||
if (new_code_arena() < 0) {
|
||||
return -1;
|
||||
}
|
||||
perf_status = PERF_STATUS_OK;
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue