mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
gh-109587: Allow "precompiled" perf-trampolines to largely mitigate the cost of enabling perf-trampolines (#109666)
This commit is contained in:
parent
3d2f1f0b83
commit
21f068d80c
8 changed files with 199 additions and 10 deletions
|
@ -21,3 +21,6 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
|
||||||
unsigned int code_size,
|
unsigned int code_size,
|
||||||
const char *entry_name);
|
const char *entry_name);
|
||||||
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
|
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
|
||||||
|
PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);
|
||||||
|
PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *);
|
||||||
|
PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable);
|
||||||
|
|
|
@ -55,6 +55,7 @@ struct _ceval_runtime_state {
|
||||||
struct code_arena_st *code_arena;
|
struct code_arena_st *code_arena;
|
||||||
struct trampoline_api_st trampoline_api;
|
struct trampoline_api_st trampoline_api;
|
||||||
FILE *map_file;
|
FILE *map_file;
|
||||||
|
Py_ssize_t persist_after_fork;
|
||||||
#else
|
#else
|
||||||
int _not_used;
|
int _not_used;
|
||||||
#endif
|
#endif
|
||||||
|
@ -68,6 +69,7 @@ struct _ceval_runtime_state {
|
||||||
{ \
|
{ \
|
||||||
.status = PERF_STATUS_NO_INIT, \
|
.status = PERF_STATUS_NO_INIT, \
|
||||||
.extra_code_index = -1, \
|
.extra_code_index = -1, \
|
||||||
|
.persist_after_fork = 0, \
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
# define _PyEval_RUNTIME_PERF_INIT {0}
|
# define _PyEval_RUNTIME_PERF_INIT {0}
|
||||||
|
|
|
@ -1,6 +1,3 @@
|
||||||
|
|
||||||
/* System module interface */
|
|
||||||
|
|
||||||
#ifndef Py_SYSMODULE_H
|
#ifndef Py_SYSMODULE_H
|
||||||
#define Py_SYSMODULE_H
|
#define Py_SYSMODULE_H
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
@ -353,6 +353,82 @@ class TestPerfProfiler(unittest.TestCase):
|
||||||
self.assertNotIn(f"py::bar:{script}", stdout)
|
self.assertNotIn(f"py::bar:{script}", stdout)
|
||||||
self.assertNotIn(f"py::baz:{script}", stdout)
|
self.assertNotIn(f"py::baz:{script}", stdout)
|
||||||
|
|
||||||
|
def test_pre_fork_compile(self):
    """Check that pre-compiled trampolines survive a fork().

    The helper script marks trampolines as persistent across fork,
    compiles a trampoline for every function found in its globals,
    and then forks.  The child prints its pid and calls ``bar_fork``;
    the parent calls ``bar``.  The test then inspects the perf map
    files of both processes under ``/tmp``.
    """
    code = """if 1:
            import sys
            import os
            import sysconfig
            from _testinternalcapi import (
                compile_perf_trampoline_entry,
                perf_trampoline_set_persist_after_fork,
            )

            def foo_fork():
                pass

            def bar_fork():
                foo_fork()

            def foo():
                pass

            def bar():
                foo()

            def compile_trampolines_for_all_functions():
                perf_trampoline_set_persist_after_fork(1)
                for _, obj in globals().items():
                    if callable(obj) and hasattr(obj, '__code__'):
                        compile_perf_trampoline_entry(obj.__code__)

            if __name__ == "__main__":
                compile_trampolines_for_all_functions()
                pid = os.fork()
                if pid == 0:
                    print(os.getpid())
                    bar_fork()
                else:
                    bar()
            """

    with temp_dir() as script_dir:
        script = make_script(script_dir, "perftest", code)
        command = [sys.executable, "-Xperf", script]
        with subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        ) as process:
            stdout, stderr = process.communicate()

    self.assertEqual(process.returncode, 0)
    self.assertNotIn("Error:", stderr)

    # The only thing the script writes to stdout is the child's pid.
    child_pid = int(stdout.strip())
    parent_map = pathlib.Path(f"/tmp/perf-{process.pid}.map")
    child_map = pathlib.Path(f"/tmp/perf-{child_pid}.map")
    self.assertTrue(parent_map.exists())
    self.assertTrue(child_map.exists())

    # Everything was compiled before the fork, so the parent's map
    # must mention all four functions.
    parent_text = parent_map.read_text()
    self.assertIn(f"py::foo:{script}", parent_text)
    self.assertIn(f"py::bar:{script}", parent_text)
    self.assertIn(f"py::foo_fork:{script}", parent_text)
    self.assertIn(f"py::bar_fork:{script}", parent_text)

    child_text = child_map.read_text()
    self.assertIn(f"py::foo_fork:{script}", child_text)
    self.assertIn(f"py::bar_fork:{script}", child_text)

    # Pre-compiled perf-map entries of a forked process must be
    # identical in both the parent and child perf-map files.
    fork_markers = (
        f"py::foo_fork:{script}",
        f"py::bar_fork:{script}",
    )
    for entry in parent_text.split("\n"):
        if any(marker in entry for marker in fork_markers):
            self.assertIn(entry, child_text)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Introduced :c:func:`PyUnstable_PerfTrampoline_CompileCode`, :c:func:`PyUnstable_PerfTrampoline_SetPersistAfterFork` and
|
||||||
|
:c:func:`PyUnstable_CopyPerfMapFile`. These functions allow extension modules to initialize trampolines eagerly, after the application is "warmed up". This makes it possible to have perf-trampolines running in an always-enabled fashion.
|
|
@ -1556,6 +1556,36 @@ _testinternalcapi_test_long_numbits_impl(PyObject *module)
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Test helper: compile_perf_trampoline_entry(code).
 *
 * Takes a single code object and forwards it to
 * PyUnstable_PerfTrampoline_CompileCode().  Returns the (zero) status as a
 * Python int on success; raises AssertionError if the call reports a
 * non-zero status.
 */
static PyObject *
compile_perf_trampoline_entry(PyObject *self, PyObject *args)
{
    PyObject *code_obj;
    if (!PyArg_ParseTuple(args, "O!", &PyCode_Type, &code_obj)) {
        return NULL;
    }

    int status = PyUnstable_PerfTrampoline_CompileCode((PyCodeObject *)code_obj);
    if (status == 0) {
        return PyLong_FromLong(status);
    }

    PyErr_SetString(PyExc_AssertionError, "Failed to compile trampoline");
    return NULL;
}
|
||||||
|
|
||||||
|
/* Test helper: perf_trampoline_set_persist_after_fork(enable).
 *
 * Forwards the integer `enable` to
 * PyUnstable_PerfTrampoline_SetPersistAfterFork() and returns the value the
 * runtime reports back as a Python int.
 *
 * The underlying call returns the value it stored, or 0 when perf
 * trampolines are not compiled in.  Success therefore means "the reported
 * value equals the requested one": comparing against 0 instead would make
 * the legitimate request enable=0 look like a failure.  AssertionError is
 * raised when the reported value differs from the requested one.
 */
static PyObject *
perf_trampoline_set_persist_after_fork(PyObject *self, PyObject *args)
{
    int enable;
    if (!PyArg_ParseTuple(args, "i", &enable)) {
        return NULL;
    }
    int ret = PyUnstable_PerfTrampoline_SetPersistAfterFork(enable);
    if (ret != enable) {
        PyErr_SetString(PyExc_AssertionError, "Failed to set persist_after_fork");
        return NULL;
    }
    return PyLong_FromLong(ret);
}
|
||||||
|
|
||||||
|
|
||||||
static PyMethodDef module_functions[] = {
|
static PyMethodDef module_functions[] = {
|
||||||
{"get_configs", get_configs, METH_NOARGS},
|
{"get_configs", get_configs, METH_NOARGS},
|
||||||
|
@ -1613,6 +1643,8 @@ static PyMethodDef module_functions[] = {
|
||||||
{"run_in_subinterp_with_config",
|
{"run_in_subinterp_with_config",
|
||||||
_PyCFunction_CAST(run_in_subinterp_with_config),
|
_PyCFunction_CAST(run_in_subinterp_with_config),
|
||||||
METH_VARARGS | METH_KEYWORDS},
|
METH_VARARGS | METH_KEYWORDS},
|
||||||
|
{"compile_perf_trampoline_entry", compile_perf_trampoline_entry, METH_VARARGS},
|
||||||
|
{"perf_trampoline_set_persist_after_fork", perf_trampoline_set_persist_after_fork, METH_VARARGS},
|
||||||
_TESTINTERNALCAPI_WRITE_UNRAISABLE_EXC_METHODDEF
|
_TESTINTERNALCAPI_WRITE_UNRAISABLE_EXC_METHODDEF
|
||||||
_TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF
|
_TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF
|
||||||
{NULL, NULL} /* sentinel */
|
{NULL, NULL} /* sentinel */
|
||||||
|
|
|
@ -193,7 +193,7 @@ typedef struct trampoline_api_st trampoline_api_t;
|
||||||
#define perf_code_arena _PyRuntime.ceval.perf.code_arena
|
#define perf_code_arena _PyRuntime.ceval.perf.code_arena
|
||||||
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
|
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
|
||||||
#define perf_map_file _PyRuntime.ceval.perf.map_file
|
#define perf_map_file _PyRuntime.ceval.perf.map_file
|
||||||
|
#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
|
||||||
|
|
||||||
static void
|
static void
|
||||||
perf_map_write_entry(void *state, const void *code_addr,
|
perf_map_write_entry(void *state, const void *code_addr,
|
||||||
|
@ -361,6 +361,26 @@ default_eval:
|
||||||
}
|
}
|
||||||
#endif // PY_HAVE_PERF_TRAMPOLINE
|
#endif // PY_HAVE_PERF_TRAMPOLINE
|
||||||
|
|
||||||
|
/*
 * Eagerly compile a perf trampoline for the code object `co`.
 *
 * If no trampoline is stored yet in the code object's extra slot
 * (extra_code_index), a new one is compiled, reported through
 * trampoline_api.write_state() and stored in that slot.
 *
 * Returns:
 *    0  if a trampoline was already present, or when built without
 *       PY_HAVE_PERF_TRAMPOLINE (nothing to do);
 *   -1  if a new trampoline could not be compiled;
 *   otherwise the result of _PyCode_SetExtra() for the new trampoline.
 */
int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    py_trampoline f = NULL;
    assert(extra_code_index != -1);
    int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
    if (ret != 0 || f == NULL) {
        py_trampoline new_trampoline = compile_trampoline();
        if (new_trampoline == NULL) {
            /* Report the failure instead of returning 0: callers treat a
               zero result as "trampoline is in place". */
            return -1;
        }
        trampoline_api.write_state(trampoline_api.state, new_trampoline,
                                   perf_code_arena->code_size, co);
        return _PyCode_SetExtra((PyObject *)co, extra_code_index,
                                (void *)new_trampoline);
    }
    return 0;
#else
    (void)co;
    return 0;
#endif // PY_HAVE_PERF_TRAMPOLINE
}
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyIsPerfTrampolineActive(void)
|
_PyIsPerfTrampolineActive(void)
|
||||||
{
|
{
|
||||||
|
@ -448,16 +468,34 @@ _PyPerfTrampoline_Fini(void)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Store `enable` in the runtime's persist_after_fork flag and return the
 * stored value.  When the interpreter is built without
 * PY_HAVE_PERF_TRAMPOLINE nothing is stored and 0 is returned.
 */
int
PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    persist_after_fork = enable;
    return persist_after_fork;
#else
    (void)enable;
    return 0;
#endif
}
|
||||||
|
|
||||||
PyStatus
|
PyStatus
|
||||||
_PyPerfTrampoline_AfterFork_Child(void)
|
_PyPerfTrampoline_AfterFork_Child(void)
|
||||||
{
|
{
|
||||||
#ifdef PY_HAVE_PERF_TRAMPOLINE
|
#ifdef PY_HAVE_PERF_TRAMPOLINE
|
||||||
// Restart trampoline in file in child.
|
|
||||||
int was_active = _PyIsPerfTrampolineActive();
|
|
||||||
_PyPerfTrampoline_Fini();
|
|
||||||
PyUnstable_PerfMapState_Fini();
|
PyUnstable_PerfMapState_Fini();
|
||||||
if (was_active) {
|
if (persist_after_fork) {
|
||||||
_PyPerfTrampoline_Init(1);
|
char filename[256];
|
||||||
|
pid_t parent_pid = getppid();
|
||||||
|
snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid);
|
||||||
|
if (PyUnstable_CopyPerfMapFile(filename) != 0) {
|
||||||
|
return PyStatus_Error("Failed to copy perf map file.");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Restart trampoline in file in child.
|
||||||
|
int was_active = _PyIsPerfTrampolineActive();
|
||||||
|
_PyPerfTrampoline_Fini();
|
||||||
|
if (was_active) {
|
||||||
|
_PyPerfTrampoline_Init(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return PyStatus_Ok();
|
return PyStatus_Ok();
|
||||||
|
|
|
@ -2361,7 +2361,7 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
|
||||||
#ifndef MS_WINDOWS
|
#ifndef MS_WINDOWS
|
||||||
if (perf_map_state.perf_map == NULL) {
|
if (perf_map_state.perf_map == NULL) {
|
||||||
int ret = PyUnstable_PerfMapState_Init();
|
int ret = PyUnstable_PerfMapState_Init();
|
||||||
if(ret != 0){
|
if (ret != 0){
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2388,6 +2388,45 @@ PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Append the contents of the perf map file `parent_filename` to this
 * process's own perf map file, initialising the perf map state first if
 * that has not happened yet.
 *
 * Returns 0 on success (an empty parent file is a successful, empty copy),
 * the non-zero result of PyUnstable_PerfMapState_Init() if initialisation
 * fails, and -1 if the parent file cannot be opened or a read, write or
 * flush error occurs.  On Windows this is a no-op that returns 0.
 */
PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename) {
#ifndef MS_WINDOWS
    FILE* from = fopen(parent_filename, "r");
    if (!from) {
        return -1;
    }
    if (perf_map_state.perf_map == NULL) {
        int ret = PyUnstable_PerfMapState_Init();
        if (ret != 0) {
            /* Do not leak the source handle on the early exit. */
            fclose(from);
            return ret;
        }
    }

    char buf[4096];
    size_t bytes_read;
    int result = 0;

    PyThread_acquire_lock(perf_map_state.map_lock, 1);
    while ((bytes_read = fread(buf, 1, sizeof(buf), from)) > 0) {
        size_t bytes_written = fwrite(buf, 1, bytes_read,
                                      perf_map_state.perf_map);
        if (bytes_written < bytes_read) {
            result = -1;
            break;
        }
    }
    /* fread() returns 0 both at end-of-file and on error; only the latter
       is a failure.  Treating every zero-length read as an error would
       reject empty files and files whose size is a multiple of the buffer
       size. */
    if (ferror(from)) {
        result = -1;
    }
    /* A single flush after the copy is enough to push the data out. */
    if (fflush(perf_map_state.perf_map) != 0) {
        result = -1;
    }
    fclose(from);
    PyThread_release_lock(perf_map_state.map_lock);
    return result;
#else
    (void)parent_filename;
    return 0;
#endif
}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
static PyMethodDef sys_methods[] = {
|
static PyMethodDef sys_methods[] = {
|
||||||
/* Might as well keep this in alphabetic order */
|
/* Might as well keep this in alphabetic order */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue