GH-113464: Speed up JIT builds (GH-122839)

This commit is contained in:
Brandt Bucher 2024-08-14 07:53:46 -07:00 committed by GitHub
parent 6ae942f412
commit 51185923a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 19 additions and 5 deletions

View file

@@ -182,15 +182,27 @@ class _Target(typing.Generic[_S, _R]):
async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text() generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases)) cases_and_opnames = sorted(
re.findall(
r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL
)
)
tasks = [] tasks = []
with tempfile.TemporaryDirectory() as tempdir: with tempfile.TemporaryDirectory() as tempdir:
work = pathlib.Path(tempdir).resolve() work = pathlib.Path(tempdir).resolve()
async with asyncio.TaskGroup() as group: async with asyncio.TaskGroup() as group:
coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work) coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work)
tasks.append(group.create_task(coro, name="trampoline")) tasks.append(group.create_task(coro, name="trampoline"))
for opname in opnames: template = TOOLS_JIT_TEMPLATE_C.read_text()
coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work) for case, opname in cases_and_opnames:
# Write out a copy of the template with *only* this case
# inserted. This is about twice as fast as #include'ing all
# of executor_cases.c.h each time we compile (since the C
# compiler wastes a bunch of time parsing the dead code for
# all of the other cases):
c = work / f"{opname}.c"
c.write_text(template.replace("CASE", case))
coro = self._compile(opname, c, work)
tasks.append(group.create_task(coro, name=opname)) tasks.append(group.create_task(coro, name=opname))
return {task.get_name(): task.result() for task in tasks} return {task.get_name(): task.result() for task in tasks}

View file

@@ -84,6 +84,8 @@ do { \
#undef WITHIN_STACK_BOUNDS #undef WITHIN_STACK_BOUNDS
#define WITHIN_STACK_BOUNDS() 1 #define WITHIN_STACK_BOUNDS() 1
#define TIER_TWO 2
_Py_CODEUNIT * _Py_CODEUNIT *
_JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate) _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate)
{ {
@@ -107,9 +109,9 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState
OPT_STAT_INC(uops_executed); OPT_STAT_INC(uops_executed);
UOP_STAT_INC(uopcode, execution_count); UOP_STAT_INC(uopcode, execution_count);
// The actual instruction definitions (only one will be used):
switch (uopcode) { switch (uopcode) {
#include "executor_cases.c.h" // The actual instruction definition gets inserted here:
CASE
default: default:
Py_UNREACHABLE(); Py_UNREACHABLE();
} }