GH-130887: Always remove trailing jumps in AArch64 JIT stencils (GH-131042)

This commit is contained in:
Diego Russo 2025-03-25 17:15:36 +00:00 committed by GitHub
parent 0a91456ad1
commit ea0453ee97
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 35 additions and 9 deletions

View file

@ -0,0 +1 @@
Optimize the AArch64 code generation for the JIT. Patch by Diego Russo.

View file

@ -209,7 +209,24 @@ class Stencil:
self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
self.body.extend([0] * padding)
def remove_jump(self, *, alignment: int = 1) -> None:
def add_nops(self, nop: bytes, alignment: int) -> None:
    """Append NOPs to the body until its length is a multiple of alignment.

    nop is the byte encoding of a single NOP instruction; whole copies of it
    are appended to self.body. Nothing is appended when the body is already
    aligned.

    Raises ValueError if the gap cannot be filled with a whole number of
    NOPs, which includes the case of an empty nop and a non-zero gap.
    """
    offset = len(self.body)
    nop_size = len(nop)
    # Calculate the gap to the next multiple of alignment.
    gap = -offset % alignment
    if not gap:
        return
    # An empty nop can never fill a gap; test for it first so the modulo
    # below cannot divide by zero.
    if nop_size == 0 or gap % nop_size:
        raise ValueError(
            f"Cannot add nops of size '{nop_size}' to a body with "
            f"offset '{offset}' to align with '{alignment}'"
        )
    self.body.extend(nop * (gap // nop_size))
def remove_jump(self) -> None:
"""Remove a zero-length continuation jump, if it exists."""
hole = max(self.holes, key=lambda hole: hole.offset)
match hole:
@ -244,7 +261,7 @@ class Stencil:
jump = b"\x00\x00\x00\x14"
case _:
return
if self.body[offset:] == jump and offset % alignment == 0:
if self.body[offset:] == jump:
self.body = self.body[:offset]
self.holes.remove(hole)
@ -266,10 +283,7 @@ class StencilGroup:
_trampolines: set[int] = dataclasses.field(default_factory=set, init=False)
def process_relocations(
self,
known_symbols: dict[str, int],
*,
alignment: int = 1,
self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b""
) -> None:
"""Fix up all GOT and internal relocations for this stencil group."""
for hole in self.code.holes.copy():
@ -289,8 +303,8 @@ class StencilGroup:
self._trampolines.add(ordinal)
hole.addend = ordinal
hole.symbol = None
self.code.remove_jump(alignment=alignment)
self.code.pad(alignment)
self.code.remove_jump()
self.code.add_nops(nop=nop, alignment=alignment)
self.data.pad(8)
for stencil in [self.code, self.data]:
for hole in stencil.holes:

View file

@ -44,6 +44,15 @@ class _Target(typing.Generic[_S, _R]):
verbose: bool = False
known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)
def _get_nop(self) -> bytes:
    """Return the NOP byte sequence for this target's triple.

    Raises ValueError when the triple matches none of the known patterns.
    """
    triple = self.triple
    if re.fullmatch(r"aarch64-.*", triple):
        return b"\x1f\x20\x03\xD5"
    if re.fullmatch(r"x86_64-.*|i686.*", triple):
        return b"\x90"
    raise ValueError(f"NOP not defined for {triple}")
def _compute_digest(self, out: pathlib.Path) -> str:
hasher = hashlib.sha256()
hasher.update(self.triple.encode())
@ -172,7 +181,9 @@ class _Target(typing.Generic[_S, _R]):
stencil_groups = {task.get_name(): task.result() for task in tasks}
for stencil_group in stencil_groups.values():
stencil_group.process_relocations(
known_symbols=self.known_symbols, alignment=self.alignment
known_symbols=self.known_symbols,
alignment=self.alignment,
nop=self._get_nop(),
)
return stencil_groups