mirror of
https://github.com/python/cpython.git
synced 2025-08-30 21:48:47 +00:00
GH-130887: Always remove trailing jumps in AArch64 JIT stencils (GH-131042)
This commit is contained in:
parent
0a91456ad1
commit
ea0453ee97
3 changed files with 35 additions and 9 deletions
|
@ -0,0 +1 @@
|
|||
Optimize the AArch64 code generation for the JIT. Patch by Diego Russo.
|
|
@ -209,7 +209,24 @@ class Stencil:
|
|||
self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
|
||||
self.body.extend([0] * padding)
|
||||
|
||||
def remove_jump(self, *, alignment: int = 1) -> None:
|
||||
def add_nops(self, nop: bytes, alignment: int) -> None:
    """Add NOPs until there is alignment. Fail if it is not possible.

    Whole copies of ``nop`` are appended to ``self.body`` until its length
    is a multiple of ``alignment``. Nothing is appended when the body is
    already aligned.

    Args:
        nop: Byte sequence appended as filler, repeated as often as needed.
        alignment: The body length must end up a multiple of this value.

    Raises:
        ValueError: If the gap cannot be filled with a whole number of
            ``nop`` copies, including the case where ``nop`` is empty.
    """
    offset = len(self.body)
    nop_size = len(nop)

    # Calculate the gap to the next multiple of alignment.
    gap = -offset % alignment
    if not gap:
        return

    # An empty NOP can never fill a gap. Testing for it first also keeps the
    # modulo below from dividing by zero, so the caller gets the same
    # ValueError as for any other unfillable gap.
    if nop_size == 0 or gap % nop_size:
        raise ValueError(
            f"Cannot add nops of size '{nop_size}' to a body with "
            f"offset '{offset}' to align with '{alignment}'"
        )
    self.body.extend(nop * (gap // nop_size))
|
||||
|
||||
def remove_jump(self) -> None:
|
||||
"""Remove a zero-length continuation jump, if it exists."""
|
||||
hole = max(self.holes, key=lambda hole: hole.offset)
|
||||
match hole:
|
||||
|
@ -244,7 +261,7 @@ class Stencil:
|
|||
jump = b"\x00\x00\x00\x14"
|
||||
case _:
|
||||
return
|
||||
if self.body[offset:] == jump and offset % alignment == 0:
|
||||
if self.body[offset:] == jump:
|
||||
self.body = self.body[:offset]
|
||||
self.holes.remove(hole)
|
||||
|
||||
|
@ -266,10 +283,7 @@ class StencilGroup:
|
|||
_trampolines: set[int] = dataclasses.field(default_factory=set, init=False)
|
||||
|
||||
def process_relocations(
|
||||
self,
|
||||
known_symbols: dict[str, int],
|
||||
*,
|
||||
alignment: int = 1,
|
||||
self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b""
|
||||
) -> None:
|
||||
"""Fix up all GOT and internal relocations for this stencil group."""
|
||||
for hole in self.code.holes.copy():
|
||||
|
@ -289,8 +303,8 @@ class StencilGroup:
|
|||
self._trampolines.add(ordinal)
|
||||
hole.addend = ordinal
|
||||
hole.symbol = None
|
||||
self.code.remove_jump(alignment=alignment)
|
||||
self.code.pad(alignment)
|
||||
self.code.remove_jump()
|
||||
self.code.add_nops(nop=nop, alignment=alignment)
|
||||
self.data.pad(8)
|
||||
for stencil in [self.code, self.data]:
|
||||
for hole in stencil.holes:
|
||||
|
|
|
@ -44,6 +44,15 @@ class _Target(typing.Generic[_S, _R]):
|
|||
verbose: bool = False
|
||||
known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)
|
||||
|
||||
def _get_nop(self) -> bytes:
|
||||
if re.fullmatch(r"aarch64-.*", self.triple):
|
||||
nop = b"\x1f\x20\x03\xD5"
|
||||
elif re.fullmatch(r"x86_64-.*|i686.*", self.triple):
|
||||
nop = b"\x90"
|
||||
else:
|
||||
raise ValueError(f"NOP not defined for {self.triple}")
|
||||
return nop
|
||||
|
||||
def _compute_digest(self, out: pathlib.Path) -> str:
|
||||
hasher = hashlib.sha256()
|
||||
hasher.update(self.triple.encode())
|
||||
|
@ -172,7 +181,9 @@ class _Target(typing.Generic[_S, _R]):
|
|||
stencil_groups = {task.get_name(): task.result() for task in tasks}
|
||||
for stencil_group in stencil_groups.values():
|
||||
stencil_group.process_relocations(
|
||||
known_symbols=self.known_symbols, alignment=self.alignment
|
||||
known_symbols=self.known_symbols,
|
||||
alignment=self.alignment,
|
||||
nop=self._get_nop(),
|
||||
)
|
||||
return stencil_groups
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue