mirror of
				https://github.com/python/cpython.git
				synced 2025-10-20 13:43:01 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			384 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			384 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Core data structures for compiled code templates."""
 | |
| 
 | |
| import dataclasses
 | |
| import enum
 | |
| import sys
 | |
| import typing
 | |
| 
 | |
| import _schema
 | |
| 
 | |
| 
 | |
| @enum.unique
 | |
| class HoleValue(enum.Enum):
 | |
|     """
 | |
|     Different "base" values that can be patched into holes (usually combined with the
 | |
|     address of a symbol and/or an addend).
 | |
|     """
 | |
| 
 | |
|     # The base address of the machine code for the current uop (exposed as _JIT_ENTRY):
 | |
|     CODE = enum.auto()
 | |
|     # The base address of the machine code for the next uop (exposed as _JIT_CONTINUE):
 | |
|     CONTINUE = enum.auto()
 | |
|     # The base address of the read-only data for this uop:
 | |
|     DATA = enum.auto()
 | |
|     # The address of the current executor (exposed as _JIT_EXECUTOR):
 | |
|     EXECUTOR = enum.auto()
 | |
|     # The base address of the "global" offset table located in the read-only data.
 | |
|     # Shouldn't be present in the final stencils, since these are all replaced with
 | |
|     # equivalent DATA values:
 | |
|     GOT = enum.auto()
 | |
|     # The current uop's oparg (exposed as _JIT_OPARG):
 | |
|     OPARG = enum.auto()
 | |
|     # The current uop's operand on 64-bit platforms (exposed as _JIT_OPERAND):
 | |
|     OPERAND = enum.auto()
 | |
|     # The current uop's operand on 32-bit platforms (exposed as _JIT_OPERAND_HI/LO):
 | |
|     OPERAND_HI = enum.auto()
 | |
|     OPERAND_LO = enum.auto()
 | |
|     # The current uop's target (exposed as _JIT_TARGET):
 | |
|     TARGET = enum.auto()
 | |
|     # The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET):
 | |
|     JUMP_TARGET = enum.auto()
 | |
|     # The base address of the machine code for the error jump target (exposed as _JIT_ERROR_TARGET):
 | |
|     ERROR_TARGET = enum.auto()
 | |
|     # The index of the exit to be jumped through (exposed as _JIT_EXIT_INDEX):
 | |
|     EXIT_INDEX = enum.auto()
 | |
|     # The base address of the machine code for the first uop (exposed as _JIT_TOP):
 | |
|     TOP = enum.auto()
 | |
|     # A hardcoded value of zero (used for symbol lookups):
 | |
|     ZERO = enum.auto()
 | |
| 
 | |
| 
 | |
| # Map relocation types to our JIT's patch functions. "r" suffixes indicate that
 | |
| # the patch function is relative. "x" suffixes indicate that they are "relaxing"
 | |
| # (see comments in jit.c for more info):
 | |
| _PATCH_FUNCS = {
 | |
|     # aarch64-apple-darwin:
 | |
|     "ARM64_RELOC_BRANCH26": "patch_aarch64_26r",
 | |
|     "ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21rx",
 | |
|     "ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12x",
 | |
|     "ARM64_RELOC_PAGE21": "patch_aarch64_21r",
 | |
|     "ARM64_RELOC_PAGEOFF12": "patch_aarch64_12",
 | |
|     "ARM64_RELOC_UNSIGNED": "patch_64",
 | |
|     # x86_64-pc-windows-msvc:
 | |
|     "IMAGE_REL_AMD64_REL32": "patch_x86_64_32rx",
 | |
|     # aarch64-pc-windows-msvc:
 | |
|     "IMAGE_REL_ARM64_BRANCH26": "patch_aarch64_26r",
 | |
|     "IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21rx",
 | |
|     "IMAGE_REL_ARM64_PAGEOFFSET_12A": "patch_aarch64_12",
 | |
|     "IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12x",
 | |
|     # i686-pc-windows-msvc:
 | |
|     "IMAGE_REL_I386_DIR32": "patch_32",
 | |
|     "IMAGE_REL_I386_REL32": "patch_x86_64_32rx",
 | |
|     # aarch64-unknown-linux-gnu:
 | |
|     "R_AARCH64_ABS64": "patch_64",
 | |
|     "R_AARCH64_ADD_ABS_LO12_NC": "patch_aarch64_12",
 | |
|     "R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21rx",
 | |
|     "R_AARCH64_ADR_PREL_PG_HI21": "patch_aarch64_21r",
 | |
|     "R_AARCH64_CALL26": "patch_aarch64_26r",
 | |
|     "R_AARCH64_JUMP26": "patch_aarch64_26r",
 | |
|     "R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12x",
 | |
|     "R_AARCH64_MOVW_UABS_G0_NC": "patch_aarch64_16a",
 | |
|     "R_AARCH64_MOVW_UABS_G1_NC": "patch_aarch64_16b",
 | |
|     "R_AARCH64_MOVW_UABS_G2_NC": "patch_aarch64_16c",
 | |
|     "R_AARCH64_MOVW_UABS_G3": "patch_aarch64_16d",
 | |
|     # x86_64-unknown-linux-gnu:
 | |
|     "R_X86_64_64": "patch_64",
 | |
|     "R_X86_64_GOTPCREL": "patch_32r",
 | |
|     "R_X86_64_GOTPCRELX": "patch_x86_64_32rx",
 | |
|     "R_X86_64_PC32": "patch_32r",
 | |
|     "R_X86_64_REX_GOTPCRELX": "patch_x86_64_32rx",
 | |
|     # x86_64-apple-darwin:
 | |
|     "X86_64_RELOC_BRANCH": "patch_32r",
 | |
|     "X86_64_RELOC_GOT": "patch_x86_64_32rx",
 | |
|     "X86_64_RELOC_GOT_LOAD": "patch_x86_64_32rx",
 | |
|     "X86_64_RELOC_SIGNED": "patch_32r",
 | |
|     "X86_64_RELOC_UNSIGNED": "patch_64",
 | |
| }
 | |
| # Translate HoleValues to C expressions:
 | |
| _HOLE_EXPRS = {
 | |
|     HoleValue.CODE: "(uintptr_t)code",
 | |
|     HoleValue.CONTINUE: "(uintptr_t)code + sizeof(code_body)",
 | |
|     HoleValue.DATA: "(uintptr_t)data",
 | |
|     HoleValue.EXECUTOR: "(uintptr_t)executor",
 | |
|     # These should all have been turned into DATA values by process_relocations:
 | |
|     # HoleValue.GOT: "",
 | |
|     HoleValue.OPARG: "instruction->oparg",
 | |
|     HoleValue.OPERAND: "instruction->operand",
 | |
|     HoleValue.OPERAND_HI: "(instruction->operand >> 32)",
 | |
|     HoleValue.OPERAND_LO: "(instruction->operand & UINT32_MAX)",
 | |
|     HoleValue.TARGET: "instruction->target",
 | |
|     HoleValue.JUMP_TARGET: "instruction_starts[instruction->jump_target]",
 | |
|     HoleValue.ERROR_TARGET: "instruction_starts[instruction->error_target]",
 | |
|     HoleValue.EXIT_INDEX: "instruction->exit_index",
 | |
|     HoleValue.TOP: "instruction_starts[1]",
 | |
|     HoleValue.ZERO: "",
 | |
| }
 | |
| 
 | |
| 
 | |
| @dataclasses.dataclass
 | |
| class Hole:
 | |
|     """
 | |
|     A "hole" in the stencil to be patched with a computed runtime value.
 | |
| 
 | |
|     Analogous to relocation records in an object file.
 | |
|     """
 | |
| 
 | |
|     offset: int
 | |
|     kind: _schema.HoleKind
 | |
|     # Patch with this base value:
 | |
|     value: HoleValue
 | |
|     # ...plus the address of this symbol:
 | |
|     symbol: str | None
 | |
|     # ...plus this addend:
 | |
|     addend: int
 | |
|     func: str = dataclasses.field(init=False)
 | |
|     # Convenience method:
 | |
|     replace = dataclasses.replace
 | |
| 
 | |
|     def __post_init__(self) -> None:
 | |
|         self.func = _PATCH_FUNCS[self.kind]
 | |
| 
 | |
|     def fold(self, other: typing.Self) -> typing.Self | None:
 | |
|         """Combine two holes into a single hole, if possible."""
 | |
|         if (
 | |
|             self.offset + 4 == other.offset
 | |
|             and self.value == other.value
 | |
|             and self.symbol == other.symbol
 | |
|             and self.addend == other.addend
 | |
|             and self.func == "patch_aarch64_21rx"
 | |
|             and other.func == "patch_aarch64_12x"
 | |
|         ):
 | |
|             # These can *only* be properly relaxed when they appear together and
 | |
|             # patch the same value:
 | |
|             folded = self.replace()
 | |
|             folded.func = "patch_aarch64_33rx"
 | |
|             return folded
 | |
|         return None
 | |
| 
 | |
|     def as_c(self, where: str) -> str:
 | |
|         """Dump this hole as a call to a patch_* function."""
 | |
|         location = f"{where} + {self.offset:#x}"
 | |
|         value = _HOLE_EXPRS[self.value]
 | |
|         if self.symbol:
 | |
|             if value:
 | |
|                 value += " + "
 | |
|             value += f"(uintptr_t)&{self.symbol}"
 | |
|         if _signed(self.addend):
 | |
|             if value:
 | |
|                 value += " + "
 | |
|             value += f"{_signed(self.addend):#x}"
 | |
|         return f"{self.func}({location}, {value});"
 | |
| 
 | |
| 
 | |
| @dataclasses.dataclass
 | |
| class Stencil:
 | |
|     """
 | |
|     A contiguous block of machine code or data to be copied-and-patched.
 | |
| 
 | |
|     Analogous to a section or segment in an object file.
 | |
|     """
 | |
| 
 | |
|     body: bytearray = dataclasses.field(default_factory=bytearray, init=False)
 | |
|     holes: list[Hole] = dataclasses.field(default_factory=list, init=False)
 | |
|     disassembly: list[str] = dataclasses.field(default_factory=list, init=False)
 | |
| 
 | |
|     def pad(self, alignment: int) -> None:
 | |
|         """Pad the stencil to the given alignment."""
 | |
|         offset = len(self.body)
 | |
|         padding = -offset % alignment
 | |
|         self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
 | |
|         self.body.extend([0] * padding)
 | |
| 
 | |
|     def emit_aarch64_trampoline(self, hole: Hole) -> None:
 | |
|         """Even with the large code model, AArch64 Linux insists on 28-bit jumps."""
 | |
|         base = len(self.body)
 | |
|         where = slice(hole.offset, hole.offset + 4)
 | |
|         instruction = int.from_bytes(self.body[where], sys.byteorder)
 | |
|         instruction &= 0xFC000000
 | |
|         instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF
 | |
|         self.body[where] = instruction.to_bytes(4, sys.byteorder)
 | |
|         self.disassembly += [
 | |
|             f"{base + 4 * 0:x}: d2800008      mov     x8, #0x0",
 | |
|             f"{base + 4 * 0:016x}:  R_AARCH64_MOVW_UABS_G0_NC    {hole.symbol}",
 | |
|             f"{base + 4 * 1:x}: f2a00008      movk    x8, #0x0, lsl #16",
 | |
|             f"{base + 4 * 1:016x}:  R_AARCH64_MOVW_UABS_G1_NC    {hole.symbol}",
 | |
|             f"{base + 4 * 2:x}: f2c00008      movk    x8, #0x0, lsl #32",
 | |
|             f"{base + 4 * 2:016x}:  R_AARCH64_MOVW_UABS_G2_NC    {hole.symbol}",
 | |
|             f"{base + 4 * 3:x}: f2e00008      movk    x8, #0x0, lsl #48",
 | |
|             f"{base + 4 * 3:016x}:  R_AARCH64_MOVW_UABS_G3       {hole.symbol}",
 | |
|             f"{base + 4 * 4:x}: d61f0100      br      x8",
 | |
|         ]
 | |
|         for code in [
 | |
|             0xD2800008.to_bytes(4, sys.byteorder),
 | |
|             0xF2A00008.to_bytes(4, sys.byteorder),
 | |
|             0xF2C00008.to_bytes(4, sys.byteorder),
 | |
|             0xF2E00008.to_bytes(4, sys.byteorder),
 | |
|             0xD61F0100.to_bytes(4, sys.byteorder),
 | |
|         ]:
 | |
|             self.body.extend(code)
 | |
|         for i, kind in enumerate(
 | |
|             [
 | |
|                 "R_AARCH64_MOVW_UABS_G0_NC",
 | |
|                 "R_AARCH64_MOVW_UABS_G1_NC",
 | |
|                 "R_AARCH64_MOVW_UABS_G2_NC",
 | |
|                 "R_AARCH64_MOVW_UABS_G3",
 | |
|             ]
 | |
|         ):
 | |
|             self.holes.append(hole.replace(offset=base + 4 * i, kind=kind))
 | |
| 
 | |
|     def remove_jump(self, *, alignment: int = 1) -> None:
 | |
|         """Remove a zero-length continuation jump, if it exists."""
 | |
|         hole = max(self.holes, key=lambda hole: hole.offset)
 | |
|         match hole:
 | |
|             case Hole(
 | |
|                 offset=offset,
 | |
|                 kind="IMAGE_REL_AMD64_REL32",
 | |
|                 value=HoleValue.GOT,
 | |
|                 symbol="_JIT_CONTINUE",
 | |
|                 addend=-4,
 | |
|             ) as hole:
 | |
|                 # jmp qword ptr [rip]
 | |
|                 jump = b"\x48\xFF\x25\x00\x00\x00\x00"
 | |
|                 offset -= 3
 | |
|             case Hole(
 | |
|                 offset=offset,
 | |
|                 kind="IMAGE_REL_I386_REL32" | "X86_64_RELOC_BRANCH",
 | |
|                 value=HoleValue.CONTINUE,
 | |
|                 symbol=None,
 | |
|                 addend=-4,
 | |
|             ) as hole:
 | |
|                 # jmp 5
 | |
|                 jump = b"\xE9\x00\x00\x00\x00"
 | |
|                 offset -= 1
 | |
|             case Hole(
 | |
|                 offset=offset,
 | |
|                 kind="R_AARCH64_JUMP26",
 | |
|                 value=HoleValue.CONTINUE,
 | |
|                 symbol=None,
 | |
|                 addend=0,
 | |
|             ) as hole:
 | |
|                 # b #4
 | |
|                 jump = b"\x00\x00\x00\x14"
 | |
|             case Hole(
 | |
|                 offset=offset,
 | |
|                 kind="R_X86_64_GOTPCRELX",
 | |
|                 value=HoleValue.GOT,
 | |
|                 symbol="_JIT_CONTINUE",
 | |
|                 addend=addend,
 | |
|             ) as hole:
 | |
|                 assert _signed(addend) == -4
 | |
|                 # jmp qword ptr [rip]
 | |
|                 jump = b"\xFF\x25\x00\x00\x00\x00"
 | |
|                 offset -= 2
 | |
|             case _:
 | |
|                 return
 | |
|         if self.body[offset:] == jump and offset % alignment == 0:
 | |
|             self.body = self.body[:offset]
 | |
|             self.holes.remove(hole)
 | |
| 
 | |
| 
 | |
| @dataclasses.dataclass
 | |
| class StencilGroup:
 | |
|     """
 | |
|     Code and data corresponding to a given micro-opcode.
 | |
| 
 | |
|     Analogous to an entire object file.
 | |
|     """
 | |
| 
 | |
|     code: Stencil = dataclasses.field(default_factory=Stencil, init=False)
 | |
|     data: Stencil = dataclasses.field(default_factory=Stencil, init=False)
 | |
|     symbols: dict[int | str, tuple[HoleValue, int]] = dataclasses.field(
 | |
|         default_factory=dict, init=False
 | |
|     )
 | |
|     _got: dict[str, int] = dataclasses.field(default_factory=dict, init=False)
 | |
| 
 | |
|     def process_relocations(self, *, alignment: int = 1) -> None:
 | |
|         """Fix up all GOT and internal relocations for this stencil group."""
 | |
|         for hole in self.code.holes.copy():
 | |
|             if (
 | |
|                 hole.kind
 | |
|                 in {"R_AARCH64_CALL26", "R_AARCH64_JUMP26", "ARM64_RELOC_BRANCH26"}
 | |
|                 and hole.value is HoleValue.ZERO
 | |
|             ):
 | |
|                 self.code.pad(alignment)
 | |
|                 self.code.emit_aarch64_trampoline(hole)
 | |
|                 self.code.holes.remove(hole)
 | |
|         self.code.remove_jump(alignment=alignment)
 | |
|         self.code.pad(alignment)
 | |
|         self.data.pad(8)
 | |
|         for stencil in [self.code, self.data]:
 | |
|             for hole in stencil.holes:
 | |
|                 if hole.value is HoleValue.GOT:
 | |
|                     assert hole.symbol is not None
 | |
|                     hole.value = HoleValue.DATA
 | |
|                     hole.addend += self._global_offset_table_lookup(hole.symbol)
 | |
|                     hole.symbol = None
 | |
|                 elif hole.symbol in self.symbols:
 | |
|                     hole.value, addend = self.symbols[hole.symbol]
 | |
|                     hole.addend += addend
 | |
|                     hole.symbol = None
 | |
|                 elif (
 | |
|                     hole.kind in {"IMAGE_REL_AMD64_REL32"}
 | |
|                     and hole.value is HoleValue.ZERO
 | |
|                 ):
 | |
|                     raise ValueError(
 | |
|                         f"Add PyAPI_FUNC(...) or PyAPI_DATA(...) to declaration of {hole.symbol}!"
 | |
|                     )
 | |
|         self._emit_global_offset_table()
 | |
|         self.code.holes.sort(key=lambda hole: hole.offset)
 | |
|         self.data.holes.sort(key=lambda hole: hole.offset)
 | |
| 
 | |
|     def _global_offset_table_lookup(self, symbol: str) -> int:
 | |
|         return len(self.data.body) + self._got.setdefault(symbol, 8 * len(self._got))
 | |
| 
 | |
|     def _emit_global_offset_table(self) -> None:
 | |
|         got = len(self.data.body)
 | |
|         for s, offset in self._got.items():
 | |
|             if s in self.symbols:
 | |
|                 value, addend = self.symbols[s]
 | |
|                 symbol = None
 | |
|             else:
 | |
|                 value, symbol = symbol_to_value(s)
 | |
|                 addend = 0
 | |
|             self.data.holes.append(
 | |
|                 Hole(got + offset, "R_X86_64_64", value, symbol, addend)
 | |
|             )
 | |
|             value_part = value.name if value is not HoleValue.ZERO else ""
 | |
|             if value_part and not symbol and not addend:
 | |
|                 addend_part = ""
 | |
|             else:
 | |
|                 signed = "+" if symbol is not None else ""
 | |
|                 addend_part = f"&{symbol}" if symbol else ""
 | |
|                 addend_part += f"{_signed(addend):{signed}#x}"
 | |
|                 if value_part:
 | |
|                     value_part += "+"
 | |
|             self.data.disassembly.append(
 | |
|                 f"{len(self.data.body):x}: {value_part}{addend_part}"
 | |
|             )
 | |
|             self.data.body.extend([0] * 8)
 | |
| 
 | |
|     def as_c(self, opname: str) -> str:
 | |
|         """Dump this hole as a StencilGroup initializer."""
 | |
|         return f"{{emit_{opname}, {len(self.code.body)}, {len(self.data.body)}}}"
 | |
| 
 | |
| 
 | |
| def symbol_to_value(symbol: str) -> tuple[HoleValue, str | None]:
 | |
|     """
 | |
|     Convert a symbol name to a HoleValue and a symbol name.
 | |
| 
 | |
|     Some symbols (starting with "_JIT_") are special and are converted to their
 | |
|     own HoleValues.
 | |
|     """
 | |
|     if symbol.startswith("_JIT_"):
 | |
|         try:
 | |
|             return HoleValue[symbol.removeprefix("_JIT_")], None
 | |
|         except KeyError:
 | |
|             pass
 | |
|     return HoleValue.ZERO, symbol
 | |
| 
 | |
| 
 | |
| def _signed(value: int) -> int:
 | |
|     value %= 1 << 64
 | |
|     if value & (1 << 63):
 | |
|         value -= 1 << 64
 | |
|     return value
 | 
