mirror of
https://github.com/python/cpython.git
synced 2025-08-02 16:13:13 +00:00
GH-135904: Improve the JIT's performance on macOS (GH-136528)
This commit is contained in:
parent
a68ddea3bf
commit
3d8c38f6db
6 changed files with 73 additions and 61 deletions
11
Python/jit.c
11
Python/jit.c
|
@ -431,8 +431,10 @@ void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *s
|
||||||
|
|
||||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||||
#define TRAMPOLINE_SIZE 16
|
#define TRAMPOLINE_SIZE 16
|
||||||
|
#define DATA_ALIGN 8
|
||||||
#else
|
#else
|
||||||
#define TRAMPOLINE_SIZE 0
|
#define TRAMPOLINE_SIZE 0
|
||||||
|
#define DATA_ALIGN 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Generate and patch AArch64 trampolines. The symbols to jump to are stored
|
// Generate and patch AArch64 trampolines. The symbols to jump to are stored
|
||||||
|
@ -522,8 +524,9 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
|
||||||
// Round up to the nearest page:
|
// Round up to the nearest page:
|
||||||
size_t page_size = get_page_size();
|
size_t page_size = get_page_size();
|
||||||
assert((page_size & (page_size - 1)) == 0);
|
assert((page_size & (page_size - 1)) == 0);
|
||||||
size_t padding = page_size - ((code_size + state.trampolines.size + data_size) & (page_size - 1));
|
size_t code_padding = DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1));
|
||||||
size_t total_size = code_size + state.trampolines.size + data_size + padding;
|
size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size) & (page_size - 1));
|
||||||
|
size_t total_size = code_size + state.trampolines.size + code_padding + data_size + padding;
|
||||||
unsigned char *memory = jit_alloc(total_size);
|
unsigned char *memory = jit_alloc(total_size);
|
||||||
if (memory == NULL) {
|
if (memory == NULL) {
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -545,7 +548,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
|
||||||
// Loop again to emit the code:
|
// Loop again to emit the code:
|
||||||
unsigned char *code = memory;
|
unsigned char *code = memory;
|
||||||
state.trampolines.mem = memory + code_size;
|
state.trampolines.mem = memory + code_size;
|
||||||
unsigned char *data = memory + code_size + state.trampolines.size;
|
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
|
||||||
// Compile the shim, which handles converting between the native
|
// Compile the shim, which handles converting between the native
|
||||||
// calling convention and the calling convention used by jitted code
|
// calling convention and the calling convention used by jitted code
|
||||||
// (which may be different for efficiency reasons).
|
// (which may be different for efficiency reasons).
|
||||||
|
@ -567,7 +570,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
|
||||||
code += group->code_size;
|
code += group->code_size;
|
||||||
data += group->data_size;
|
data += group->data_size;
|
||||||
assert(code == memory + code_size);
|
assert(code == memory + code_size);
|
||||||
assert(data == memory + code_size + state.trampolines.size + data_size);
|
assert(data == memory + code_size + state.trampolines.size + code_padding + data_size);
|
||||||
#ifdef MAP_JIT
|
#ifdef MAP_JIT
|
||||||
pthread_jit_write_protect_np(1);
|
pthread_jit_write_protect_np(1);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -70,21 +70,21 @@ class Optimizer:
|
||||||
|
|
||||||
path: pathlib.Path
|
path: pathlib.Path
|
||||||
_: dataclasses.KW_ONLY
|
_: dataclasses.KW_ONLY
|
||||||
# prefix used to mangle symbols on some platforms:
|
# Prefixes used to mangle local labels and symbols:
|
||||||
prefix: str = ""
|
label_prefix: str
|
||||||
|
symbol_prefix: str
|
||||||
# The first block in the linked list:
|
# The first block in the linked list:
|
||||||
_root: _Block = dataclasses.field(init=False, default_factory=_Block)
|
_root: _Block = dataclasses.field(init=False, default_factory=_Block)
|
||||||
_labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict)
|
_labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict)
|
||||||
# No groups:
|
# No groups:
|
||||||
_re_noninstructions: typing.ClassVar[re.Pattern[str]] = re.compile(
|
_re_noninstructions: typing.ClassVar[re.Pattern[str]] = re.compile(
|
||||||
r"\s*(?:\.|#|//|$)"
|
r"\s*(?:\.|#|//|;|$)"
|
||||||
)
|
)
|
||||||
# One group (label):
|
# One group (label):
|
||||||
_re_label: typing.ClassVar[re.Pattern[str]] = re.compile(
|
_re_label: typing.ClassVar[re.Pattern[str]] = re.compile(
|
||||||
r'\s*(?P<label>[\w."$?@]+):'
|
r'\s*(?P<label>[\w."$?@]+):'
|
||||||
)
|
)
|
||||||
# Override everything that follows in subclasses:
|
# Override everything that follows in subclasses:
|
||||||
_alignment: typing.ClassVar[int] = 1
|
|
||||||
_branches: typing.ClassVar[dict[str, str | None]] = {}
|
_branches: typing.ClassVar[dict[str, str | None]] = {}
|
||||||
# Two groups (instruction and target):
|
# Two groups (instruction and target):
|
||||||
_re_branch: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH
|
_re_branch: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH
|
||||||
|
@ -131,8 +131,12 @@ class Optimizer:
|
||||||
block.fallthrough = False
|
block.fallthrough = False
|
||||||
|
|
||||||
def _preprocess(self, text: str) -> str:
|
def _preprocess(self, text: str) -> str:
|
||||||
# Override this method to do preprocessing of the textual assembly:
|
# Override this method to do preprocessing of the textual assembly.
|
||||||
return text
|
# In all cases, replace references to the _JIT_CONTINUE symbol with
|
||||||
|
# references to a local _JIT_CONTINUE label (which we will add later):
|
||||||
|
continue_symbol = rf"\b{re.escape(self.symbol_prefix)}_JIT_CONTINUE\b"
|
||||||
|
continue_label = f"{self.label_prefix}_JIT_CONTINUE"
|
||||||
|
return re.sub(continue_symbol, continue_label, text)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _invert_branch(cls, line: str, target: str) -> str | None:
|
def _invert_branch(cls, line: str, target: str) -> str | None:
|
||||||
|
@ -197,15 +201,12 @@ class Optimizer:
|
||||||
# jmp FOO
|
# jmp FOO
|
||||||
# After:
|
# After:
|
||||||
# jmp FOO
|
# jmp FOO
|
||||||
# .balign 8
|
|
||||||
# _JIT_CONTINUE:
|
# _JIT_CONTINUE:
|
||||||
# This lets the assembler encode _JIT_CONTINUE jumps at build time!
|
# This lets the assembler encode _JIT_CONTINUE jumps at build time!
|
||||||
align = _Block()
|
continuation = self._lookup_label(f"{self.label_prefix}_JIT_CONTINUE")
|
||||||
align.noninstructions.append(f"\t.balign\t{self._alignment}")
|
|
||||||
continuation = self._lookup_label(f"{self.prefix}_JIT_CONTINUE")
|
|
||||||
assert continuation.label
|
assert continuation.label
|
||||||
continuation.noninstructions.append(f"{continuation.label}:")
|
continuation.noninstructions.append(f"{continuation.label}:")
|
||||||
end.link, align.link, continuation.link = align, continuation, end.link
|
end.link, continuation.link = continuation, end.link
|
||||||
|
|
||||||
def _mark_hot_blocks(self) -> None:
|
def _mark_hot_blocks(self) -> None:
|
||||||
# Start with the last block, and perform a DFS to find all blocks that
|
# Start with the last block, and perform a DFS to find all blocks that
|
||||||
|
@ -285,8 +286,6 @@ class Optimizer:
|
||||||
class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods
|
class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods
|
||||||
"""aarch64-apple-darwin/aarch64-pc-windows-msvc/aarch64-unknown-linux-gnu"""
|
"""aarch64-apple-darwin/aarch64-pc-windows-msvc/aarch64-unknown-linux-gnu"""
|
||||||
|
|
||||||
# TODO: @diegorusso
|
|
||||||
_alignment = 8
|
|
||||||
# https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch-
|
# https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch-
|
||||||
_re_jump = re.compile(r"\s*b\s+(?P<target>[\w.]+)")
|
_re_jump = re.compile(r"\s*b\s+(?P<target>[\w.]+)")
|
||||||
|
|
||||||
|
@ -302,18 +301,3 @@ class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods
|
||||||
_re_jump = re.compile(r"\s*jmp\s+(?P<target>[\w.]+)")
|
_re_jump = re.compile(r"\s*jmp\s+(?P<target>[\w.]+)")
|
||||||
# https://www.felixcloutier.com/x86/ret
|
# https://www.felixcloutier.com/x86/ret
|
||||||
_re_return = re.compile(r"\s*ret\b")
|
_re_return = re.compile(r"\s*ret\b")
|
||||||
|
|
||||||
|
|
||||||
class OptimizerX8664Windows(OptimizerX86): # pylint: disable = too-few-public-methods
|
|
||||||
"""x86_64-pc-windows-msvc"""
|
|
||||||
|
|
||||||
def _preprocess(self, text: str) -> str:
|
|
||||||
text = super()._preprocess(text)
|
|
||||||
# Before:
|
|
||||||
# rex64 jmpq *__imp__JIT_CONTINUE(%rip)
|
|
||||||
# After:
|
|
||||||
# jmp _JIT_CONTINUE
|
|
||||||
far_indirect_jump = (
|
|
||||||
rf"rex64\s+jmpq\s+\*__imp_(?P<target>{self.prefix}_JIT_\w+)\(%rip\)"
|
|
||||||
)
|
|
||||||
return re.sub(far_indirect_jump, r"jmp\t\g<target>", text)
|
|
||||||
|
|
|
@ -44,7 +44,8 @@ class _Target(typing.Generic[_S, _R]):
|
||||||
_: dataclasses.KW_ONLY
|
_: dataclasses.KW_ONLY
|
||||||
args: typing.Sequence[str] = ()
|
args: typing.Sequence[str] = ()
|
||||||
optimizer: type[_optimizers.Optimizer] = _optimizers.Optimizer
|
optimizer: type[_optimizers.Optimizer] = _optimizers.Optimizer
|
||||||
prefix: str = ""
|
label_prefix: typing.ClassVar[str]
|
||||||
|
symbol_prefix: typing.ClassVar[str]
|
||||||
stable: bool = False
|
stable: bool = False
|
||||||
debug: bool = False
|
debug: bool = False
|
||||||
verbose: bool = False
|
verbose: bool = False
|
||||||
|
@ -172,7 +173,9 @@ class _Target(typing.Generic[_S, _R]):
|
||||||
*shlex.split(self.cflags),
|
*shlex.split(self.cflags),
|
||||||
]
|
]
|
||||||
await _llvm.run("clang", args_s, echo=self.verbose)
|
await _llvm.run("clang", args_s, echo=self.verbose)
|
||||||
self.optimizer(s, prefix=self.prefix).run()
|
self.optimizer(
|
||||||
|
s, label_prefix=self.label_prefix, symbol_prefix=self.symbol_prefix
|
||||||
|
).run()
|
||||||
args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"]
|
args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"]
|
||||||
await _llvm.run("clang", args_o, echo=self.verbose)
|
await _llvm.run("clang", args_o, echo=self.verbose)
|
||||||
return await self._parse(o)
|
return await self._parse(o)
|
||||||
|
@ -274,7 +277,7 @@ class _COFF(
|
||||||
symbol = wrapped_symbol["Symbol"]
|
symbol = wrapped_symbol["Symbol"]
|
||||||
offset = base + symbol["Value"]
|
offset = base + symbol["Value"]
|
||||||
name = symbol["Name"]
|
name = symbol["Name"]
|
||||||
name = name.removeprefix(self.prefix)
|
name = name.removeprefix(self.symbol_prefix)
|
||||||
if name not in group.symbols:
|
if name not in group.symbols:
|
||||||
group.symbols[name] = value, offset
|
group.symbols[name] = value, offset
|
||||||
for wrapped_relocation in section["Relocations"]:
|
for wrapped_relocation in section["Relocations"]:
|
||||||
|
@ -285,9 +288,9 @@ class _COFF(
|
||||||
def _unwrap_dllimport(self, name: str) -> tuple[_stencils.HoleValue, str | None]:
|
def _unwrap_dllimport(self, name: str) -> tuple[_stencils.HoleValue, str | None]:
|
||||||
if name.startswith("__imp_"):
|
if name.startswith("__imp_"):
|
||||||
name = name.removeprefix("__imp_")
|
name = name.removeprefix("__imp_")
|
||||||
name = name.removeprefix(self.prefix)
|
name = name.removeprefix(self.symbol_prefix)
|
||||||
return _stencils.HoleValue.GOT, name
|
return _stencils.HoleValue.GOT, name
|
||||||
name = name.removeprefix(self.prefix)
|
name = name.removeprefix(self.symbol_prefix)
|
||||||
return _stencils.symbol_to_value(name)
|
return _stencils.symbol_to_value(name)
|
||||||
|
|
||||||
def _handle_relocation(
|
def _handle_relocation(
|
||||||
|
@ -335,9 +338,24 @@ class _COFF(
|
||||||
return _stencils.Hole(offset, kind, value, symbol, addend)
|
return _stencils.Hole(offset, kind, value, symbol, addend)
|
||||||
|
|
||||||
|
|
||||||
|
class _COFF32(_COFF):
|
||||||
|
# These mangle like Mach-O and other "older" formats:
|
||||||
|
label_prefix = "L"
|
||||||
|
symbol_prefix = "_"
|
||||||
|
|
||||||
|
|
||||||
|
class _COFF64(_COFF):
|
||||||
|
# These mangle like ELF and other "newer" formats:
|
||||||
|
label_prefix = ".L"
|
||||||
|
symbol_prefix = ""
|
||||||
|
|
||||||
|
|
||||||
class _ELF(
|
class _ELF(
|
||||||
_Target[_schema.ELFSection, _schema.ELFRelocation]
|
_Target[_schema.ELFSection, _schema.ELFRelocation]
|
||||||
): # pylint: disable = too-few-public-methods
|
): # pylint: disable = too-few-public-methods
|
||||||
|
label_prefix = ".L"
|
||||||
|
symbol_prefix = ""
|
||||||
|
|
||||||
def _handle_section(
|
def _handle_section(
|
||||||
self, section: _schema.ELFSection, group: _stencils.StencilGroup
|
self, section: _schema.ELFSection, group: _stencils.StencilGroup
|
||||||
) -> None:
|
) -> None:
|
||||||
|
@ -374,7 +392,7 @@ class _ELF(
|
||||||
symbol = wrapped_symbol["Symbol"]
|
symbol = wrapped_symbol["Symbol"]
|
||||||
offset = len(stencil.body) + symbol["Value"]
|
offset = len(stencil.body) + symbol["Value"]
|
||||||
name = symbol["Name"]["Name"]
|
name = symbol["Name"]["Name"]
|
||||||
name = name.removeprefix(self.prefix)
|
name = name.removeprefix(self.symbol_prefix)
|
||||||
group.symbols[name] = value, offset
|
group.symbols[name] = value, offset
|
||||||
stencil.body.extend(section["SectionData"]["Bytes"])
|
stencil.body.extend(section["SectionData"]["Bytes"])
|
||||||
assert not section["Relocations"]
|
assert not section["Relocations"]
|
||||||
|
@ -409,7 +427,7 @@ class _ELF(
|
||||||
},
|
},
|
||||||
}:
|
}:
|
||||||
offset += base
|
offset += base
|
||||||
s = s.removeprefix(self.prefix)
|
s = s.removeprefix(self.symbol_prefix)
|
||||||
value, symbol = _stencils.HoleValue.GOT, s
|
value, symbol = _stencils.HoleValue.GOT, s
|
||||||
case {
|
case {
|
||||||
"Addend": addend,
|
"Addend": addend,
|
||||||
|
@ -418,7 +436,7 @@ class _ELF(
|
||||||
"Type": {"Name": kind},
|
"Type": {"Name": kind},
|
||||||
}:
|
}:
|
||||||
offset += base
|
offset += base
|
||||||
s = s.removeprefix(self.prefix)
|
s = s.removeprefix(self.symbol_prefix)
|
||||||
value, symbol = _stencils.symbol_to_value(s)
|
value, symbol = _stencils.symbol_to_value(s)
|
||||||
case _:
|
case _:
|
||||||
raise NotImplementedError(relocation)
|
raise NotImplementedError(relocation)
|
||||||
|
@ -428,6 +446,9 @@ class _ELF(
|
||||||
class _MachO(
|
class _MachO(
|
||||||
_Target[_schema.MachOSection, _schema.MachORelocation]
|
_Target[_schema.MachOSection, _schema.MachORelocation]
|
||||||
): # pylint: disable = too-few-public-methods
|
): # pylint: disable = too-few-public-methods
|
||||||
|
label_prefix = "L"
|
||||||
|
symbol_prefix = "_"
|
||||||
|
|
||||||
def _handle_section(
|
def _handle_section(
|
||||||
self, section: _schema.MachOSection, group: _stencils.StencilGroup
|
self, section: _schema.MachOSection, group: _stencils.StencilGroup
|
||||||
) -> None:
|
) -> None:
|
||||||
|
@ -435,10 +456,10 @@ class _MachO(
|
||||||
assert "SectionData" in section
|
assert "SectionData" in section
|
||||||
flags = {flag["Name"] for flag in section["Attributes"]["Flags"]}
|
flags = {flag["Name"] for flag in section["Attributes"]["Flags"]}
|
||||||
name = section["Name"]["Value"]
|
name = section["Name"]["Value"]
|
||||||
name = name.removeprefix(self.prefix)
|
name = name.removeprefix(self.symbol_prefix)
|
||||||
if "Debug" in flags:
|
if "Debug" in flags:
|
||||||
return
|
return
|
||||||
if "SomeInstructions" in flags:
|
if "PureInstructions" in flags:
|
||||||
value = _stencils.HoleValue.CODE
|
value = _stencils.HoleValue.CODE
|
||||||
stencil = group.code
|
stencil = group.code
|
||||||
start_address = 0
|
start_address = 0
|
||||||
|
@ -459,7 +480,7 @@ class _MachO(
|
||||||
symbol = wrapped_symbol["Symbol"]
|
symbol = wrapped_symbol["Symbol"]
|
||||||
offset = symbol["Value"] - start_address
|
offset = symbol["Value"] - start_address
|
||||||
name = symbol["Name"]["Name"]
|
name = symbol["Name"]["Name"]
|
||||||
name = name.removeprefix(self.prefix)
|
name = name.removeprefix(self.symbol_prefix)
|
||||||
group.symbols[name] = value, offset
|
group.symbols[name] = value, offset
|
||||||
assert "Relocations" in section
|
assert "Relocations" in section
|
||||||
for wrapped_relocation in section["Relocations"]:
|
for wrapped_relocation in section["Relocations"]:
|
||||||
|
@ -484,7 +505,7 @@ class _MachO(
|
||||||
},
|
},
|
||||||
}:
|
}:
|
||||||
offset += base
|
offset += base
|
||||||
s = s.removeprefix(self.prefix)
|
s = s.removeprefix(self.symbol_prefix)
|
||||||
value, symbol = _stencils.HoleValue.GOT, s
|
value, symbol = _stencils.HoleValue.GOT, s
|
||||||
addend = 0
|
addend = 0
|
||||||
case {
|
case {
|
||||||
|
@ -493,7 +514,7 @@ class _MachO(
|
||||||
"Type": {"Name": "X86_64_RELOC_GOT" | "X86_64_RELOC_GOT_LOAD" as kind},
|
"Type": {"Name": "X86_64_RELOC_GOT" | "X86_64_RELOC_GOT_LOAD" as kind},
|
||||||
}:
|
}:
|
||||||
offset += base
|
offset += base
|
||||||
s = s.removeprefix(self.prefix)
|
s = s.removeprefix(self.symbol_prefix)
|
||||||
value, symbol = _stencils.HoleValue.GOT, s
|
value, symbol = _stencils.HoleValue.GOT, s
|
||||||
addend = (
|
addend = (
|
||||||
int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4
|
int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4
|
||||||
|
@ -508,7 +529,7 @@ class _MachO(
|
||||||
"Type": {"Name": "X86_64_RELOC_BRANCH" | "X86_64_RELOC_SIGNED" as kind},
|
"Type": {"Name": "X86_64_RELOC_BRANCH" | "X86_64_RELOC_SIGNED" as kind},
|
||||||
}:
|
}:
|
||||||
offset += base
|
offset += base
|
||||||
s = s.removeprefix(self.prefix)
|
s = s.removeprefix(self.symbol_prefix)
|
||||||
value, symbol = _stencils.symbol_to_value(s)
|
value, symbol = _stencils.symbol_to_value(s)
|
||||||
addend = (
|
addend = (
|
||||||
int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4
|
int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4
|
||||||
|
@ -523,7 +544,7 @@ class _MachO(
|
||||||
"Type": {"Name": kind},
|
"Type": {"Name": kind},
|
||||||
}:
|
}:
|
||||||
offset += base
|
offset += base
|
||||||
s = s.removeprefix(self.prefix)
|
s = s.removeprefix(self.symbol_prefix)
|
||||||
value, symbol = _stencils.symbol_to_value(s)
|
value, symbol = _stencils.symbol_to_value(s)
|
||||||
addend = 0
|
addend = 0
|
||||||
case _:
|
case _:
|
||||||
|
@ -531,19 +552,19 @@ class _MachO(
|
||||||
return _stencils.Hole(offset, kind, value, symbol, addend)
|
return _stencils.Hole(offset, kind, value, symbol, addend)
|
||||||
|
|
||||||
|
|
||||||
def get_target(host: str) -> _COFF | _ELF | _MachO:
|
def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
|
||||||
"""Build a _Target for the given host "triple" and options."""
|
"""Build a _Target for the given host "triple" and options."""
|
||||||
optimizer: type[_optimizers.Optimizer]
|
optimizer: type[_optimizers.Optimizer]
|
||||||
target: _COFF | _ELF | _MachO
|
target: _COFF32 | _COFF64 | _ELF | _MachO
|
||||||
if re.fullmatch(r"aarch64-apple-darwin.*", host):
|
if re.fullmatch(r"aarch64-apple-darwin.*", host):
|
||||||
condition = "defined(__aarch64__) && defined(__APPLE__)"
|
condition = "defined(__aarch64__) && defined(__APPLE__)"
|
||||||
optimizer = _optimizers.OptimizerAArch64
|
optimizer = _optimizers.OptimizerAArch64
|
||||||
target = _MachO(host, condition, optimizer=optimizer, prefix="_")
|
target = _MachO(host, condition, optimizer=optimizer)
|
||||||
elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
|
elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
|
||||||
args = ["-fms-runtime-lib=dll", "-fplt"]
|
args = ["-fms-runtime-lib=dll", "-fplt"]
|
||||||
condition = "defined(_M_ARM64)"
|
condition = "defined(_M_ARM64)"
|
||||||
optimizer = _optimizers.OptimizerAArch64
|
optimizer = _optimizers.OptimizerAArch64
|
||||||
target = _COFF(host, condition, args=args, optimizer=optimizer)
|
target = _COFF64(host, condition, args=args, optimizer=optimizer)
|
||||||
elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
|
elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
|
||||||
# -mno-outline-atomics: Keep intrinsics from being emitted.
|
# -mno-outline-atomics: Keep intrinsics from being emitted.
|
||||||
args = ["-fpic", "-mno-outline-atomics"]
|
args = ["-fpic", "-mno-outline-atomics"]
|
||||||
|
@ -555,16 +576,16 @@ def get_target(host: str) -> _COFF | _ELF | _MachO:
|
||||||
args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"]
|
args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"]
|
||||||
optimizer = _optimizers.OptimizerX86
|
optimizer = _optimizers.OptimizerX86
|
||||||
condition = "defined(_M_IX86)"
|
condition = "defined(_M_IX86)"
|
||||||
target = _COFF(host, condition, args=args, optimizer=optimizer, prefix="_")
|
target = _COFF32(host, condition, args=args, optimizer=optimizer)
|
||||||
elif re.fullmatch(r"x86_64-apple-darwin.*", host):
|
elif re.fullmatch(r"x86_64-apple-darwin.*", host):
|
||||||
condition = "defined(__x86_64__) && defined(__APPLE__)"
|
condition = "defined(__x86_64__) && defined(__APPLE__)"
|
||||||
optimizer = _optimizers.OptimizerX86
|
optimizer = _optimizers.OptimizerX86
|
||||||
target = _MachO(host, condition, optimizer=optimizer, prefix="_")
|
target = _MachO(host, condition, optimizer=optimizer)
|
||||||
elif re.fullmatch(r"x86_64-pc-windows-msvc", host):
|
elif re.fullmatch(r"x86_64-pc-windows-msvc", host):
|
||||||
args = ["-fms-runtime-lib=dll"]
|
args = ["-fms-runtime-lib=dll"]
|
||||||
condition = "defined(_M_X64)"
|
condition = "defined(_M_X64)"
|
||||||
optimizer = _optimizers.OptimizerX8664Windows
|
optimizer = _optimizers.OptimizerX86
|
||||||
target = _COFF(host, condition, args=args, optimizer=optimizer)
|
target = _COFF64(host, condition, args=args, optimizer=optimizer)
|
||||||
elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
|
elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
|
||||||
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
|
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
|
||||||
condition = "defined(__x86_64__) && defined(__linux__)"
|
condition = "defined(__x86_64__) && defined(__linux__)"
|
||||||
|
|
|
@ -6,3 +6,7 @@ typedef jit_func __attribute__((preserve_none)) jit_func_preserve_none;
|
||||||
#define PATCH_VALUE(TYPE, NAME, ALIAS) \
|
#define PATCH_VALUE(TYPE, NAME, ALIAS) \
|
||||||
PyAPI_DATA(void) ALIAS; \
|
PyAPI_DATA(void) ALIAS; \
|
||||||
TYPE NAME = (TYPE)(uintptr_t)&ALIAS;
|
TYPE NAME = (TYPE)(uintptr_t)&ALIAS;
|
||||||
|
|
||||||
|
#define DECLARE_TARGET(NAME) \
|
||||||
|
_Py_CODEUNIT *__attribute__((preserve_none, visibility("hidden"))) \
|
||||||
|
NAME(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate);
|
||||||
|
|
|
@ -10,6 +10,6 @@ _Py_CODEUNIT *
|
||||||
_JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate)
|
_JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate)
|
||||||
{
|
{
|
||||||
// Note that this is *not* a tail call:
|
// Note that this is *not* a tail call:
|
||||||
PATCH_VALUE(jit_func_preserve_none, call, _JIT_CONTINUE);
|
DECLARE_TARGET(_JIT_CONTINUE);
|
||||||
return call(frame, stack_pointer, tstate);
|
return _JIT_CONTINUE(frame, stack_pointer, tstate);
|
||||||
}
|
}
|
||||||
|
|
|
@ -74,10 +74,10 @@ do { \
|
||||||
do { \
|
do { \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define PATCH_JUMP(ALIAS) \
|
#define PATCH_JUMP(ALIAS) \
|
||||||
do { \
|
do { \
|
||||||
PATCH_VALUE(jit_func_preserve_none, jump, ALIAS); \
|
DECLARE_TARGET(ALIAS); \
|
||||||
__attribute__((musttail)) return jump(frame, stack_pointer, tstate); \
|
__attribute__((musttail)) return ALIAS(frame, stack_pointer, tstate); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#undef JUMP_TO_JUMP_TARGET
|
#undef JUMP_TO_JUMP_TARGET
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue