Allow download of Python distribution variants with newer CPU instruction sets (#9781)

Supersedes https://github.com/astral-sh/uv/pull/8517 with an alternative
approach of making all the variants available instead of replacing the
x86_64 (v1) variant with x86_64_v2.

This doesn't add automatic inference of the instruction sets supported by
the host CPU, but that should be doable per @charliermarsh's comment there.
I'm going to do it as a follow-up since this has been pretty time-consuming.

e.g.,

```
❯ cargo run -q -- python install cpython-3.12.8-linux-x86_64_v3-gnu
Installed Python 3.12.8 in 2.72s
 + cpython-3.12.8-linux-x86_64_v3-gnu
```

Co-authored-by: j178 <10510431+j178@users.noreply.github.com>
This commit is contained in:
Zanie Blue 2024-12-10 14:26:45 -06:00 committed by GitHub
parent fd420db197
commit 761dafd0d1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 23040 additions and 2044 deletions

File diff suppressed because it is too large Load diff

View file

@ -50,7 +50,7 @@ import json
import logging
import os
import re
from dataclasses import dataclass, field
from dataclasses import asdict, dataclass, field
from enum import StrEnum
from pathlib import Path
from typing import Generator, Iterable, NamedTuple, Self
@ -72,11 +72,40 @@ def batched(iterable: Iterable, n: int) -> Generator[tuple, None, None]:
yield batch
@dataclass(frozen=True)
class Arch:
# The architecture family, e.g. "x86_64", "aarch64".
family: str
# The architecture variant, e.g., "v2" in "x86_64_v2"
variant: str | None = None
def key(self) -> str:
return str(self)
def __str__(self) -> str:
return (self.family + "_" + self.variant) if self.variant else self.family
def __gt__(self, other) -> bool:
return (self.family, self.variant or "") > (other.family, other.variant or "")
def __lt__(self, other) -> bool:
return (self.family, self.variant or "") < (other.family, other.variant or "")
type PlatformTripleKey = tuple[str, str, str]
class PlatformTriple(NamedTuple):
# The operating system, e.g. "linux", "macos", "windows".
platform: str
arch: str
# The architecture, e.g. "x86_64", "aarch64".
arch: Arch
# The libc implementation, e.g. "gnu", "musl", "none".
libc: str
def key(self) -> PlatformTripleKey:
return (self.platform, self.arch.key(), self.libc)
class Version(NamedTuple):
major: int
@ -229,12 +258,12 @@ class CPythonFinder(Finder):
downloads = []
for version_downloads in downloads_by_version.values():
selected: dict[
tuple[PlatformTriple, Variant | None],
tuple[PlatformTripleKey, Variant | None],
tuple[PythonDownload, tuple[int, int]],
] = {}
for download in version_downloads:
priority = self._get_priority(download)
existing = selected.get((download.triple, download.variant))
existing = selected.get((download.triple.key(), download.variant))
if existing:
existing_download, existing_priority = existing
# Skip if we have a flavor with higher priority already (indicated by a smaller value)
@ -247,7 +276,7 @@ class CPythonFinder(Finder):
existing_download.flavor,
)
continue
selected[(download.triple, download.variant)] = (
selected[(download.triple.key(), download.variant)] = (
download,
priority,
)
@ -368,11 +397,12 @@ class CPythonFinder(Finder):
return PlatformTriple(operating_system, arch, libc)
def _normalize_arch(self, arch: str) -> str:
def _normalize_arch(self, arch: str) -> Arch:
arch = self.ARCH_MAP.get(arch, arch)
pieces = arch.split("_")
# Strip `_vN` from `x86_64`
return "_".join(pieces[:2])
family = "_".join(pieces[:2])
variant = pieces[2] if len(pieces) > 2 else None
return Arch(family, variant)
def _normalize_os(self, os: str) -> str:
return os
@ -472,8 +502,8 @@ class PyPyFinder(Finder):
return list(results.values())
def _normalize_arch(self, arch: str) -> str:
return self.ARCH_MAPPING.get(arch, arch)
def _normalize_arch(self, arch: str) -> Arch:
return Arch(self.ARCH_MAPPING.get(arch, arch), None)
def _normalize_os(self, os: str) -> str:
return self.PLATFORM_MAPPING.get(os, os)
@ -539,7 +569,7 @@ def render(downloads: list[PythonDownload]) -> None:
)
results[key] = {
"name": download.implementation,
"arch": download.triple.arch,
"arch": asdict(download.triple.arch),
"os": download.triple.platform,
"libc": download.triple.libc,
"major": download.version.major,

File diff suppressed because it is too large Load diff

View file

@ -3,6 +3,7 @@
use uv_pep440::{Prerelease, PrereleaseKind};
use crate::PythonVariant;
use crate::platform::ArchVariant;
pub(crate) const PYTHON_DOWNLOADS: &[ManagedPythonDownload] = &[
{{#versions}}
@ -13,7 +14,10 @@ pub(crate) const PYTHON_DOWNLOADS: &[ManagedPythonDownload] = &[
patch: {{value.patch}},
prerelease: {{value.prerelease}},
implementation: LenientImplementationName::Known(ImplementationName::{{value.name}}),
arch: Arch(target_lexicon::Architecture::{{value.arch}}),
arch: Arch{
family: target_lexicon::Architecture::{{value.arch_family}},
variant: {{value.arch_variant}},
},
os: Os(target_lexicon::OperatingSystem::{{value.os}}),
{{#value.libc}}
libc: Libc::Some(target_lexicon::Environment::{{.}}),

View file

@ -13,10 +13,29 @@ pub enum Error {
UnknownArch(String),
#[error("Unknown libc environment: {0}")]
UnknownLibc(String),
#[error("Unsupported variant `{0}` for architecture `{1}`")]
UnsupportedVariant(String, String),
}
/// Architecture variants, e.g., with support for different instruction sets.
///
/// Parsed from and displayed as the lowercase suffixes `v2`, `v3` and `v4`
/// (e.g., the `v3` in `x86_64_v3`). Variants are currently only accepted for
/// the `x86_64` architecture.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub enum ArchVariant {
/// Targets 64-bit Intel/AMD CPUs newer than Nehalem (2008).
///
/// Includes SSE3, SSE4 and other post-2003 CPU instructions.
V2,
/// Targets 64-bit Intel/AMD CPUs newer than Haswell (2013) and Excavator (2015).
///
/// Includes AVX, AVX2, MOVBE and other newer CPU instructions.
V3,
/// Targets 64-bit Intel/AMD CPUs with AVX-512 instructions (post-2017 Intel CPUs).
///
/// Note that many post-2017 Intel CPUs do not support AVX-512, so support
/// for this variant cannot be assumed from the age of the CPU alone.
V4,
}
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub struct Arch(pub(crate) target_lexicon::Architecture);
pub struct Arch {
pub(crate) family: target_lexicon::Architecture,
pub(crate) variant: Option<ArchVariant>,
}
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub struct Os(pub(crate) target_lexicon::OperatingSystem);
@ -78,7 +97,10 @@ impl Os {
impl Arch {
pub fn from_env() -> Self {
Self(target_lexicon::HOST.architecture)
Self {
family: target_lexicon::HOST.architecture,
variant: None,
}
}
}
@ -102,12 +124,16 @@ impl Display for Os {
impl Display for Arch {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &**self {
match self.family {
target_lexicon::Architecture::X86_32(target_lexicon::X86_32Architecture::I686) => {
write!(f, "x86")
write!(f, "x86")?;
}
inner => write!(f, "{inner}"),
inner => write!(f, "{inner}")?,
}
if let Some(variant) = self.variant {
write!(f, "_{variant}")?;
}
Ok(())
}
}
@ -131,25 +157,69 @@ impl FromStr for Arch {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let inner = match s {
// Allow users to specify "x86" as a shorthand for the "i686" variant, they should not need
// to specify the exact architecture and this variant is what we have downloads for.
"x86" => target_lexicon::Architecture::X86_32(target_lexicon::X86_32Architecture::I686),
_ => target_lexicon::Architecture::from_str(s)
.map_err(|()| Error::UnknownArch(s.to_string()))?,
};
if matches!(inner, target_lexicon::Architecture::Unknown) {
return Err(Error::UnknownArch(s.to_string()));
fn parse_family(s: &str) -> Result<target_lexicon::Architecture, Error> {
let inner = match s {
// Allow users to specify "x86" as a shorthand for the "i686" variant, they should not need
// to specify the exact architecture and this variant is what we have downloads for.
"x86" => {
target_lexicon::Architecture::X86_32(target_lexicon::X86_32Architecture::I686)
}
_ => target_lexicon::Architecture::from_str(s)
.map_err(|()| Error::UnknownArch(s.to_string()))?,
};
if matches!(inner, target_lexicon::Architecture::Unknown) {
return Err(Error::UnknownArch(s.to_string()));
}
Ok(inner)
}
Ok(Self(inner))
// First check for a variant
if let Some((Ok(family), Ok(variant))) = s
.rsplit_once('_')
.map(|(family, variant)| (parse_family(family), ArchVariant::from_str(variant)))
{
// We only support variants for `x86_64` right now
if !matches!(family, target_lexicon::Architecture::X86_64) {
return Err(Error::UnsupportedVariant(
variant.to_string(),
family.to_string(),
));
}
return Ok(Self {
family,
variant: Some(variant),
});
}
let family = parse_family(s)?;
Ok(Self {
family,
variant: None,
})
}
}
impl Deref for Arch {
type Target = target_lexicon::Architecture;
impl FromStr for ArchVariant {
type Err = ();
fn deref(&self) -> &Self::Target {
&self.0
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"v2" => Ok(Self::V2),
"v3" => Ok(Self::V3),
"v4" => Ok(Self::V4),
_ => Err(()),
}
}
}
impl Display for ArchVariant {
    /// Writes the lowercase variant suffix (`v2`, `v3` or `v4`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Select the literal first so there is a single write site.
        let suffix = match self {
            Self::V2 => "v2",
            Self::V3 => "v3",
            Self::V4 => "v4",
        };
        f.write_str(suffix)
    }
}
@ -164,25 +234,48 @@ impl Deref for Os {
impl From<&uv_platform_tags::Arch> for Arch {
fn from(value: &uv_platform_tags::Arch) -> Self {
match value {
uv_platform_tags::Arch::Aarch64 => Self(target_lexicon::Architecture::Aarch64(
target_lexicon::Aarch64Architecture::Aarch64,
)),
uv_platform_tags::Arch::Armv6L => Self(target_lexicon::Architecture::Arm(
target_lexicon::ArmArchitecture::Armv6,
)),
uv_platform_tags::Arch::Armv7L => Self(target_lexicon::Architecture::Arm(
target_lexicon::ArmArchitecture::Armv7,
)),
uv_platform_tags::Arch::S390X => Self(target_lexicon::Architecture::S390x),
uv_platform_tags::Arch::Powerpc64 => Self(target_lexicon::Architecture::Powerpc64),
uv_platform_tags::Arch::Powerpc64Le => Self(target_lexicon::Architecture::Powerpc64le),
uv_platform_tags::Arch::X86 => Self(target_lexicon::Architecture::X86_32(
target_lexicon::X86_32Architecture::I686,
)),
uv_platform_tags::Arch::X86_64 => Self(target_lexicon::Architecture::X86_64),
uv_platform_tags::Arch::Riscv64 => Self(target_lexicon::Architecture::Riscv64(
target_lexicon::Riscv64Architecture::Riscv64,
)),
uv_platform_tags::Arch::Aarch64 => Self {
family: target_lexicon::Architecture::Aarch64(
target_lexicon::Aarch64Architecture::Aarch64,
),
variant: None,
},
uv_platform_tags::Arch::Armv6L => Self {
family: target_lexicon::Architecture::Arm(target_lexicon::ArmArchitecture::Armv6),
variant: None,
},
uv_platform_tags::Arch::Armv7L => Self {
family: target_lexicon::Architecture::Arm(target_lexicon::ArmArchitecture::Armv7),
variant: None,
},
uv_platform_tags::Arch::S390X => Self {
family: target_lexicon::Architecture::S390x,
variant: None,
},
uv_platform_tags::Arch::Powerpc64 => Self {
family: target_lexicon::Architecture::Powerpc64,
variant: None,
},
uv_platform_tags::Arch::Powerpc64Le => Self {
family: target_lexicon::Architecture::Powerpc64le,
variant: None,
},
uv_platform_tags::Arch::X86 => Self {
family: target_lexicon::Architecture::X86_32(
target_lexicon::X86_32Architecture::I686,
),
variant: None,
},
uv_platform_tags::Arch::X86_64 => Self {
family: target_lexicon::Architecture::X86_64,
variant: None,
},
uv_platform_tags::Arch::Riscv64 => Self {
family: target_lexicon::Architecture::Riscv64(
target_lexicon::Riscv64Architecture::Riscv64,
),
variant: None,
},
}
}
}

View file

@ -62,17 +62,23 @@ def prepare_variant(variant: str | None) -> str | None:
raise ValueError(f"Unknown variant: {variant}")
def prepare_arch(arch: str) -> str:
match arch:
def prepare_arch(arch: dict) -> tuple[str, str]:
match arch["family"]:
# Special constructors
case "i686":
return "X86_32(target_lexicon::X86_32Architecture::I686)"
family = "X86_32(target_lexicon::X86_32Architecture::I686)"
case "aarch64":
return "Aarch64(target_lexicon::Aarch64Architecture::Aarch64)"
family = "Aarch64(target_lexicon::Aarch64Architecture::Aarch64)"
case "armv7":
return "Arm(target_lexicon::ArmArchitecture::Armv7)"
case _:
return arch.capitalize()
family = "Arm(target_lexicon::ArmArchitecture::Armv7)"
case value:
family = value.capitalize()
variant = (
f"Some(ArchVariant::{arch['variant'].capitalize()})"
if arch["variant"]
else "None"
)
return family, variant
def prepare_prerelease(prerelease: str) -> str:
@ -86,7 +92,7 @@ def prepare_prerelease(prerelease: str) -> str:
def prepare_value(value: dict) -> dict:
value["os"] = value["os"].title()
value["arch"] = prepare_arch(value["arch"])
value["arch_family"], value["arch_variant"] = prepare_arch(value["arch"])
value["name"] = prepare_name(value["name"])
value["libc"] = prepare_libc(value["libc"])
value["prerelease"] = prepare_prerelease(value["prerelease"])