Replace Python bootstrapping script with Rust implementation (#2842)

See https://github.com/astral-sh/uv/issues/2617

Note this also includes:
- #2918 
- #2931 (pending)

A first step towards Python toolchain management in Rust.

First, we add a new crate to manage Python download metadata:

- Adds a new `uv-toolchain` crate
- Adds Rust structs for Python version download metadata
- Duplicates the script which downloads Python version metadata
- Adds a script to generate Rust code from the JSON metadata
- Adds a utility to download and extract the Python version

I explored some alternatives, like a build script using things like
`serde` and `uneval` to automatically construct the code from our
structs, but deemed it too heavy. Unlike Rye, I don't generate the Rust
code directly from the web requests; instead, an intermediate JSON layer
speeds up iteration on the Rust types.

Next, we add a `uv-dev` command, `fetch-python`, to download Python
versions per the bootstrapping script.

- Downloads a requested version or reads from `.python-versions`
- Extracts to `UV_BOOTSTRAP_DIR`
- Links executables for path extension

This command is not really intended to be user facing, but it's a good
PoC for the `uv-toolchain` API. Hash checking (via the sha256) isn't
implemented yet; we can do that in a follow-up.

Finally, we remove the `scripts/bootstrap` directory, update CI to use
the new command, and update the CONTRIBUTING docs.

<img width="1023" alt="Screenshot 2024-04-08 at 17 12 15"
src="57bd3cf1-7477-4bb8-a8e9-802a00d772cb">
This commit is contained in:
Zanie Blue 2024-04-10 11:22:41 -05:00 committed by GitHub
parent 7cd98d2499
commit 44e39bdca3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 8170 additions and 3703 deletions

View file

@ -1,256 +0,0 @@
#!/usr/bin/env python3.12
"""
Fetch Python version metadata.
Generates the bootstrap `versions.json` file.
Usage:
python fetch-version-metadata.py
Acknowledgements:
Derived from https://github.com/mitsuhiko/rye/tree/f9822267a7f00332d15be8551f89a212e7bc9017
Originally authored by Armin Ronacher under the MIT license
"""
import argparse
import hashlib
import json
import logging
import re
import urllib.error
import urllib.request
from itertools import chain
from pathlib import Path
from urllib.parse import unquote
# Directory containing this script; the generated metadata file is written next to it.
SELF_DIR = Path(__file__).parent
# GitHub REST API endpoint that lists python-build-standalone releases (fetched page by page).
RELEASE_URL = "https://api.github.com/repos/indygreg/python-build-standalone/releases"
# Request headers naming the GitHub REST API version; presumably meant for requests to RELEASE_URL.
HEADERS = {
"X-GitHub-Api-Version": "2022-11-28",
}
# Output path of the generated metadata (see `find`).
VERSIONS_FILE = SELF_DIR / "versions.json"
# Build flavors in order of preference. When several assets exist for the same
# Python version and triple, the one whose flavor appears earliest here is kept
# (see `_sort_key` and `find`). "shared-noopt" is listed twice; `list.index`
# returns the first occurrence, so the duplicate does not affect the ordering.
FLAVOR_PREFERENCES = [
"shared-pgo",
"shared-noopt",
"shared-noopt",
"static-noopt",
"gnu-pgo+lto",
"gnu-lto",
"gnu-pgo",
"pgo+lto",
"lto",
"pgo",
]
# Flavors that are recognized as filename suffixes (so `_suffix_re` can split
# them off the triple) but that have no rank in FLAVOR_PREFERENCES.
HIDDEN_FLAVORS = [
"debug",
"noopt",
"install_only",
]
# Shorthand platform names mapped to the full target triples that
# `normalize_triple` knows how to split.
SPECIAL_TRIPLES = {
"macos": "x86_64-apple-darwin",
"linux64": "x86_64-unknown-linux-gnu",
"windows-amd64": "x86_64-pc-windows",
"windows-x86": "i686-pc-windows",
"linux64-musl": "x86_64-unknown-linux-musl",
}
# Matches asset names of the form
#   cpython-<major.minor.patch>[+<digits>]-<triple>[-<digits/T stamp>].tar.(gz|zst)
# and captures the version as `ver` and everything up to the optional trailing
# stamp as `triple` (which may still carry a flavor suffix).
_filename_re = re.compile(
r"""(?x)
^
cpython-(?P<ver>\d+\.\d+\.\d+?)
(?:\+\d+)?
-(?P<triple>.*?)
(?:-[\dT]+)?\.tar\.(?:gz|zst)
$
"""
)
# Splits "<triple>-<flavor>" into its two parts for any known flavor. The
# alternatives are sorted longest-first, presumably so that a longer flavor name
# is tried before a shorter one it contains.
_suffix_re = re.compile(
r"""(?x)^(.*?)-(%s)$"""
% (
"|".join(
map(
re.escape,
sorted(FLAVOR_PREFERENCES + HIDDEN_FLAVORS, key=len, reverse=True),
)
)
)
)
# to match the output of the `arch` command
ARCH_MAP = {"aarch64": "arm64"}
def parse_filename(filename):
    """Split a release asset filename into ``(version, triple, flavor)``.

    Returns ``None`` when ``filename`` does not match ``_filename_re``.
    ``flavor`` is ``None`` when the triple carries no known flavor suffix.
    """
    parsed = _filename_re.match(filename)
    if parsed is None:
        return None

    version = parsed.group("ver")
    triple = parsed.group("triple")

    # Drop a trailing "-full" marker before looking for a flavor suffix.
    if triple.endswith("-full"):
        triple = triple[: -len("-full")]

    flavored = _suffix_re.match(triple)
    if flavored is None:
        return (version, triple, None)

    triple, suffix = flavored.groups()
    return (version, triple, suffix)
def normalize_triple(triple):
    """Reduce a target triple to the ``"<arch>-<platform>-<libc>"`` form.

    Shorthand names are first expanded through ``SPECIAL_TRIPLES`` and the
    architecture is renamed through ``ARCH_MAP``. ``libc`` is the fourth
    segment on linux and ``"none"`` everywhere else.

    Returns ``None`` (after a debug log) for static builds and for triples
    with too few segments.
    """
    if "-static" in triple:
        logging.debug("Skipping %r: unknown triple", triple)
        return None

    triple = SPECIAL_TRIPLES.get(triple, triple)
    parts = triple.split("-")

    # At least arch-vendor-os is required; on linux the triple has four
    # segments, the last one being the libc.
    too_short = len(parts) < 3 or (parts[2] == "linux" and len(parts) < 4)
    if too_short:
        logging.debug("Skipping %r: unknown triple", triple)
        return None

    arch = ARCH_MAP.get(parts[0], parts[0])
    platform = parts[2]
    libc = parts[3] if platform == "linux" else "none"
    return "%s-%s-%s" % (arch, platform, libc)
def read_sha256(url):
    """Fetch the published SHA-256 checksum for the asset at ``url``.

    The checksum is read from ``url + ".sha256"``.

    Returns the checksum as a ``str`` with surrounding whitespace removed, or
    ``None`` when the request fails with an HTTP error (for example, when no
    checksum file was published for the asset).
    """
    try:
        resp = urllib.request.urlopen(url + ".sha256")
    except urllib.error.HTTPError:
        return None
    # Close the connection as soon as the body has been read.
    with resp:
        assert resp.status == 200
        # Decode to text: the value is written out with `json.dumps`, which
        # cannot serialize `bytes`.
        return resp.read().decode().strip()
def sha256(path):
    """Return the hex-encoded SHA-256 digest of the file at ``path``."""
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # The file object is buffered, so reading small blocks stays cheap.
        for block in iter(lambda: stream.read(digest.block_size), b""):
            digest.update(block)
    return digest.hexdigest()
def _sort_key(info):
    """Rank a ``(triple, flavor, url)`` entry by how preferred its flavor is.

    Lower values sort first. Flavors missing from ``FLAVOR_PREFERENCES``
    (including ``None``) rank after every listed flavor.
    """
    _triple, flavor, _url = info
    if flavor in FLAVOR_PREFERENCES:
        return FLAVOR_PREFERENCES.index(flavor)
    return len(FLAVOR_PREFERENCES) + 1
def find():
    """
    Find available Python versions and write metadata to a file.

    Walks the paged release listing at ``RELEASE_URL``, keeps one download per
    Python version and normalized triple (the most preferred flavor according
    to ``_sort_key``), looks up each download's published checksum, and writes
    the result as JSON to ``VERSIONS_FILE``.
    """
    results = {}
    for page in range(1, 100):
        logging.debug("Reading release page %s...", page)
        # Send the API-version header and close the response once it is read.
        request = urllib.request.Request(
            "%s?page=%d" % (RELEASE_URL, page), headers=HEADERS
        )
        with urllib.request.urlopen(request) as resp:
            rows = json.loads(resp.read())
        if not rows:
            # An empty page means the listing is exhausted.
            break
        for row in rows:
            for asset in row["assets"]:
                url = asset["browser_download_url"]
                base_name = unquote(url.rsplit("/")[-1])
                # Checksum files are fetched separately, per selected download.
                if base_name.endswith(".sha256"):
                    continue
                info = parse_filename(base_name)
                if info is None:
                    continue
                py_ver, triple, flavor = info
                if "-static" in triple or (flavor and "noopt" in flavor):
                    continue
                triple = normalize_triple(triple)
                if triple is None:
                    continue
                results.setdefault(py_ver, []).append((triple, flavor, url))

    cpython_results = {}
    for py_ver, choices in results.items():
        # Most preferred flavor first, so the first URL seen per triple wins.
        choices.sort(key=_sort_key)
        urls = {}
        for triple, flavor, url in choices:
            triple = tuple(triple.split("-"))
            if triple in urls:
                continue
            urls[triple] = url
        cpython_results[tuple(map(int, py_ver.split(".")))] = urls

    final_results = {}
    # Newest versions first.
    for interpreter, py_ver, choices in sorted(
        chain(
            (("cpython",) + x for x in cpython_results.items()),
        ),
        key=lambda x: x[:2],
        reverse=True,
    ):
        for (arch, platform, libc), url in sorted(choices.items()):
            key = "%s-%s.%s.%s-%s-%s-%s" % (interpreter, *py_ver, platform, arch, libc)
            logging.info("Found %s", key)
            # Named `checksum` so the module-level `sha256` function is not shadowed.
            checksum = read_sha256(url)
            if isinstance(checksum, bytes):
                # `json.dumps` cannot serialize bytes.
                checksum = checksum.decode()
            final_results[key] = {
                "name": interpreter,
                "arch": arch,
                "os": platform,
                "libc": libc,
                "major": py_ver[0],
                "minor": py_ver[1],
                "patch": py_ver[2],
                "url": url,
                "sha256": checksum,
            }

    VERSIONS_FILE.parent.mkdir(parents=True, exist_ok=True)
    VERSIONS_FILE.write_text(json.dumps(final_results, indent=2))
def main():
    """Parse the command-line flags, configure logging, and run ``find``."""
    parser = argparse.ArgumentParser(description="Fetch Python version metadata.")
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable debug logging"
    )
    parser.add_argument("-q", "--quiet", action="store_true", help="Disable logging")
    options = parser.parse_args()

    # `--quiet` takes precedence when both flags are given.
    if options.quiet:
        level = logging.CRITICAL
    else:
        level = logging.DEBUG if options.verbose else logging.INFO

    logging.basicConfig(
        level=level,
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    find()


if __name__ == "__main__":
    main()

View file

@ -1,204 +0,0 @@
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "zstandard==0.22.0",
# ]
# ///
#
# Download required Python versions and install to `bin`
# Uses prebuilt Python distributions from indygreg/python-build-standalone
#
# This script can be run without Python installed via `install.sh`
#
# Requirements
#
# pip install zstandard==0.22.0
#
# Usage
#
# python scripts/bootstrap/install.py
#
# Or
#
# pipx run scripts/bootstrap/install.py
#
# The Python versions are installed from `.python-versions`.
# Python versions are linked in-order such that the _last_ defined version will be the default.
#
# Version metadata can be updated with `fetch-version-metadata.py`
import concurrent.futures
import hashlib
import json
import os
import platform
import shutil
import sys
import sysconfig
import tarfile
import tempfile
import urllib.parse
import urllib.request
from pathlib import Path
try:
import zstandard
except ImportError:
print("ERROR: zstandard is required; install with `pip install zstandard==0.22.0`")
sys.exit(1)
# File locations, derived from where this script lives
THIS_DIR = Path(__file__).parent
ROOT_DIR = THIS_DIR.parent.parent

# `UV_BOOTSTRAP_DIR` overrides the default `bin` directory under the root; an
# unset or empty value falls back to the default.
bin_dir = os.environ.get("UV_BOOTSTRAP_DIR")
BIN_DIR = Path(bin_dir) if bin_dir else ROOT_DIR / "bin"

INSTALL_DIR = BIN_DIR / "versions"
VERSIONS_FILE = ROOT_DIR / ".python-versions"
VERSIONS_METADATA_FILE = THIS_DIR / "versions.json"

# Translate what this system reports into the names used by the versions metadata
ARCH_MAP = {"aarch64": "arm64", "amd64": "x86_64"}
PLATFORM_MAP = {"win32": "windows"}

PLATFORM = sys.platform
ARCH = platform.machine().lower()
INTERPRETER = "cpython"
def decompress_file(archive_path: Path, output_path: Path):
    """Unpack a ``.tar.zst`` archive into ``output_path``.

    The archive is first decompressed into an anonymous temporary file, which
    is then read back as a tar archive and extracted.

    Raises ``ValueError`` when ``archive_path`` does not end in ``.tar.zst``.
    """
    if not str(archive_path).endswith(".tar.zst"):
        raise ValueError(f"Unknown archive type {archive_path.suffix}")

    decompressor = zstandard.ZstdDecompressor()
    with tempfile.TemporaryFile(suffix=".tar") as tar_file:
        with archive_path.open("rb") as compressed:
            decompressor.copy_stream(compressed, tar_file)
        # Rewind so the tar reader starts at the beginning of the decompressed data.
        tar_file.seek(0)
        with tarfile.open(fileobj=tar_file) as archive:
            archive.extractall(output_path)
def sha256_file(path: Path):
    """Return the hex-encoded SHA-256 digest of the file at ``path``."""
    hasher = hashlib.sha256()
    with open(path, "rb") as stream:
        # Reads go through Python's buffered file object, so small chunks are fine.
        while chunk := stream.read(hasher.block_size):
            hasher.update(chunk)
    return hasher.hexdigest()
# Download metadata loaded from `versions.json`; entries are looked up by the key built in `get_key`.
versions_metadata = json.loads(VERSIONS_METADATA_FILE.read_text())
# Requested Python versions, one per line of `.python-versions`.
versions = VERSIONS_FILE.read_text().splitlines()
def get_key(version):
    """Build the versions-metadata lookup key for ``version`` on this machine.

    The key has the form ``<interpreter>-<version>-<os>-<arch>-<libc>``.
    """
    # On Linux the libc is the last "-"-separated segment of the running
    # interpreter's SOABI; every other platform uses "none".
    libc = (
        sysconfig.get_config_var("SOABI").split("-")[-1]
        if platform.system() == "Linux"
        else "none"
    )
    os_name = PLATFORM_MAP.get(PLATFORM, PLATFORM)
    arch = ARCH_MAP.get(ARCH, ARCH)
    return f"{INTERPRETER}-{version}-{os_name}-{arch}-{libc}"
def download(version):
    """Download, verify, and unpack the Python distribution for ``version``.

    The archive is downloaded next to this script, checked against the
    checksum from the versions metadata (when one is recorded), extracted, and
    moved to ``INSTALL_DIR / "<interpreter>@<version>"``. Any existing
    installation at that location is replaced.

    Returns the installation directory. Exits the process with status 1 when
    the metadata has no download for this version/platform or when the
    checksum does not match.
    """
    key = get_key(version)
    install_dir = INSTALL_DIR / f"{INTERPRETER}@{version}"
    print(f"Downloading {key}")

    # A key that is absent from the metadata is reported the same way as an
    # entry without a URL, rather than surfacing as a bare KeyError.
    entry = versions_metadata.get(key)
    url = entry.get("url") if entry else None
    if not url:
        print(f"No matching download for {key}")
        sys.exit(1)

    filename = url.split("/")[-1]
    print(f"Downloading {urllib.parse.unquote(filename)}")
    download_path = THIS_DIR / filename
    with urllib.request.urlopen(url) as response:
        with download_path.open("wb") as download_file:
            shutil.copyfileobj(response, download_file)

    sha = entry.get("sha256")
    if not sha:
        print(f"WARNING: no checksum for {key}")
    else:
        print("Verifying checksum...", end="")
        if sha256_file(download_path) != sha:
            print(" FAILED!")
            # Do not leave a corrupt archive behind next to the script.
            download_path.unlink()
            sys.exit(1)
        print(" OK")

    if install_dir.exists():
        shutil.rmtree(install_dir)
    print("Extracting to", install_dir)
    install_dir.parent.mkdir(parents=True, exist_ok=True)

    # n.b. do not use `.with_suffix` as it will replace the patch Python version
    extract_dir = Path(str(install_dir) + ".tmp")
    decompress_file(download_path, extract_dir)
    # The archive's top-level `python` directory becomes the installation directory.
    (extract_dir / "python").rename(install_dir)
    download_path.unlink()
    extract_dir.rmdir()
    return install_dir
def install(version, install_dir):
    """Link ``python`` executables in ``BIN_DIR`` to the given installation.

    Links are created for ``python<version>``, ``python<major>.<minor>``,
    ``python<major>`` and ``python``; existing links with those names are
    replaced, so the version installed last takes precedence.
    """
    key = get_key(version)
    on_windows = PLATFORM == "win32"
    if on_windows:
        executable = install_dir / "install" / "python.exe"
    else:
        # Use relative paths for links so if the bin is moved they don't break
        relative_install = install_dir.relative_to(BIN_DIR)
        executable = "." / relative_install / "install" / "bin" / "python3"

    entry = versions_metadata[key]
    major, minor = entry["major"], entry["minor"]

    BIN_DIR.mkdir(parents=True, exist_ok=True)
    # From most to least specific name
    link_names = (
        f"python{version}",
        f"python{major}.{minor}",
        f"python{major}",
        "python",
    )
    for name in link_names:
        link = BIN_DIR / name
        link.unlink(missing_ok=True)
        if on_windows:
            link.hardlink_to(executable)
        else:
            link.symlink_to(executable)

    print(f"Installed executables for python{version}")
if __name__ == "__main__":
    # Start from a clean slate: drop every previously installed version.
    if INSTALL_DIR.exists():
        print("Removing existing installations...")
        shutil.rmtree(INSTALL_DIR)

    # Download in parallel, one worker per requested version.
    # `ProcessPoolExecutor` raises ValueError for `max_workers=0`, so an empty
    # `.python-versions` file still gets a pool of one (idle) worker.
    with concurrent.futures.ProcessPoolExecutor(
        max_workers=max(1, len(versions))
    ) as executor:
        futures = [
            (version, executor.submit(download, version)) for version in versions
        ]

    # Install sequentially so overrides are respected
    for version, future in futures:
        install_dir = future.result()
        install(version, install_dir)

    print("Done!")

File diff suppressed because it is too large Load diff

View file

@ -13,14 +13,14 @@ use assert_cmd::assert::OutputAssertExt;
use assert_fs::fixture::{FileWriteStr, PathChild};
use predicates::prelude::predicate;
use common::{create_bin_with_executables, get_bin, uv_snapshot, TestContext};
use common::{python_path_with_versions, get_bin, uv_snapshot, TestContext};
mod common;
/// Provision python binaries and return a `pip compile` command with options shared across all scenarios.
fn command(context: &TestContext, python_versions: &[&str]) -> Command {
let bin = create_bin_with_executables(&context.temp_dir, python_versions)
.expect("Failed to create bin dir");
let python_path = python_path_with_versions(&context.temp_dir, python_versions)
.expect("Failed to create Python test path");
let mut command = Command::new(get_bin());
command
.arg("pip")
@ -34,7 +34,7 @@ fn command(context: &TestContext, python_versions: &[&str]) -> Command {
.arg(context.cache_dir.path())
.env("VIRTUAL_ENV", context.venv.as_os_str())
.env("UV_NO_WRAP", "1")
.env("UV_TEST_PYTHON_PATH", bin)
.env("UV_TEST_PYTHON_PATH", python_path)
.current_dir(&context.temp_dir);
if cfg!(all(windows, debug_assertions)) {