mirror of
https://github.com/astral-sh/uv.git
synced 2025-11-01 20:31:12 +00:00
Replace Python bootstrapping script with Rust implementation (#2842)
See https://github.com/astral-sh/uv/issues/2617
Note this also includes:
- #2918
- #2931 (pending)
A first step towards Python toolchain management in Rust.
First, we add a new crate to manage Python download metadata:
- Adds a new `uv-toolchain` crate
- Adds Rust structs for Python version download metadata
- Duplicates the script which downloads Python version metadata
- Adds a script to generate Rust code from the JSON metadata
- Adds a utility to download and extract the Python version
I explored some alternatives like a build script using things like
`serde` and `uneval` to automatically construct the code from our
structs but deemed it too heavy. Unlike Rye, I don't generate the Rust
directly from the web requests and have an intermediate JSON layer to
speed up iteration on the Rust types.
Next, we add a `uv-dev` command `fetch-python` to download Python
versions per the bootstrapping script.
- Downloads a requested version or reads from `.python-versions`
- Extracts to `UV_BOOTSTRAP_DIR`
- Links executables for path extension
This command is not really intended to be user facing, but it's a good
PoC for the `uv-toolchain` API. Hash checking (via the sha256) isn't
implemented yet, we can do that in a follow-up.
Finally, we remove the `scripts/bootstrap` directory, update CI to use
the new command, and update the CONTRIBUTING docs.
<img width="1023" alt="Screenshot 2024-04-08 at 17 12 15"
src="57bd3cf1-7477-4bb8-a8e9-802a00d772cb">
This commit is contained in:
parent
7cd98d2499
commit
44e39bdca3
31 changed files with 8170 additions and 3703 deletions
292
crates/uv-toolchain/fetch-version-metadata.py
Executable file
292
crates/uv-toolchain/fetch-version-metadata.py
Executable file
|
|
@ -0,0 +1,292 @@
|
|||
#!/usr/bin/env python3.12
|
||||
"""
|
||||
Fetch Python version metadata.
|
||||
|
||||
Generates the `python-version-metadata.json` file.
|
||||
|
||||
Usage:
|
||||
|
||||
python fetch-version-metadata.py
|
||||
|
||||
Acknowledgements:
|
||||
|
||||
Derived from https://github.com/mitsuhiko/rye/tree/f9822267a7f00332d15be8551f89a212e7bc9017
|
||||
Originally authored by Armin Ronacher under the MIT license
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from urllib.parse import unquote
|
||||
|
||||
# Directory containing this script; the generated metadata file lives next to it.
SELF_DIR = Path(__file__).parent
# GitHub REST endpoint that lists the python-build-standalone releases.
RELEASE_URL = "https://api.github.com/repos/indygreg/python-build-standalone/releases"
# Request headers pinning the GitHub REST API version.
HEADERS = {
    "X-GitHub-Api-Version": "2022-11-28",
}
# Output file holding the collected download metadata.
VERSIONS_FILE = SELF_DIR / "python-version-metadata.json"
# Build flavors, most preferred first. When several downloads exist for the
# same Python version and triple, the one whose flavor appears earliest wins.
#
# Flavors must not include the libc component (e.g. a `gnu-` prefix): the
# flavor is stripped from the end of the triple before normalization, and
# `normalize_triple` needs the fourth `-`-separated segment of a Linux triple
# to read the libc. A `gnu-pgo+lto` flavor would turn
# `x86_64-unknown-linux-gnu-pgo+lto` into `x86_64-unknown-linux`, which is then
# discarded as an unknown triple.
FLAVOR_PREFERENCES = [
    "shared-pgo",
    "shared-noopt",
    "static-noopt",
    "pgo+lto",
    "lto",
    "pgo",
]
# Flavors that are recognized as filename suffixes but have no preference rank.
HIDDEN_FLAVORS = [
    "debug",
    "noopt",
    "install_only",
]
# Short platform names used in some asset filenames, mapped to full triples.
SPECIAL_TRIPLES = {
    "macos": "x86_64-apple-darwin",
    "linux64": "x86_64-unknown-linux-gnu",
    "windows-amd64": "x86_64-pc-windows",
    "windows-x86": "i686-pc-windows",
    "windows-amd64-shared": "x86_64-pc-windows",
    "windows-x86-shared": "i686-pc-windows",
    "linux64-musl": "x86_64-unknown-linux-musl",
}
|
||||
|
||||
# Pattern for release asset filenames such as
# `cpython-3.12.2+20240224-x86_64-unknown-linux-gnu-pgo+lto-full.tar.zst`.
#
# Captures:
#   ver    -- the three-component Python version (`3.12.2`)
#   triple -- everything between the version and the optional trailing
#             `-<digits/T>` stamp, still including any flavor suffix
#
# An optional `+<digits>` build tag after the version is matched but not
# captured. Only `.tar.gz` and `.tar.zst` archives match.
_FILENAME_PATTERN = r"""(?x)
    ^
        cpython-(?P<ver>\d+\.\d+\.\d+?)
        (?:\+\d+)?
        -(?P<triple>.*?)
        (?:-[\dT]+)?\.tar\.(?:gz|zst)
    $
"""
_filename_re = re.compile(_FILENAME_PATTERN)
|
||||
# Regex alternation of every known flavor (preferred and hidden), escaped and
# listed longest-first.
_flavor_alternation = "|".join(
    re.escape(flavor)
    for flavor in sorted(FLAVOR_PREFERENCES + HIDDEN_FLAVORS, key=len, reverse=True)
)
# Splits a string of the form `<triple>-<flavor>`: group 1 is the (lazily
# matched) prefix and group 2 is the trailing flavor.
_suffix_re = re.compile(r"""(?x)^(.*?)-(%s)$""" % _flavor_alternation)
|
||||
|
||||
# Normalized mappings to match the Rust types

# Architecture names as they appear in triples -> normalized architecture names.
# Names without an entry are kept unchanged by `normalize_arch`.
ARCH_MAP = {
    "i386": "x86",
    "i686": "x86",
    "ppc64": "powerpc64",
    "ppc64le": "powerpc64le",
}

# Operating system names as they appear in triples -> normalized OS names.
# Names without an entry are kept unchanged by `normalize_os`.
OS_MAP = {
    "darwin": "macos",
}
|
||||
|
||||
|
||||
def parse_filename(filename):
    """
    Split a release asset filename into its version, triple and flavor.

    Returns `None` when the filename does not match `_filename_re`; otherwise a
    `(version, triple, flavor)` tuple of strings, where `flavor` is `None` if
    the triple does not end in one of the known flavors.
    """
    parsed = _filename_re.match(filename)
    if parsed is None:
        return None

    version = parsed.group("ver")
    # A trailing `-full` marker is not part of the triple or the flavor.
    triple = parsed.group("triple").removesuffix("-full")

    flavor_match = _suffix_re.match(triple)
    if flavor_match is None:
        return (version, triple, None)

    triple, suffix = flavor_match.groups()
    return (version, triple, suffix)
|
||||
|
||||
|
||||
def normalize_triple(triple):
    """
    Convert a target triple into a normalized `<arch>-<os>-<libc>` string.

    Short names listed in `SPECIAL_TRIPLES` are expanded first. The
    architecture is taken from the first `-`-separated segment and the
    operating system from the third. For Linux the libc is the fourth segment;
    for every other operating system it is the literal `"none"`.

    Returns `None` (after a debug log) for triples containing `-static` and for
    triples with too few segments.
    """
    if "-static" in triple:
        logging.debug("Skipping %r: unknown triple", triple)
        return None

    triple = SPECIAL_TRIPLES.get(triple, triple)
    pieces = triple.split("-")

    # At least `<arch>-<vendor>-<os>` is required, and Linux additionally
    # needs the libc segment.
    too_short = len(pieces) < 3 or (pieces[2] == "linux" and len(pieces) < 4)
    if too_short:
        logging.debug("Skipping %r: unknown triple", triple)
        return None

    arch = normalize_arch(pieces[0])
    operating_system = normalize_os(pieces[2])
    # On linux, the triple has four segments, the last one is the libc
    libc = pieces[3] if pieces[2] == "linux" else "none"

    return "%s-%s-%s" % (arch, operating_system, libc)
|
||||
|
||||
|
||||
def normalize_arch(arch):
    """
    Return the normalized name for an architecture triple component.

    The name is first translated through `ARCH_MAP` (unknown names are kept),
    then cut down to at most its first two `_`-separated parts.
    """
    canonical = ARCH_MAP.get(arch, arch)
    # Strip `_vN` from `x86_64`
    leading_parts = canonical.split("_")[:2]
    return "_".join(leading_parts)
|
||||
|
||||
|
||||
def normalize_os(os):
    """
    Return the normalized name for an operating system triple component.

    Names found in `OS_MAP` are translated; all other names are returned
    unchanged.
    """
    if os in OS_MAP:
        return OS_MAP[os]
    return os
|
||||
|
||||
|
||||
def read_sha256(url):
    """
    Fetch the SHA-256 checksum published next to a download.

    Requests `url + ".sha256"` and returns the response body decoded and
    stripped of surrounding whitespace.

    Returns `None` when the request fails with an HTTP error.

    Raises `RuntimeError` if the request succeeds with a status other than 200.
    """
    try:
        resp = urllib.request.urlopen(url + ".sha256")
    except urllib.error.HTTPError:
        return None

    # Use the response as a context manager so the connection is always closed.
    with resp:
        # An explicit check rather than `assert`, which is removed under `-O`.
        if resp.status != 200:
            raise RuntimeError(
                "Unexpected status %s when fetching %s.sha256" % (resp.status, url)
            )
        return resp.read().decode().strip()
|
||||
|
||||
|
||||
def sha256(path):
    """
    Return the hex-encoded SHA-256 digest of the file at `path`.

    The file is opened in binary mode and fed to the hash one chunk at a time.
    """
    digest = hashlib.sha256()

    with open(path, "rb") as stream:
        # Reading is buffered, so we can read smaller chunks.
        while chunk := stream.read(digest.block_size):
            digest.update(chunk)

    return digest.hexdigest()
|
||||
|
||||
|
||||
def _sort_by_flavor_preference(info):
    """
    Sort key ranking a `(triple, flavor, url)` entry by its flavor.

    Returns the flavor's position in `FLAVOR_PREFERENCES`, so lower values are
    preferred. Flavors missing from that list (including `None`) get a rank
    greater than that of every listed flavor.
    """
    _, flavor, _ = info
    if flavor in FLAVOR_PREFERENCES:
        return FLAVOR_PREFERENCES.index(flavor)
    return len(FLAVOR_PREFERENCES) + 1
|
||||
|
||||
|
||||
def _sort_by_interpreter_and_version(info):
    """
    Sort key for an `(interpreter, version_tuple, downloads)` entry.

    Returns `(interpreter, version_tuple)`; the third element is ignored.
    """
    name, version, _downloads = info
    return name, version
|
||||
|
||||
|
||||
def find():
    """
    Find available Python versions and write metadata to a file.

    Walks the paginated release listing at `RELEASE_URL` (pages 1 to 99,
    stopping at the first empty page) and inspects every asset:

    - `.sha256` assets and filenames that `parse_filename` rejects are skipped
    - `-static` triples and `noopt` flavors are skipped
    - triples that `normalize_triple` rejects are skipped

    For each Python version, one URL is kept per normalized triple, choosing
    the flavor ranked best by `_sort_by_flavor_preference`. The result is
    written to `VERSIONS_FILE` as JSON, newest versions first, with one entry
    per version and triple including the checksum from `read_sha256`.
    """
    results = {}

    # Collect all available Python downloads
    for page in range(1, 100):
        logging.debug("Reading release page %s...", page)
        # Send the pinned API version header along with the request.
        request = urllib.request.Request(
            "%s?page=%d" % (RELEASE_URL, page),
            headers=HEADERS,
        )
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(request) as resp:
            rows = json.loads(resp.read())
        if not rows:
            break
        for row in rows:
            for asset in row["assets"]:
                url = asset["browser_download_url"]
                base_name = unquote(url.rsplit("/", 1)[-1])
                if base_name.endswith(".sha256"):
                    continue
                info = parse_filename(base_name)
                if info is None:
                    continue
                py_ver, triple, flavor = info
                if "-static" in triple or (flavor and "noopt" in flavor):
                    continue
                triple = normalize_triple(triple)
                if triple is None:
                    continue
                results.setdefault(py_ver, []).append((triple, flavor, url))

    # Collapse CPython variants to a single URL flavor per triple
    cpython_results: dict[tuple[int, int, int], dict[tuple[str, str, str], str]] = {}
    for py_ver, choices in results.items():
        urls = {}
        for triple, flavor, url in sorted(choices, key=_sort_by_flavor_preference):
            triple = tuple(triple.split("-"))
            # Skip existing triples, preferring the first flavor
            if triple in urls:
                continue
            urls[triple] = url
        cpython_results[tuple(map(int, py_ver.split(".")))] = urls

    # Collect variants across interpreter kinds
    # TODO(zanieb): Note we only support CPython downloads at this time
    #               but this will include PyPy chain in the future.
    final_results = {}
    for interpreter, py_ver, choices in sorted(
        chain(
            (("cpython",) + x for x in cpython_results.items()),
        ),
        key=_sort_by_interpreter_and_version,
        # Reverse the ordering so newer versions are first
        reverse=True,
    ):
        # Sort by the remaining information for determinism
        # This groups download metadata in triple component order
        for (arch, operating_system, libc), url in sorted(choices.items()):
            key = "%s-%s.%s.%s-%s-%s-%s" % (
                interpreter,
                *py_ver,
                operating_system,
                arch,
                libc,
            )
            logging.info("Found %s", key)
            # Named `checksum` so the module-level `sha256()` is not shadowed.
            checksum = read_sha256(url)

            final_results[key] = {
                "name": interpreter,
                "arch": arch,
                "os": operating_system,
                "libc": libc,
                "major": py_ver[0],
                "minor": py_ver[1],
                "patch": py_ver[2],
                "url": url,
                "sha256": checksum,
            }

    VERSIONS_FILE.parent.mkdir(parents=True, exist_ok=True)
    VERSIONS_FILE.write_text(json.dumps(final_results, indent=2))
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point.

    Parses the `-v/--verbose` and `-q/--quiet` flags, configures logging
    accordingly and then runs `find()`.
    """
    parser = argparse.ArgumentParser(description="Fetch Python version metadata.")
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable debug logging"
    )
    parser.add_argument(
        "-q", "--quiet", action="store_true", help="Disable logging"
    )
    options = parser.parse_args()

    # `--quiet` takes precedence over `--verbose` when both are given.
    log_level = (
        logging.CRITICAL
        if options.quiet
        else logging.DEBUG
        if options.verbose
        else logging.INFO
    )

    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    find()
|
||||
|
||||
|
||||
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue