uv/scripts/bootstrap/fetch-version-metadata.py
Zanie Blue 21577ad002
Add bootstrapping and isolation of development Python versions (#1105)
Replaces https://github.com/astral-sh/puffin/pull/1068 and #1070 which
were more complicated than I wanted.

- Introduces a `.python-versions` file which defines the Python versions
needed for development
- Adds a Bash script at `scripts/bootstrap/install` which installs the
required Python versions from `python-build-standalone` to `./bin`
- Checks in a `versions.json` file with metadata about available
versions on each platform and a `fetch-version` Python script derived
from `rye` for updating the versions
- Updates CI to use these Python builds instead of the `setup-python`
action
- Updates to the latest packse scenarios which require Python 3.8+
instead of 3.7+ since we cannot use 3.7 anymore and includes new test
coverage of patch Python version requests
- Adds a `PUFFIN_PYTHON_PATH` variable to prevent lookup of system
Python versions for isolation during development

Tested on Linux (via CI) and macOS (locally) — presumably it will be a
bit more complicated to do proper Windows support.
2024-01-26 12:12:48 -06:00

273 lines
6.7 KiB
Python
Executable file

#!/usr/bin/env python3.12
"""
Fetch Python version metadata.
Generates the bootstrap `versions.json` file.
Installation:
pip install requests==2.31.0
Usage:
scripts/bootstrap/fetch-version-metadata.py
Acknowledgements:
Derived from https://github.com/mitsuhiko/rye/tree/f9822267a7f00332d15be8551f89a212e7bc9017
Originally authored by Armin Ronacher under the MIT license
"""
import argparse
import hashlib
import json
import logging
import os
import re
import sys
from itertools import chain
from pathlib import Path
from urllib.parse import unquote
try:
import requests
except ImportError:
print("ERROR: requests is required; install with `pip install requests==2.31.0`")
sys.exit(1)
# Directory containing this script; the generated metadata file lives next to it.
SELF_DIR = Path(__file__).parent

# GitHub REST endpoint that lists the python-build-standalone releases.
RELEASE_URL = "https://api.github.com/repos/indygreg/python-build-standalone/releases"

# Headers applied to the session used for all requests.
HEADERS = {
    "X-GitHub-Api-Version": "2022-11-28",
}

# Output file written by `find`.
VERSIONS_FILE = SELF_DIR / "versions.json"

# Build flavors ordered from most to least preferred. `_sort_key` ranks a
# flavor by its position in this list, so every entry must appear only once.
FLAVOR_PREFERENCES = [
    "shared-pgo",
    "shared-noopt",
    "static-noopt",
    "gnu-pgo+lto",
    "gnu-lto",
    "gnu-pgo",
    "pgo+lto",
    "lto",
    "pgo",
]

# Flavor suffixes that are recognized when parsing a file name but that are
# not part of the preference order.
HIDDEN_FLAVORS = [
    "debug",
    "noopt",
    "install_only",
]

# Short platform names mapped to the triple they are treated as.
SPECIAL_TRIPLES = {
    "macos": "x86_64-apple-darwin",
    "linux64": "x86_64-unknown-linux",
    "windows-amd64": "x86_64-pc-windows",
    "windows-x86": "i686-pc-windows",
    "linux64-musl": "x86_64-unknown-linux",
}

# Matches a CPython archive name and captures its version and triple.
# Verbose mode: whitespace and line breaks inside the pattern are ignored.
_filename_re = re.compile(
    r"""(?x)
^
cpython-(?P<ver>\d+\.\d+\.\d+?)
(?:\+\d+)?
-(?P<triple>.*?)
(?:-[\dT]+)?\.tar\.(?:gz|zst)
$
"""
)

# Splits a known flavor off the end of a triple. Longer flavor names come
# first in the alternation so that they are tried before their shorter
# suffixes at the same position.
_suffix_re = re.compile(
    r"""(?x)^(.*?)-(%s)$"""
    % (
        "|".join(
            map(
                re.escape,
                sorted(FLAVOR_PREFERENCES + HIDDEN_FLAVORS, key=len, reverse=True),
            )
        )
    )
)

# to match the output of the `arch` command
ARCH_MAP = {"aarch64": "arm64"}
def parse_filename(filename):
    """
    Split a release asset file name into its components.

    Returns a `(version, triple, suffix)` tuple, where `suffix` is the flavor
    recognized at the end of the triple, or `None` if no known flavor is
    present. Returns `None` when the name does not match the expected CPython
    archive pattern.
    """
    parsed = _filename_re.match(filename)
    if parsed is None:
        return None

    version = parsed.group("ver")
    triple = parsed.group("triple")

    # Drop a trailing "-full" marker before looking for the flavor.
    full_marker = "-full"
    if triple.endswith(full_marker):
        triple = triple[: -len(full_marker)]

    flavor_match = _suffix_re.match(triple)
    if flavor_match is None:
        return (version, triple, None)

    triple, suffix = flavor_match.groups()
    return (version, triple, suffix)
def normalize_triple(triple):
    """
    Reduce a target triple to an `<arch>-<platform>` string.

    Returns `None`, after logging at debug level, for triples containing
    "-musl" or "-static" and for triples that have fewer than three
    dash-separated pieces once `SPECIAL_TRIPLES` has been applied.
    """
    if any(marker in triple for marker in ("-musl", "-static")):
        logging.debug("Skipping %r: unknown triple", triple)
        return None

    expanded = SPECIAL_TRIPLES.get(triple, triple)
    parts = expanded.split("-")
    if len(parts) < 3:
        logging.debug("Skipping %r: unknown triple", expanded)
        return None

    # Normalize the architecture name; the platform is the third piece.
    arch = ARCH_MAP.get(parts[0], parts[0])
    return "%s-%s" % (arch, parts[2])
def read_sha256(session, url):
    """
    Fetch the checksum published alongside a download.

    Requests `url` with ".sha256" appended through `session` and returns the
    response text with surrounding whitespace stripped, or `None` when the
    response is not OK.
    """
    checksum_url = url + ".sha256"
    response = session.get(checksum_url)
    return response.text.strip() if response.ok else None
def sha256(path):
    """
    Return the SHA-256 hex digest of the file at `path`.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # Reading is buffered, so we can read smaller chunks.
        while chunk := stream.read(digest.block_size):
            digest.update(chunk)
    return digest.hexdigest()
def _sort_key(info):
    """
    Rank a `(triple, flavor, url)` entry by how preferred its flavor is.

    Lower values are more preferred. A flavor that is not listed in
    `FLAVOR_PREFERENCES` (including `None`) ranks after every listed one.
    """
    _, flavor, _ = info
    if flavor in FLAVOR_PREFERENCES:
        return FLAVOR_PREFERENCES.index(flavor)
    return len(FLAVOR_PREFERENCES) + 1
def get_session() -> requests.Session:
    """
    Create the HTTP session used for all requests.

    The session sends `HEADERS` on every request and, when the `GITHUB_TOKEN`
    environment variable is set to a non-empty value, an `Authorization`
    bearer header. A warning is logged when no token is available.
    """
    session = requests.Session()
    # Merge into the session's existing header mapping instead of replacing
    # it, so the defaults that requests sets up are kept and the mapping
    # stays case-insensitive.
    session.headers.update(HEADERS)

    token = os.environ.get("GITHUB_TOKEN")
    if token:
        session.headers["Authorization"] = "Bearer " + token
    else:
        logging.warning(
            "An authentication token was not found at `GITHUB_TOKEN`, rate limits may be encountered.",
        )
    return session
def find(args):
    """
    Find available Python versions and write metadata to a file.

    Pages through the release listing at `RELEASE_URL`, keeps one download
    per Python version and `(arch, platform)` pair (the most preferred flavor
    according to `_sort_key`), looks up the checksum for each kept download
    and writes the result to `VERSIONS_FILE` as JSON.

    `args` is the parsed command-line namespace; it is currently unused.

    Raises `requests.HTTPError` when a release listing request returns an
    error status.
    """
    results = {}
    session = get_session()

    for page in range(1, 100):
        logging.debug("Reading release page %s...", page)
        resp = session.get("%s?page=%d" % (RELEASE_URL, page))
        # Fail with the HTTP status here: an error payload (for example when
        # rate limited) is not a list of releases and would otherwise break
        # the loop below with an unrelated error.
        resp.raise_for_status()
        rows = resp.json()
        if not rows:
            break
        for row in rows:
            for asset in row["assets"]:
                url = asset["browser_download_url"]
                base_name = unquote(url.rsplit("/", 1)[-1])
                # Checksum files are fetched separately by `read_sha256`.
                if base_name.endswith(".sha256"):
                    continue
                info = parse_filename(base_name)
                if info is None:
                    continue
                py_ver, triple, flavor = info
                if "-static" in triple or (flavor and "noopt" in flavor):
                    continue
                triple = normalize_triple(triple)
                if triple is None:
                    continue
                results.setdefault(py_ver, []).append((triple, flavor, url))

    cpython_results = {}
    for py_ver, choices in results.items():
        # `list.sort` is stable, so among equally preferred flavors the
        # download that was collected first is the one that is kept.
        choices.sort(key=_sort_key)
        urls = {}
        for triple, _flavor, url in choices:
            # Keep only the first (most preferred) URL per (arch, platform).
            urls.setdefault(tuple(triple.split("-")), url)
        # Key by an integer tuple so versions sort numerically.
        cpython_results[tuple(map(int, py_ver.split(".")))] = urls

    final_results = {}
    for interpreter, py_ver, choices in sorted(
        chain(
            (("cpython",) + x for x in cpython_results.items()),
        ),
        key=lambda x: x[:2],
        reverse=True,
    ):
        for (arch, platform), url in sorted(choices.items()):
            key = "%s-%s.%s.%s-%s-%s" % (interpreter, *py_ver, platform, arch)
            logging.info("Found %s", key)
            # Named `checksum` so the module-level `sha256` function is not
            # shadowed inside this function.
            checksum = read_sha256(session, url)
            final_results[key] = {
                "name": interpreter,
                "arch": arch,
                "os": platform,
                "major": py_ver[0],
                "minor": py_ver[1],
                "patch": py_ver[2],
                "url": url,
                "sha256": checksum,
            }

    VERSIONS_FILE.parent.mkdir(parents=True, exist_ok=True)
    VERSIONS_FILE.write_text(json.dumps(final_results, indent=2))
def main():
    """
    Parse the command-line options, configure logging and run `find`.
    """
    parser = argparse.ArgumentParser(description="Fetch Python version metadata.")
    flag_specs = (
        ("-v", "--verbose", "Enable debug logging"),
        ("-q", "--quiet", "Disable logging"),
    )
    for short_name, long_name, help_text in flag_specs:
        parser.add_argument(
            short_name,
            long_name,
            action="store_true",
            help=help_text,
        )
    args = parser.parse_args()

    # Applied in increasing priority: --quiet wins when both flags are given.
    log_level = logging.INFO
    if args.verbose:
        log_level = logging.DEBUG
    if args.quiet:
        log_level = logging.CRITICAL

    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    find(args)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()