mirror of
https://github.com/astral-sh/uv.git
synced 2025-07-07 21:35:00 +00:00
Improvements to the Python metadata fetch script (#4780)
This fell out of my investigation of https://github.com/astral-sh/uv/issues/4774, but the bug was fixed by the reporter in #4775.
- Adds support for `GH_TOKEN` authentication again — basically needed to avoid rate limits when hacking on this.
- Clarifies some handling and logging of flavors.
This commit is contained in:
parent
81442f0b4c
commit
c0875fd8fe
1 changed file with 37 additions and 18 deletions
|
@ -19,6 +19,7 @@ import hashlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
import os
|
||||||
import urllib.error
|
import urllib.error
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
@ -65,7 +66,7 @@ _filename_re = re.compile(
|
||||||
$
|
$
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
_suffix_re = re.compile(
|
_flavor_re = re.compile(
|
||||||
r"""(?x)^(.*?)-(%s)$"""
|
r"""(?x)^(.*?)-(%s)$"""
|
||||||
% (
|
% (
|
||||||
"|".join(
|
"|".join(
|
||||||
|
@ -91,12 +92,13 @@ def parse_filename(filename):
|
||||||
version, triple = match.groups()
|
version, triple = match.groups()
|
||||||
if triple.endswith("-full"):
|
if triple.endswith("-full"):
|
||||||
triple = triple[:-5]
|
triple = triple[:-5]
|
||||||
match = _suffix_re.match(triple)
|
match = _flavor_re.match(triple)
|
||||||
if match is not None:
|
if match is not None:
|
||||||
triple, suffix = match.groups()
|
triple, flavor = match.groups()
|
||||||
else:
|
else:
|
||||||
suffix = None
|
flavor = None
|
||||||
return (version, triple, suffix)
|
|
||||||
|
return (version, triple, flavor)
|
||||||
|
|
||||||
|
|
||||||
def normalize_triple(triple):
|
def normalize_triple(triple):
|
||||||
|
@ -132,7 +134,7 @@ def normalize_os(os):
|
||||||
|
|
||||||
def read_sha256(url):
|
def read_sha256(url):
|
||||||
try:
|
try:
|
||||||
resp = urllib.request.urlopen(url + ".sha256")
|
resp = request(url + ".sha256")
|
||||||
except urllib.error.HTTPError:
|
except urllib.error.HTTPError:
|
||||||
return None
|
return None
|
||||||
assert resp.status == 200
|
assert resp.status == 200
|
||||||
|
@ -153,8 +155,9 @@ def sha256(path):
|
||||||
return h.hexdigest()
|
return h.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def _sort_by_flavor_preference(info):
|
def _get_flavor_priority(flavor):
|
||||||
_triple, flavor, _url = info
|
"""
|
||||||
|
Returns the priority of a flavor. Lower is better."""
|
||||||
try:
|
try:
|
||||||
pref = FLAVOR_PREFERENCES.index(flavor)
|
pref = FLAVOR_PREFERENCES.index(flavor)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -167,6 +170,14 @@ def _sort_by_interpreter_and_version(info):
|
||||||
return (interpreter, version_tuple)
|
return (interpreter, version_tuple)
|
||||||
|
|
||||||
|
|
||||||
|
def request(url):
    """
    Open `url` and return the response, authenticating when `GH_TOKEN` is set.

    If the `GH_TOKEN` environment variable is set to a non-empty value, it is
    sent as a bearer token in the `Authorization` header. Otherwise the
    request is made without that header.

    Any exception raised by `urllib.request.urlopen` (for example
    `urllib.error.HTTPError`) propagates to the caller.
    """
    req = urllib.request.Request(url)
    token = os.getenv("GH_TOKEN")
    if token:
        # The header value must be `Bearer <token>`: there is no colon after
        # the scheme, and the token has to be interpolated into the string
        # rather than sent as the literal text "{token}".
        req.add_header("Authorization", f"Bearer {token}")
    return urllib.request.urlopen(req)
|
||||||
|
|
||||||
|
|
||||||
def find():
|
def find():
|
||||||
"""
|
"""
|
||||||
Find available Python versions and write metadata to a file.
|
Find available Python versions and write metadata to a file.
|
||||||
|
@ -176,7 +187,7 @@ def find():
|
||||||
# Collect all available Python downloads
|
# Collect all available Python downloads
|
||||||
for page in range(1, 100):
|
for page in range(1, 100):
|
||||||
logging.debug("Reading release page %s...", page)
|
logging.debug("Reading release page %s...", page)
|
||||||
resp = urllib.request.urlopen("%s?page=%d" % (RELEASE_URL, page))
|
resp = request("%s?page=%d" % (RELEASE_URL, page))
|
||||||
rows = json.loads(resp.read())
|
rows = json.loads(resp.read())
|
||||||
if not rows:
|
if not rows:
|
||||||
break
|
break
|
||||||
|
@ -194,6 +205,7 @@ def find():
|
||||||
continue
|
continue
|
||||||
triple = normalize_triple(triple)
|
triple = normalize_triple(triple)
|
||||||
if triple is None:
|
if triple is None:
|
||||||
|
logging.debug("Skipping %s: unsupported triple", url)
|
||||||
continue
|
continue
|
||||||
results.setdefault(py_ver, []).append((triple, flavor, url))
|
results.setdefault(py_ver, []).append((triple, flavor, url))
|
||||||
|
|
||||||
|
@ -201,13 +213,21 @@ def find():
|
||||||
cpython_results: dict[tuple[int, int, int], dict[tuple[str, str, str], str]] = {}
|
cpython_results: dict[tuple[int, int, int], dict[tuple[str, str, str], str]] = {}
|
||||||
for py_ver, choices in results.items():
|
for py_ver, choices in results.items():
|
||||||
urls = {}
|
urls = {}
|
||||||
for triple, flavor, url in sorted(choices, key=_sort_by_flavor_preference):
|
for triple, flavor, url in choices:
|
||||||
triple = tuple(triple.split("-"))
|
triple = tuple(triple.split("-"))
|
||||||
# Skip existing triples, preferring the first flavor
|
priority = _get_flavor_priority(flavor)
|
||||||
if triple in urls:
|
existing = urls.get(triple)
|
||||||
continue
|
if existing:
|
||||||
urls[triple] = url
|
_, _, existing_priority = existing
|
||||||
cpython_results[tuple(map(int, py_ver.split(".")))] = urls
|
# Skip if we have a flavor with higher priority already
|
||||||
|
if priority >= existing_priority:
|
||||||
|
continue
|
||||||
|
urls[triple] = (url, flavor, priority)
|
||||||
|
|
||||||
|
# Drop the priorities
|
||||||
|
cpython_results[tuple(map(int, py_ver.split(".")))] = {
|
||||||
|
triple: (url, flavor) for triple, (url, flavor, _) in urls.items()
|
||||||
|
}
|
||||||
|
|
||||||
# Collect variants across interpreter kinds
|
# Collect variants across interpreter kinds
|
||||||
# TODO(zanieb): Note we only support CPython downloads at this time
|
# TODO(zanieb): Note we only support CPython downloads at this time
|
||||||
|
@ -223,7 +243,7 @@ def find():
|
||||||
):
|
):
|
||||||
# Sort by the remaining information for determinism
|
# Sort by the remaining information for determinism
|
||||||
# This groups download metadata in triple component order
|
# This groups download metadata in triple component order
|
||||||
for (arch, operating_system, libc), url in sorted(choices.items()):
|
for (arch, operating_system, libc), (url, flavor) in sorted(choices.items()):
|
||||||
key = "%s-%s.%s.%s-%s-%s-%s" % (
|
key = "%s-%s.%s.%s-%s-%s-%s" % (
|
||||||
interpreter,
|
interpreter,
|
||||||
*py_ver,
|
*py_ver,
|
||||||
|
@ -231,9 +251,8 @@ def find():
|
||||||
arch,
|
arch,
|
||||||
libc,
|
libc,
|
||||||
)
|
)
|
||||||
logging.info("Found %s", key)
|
logging.info("Found %s (%s)", key, flavor)
|
||||||
sha256 = read_sha256(url)
|
sha256 = read_sha256(url)
|
||||||
|
|
||||||
final_results[key] = {
|
final_results[key] = {
|
||||||
"name": interpreter,
|
"name": interpreter,
|
||||||
"arch": arch,
|
"arch": arch,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue