mirror of
https://github.com/astral-sh/uv.git
synced 2025-07-07 21:35:00 +00:00
add helper script to download the needed files to mirror for UV_PYTHON_INSTALL_MIRROR (#8548)
Some checks are pending
CI / check system | python on macos x86_64 (push) Blocked by required conditions
CI / cargo clippy | ubuntu (push) Blocked by required conditions
CI / Determine changes (push) Waiting to run
CI / lint (push) Waiting to run
CI / cargo clippy | windows (push) Blocked by required conditions
CI / cargo dev generate-all (push) Blocked by required conditions
CI / cargo shear (push) Waiting to run
CI / cargo test | ubuntu (push) Blocked by required conditions
CI / cargo test | macos (push) Blocked by required conditions
CI / cargo test | windows (push) Blocked by required conditions
CI / check windows trampoline | aarch64 (push) Blocked by required conditions
CI / check windows trampoline | i686 (push) Blocked by required conditions
CI / check windows trampoline | x86_64 (push) Blocked by required conditions
CI / test windows trampoline | i686 (push) Blocked by required conditions
CI / test windows trampoline | x86_64 (push) Blocked by required conditions
CI / typos (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / build binary | linux (push) Blocked by required conditions
CI / build binary | macos aarch64 (push) Blocked by required conditions
CI / build binary | macos x86_64 (push) Blocked by required conditions
CI / build binary | windows (push) Blocked by required conditions
CI / cargo build (msrv) (push) Blocked by required conditions
CI / build binary | freebsd (push) Blocked by required conditions
CI / ecosystem test | prefecthq/prefect (push) Blocked by required conditions
CI / ecosystem test | pallets/flask (push) Blocked by required conditions
CI / integration test | conda on ubuntu (push) Blocked by required conditions
CI / integration test | free-threaded on linux (push) Blocked by required conditions
CI / integration test | free-threaded on windows (push) Blocked by required conditions
CI / integration test | pypy on ubuntu (push) Blocked by required conditions
CI / integration test | pypy on windows (push) Blocked by required conditions
CI / integration test | graalpy on ubuntu (push) Blocked by required conditions
CI / integration test | graalpy on windows (push) Blocked by required conditions
CI / integration test | github actions (push) Blocked by required conditions
CI / integration test | determine publish changes (push) Blocked by required conditions
CI / integration test | uv publish (push) Blocked by required conditions
CI / check cache | ubuntu (push) Blocked by required conditions
CI / check cache | macos aarch64 (push) Blocked by required conditions
CI / check system | python on debian (push) Blocked by required conditions
CI / check system | python on fedora (push) Blocked by required conditions
CI / check system | python on ubuntu (push) Blocked by required conditions
CI / check system | python on opensuse (push) Blocked by required conditions
CI / check system | homebrew python on macos aarch64 (push) Blocked by required conditions
CI / check system | python on rocky linux 8 (push) Blocked by required conditions
CI / check system | python on rocky linux 9 (push) Blocked by required conditions
CI / check system | pypy on ubuntu (push) Blocked by required conditions
CI / check system | pyston (push) Blocked by required conditions
CI / check system | alpine (push) Blocked by required conditions
CI / check system | python on macos aarch64 (push) Blocked by required conditions
CI / check system | python3.10 on windows (push) Blocked by required conditions
CI / check system | python3.10 on windows x86 (push) Blocked by required conditions
CI / check system | python3.13 on windows (push) Blocked by required conditions
CI / check system | python3.12 via chocolatey (push) Blocked by required conditions
CI / check system | python3.9 via pyenv (push) Blocked by required conditions
CI / check system | python3.13 (push) Blocked by required conditions
CI / check system | conda3.11 on linux (push) Blocked by required conditions
CI / check system | conda3.8 on linux (push) Blocked by required conditions
CI / check system | conda3.11 on macos (push) Blocked by required conditions
CI / check system | conda3.8 on macos (push) Blocked by required conditions
CI / check system | conda3.11 on windows (push) Blocked by required conditions
CI / check system | conda3.8 on windows (push) Blocked by required conditions
CI / check system | amazonlinux (push) Blocked by required conditions
CI / check system | embedded python3.10 on windows (push) Blocked by required conditions
CI / benchmarks (push) Blocked by required conditions
Some checks are pending
CI / check system | python on macos x86_64 (push) Blocked by required conditions
CI / cargo clippy | ubuntu (push) Blocked by required conditions
CI / Determine changes (push) Waiting to run
CI / lint (push) Waiting to run
CI / cargo clippy | windows (push) Blocked by required conditions
CI / cargo dev generate-all (push) Blocked by required conditions
CI / cargo shear (push) Waiting to run
CI / cargo test | ubuntu (push) Blocked by required conditions
CI / cargo test | macos (push) Blocked by required conditions
CI / cargo test | windows (push) Blocked by required conditions
CI / check windows trampoline | aarch64 (push) Blocked by required conditions
CI / check windows trampoline | i686 (push) Blocked by required conditions
CI / check windows trampoline | x86_64 (push) Blocked by required conditions
CI / test windows trampoline | i686 (push) Blocked by required conditions
CI / test windows trampoline | x86_64 (push) Blocked by required conditions
CI / typos (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / build binary | linux (push) Blocked by required conditions
CI / build binary | macos aarch64 (push) Blocked by required conditions
CI / build binary | macos x86_64 (push) Blocked by required conditions
CI / build binary | windows (push) Blocked by required conditions
CI / cargo build (msrv) (push) Blocked by required conditions
CI / build binary | freebsd (push) Blocked by required conditions
CI / ecosystem test | prefecthq/prefect (push) Blocked by required conditions
CI / ecosystem test | pallets/flask (push) Blocked by required conditions
CI / integration test | conda on ubuntu (push) Blocked by required conditions
CI / integration test | free-threaded on linux (push) Blocked by required conditions
CI / integration test | free-threaded on windows (push) Blocked by required conditions
CI / integration test | pypy on ubuntu (push) Blocked by required conditions
CI / integration test | pypy on windows (push) Blocked by required conditions
CI / integration test | graalpy on ubuntu (push) Blocked by required conditions
CI / integration test | graalpy on windows (push) Blocked by required conditions
CI / integration test | github actions (push) Blocked by required conditions
CI / integration test | determine publish changes (push) Blocked by required conditions
CI / integration test | uv publish (push) Blocked by required conditions
CI / check cache | ubuntu (push) Blocked by required conditions
CI / check cache | macos aarch64 (push) Blocked by required conditions
CI / check system | python on debian (push) Blocked by required conditions
CI / check system | python on fedora (push) Blocked by required conditions
CI / check system | python on ubuntu (push) Blocked by required conditions
CI / check system | python on opensuse (push) Blocked by required conditions
CI / check system | homebrew python on macos aarch64 (push) Blocked by required conditions
CI / check system | python on rocky linux 8 (push) Blocked by required conditions
CI / check system | python on rocky linux 9 (push) Blocked by required conditions
CI / check system | pypy on ubuntu (push) Blocked by required conditions
CI / check system | pyston (push) Blocked by required conditions
CI / check system | alpine (push) Blocked by required conditions
CI / check system | python on macos aarch64 (push) Blocked by required conditions
CI / check system | python3.10 on windows (push) Blocked by required conditions
CI / check system | python3.10 on windows x86 (push) Blocked by required conditions
CI / check system | python3.13 on windows (push) Blocked by required conditions
CI / check system | python3.12 via chocolatey (push) Blocked by required conditions
CI / check system | python3.9 via pyenv (push) Blocked by required conditions
CI / check system | python3.13 (push) Blocked by required conditions
CI / check system | conda3.11 on linux (push) Blocked by required conditions
CI / check system | conda3.8 on linux (push) Blocked by required conditions
CI / check system | conda3.11 on macos (push) Blocked by required conditions
CI / check system | conda3.8 on macos (push) Blocked by required conditions
CI / check system | conda3.11 on windows (push) Blocked by required conditions
CI / check system | conda3.8 on windows (push) Blocked by required conditions
CI / check system | amazonlinux (push) Blocked by required conditions
CI / check system | embedded python3.10 on windows (push) Blocked by required conditions
CI / benchmarks (push) Blocked by required conditions
## Summary I added `crates/uv-python/create-mirror.py` to make it easy to download all the needed files to create a mirror for Python distributions in an offline environment. The script also has an option to iterate over the git history of `download-metadata.json` to make sure we have all the files needed for all the uv versions. ## Test Plan ``` uv run create-mirror.py --from-all-history --os linux --arch x86_64 --name cpython 2024-10-25 01:31:12,973 - INFO - Starting download of 466 files. Downloading: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 466/466 [06:11<00:00, 1.26file/s] Successfully downloaded: 466 now you can run UV_PYTHON_INSTALL_MIRROR='file:///home/meitar/dev/uv/crates/uv-python/mirror' uv python install ``` Then I checked it offline (the `unshare` command makes sure that the process doesn't have any network access): ``` UV_PYTHON_INSTALL_MIRROR=file:///home/meitar/dev/uv/crates/uv-python/mirror sudo -E unshare -n /home/meitar/.local/bin/uv python install 3.13 Searching for Python versions matching: Python 3.13 Installed Python 3.13.0 in 2.91s + cpython-3.13.0-linux-x86_64-gnu ``` --------- Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
This commit is contained in:
parent
28d4ef35f9
commit
d2693dab87
1 changed files with 263 additions and 0 deletions
263
scripts/create-python-mirror.py
Normal file
263
scripts/create-python-mirror.py
Normal file
|
@ -0,0 +1,263 @@
|
|||
"""Create a mirror of Python distributions for use with uv.
|
||||
|
||||
Example usage:
|
||||
uv run ./scripts/create-python-mirror.py --name cpython --arch x86_64 --os linux
|
||||
"""
|
||||
|
||||
# /// script
|
||||
# requires-python = ">=3.8"
|
||||
# dependencies = [
|
||||
# "gitpython",
|
||||
# "httpx",
|
||||
# "tqdm",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Set, Tuple
|
||||
from urllib.parse import unquote
|
||||
|
||||
import httpx
|
||||
from git import GitCommandError, Repo
|
||||
from tqdm import tqdm
|
||||
|
||||
# Directory that contains this script.
SELF_DIR = Path(__file__).parent
# Parent of the script directory, used as the repository root.
REPO_ROOT = SELF_DIR.parent
# Metadata file whose entries carry the download URLs and checksums.
VERSIONS_FILE = REPO_ROOT.joinpath("crates", "uv-python", "download-metadata.json")
# Known upstream URL prefixes; they are stripped to obtain mirror-relative paths.
PREFIXES = [
    "https://github.com/indygreg/python-build-standalone/releases/download/",
    "https://downloads.python.org/pypy/",
]
|
||||
|
||||
|
||||
# Root logging: INFO and above, with timestamp and level on every record.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Raise the threshold of the HTTP client loggers so only warnings and errors
# from them are shown.
for _http_logger_name in ("httpx", "httpcore"):
    logging.getLogger(_http_logger_name).setLevel(logging.WARNING)
|
||||
|
||||
|
||||
def sanitize_url(url: str) -> Path:
    """Turn a download URL into the path used for it inside the mirror.

    The first entry of ``PREFIXES`` that ``url`` starts with is stripped and the
    remainder is percent-decoded. A URL that matches no known prefix is decoded
    and returned in full.
    """
    matched_prefix = next(
        (candidate for candidate in PREFIXES if url.startswith(candidate)), None
    )
    remainder = url if matched_prefix is None else url[len(matched_prefix) :]
    return Path(unquote(remainder))
|
||||
|
||||
|
||||
def sha256_checksum(file_path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

    The file is read in 8192-byte pieces so it never has to be held in memory
    as a whole.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def collect_metadata_from_git_history() -> List[Dict]:
    """Collect metadata entries from every committed revision of VERSIONS_FILE.

    Iterates over the commits that touched ``VERSIONS_FILE``, parses the file as
    JSON at each of them, and appends the values of the parsed object to the
    result. Entries that appear in several revisions are returned several
    times; no de-duplication happens here.

    Collection is best effort: a commit in which the file is missing or is not
    valid JSON is logged and skipped, and any other error is logged and ends
    the walk early.

    Returns:
        The entries gathered so far (possibly empty).
    """
    metadata: List[Dict] = []
    try:
        repo = Repo(REPO_ROOT, search_parent_directories=True)
        # Git tree paths always use "/" as separator. str() on a relative Path
        # yields backslashes on Windows, which would never match a tree entry,
        # so build the lookup key with as_posix(). It is the same for every
        # commit, hence computed once.
        blob_path = VERSIONS_FILE.relative_to(REPO_ROOT).as_posix()

        for commit in repo.iter_commits(paths=VERSIONS_FILE):
            try:
                # Raises KeyError when the file is absent from this commit's tree.
                blob = commit.tree / blob_path
                content = blob.data_stream.read().decode()
                data = json.loads(content)
                metadata.extend(data.values())
            except KeyError:
                logger.warning(
                    f"File {VERSIONS_FILE} not found in commit {commit.hexsha}. Skipping."
                )
            except json.JSONDecodeError as e:
                logger.error(f"Error decoding JSON in commit {commit.hexsha}: {e}")

    except GitCommandError as e:
        logger.error(f"Git command error: {e}")
    except Exception as e:
        # Deliberate catch-all: return whatever was collected instead of crashing.
        logger.exception(f"Unexpected error while collecting metadata: {e}")

    return metadata
|
||||
|
||||
|
||||
def filter_metadata(
    metadata: List[Dict], name: Optional[str], arch: Optional[str], os: Optional[str]
) -> List[Dict]:
    """Return the entries matching the given filters, one per distinct URL.

    A filter that is ``None`` or empty matches every entry. When several
    matching entries share the same ``"url"``, only the first one is kept.
    The input order is preserved.
    """
    criteria = {"name": name, "arch": arch, "os": os}

    matching = [
        candidate
        for candidate in metadata
        if all(
            not wanted or candidate[field] == wanted
            for field, wanted in criteria.items()
        )
    ]

    # Dicts keep insertion order, so setdefault retains the first entry per URL.
    first_by_url: Dict = {}
    for candidate in matching:
        first_by_url.setdefault(candidate["url"], candidate)
    return list(first_by_url.values())
|
||||
|
||||
|
||||
async def download_file(
    client: httpx.AsyncClient,
    url: str,
    dest: Path,
    expected_sha256: Optional[str],
    progress_bar,
    errors,
):
    """Download ``url`` to ``dest`` and verify its SHA-256 checksum if provided.

    The download is skipped when ``dest`` already exists and either matches
    ``expected_sha256`` or no checksum is available (``None``).

    The response body is streamed into a sibling ``<name>.part`` file and only
    renamed onto ``dest`` once it is complete and its checksum has been
    verified. A failed or interrupted download therefore never leaves a
    truncated file at ``dest`` that a later run could mistake for a finished
    one; an existing ``dest`` is left untouched on failure.

    Args:
        client: HTTP client used to stream the file.
        url: Source URL; must start with one of ``PREFIXES``.
        dest: Final location of the downloaded file.
        expected_sha256: Expected hex digest, or ``None`` to skip verification.
        progress_bar: Object with an ``update(n)`` method; advanced by exactly
            one step per call, whatever the outcome.
        errors: List that receives a ``(url, message)`` tuple on failure.

    Returns:
        ``True`` if the file is present afterwards (downloaded or skipped),
        ``False`` if the URL was rejected, the download failed, or the
        checksum did not match.
    """
    try:
        if dest.exists():
            if expected_sha256 is None:
                logger.debug(
                    f"File {dest} already exists no SHA-256 provided. Skipping download."
                )
                return True  # Success, even though skipped
            if expected_sha256 and sha256_checksum(dest) == expected_sha256:
                logger.debug(
                    f"File {dest} already exists and SHA-256 matches. Skipping download."
                )
                return True  # Success, even though skipped

        if not any(url.startswith(prefix) for prefix in PREFIXES):
            error_msg = f"No valid prefix found for {url}. Skipping."
            logger.warning(error_msg)
            errors.append((url, error_msg))
            return False

        dest.parent.mkdir(parents=True, exist_ok=True)
        logger.debug(f"Downloading {url} to {dest}")

        # Stream into a temporary sibling so `dest` only ever holds a complete,
        # verified file.
        partial = dest.with_name(dest.name + ".part")
        try:
            async with client.stream("GET", url) as response:
                response.raise_for_status()
                with open(partial, "wb") as f:
                    async for chunk in response.aiter_bytes():
                        f.write(chunk)

            if expected_sha256 and sha256_checksum(partial) != expected_sha256:
                error_msg = f"SHA-256 mismatch for {dest}. Deleting corrupted file."
                logger.error(error_msg)
                errors.append((url, "Checksum mismatch"))
                return False

            # Same-directory rename: replaces `dest` in one step.
            partial.replace(dest)

        except Exception as e:
            error_msg = f"Failed to download {url}: {str(e)}"
            logger.error(error_msg)
            errors.append((url, str(e)))
            return False
        finally:
            # No-op after a successful rename; removes leftovers otherwise
            # (including when the task is cancelled mid-download).
            partial.unlink(missing_ok=True)

        return True
    finally:
        # Exactly one progress step per file, on every exit path.
        progress_bar.update(1)
|
||||
|
||||
|
||||
async def download_files(
    urls: Set[Tuple[str, Optional[str]]], target: Path, max_concurrent: int
):
    """Download files with a limit on concurrent downloads using httpx.

    Args:
        urls: ``(url, sha256)`` pairs; ``sha256`` may be ``None``.
        target: Root directory of the mirror. Each file is stored at
            ``target / sanitize_url(url)``.
        max_concurrent: Maximum number of downloads in flight at once; must be
            at least 1.

    Returns:
        A ``(success_count, errors)`` tuple, where ``errors`` is a list of
        ``(url, message)`` tuples for the files that could not be fetched.

    Raises:
        ValueError: If ``max_concurrent`` is smaller than 1.
    """
    if max_concurrent < 1:
        # asyncio.Semaphore(0) can never be acquired, so every download would
        # wait forever; fail fast with a clear message instead.
        raise ValueError("max_concurrent must be at least 1")

    errors: List[Tuple[str, str]] = []  # To collect errors

    async with httpx.AsyncClient(follow_redirects=True) as client:
        progress_bar = tqdm(total=len(urls), desc="Downloading", unit="file")
        sem = asyncio.Semaphore(max_concurrent)

        async def sem_download(url, sha256):
            # The semaphore caps how many downloads run at the same time.
            async with sem:
                return await download_file(
                    client,
                    url,
                    target / sanitize_url(url),
                    sha256,
                    progress_bar,
                    errors,
                )

        try:
            results = await asyncio.gather(
                *(sem_download(url, sha256) for url, sha256 in urls)
            )
        finally:
            # Close the bar even if a download raised, so the terminal is
            # left in a clean state.
            progress_bar.close()

    success_count = sum(1 for succeeded in results if succeeded)
    return success_count, errors
|
||||
|
||||
|
||||
def parse_arguments():
    """Build the command-line interface and return the parsed arguments."""
    parser = argparse.ArgumentParser(description="Download and mirror Python builds.")

    # Plain string filters share the same shape, so register them in one loop.
    for flag, description in (
        ("--name", "Filter by name (e.g., 'cpython')."),
        ("--arch", "Filter by architecture (e.g., 'aarch64')."),
        ("--os", "Filter by operating system (e.g., 'darwin')."),
    ):
        parser.add_argument(flag, help=description)

    parser.add_argument(
        "--max-concurrent",
        default=20,
        type=int,
        help="Maximum number of simultaneous downloads.",
    )
    parser.add_argument(
        "--from-all-history",
        help="Collect URLs from the entire git history.",
        action="store_true",
    )
    parser.add_argument(
        "--target",
        help="Directory to store the downloaded files.",
        default=SELF_DIR / "mirror",
    )

    return parser.parse_args()
|
||||
|
||||
|
||||
def main():
    """Run the CLI: load the metadata, filter it, download the files, report.

    Metadata comes either from the whole git history of ``VERSIONS_FILE``
    (``--from-all-history``) or from the file in the working tree. Download
    failures are listed on stdout; an unexpected error during the download
    phase is logged rather than raised.
    """
    args = parse_arguments()

    if args.from_all_history:
        metadata = collect_metadata_from_git_history()
    else:
        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(VERSIONS_FILE, encoding="utf-8") as f:
            metadata = list(json.load(f).values())

    filtered_metadata = filter_metadata(metadata, args.name, args.arch, args.os)
    urls = {(entry["url"], entry["sha256"]) for entry in filtered_metadata}

    if not urls:
        logger.error("No URLs found.")
        return

    target = Path(args.target)
    logger.info(f"Downloading {len(urls)} files to {target}...")
    try:
        success_count, errors = asyncio.run(
            download_files(urls, target, args.max_concurrent)
        )
        print(f"Successfully downloaded: {success_count} files.")
        if errors:
            print("Failed downloads:")
            for url, error in errors:
                print(f"- {url}: {error}")
        # as_uri() builds a well-formed file URL on every platform (for
        # example file:///C:/... on Windows) and percent-encodes characters
        # such as spaces, unlike gluing "file://" onto the raw path.
        mirror_url = target.absolute().as_uri()
        print(
            f"Example usage: `UV_PYTHON_INSTALL_MIRROR='{mirror_url}' uv python install 3.13`"
        )
    except Exception as e:
        logger.error(f"Error during download: {e}")


if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue