Add additional version filter to mirror script. (#10055)

## Summary

Adds a regular-expression-based version filter to the Python mirror script.

## Test Plan

Tested manually by running `uv run ./scripts/create-python-mirror.py --name cpython
--arch x86_64 --os linux --version "3.13.\d+$"`.
This commit is contained in:
bw513 2024-12-20 18:50:59 +00:00 committed by GitHub
parent cf14a62de7
commit 2ca5c2ba70
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -18,6 +18,7 @@ import asyncio
import hashlib import hashlib
import json import json
import logging import logging
import re
from pathlib import Path from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple from typing import Dict, List, Optional, Set, Tuple
from urllib.parse import unquote from urllib.parse import unquote
@ -98,8 +99,20 @@ def check_arch(entry, arch):
return False return False
def match_version(entry: Dict, pattern: re.Pattern) -> bool:
    """Return whether ``pattern`` matches the version string built from ``entry``.

    The version string is ``"<major>.<minor>.<patch>"``; when the entry
    carries a prerelease tag, ``"-<prerelease>"`` is appended.

    Args:
        entry: Metadata mapping with ``major``, ``minor`` and ``patch`` keys
            and an optional ``prerelease`` key.
        pattern: Compiled regular expression to test against the version.

    Returns:
        True if ``pattern`` matches at the start of the version string.
    """
    vers = f"{entry['major']}.{entry['minor']}.{entry['patch']}"
    # A missing or None prerelease means "no prerelease"; without this guard
    # the string would end in a literal "-None" or the lookup would raise.
    prerelease = entry.get("prerelease")
    if prerelease is not None and prerelease != "":
        vers += f"-{prerelease}"
    # Pattern.match anchors only at the start; callers add "$" to anchor the end.
    return pattern.match(vers) is not None
def filter_metadata( def filter_metadata(
metadata: List[Dict], name: Optional[str], arch: Optional[str], os: Optional[str] metadata: List[Dict],
name: Optional[str],
arch: Optional[str],
os: Optional[str],
version: Optional[re.Pattern],
) -> List[Dict]: ) -> List[Dict]:
"""Filter the metadata based on name, architecture, and OS, ensuring unique URLs.""" """Filter the metadata based on name, architecture, and OS, ensuring unique URLs."""
filtered = [ filtered = [
@ -108,6 +121,7 @@ def filter_metadata(
if (not name or entry["name"] == name) if (not name or entry["name"] == name)
and (not arch or check_arch(entry["arch"], arch)) and (not arch or check_arch(entry["arch"], arch))
and (not os or entry["os"] == os) and (not os or entry["os"] == os)
and (not version or match_version(entry, version))
] ]
# Use a set to ensure unique URLs # Use a set to ensure unique URLs
unique_urls = set() unique_urls = set()
@ -214,6 +228,9 @@ def parse_arguments():
parser.add_argument("--name", help="Filter by name (e.g., 'cpython').") parser.add_argument("--name", help="Filter by name (e.g., 'cpython').")
parser.add_argument("--arch", help="Filter by architecture (e.g., 'aarch64').") parser.add_argument("--arch", help="Filter by architecture (e.g., 'aarch64').")
parser.add_argument("--os", help="Filter by operating system (e.g., 'darwin').") parser.add_argument("--os", help="Filter by operating system (e.g., 'darwin').")
parser.add_argument(
"--version", help="Filter version by regex (e.g., '3.13.\\d+$')."
)
parser.add_argument( parser.add_argument(
"--max-concurrent", "--max-concurrent",
type=int, type=int,
@ -243,7 +260,10 @@ def main():
with open(VERSIONS_FILE) as f: with open(VERSIONS_FILE) as f:
metadata = list(json.load(f).values()) metadata = list(json.load(f).values())
filtered_metadata = filter_metadata(metadata, args.name, args.arch, args.os) version = re.compile(args.version) if args.version else None
filtered_metadata = filter_metadata(
metadata, args.name, args.arch, args.os, version
)
urls = {(entry["url"], entry["sha256"]) for entry in filtered_metadata} urls = {(entry["url"], entry["sha256"]) for entry in filtered_metadata}
if not urls: if not urls: