Mirror of https://github.com/python/cpython.git, synced 2025-11-01 18:51:43 +00:00
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503)
Co-authored-by: Guido van Rossum <guido@python.org>
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
parent a81849b031
commit c5fc156852
91 changed files with 27057 additions and 146 deletions
Tools/peg_generator/scripts/download_pypi_packages.py (new executable file, 86 lines added)
@@ -0,0 +1,86 @@
#!/usr/bin/env python3.8

import argparse
import os
import json

from typing import Dict, Any
from urllib.request import urlretrieve

argparser = argparse.ArgumentParser(
    prog="download_pypi_packages", description="Helper program to download PyPI packages",
)
argparser.add_argument(
    "-n", "--number", type=int, default=100, help="Number of packages to download"
)
argparser.add_argument(
    "-a", "--all", action="store_true", help="Download all packages listed in the json file"
)


def load_json(filename: str) -> Dict[Any, Any]:
    with open(os.path.join("data", f"{filename}.json"), "r") as f:
        j = json.loads(f.read())
    return j


def remove_json(filename: str) -> None:
    path = os.path.join("data", f"{filename}.json")
    os.remove(path)


def download_package_json(package_name: str) -> None:
    url = f"https://pypi.org/pypi/{package_name}/json"
    urlretrieve(url, os.path.join("data", f"{package_name}.json"))


def download_package_code(name: str, package_json: Dict[Any, Any]) -> None:
    source_index = -1
    for idx, url_info in enumerate(package_json["urls"]):
        if url_info["python_version"] == "source":
            source_index = idx
            break
    filename = package_json["urls"][source_index]["filename"]
    url = package_json["urls"][source_index]["url"]
    urlretrieve(url, os.path.join("data", "pypi", filename))


def main() -> None:
    args = argparser.parse_args()
    number_packages = args.number
    all_packages = args.all

    top_pypi_packages = load_json("top-pypi-packages-365-days")
    if all_packages:
        top_pypi_packages = top_pypi_packages["rows"]
    elif number_packages >= 0 and number_packages <= 4000:
        top_pypi_packages = top_pypi_packages["rows"][:number_packages]
    else:
        raise AssertionError("Unknown value for NUMBER_OF_PACKAGES")

    try:
        os.mkdir(os.path.join("data", "pypi"))
    except FileExistsError:
        pass

    for package in top_pypi_packages:
        package_name = package["project"]

        print(f"Downloading JSON Data for {package_name}... ", end="")
        download_package_json(package_name)
        print("Done")

        package_json = load_json(package_name)
        try:
            print(f"Downloading and compressing package {package_name} ... ", end="")
            download_package_code(package_name, package_json)
            print("Done")
        except (IndexError, KeyError):
            print(f"Could not locate source for {package_name}")
            continue
        finally:
            remove_json(package_name)


if __name__ == "__main__":
    main()
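For orientation, here is a rough usage sketch (not part of the commit) showing how the helpers above fit together to fetch a single package. It assumes the script is importable as download_pypi_packages, that it is run from a directory where the relative data/ paths in the script resolve, and that the package name "black" is just an illustrative choice.

# Hedged sketch: assumes download_pypi_packages is importable and that the
# relative "data" paths used by the script resolve from the current directory.
import os

import download_pypi_packages as dpp

package_name = "black"  # hypothetical example package

os.makedirs(os.path.join("data", "pypi"), exist_ok=True)  # mirrors the mkdir in main()

dpp.download_package_json(package_name)        # fetch PyPI metadata to data/black.json
package_json = dpp.load_json(package_name)     # parse the downloaded metadata
try:
    # pick the source-distribution URL out of the metadata and save it under data/pypi/
    dpp.download_package_code(package_name, package_json)
finally:
    dpp.remove_json(package_name)              # delete the metadata file afterwards

In normal use the whole download is driven by main() via the command-line options defined above: -n limits how many entries of the top-pypi-packages-365-days.json list (expected under data/) are fetched, defaulting to 100, and -a downloads all of them.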