Add pypi 10k packages with most dependents dataset (#711)

From manual inspection, this dataset generated through the [libraries.io
API](https://libraries.io/api#project-search) seems more mainstream than
the current 8k one, which is also preserved. I've added the dataset to
the repo because the API requires an API key.
This commit is contained in:
konsti 2023-12-24 19:31:52 +01:00 committed by GitHub
parent 5bce699ee1
commit e23292641f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 315 additions and 23 deletions

2
scripts/popular_packages/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
pypi_8k_downloads.txt
pypi_10k_most_dependents.txt

View file

@ -0,0 +1,86 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "initial_id",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"Regenerate `pypi_10k_most_dependents.txt`.\n",
"\n",
"Before running, paste your personal `api_key` from https://libraries.io/account below.\n",
"\n",
"The latest version of the list is available at:\n",
"https://gist.github.com/charliermarsh/07afd9f543dfea68408a4a42cede4be4.\n",
"\"\"\"\n",
"\n",
"from pathlib import Path\n",
"\n",
"import httpx\n",
"\n",
"# libraries.io API key; deliberately blank in the committed notebook.\n",
"api_key = \"\"\n",
"# Cache of fetched result pages, keyed by page index.\n",
"responses = dict()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2532bf8c426af5",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"# Fetch 100 pages with 100 entries per page -> 10k projects, sorted by dependents count.\n",
"# Pages already present in `responses` are skipped, so this cell can simply be re-run\n",
"# to resume after a failure without requesting the same page twice.\n",
"for i in range(100):\n",
"    print(i)\n",
"    if i in responses:\n",
"        continue\n",
"    # https://libraries.io/api#project-search\n",
"    params = {\n",
"        \"platforms\": \"Pypi\",\n",
"        \"per_page\": 100,\n",
"        \"page\": i + 1,  # loop index is 0-based, requested page number is i + 1\n",
"        # Passed as a real key/value pair: a hand-built `&sort{...}` query string\n",
"        # loses the `=` and the sort order is then never sent.\n",
"        \"sort\": \"dependents_count\",\n",
"        \"api_key\": api_key,\n",
"    }\n",
"    response = httpx.get(\"https://libraries.io/api/search\", params=params, timeout=30.0)\n",
"    # Fail loudly on HTTP errors instead of caching an error payload as a result page.\n",
"    response.raise_for_status()\n",
"    responses[i] = response.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7bc80702b6f8ebc3",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"# Collect the \"name\" of every entry of every cached page, in the dict's iteration order.\n",
"flat_list = [entry[\"name\"] for page in responses.values() for entry in page]\n",
"print(flat_list)\n",
"\n",
"# One name per line, written relative to the current working directory.\n",
"output_path = Path(\"pypi_10k_most_dependents.txt\")\n",
"output_path.write_text(\"\\n\".join(flat_list))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -1,4 +1,4 @@
#!/usr/bin/env bash
# Download the top-pypi-packages 30-day JSON dump and write the `project` field of
# every entry in `.rows` to a text file, one per line (`jq -r` emits raw strings
# rather than JSON-quoted ones).
curl https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.min.json | jq -r ".rows | .[].project" > pypi_top_8k_flat.txt
curl https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.min.json | jq -r ".rows | .[].project" > pypi_8k_downloads.txt

View file

@ -1 +0,0 @@
pypi_top_8k_flat.txt