Add script for ecosystem-wide checks of all rules and fixes (#4326)

* Add script for ecosystem-wide checks of all rules and fixes

This adds my personal script for checking an entire checkout of ~2.1k packages for
panics, autofix errors, and similar problems. It's not really meant to be used by anybody else, but I thought it's better for it to live in the repo than not.

For reference, this is the current output of failing autofixes: https://gist.github.com/konstin/c3fada0135af6cacec74f166adf87a00. Trimmed down to the useful information: https://gist.github.com/konstin/c864f4c300c7903a24fdda49635c5da9

* Keep GitHub template intact

* Remove the need for ripgrep

* Sort output
konstin 2023-05-22 15:23:25 +02:00 committed by GitHub
parent cbe344f4d5
commit 550b643e33
7 changed files with 124 additions and 3 deletions

.gitignore

@@ -5,6 +5,7 @@ mkdocs.yml
 .overrides
 ruff-old
 github_search*.jsonl
+.venv*
 ###
 # Rust.gitignore

ecosystem_fix_all_check.py

@@ -0,0 +1,84 @@
"""This is @konstin's scripts for checking an entire checkout of ~2.1k packages for
panics, autofix errors and similar problems.
It's a less elaborate, more hacky version of check_ecosystem.py
"""
import json
import subprocess
from pathlib import Path
from subprocess import CalledProcessError
from typing import NamedTuple, Optional

from tqdm import tqdm


class Repository(NamedTuple):
    """A GitHub repository at a specific ref."""

    org: str
    repo: str
    ref: Optional[str]


def main() -> None:
    checkouts = Path("checkouts")
    out_dir = Path("ecosystem_fix_all_results")
    github_search_json = Path("github_search.jsonl")
    # A plain `ruff` doesn't work: without a path separator, the binary is
    # looked up on PATH rather than in the current directory.
    ruff = Path.cwd().joinpath("ruff")

    out_dir.mkdir(parents=True, exist_ok=True)
    repositories = []
    for line in github_search_json.read_text().splitlines():
        item = json.loads(line)
        # Pick only the easier case for now.
        if item["path"] != "pyproject.toml":
            continue
        repositories.append(
            Repository(
                item["owner"],
                item["repo"],
                item.get("ref"),
            ),
        )

    successes = 0
    errors = 0
    for repository in tqdm(repositories):
        project_dir = checkouts.joinpath(repository.org).joinpath(repository.repo)
        if not project_dir.is_dir():
            tqdm.write(f"Missing {project_dir}")
            errors += 1
            continue
        try:
            ruff_args = [
                "check",
                "--no-cache",
                "--exit-zero",
                "--select",
                "ALL",
                "--fix",
            ]
            output = subprocess.run(
                [ruff, *ruff_args, "."],
                cwd=project_dir,
                capture_output=True,
                text=True,
                # Without `check=True` a non-zero exit (e.g. a panic) would
                # never raise, and the except branch below would be dead code.
                # With `--exit-zero`, plain lint errors still exit 0.
                check=True,
            )
        except CalledProcessError as e:
            tqdm.write(f"Ruff failed on {project_dir}: {e}")
            errors += 1
            continue
        org_repo = f"{repository.org}:{repository.repo}"
        out_dir.joinpath(f"{org_repo}.stdout.txt").write_text(output.stdout)
        out_dir.joinpath(f"{org_repo}.stderr.txt").write_text(output.stderr)
        successes += 1

    print(f"Successes: {successes}  Errors: {errors}")


if __name__ == "__main__":
    main()
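
For reference, the script only assumes that each line of github_search.jsonl is a JSON
object with `owner`, `repo`, `path` and (optionally) `ref` keys. A hypothetical record,
with field names taken from the `item[...]` lookups above and made-up values:

    import json

    line = '{"owner": "example-org", "repo": "example-repo", "ref": "main", "path": "pyproject.toml"}'
    item = json.loads(line)
    assert item["path"] == "pyproject.toml"  # this record would be picked up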


@@ -0,0 +1,26 @@
#!/bin/bash
# This is @konstin's setup for checking an entire checkout of ~2.1k packages for
# panics, autofix errors and similar problems.
#
# We put this in a docker container because processing random scraped code from GitHub is
# [kinda dangerous](https://moyix.blogspot.com/2022/09/someones-been-messing-with-my-subnormals.html)
# https://stackoverflow.com/a/246128/3549270
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
time docker run --rm -it \
  -w /app \
  -v "${SCRIPT_DIR}/../target/checkouts:/app/checkouts" \
  -v "${SCRIPT_DIR}/../target/ecosystem_fix_all_results:/app/ecosystem_fix_all_results" \
  -v "${SCRIPT_DIR}/../target/release/ruff:/app/ruff" \
  -v "${SCRIPT_DIR}/../ecosystem_all.py:/app/ecosystem_all.py" \
  -v "${SCRIPT_DIR}/../github_search.jsonl:/app/github_search.jsonl" \
  -v "${SCRIPT_DIR}/../.venv-3.11:/app/.venv" \
  -v "${SCRIPT_DIR}/ecosystem_fix_all_check_entrypoint.sh:/app/ecosystem_fix_all_check_entrypoint.sh" \
  -v "${SCRIPT_DIR}/ecosystem_fix_all_check.py:/app/ecosystem_fix_all_check.py" \
  python:3.11 ./ecosystem_fix_all_check_entrypoint.sh

# grep the autofix errors
grep -R "the rule codes" "${SCRIPT_DIR}/../target/ecosystem_fix_all_results" | sort > "${SCRIPT_DIR}/../target/autofix-errors.txt"
# Make sure we didn't have an early exit
echo "Done"

ecosystem_fix_all_check_entrypoint.sh

@@ -0,0 +1,9 @@
#!/bin/bash
# Wrapper for ecosystem_fix_all_check.py
if [ ! -d ".venv/bin" ]; then
  python -m venv .venv
  .venv/bin/pip install tqdm
fi

.venv/bin/python ecosystem_fix_all_check.py


@@ -21,13 +21,13 @@ VERSIONS: list[tuple[int, int]] = [
 ]


-class FakeConfig:  # noqa: D101
+class FakeConfig:
     intersphinx_timeout = None
     tls_verify = True
     user_agent = ""


-class FakeApp:  # noqa: D101
+class FakeApp:
     srcdir = ""
     config = FakeConfig()


@@ -12,6 +12,7 @@ line-length = 88
 select = ["ALL"]
 ignore = [
     "C901",  # McCabe complexity
+    "D",  # pydocstyle
     "PL",  # pylint
     "S",  # bandit
     "G",  # flake8-logging-format


@@ -47,7 +47,7 @@ pub(crate) static CONFUSABLES: Lazy<FxHashMap<u32, u8>> = Lazy::new(|| {
     return prelude + "\n".join(tuples) + postlude


-def main() -> None:  # noqa: D103
+def main() -> None:
     print("Retrieving data...")
     mapping_data = get_mapping_data()
     formatted_data = format_confusables_rs(mapping_data)