mirror of
https://github.com/astral-sh/ruff.git
synced 2025-11-01 20:31:57 +00:00
Ecosystem CI: Allow storing checkouts locally (#4192)
* Ecosystem CI: Allow storing checkouts locally. This adds a `--checkouts` option to (re)use a local directory instead of checking out into a tempdir. * Fix missing path conversion
This commit is contained in:
parent
3c2f41b615
commit
6a52577630
3 changed files with 114 additions and 63 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -3,8 +3,8 @@
|
||||||
crates/ruff/resources/test/cpython
|
crates/ruff/resources/test/cpython
|
||||||
mkdocs.yml
|
mkdocs.yml
|
||||||
.overrides
|
.overrides
|
||||||
github_search.jsonl
|
|
||||||
ruff-old
|
ruff-old
|
||||||
|
github_search*.jsonl
|
||||||
|
|
||||||
###
|
###
|
||||||
# Rust.gitignore
|
# Rust.gitignore
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,12 @@
|
||||||
# docker buildx build -f scripts/Dockerfile.ecosystem -t ruff-ecosystem-checker --load .
|
# docker buildx build -f scripts/Dockerfile.ecosystem -t ruff-ecosystem-checker --load .
|
||||||
# docker run --rm -v ./target/x86_64-unknown-linux-musl/debug/ruff:/app/ruff-new -v ./ruff-old:/app/ruff-old ruff-ecosystem-checker
|
# docker run --rm -v ./target/x86_64-unknown-linux-musl/debug/ruff:/app/ruff-new -v ./ruff-old:/app/ruff-old ruff-ecosystem-checker
|
||||||
# ```
|
# ```
|
||||||
|
# You can customize this, e.g. cache the git checkouts and use a custom json file:
|
||||||
|
# ```
|
||||||
|
# docker run -v ./target/x86_64-unknown-linux-musl/debug/ruff:/app/ruff-new -v ./ruff-old:/app/ruff-old \
|
||||||
|
# -v ./target/checkouts:/app/checkouts -v ./github_search.jsonl:/app/github_search.jsonl \
|
||||||
|
# --rm ruff-ecosystem-checker python check_ecosystem.py -v ruff-new ruff-old --checkouts checkouts > output.txt
|
||||||
|
# ```
|
||||||
|
|
||||||
FROM python:3.11
|
FROM python:3.11
|
||||||
RUN mkdir /app
|
RUN mkdir /app
|
||||||
|
|
|
||||||
|
|
@ -14,8 +14,9 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import time
|
||||||
from asyncio.subprocess import PIPE, create_subprocess_exec
|
from asyncio.subprocess import PIPE, create_subprocess_exec
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager, nullcontext
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, NamedTuple, Optional, Self
|
from typing import TYPE_CHECKING, NamedTuple, Optional, Self
|
||||||
|
|
||||||
|
|
@ -36,37 +37,41 @@ class Repository(NamedTuple):
|
||||||
exclude: str = ""
|
exclude: str = ""
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def clone(self: Self) -> "AsyncIterator[Path]":
|
async def clone(self: Self, checkout_dir: Path) -> "AsyncIterator[Path]":
|
||||||
"""Shallow clone this repository to a temporary directory."""
|
"""Shallow clone this repository to a temporary directory."""
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
if checkout_dir.exists():
|
||||||
logger.debug(f"Cloning {self.org}/{self.repo}")
|
logger.debug(f"Reusing {self.org}/{self.repo}")
|
||||||
git_command = [
|
yield Path(checkout_dir)
|
||||||
"git",
|
return
|
||||||
"clone",
|
|
||||||
"--config",
|
|
||||||
"advice.detachedHead=false",
|
|
||||||
"--quiet",
|
|
||||||
"--depth",
|
|
||||||
"1",
|
|
||||||
"--no-tags",
|
|
||||||
]
|
|
||||||
if self.ref:
|
|
||||||
git_command.extend(["--branch", self.ref])
|
|
||||||
|
|
||||||
git_command.extend(
|
logger.debug(f"Cloning {self.org}/{self.repo}")
|
||||||
[
|
git_command = [
|
||||||
f"https://github.com/{self.org}/{self.repo}",
|
"git",
|
||||||
tmpdir,
|
"clone",
|
||||||
],
|
"--config",
|
||||||
)
|
"advice.detachedHead=false",
|
||||||
|
"--quiet",
|
||||||
|
"--depth",
|
||||||
|
"1",
|
||||||
|
"--no-tags",
|
||||||
|
]
|
||||||
|
if self.ref:
|
||||||
|
git_command.extend(["--branch", self.ref])
|
||||||
|
|
||||||
process = await create_subprocess_exec(*git_command)
|
git_command.extend(
|
||||||
|
[
|
||||||
|
f"https://github.com/{self.org}/{self.repo}",
|
||||||
|
checkout_dir,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
await process.wait()
|
process = await create_subprocess_exec(*git_command)
|
||||||
|
|
||||||
logger.debug(f"Finished cloning {self.org}/{self.repo}")
|
await process.wait()
|
||||||
|
|
||||||
yield Path(tmpdir)
|
logger.debug(f"Finished cloning {self.org}/{self.repo}")
|
||||||
|
|
||||||
|
yield Path(checkout_dir)
|
||||||
|
|
||||||
|
|
||||||
REPOSITORIES = {
|
REPOSITORIES = {
|
||||||
|
|
@ -106,6 +111,8 @@ async def check(
|
||||||
ruff_args.extend(["--ignore", ignore])
|
ruff_args.extend(["--ignore", ignore])
|
||||||
if exclude:
|
if exclude:
|
||||||
ruff_args.extend(["--exclude", exclude])
|
ruff_args.extend(["--exclude", exclude])
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
proc = await create_subprocess_exec(
|
proc = await create_subprocess_exec(
|
||||||
ruff.absolute(),
|
ruff.absolute(),
|
||||||
*ruff_args,
|
*ruff_args,
|
||||||
|
|
@ -114,10 +121,10 @@ async def check(
|
||||||
stderr=PIPE,
|
stderr=PIPE,
|
||||||
cwd=path,
|
cwd=path,
|
||||||
)
|
)
|
||||||
|
|
||||||
result, err = await proc.communicate()
|
result, err = await proc.communicate()
|
||||||
|
end = time.time()
|
||||||
|
|
||||||
logger.debug(f"Finished checking {name} with {ruff}")
|
logger.debug(f"Finished checking {name} with {ruff} in {end - start:.2f}")
|
||||||
|
|
||||||
if proc.returncode != 0:
|
if proc.returncode != 0:
|
||||||
raise RuffError(err.decode("utf8"))
|
raise RuffError(err.decode("utf8"))
|
||||||
|
|
@ -150,41 +157,58 @@ class Diff(NamedTuple):
|
||||||
yield f"+ {line}"
|
yield f"+ {line}"
|
||||||
|
|
||||||
|
|
||||||
async def compare(ruff1: Path, ruff2: Path, repo: Repository) -> Diff | None:
|
async def compare(
|
||||||
|
ruff1: Path,
|
||||||
|
ruff2: Path,
|
||||||
|
repo: Repository,
|
||||||
|
checkouts: Optional[Path] = None,
|
||||||
|
) -> Diff | None:
|
||||||
"""Check a specific repository against two versions of ruff."""
|
"""Check a specific repository against two versions of ruff."""
|
||||||
removed, added = set(), set()
|
removed, added = set(), set()
|
||||||
|
|
||||||
async with repo.clone() as path:
|
# Allows keeping the checkouts in a given location instead of a temporary directory
|
||||||
try:
|
if checkouts:
|
||||||
async with asyncio.TaskGroup() as tg:
|
checkout_dir = checkouts.joinpath(repo.org).joinpath(repo.repo)
|
||||||
check1 = tg.create_task(
|
# Don't create the repodir itself, we need that for checking for existing
|
||||||
check(
|
# clones
|
||||||
ruff=ruff1,
|
checkout_dir.parent.mkdir(exist_ok=True, parents=True)
|
||||||
path=path,
|
location_context = nullcontext(checkout_dir)
|
||||||
name=f"{repo.org}/{repo.repo}",
|
else:
|
||||||
select=repo.select,
|
location_context = tempfile.TemporaryDirectory()
|
||||||
ignore=repo.ignore,
|
|
||||||
exclude=repo.exclude,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
check2 = tg.create_task(
|
|
||||||
check(
|
|
||||||
ruff=ruff2,
|
|
||||||
path=path,
|
|
||||||
name=f"{repo.org}/{repo.repo}",
|
|
||||||
select=repo.select,
|
|
||||||
ignore=repo.ignore,
|
|
||||||
exclude=repo.exclude,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
except ExceptionGroup as e:
|
|
||||||
raise e.exceptions[0] from e
|
|
||||||
|
|
||||||
for line in difflib.ndiff(check1.result(), check2.result()):
|
with location_context as checkout_dir:
|
||||||
if line.startswith("- "):
|
checkout_dir = Path(checkout_dir)
|
||||||
removed.add(line[2:])
|
async with repo.clone(checkout_dir) as path:
|
||||||
elif line.startswith("+ "):
|
try:
|
||||||
added.add(line[2:])
|
async with asyncio.TaskGroup() as tg:
|
||||||
|
check1 = tg.create_task(
|
||||||
|
check(
|
||||||
|
ruff=ruff1,
|
||||||
|
path=path,
|
||||||
|
name=f"{repo.org}/{repo.repo}",
|
||||||
|
select=repo.select,
|
||||||
|
ignore=repo.ignore,
|
||||||
|
exclude=repo.exclude,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
check2 = tg.create_task(
|
||||||
|
check(
|
||||||
|
ruff=ruff2,
|
||||||
|
path=path,
|
||||||
|
name=f"{repo.org}/{repo.repo}",
|
||||||
|
select=repo.select,
|
||||||
|
ignore=repo.ignore,
|
||||||
|
exclude=repo.exclude,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
except ExceptionGroup as e:
|
||||||
|
raise e.exceptions[0] from e
|
||||||
|
|
||||||
|
for line in difflib.ndiff(check1.result(), check2.result()):
|
||||||
|
if line.startswith("- "):
|
||||||
|
removed.add(line[2:])
|
||||||
|
elif line.startswith("+ "):
|
||||||
|
added.add(line[2:])
|
||||||
|
|
||||||
return Diff(removed, added)
|
return Diff(removed, added)
|
||||||
|
|
||||||
|
|
@ -226,7 +250,13 @@ def read_projects_jsonl(projects_jsonl: Path) -> dict[str, Repository]:
|
||||||
return repositories
|
return repositories
|
||||||
|
|
||||||
|
|
||||||
async def main(*, ruff1: Path, ruff2: Path, projects_jsonl: Optional[Path]) -> None:
|
async def main(
|
||||||
|
*,
|
||||||
|
ruff1: Path,
|
||||||
|
ruff2: Path,
|
||||||
|
projects_jsonl: Optional[Path],
|
||||||
|
checkouts: Optional[Path] = None,
|
||||||
|
) -> None:
|
||||||
"""Check two versions of ruff against a corpus of open-source code."""
|
"""Check two versions of ruff against a corpus of open-source code."""
|
||||||
if projects_jsonl:
|
if projects_jsonl:
|
||||||
repositories = read_projects_jsonl(projects_jsonl)
|
repositories = read_projects_jsonl(projects_jsonl)
|
||||||
|
|
@ -236,7 +266,7 @@ async def main(*, ruff1: Path, ruff2: Path, projects_jsonl: Optional[Path]) -> N
|
||||||
logger.debug(f"Checking {len(repositories)} projects")
|
logger.debug(f"Checking {len(repositories)} projects")
|
||||||
|
|
||||||
results = await asyncio.gather(
|
results = await asyncio.gather(
|
||||||
*[compare(ruff1, ruff2, repo) for repo in repositories.values()],
|
*[compare(ruff1, ruff2, repo, checkouts) for repo in repositories.values()],
|
||||||
return_exceptions=True,
|
return_exceptions=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -353,6 +383,14 @@ if __name__ == "__main__":
|
||||||
"Supports both github_search_*.jsonl and known-github-tomls.jsonl."
|
"Supports both github_search_*.jsonl and known-github-tomls.jsonl."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--checkouts",
|
||||||
|
type=Path,
|
||||||
|
help=(
|
||||||
|
"Location for the git checkouts, in case you want to save them"
|
||||||
|
" (defaults to temporary directory)"
|
||||||
|
),
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-v",
|
"-v",
|
||||||
"--verbose",
|
"--verbose",
|
||||||
|
|
@ -375,4 +413,11 @@ if __name__ == "__main__":
|
||||||
else:
|
else:
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
asyncio.run(main(ruff1=args.ruff1, ruff2=args.ruff2, projects_jsonl=args.projects))
|
asyncio.run(
|
||||||
|
main(
|
||||||
|
ruff1=args.ruff1,
|
||||||
|
ruff2=args.ruff2,
|
||||||
|
projects_jsonl=args.projects,
|
||||||
|
checkouts=args.checkouts,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue