mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-24 13:33:50 +00:00

Closes #7239 - Refactors `scripts/check_ecosystem.py` into a new Python project at `python/ruff-ecosystem` - Includes [documentation](https://github.com/astral-sh/ruff/blob/zanie/ecosystem-format/python/ruff-ecosystem/README.md) now - Provides a `ruff-ecosystem` CLI - Fixes bug where `ruff check` report included "fixable" summary line - Adds truncation to `ruff check` reports - Otherwise we often won't see the `ruff format` reports - The truncation uses some very simple heuristics and could be improved in the future - Identifies diagnostic changes that occur just because a violation's fix available changes - We still show the diff for the line because it could matter _where_ this changes, but we could improve this - Similarly, we could improve detection of diagnostic changes where just the message changes - Adds support for JSON ecosystem check output - I added this primarily for development purposes - If there are no changes, only errors while processing projects, we display a different summary message - When caching repositories, we now checkout the requested ref - Adds `ruff format` reports, which format with the baseline then use `format --diff` to generate a report - Runs all CI jobs when the CI workflow is changed ## Known problems - Since we must format the project to get a baseline, the permalink line numbers do not exactly correspond to the correct range - This looks... hard. I tried using `git diff` and some wonky hunk matching to recover the original line numbers but it doesn't seem worth it. I think we should probably commit the formatted changes to a fork or something if we want great results here. Consequently, I've just used the start line instead of a range for now. - I don't love the comment structure — it'd be nice, perhaps, to have separate headings for the linter and formatter. 
- However, the `pr-comment` workflow is an absolute pain to change because it runs _separately_ from this pull request so if I want to make edits to it I can only test it via manual workflow dispatch. - Lines are not printed "as we go" which means they're all held in memory, presumably this would be a problem for large-scale ecosystem checks - We are encountering a hard limit with the maximum comment length supported by GitHub. We will need to move the bulk of the report elsewhere. ## Future work - Update `ruff-ecosystem` to support non-default projects and `check_ecosystem_all.py` behavior - Remove existing ecosystem check scripts - Add preview mode toggle (#8076) - Add a toggle for truncation - Add hints for quick reproduction of runs locally - Consider parsing JSON output of Ruff instead of using regex to parse the text output - Links to project repositories should use the commit hash we checked against - When caching repositories, we should pull the latest changes for the ref - Sort check diffs by path and rule code only (changes in messages should not change order) - Update check diffs to distinguish between new violations and changes in messages - Add "fix" diffs - Remove existing formatter similarity reports - On release pull request, compare to the previous tag instead --------- Co-authored-by: konsti <konstin@mailbox.org>
168 lines
4.6 KiB
Python
168 lines
4.6 KiB
Python
"""
|
|
Abstractions and utilities for working with projects to run ecosystem checks on.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from asyncio import create_subprocess_exec
|
|
from dataclasses import dataclass, field
|
|
from enum import Enum
|
|
from pathlib import Path
|
|
from subprocess import PIPE
|
|
from typing import Self
|
|
|
|
from ruff_ecosystem import logger
|
|
from ruff_ecosystem.check import CheckOptions
|
|
from ruff_ecosystem.format import FormatOptions
|
|
from ruff_ecosystem.types import Serializable
|
|
|
|
|
|
@dataclass(frozen=True)
class Project(Serializable):
    """
    An ecosystem target: a repository to clone plus the options to use when
    running `ruff check` and `ruff format` against it.
    """

    # The GitHub repository to clone for this project.
    repo: Repository
    # Options for the `ruff check` ecosystem report.
    # Pass the class itself as the factory — wrapping it in a lambda is
    # redundant since `CheckOptions()` takes no required arguments.
    check_options: CheckOptions = field(default_factory=CheckOptions)
    # Options for the `ruff format` ecosystem report.
    format_options: FormatOptions = field(default_factory=FormatOptions)
|
|
|
|
|
|
class RuffCommand(Enum):
    """The Ruff subcommand to run for an ecosystem report."""

    check = "check"
    format = "format"
|
|
|
|
|
|
class ProjectSetupError(Exception):
    """An error setting up a project."""
|
|
|
|
|
|
@dataclass(frozen=True)
class Repository(Serializable):
    """
    A remote GitHub repository.
    """

    # GitHub organization or user that owns the repository.
    owner: str
    # Name of the repository within the owner's namespace.
    name: str
    # Git ref (branch, tag, or commit) to check out; `None` uses the default branch.
    ref: str | None

    @property
    def fullname(self) -> str:
        """The repository identifier in `owner/name` form."""
        return f"{self.owner}/{self.name}"

    @property
    def url(self: Self) -> str:
        """The HTTPS URL of the repository on GitHub."""
        return f"https://github.com/{self.owner}/{self.name}"

    async def clone(self: Self, checkout_dir: Path) -> ClonedRepository:
        """
        Shallow clone this repository into `checkout_dir`.

        If the directory already exists the checkout is reused; when `ref` is
        set, it is (forcibly) checked out in the existing clone.

        Raises `ProjectSetupError` if the checkout or clone fails.
        """
        if checkout_dir.exists():
            logger.debug(f"Reusing {self.owner}:{self.name}")

            if self.ref:
                logger.debug(f"Checking out ref {self.ref}")
                process = await create_subprocess_exec(
                    *["git", "checkout", "-f", self.ref],
                    cwd=checkout_dir,
                    env={"GIT_TERMINAL_PROMPT": "0"},
                    stdout=PIPE,
                    stderr=PIPE,
                )
                # Drain the pipes *before* waiting: waiting first can deadlock
                # if git fills a pipe buffer, and calling `communicate()` after
                # the process has been reaped returns empty output, losing the
                # error message.
                _, stderr = await process.communicate()
                if process.returncode != 0:
                    raise ProjectSetupError(
                        f"Failed to checkout {self.ref}: {stderr.decode()}"
                    )

            return await ClonedRepository.from_path(checkout_dir, self)

        logger.debug(f"Cloning {self.owner}:{self.name} to {checkout_dir}")
        command = [
            "git",
            "clone",
            # Suppress the detached-HEAD warning when cloning a tag or commit.
            "--config",
            "advice.detachedHead=false",
            "--quiet",
            "--depth",
            "1",
            "--no-tags",
        ]
        if self.ref:
            command.extend(["--branch", self.ref])

        command.extend(
            [
                f"https://github.com/{self.owner}/{self.name}",
                str(checkout_dir),
            ],
        )

        process = await create_subprocess_exec(
            *command,
            env={"GIT_TERMINAL_PROMPT": "0"},
            stdout=PIPE,
            stderr=PIPE,
        )
        _, stderr = await process.communicate()

        logger.debug(
            f"Finished cloning {self.fullname} with status {process.returncode}",
        )
        # A failed clone was previously only logged; raise instead so callers
        # do not fail later with a confusing missing-checkout error.
        if process.returncode != 0:
            raise ProjectSetupError(
                f"Failed to clone {self.fullname}: {stderr.decode()}"
            )

        return await ClonedRepository.from_path(checkout_dir, self)
|
|
|
|
|
|
@dataclass(frozen=True)
class ClonedRepository(Repository, Serializable):
    """
    A cloned GitHub repository, which includes the hash of the current commit.
    """

    # Commit sha of the checkout's HEAD.
    commit_hash: str
    # Local directory containing the checkout.
    path: Path

    def url_for(
        self: Self,
        path: str,
        line_number: int | None = None,
        end_line_number: int | None = None,
    ) -> str:
        """
        Return the remote GitHub URL for the given path in this repository.
        """
        # Assemble the permalink piece by piece, then join once.
        fragments = [
            f"https://github.com/{self.owner}/{self.name}"
            f"/blob/{self.commit_hash}/{path}"
        ]
        if line_number:
            fragments.append(f"#L{line_number}")
        if end_line_number:
            fragments.append(f"-L{end_line_number}")
        return "".join(fragments)

    @property
    def url(self: Self) -> str:
        """The repository URL pinned to the cloned commit."""
        base = f"https://github.com/{self.owner}/{self.name}"
        return f"{base}@{self.commit_hash}"

    @classmethod
    async def from_path(cls, path: Path, repo: Repository):
        """Construct a `ClonedRepository` from an on-disk checkout of `repo` at `path`."""
        commit = await cls._get_head_commit(path)
        return cls(
            name=repo.name,
            owner=repo.owner,
            ref=repo.ref,
            path=path,
            commit_hash=commit,
        )

    @staticmethod
    async def _get_head_commit(checkout_dir: Path) -> str:
        """
        Return the commit sha for the repository in the checkout directory.
        """
        process = await create_subprocess_exec(
            "git",
            "rev-parse",
            "HEAD",
            cwd=checkout_dir,
            stdout=PIPE,
        )
        stdout, _ = await process.communicate()
        if await process.wait() != 0:
            raise ProjectSetupError(f"Failed to retrieve commit sha at {checkout_dir}")

        return stdout.decode().strip()
|