Mirror of https://github.com/astral-sh/ruff.git, synced 2025-09-27 04:19:18 +00:00
Rewrite ecosystem checks and add ruff format reports (#8223)
Closes #7239

- Refactors `scripts/check_ecosystem.py` into a new Python project at `python/ruff-ecosystem`
  - Includes [documentation](https://github.com/astral-sh/ruff/blob/zanie/ecosystem-format/python/ruff-ecosystem/README.md) now
  - Provides a `ruff-ecosystem` CLI
- Fixes a bug where the `ruff check` report included the "fixable" summary line
- Adds truncation to `ruff check` reports
  - Otherwise we often won't see the `ruff format` reports
  - The truncation uses some very simple heuristics and could be improved in the future
- Identifies diagnostic changes that occur just because a violation's fix availability changes
  - We still show the diff for the line because it could matter _where_ this changes, but we could improve this
  - Similarly, we could improve detection of diagnostic changes where just the message changes
- Adds support for JSON ecosystem check output
  - I added this primarily for development purposes
- If there are no changes, only errors while processing projects, we display a different summary message
- When caching repositories, we now check out the requested ref
- Adds `ruff format` reports, which format with the baseline and then use `format --diff` to generate a report (see the sketch after this description)
- Runs all CI jobs when the CI workflow is changed

## Known problems

- Since we must format the project to get a baseline, the permalink line numbers do not exactly correspond to the correct range
  - This looks... hard. I tried using `git diff` and some wonky hunk matching to recover the original line numbers, but it doesn't seem worth it. I think we should probably commit the formatted changes to a fork or something if we want great results here. Consequently, I've just used the start line instead of a range for now.
- I don't love the comment structure; it'd be nice, perhaps, to have separate headings for the linter and formatter.
  - However, the `pr-comment` workflow is an absolute pain to change because it runs _separately_ from this pull request, so if I want to make edits to it I can only test them via manual workflow dispatch.
- Lines are not printed "as we go", which means they're all held in memory; presumably this would be a problem for large-scale ecosystem checks
- We are encountering a hard limit with the maximum comment length supported by GitHub. We will need to move the bulk of the report elsewhere.

## Future work

- Update `ruff-ecosystem` to support non-default projects and `check_ecosystem_all.py` behavior
- Remove the existing ecosystem check scripts
- Add a preview mode toggle (#8076)
- Add a toggle for truncation
- Add hints for quickly reproducing runs locally
- Consider parsing Ruff's JSON output instead of using a regex to parse its text output
- Links to project repositories should use the commit hash we checked against
- When caching repositories, we should pull the latest changes for the ref
- Sort check diffs by path and rule code only (changes in messages should not change the order)
- Update check diffs to distinguish between new violations and changes in messages
- Add "fix" diffs
- Remove the existing formatter similarity reports
- On release pull requests, compare to the previous tag instead

---------

Co-authored-by: konsti <konstin@mailbox.org>
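To make the `ruff format` report flow described above concrete, here is a minimal sketch of the two-step comparison: format the checkout with the baseline executable, then ask the comparison executable what it would still change. The executable names and checkout path are placeholders, not the tool's actual invocation:

```python
# Rough sketch of the format comparison described above; the executable paths and
# checkout directory are placeholders, not the project's real implementation.
import subprocess

checkout = "./checkouts/pypa:build"  # an illustrative cloned target project

# 1. Format with the baseline executable to establish a shared base.
subprocess.run(["./ruff-baseline", "format", "."], cwd=checkout, check=True)

# 2. Ask the comparison executable what it would still change; that diff is the report.
diff = subprocess.run(
    ["./ruff-comparison", "format", "--diff", "."],
    cwd=checkout,
    capture_output=True,
    text=True,
)
print(diff.stdout)
```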
parent 5f26411577
commit fc94857a20
14 changed files with 1555 additions and 8 deletions
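On the truncation and comment-length problems noted above: the generated markdown has to fit under GitHub's comment size cap, which also appears as `GITHUB_MAX_COMMENT_LENGTH` in the new module below. A minimal illustration of that kind of trimming, not the project's actual heuristics:

```python
# Illustrative only: trim a markdown report so it fits in one GitHub comment.
GITHUB_MAX_COMMENT_LENGTH = 65536


def truncate_report(body: str, limit: int = GITHUB_MAX_COMMENT_LENGTH) -> str:
    if len(body) <= limit:
        return body
    notice = "\n\n*Report truncated to fit GitHub's maximum comment length.*"
    return body[: limit - len(notice)] + notice
```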
python/ruff-ecosystem/ruff_ecosystem/main.py (new file, 144 additions)
@@ -0,0 +1,144 @@
import asyncio
import dataclasses
import json
from enum import Enum
from pathlib import Path
from typing import Awaitable, TypeVar

from ruff_ecosystem import logger
from ruff_ecosystem.check import compare_check, markdown_check_result
from ruff_ecosystem.format import compare_format, markdown_format_result
from ruff_ecosystem.projects import (
    Project,
    RuffCommand,
)
from ruff_ecosystem.types import Comparison, Result, Serializable

T = TypeVar("T")
GITHUB_MAX_COMMENT_LENGTH = 65536


class OutputFormat(Enum):
    markdown = "markdown"
    json = "json"


async def main(
    command: RuffCommand,
    ruff_baseline_executable: Path,
    ruff_comparison_executable: Path,
    targets: list[Project],
    project_dir: Path,
    format: OutputFormat,
    max_parallelism: int = 50,
    raise_on_failure: bool = False,
) -> None:
    logger.debug("Using command %s", command.value)
    logger.debug("Using baseline executable at %s", ruff_baseline_executable)
    logger.debug("Using comparison executable at %s", ruff_comparison_executable)
    logger.debug("Using checkout_dir directory %s", project_dir)
    logger.debug("Checking %s targets", len(targets))

    # Limit parallelism to avoid high memory consumption
    semaphore = asyncio.Semaphore(max_parallelism)

    async def limited_parallelism(coroutine: Awaitable[T]) -> T:
        async with semaphore:
            return await coroutine

    comparisons: list[Exception | Comparison] = await asyncio.gather(
        *[
            limited_parallelism(
                clone_and_compare(
                    command,
                    ruff_baseline_executable,
                    ruff_comparison_executable,
                    target,
                    project_dir,
                )
            )
            for target in targets
        ],
        return_exceptions=not raise_on_failure,
    )
    comparisons_by_target = dict(zip(targets, comparisons, strict=True))

    # Split comparisons into errored / completed
    errored, completed = [], []
    for target, comparison in comparisons_by_target.items():
        if isinstance(comparison, Exception):
            errored.append((target, comparison))
        else:
            completed.append((target, comparison))

    result = Result(completed=completed, errored=errored)

    match format:
        case OutputFormat.json:
            print(json.dumps(result, indent=4, cls=JSONEncoder))
        case OutputFormat.markdown:
            match command:
                case RuffCommand.check:
                    print(markdown_check_result(result))
                case RuffCommand.format:
                    print(markdown_format_result(result))
                case _:
                    raise ValueError(f"Unknown target Ruff command {command}")
        case _:
            raise ValueError(f"Unknown output format {format}")

    return None


async def clone_and_compare(
    command: RuffCommand,
    ruff_baseline_executable: Path,
    ruff_comparison_executable: Path,
    target: Project,
    project_dir: Path,
) -> Comparison:
    """Check a specific repository against two versions of ruff."""
    assert ":" not in target.repo.owner
    assert ":" not in target.repo.name

    match command:
        case RuffCommand.check:
            compare, options = (
                compare_check,
                target.check_options,
            )
        case RuffCommand.format:
            compare, options = (
                compare_format,
                target.format_options,
            )
        case _:
            raise ValueError(f"Unknown target Ruff command {command}")

    checkout_dir = project_dir.joinpath(f"{target.repo.owner}:{target.repo.name}")
    cloned_repo = await target.repo.clone(checkout_dir)

    try:
        return await compare(
            ruff_baseline_executable,
            ruff_comparison_executable,
            options,
            cloned_repo,
        )
    except ExceptionGroup as e:
        raise e.exceptions[0] from e


class JSONEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, Serializable):
            return o.jsonable()
        if dataclasses.is_dataclass(o):
            return dataclasses.asdict(o)
        if isinstance(o, set):
            return tuple(o)
        if isinstance(o, Path):
            return str(o)
        if isinstance(o, Exception):
            return str(o)
        return super().default(o)
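A quick usage sketch for the `JSONEncoder` fallbacks above, assuming the package is installed so the module is importable as `ruff_ecosystem.main`:

```python
import json
from pathlib import Path

from ruff_ecosystem.main import JSONEncoder

# Paths serialize as strings, sets as arrays (via tuple), exceptions as their message.
payload = {
    "path": Path("src/app.py"),
    "codes": {"E501"},
    "error": ValueError("clone failed"),
}
print(json.dumps(payload, cls=JSONEncoder, indent=2))
```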
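And a hypothetical end-to-end driver for `main()`. The `Project` and `Repository` constructors here are assumptions inferred from the attributes used above (`target.repo.owner`, `target.repo.name`, `target.check_options`), so the real definitions in `ruff_ecosystem/projects.py` may differ:

```python
# Hypothetical driver; Project/Repository construction is assumed, not confirmed.
import asyncio
from pathlib import Path

from ruff_ecosystem.main import OutputFormat, main
from ruff_ecosystem.projects import Project, Repository, RuffCommand

targets = [Project(repo=Repository(owner="pypa", name="build", ref=None))]

asyncio.run(
    main(
        command=RuffCommand.check,
        ruff_baseline_executable=Path("./ruff-baseline"),
        ruff_comparison_executable=Path("./ruff-comparison"),
        targets=targets,
        project_dir=Path("./checkouts"),
        format=OutputFormat.markdown,
    )
)
```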