mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-03 18:28:24 +00:00
Rewrite ecosystem checks and add ruff format
reports (#8223)
Closes #7239 - Refactors `scripts/check_ecosystem.py` into a new Python project at `python/ruff-ecosystem` - Includes [documentation](https://github.com/astral-sh/ruff/blob/zanie/ecosystem-format/python/ruff-ecosystem/README.md) now - Provides a `ruff-ecosystem` CLI - Fixes bug where `ruff check` report included "fixable" summary line - Adds truncation to `ruff check` reports - Otherwise we often won't see the `ruff format` reports - The truncation uses some very simple heuristics and could be improved in the future - Identifies diagnostic changes that occur just because a violation's fix availability changes - We still show the diff for the line because it could matter _where_ this changes, but we could improve this - Similarly, we could improve detection of diagnostic changes where just the message changes - Adds support for JSON ecosystem check output - I added this primarily for development purposes - If there are no changes, only errors while processing projects, we display a different summary message - When caching repositories, we now check out the requested ref - Adds `ruff format` reports, which format with the baseline then use `format --diff` to generate a report - Runs all CI jobs when the CI workflow is changed ## Known problems - Since we must format the project to get a baseline, the permalink line numbers do not exactly correspond to the correct range - This looks... hard. I tried using `git diff` and some wonky hunk matching to recover the original line numbers but it doesn't seem worth it. I think we should probably commit the formatted changes to a fork or something if we want great results here. Consequently, I've just used the start line instead of a range for now. - I don't love the comment structure — it'd be nice, perhaps, to have separate headings for the linter and formatter. 
- However, the `pr-comment` workflow is an absolute pain to change because it runs _separately_ from this pull request so if I want to make edits to it I can only test it via manual workflow dispatch. - Lines are not printed "as we go" which means they're all held in memory, presumably this would be a problem for large-scale ecosystem checks - We are encountering a hard limit with the maximum comment length supported by GitHub. We will need to move the bulk of the report elsewhere. ## Future work - Update `ruff-ecosystem` to support non-default projects and `check_ecosystem_all.py` behavior - Remove existing ecosystem check scripts - Add preview mode toggle (#8076) - Add a toggle for truncation - Add hints for quick reproduction of runs locally - Consider parsing JSON output of Ruff instead of using regex to parse the text output - Links to project repositories should use the commit hash we checked against - When caching repositories, we should pull the latest changes for the ref - Sort check diffs by path and rule code only (changes in messages should not change order) - Update check diffs to distinguish between new violations and changes in messages - Add "fix" diffs - Remove existing formatter similarity reports - On release pull requests, compare to the previous tag instead --------- Co-authored-by: konsti <konstin@mailbox.org>
This commit is contained in:
parent
5f26411577
commit
fc94857a20
14 changed files with 1555 additions and 8 deletions
166
python/ruff-ecosystem/ruff_ecosystem/cli.py
Normal file
166
python/ruff-ecosystem/ruff_ecosystem/cli.py
Normal file
|
@ -0,0 +1,166 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import sysconfig
|
||||
import tempfile
|
||||
from contextlib import nullcontext
|
||||
from pathlib import Path
|
||||
from signal import SIGINT, SIGTERM
|
||||
|
||||
from ruff_ecosystem import logger
|
||||
from ruff_ecosystem.defaults import DEFAULT_TARGETS
|
||||
from ruff_ecosystem.main import OutputFormat, main
|
||||
from ruff_ecosystem.projects import RuffCommand
|
||||
|
||||
|
||||
def excepthook(type, value, tb):
    """Exception hook that opens a post-mortem debugger when attached to a terminal.

    Falls back to the interpreter's default hook when running interactively
    (``sys.ps1`` is set) or when stderr is not a TTY; otherwise the traceback
    is printed and ``pdb`` is started on it.
    """
    interactive = hasattr(sys, "ps1")
    if interactive or not sys.stderr.isatty():
        # we are in interactive mode or we don't have a tty so call the default
        sys.__excepthook__(type, value, tb)
        return

    # Imported lazily: only needed when we actually drop into the debugger.
    import pdb
    import traceback

    traceback.print_exception(type, value, tb)
    print()
    pdb.post_mortem(tb)
|
||||
|
||||
|
||||
def _resolve_executable(path: Path, label: str) -> Path:
    """Return a usable path for the ruff executable passed on the command line.

    If ``path`` exists it is returned unchanged. Otherwise it is treated as an
    executable name and looked up with ``get_executable_path``. When the lookup
    fails, an error is written to stderr and the process exits with status 1.

    ``label`` ("baseline" or "comparison") is only used in messages.
    """
    if path.exists():
        return path

    resolved = get_executable_path(str(path))
    if not resolved:
        # `file=` is required: a bare positional `sys.stderr` would be printed
        # as a second value on stdout instead of redirecting the message.
        print(f"Could not find ruff {label} executable: {path}", file=sys.stderr)
        sys.exit(1)

    logger.info("Resolved %s executable %s to %s", label, path, resolved)
    return resolved


def entrypoint():
    """Command-line entry point: parse arguments and run ``main`` to completion.

    Installs the post-mortem ``excepthook`` when ``--pdb`` is given, configures
    logging from ``--verbose``, resolves both ruff executables, and then runs
    the ``main`` coroutine on a dedicated event loop. SIGINT and SIGTERM cancel
    the running task.
    """
    args = parse_args()

    if args.pdb:
        sys.excepthook = excepthook

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    ruff_baseline = _resolve_executable(args.ruff_baseline, "baseline")
    ruff_comparison = _resolve_executable(args.ruff_comparison, "comparison")

    # Use a temporary directory for caching if no cache is specified.
    # Created only after the executables are resolved so that an early exit
    # does not leave a temporary directory behind.
    cache_context = (
        tempfile.TemporaryDirectory() if not args.cache else nullcontext(args.cache)
    )

    with cache_context as cache:
        # Create the loop explicitly: `asyncio.get_event_loop()` without a
        # running loop is deprecated and raises on recent Python versions.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            main_task = loop.create_task(
                main(
                    command=RuffCommand(args.ruff_command),
                    ruff_baseline_executable=ruff_baseline,
                    ruff_comparison_executable=ruff_comparison,
                    targets=DEFAULT_TARGETS,
                    format=OutputFormat(args.output_format),
                    project_dir=Path(cache),
                    raise_on_failure=args.pdb,
                )
            )
            # https://stackoverflow.com/a/58840987/3549270
            for signal in [SIGINT, SIGTERM]:
                loop.add_signal_handler(signal, main_task.cancel)
            loop.run_until_complete(main_task)
        finally:
            loop.close()
|
||||
|
||||
|
||||
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Build the command-line parser and parse ``argv``.

    Args:
        argv: Argument list to parse. When ``None`` (the default), argparse
            reads the arguments from ``sys.argv``.

    Returns:
        The parsed namespace with ``cache``, ``output_format``, ``verbose``,
        ``pdb``, ``ruff_command``, ``ruff_baseline`` and ``ruff_comparison``.
    """
    parser = argparse.ArgumentParser(
        description="Check two versions of ruff against a corpus of open-source code.",
    )

    # TODO: Support non-default `--targets`
    # parser.add_argument(
    #     "--targets",
    #     type=Path,
    #     help=(
    #         "Optional JSON files to use over the default repositories. "
    #         "Supports both github_search_*.jsonl and known-github-tomls.jsonl."
    #     ),
    # )
    parser.add_argument(
        "--cache",
        type=Path,
        help="Location for caching cloned repositories",
    )
    parser.add_argument(
        "--output-format",
        choices=[option.name for option in OutputFormat],
        default="json",
        # Previously a copy-paste of the `--cache` help text.
        help="The format to use for the report output",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable debug logging",
    )
    parser.add_argument(
        "--pdb",
        action="store_true",
        help="Enable debugging on failure",
    )
    parser.add_argument(
        "ruff_command",
        choices=[option.name for option in RuffCommand],
        help="The Ruff command to test",
    )
    parser.add_argument(
        "ruff_baseline",
        type=Path,
        help="Path to, or name of, the baseline ruff executable",
    )
    parser.add_argument(
        "ruff_comparison",
        type=Path,
        help="Path to, or name of, the ruff executable to compare against the baseline",
    )

    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def get_executable_path(name: str) -> Path | None:
|
||||
# Add suffix for Windows executables
|
||||
name += ".exe" if sys.platform == "win32" and not name.endswith(".exe") else ""
|
||||
|
||||
path = os.path.join(sysconfig.get_path("scripts"), name)
|
||||
|
||||
# The executable in the current interpreter's scripts directory.
|
||||
if os.path.exists(path):
|
||||
return Path(path)
|
||||
|
||||
# The executable in the global environment.
|
||||
environment_path = shutil.which(name)
|
||||
if environment_path:
|
||||
return Path(environment_path)
|
||||
|
||||
return None
|
Loading…
Add table
Add a link
Reference in a new issue