Rewrite ecosystem checks and add ruff format reports (#8223)

Closes #7239 

- Refactors `scripts/check_ecosystem.py` into a new Python project at
`python/ruff-ecosystem`
- Includes
[documentation](https://github.com/astral-sh/ruff/blob/zanie/ecosystem-format/python/ruff-ecosystem/README.md)
now
    - Provides a `ruff-ecosystem` CLI
- Fixes bug where `ruff check` report included "fixable" summary line
- Adds truncation to `ruff check` reports
    - Otherwise we often won't see the `ruff format` reports
- The truncation uses some very simple heuristics and could be improved
in the future
- Identifies diagnostic changes that occur just because a violation's
fix available changes
- We still show the diff for the line because it's could matter _where_
this changes, but we could improve this
- Similarly, we could improve detection of diagnostic changes where just
the message changes
- Adds support for JSON ecosystem check output
    - I added this primarily for development purposes
- If there are no changes, only errors while processing projects, we
display a different summary message
- When caching repositories, we now checkout the requested ref
- Adds `ruff format` reports, which format with the baseline then the
use `format --diff` to generate a report
- Runs all CI jobs when the CI workflow is changed

## Known problems

- Since we must format the project to get a baseline, the permalink line
numbers do not exactly correspond to the correct range
- This looks... hard. I tried using `git diff` and some wonky hunk
matching to recover the original line numbers but it doesn't seem worth
it. I think we should probably commit the formatted changes to a fork or
something if we want great results here. Consequently, I've just used
the start line instead of a range for now.
- I don't love the comment structure — it'd be nice, perhaps, to have
separate headings for the linter and formatter.
- However, the `pr-comment` workflow is an absolute pain to change
because it runs _separately_ from this pull request so I if I want to
make edits to it I can only test it via manual workflow dispatch.
- Lines are not printed "as we go" which means they're all held in
memory, presumably this would be a problem for large-scale ecosystem
checks
- We are encountering a hard limit with the maximum comment length
supported by GitHub. We will need to move the bulk of the report
elsewhere.

## Future work

- Update `ruff-ecosystem` to support non-default projects and
`check_ecosystem_all.py` behavior
- Remove existing ecosystem check scripts
- Add preview mode toggle (#8076)
- Add a toggle for truncation
- Add hints for quick reproduction of runs locally
- Consider parsing JSON output of Ruff instead of using regex to parse
the text output
- Links to project repositories should use the commit hash we checked
against
- When caching repositories, we should pull the latest changes for the
ref
- Sort check diffs by path and rule code only (changes in messages
should not change order)
- Update check diffs to distinguish between new violations and changes
in messages
- Add "fix" diffs
- Remove existing formatter similarity reports
- On release pull request, compare to the previous tag instead

---------

Co-authored-by: konsti <konstin@mailbox.org>
This commit is contained in:
Zanie Blue 2023-10-27 17:28:01 -05:00 committed by GitHub
parent 5f26411577
commit fc94857a20
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 1555 additions and 8 deletions

View file

@ -0,0 +1,166 @@
import argparse
import asyncio
import logging
import os
import shutil
import sys
import sysconfig
import tempfile
from contextlib import nullcontext
from pathlib import Path
from signal import SIGINT, SIGTERM
from ruff_ecosystem import logger
from ruff_ecosystem.defaults import DEFAULT_TARGETS
from ruff_ecosystem.main import OutputFormat, main
from ruff_ecosystem.projects import RuffCommand
def excepthook(type, value, tb):
if hasattr(sys, "ps1") or not sys.stderr.isatty():
# we are in interactive mode or we don't have a tty so call the default
sys.__excepthook__(type, value, tb)
else:
import pdb
import traceback
traceback.print_exception(type, value, tb)
print()
pdb.post_mortem(tb)
def entrypoint():
args = parse_args()
if args.pdb:
sys.excepthook = excepthook
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
# Use a temporary directory for caching if no cache is specified
cache_context = (
tempfile.TemporaryDirectory() if not args.cache else nullcontext(args.cache)
)
ruff_baseline = args.ruff_baseline
if not args.ruff_baseline.exists():
ruff_baseline = get_executable_path(str(args.ruff_baseline))
if not ruff_baseline:
print(
f"Could not find ruff baseline executable: {args.ruff_baseline}",
sys.stderr,
)
exit(1)
logger.info(
"Resolved baseline executable %s to %s", args.ruff_baseline, ruff_baseline
)
ruff_comparison = args.ruff_comparison
if not args.ruff_comparison.exists():
ruff_comparison = get_executable_path(str(args.ruff_comparison))
if not ruff_comparison:
print(
f"Could not find ruff comparison executable: {args.ruff_comparison}",
sys.stderr,
)
exit(1)
logger.info(
"Resolved comparison executable %s to %s",
args.ruff_comparison,
ruff_comparison,
)
with cache_context as cache:
loop = asyncio.get_event_loop()
main_task = asyncio.ensure_future(
main(
command=RuffCommand(args.ruff_command),
ruff_baseline_executable=ruff_baseline,
ruff_comparison_executable=ruff_comparison,
targets=DEFAULT_TARGETS,
format=OutputFormat(args.output_format),
project_dir=Path(cache),
raise_on_failure=args.pdb,
)
)
# https://stackoverflow.com/a/58840987/3549270
for signal in [SIGINT, SIGTERM]:
loop.add_signal_handler(signal, main_task.cancel)
try:
loop.run_until_complete(main_task)
finally:
loop.close()
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Check two versions of ruff against a corpus of open-source code.",
)
# TODO: Support non-default `--targets`
# parser.add_argument(
# "--targets",
# type=Path,
# help=(
# "Optional JSON files to use over the default repositories. "
# "Supports both github_search_*.jsonl and known-github-tomls.jsonl."
# ),
# )
parser.add_argument(
"--cache",
type=Path,
help="Location for caching cloned repositories",
)
parser.add_argument(
"--output-format",
choices=[option.name for option in OutputFormat],
default="json",
help="Location for caching cloned repositories",
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
help="Enable debug logging",
)
parser.add_argument(
"--pdb",
action="store_true",
help="Enable debugging on failure",
)
parser.add_argument(
"ruff_command",
choices=[option.name for option in RuffCommand],
help="The Ruff command to test",
)
parser.add_argument(
"ruff_baseline",
type=Path,
)
parser.add_argument(
"ruff_comparison",
type=Path,
)
return parser.parse_args()
def get_executable_path(name: str) -> Path | None:
# Add suffix for Windows executables
name += ".exe" if sys.platform == "win32" and not name.endswith(".exe") else ""
path = os.path.join(sysconfig.get_path("scripts"), name)
# The executable in the current interpreter's scripts directory.
if os.path.exists(path):
return Path(path)
# The executable in the global environment.
environment_path = shutil.which(name)
if environment_path:
return Path(environment_path)
return None