diff --git a/scripts/check_ecosystem.py b/scripts/check_ecosystem.py index ff788f9edf..59ebb9a056 100755 --- a/scripts/check_ecosystem.py +++ b/scripts/check_ecosystem.py @@ -82,6 +82,17 @@ class Repository(NamedTuple): yield Path(checkout_dir) + def url_for(self: Self, path: str, lnum: int | None = None) -> str: + """Return the GitHub URL for the given path and line number, if given.""" + # Default to main branch + url = ( + f"https://github.com/{self.org}/{self.repo}" + f"/blob/{self.ref or 'main'}/{path}" + ) + if lnum: + url += f"#L{lnum}" + return url + REPOSITORIES: list[Repository] = [ Repository("apache", "airflow", "main", select="ALL"), @@ -272,6 +283,11 @@ def read_projects_jsonl(projects_jsonl: Path) -> dict[tuple[str, str], Repositor return repositories +DIFF_LINE_RE = re.compile( + r"^(?P
[+-]) (?P(?P [^:]+):(?P \d+):\d+:) (?P .*)$", +) + + T = TypeVar("T") @@ -352,18 +368,27 @@ async def main( print(" ") print() - diff_str = "\n".join(diff) + repo = repositories[(org, repo)] + diff_lines = list(diff) - print("```diff") - print(diff_str) - print("```") + print("
") + for line in diff_lines: + match = DIFF_LINE_RE.match(line) + if match is None: + print(line) + continue + + pre, inner, path, lnum, post = match.groups() + url = repo.url_for(path, int(lnum)) + print(f"{pre} {inner} {post}") + print("") print() print("") print("") # Count rule changes - for line in diff_str.splitlines(): + for line in diff_lines: # Find rule change for current line or construction # +/ : : : matches = re.search(r": ([A-Z]{1,4}[0-9]{3,4})", line)