mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 11:49:12 +00:00 
			
		
		
		
	gh-101100: Only show GitHub check annotations on changed doc paragraphs (GH-108065)
* Only show GitHub check annotations on changed doc paragraphs
* Improve check-warnings script arg parsing following Hugo's suggestions
* Factor filtering warnings by modified diffs into helper function
* Build docs on unmerged branch so warning lines match & avoid deep clone
---------
(cherry picked from commit eb953d6e44)
Co-authored-by: C.A.M. Gerlach <CAM.Gerlach@Gerlach.CAM>
Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
		
	
			
		
			
				
	
	
		
			311 lines
		
	
	
	
		
			9.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			311 lines
		
	
	
	
		
			9.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env python3
 | 
						|
"""
 | 
						|
Check the output of running Sphinx in nit-picky mode (missing references).
 | 
						|
"""
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
import argparse
 | 
						|
import itertools
 | 
						|
import os
 | 
						|
import re
 | 
						|
import subprocess
 | 
						|
import sys
 | 
						|
from pathlib import Path
 | 
						|
from typing import TextIO
 | 
						|
 | 
						|
# Exclude these whether they're dirty or clean,
 | 
						|
# because they trigger a rebuild of dirty files.
 | 
						|
EXCLUDE_FILES = {
 | 
						|
    "Doc/whatsnew/changelog.rst",
 | 
						|
}
 | 
						|
 | 
						|
# Subdirectories of Doc/ to exclude.
 | 
						|
EXCLUDE_SUBDIRS = {
 | 
						|
    ".env",
 | 
						|
    ".venv",
 | 
						|
    "env",
 | 
						|
    "includes",
 | 
						|
    "venv",
 | 
						|
}
 | 
						|
 | 
						|
# Regex pattern to match the parts of a Sphinx warning
 | 
						|
WARNING_PATTERN = re.compile(
 | 
						|
    r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
 | 
						|
)
 | 
						|
 | 
						|
# Regex pattern to match the line numbers in a Git unified diff
 | 
						|
DIFF_PATTERN = re.compile(
 | 
						|
    r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",
 | 
						|
    flags=re.MULTILINE,
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
 | 
						|
    """List the files changed between two Git refs, filtered by change type."""
 | 
						|
    added_files_result = subprocess.run(
 | 
						|
        [
 | 
						|
            "git",
 | 
						|
            "diff",
 | 
						|
            f"--diff-filter={filter_mode}",
 | 
						|
            "--name-only",
 | 
						|
            f"{ref_a}...{ref_b}",
 | 
						|
            "--",
 | 
						|
        ],
 | 
						|
        stdout=subprocess.PIPE,
 | 
						|
        check=True,
 | 
						|
        text=True,
 | 
						|
        encoding="UTF-8",
 | 
						|
    )
 | 
						|
 | 
						|
    added_files = added_files_result.stdout.strip().split("\n")
 | 
						|
    return {Path(file.strip()) for file in added_files if file.strip()}
 | 
						|
 | 
						|
 | 
						|
def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:
 | 
						|
    """List the lines changed between two Git refs for a specific file."""
 | 
						|
    diff_output = subprocess.run(
 | 
						|
        [
 | 
						|
            "git",
 | 
						|
            "diff",
 | 
						|
            "--unified=0",
 | 
						|
            f"{ref_a}...{ref_b}",
 | 
						|
            "--",
 | 
						|
            str(file),
 | 
						|
        ],
 | 
						|
        stdout=subprocess.PIPE,
 | 
						|
        check=True,
 | 
						|
        text=True,
 | 
						|
        encoding="UTF-8",
 | 
						|
    )
 | 
						|
 | 
						|
    # Scrape line offsets + lengths from diff and convert to line numbers
 | 
						|
    line_matches = DIFF_PATTERN.finditer(diff_output.stdout)
 | 
						|
    # Removed and added line counts are 1 if not printed
 | 
						|
    line_match_values = [
 | 
						|
        line_match.groupdict(default=1) for line_match in line_matches
 | 
						|
    ]
 | 
						|
    line_ints = [
 | 
						|
        (int(match_value["lineb"]), int(match_value["added"]))
 | 
						|
        for match_value in line_match_values
 | 
						|
    ]
 | 
						|
    line_ranges = [
 | 
						|
        range(line_b, line_b + added) for line_b, added in line_ints
 | 
						|
    ]
 | 
						|
    line_numbers = list(itertools.chain(*line_ranges))
 | 
						|
 | 
						|
    return line_numbers
 | 
						|
 | 
						|
 | 
						|
def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
 | 
						|
    """Get the line numbers of text in a file object, grouped by paragraph."""
 | 
						|
    paragraphs = []
 | 
						|
    prev_line = None
 | 
						|
    for lineno, line in enumerate(file_obj):
 | 
						|
        lineno = lineno + 1
 | 
						|
        if prev_line is None or (line.strip() and not prev_line.strip()):
 | 
						|
            paragraph = [lineno - 1]
 | 
						|
            paragraphs.append(paragraph)
 | 
						|
        paragraph.append(lineno)
 | 
						|
        prev_line = line
 | 
						|
    return paragraphs
 | 
						|
 | 
						|
 | 
						|
def filter_and_parse_warnings(
 | 
						|
    warnings: list[str], files: set[Path]
 | 
						|
) -> list[re.Match[str]]:
 | 
						|
    """Get the warnings matching passed files and parse them with regex."""
 | 
						|
    filtered_warnings = [
 | 
						|
        warning
 | 
						|
        for warning in warnings
 | 
						|
        if any(str(file) in warning for file in files)
 | 
						|
    ]
 | 
						|
    warning_matches = [
 | 
						|
        WARNING_PATTERN.fullmatch(warning.strip())
 | 
						|
        for warning in filtered_warnings
 | 
						|
    ]
 | 
						|
    non_null_matches = [warning for warning in warning_matches if warning]
 | 
						|
    return non_null_matches
 | 
						|
 | 
						|
 | 
						|
def filter_warnings_by_diff(
 | 
						|
    warnings: list[re.Match[str]], ref_a: str, ref_b: str, file: Path
 | 
						|
) -> list[re.Match[str]]:
 | 
						|
    """Filter the passed per-file warnings to just those on changed lines."""
 | 
						|
    diff_lines = get_diff_lines(ref_a, ref_b, file)
 | 
						|
    with file.open(encoding="UTF-8") as file_obj:
 | 
						|
        paragraphs = get_para_line_numbers(file_obj)
 | 
						|
    touched_paras = [
 | 
						|
        para_lines
 | 
						|
        for para_lines in paragraphs
 | 
						|
        if set(diff_lines) & set(para_lines)
 | 
						|
    ]
 | 
						|
    touched_para_lines = set(itertools.chain(*touched_paras))
 | 
						|
    warnings_infile = [
 | 
						|
        warning for warning in warnings if str(file) in warning["file"]
 | 
						|
    ]
 | 
						|
    warnings_touched = [
 | 
						|
        warning
 | 
						|
        for warning in warnings_infile
 | 
						|
        if int(warning["line"]) in touched_para_lines
 | 
						|
    ]
 | 
						|
    return warnings_touched
 | 
						|
 | 
						|
 | 
						|
def process_touched_warnings(
 | 
						|
    warnings: list[str], ref_a: str, ref_b: str
 | 
						|
) -> list[re.Match[str]]:
 | 
						|
    """Filter a list of Sphinx warnings to those affecting touched lines."""
 | 
						|
    added_files, modified_files = tuple(
 | 
						|
        get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")
 | 
						|
    )
 | 
						|
 | 
						|
    warnings_added = filter_and_parse_warnings(warnings, added_files)
 | 
						|
    warnings_modified = filter_and_parse_warnings(warnings, modified_files)
 | 
						|
 | 
						|
    modified_files_warned = {
 | 
						|
        file
 | 
						|
        for file in modified_files
 | 
						|
        if any(str(file) in warning["file"] for warning in warnings_modified)
 | 
						|
    }
 | 
						|
 | 
						|
    warnings_modified_touched = [
 | 
						|
        filter_warnings_by_diff(warnings_modified, ref_a, ref_b, file)
 | 
						|
        for file in modified_files_warned
 | 
						|
    ]
 | 
						|
    warnings_touched = warnings_added + list(
 | 
						|
        itertools.chain(*warnings_modified_touched)
 | 
						|
    )
 | 
						|
 | 
						|
    return warnings_touched
 | 
						|
 | 
						|
 | 
						|
def annotate_diff(
 | 
						|
    warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
 | 
						|
) -> None:
 | 
						|
    """
 | 
						|
    Convert Sphinx warning messages to GitHub Actions for changed paragraphs.
 | 
						|
 | 
						|
    Converts lines like:
 | 
						|
        .../Doc/library/cgi.rst:98: WARNING: reference target not found
 | 
						|
    to:
 | 
						|
        ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
 | 
						|
 | 
						|
    See:
 | 
						|
    https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
 | 
						|
    """
 | 
						|
    warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
 | 
						|
    print("Emitting doc warnings matching modified lines:")
 | 
						|
    for warning in warnings_touched:
 | 
						|
        print("::warning file={file},line={line}::{msg}".format_map(warning))
 | 
						|
        print(warning[0])
 | 
						|
    if not warnings_touched:
 | 
						|
        print("None")
 | 
						|
 | 
						|
 | 
						|
def fail_if_regression(
 | 
						|
    warnings: list[str], files_with_expected_nits: set[str], files_with_nits: set[str]
 | 
						|
) -> int:
 | 
						|
    """
 | 
						|
    Ensure some files always pass Sphinx nit-picky mode (no missing references).
 | 
						|
    These are files which are *not* in .nitignore.
 | 
						|
    """
 | 
						|
    all_rst = {
 | 
						|
        str(rst)
 | 
						|
        for rst in Path("Doc/").rglob("*.rst")
 | 
						|
        if rst.parts[1] not in EXCLUDE_SUBDIRS
 | 
						|
    }
 | 
						|
    should_be_clean = all_rst - files_with_expected_nits - EXCLUDE_FILES
 | 
						|
    problem_files = sorted(should_be_clean & files_with_nits)
 | 
						|
    if problem_files:
 | 
						|
        print("\nError: must not contain warnings:\n")
 | 
						|
        for filename in problem_files:
 | 
						|
            print(filename)
 | 
						|
            for warning in warnings:
 | 
						|
                if filename in warning:
 | 
						|
                    if match := WARNING_PATTERN.fullmatch(warning):
 | 
						|
                        print("  {line}: {msg}".format_map(match))
 | 
						|
        return -1
 | 
						|
    return 0
 | 
						|
 | 
						|
 | 
						|
def fail_if_improved(
 | 
						|
    files_with_expected_nits: set[str], files_with_nits: set[str]
 | 
						|
) -> int:
 | 
						|
    """
 | 
						|
    We may have fixed warnings in some files so that the files are now completely clean.
 | 
						|
    Good news! Let's add them to .nitignore to prevent regression.
 | 
						|
    """
 | 
						|
    files_with_no_nits = files_with_expected_nits - files_with_nits
 | 
						|
    if files_with_no_nits:
 | 
						|
        print("\nCongratulations! You improved:\n")
 | 
						|
        for filename in sorted(files_with_no_nits):
 | 
						|
            print(filename)
 | 
						|
        print("\nPlease remove from Doc/tools/.nitignore\n")
 | 
						|
        return -1
 | 
						|
    return 0
 | 
						|
 | 
						|
 | 
						|
def main(argv: list[str] | None = None) -> int:
 | 
						|
    parser = argparse.ArgumentParser()
 | 
						|
    parser.add_argument(
 | 
						|
        "--annotate-diff",
 | 
						|
        nargs="*",
 | 
						|
        metavar=("BASE_REF", "HEAD_REF"),
 | 
						|
        help="Add GitHub Actions annotations on the diff for warnings on "
 | 
						|
        "lines changed between the given refs (main and HEAD, by default)",
 | 
						|
    )
 | 
						|
    parser.add_argument(
 | 
						|
        "--fail-if-regression",
 | 
						|
        action="store_true",
 | 
						|
        help="Fail if known-good files have warnings",
 | 
						|
    )
 | 
						|
    parser.add_argument(
 | 
						|
        "--fail-if-improved",
 | 
						|
        action="store_true",
 | 
						|
        help="Fail if new files with no nits are found",
 | 
						|
    )
 | 
						|
 | 
						|
    args = parser.parse_args(argv)
 | 
						|
    if args.annotate_diff is not None and len(args.annotate_diff) > 2:
 | 
						|
        parser.error(
 | 
						|
            "--annotate-diff takes between 0 and 2 ref args, not "
 | 
						|
            f"{len(args.annotate_diff)} {tuple(args.annotate_diff)}"
 | 
						|
        )
 | 
						|
    exit_code = 0
 | 
						|
 | 
						|
    wrong_directory_msg = "Must run this script from the repo root"
 | 
						|
    assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg
 | 
						|
 | 
						|
    with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
 | 
						|
        warnings = f.read().splitlines()
 | 
						|
 | 
						|
    cwd = str(Path.cwd()) + os.path.sep
 | 
						|
    files_with_nits = {
 | 
						|
        warning.removeprefix(cwd).split(":")[0]
 | 
						|
        for warning in warnings
 | 
						|
        if "Doc/" in warning
 | 
						|
    }
 | 
						|
 | 
						|
    with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as clean_files:
 | 
						|
        files_with_expected_nits = {
 | 
						|
            filename.strip()
 | 
						|
            for filename in clean_files
 | 
						|
            if filename.strip() and not filename.startswith("#")
 | 
						|
        }
 | 
						|
 | 
						|
    if args.annotate_diff is not None:
 | 
						|
        annotate_diff(warnings, *args.annotate_diff)
 | 
						|
 | 
						|
    if args.fail_if_regression:
 | 
						|
        exit_code += fail_if_regression(
 | 
						|
            warnings, files_with_expected_nits, files_with_nits
 | 
						|
        )
 | 
						|
 | 
						|
    if args.fail_if_improved:
 | 
						|
        exit_code += fail_if_improved(files_with_expected_nits, files_with_nits)
 | 
						|
 | 
						|
    return exit_code
 | 
						|
 | 
						|
 | 
						|
if __name__ == "__main__":
 | 
						|
    sys.exit(main())
 |