mirror of
				https://github.com/astral-sh/ruff.git
				synced 2025-10-25 09:28:14 +00:00 
			
		
		
		
	 d78d10dd94
			
		
	
	
		d78d10dd94
		
			
		
	
	
	
	
		
			
<!--
Thank you for contributing to Ruff/ty! To help us out with reviewing, please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title? (Please prefix with `[ty]` for ty pull requests.)
- Does this pull request include references to any relevant issues?
-->

## Summary

<!-- What's the purpose of the change? What does it do, and why? -->

Part of #18972

This PR makes [indentation-with-invalid-multiple-comment (E114)](https://docs.astral.sh/ruff/rules/indentation-with-invalid-multiple-comment/#indentation-with-invalid-multiple-comment-e114)'s example not raise a syntax error by adding a 4-space-indented `...`. The example still gave `E114` without this, but adding the `...` both makes the change in indentation of the comment clearer, and makes it not give a `SyntaxError`.

## Test Plan

<!-- How was it tested? -->

N/A, no functionality/tests affected
		
			
				
	
	
		
			393 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			393 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| """Check code snippets in docs are formatted by Ruff."""
 | |
| 
 | |
| from __future__ import annotations
 | |
| 
 | |
| import argparse
 | |
| import json
 | |
| import os
 | |
| import re
 | |
| import subprocess
 | |
| import textwrap
 | |
| from pathlib import Path
 | |
| from re import Match
 | |
| from typing import TYPE_CHECKING, Literal
 | |
| 
 | |
| if TYPE_CHECKING:
 | |
|     from collections.abc import Sequence
 | |
| 
 | |
# Matches one fenced Markdown code block. Named groups:
#   before   - the opening fence line (leading spaces, backticks, optional language)
#   indent   - the spaces in front of the opening fence; the closing fence must
#              be preceded by the same text
#   language - the optional word following the opening backticks
#   code     - everything between the two fence lines (lazy, may span lines)
#   after    - the closing fence line
# The pattern spells a space as `\x20` and contains no `#`, so its source text
# keeps its meaning when embedded in a verbose-mode (`re.X`) pattern.
SNIPPED_RE = re.compile(
    r"(?P<before>^(?P<indent>\x20*)```(?:\s*(?P<language>\w+))?\n)"
    r"(?P<code>.*?)"
    r"(?P<after>^(?P=indent)```\s*$)",
    re.DOTALL | re.MULTILINE,
)

# Long explanation: https://www.rexegg.com/regex-best-trick.html
#
# Short explanation:
# Match both code blocks and shortcut links, then discard the former.
# Whatever matched by the second branch is guaranteed to never be
# part of a code block, as that would already be caught by the first.
#
# NOTE: interpolate `SNIPPED_RE.pattern`, not `SNIPPED_RE` itself. Formatting the
# compiled pattern object inserts its repr (`re.compile('...', re.MULTILINE|...)`),
# which still compiles but never matches a code block, so links inside code
# blocks would wrongly be reported. The `(?msx)` prefix re-applies the
# MULTILINE and DOTALL flags that `SNIPPED_RE` is compiled with.
BACKTICKED_SHORTCUT_LINK_RE = re.compile(
    rf"""(?msx)
    (?:{SNIPPED_RE.pattern}
    |  \[`(?P<name>[^`\n]+)`](?![\[(])
    )
    """
)
 | |
| 
 | |
# For some rules, we don't want Ruff to fix the formatting as this would "fix" the
# example.
#
# Docs files whose rule name (file name up to the first ".") appears here are
# skipped entirely by `main`. `main` also refuses to run unless this list is
# sorted alphabetically and free of duplicates, so keep new entries in order.
KNOWN_FORMATTING_VIOLATIONS = [
    "avoidable-escaped-quote",
    "bad-quotes-docstring",
    "bad-quotes-inline-string",
    "bad-quotes-multiline-string",
    "blank-line-after-decorator",
    "blank-line-before-class",
    "blank-line-before-function",
    "blank-line-between-methods",
    "blank-lines-after-function-or-class",
    "blank-lines-before-nested-definition",
    "blank-lines-top-level",
    "docstring-tab-indentation",
    "explicit-string-concatenation",
    "f-string-missing-placeholders",
    "incorrect-blank-line-after-class",
    "incorrect-blank-line-before-class",
    "indentation-with-invalid-multiple",
    "indentation-with-invalid-multiple-comment",
    "line-too-long",
    "missing-trailing-comma",
    "missing-whitespace",
    "missing-whitespace-after-keyword",
    "missing-whitespace-around-arithmetic-operator",
    "missing-whitespace-around-bitwise-or-shift-operator",
    "missing-whitespace-around-modulo-operator",
    "missing-whitespace-around-operator",
    "missing-whitespace-around-parameter-equals",
    "module-import-not-at-top-of-file",
    "multi-line-implicit-string-concatenation",
    "multiple-leading-hashes-for-block-comment",
    "multiple-spaces-after-comma",
    "multiple-spaces-after-keyword",
    "multiple-spaces-after-operator",
    "multiple-spaces-before-keyword",
    "multiple-spaces-before-operator",
    "multiple-statements-on-one-line-colon",
    "multiple-statements-on-one-line-semicolon",
    "no-indented-block-comment",
    "no-return-argument-annotation-in-stub",
    "no-space-after-block-comment",
    "no-space-after-inline-comment",
    "non-empty-stub-body",
    "over-indentation",
    "over-indented",
    "pass-statement-stub-body",
    "prohibited-trailing-comma",
    "redundant-backslash",
    "shebang-leading-whitespace",
    "single-line-implicit-string-concatenation",
    "surrounding-whitespace",
    "too-few-spaces-before-inline-comment",
    "too-many-blank-lines",
    "too-many-boolean-expressions",
    "trailing-comma-on-bare-tuple",
    "triple-single-quotes",
    "under-indentation",
    "unexpected-indentation-comment",
    "unexpected-spaces-around-keyword-parameter-equals",
    "unicode-kind-prefix",
    "unnecessary-class-parentheses",
    "unnecessary-escaped-quote",
    "useless-semicolon",
    "whitespace-after-decorator",
    "whitespace-after-open-bracket",
    "whitespace-before-close-bracket",
    "whitespace-before-parameters",
    "whitespace-before-punctuation",
]
 | |
| 
 | |
# For some docs, Ruff is unable to parse the example code.
#
# Parse failures in docs files whose rule name appears here are expected and are
# not reported; the file's formatting is still compared afterwards. Like
# `KNOWN_FORMATTING_VIOLATIONS`, `main` requires this list to be sorted
# alphabetically and free of duplicates.
KNOWN_PARSE_ERRORS = [
    "blank-line-with-whitespace",
    "indented-form-feed",
    "missing-newline-at-end-of-file",
    "mixed-spaces-and-tabs",
    "no-indented-block",
    "non-pep695-type-alias",  # requires Python 3.12
    "syntax-error",
    "tab-after-comma",
    "tab-after-keyword",
    "tab-after-operator",
    "tab-before-keyword",
    "tab-before-operator",
    "too-many-newlines-at-end-of-file",
    "trailing-whitespace",
    "unexpected-indentation",
]
 | |
| 
 | |
| 
 | |
class CodeBlockError(Exception):
    """A code block parse error.

    `format_contents` wraps each `InvalidInput` raised while formatting a code
    block in one of these and returns them to the caller instead of raising.
    """
 | |
| 
 | |
| 
 | |
class InvalidInput(ValueError):
    """Raised when ruff fails to parse file.

    `format_str` raises this, carrying ruff's stderr as the message, when that
    output contains "error: Failed to parse".
    """
 | |
| 
 | |
| 
 | |
def format_str(code: str, extension: Literal["py", "pyi"]) -> str:
    """Format a code snippet with `ruff format` and return the formatted text.

    The snippet is piped to ruff on stdin (no file is written); `extension`
    only selects the pseudo file name (`file.py` or `file.pyi`) handed to ruff
    through `--stdin-filename`.

    Raises:
        InvalidInput: ruff exited with an error and its stderr contains
            "error: Failed to parse".
        NotImplementedError: ruff exited with an error for any other reason.
    """
    command = ["ruff", "format", "--stdin-filename", f"file.{extension}", "-"]

    try:
        result = subprocess.run(
            command,
            input=code,
            text=True,
            capture_output=True,
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        stderr = exc.stderr
        if "error: Failed to parse" not in stderr:
            # Any failure other than a parse error is unexpected for this script.
            raise NotImplementedError(
                "This error has not been handled correctly, please update "
                f"`check_docs_formatted.py\n\nError:\n\n{stderr}",
            ) from exc

        raise InvalidInput(stderr) from exc

    return result.stdout
 | |
| 
 | |
| 
 | |
| def format_contents(src: str) -> tuple[str, Sequence[CodeBlockError]]:
 | |
|     """Format a single docs content."""
 | |
|     errors: list[CodeBlockError] = []
 | |
| 
 | |
|     def _snipped_match(match: Match[str]) -> str:
 | |
|         language = match["language"]
 | |
|         extension: Literal["py", "pyi"]
 | |
|         match language:
 | |
|             case "python":
 | |
|                 extension = "py"
 | |
|             case "pyi":
 | |
|                 extension = "pyi"
 | |
|             case _:
 | |
|                 # We are only interested in checking the formatting of py or pyi code
 | |
|                 # blocks so we can return early if the language is not one of these.
 | |
|                 return f"{match['before']}{match['code']}{match['after']}"
 | |
| 
 | |
|         code = textwrap.dedent(match["code"])
 | |
|         try:
 | |
|             code = format_str(code, extension)
 | |
|         except InvalidInput as e:
 | |
|             errors.append(CodeBlockError(e))
 | |
|         except NotImplementedError as e:
 | |
|             raise e
 | |
| 
 | |
|         code = textwrap.indent(code, match["indent"])
 | |
|         return f"{match['before']}{code}{match['after']}"
 | |
| 
 | |
|     src = SNIPPED_RE.sub(_snipped_match, src)
 | |
|     return src, errors
 | |
| 
 | |
| 
 | |
def format_file(file: Path, error_known: bool, args: argparse.Namespace) -> int:
    """Check the formatting of a single docs file.

    Only the span from the first `## Example` heading through the last code
    fence is checked. Files located in a directory named `rules` must also
    contain the `## What it does` and `## Why is this bad?` sections.

    Args:
        file: Markdown file to check.
        error_known: Whether parse errors are expected for this file; when
            true they are not reported.
        args: Parsed command-line arguments; only `skip_errors` is read.

    Returns:
        The exit code for the script: 0 when there is nothing to report, 1 when
        a required section is missing or the examples are not formatted, and 2
        when an unexpected parse error was found.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # default encoding.
    contents = file.read_text(encoding="utf-8")
    rule_name = file.name.split(".")[0]

    if file.parent.name == "rules":
        for section in ("What it does", "Why is this bad?"):
            if f"## {section}" not in contents:
                print(f"Docs for `{file.name}` are missing the `{section}` section.")
                return 1

    # Remove everything before the first example
    start = contents.find("## Example")
    if start == -1:
        # No example section, hence nothing to check.
        return 0
    contents = contents[start:]

    # Remove everything after the last example
    end = contents.rfind("```")
    if end == -1:
        # The example section holds no code fence, hence nothing to check.
        return 0
    contents = contents[:end] + "```"

    new_contents, errors = format_contents(contents)

    if errors and not args.skip_errors and not error_known:
        for error in errors:
            print(
                f"Docs parse error for `{rule_name}` docs. Either fix or add to "
                f"`KNOWN_PARSE_ERRORS`. {error}",
            )

        return 2

    if contents == new_contents:
        return 0

    print(
        f"Rule `{rule_name}` docs are not formatted. Either format the rule or add "
        f"to `KNOWN_FORMATTING_VIOLATIONS`. The example section should be "
        f"rewritten to:",
    )

    # Add indentation so that snipped can be copied directly to docs
    for line in new_contents.splitlines():
        print(f"/// {line}" if line else "///")

    print("\n")

    return 1
 | |
| 
 | |
| 
 | |
def find_backticked_shortcut_links(
    path: Path, all_config_names: dict[str, object]
) -> set[str]:
    """Check for links of the form: [`foobar`].

    See explanation at #16010.

    Args:
        path: Markdown file to scan.
        all_config_names: Names that may be used as shortcut links; a link
            whose text is one of these is not reported.

    Returns:
        The distinct link texts found in the file that are not in
        `all_config_names`.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # default encoding.
    contents = path.read_text(encoding="utf-8")

    # The `name` group is None for matches of the pattern's other alternative;
    # those matches are discarded.
    return {
        name
        for match in BACKTICKED_SHORTCUT_LINK_RE.finditer(contents)
        if (name := match["name"]) is not None and name not in all_config_names
    }
 | |
| 
 | |
| 
 | |
def main(argv: Sequence[str] | None = None) -> int:
    """Check code snippets in docs are formatted by Ruff.

    Args:
        argv: Command-line arguments; `None` lets argparse read `sys.argv`.
            Recognized flags are `--skip-errors` and `--generate-docs`.

    Returns:
        0 when every checked file is formatted and free of broken shortcut
        links, 1 otherwise (including when the rule docs have not been
        generated or the known-lists are unsorted or contain duplicates).
    """
    parser = argparse.ArgumentParser(
        description="Check code snippets in docs are formatted by Ruff.",
    )
    parser.add_argument("--skip-errors", action="store_true")
    parser.add_argument("--generate-docs", action="store_true")
    args = parser.parse_args(argv)

    if args.generate_docs:
        # Generate docs. The import is local so it only happens on request.
        from generate_mkdocs import main as generate_docs

        generate_docs()

    # Get static docs. Sorted so that the report order is the same on every run.
    static_docs = [
        Path("docs") / f for f in sorted(os.listdir("docs")) if f.endswith(".md")
    ]

    # Check rules generated
    if not Path("docs/rules").exists():
        print("Please generate rules first.")
        return 1

    # Get generated rules
    generated_docs = [
        Path("docs/rules") / f
        for f in sorted(os.listdir("docs/rules"))
        if f.endswith(".md")
    ]

    if not generated_docs:
        print("Please generate rules first.")
        return 1

    # Check known formatting violations and parse errors are sorted alphabetically and
    # have no duplicates. This will reduce the diff when adding new violations

    for known_list, file_string in [
        (KNOWN_FORMATTING_VIOLATIONS, "formatting violations"),
        (KNOWN_PARSE_ERRORS, "parse errors"),
    ]:
        if known_list != sorted(known_list):
            print(
                f"Known {file_string} is not sorted alphabetically. Please sort and "
                f"re-run.",
            )
            return 1

        # The list is known to be sorted here, so equal entries are adjacent.
        duplicates = sorted(
            {first for first, second in zip(known_list, known_list[1:]) if first == second}
        )
        if duplicates:
            print(f"Known {file_string} has duplicates:")
            print("\n".join([f"  - {x}" for x in duplicates]))
            print("Please remove them and re-run.")
            return 1

    ruff_config_output = subprocess.check_output(
        ["ruff", "config", "--output-format", "json"], encoding="utf-8"
    )
    all_config_names = json.loads(ruff_config_output)

    violations = 0
    errors = 0
    broken_links: dict[str, set[str]] = {}
    print("Checking docs formatting...")
    for file in [*static_docs, *generated_docs]:
        rule_name = file.name.split(".")[0]
        if rule_name in KNOWN_FORMATTING_VIOLATIONS:
            continue

        error_known = rule_name in KNOWN_PARSE_ERRORS

        result = format_file(file, error_known, args)
        if result == 1:
            violations += 1
        elif result == 2 and not error_known:
            errors += 1

        broken_links_in_file = find_backticked_shortcut_links(file, all_config_names)

        if broken_links_in_file:
            broken_links[file.name] = broken_links_in_file

    if violations > 0:
        print(f"Formatting violations identified: {violations}")

    if errors > 0:
        print(f"New code block parse errors identified: {errors}")

    if broken_links:
        print()
        print("Do not use backticked shortcut links: [`foobar`]")
        print(
            "They work with Mkdocs but cannot be rendered by CommonMark and GFM-compliant implementers."
        )
        print("Instead, use an explicit label:")
        print("```markdown")
        print("[`lorem.ipsum`][lorem-ipsum]")
        print()
        print("[lorem-ipsum]: https://example.com/")
        print("```")

        print()
        print("The following links are found to be broken:")

        for filename, link_names in broken_links.items():
            print(f"- {filename}:")
            # Sets have no stable order; sort so the report is reproducible.
            print("\n".join(f"  - {name}" for name in sorted(link_names)))

    if violations > 0 or errors > 0 or broken_links:
        return 1

    print("All docs are formatted correctly.")

    return 0
 | |
| 
 | |
| 
 | |
# Run the check when executed as a script; `main`'s return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
 |