Check for backtick-quoted shortcut links in CI (#16114)

## Summary

Follow-up to #16035.

`check_docs_formatted.py` will now report backtick-quoted shortcut links in rule documentation. It uses a regular expression to find them. Such a link:

* Starts with `[`, followed by <code>\`</code>, then a "name" sequence of at least one non-backtick, non-newline character, followed by another <code>\`</code>, then ends with `]`.
* Is not followed by either a `[` or a `(`.
* Is not placed within a code block.

If the name is a known Ruff option name, that link is not considered a violation.

## Test Plan

Manual.
2025-07-07 13:15:06 +00:00 · 2025-02-14 14:37:46 +07:00 · 2025-02-14 14:37:46 +07:00 · 1db8392a5a
commit 1db8392a5a
parent 81e202ed52
1 changed files with 70 additions and 2 deletions
--- a/scripts/check_docs_formatted.py
+++ b/scripts/check_docs_formatted.py
@@ -4,6 +4,7 @@
 from __future__ import annotations

 import argparse
+import json
 import os
 import re
 import subprocess
@@ -16,12 +17,26 @@ if TYPE_CHECKING:
    from collections.abc import Sequence

 SNIPPED_RE = re.compile(
-    r"(?P<before>^(?P<indent> *)```(?:\s*(?P<language>\w+))?\n)"
+    # The indent is spelled `\x20` rather than a literal space so that the
+    # pattern keeps its meaning under the verbose (`x`) flag, where unescaped
+    # whitespace in a pattern is ignored.
+    r"(?P<before>^(?P<indent>\x20*)```(?:\s*(?P<language>\w+))?\n)"
    r"(?P<code>.*?)"
    r"(?P<after>^(?P=indent)```\s*$)",
    re.DOTALL | re.MULTILINE,
 )

+# Long explanation: https://www.rexegg.com/regex-best-trick.html
+#
+# Short explanation:
+# Match both code blocks and shortcut links, then discard the former.
+# Whatever is matched by the second branch is guaranteed to never be
+# part of a code block, as that would already be caught by the first.
+#
+# Note that `SNIPPED_RE.pattern` (the regex source string) is interpolated,
+# not the compiled object itself: formatting a compiled pattern inserts its
+# `repr()` (`re.compile('...', re.MULTILINE|re.DOTALL)`), which still compiles
+# but never matches a code block, so links inside code blocks would be
+# reported. Only matches of the second branch carry a `name` group.
+BACKTICKED_SHORTCUT_LINK_RE = re.compile(
+    rf"""(?msx)
+    (?:{SNIPPED_RE.pattern}
+    |  \[`(?P<name>[^`\n]+)`](?![\[(])
+    )
+    """
+)
+
 # For some rules, we don't want Ruff to fix the formatting as this would "fix" the
 # example.
 KNOWN_FORMATTING_VIOLATIONS = [
@@ -238,6 +253,28 @@ def format_file(file: Path, error_known: bool, args: argparse.Namespace) -> int:
    return 0


+def find_backticked_shortcut_links(
+    path: Path, all_config_names: dict[str, object]
+) -> set[str]:
+    """Collect the names of links of the form: [`foobar`].
+
+    Reads the file at ``path`` and scans it with
+    ``BACKTICKED_SHORTCUT_LINK_RE``. Matches without a ``name`` group are
+    skipped, as are names that are present in ``all_config_names``.
+
+    Returns the set of remaining link names (empty if there are none).
+
+    See explanation at #16010.
+    """
+    text = path.read_text()
+
+    # `found["name"]` is None whenever the match did not capture a name.
+    candidates = (
+        found["name"] for found in BACKTICKED_SHORTCUT_LINK_RE.finditer(text)
+    )
+
+    return {
+        link_name
+        for link_name in candidates
+        if link_name is not None and link_name not in all_config_names
+    }
+
+
+
+
 def main(argv: Sequence[str] | None = None) -> int:
    """Check code snippets in docs are formatted by Ruff."""
    parser = argparse.ArgumentParser(
@@ -291,8 +328,14 @@ def main(argv: Sequence[str] | None = None) -> int:
            print("Please remove them and re-run.")
            return 1

+    ruff_config_output = subprocess.check_output(
+        ["ruff", "config", "--output-format", "json"], encoding="utf-8"
+    )
+    all_config_names = json.loads(ruff_config_output)
+
    violations = 0
    errors = 0
+    broken_links: dict[str, set[str]] = {}
    print("Checking docs formatting...")
    for file in [*static_docs, *generated_docs]:
        rule_name = file.name.split(".")[0]
@@ -307,13 +350,38 @@ def main(argv: Sequence[str] | None = None) -> int:
        elif result == 2 and not error_known:
            errors += 1

+        broken_links_in_file = find_backticked_shortcut_links(file, all_config_names)
+
+        if broken_links_in_file:
+            broken_links[file.name] = broken_links_in_file
+
    if violations > 0:
        print(f"Formatting violations identified: {violations}")

    if errors > 0:
        print(f"New code block parse errors identified: {errors}")

-    if violations > 0 or errors > 0:
+    if broken_links:
+        print()
+        print("Do not use backticked shortcut links: [`foobar`]")
+        print(
+            "They work with Mkdocs but cannot be rendered by CommonMark and GFM-compliant implementers."
+        )
+        print("Instead, use an explicit label:")
+        print("```markdown")
+        print("[`lorem.ipsum`][lorem-ipsum]")
+        print()
+        print("[lorem-ipsum]: https://example.com/")
+        print("```")
+
+        print()
+        print("The following links are found to be broken:")
+
+        for filename, link_names in broken_links.items():
+            print(f"- {filename}:")
+            print("\n".join(f"  - {name}" for name in link_names))
+
+    if violations > 0 or errors > 0 or broken_links:
        return 1

    print("All docs are formatted correctly.")