Skip namespace package enforcement for PEP 723 scripts (#13974)

## Summary

Vendors the PEP 723 parser from [uv](https://github.com/astral-sh/uv/blob/debe67ffdb0cd7835734100e909b2d8f79613743/crates/uv-scripts/src/lib.rs#L283).

Closes https://github.com/astral-sh/ruff/issues/13912.
2025-08-18 17:41:12 +00:00 · 2024-10-28 22:11:31 -04:00 · 2024-10-28 22:11:31 -04:00 · b6847b371e
commit b6847b371e
parent b19862c64a
8 changed files with 177 additions and 3 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2620,6 +2620,7 @@ dependencies = [
 "compact_str",
 "is-macro",
 "itertools 0.13.0",
+ "memchr",
 "ruff_cache",
 "ruff_macros",
 "ruff_python_trivia",
--- a/crates/ruff_linter/resources/test/fixtures/flake8_no_pep420/test_pass_pep723/script.py
+++ b/crates/ruff_linter/resources/test/fixtures/flake8_no_pep420/test_pass_pep723/script.py
@ -0,0 +1,14 @@
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+#   "requests<3",
+#   "rich",
+# ]
+# ///
+
+import requests
+from rich.pretty import pprint
+
+resp = requests.get("https://peps.python.org/api/peps.json")
+data = resp.json()
+pprint([(k, v["title"]) for k, v in data.items()][:10])
--- a/crates/ruff_linter/src/rules/flake8_no_pep420/mod.rs
+++ b/crates/ruff_linter/src/rules/flake8_no_pep420/mod.rs
@ -13,14 +13,15 @@ mod tests {
    use crate::settings::LinterSettings;
    use crate::test::{test_path, test_resource_path};

-    #[test_case(Path::new("test_pass_init"), Path::new("example.py"))]
    #[test_case(Path::new("test_fail_empty"), Path::new("example.py"))]
    #[test_case(Path::new("test_fail_nonempty"), Path::new("example.py"))]
-    #[test_case(Path::new("test_pass_shebang"), Path::new("example.py"))]
    #[test_case(Path::new("test_ignored"), Path::new("example.py"))]
+    #[test_case(Path::new("test_pass_init"), Path::new("example.py"))]
    #[test_case(Path::new("test_pass_namespace_package"), Path::new("example.py"))]
+    #[test_case(Path::new("test_pass_pep723"), Path::new("script.py"))]
    #[test_case(Path::new("test_pass_pyi"), Path::new("example.pyi"))]
    #[test_case(Path::new("test_pass_script"), Path::new("script"))]
+    #[test_case(Path::new("test_pass_shebang"), Path::new("example.py"))]
    fn test_flake8_no_pep420(path: &Path, filename: &Path) -> Result<()> {
        let snapshot = format!("{}", path.to_string_lossy());
        let p = PathBuf::from(format!(
--- a/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs
+++ b/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs
@ -2,6 +2,7 @@ use std::path::{Path, PathBuf};

 use ruff_diagnostics::{Diagnostic, Violation};
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::script::ScriptTag;
 use ruff_python_ast::PySourceType;
 use ruff_python_trivia::CommentRanges;
 use ruff_text_size::{TextRange, TextSize};
@ -65,6 +66,8 @@ pub(crate) fn implicit_namespace_package(
        && !comment_ranges
            .first().filter(|range| range.start() == TextSize::from(0))
            .is_some_and(|range| ShebangDirective::try_extract(locator.slice(*range)).is_some())
+        // Ignore PEP 723 scripts.
+        && ScriptTag::parse(locator.contents().as_bytes()).is_none()
    {
        #[cfg(all(test, windows))]
        let path = path
--- a/crates/ruff_linter/src/rules/flake8_no_pep420/snapshots/ruff_linterrulesflake8_no_pep420teststest_pass_pep723.snap
+++ b/crates/ruff_linter/src/rules/flake8_no_pep420/snapshots/ruff_linterrulesflake8_no_pep420teststest_pass_pep723.snap
@ -0,0 +1,4 @@
+---
+source: crates/ruff_linter/src/rules/flake8_no_pep420/mod.rs
+---
+
--- a/crates/ruff_python_ast/Cargo.toml
+++ b/crates/ruff_python_ast/Cargo.toml
@ -21,12 +21,13 @@ ruff_text_size = { workspace = true }

 aho-corasick = { workspace = true }
 bitflags = { workspace = true }
+compact_str = { workspace = true }
 is-macro = { workspace = true }
 itertools = { workspace = true }
+memchr = { workspace = true }
 rustc-hash = { workspace = true }
 schemars = { workspace = true, optional = true }
 serde = { workspace = true, optional = true }
-compact_str = { workspace = true }

 [features]
 schemars = ["dep:schemars"]
--- a/crates/ruff_python_ast/src/lib.rs
+++ b/crates/ruff_python_ast/src/lib.rs
@ -18,6 +18,7 @@ mod node;
 mod nodes;
 pub mod parenthesize;
 pub mod relocate;
+pub mod script;
 pub mod statement_visitor;
 pub mod stmt_if;
 pub mod str;
--- a/crates/ruff_python_ast/src/script.rs
+++ b/crates/ruff_python_ast/src/script.rs
@ -0,0 +1,149 @@
+use std::sync::LazyLock;
+
+use memchr::memmem::Finder;
+
+static FINDER: LazyLock<Finder> = LazyLock::new(|| Finder::new(b"# /// script"));
+
+/// A Python script split around its PEP 723 `script` metadata comment block.
+///
+/// See: <https://peps.python.org/pep-0723/>
+///
+/// Vendored from: <https://github.com/astral-sh/uv/blob/debe67ffdb0cd7835734100e909b2d8f79613743/crates/uv-scripts/src/lib.rs#L283>
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct ScriptTag {
+    /// The content of the script before the opening `# /// script` line (e.g., a shebang).
+    prelude: String,
+    /// The metadata block, with the leading comment hashes removed from each line.
+    metadata: String,
+    /// The content of the script after the metadata block.
+    postlude: String,
+}
+
+impl ScriptTag {
+    /// Given the contents of a Python file, extract the `script` metadata block with leading
+    /// comment hashes removed, any preceding shebang or content (prelude), and the remaining Python
+    /// script (postlude). Returns `None` when no well-formed block exists or on invalid UTF-8.
+    ///
+    /// Given the following input string representing the contents of a Python script:
+    ///
+    /// ```python
+    /// #!/usr/bin/env python3
+    /// # /// script
+    /// # requires-python = '>=3.11'
+    /// # dependencies = [
+    /// #   'requests<3',
+    /// #   'rich',
+    /// # ]
+    /// # ///
+    ///
+    /// import requests
+    ///
+    /// print("Hello, World!")
+    /// ```
+    ///
+    /// This function would return:
+    ///
+    /// - Prelude: `#!/usr/bin/env python3\n`
+    /// - Metadata: `requires-python = '>=3.11'\ndependencies = [\n  'requests<3',\n  'rich',\n]\n`
+    /// - Postlude: `\nimport requests\n\nprint("Hello, World!")\n`
+    ///
+    /// See: <https://peps.python.org/pep-0723/>
+    pub fn parse(contents: &[u8]) -> Option<Self> {
+        // Identify the opening pragma.
+        let index = FINDER.find(contents)?;
+
+        // The opening pragma must be the first line, or immediately preceded by a newline.
+        if !(index == 0 || matches!(contents[index - 1], b'\r' | b'\n')) {
+            return None;
+        }
+
+        // Extract the preceding content.
+        let prelude = std::str::from_utf8(&contents[..index]).ok()?;
+
+        // Decode as UTF-8.
+        let contents = &contents[index..];
+        let contents = std::str::from_utf8(contents).ok()?;
+
+        let mut lines = contents.lines();
+
+        // Ensure that the first line is exactly `# /// script`.
+        if !lines.next().is_some_and(|line| line == "# /// script") {
+            return None;
+        }
+
+        // > Every line between these two lines (# /// TYPE and # ///) MUST be a comment starting
+        // > with #. If there are characters after the # then the first character MUST be a space. The
+        // > embedded content is formed by taking away the first two characters of each line if the
+        // > second character is a space, otherwise just the first character (which means the line
+        // > consists of only a single #).
+        let mut toml = vec![];
+
+        // Extract the content that follows the metadata block.
+        let mut python_script = vec![];
+
+        while let Some(line) = lines.next() {
+            // Remove the leading `#`.
+            let Some(comment) = line.strip_prefix('#') else {
+                python_script.push(line);
+                python_script.extend(lines);
+                break;
+            };
+
+            // If the line is empty, continue.
+            if comment.is_empty() {
+                toml.push("");
+                continue;
+            }
+
+            // Otherwise, it _must_ start with ` `; if not, it opens the postlude (`#` included).
+            let Some(content) = comment.strip_prefix(' ') else {
+                python_script.push(line);
+                python_script.extend(lines);
+                break;
+            };
+
+            toml.push(content);
+        }
+
+        // Find the closing `# ///`. The precedence is such that we need to identify the _last_ such
+        // line.
+        //
+        // For example, given:
+        // ```python
+        // # /// script
+        // #
+        // # ///
+        // #
+        // # ///
+        // ```
+        //
+        // The latter `///` is the closing pragma
+        let index = toml.iter().rev().position(|line| *line == "///")?;
+        let index = toml.len() - index;
+
+        // Discard any lines after the closing `# ///`.
+        //
+        // For example, given:
+        // ```python
+        // # /// script
+        // #
+        // # ///
+        // #
+        // #
+        // ```
+        //
+        // We need to discard the last two lines.
+        toml.truncate(index - 1);
+
+        // Join the lines into a single string.
+        let prelude = prelude.to_string();
+        let metadata = toml.join("\n") + "\n";
+        let postlude = python_script.join("\n") + "\n";
+
+        Some(Self {
+            prelude,
+            metadata,
+            postlude,
+        })
+    }
+}