diff --git a/Cargo.lock b/Cargo.lock
index 19110cedc1..faa377a7be 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2620,6 +2620,7 @@ dependencies = [
  "compact_str",
  "is-macro",
  "itertools 0.13.0",
+ "memchr",
  "ruff_cache",
  "ruff_macros",
  "ruff_python_trivia",
diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_no_pep420/test_pass_pep723/script.py b/crates/ruff_linter/resources/test/fixtures/flake8_no_pep420/test_pass_pep723/script.py
new file mode 100755
index 0000000000..5e3d34c2b4
--- /dev/null
+++ b/crates/ruff_linter/resources/test/fixtures/flake8_no_pep420/test_pass_pep723/script.py
@@ -0,0 +1,14 @@
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+#   "requests<3",
+#   "rich",
+# ]
+# ///
+
+import requests
+from rich.pretty import pprint
+
+resp = requests.get("https://peps.python.org/api/peps.json")
+data = resp.json()
+pprint([(k, v["title"]) for k, v in data.items()][:10])
diff --git a/crates/ruff_linter/src/rules/flake8_no_pep420/mod.rs b/crates/ruff_linter/src/rules/flake8_no_pep420/mod.rs
index 34e0c3d4f2..cd2df547be 100644
--- a/crates/ruff_linter/src/rules/flake8_no_pep420/mod.rs
+++ b/crates/ruff_linter/src/rules/flake8_no_pep420/mod.rs
@@ -13,14 +13,15 @@ mod tests {
     use crate::settings::LinterSettings;
     use crate::test::{test_path, test_resource_path};
 
-    #[test_case(Path::new("test_pass_init"), Path::new("example.py"))]
     #[test_case(Path::new("test_fail_empty"), Path::new("example.py"))]
     #[test_case(Path::new("test_fail_nonempty"), Path::new("example.py"))]
-    #[test_case(Path::new("test_pass_shebang"), Path::new("example.py"))]
     #[test_case(Path::new("test_ignored"), Path::new("example.py"))]
+    #[test_case(Path::new("test_pass_init"), Path::new("example.py"))]
     #[test_case(Path::new("test_pass_namespace_package"), Path::new("example.py"))]
+    #[test_case(Path::new("test_pass_pep723"), Path::new("script.py"))]
     #[test_case(Path::new("test_pass_pyi"), Path::new("example.pyi"))]
     #[test_case(Path::new("test_pass_script"), Path::new("script"))]
+    #[test_case(Path::new("test_pass_shebang"), Path::new("example.py"))]
     fn test_flake8_no_pep420(path: &Path, filename: &Path) -> Result<()> {
         let snapshot = format!("{}", path.to_string_lossy());
         let p = PathBuf::from(format!(
diff --git a/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs b/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs
index 733248c47a..76fec32529 100644
--- a/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs
+++ b/crates/ruff_linter/src/rules/flake8_no_pep420/rules/implicit_namespace_package.rs
@@ -2,6 +2,7 @@ use std::path::{Path, PathBuf};
 
 use ruff_diagnostics::{Diagnostic, Violation};
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::script::ScriptTag;
 use ruff_python_ast::PySourceType;
 use ruff_python_trivia::CommentRanges;
 use ruff_text_size::{TextRange, TextSize};
@@ -65,6 +66,8 @@ pub(crate) fn implicit_namespace_package(
         && !comment_ranges
             .first().filter(|range| range.start() == TextSize::from(0))
             .is_some_and(|range| ShebangDirective::try_extract(locator.slice(*range)).is_some())
+        // Ignore PEP 723 scripts.
+        && ScriptTag::parse(locator.contents().as_bytes()).is_none()
     {
         #[cfg(all(test, windows))]
         let path = path
diff --git a/crates/ruff_linter/src/rules/flake8_no_pep420/snapshots/ruff_linter__rules__flake8_no_pep420__tests__test_pass_pep723.snap b/crates/ruff_linter/src/rules/flake8_no_pep420/snapshots/ruff_linter__rules__flake8_no_pep420__tests__test_pass_pep723.snap
new file mode 100644
index 0000000000..624dce3225
--- /dev/null
+++ b/crates/ruff_linter/src/rules/flake8_no_pep420/snapshots/ruff_linter__rules__flake8_no_pep420__tests__test_pass_pep723.snap
@@ -0,0 +1,4 @@
+---
+source: crates/ruff_linter/src/rules/flake8_no_pep420/mod.rs
+---
+
diff --git a/crates/ruff_python_ast/Cargo.toml b/crates/ruff_python_ast/Cargo.toml
index 497782a4ee..0d9cbc490f 100644
--- a/crates/ruff_python_ast/Cargo.toml
+++ b/crates/ruff_python_ast/Cargo.toml
@@ -21,12 +21,13 @@ ruff_text_size = { workspace = true }
 
 aho-corasick = { workspace = true }
 bitflags = { workspace = true }
+compact_str = { workspace = true }
 is-macro = { workspace = true }
 itertools = { workspace = true }
+memchr = { workspace = true }
 rustc-hash = { workspace = true }
 schemars = { workspace = true, optional = true }
 serde = { workspace = true, optional = true }
-compact_str = { workspace = true }
 
 [features]
 schemars = ["dep:schemars"]
diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs
index 346fae9d8a..b149bdddc8 100644
--- a/crates/ruff_python_ast/src/lib.rs
+++ b/crates/ruff_python_ast/src/lib.rs
@@ -18,6 +18,7 @@ mod node;
 mod nodes;
 pub mod parenthesize;
 pub mod relocate;
+pub mod script;
 pub mod statement_visitor;
 pub mod stmt_if;
 pub mod str;
diff --git a/crates/ruff_python_ast/src/script.rs b/crates/ruff_python_ast/src/script.rs
new file mode 100644
index 0000000000..f6b592a7b9
--- /dev/null
+++ b/crates/ruff_python_ast/src/script.rs
@@ -0,0 +1,149 @@
+use std::sync::LazyLock;
+
+use memchr::memmem::Finder;
+
+static FINDER: LazyLock<Finder> = LazyLock::new(|| Finder::new(b"# /// script"));
+
+/// PEP 723 metadata as parsed from a `script` comment block.
+///
+/// See: <https://peps.python.org/pep-0723/>
+///
+/// Vendored from: NOTE(review): the upstream URL is missing from this patch text; restore it.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct ScriptTag {
+    /// The content of the script before the metadata block.
+    prelude: String,
+    /// The metadata block.
+    metadata: String,
+    /// The content of the script after the metadata block.
+    postlude: String,
+}
+
+impl ScriptTag {
+    /// Given the contents of a Python file, extract the `script` metadata block with leading
+    /// comment hashes removed, any preceding shebang or content (prelude), and the remaining Python
+    /// script.
+    ///
+    /// Given the following input string representing the contents of a Python script:
+    ///
+    /// ```python
+    /// #!/usr/bin/env python3
+    /// # /// script
+    /// # requires-python = '>=3.11'
+    /// # dependencies = [
+    /// #   'requests<3',
+    /// #   'rich',
+    /// # ]
+    /// # ///
+    ///
+    /// import requests
+    ///
+    /// print("Hello, World!")
+    /// ```
+    ///
+    /// This function would return:
+    ///
+    /// - Prelude: `#!/usr/bin/env python3\n`
+    /// - Metadata: `requires-python = '>=3.11'\ndependencies = [\n  'requests<3',\n  'rich',\n]\n`
+    /// - Postlude: `\nimport requests\n\nprint("Hello, World!")\n`
+    ///
+    /// See: <https://peps.python.org/pep-0723/>
+    pub fn parse(contents: &[u8]) -> Option<Self> {
+        // Identify the opening pragma.
+        let index = FINDER.find(contents)?;
+
+        // The opening pragma must be the first line, or immediately preceded by a newline.
+        if !(index == 0 || matches!(contents[index - 1], b'\r' | b'\n')) {
+            return None;
+        }
+
+        // Extract the preceding content.
+        let prelude = std::str::from_utf8(&contents[..index]).ok()?;
+
+        // Decode as UTF-8.
+        let contents = &contents[index..];
+        let contents = std::str::from_utf8(contents).ok()?;
+
+        let mut lines = contents.lines();
+
+        // Ensure that the first line is exactly `# /// script`.
+        if !lines.next().is_some_and(|line| line == "# /// script") {
+            return None;
+        }
+
+        // > Every line between these two lines (# /// TYPE and # ///) MUST be a comment starting
+        // > with #. If there are characters after the # then the first character MUST be a space. The
+        // > embedded content is formed by taking away the first two characters of each line if the
+        // > second character is a space, otherwise just the first character (which means the line
+        // > consists of only a single #).
+        let mut toml = vec![];
+
+        // Extract the content that follows the metadata block.
+        let mut python_script = vec![];
+
+        while let Some(line) = lines.next() {
+            // Remove the leading `#`.
+            let Some(rest) = line.strip_prefix('#') else {
+                python_script.push(line);
+                python_script.extend(lines);
+                break;
+            };
+
+            // If the line is empty, continue.
+            if rest.is_empty() {
+                toml.push("");
+                continue;
+            }
+
+            // Otherwise, the line _must_ start with ` `; if not, push the unstripped line.
+            let Some(rest) = rest.strip_prefix(' ') else {
+                python_script.push(line);
+                python_script.extend(lines);
+                break;
+            };
+
+            toml.push(rest);
+        }
+
+        // Find the closing `# ///`. The precedence is such that we need to identify the _last_ such
+        // line.
+        //
+        // For example, given:
+        // ```python
+        // # /// script
+        // #
+        // # ///
+        // #
+        // # ///
+        // ```
+        //
+        // The latter `///` is the closing pragma
+        let index = toml.iter().rev().position(|line| *line == "///")?;
+        let index = toml.len() - index;
+
+        // Discard any lines after the closing `# ///`.
+        //
+        // For example, given:
+        // ```python
+        // # /// script
+        // #
+        // # ///
+        // #
+        // #
+        // ```
+        //
+        // We need to discard the last two lines.
+        toml.truncate(index - 1);
+
+        // Join the lines into a single string.
+        let prelude = prelude.to_string();
+        let metadata = toml.join("\n") + "\n";
+        let postlude = python_script.join("\n") + "\n";
+
+        Some(Self {
+            prelude,
+            metadata,
+            postlude,
+        })
+    }
+}