Use Cursor for shebang parsing (#5716)

## Summary

Better to leverage the shared functionality we get from `Cursor`. It's
also a little bit faster, which is very cool.
This commit is contained in:
Charlie Marsh 2023-07-12 17:22:09 -04:00 committed by GitHub
parent 6dbc6d2e59
commit c87faca884
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 82 additions and 92 deletions

View file

@ -7,9 +7,9 @@ use ruff_diagnostics::Diagnostic;
use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use ruff_python_whitespace::UniversalNewlines;
use crate::comments::shebang::ShebangDirective;
use crate::registry::Rule;
use crate::rules::flake8_copyright::rules::missing_copyright_notice;
use crate::rules::flake8_executable::helpers::ShebangDirective;
use crate::rules::flake8_executable::rules::{
shebang_missing, shebang_newline, shebang_not_executable, shebang_python, shebang_whitespace,
};

View file

@ -0,0 +1 @@
pub(crate) mod shebang;

View file

@ -0,0 +1,67 @@
use ruff_python_whitespace::{is_python_whitespace, Cursor};
use ruff_text_size::{TextLen, TextSize};
/// A shebang directive (e.g., `#!/usr/bin/env python3`).
///
/// The directive borrows its contents from the line it was parsed from.
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ShebangDirective<'a> {
/// The offset of the directive contents (e.g., `/usr/bin/env python3`) from the start of the
/// line. Any leading whitespace and the `#!` prefix itself are counted in this offset.
pub(crate) offset: TextSize,
/// The contents of the directive (e.g., `"/usr/bin/env python3"`), i.e., everything on the
/// line that follows the `#!` prefix.
pub(crate) contents: &'a str,
}
impl<'a> ShebangDirective<'a> {
    /// Attempt to parse a shebang directive from `line`.
    ///
    /// Leading whitespace is skipped before the `#!` marker is matched. Returns `None` when the
    /// first non-whitespace characters of the line are not `#!`.
    pub(crate) fn try_extract(line: &'a str) -> Option<Self> {
        let mut cursor = Cursor::new(line);

        // Skip any leading whitespace.
        cursor.eat_while(is_python_whitespace);

        // Consume the `#!` marker. `&&` short-circuits, so `!` is only attempted once `#` has
        // been consumed.
        let has_marker = cursor.eat_char('#') && cursor.eat_char('!');
        if !has_marker {
            return None;
        }

        // Whatever the cursor has not consumed yet is the body of the directive.
        let offset = line.text_len() - cursor.text_len();
        let contents = cursor.chars().as_str();
        Some(Self { offset, contents })
    }
}
#[cfg(test)]
mod tests {
// Snapshot tests for `ShebangDirective::try_extract`: each test records the `Debug` output of
// the parse result for one representative input line.
use insta::assert_debug_snapshot;
use super::ShebangDirective;
// A line that contains no `#!` at all.
#[test]
fn shebang_non_match() {
let source = "not a match";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}
// A `#!` that follows other code on the line (only leading whitespace is skipped before the
// `#!` prefix is matched).
#[test]
fn shebang_end_of_line() {
let source = "print('test') #!/usr/bin/python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}
// A directive that starts at the very beginning of the line.
#[test]
fn shebang_match() {
let source = "#!/usr/bin/env python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}
// A directive preceded by leading whitespace.
#[test]
fn shebang_leading_space() {
let source = " #!/usr/bin/env python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}
}

View file

@ -1,5 +1,5 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
source: crates/ruff/src/comments/shebang.rs
expression: "ShebangDirective::try_extract(source)"
---
None

View file

@ -1,5 +1,5 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
source: crates/ruff/src/comments/shebang.rs
expression: "ShebangDirective::try_extract(source)"
---
Some(

View file

@ -1,5 +1,5 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
source: crates/ruff/src/comments/shebang.rs
expression: "ShebangDirective::try_extract(source)"
---
Some(

View file

@ -1,5 +1,5 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
source: crates/ruff/src/comments/shebang.rs
expression: "ShebangDirective::try_extract(source)"
---
None

View file

@ -14,6 +14,7 @@ pub const VERSION: &str = env!("CARGO_PKG_VERSION");
mod autofix;
mod checkers;
mod codes;
mod comments;
mod cst;
pub mod directives;
mod doc_lines;

View file

@ -1,92 +1,12 @@
#[cfg(target_family = "unix")]
#![cfg(target_family = "unix")]
use std::os::unix::fs::PermissionsExt;
#[cfg(target_family = "unix")]
use std::path::Path;
#[cfg(target_family = "unix")]
use anyhow::Result;
use ruff_text_size::{TextLen, TextSize};
/// A shebang directive (e.g., `#!/usr/bin/env python3`).
///
/// The directive borrows its contents from the line it was parsed from.
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ShebangDirective<'a> {
/// The offset of the directive contents (e.g., `/usr/bin/env python3`) from the start of the
/// line. Any leading whitespace and the `#!` prefix itself are counted in this offset.
pub(crate) offset: TextSize,
/// The contents of the directive (e.g., `"/usr/bin/env python3"`), i.e., everything on the
/// line that follows the `#!` prefix.
pub(crate) contents: &'a str,
}
impl<'a> ShebangDirective<'a> {
/// Parse a shebang directive from a line, or return `None` if the line does not contain a
/// shebang directive.
pub(crate) fn try_extract(line: &'a str) -> Option<Self> {
// Trim whitespace.
let directive = Self::lex_whitespace(line);
// Trim the `#!` prefix.
let directive = Self::lex_char(directive, '#')?;
let directive = Self::lex_char(directive, '!')?;
Some(Self {
offset: line.text_len() - directive.text_len(),
contents: directive,
})
}
/// Lex optional leading whitespace.
#[inline]
fn lex_whitespace(line: &str) -> &str {
line.trim_start()
}
/// Lex a specific character, or return `None` if the character is not the first character in
/// the line.
#[inline]
fn lex_char(line: &str, c: char) -> Option<&str> {
let mut chars = line.chars();
if chars.next() == Some(c) {
Some(chars.as_str())
} else {
None
}
}
}
/// Returns `true` if any of the execute permission bits (`0o111`) is set in the mode of the file
/// at `filepath`.
///
/// # Errors
///
/// Returns an error if the metadata for `filepath` cannot be read.
#[cfg(target_family = "unix")]
pub(super) fn is_executable(filepath: &Path) -> Result<bool> {
    let mode = filepath.metadata()?.permissions().mode();
    Ok(mode & 0o111 != 0)
}
#[cfg(test)]
mod tests {
// Snapshot tests for `ShebangDirective::try_extract`: each test records the `Debug` output of
// the parse result for one representative input line.
use insta::assert_debug_snapshot;
use crate::rules::flake8_executable::helpers::ShebangDirective;
// A line that contains no `#!` at all.
#[test]
fn shebang_non_match() {
let source = "not a match";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}
// A `#!` that follows other code on the line (only leading whitespace is skipped before the
// `#!` prefix is matched).
#[test]
fn shebang_end_of_line() {
let source = "print('test') #!/usr/bin/python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}
// A directive that starts at the very beginning of the line.
#[test]
fn shebang_match() {
let source = "#!/usr/bin/env python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}
// A directive preceded by leading whitespace.
#[test]
fn shebang_leading_space() {
let source = " #!/usr/bin/env python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}
}

View file

@ -3,7 +3,7 @@ use ruff_text_size::{TextLen, TextRange};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use crate::rules::flake8_executable::helpers::ShebangDirective;
use crate::comments::shebang::ShebangDirective;
/// ## What it does
/// Checks for a shebang directive that is not at the beginning of the file.

View file

@ -7,10 +7,10 @@ use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use crate::comments::shebang::ShebangDirective;
use crate::registry::AsRule;
#[cfg(target_family = "unix")]
use crate::rules::flake8_executable::helpers::is_executable;
use crate::rules::flake8_executable::helpers::ShebangDirective;
/// ## What it does
/// Checks for a shebang directive in a file that is not executable.

View file

@ -3,7 +3,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use crate::rules::flake8_executable::helpers::ShebangDirective;
use crate::comments::shebang::ShebangDirective;
/// ## What it does
/// Checks for a shebang directive in `.py` files that does not contain `python`.

View file

@ -1,10 +1,11 @@
use ruff_text_size::{TextRange, TextSize};
use std::ops::Sub;
use ruff_text_size::{TextRange, TextSize};
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use crate::rules::flake8_executable::helpers::ShebangDirective;
use crate::comments::shebang::ShebangDirective;
/// ## What it does
/// Checks for whitespace before a shebang directive.