From 2fdf98ef4ef4628007079673c01e7e63a0a8627d Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 28 Mar 2023 10:37:13 +0200 Subject: [PATCH] perf(pycodestyle): Refactor checks to iterate over tokens insteadof text (#3736) --- crates/ruff/src/checkers/logical_lines.rs | 89 ++- crates/ruff/src/checkers/mod.rs | 3 +- crates/ruff/src/linter.rs | 4 +- crates/ruff/src/registry.rs | 62 +- crates/ruff/src/rules/pycodestyle/helpers.rs | 10 + .../src/rules/pycodestyle/logical_lines.rs | 416 ----------- crates/ruff/src/rules/pycodestyle/mod.rs | 1 - .../extraneous_whitespace.rs | 67 +- .../rules/{ => logical_lines}/indentation.rs | 48 +- .../rules/logical_lines/missing_whitespace.rs | 85 +++ .../missing_whitespace_after_keyword.rs | 44 ++ .../missing_whitespace_around_operator.rs | 99 ++- .../pycodestyle/rules/logical_lines/mod.rs | 653 ++++++++++++++++++ .../space_around_operator.rs | 97 ++- .../whitespace_around_keywords.rs | 67 +- ...hitespace_around_named_parameter_equals.rs | 121 ++++ .../whitespace_before_comment.rs | 38 +- .../whitespace_before_parameters.rs | 52 +- .../pycodestyle/rules/missing_whitespace.rs | 102 --- .../rules/missing_whitespace_after_keyword.rs | 51 -- .../ruff/src/rules/pycodestyle/rules/mod.rs | 105 +-- ...hitespace_around_named_parameter_equals.rs | 113 --- 22 files changed, 1225 insertions(+), 1102 deletions(-) delete mode 100644 crates/ruff/src/rules/pycodestyle/logical_lines.rs rename crates/ruff/src/rules/pycodestyle/rules/{ => logical_lines}/extraneous_whitespace.rs (61%) rename crates/ruff/src/rules/pycodestyle/rules/{ => logical_lines}/indentation.rs (87%) create mode 100644 crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs create mode 100644 crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs rename crates/ruff/src/rules/pycodestyle/rules/{ => logical_lines}/missing_whitespace_around_operator.rs (65%) create mode 100644 
crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs rename crates/ruff/src/rules/pycodestyle/rules/{ => logical_lines}/space_around_operator.rs (57%) rename crates/ruff/src/rules/pycodestyle/rules/{ => logical_lines}/whitespace_around_keywords.rs (54%) create mode 100644 crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs rename crates/ruff/src/rules/pycodestyle/rules/{ => logical_lines}/whitespace_before_comment.rs (85%) rename crates/ruff/src/rules/pycodestyle/rules/{ => logical_lines}/whitespace_before_parameters.rs (55%) delete mode 100644 crates/ruff/src/rules/pycodestyle/rules/missing_whitespace.rs delete mode 100644 crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_after_keyword.rs delete mode 100644 crates/ruff/src/rules/pycodestyle/rules/whitespace_around_named_parameter_equals.rs diff --git a/crates/ruff/src/checkers/logical_lines.rs b/crates/ruff/src/checkers/logical_lines.rs index 198205e73f..5af8b6e533 100644 --- a/crates/ruff/src/checkers/logical_lines.rs +++ b/crates/ruff/src/checkers/logical_lines.rs @@ -1,6 +1,3 @@ -#![allow(dead_code, unused_imports, unused_variables)] - -use itertools::Itertools; use rustpython_parser::ast::Location; use rustpython_parser::lexer::LexResult; @@ -9,12 +6,11 @@ use ruff_python_ast::source_code::{Locator, Stylist}; use ruff_python_ast::types::Range; use crate::registry::{AsRule, Rule}; -use crate::rules::pycodestyle::logical_lines::{LogicalLines, TokenFlags}; -use crate::rules::pycodestyle::rules::{ +use crate::rules::pycodestyle::rules::logical_lines::{ extraneous_whitespace, indentation, missing_whitespace, missing_whitespace_after_keyword, missing_whitespace_around_operator, space_around_operator, whitespace_around_keywords, whitespace_around_named_parameter_equals, whitespace_before_comment, - whitespace_before_parameters, + whitespace_before_parameters, LogicalLines, TokenFlags, }; use crate::settings::{flags, Settings}; @@ -57,21 +53,14 @@ pub fn 
check_logical_lines( #[cfg(not(feature = "logical_lines"))] let should_fix_whitespace_before_parameters = false; - let indent_char = stylist.indentation().as_char(); let mut prev_line = None; let mut prev_indent_level = None; - for line in &LogicalLines::from_tokens(tokens, locator) { - // Extract the indentation level. - let Some(start_loc) = line.first_token_location() else { continue; }; - let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), *start_loc)); - let indent_level = expand_indent(start_line); - let indent_size = 4; + let indent_char = stylist.indentation().as_char(); + for line in &LogicalLines::from_tokens(tokens, locator) { if line.flags().contains(TokenFlags::OPERATOR) { - for (index, kind) in space_around_operator(line.text()) { + for (location, kind) in space_around_operator(&line) { if settings.rules.enabled(kind.rule()) { - let (token_offset, pos) = line.mapping(index); - let location = Location::new(pos.row(), pos.column() + index - token_offset); diagnostics.push(Diagnostic { kind, location, @@ -86,10 +75,8 @@ pub fn check_logical_lines( .flags() .contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION) { - for (index, kind) in extraneous_whitespace(line.text()) { + for (location, kind) in extraneous_whitespace(&line) { if settings.rules.enabled(kind.rule()) { - let (token_offset, pos) = line.mapping(index); - let location = Location::new(pos.row(), pos.column() + index - token_offset); diagnostics.push(Diagnostic { kind, location, @@ -101,10 +88,8 @@ pub fn check_logical_lines( } } if line.flags().contains(TokenFlags::KEYWORD) { - for (index, kind) in whitespace_around_keywords(line.text()) { + for (location, kind) in whitespace_around_keywords(&line) { if settings.rules.enabled(kind.rule()) { - let (token_offset, pos) = line.mapping(index); - let location = Location::new(pos.row(), pos.column() + index - token_offset); diagnostics.push(Diagnostic { kind, location, @@ -115,7 +100,7 @@ pub fn check_logical_lines( } } - 
for (location, kind) in missing_whitespace_after_keyword(line.tokens()) { + for (location, kind) in missing_whitespace_after_keyword(&line.tokens()) { if settings.rules.enabled(kind.rule()) { diagnostics.push(Diagnostic { kind, @@ -128,7 +113,7 @@ pub fn check_logical_lines( } } if line.flags().contains(TokenFlags::COMMENT) { - for (range, kind) in whitespace_before_comment(line.tokens(), locator) { + for (range, kind) in whitespace_before_comment(&line.tokens(), locator) { if settings.rules.enabled(kind.rule()) { diagnostics.push(Diagnostic { kind, @@ -141,9 +126,7 @@ pub fn check_logical_lines( } } if line.flags().contains(TokenFlags::OPERATOR) { - for (location, kind) in - whitespace_around_named_parameter_equals(line.tokens(), line.text()) - { + for (location, kind) in whitespace_around_named_parameter_equals(&line.tokens()) { if settings.rules.enabled(kind.rule()) { diagnostics.push(Diagnostic { kind, @@ -154,7 +137,7 @@ pub fn check_logical_lines( }); } } - for (location, kind) in missing_whitespace_around_operator(line.tokens()) { + for (location, kind) in missing_whitespace_around_operator(&line.tokens()) { if settings.rules.enabled(kind.rule()) { diagnostics.push(Diagnostic { kind, @@ -166,12 +149,7 @@ pub fn check_logical_lines( } } - for diagnostic in missing_whitespace( - line.text(), - start_loc.row(), - should_fix_missing_whitespace, - indent_level, - ) { + for diagnostic in missing_whitespace(&line, should_fix_missing_whitespace) { if settings.rules.enabled(diagnostic.kind.rule()) { diagnostics.push(diagnostic); } @@ -179,16 +157,23 @@ pub fn check_logical_lines( } if line.flags().contains(TokenFlags::BRACKET) { - for diagnostic in - whitespace_before_parameters(line.tokens(), should_fix_whitespace_before_parameters) - { + for diagnostic in whitespace_before_parameters( + &line.tokens(), + should_fix_whitespace_before_parameters, + ) { if settings.rules.enabled(diagnostic.kind.rule()) { diagnostics.push(diagnostic); } } } - for (index, kind) in 
indentation( + // Extract the indentation level. + let Some(start_loc) = line.first_token_location() else { continue; }; + let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), start_loc)); + let indent_level = expand_indent(start_line); + let indent_size = 4; + + for (location, kind) in indentation( &line, prev_line.as_ref(), indent_char, @@ -196,8 +181,6 @@ pub fn check_logical_lines( prev_indent_level, indent_size, ) { - let (token_offset, pos) = line.mapping(index); - let location = Location::new(pos.row(), pos.column() + index - token_offset); if settings.rules.enabled(kind.rule()) { diagnostics.push(Diagnostic { kind, @@ -209,7 +192,7 @@ pub fn check_logical_lines( } } - if !line.is_comment() { + if !line.is_comment_only() { prev_line = Some(line); prev_indent_level = Some(indent_level); } @@ -222,7 +205,7 @@ mod tests { use rustpython_parser::lexer::LexResult; use rustpython_parser::{lexer, Mode}; - use crate::rules::pycodestyle::logical_lines::LogicalLines; + use crate::rules::pycodestyle::rules::logical_lines::LogicalLines; use ruff_python_ast::source_code::Locator; #[test] @@ -235,7 +218,7 @@ z = x + 1"#; let locator = Locator::new(contents); let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) .into_iter() - .map(|line| line.text().to_string()) + .map(|line| line.text_trimmed().to_string()) .collect(); let expected = vec![ "x = 1".to_string(), @@ -256,10 +239,10 @@ z = x + 1"#; let locator = Locator::new(contents); let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) .into_iter() - .map(|line| line.text().to_string()) + .map(|line| line.text_trimmed().to_string()) .collect(); let expected = vec![ - "x = [1, 2, 3, ]".to_string(), + "x = [\n 1,\n 2,\n 3,\n]".to_string(), "y = 2".to_string(), "z = x + 1".to_string(), ]; @@ -270,9 +253,9 @@ z = x + 1"#; let locator = Locator::new(contents); let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) .into_iter() - .map(|line| line.text().to_string()) + .map(|line| 
line.text_trimmed().to_string()) .collect(); - let expected = vec!["x = \"xxx\"".to_string()]; + let expected = vec!["x = 'abc'".to_string()]; assert_eq!(actual, expected); let contents = r#" @@ -283,7 +266,7 @@ f()"#; let locator = Locator::new(contents); let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) .into_iter() - .map(|line| line.text().to_string()) + .map(|line| line.text_trimmed().to_string()) .collect(); let expected = vec!["def f():", "x = 1", "f()"]; assert_eq!(actual, expected); @@ -298,9 +281,15 @@ f()"#; let locator = Locator::new(contents); let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) .into_iter() - .map(|line| line.text().to_string()) + .map(|line| line.text_trimmed().to_string()) .collect(); - let expected = vec!["def f():", "\"xxxxxxxxxxxxxxxxxxxx\"", "", "x = 1", "f()"]; + let expected = vec![ + "def f():", + "\"\"\"Docstring goes here.\"\"\"", + "", + "x = 1", + "f()", + ]; assert_eq!(actual, expected); } } diff --git a/crates/ruff/src/checkers/mod.rs b/crates/ruff/src/checkers/mod.rs index 0befee33ce..13c937dd60 100644 --- a/crates/ruff/src/checkers/mod.rs +++ b/crates/ruff/src/checkers/mod.rs @@ -1,7 +1,8 @@ pub mod ast; pub mod filesystem; pub mod imports; -pub mod logical_lines; +#[cfg(feature = "logical_lines")] +pub(crate) mod logical_lines; pub mod noqa; pub mod physical_lines; pub mod tokens; diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs index b57d42ee79..878bf6a28d 100644 --- a/crates/ruff/src/linter.rs +++ b/crates/ruff/src/linter.rs @@ -16,7 +16,6 @@ use crate::autofix::fix_file; use crate::checkers::ast::check_ast; use crate::checkers::filesystem::check_file_path; use crate::checkers::imports::check_imports; -use crate::checkers::logical_lines::check_logical_lines; use crate::checkers::noqa::check_noqa; use crate::checkers::physical_lines::check_physical_lines; use crate::checkers::tokens::check_tokens; @@ -105,7 +104,8 @@ pub fn check_path( .iter_enabled() .any(|rule_code| 
rule_code.lint_source().is_logical_lines()) { - diagnostics.extend(check_logical_lines( + #[cfg(feature = "logical_lines")] + diagnostics.extend(crate::checkers::logical_lines::check_logical_lines( &tokens, locator, stylist, diff --git a/crates/ruff/src/registry.rs b/crates/ruff/src/registry.rs index 3afe2f29e9..8db3ebdc54 100644 --- a/crates/ruff/src/registry.rs +++ b/crates/ruff/src/registry.rs @@ -15,67 +15,67 @@ ruff_macros::register_rules!( // pycodestyle errors rules::pycodestyle::rules::MixedSpacesAndTabs, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::IndentationWithInvalidMultiple, + rules::pycodestyle::rules::logical_lines::IndentationWithInvalidMultiple, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::NoIndentedBlock, + rules::pycodestyle::rules::logical_lines::NoIndentedBlock, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::UnexpectedIndentation, + rules::pycodestyle::rules::logical_lines::UnexpectedIndentation, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::IndentationWithInvalidMultipleComment, + rules::pycodestyle::rules::logical_lines::IndentationWithInvalidMultipleComment, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::NoIndentedBlockComment, + rules::pycodestyle::rules::logical_lines::NoIndentedBlockComment, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::UnexpectedIndentationComment, + rules::pycodestyle::rules::logical_lines::UnexpectedIndentationComment, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::OverIndented, + rules::pycodestyle::rules::logical_lines::OverIndented, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::WhitespaceAfterOpenBracket, + rules::pycodestyle::rules::logical_lines::WhitespaceAfterOpenBracket, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::WhitespaceBeforeCloseBracket, + rules::pycodestyle::rules::logical_lines::WhitespaceBeforeCloseBracket, #[cfg(feature = "logical_lines")] - 
rules::pycodestyle::rules::WhitespaceBeforePunctuation, + rules::pycodestyle::rules::logical_lines::WhitespaceBeforePunctuation, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MultipleSpacesBeforeOperator, + rules::pycodestyle::rules::logical_lines::MultipleSpacesBeforeOperator, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MultipleSpacesAfterOperator, + rules::pycodestyle::rules::logical_lines::MultipleSpacesAfterOperator, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::TabBeforeOperator, + rules::pycodestyle::rules::logical_lines::TabBeforeOperator, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::TabAfterOperator, + rules::pycodestyle::rules::logical_lines::TabAfterOperator, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::TooFewSpacesBeforeInlineComment, + rules::pycodestyle::rules::logical_lines::TooFewSpacesBeforeInlineComment, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::NoSpaceAfterInlineComment, + rules::pycodestyle::rules::logical_lines::NoSpaceAfterInlineComment, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::NoSpaceAfterBlockComment, + rules::pycodestyle::rules::logical_lines::NoSpaceAfterBlockComment, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MultipleLeadingHashesForBlockComment, + rules::pycodestyle::rules::logical_lines::MultipleLeadingHashesForBlockComment, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MultipleSpacesAfterKeyword, + rules::pycodestyle::rules::logical_lines::MultipleSpacesAfterKeyword, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MissingWhitespace, + rules::pycodestyle::rules::logical_lines::MissingWhitespace, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MissingWhitespaceAfterKeyword, + rules::pycodestyle::rules::logical_lines::MissingWhitespaceAfterKeyword, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MultipleSpacesBeforeKeyword, + 
rules::pycodestyle::rules::logical_lines::MultipleSpacesBeforeKeyword, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MissingWhitespaceAroundOperator, + rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundOperator, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MissingWhitespaceAroundArithmeticOperator, + rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundArithmeticOperator, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MissingWhitespaceAroundBitwiseOrShiftOperator, + rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundBitwiseOrShiftOperator, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MissingWhitespaceAroundModuloOperator, + rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundModuloOperator, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::TabAfterKeyword, + rules::pycodestyle::rules::logical_lines::TabAfterKeyword, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::UnexpectedSpacesAroundKeywordParameterEquals, + rules::pycodestyle::rules::logical_lines::UnexpectedSpacesAroundKeywordParameterEquals, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::MissingWhitespaceAroundParameterEquals, + rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundParameterEquals, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::WhitespaceBeforeParameters, + rules::pycodestyle::rules::logical_lines::WhitespaceBeforeParameters, #[cfg(feature = "logical_lines")] - rules::pycodestyle::rules::TabBeforeKeyword, + rules::pycodestyle::rules::logical_lines::TabBeforeKeyword, rules::pycodestyle::rules::MultipleImportsOnOneLine, rules::pycodestyle::rules::ModuleImportNotAtTopOfFile, rules::pycodestyle::rules::LineTooLong, diff --git a/crates/ruff/src/rules/pycodestyle/helpers.rs b/crates/ruff/src/rules/pycodestyle/helpers.rs index dfe2be0481..8da9d37703 100644 --- a/crates/ruff/src/rules/pycodestyle/helpers.rs +++ 
b/crates/ruff/src/rules/pycodestyle/helpers.rs @@ -1,4 +1,5 @@ use rustpython_parser::ast::{Cmpop, Expr, ExprKind}; +#[cfg(feature = "logical_lines")] use rustpython_parser::Tok; use unicode_width::UnicodeWidthStr; @@ -58,6 +59,7 @@ pub fn is_overlong( true } +#[cfg(feature = "logical_lines")] pub const fn is_keyword_token(token: &Tok) -> bool { matches!( token, @@ -98,6 +100,7 @@ pub const fn is_keyword_token(token: &Tok) -> bool { ) } +#[cfg(feature = "logical_lines")] pub const fn is_singleton_token(token: &Tok) -> bool { matches!( token, @@ -105,6 +108,7 @@ pub const fn is_singleton_token(token: &Tok) -> bool { ) } +#[cfg(feature = "logical_lines")] pub const fn is_op_token(token: &Tok) -> bool { matches!( token, @@ -157,6 +161,7 @@ pub const fn is_op_token(token: &Tok) -> bool { ) } +#[cfg(feature = "logical_lines")] pub const fn is_skip_comment_token(token: &Tok) -> bool { matches!( token, @@ -164,10 +169,12 @@ pub const fn is_skip_comment_token(token: &Tok) -> bool { ) } +#[cfg(feature = "logical_lines")] pub const fn is_soft_keyword_token(token: &Tok) -> bool { matches!(token, Tok::Match | Tok::Case) } +#[cfg(feature = "logical_lines")] pub const fn is_arithmetic_token(token: &Tok) -> bool { matches!( token, @@ -175,6 +182,7 @@ pub const fn is_arithmetic_token(token: &Tok) -> bool { ) } +#[cfg(feature = "logical_lines")] pub const fn is_ws_optional_token(token: &Tok) -> bool { is_arithmetic_token(token) || matches!( @@ -188,6 +196,7 @@ pub const fn is_ws_optional_token(token: &Tok) -> bool { ) } +#[cfg(feature = "logical_lines")] pub const fn is_ws_needed_token(token: &Tok) -> bool { matches!( token, @@ -218,6 +227,7 @@ pub const fn is_ws_needed_token(token: &Tok) -> bool { ) } +#[cfg(feature = "logical_lines")] pub const fn is_unary_token(token: &Tok) -> bool { matches!( token, diff --git a/crates/ruff/src/rules/pycodestyle/logical_lines.rs b/crates/ruff/src/rules/pycodestyle/logical_lines.rs deleted file mode 100644 index d5b26e5676..0000000000 --- 
a/crates/ruff/src/rules/pycodestyle/logical_lines.rs +++ /dev/null @@ -1,416 +0,0 @@ -use bitflags::bitflags; -use rustpython_parser::ast::Location; -use rustpython_parser::lexer::LexResult; -use rustpython_parser::Tok; -use std::borrow::Cow; -use std::fmt::{Debug, Formatter}; -use std::iter::FusedIterator; -use unicode_width::UnicodeWidthStr; - -use ruff_python_ast::source_code::Locator; -use ruff_python_ast::types::Range; - -use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token}; - -bitflags! { - #[derive(Default)] - pub struct TokenFlags: u8 { - /// Whether the logical line contains an operator. - const OPERATOR = 0b0000_0001; - /// Whether the logical line contains a bracket. - const BRACKET = 0b0000_0010; - /// Whether the logical line contains a punctuation mark. - const PUNCTUATION = 0b0000_0100; - /// Whether the logical line contains a keyword. - const KEYWORD = 0b0000_1000; - /// Whether the logical line contains a comment. - const COMMENT = 0b0001_0000; - } -} - -#[derive(Clone)] -pub struct LogicalLines<'a> { - text: String, - - /// start position, token, end position - tokens: Vec<(Location, &'a Tok, Location)>, - - mappings: Mappings, - - lines: Vec, -} - -impl<'a> LogicalLines<'a> { - pub fn from_tokens(tokens: &'a [LexResult], locator: &Locator) -> Self { - assert!(u32::try_from(tokens.len()).is_ok()); - - let single_token = tokens.len() == 1; - let mut builder = - LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len()); - let mut parens: u32 = 0; - - for (start, token, end) in tokens.iter().flatten() { - builder.push_token(*start, token, *end, locator); - - match token { - Tok::Lbrace | Tok::Lpar | Tok::Lsqb => { - parens += 1; - } - Tok::Rbrace | Tok::Rpar | Tok::Rsqb => { - parens -= 1; - } - Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(_) if parens == 0 => { - if matches!(token, Tok::Newline) { - builder.finish_line(); - } - // Comment only file or non logical new line? 
- else if single_token { - builder.discard_line(); - } else { - builder.finish_line(); - }; - } - _ => {} - } - } - - builder.finish() - } -} - -impl std::fmt::Debug for LogicalLines<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_list() - .entries(self.into_iter().map(DebugLogicalLine)) - .finish() - } -} - -impl<'a> IntoIterator for &'a LogicalLines<'a> { - type Item = LogicalLine<'a>; - type IntoIter = LogicalLinesIter<'a>; - - fn into_iter(self) -> Self::IntoIter { - LogicalLinesIter { - lines: self, - inner: self.lines.iter(), - } - } -} - -#[derive(Debug, Clone)] -struct Line { - flags: TokenFlags, - /// Byte offset of the start of the text of this line. - text_start: u32, - - /// Byte offset of the end of the text of this line. - text_end: u32, - mappings_start: u32, - mappings_end: u32, - tokens_start: u32, - tokens_end: u32, -} - -#[derive(Debug)] -pub struct LogicalLine<'a> { - lines: &'a LogicalLines<'a>, - line: &'a Line, -} - -impl<'a> LogicalLine<'a> { - /// Returns true if this is a comment only line - pub fn is_comment(&self) -> bool { - self.text().is_empty() && self.flags().contains(TokenFlags::COMMENT) - } - - /// Returns the text of this line - pub fn text(&self) -> &'a str { - &self.lines.text[self.line.text_start as usize..self.line.text_end as usize] - } - - /// Returns the tokens of the line - pub fn tokens(&self) -> &'a [(Location, &'a Tok, Location)] { - &self.lines.tokens[self.line.tokens_start as usize..self.line.tokens_end as usize] - } - - /// Returns the [`Location`] of the first token on the line or [`None`]. 
- pub fn first_token_location(&self) -> Option<&Location> { - self.token_locations().first() - } - - fn token_offsets(&self) -> &[u32] { - &self.lines.mappings.logical_line_offsets - [self.line.mappings_start as usize..self.line.mappings_end as usize] - } - - fn token_locations(&self) -> &[Location] { - &self.lines.mappings.locations - [self.line.mappings_start as usize..self.line.mappings_end as usize] - } - - /// Returns the mapping for an offset in the logical line. - /// - /// The offset of the closest token and its corresponding location. - pub fn mapping(&self, offset: usize) -> (usize, Location) { - let index = self - .token_offsets() - .binary_search(&(self.line.text_start + u32::try_from(offset).unwrap())) - .unwrap_or_default(); - - ( - (self.token_offsets()[index] - self.line.text_start) as usize, - self.token_locations()[index], - ) - } - - pub fn is_empty(&self) -> bool { - self.lines.mappings.is_empty() - } - - pub const fn flags(&self) -> TokenFlags { - self.line.flags - } -} - -struct DebugLogicalLine<'a>(LogicalLine<'a>); - -impl Debug for DebugLogicalLine<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("LogicalLine") - .field("text", &self.0.text()) - .field("flags", &self.0.flags()) - .field("tokens", &self.0.tokens()) - .finish() - } -} - -/// Iterator over the logical lines of a document. 
-pub struct LogicalLinesIter<'a> { - lines: &'a LogicalLines<'a>, - inner: std::slice::Iter<'a, Line>, -} - -impl<'a> Iterator for LogicalLinesIter<'a> { - type Item = LogicalLine<'a>; - - fn next(&mut self) -> Option { - let line = self.inner.next()?; - - Some(LogicalLine { - lines: self.lines, - line, - }) - } - - fn size_hint(&self) -> (usize, Option) { - self.inner.size_hint() - } -} - -impl DoubleEndedIterator for LogicalLinesIter<'_> { - fn next_back(&mut self) -> Option { - let line = self.inner.next_back()?; - - Some(LogicalLine { - lines: self.lines, - line, - }) - } -} - -impl ExactSizeIterator for LogicalLinesIter<'_> {} - -impl FusedIterator for LogicalLinesIter<'_> {} - -/// Source map that maps byte positions in the logical line text to the [`Location`] in the -/// original document. -#[derive(Debug, Default, Clone)] -struct Mappings { - /// byte offsets of the logical lines at which tokens start/end. - logical_line_offsets: Vec, - - /// Corresponding [`Location`]s for each byte offset mapping it to the position in the original document. 
- locations: Vec, -} - -impl Mappings { - fn with_capacity(capacity: usize) -> Self { - Self { - logical_line_offsets: Vec::with_capacity(capacity), - locations: Vec::with_capacity(capacity), - } - } - - fn len(&self) -> usize { - self.logical_line_offsets.len() - } - - fn is_empty(&self) -> bool { - self.logical_line_offsets.is_empty() - } - - fn truncate(&mut self, len: usize) { - self.locations.truncate(len); - self.logical_line_offsets.truncate(len); - } - - #[allow(clippy::cast_possible_truncation)] - fn push(&mut self, offset: usize, location: Location) { - self.logical_line_offsets.push(offset as u32); - self.locations.push(location); - } -} - -#[derive(Debug, Default)] -struct CurrentLine { - flags: TokenFlags, - text_start: u32, - mappings_start: u32, - tokens_start: u32, - previous_token: Option, -} - -#[derive(Debug, Default)] -pub struct LogicalLinesBuilder<'a> { - text: String, - tokens: Vec<(Location, &'a Tok, Location)>, - mappings: Mappings, - lines: Vec, - current_line: Option, -} - -impl<'a> LogicalLinesBuilder<'a> { - fn with_capacity(tokens: usize, string: usize) -> Self { - Self { - tokens: Vec::with_capacity(tokens), - mappings: Mappings::with_capacity(tokens + 1), - text: String::with_capacity(string), - ..Self::default() - } - } - - // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each tokens is at least one character long - #[allow(clippy::cast_possible_truncation)] - fn push_token(&mut self, start: Location, token: &'a Tok, end: Location, locator: &Locator) { - let tokens_start = self.tokens.len(); - self.tokens.push((start, token, end)); - - let mut line = self.current_line.get_or_insert_with(|| { - let mappings_start = self.mappings.len(); - self.mappings.push(self.text.len(), start); - - CurrentLine { - flags: TokenFlags::empty(), - text_start: self.text.len() as u32, - mappings_start: mappings_start as u32, - tokens_start: tokens_start as u32, - previous_token: None, - } - }); - - if 
matches!( - token, - Tok::Newline | Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent - ) { - return; - } - - if matches!(token, Tok::Comment(..)) { - line.flags.insert(TokenFlags::COMMENT); - return; - } - - if is_op_token(token) { - line.flags.insert(TokenFlags::OPERATOR); - } - - if matches!( - token, - Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace - ) { - line.flags.insert(TokenFlags::BRACKET); - } - - if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) { - line.flags.insert(TokenFlags::PUNCTUATION); - } - - if is_keyword_token(token) { - line.flags.insert(TokenFlags::KEYWORD); - } - - // TODO(charlie): "Mute" strings. - let text = if let Tok::String { value, .. } = token { - // Replace the content of strings with a non-whs sequence because some lints - // search for whitespace in the document and whitespace inside of the string - // would complicate the search. - Cow::Owned(format!("\"{}\"", "x".repeat(value.width()))) - } else { - Cow::Borrowed(locator.slice(Range { - location: start, - end_location: end, - })) - }; - - if let Some(prev) = line.previous_token.take() { - if prev.row() != start.row() { - let prev_text = locator.slice(Range { - location: Location::new(prev.row(), prev.column() - 1), - end_location: Location::new(prev.row(), prev.column()), - }); - if prev_text == "," - || ((prev_text != "{" && prev_text != "[" && prev_text != "(") - && (text != "}" && text != "]" && text != ")")) - { - self.text.push(' '); - } - } else if prev.column() != start.column() { - let prev_text = locator.slice(Range { - location: prev, - end_location: start, - }); - self.text.push_str(prev_text); - } - } - - line.previous_token = Some(end); - self.text.push_str(&text); - self.mappings.push(self.text.len(), end); - } - - // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each tokens is at least one character long - #[allow(clippy::cast_possible_truncation)] - fn finish_line(&mut self) { - if 
let Some(current) = self.current_line.take() { - self.lines.push(Line { - flags: current.flags, - text_start: current.text_start, - text_end: self.text.len() as u32, - mappings_start: current.mappings_start, - mappings_end: self.mappings.len() as u32, - tokens_start: current.tokens_start, - tokens_end: self.tokens.len() as u32, - }); - } - } - - fn discard_line(&mut self) { - if let Some(current) = self.current_line.take() { - self.text.truncate(current.text_start as usize); - self.tokens.truncate(current.tokens_start as usize); - self.mappings.truncate(current.mappings_start as usize); - } - } - - fn finish(mut self) -> LogicalLines<'a> { - self.finish_line(); - - LogicalLines { - text: self.text, - tokens: self.tokens, - mappings: self.mappings, - lines: self.lines, - } - } -} diff --git a/crates/ruff/src/rules/pycodestyle/mod.rs b/crates/ruff/src/rules/pycodestyle/mod.rs index 0a696d94c7..2adaddf8a9 100644 --- a/crates/ruff/src/rules/pycodestyle/mod.rs +++ b/crates/ruff/src/rules/pycodestyle/mod.rs @@ -3,7 +3,6 @@ pub(crate) mod rules; pub mod settings; pub(crate) mod helpers; -pub(crate) mod logical_lines; #[cfg(test)] mod tests { diff --git a/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs similarity index 61% rename from crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs rename to crates/ruff/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs index a7d8fba96d..bf02c07a25 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs @@ -1,8 +1,7 @@ -#![allow(dead_code, unused_imports, unused_variables)] - -use once_cell::sync::Lazy; -use regex::Regex; +use rustpython_parser::ast::Location; +use rustpython_parser::Tok; +use super::{LogicalLine, Whitespace}; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use 
ruff_macros::{derive_message_formats, violation}; @@ -101,32 +100,48 @@ impl Violation for WhitespaceBeforePunctuation { } } -// TODO(charlie): Pycodestyle has a negative lookahead on the end. -static EXTRANEOUS_WHITESPACE_REGEX: Lazy = - Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap()); - /// E201, E202, E203 -#[cfg(feature = "logical_lines")] -pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> { +pub(crate) fn extraneous_whitespace(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> { let mut diagnostics = vec![]; - for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) { - let text = &line[line_match.range()]; - let char = text.trim(); - let found = line_match.start(); - if text.chars().last().unwrap().is_ascii_whitespace() { - diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into())); - } else if line.chars().nth(found - 1).map_or(false, |c| c != ',') { - if char == "}" || char == "]" || char == ")" { - diagnostics.push((found, WhitespaceBeforeCloseBracket.into())); - } else { - diagnostics.push((found, WhitespaceBeforePunctuation.into())); + let mut last_token: Option<&Tok> = None; + + for token in line.tokens() { + let kind = token.kind(); + match kind { + Tok::Lbrace | Tok::Lpar | Tok::Lsqb => { + if !matches!(line.trailing_whitespace(&token), Whitespace::None) { + let end = token.end(); + diagnostics.push(( + Location::new(end.row(), end.column()), + WhitespaceAfterOpenBracket.into(), + )); + } } + Tok::Rbrace | Tok::Rpar | Tok::Rsqb | Tok::Comma | Tok::Semi | Tok::Colon => { + let diagnostic_kind = if matches!(kind, Tok::Comma | Tok::Semi | Tok::Colon) { + DiagnosticKind::from(WhitespaceBeforePunctuation) + } else { + DiagnosticKind::from(WhitespaceBeforeCloseBracket) + }; + + if let (Whitespace::Single | Whitespace::Many | Whitespace::Tab, offset) = + line.leading_whitespace(&token) + { + let start = token.start(); + if !matches!(last_token, Some(Tok::Comma)) { + diagnostics.push(( + 
Location::new(start.row(), start.column() - offset), + diagnostic_kind, + )); + } + } + } + + _ => {} } + + last_token = Some(kind); } + diagnostics } - -#[cfg(not(feature = "logical_lines"))] -pub fn extraneous_whitespace(_line: &str) -> Vec<(usize, DiagnosticKind)> { - vec![] -} diff --git a/crates/ruff/src/rules/pycodestyle/rules/indentation.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/indentation.rs similarity index 87% rename from crates/ruff/src/rules/pycodestyle/rules/indentation.rs rename to crates/ruff/src/rules/pycodestyle/rules/logical_lines/indentation.rs index 9d7d7b3c93..721577de98 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/indentation.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/indentation.rs @@ -1,10 +1,10 @@ -#![allow(dead_code, unused_imports, unused_variables)] - use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; +use rustpython_parser::ast::Location; +use rustpython_parser::Tok; -use crate::rules::pycodestyle::logical_lines::LogicalLine; +use super::LogicalLine; /// ## What it does /// Checks for indentation with a non-multiple of 4 spaces. 
@@ -230,33 +230,36 @@ impl Violation for OverIndented { } /// E111, E114, E112, E113, E115, E116, E117 -#[cfg(feature = "logical_lines")] -pub fn indentation( +pub(crate) fn indentation( logical_line: &LogicalLine, prev_logical_line: Option<&LogicalLine>, indent_char: char, indent_level: usize, prev_indent_level: Option, indent_size: usize, -) -> Vec<(usize, DiagnosticKind)> { +) -> Vec<(Location, DiagnosticKind)> { let mut diagnostics = vec![]; + + let location = logical_line.first_token_location().unwrap(); + if indent_level % indent_size != 0 { diagnostics.push(( - 0, - if logical_line.is_comment() { + location, + if logical_line.is_comment_only() { IndentationWithInvalidMultipleComment { indent_size }.into() } else { IndentationWithInvalidMultiple { indent_size }.into() }, )); } - let indent_expect = prev_logical_line.map_or(false, |prev_logical_line| { - prev_logical_line.text().ends_with(':') - }); + let indent_expect = prev_logical_line + .and_then(|prev_logical_line| prev_logical_line.tokens().trimmed().last()) + .map_or(false, |t| t.kind() == &Tok::Colon); + if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) { diagnostics.push(( - 0, - if logical_line.is_comment() { + location, + if logical_line.is_comment_only() { NoIndentedBlockComment.into() } else { NoIndentedBlock.into() @@ -266,8 +269,8 @@ pub fn indentation( && prev_indent_level.map_or(false, |prev_indent_level| indent_level > prev_indent_level) { diagnostics.push(( - 0, - if logical_line.is_comment() { + location, + if logical_line.is_comment_only() { UnexpectedIndentationComment.into() } else { UnexpectedIndentation.into() @@ -278,20 +281,9 @@ pub fn indentation( let expected_indent_amount = if indent_char == '\t' { 8 } else { 4 }; let expected_indent_level = prev_indent_level.unwrap_or(0) + expected_indent_amount; if indent_level > expected_indent_level { - diagnostics.push((0, OverIndented.into())); + diagnostics.push((location, OverIndented.into())); } } + diagnostics } - 
-#[cfg(not(feature = "logical_lines"))] -pub fn indentation( - _logical_line: &LogicalLine, - _prev_logical_line: Option<&LogicalLine>, - _indent_char: char, - _indent_level: usize, - _prev_indent_level: Option, - _indent_size: usize, -) -> Vec<(usize, DiagnosticKind)> { - vec![] -} diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs new file mode 100644 index 0000000000..0cf73b3d04 --- /dev/null +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs @@ -0,0 +1,85 @@ +use itertools::Itertools; +use rustpython_parser::Tok; + +use super::LogicalLine; +use ruff_diagnostics::Edit; +use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::types::Range; + +#[violation] +pub struct MissingWhitespace { + pub token: String, +} + +impl AlwaysAutofixableViolation for MissingWhitespace { + #[derive_message_formats] + fn message(&self) -> String { + let MissingWhitespace { token } = self; + format!("Missing whitespace after {token}") + } + + fn autofix_title(&self) -> String { + let MissingWhitespace { token } = self; + format!("Added missing whitespace after {token}") + } +} + +/// E231 +pub(crate) fn missing_whitespace(line: &LogicalLine, autofix: bool) -> Vec { + let mut diagnostics = vec![]; + + let mut num_lsqb = 0u32; + let mut num_rsqb = 0u32; + let mut prev_lsqb = None; + let mut prev_lbrace = None; + + for (token, next_token) in line.tokens().iter().tuple_windows() { + let kind = token.kind(); + match kind { + Tok::Lsqb => { + num_lsqb += 1; + prev_lsqb = Some(token.start()); + } + Tok::Rsqb => { + num_rsqb += 1; + } + Tok::Lbrace => { + prev_lbrace = Some(token.start()); + } + + Tok::Comma | Tok::Semi | Tok::Colon => { + let after = line.text_after(&token); + + if !after.chars().next().map_or(false, char::is_whitespace) { + match (kind, 
next_token.kind()) { + (Tok::Colon, _) if num_lsqb > num_rsqb && prev_lsqb > prev_lbrace => { + continue; // Slice syntax, no space required + } + (Tok::Comma, Tok::Rpar | Tok::Rsqb) => { + continue; // Allow tuple with only one element: (3,) + } + (Tok::Colon, Tok::Equal) => { + continue; // Allow assignment expression + } + _ => {} + } + + let kind = MissingWhitespace { + token: kind.to_string(), + }; + + let (start, end) = token.range(); + let mut diagnostic = Diagnostic::new(kind, Range::new(start, start)); + + if autofix { + diagnostic.set_fix(Edit::insertion(" ".to_string(), end)); + } + diagnostics.push(diagnostic); + } + } + _ => {} + } + } + diagnostics +} diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs new file mode 100644 index 0000000000..48eef5d855 --- /dev/null +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs @@ -0,0 +1,44 @@ +use itertools::Itertools; +use rustpython_parser::ast::Location; +use rustpython_parser::Tok; + +use ruff_diagnostics::DiagnosticKind; +use ruff_diagnostics::Violation; +use ruff_macros::{derive_message_formats, violation}; + +use super::LogicalLineTokens; +use crate::rules::pycodestyle::helpers::{is_keyword_token, is_singleton_token}; + +#[violation] +pub struct MissingWhitespaceAfterKeyword; + +impl Violation for MissingWhitespaceAfterKeyword { + #[derive_message_formats] + fn message(&self) -> String { + format!("Missing whitespace after keyword") + } +} + +/// E275 +pub(crate) fn missing_whitespace_after_keyword( + tokens: &LogicalLineTokens, +) -> Vec<(Location, DiagnosticKind)> { + let mut diagnostics = vec![]; + + for (tok0, tok1) in tokens.iter().tuple_windows() { + let tok0_kind = tok0.kind(); + let tok1_kind = tok1.kind(); + + if is_keyword_token(tok0_kind) + && !(is_singleton_token(tok0_kind) + || matches!(tok0_kind, Tok::Async 
| Tok::Await) + || tok0_kind == &Tok::Except && tok1_kind == &Tok::Star + || tok0_kind == &Tok::Yield && tok1_kind == &Tok::Rpar + || matches!(tok1_kind, Tok::Colon | Tok::Newline)) + && tok0.end() == tok1.start() + { + diagnostics.push((tok0.end(), MissingWhitespaceAfterKeyword.into())); + } + } + diagnostics +} diff --git a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_around_operator.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs similarity index 65% rename from crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_around_operator.rs rename to crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs index d80b20a2e9..653ea59de9 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_around_operator.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs @@ -1,5 +1,3 @@ -#![allow(dead_code, unused_imports, unused_variables)] - use rustpython_parser::ast::Location; use rustpython_parser::Tok; @@ -8,9 +6,10 @@ use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; use crate::rules::pycodestyle::helpers::{ - is_arithmetic_token, is_keyword_token, is_op_token, is_singleton_token, is_skip_comment_token, + is_arithmetic_token, is_keyword_token, is_op_token, is_skip_comment_token, is_soft_keyword_token, is_unary_token, is_ws_needed_token, is_ws_optional_token, }; +use crate::rules::pycodestyle::rules::logical_lines::LogicalLineTokens; // E225 #[violation] @@ -57,53 +56,53 @@ impl Violation for MissingWhitespaceAroundModuloOperator { } /// E225, E226, E227, E228 -#[cfg(feature = "logical_lines")] #[allow(clippy::if_same_then_else)] -pub fn missing_whitespace_around_operator( - tokens: &[(Location, &Tok, Location)], +pub(crate) fn missing_whitespace_around_operator( + tokens: &LogicalLineTokens, ) -> Vec<(Location, DiagnosticKind)> { let mut diagnostics = vec![]; let mut 
needs_space_main: Option = Some(false); let mut needs_space_aux: Option = None; - let mut prev_end_aux: Option<&Location> = None; + let mut prev_end_aux: Option = None; let mut parens = 0u32; let mut prev_type: Option<&Tok> = None; - let mut prev_end: Option<&Location> = None; + let mut prev_end: Option = None; - for (start, token, end) in tokens { - if is_skip_comment_token(token) { + for token in tokens { + let kind = token.kind(); + + if is_skip_comment_token(kind) { continue; } - if **token == Tok::Lpar || **token == Tok::Lambda { - parens += 1; - } else if **token == Tok::Rpar { - parens -= 1; - } + match kind { + Tok::Lpar | Tok::Lambda => parens += 1, + Tok::Rpar => parens -= 1, + _ => {} + }; + let needs_space = (needs_space_main.is_some() && needs_space_main.unwrap()) || needs_space_aux.is_some() || prev_end_aux.is_some(); if needs_space { - if Some(start) != prev_end { + if Some(token.start()) != prev_end { if !(needs_space_main.is_some() && needs_space_main.unwrap()) && (needs_space_aux.is_none() || !needs_space_aux.unwrap()) { diagnostics.push(( - *(prev_end_aux.unwrap()), + prev_end_aux.unwrap(), MissingWhitespaceAroundOperator.into(), )); } needs_space_main = Some(false); needs_space_aux = None; prev_end_aux = None; - } else if **token == Tok::Greater - && (prev_type == Some(&Tok::Less) || prev_type == Some(&Tok::Minus)) - { + } else if kind == &Tok::Greater && matches!(prev_type, Some(Tok::Less | Tok::Minus)) { // Tolerate the "<>" operator, even if running Python 3 // Deal with Python 3's annotated return value "->" } else if prev_type == Some(&Tok::Slash) - && (**token == Tok::Comma || **token == Tok::Rpar || **token == Tok::Colon) - || (prev_type == Some(&Tok::Rpar) && **token == Tok::Colon) + && matches!(kind, Tok::Comma | Tok::Rpar | Tok::Colon) + || (prev_type == Some(&Tok::Rpar) && kind == &Tok::Colon) { // Tolerate the "/" operator in function definition // For more info see PEP570 @@ -111,22 +110,21 @@ pub fn 
missing_whitespace_around_operator( if (needs_space_main.is_some() && needs_space_main.unwrap()) || (needs_space_aux.is_some() && needs_space_aux.unwrap()) { - diagnostics - .push((*(prev_end.unwrap()), MissingWhitespaceAroundOperator.into())); + diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into())); } else if prev_type != Some(&Tok::DoubleStar) { if prev_type == Some(&Tok::Percent) { diagnostics.push(( - *(prev_end_aux.unwrap()), + prev_end_aux.unwrap(), MissingWhitespaceAroundModuloOperator.into(), )); } else if !is_arithmetic_token(prev_type.unwrap()) { diagnostics.push(( - *(prev_end_aux.unwrap()), + prev_end_aux.unwrap(), MissingWhitespaceAroundBitwiseOrShiftOperator.into(), )); } else { diagnostics.push(( - *(prev_end_aux.unwrap()), + prev_end_aux.unwrap(), MissingWhitespaceAroundArithmeticOperator.into(), )); } @@ -135,30 +133,28 @@ pub fn missing_whitespace_around_operator( needs_space_aux = None; prev_end_aux = None; } - } else if (is_op_token(token) || matches!(token, Tok::Name { .. })) && prev_end.is_some() { - if **token == Tok::Equal && parens > 0 { + } else if (is_op_token(kind) || matches!(kind, Tok::Name { .. })) && prev_end.is_some() { + if kind == &Tok::Equal && parens > 0 { // Allow keyword args or defaults: foo(bar=None). - } else if is_ws_needed_token(token) { + } else if is_ws_needed_token(kind) { needs_space_main = Some(true); needs_space_aux = None; prev_end_aux = None; - } else if is_unary_token(token) { + } else if is_unary_token(kind) { // Check if the operator is used as a binary operator // Allow unary operators: -123, -x, +1. 
// Allow argument unpacking: foo(*args, **kwargs) - if (prev_type.is_some() - && is_op_token(prev_type.unwrap()) - && (prev_type == Some(&Tok::Rpar) - || prev_type == Some(&Tok::Rsqb) - || prev_type == Some(&Tok::Rbrace))) - || (!is_op_token(prev_type.unwrap()) && !is_keyword_token(prev_type.unwrap())) - && (!is_soft_keyword_token(prev_type.unwrap())) - { - needs_space_main = None; - needs_space_aux = None; - prev_end_aux = None; + if let Some(prev_type) = prev_type { + if (matches!(prev_type, Tok::Rpar | Tok::Rsqb | Tok::Rbrace)) + || (!is_op_token(prev_type) && !is_keyword_token(prev_type)) + && (!is_soft_keyword_token(prev_type)) + { + needs_space_main = None; + needs_space_aux = None; + prev_end_aux = None; + } } - } else if is_ws_optional_token(token) { + } else if is_ws_optional_token(kind) { needs_space_main = None; needs_space_aux = None; prev_end_aux = None; @@ -169,28 +165,21 @@ pub fn missing_whitespace_around_operator( // trailing space matches opening space needs_space_main = None; prev_end_aux = prev_end; - needs_space_aux = Some(Some(start) != prev_end_aux); + needs_space_aux = Some(Some(token.start()) != prev_end_aux); } else if needs_space_main.is_some() && needs_space_main.unwrap() - && Some(start) == prev_end_aux + && Some(token.start()) == prev_end_aux { // A needed opening space was not found - diagnostics.push((*(prev_end.unwrap()), MissingWhitespaceAroundOperator.into())); + diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into())); needs_space_main = Some(false); needs_space_aux = None; prev_end_aux = None; } } - prev_type = Some(*token); - prev_end = Some(end); + prev_type = Some(kind); + prev_end = Some(token.end()); } diagnostics } - -#[cfg(not(feature = "logical_lines"))] -pub fn missing_whitespace_around_operator( - _tokens: &[(Location, &Tok, Location)], -) -> Vec<(Location, DiagnosticKind)> { - vec![] -} diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs 
b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs new file mode 100644 index 0000000000..6801536965 --- /dev/null +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs @@ -0,0 +1,653 @@ +use bitflags::bitflags; +use rustpython_parser::ast::Location; +use rustpython_parser::lexer::LexResult; +use rustpython_parser::Tok; +use std::fmt::{Debug, Formatter}; +use std::iter::FusedIterator; +use std::ops::Deref; + +use ruff_python_ast::source_code::Locator; +use ruff_python_ast::types::Range; + +use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token}; + +pub(crate) use extraneous_whitespace::{ + extraneous_whitespace, WhitespaceAfterOpenBracket, WhitespaceBeforeCloseBracket, + WhitespaceBeforePunctuation, +}; +pub(crate) use indentation::{ + indentation, IndentationWithInvalidMultiple, IndentationWithInvalidMultipleComment, + NoIndentedBlock, NoIndentedBlockComment, OverIndented, UnexpectedIndentation, + UnexpectedIndentationComment, +}; +pub(crate) use missing_whitespace::{missing_whitespace, MissingWhitespace}; +pub(crate) use missing_whitespace_after_keyword::{ + missing_whitespace_after_keyword, MissingWhitespaceAfterKeyword, +}; +pub(crate) use missing_whitespace_around_operator::{ + missing_whitespace_around_operator, MissingWhitespaceAroundArithmeticOperator, + MissingWhitespaceAroundBitwiseOrShiftOperator, MissingWhitespaceAroundModuloOperator, + MissingWhitespaceAroundOperator, +}; +pub(crate) use space_around_operator::{ + space_around_operator, MultipleSpacesAfterOperator, MultipleSpacesBeforeOperator, + TabAfterOperator, TabBeforeOperator, +}; +pub(crate) use whitespace_around_keywords::{ + whitespace_around_keywords, MultipleSpacesAfterKeyword, MultipleSpacesBeforeKeyword, + TabAfterKeyword, TabBeforeKeyword, +}; +pub(crate) use whitespace_around_named_parameter_equals::{ + whitespace_around_named_parameter_equals, MissingWhitespaceAroundParameterEquals, + UnexpectedSpacesAroundKeywordParameterEquals, +}; +pub(crate) 
use whitespace_before_comment::{ + whitespace_before_comment, MultipleLeadingHashesForBlockComment, NoSpaceAfterBlockComment, + NoSpaceAfterInlineComment, TooFewSpacesBeforeInlineComment, +}; +pub(crate) use whitespace_before_parameters::{ + whitespace_before_parameters, WhitespaceBeforeParameters, +}; + +mod extraneous_whitespace; +mod indentation; +mod missing_whitespace; +mod missing_whitespace_after_keyword; +mod missing_whitespace_around_operator; +mod space_around_operator; +mod whitespace_around_keywords; +mod whitespace_around_named_parameter_equals; +mod whitespace_before_comment; +mod whitespace_before_parameters; + +bitflags! { + #[derive(Default)] + pub(crate) struct TokenFlags: u8 { + /// Whether the logical line contains an operator. + const OPERATOR = 0b0000_0001; + /// Whether the logical line contains a bracket. + const BRACKET = 0b0000_0010; + /// Whether the logical line contains a punctuation mark. + const PUNCTUATION = 0b0000_0100; + /// Whether the logical line contains a keyword. + const KEYWORD = 0b0000_1000; + /// Whether the logical line contains a comment. + const COMMENT = 0b0001_0000; + } +} + +#[derive(Clone)] +pub(crate) struct LogicalLines<'a> { + tokens: Tokens<'a>, + lines: Vec, + locator: &'a Locator<'a>, +} + +impl<'a> LogicalLines<'a> { + pub fn from_tokens(tokens: &'a [LexResult], locator: &'a Locator<'a>) -> Self { + assert!(u32::try_from(tokens.len()).is_ok()); + + let mut builder = LogicalLinesBuilder::with_capacity(tokens.len()); + let mut parens: u32 = 0; + + for (start, token, end) in tokens.iter().flatten() { + builder.push_token(*start, token, *end); + + match token { + Tok::Lbrace | Tok::Lpar | Tok::Lsqb => { + parens += 1; + } + Tok::Rbrace | Tok::Rpar | Tok::Rsqb => { + parens -= 1; + } + Tok::Newline | Tok::NonLogicalNewline | Tok::Comment { .. 
} if parens == 0 => { + builder.finish_line(); + } + _ => {} + } + } + + builder.finish(locator) + } +} + +impl Debug for LogicalLines<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_list() + .entries(self.into_iter().map(DebugLogicalLine)) + .finish() + } +} + +impl<'a> IntoIterator for &'a LogicalLines<'a> { + type Item = LogicalLine<'a>; + type IntoIter = LogicalLinesIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + LogicalLinesIter { + lines: self, + inner: self.lines.iter(), + } + } +} + +/// A logical line spans multiple lines in the source document if the line +/// ends with a parenthesized expression (`(..)`, `[..]`, `{..}`) that contains +/// line breaks. +/// +/// ## Examples +/// This expression forms one logical line because the array elements are parenthesized. +/// +/// ```python +/// a = [ +/// 1, +/// 2 +/// ] +/// ``` +#[derive(Debug)] +pub(crate) struct LogicalLine<'a> { + lines: &'a LogicalLines<'a>, + line: &'a Line, +} + +impl<'a> LogicalLine<'a> { + /// Returns `true` if this is a comment only line + pub fn is_comment_only(&self) -> bool { + self.flags() == TokenFlags::COMMENT && self.tokens().trimmed().is_empty() + } + + /// Returns logical line's text including comments, indents, dedent and trailing new lines. + pub fn text(&self) -> &'a str { + self.tokens().text() + } + + /// Returns the text without any leading or trailing newline, comment, indent, or dedent of this line + #[cfg(test)] + pub fn text_trimmed(&self) -> &'a str { + self.tokens_trimmed().text() + } + + #[cfg(test)] + pub fn tokens_trimmed(&self) -> LogicalLineTokens<'a> { + self.tokens().trimmed() + } + + /// Returns the text after `token` + pub fn text_after(&self, token: &LogicalLineToken<'a>) -> &str { + debug_assert!( + (self.line.tokens_start..self.line.tokens_end).contains(&token.position), + "Token does not belong to this line" + ); + + // SAFETY: The line must have at least one token or `token` would not belong to this line.
+ let last_token = self.tokens().last().unwrap(); + self.lines + .locator + .slice(Range::new(token.end(), last_token.end())) + } + + /// Returns the text before `token` + pub fn text_before(&self, token: &LogicalLineToken<'a>) -> &str { + debug_assert!( + (self.line.tokens_start..self.line.tokens_end).contains(&token.position), + "Token does not belong to this line" + ); + + // SAFETY: The line must have at least one token or `token` would not belong to this line. + let first_token = self.tokens().first().unwrap(); + self.lines + .locator + .slice(Range::new(first_token.start(), token.start())) + } + + /// Returns the whitespace *after* the `token` + pub fn trailing_whitespace(&self, token: &LogicalLineToken<'a>) -> Whitespace { + Whitespace::leading(self.text_after(token)) + } + + /// Returns the whitespace and whitespace character-length *before* the `token` + pub fn leading_whitespace(&self, token: &LogicalLineToken<'a>) -> (Whitespace, usize) { + Whitespace::trailing(self.text_before(token)) + } + + /// Returns all tokens of the line, including comments and trailing new lines. + pub fn tokens(&self) -> LogicalLineTokens<'a> { + LogicalLineTokens { + lines: self.lines, + front: self.line.tokens_start, + back: self.line.tokens_end, + } + } + + /// Returns the [`Location`] of the first token on the line or [`None`]. + pub fn first_token_location(&self) -> Option { + self.tokens().first().map(|t| t.start()) + } + + /// Returns the line's flags + pub const fn flags(&self) -> TokenFlags { + self.line.flags + } +} + +/// Helper struct to pretty print [`LogicalLine`] with `dbg` +struct DebugLogicalLine<'a>(LogicalLine<'a>); + +impl Debug for DebugLogicalLine<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("LogicalLine") + .field("text", &self.0.text()) + .field("flags", &self.0.flags()) + .field("tokens", &self.0.tokens()) + .finish() + } +} + +/// Iterator over the logical lines of a document. 
+pub(crate) struct LogicalLinesIter<'a> { + lines: &'a LogicalLines<'a>, + inner: std::slice::Iter<'a, Line>, +} + +impl<'a> Iterator for LogicalLinesIter<'a> { + type Item = LogicalLine<'a>; + + fn next(&mut self) -> Option { + let line = self.inner.next()?; + + Some(LogicalLine { + lines: self.lines, + line, + }) + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} + +impl DoubleEndedIterator for LogicalLinesIter<'_> { + fn next_back(&mut self) -> Option { + let line = self.inner.next_back()?; + + Some(LogicalLine { + lines: self.lines, + line, + }) + } +} + +impl ExactSizeIterator for LogicalLinesIter<'_> {} + +impl FusedIterator for LogicalLinesIter<'_> {} + +/// The tokens of a logical line +pub(crate) struct LogicalLineTokens<'a> { + lines: &'a LogicalLines<'a>, + front: u32, + back: u32, +} + +impl<'a> LogicalLineTokens<'a> { + pub fn iter(&self) -> LogicalLineTokensIter<'a> { + LogicalLineTokensIter { + tokens: &self.lines.tokens, + front: self.front, + back: self.back, + } + } + + pub fn len(&self) -> usize { + (self.back - self.front) as usize + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn trimmed(&self) -> LogicalLineTokens<'a> { + let tokens = &self.lines.tokens[self.front as usize..self.back as usize]; + let mut front = self.front; + let mut back = self.back; + + let mut iter = tokens.iter(); + + for (_, kind, _) in iter.by_ref() { + if !matches!( + kind, + Tok::Newline + | Tok::NonLogicalNewline + | Tok::Indent + | Tok::Dedent + | Tok::Comment(..) + ) { + break; + } + front += 1; + } + + for (_, kind, _) in iter.rev() { + if !matches!( + kind, + Tok::Newline + | Tok::NonLogicalNewline + | Tok::Indent + | Tok::Dedent + | Tok::Comment(..) 
+ ) { + break; + } + back -= 1; + } + + LogicalLineTokens { + lines: self.lines, + front, + back, + } + } + + pub fn text(&self) -> &'a str { + match (self.first(), self.last()) { + (Some(first), Some(last)) => { + let locator = self.lines.locator; + locator.slice(Range::new(first.start(), last.end())) + } + _ => "", + } + } + + /// Returns the first token + pub fn first(&self) -> Option> { + self.iter().next() + } + + /// Returns the last token + pub fn last(&self) -> Option> { + self.iter().next_back() + } +} + +impl<'a> IntoIterator for LogicalLineTokens<'a> { + type Item = LogicalLineToken<'a>; + type IntoIter = LogicalLineTokensIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> IntoIterator for &LogicalLineTokens<'a> { + type Item = LogicalLineToken<'a>; + type IntoIter = LogicalLineTokensIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl Debug for LogicalLineTokens<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} + +/// Iterator over the tokens of a [`LogicalLine`] +pub(crate) struct LogicalLineTokensIter<'a> { + tokens: &'a Tokens<'a>, + front: u32, + back: u32, +} + +impl<'a> Iterator for LogicalLineTokensIter<'a> { + type Item = LogicalLineToken<'a>; + + fn next(&mut self) -> Option { + if self.front < self.back { + let result = Some(LogicalLineToken { + tokens: self.tokens, + position: self.front, + }); + + self.front += 1; + result + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = (self.back - self.front) as usize; + (len, Some(len)) + } +} + +impl ExactSizeIterator for LogicalLineTokensIter<'_> {} + +impl FusedIterator for LogicalLineTokensIter<'_> {} + +impl DoubleEndedIterator for LogicalLineTokensIter<'_> { + fn next_back(&mut self) -> Option { + if self.front < self.back { + self.back -= 1; + Some(LogicalLineToken { + position: self.back, + tokens: self.tokens, + }) + } else { + 
None + } + } +} + +/// A token of a [`LogicalLine`] +#[derive(Clone)] +pub(crate) struct LogicalLineToken<'a> { + tokens: &'a Tokens<'a>, + position: u32, +} + +impl<'a> LogicalLineToken<'a> { + /// Returns the token's kind + pub fn kind(&self) -> &'a Tok { + #[allow(unsafe_code)] + let (_, token, _) = unsafe { *self.tokens.get_unchecked(self.position as usize) }; + + token + } + + /// Returns the token's start location + pub fn start(&self) -> Location { + self.range().0 + } + + /// Returns the token's end location + pub fn end(&self) -> Location { + self.range().1 + } + + /// Returns a tuple with the token's `(start, end)` locations + pub fn range(&self) -> (Location, Location) { + #[allow(unsafe_code)] + let &(start, _, end) = unsafe { self.tokens.get_unchecked(self.position as usize) }; + + (start, end) + } +} + +impl Debug for LogicalLineToken<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("LogicalLineToken") + .field("kind", &self.kind()) + .field("range", &self.range()) + .finish() + } +} + +#[derive(Copy, Clone, Eq, PartialEq)] +pub(crate) enum Whitespace { + None, + Single, + Many, + Tab, +} + +impl Whitespace { + fn leading(content: &str) -> Self { + let mut count = 0u32; + + for c in content.chars() { + if c == '\t' { + return Self::Tab; + } else if matches!(c, '\n' | '\r') { + break; + } else if c.is_whitespace() { + count += 1; + } else { + break; + } + } + + match count { + 0 => Whitespace::None, + 1 => Whitespace::Single, + _ => Whitespace::Many, + } + } + + fn trailing(content: &str) -> (Self, usize) { + let mut count = 0; + + for c in content.chars().rev() { + if c == '\t' { + return (Self::Tab, count + 1); + } else if matches!(c, '\n' | '\r') { + // Indent + return (Self::None, 0); + } else if c.is_whitespace() { + count += 1; + } else { + break; + } + } + + match count { + 0 => (Self::None, 0), + 1 => (Self::Single, count), + _ => (Self::Many, count), + } + } +} + +#[derive(Debug, Default)] +struct CurrentLine 
{ + flags: TokenFlags, + tokens_start: u32, +} + +/// Builder for [`LogicalLines`] +#[derive(Debug, Default)] +struct LogicalLinesBuilder<'a> { + tokens: Tokens<'a>, + lines: Vec<Line>, + current_line: Option<CurrentLine>, +} + +impl<'a> LogicalLinesBuilder<'a> { + fn with_capacity(tokens: usize) -> Self { + Self { + tokens: Tokens::with_capacity(tokens), + ..Self::default() + } + } + + // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each token is at least one character long + #[allow(clippy::cast_possible_truncation)] + fn push_token(&mut self, start: Location, token: &'a Tok, end: Location) { + let tokens_start = self.tokens.len(); + + let line = self.current_line.get_or_insert_with(|| CurrentLine { + flags: TokenFlags::empty(), + tokens_start: tokens_start as u32, + }); + + if matches!(token, Tok::Comment { .. }) { + line.flags.insert(TokenFlags::COMMENT); + } else if is_op_token(token) { + line.flags.insert(TokenFlags::OPERATOR); + + line.flags.set( + TokenFlags::BRACKET, + matches!( + token, + Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace + ), + ); + } + + if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) { + line.flags.insert(TokenFlags::PUNCTUATION); + } else if is_keyword_token(token) { + line.flags.insert(TokenFlags::KEYWORD); + } + + self.tokens.push(token, start, end); + } + + // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each token is at least one character long + #[allow(clippy::cast_possible_truncation)] + fn finish_line(&mut self) { + if let Some(current) = self.current_line.take() { + self.lines.push(Line { + flags: current.flags, + tokens_start: current.tokens_start, + tokens_end: self.tokens.len() as u32, + }); + } + } + + fn finish(mut self, locator: &'a Locator<'a>) -> LogicalLines<'a> { + self.finish_line(); + + LogicalLines { + tokens: self.tokens, + lines: self.lines, + locator, + } + } +} + +#[derive(Debug, Clone)] +struct
Line { + flags: TokenFlags, + tokens_start: u32, + tokens_end: u32, +} + +#[derive(Debug, Clone, Default)] +struct Tokens<'a>(Vec<(Location, &'a Tok, Location)>); + +impl<'a> Tokens<'a> { + /// Creates new tokens with a reserved size of `capacity` + fn with_capacity(capacity: usize) -> Self { + Self(Vec::with_capacity(capacity)) + } + + /// Returns the number of stored tokens. + fn len(&self) -> usize { + self.0.len() + } + + /// Adds a new token with the given `kind` and `start`, `end` location. + fn push(&mut self, kind: &'a Tok, start: Location, end: Location) { + self.0.push((start, kind, end)); + } +} + +impl<'a> Deref for Tokens<'a> { + type Target = [(Location, &'a Tok, Location)]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs similarity index 57% rename from crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs rename to crates/ruff/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs index 81bda1401a..11bfa0d7b7 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs @@ -1,16 +1,10 @@ -#![allow(dead_code, unused_imports, unused_variables)] - -use once_cell::sync::Lazy; -use regex::Regex; use rustpython_parser::ast::Location; use rustpython_parser::Tok; -use crate::rules::pycodestyle::helpers::is_op_token; -use crate::rules::pycodestyle::rules::Whitespace; +use super::{LogicalLine, Whitespace}; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::source_code::Locator; /// ## What it does /// Checks for extraneous tabs before an operator. 
@@ -128,46 +122,77 @@ impl Violation for MultipleSpacesAfterOperator { } } -static OPERATOR_REGEX: Lazy = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap()); - /// E221, E222, E223, E224 -#[cfg(feature = "logical_lines")] -pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> { +pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> { let mut diagnostics = vec![]; - let mut last_end = None; + let mut after_operator = false; - for line_match in OPERATOR_REGEX.find_iter(line) { - if last_end != Some(line_match.start()) { - let before = &line[..line_match.start()]; + for token in line.tokens() { + let is_operator = is_operator_token(token.kind()); - match Whitespace::trailing(before) { - (Whitespace::Tab, offset) => { - diagnostics.push((line_match.start() - offset, TabBeforeOperator.into())); + if is_operator { + let (start, end) = token.range(); + + if !after_operator { + match line.leading_whitespace(&token) { + (Whitespace::Tab, offset) => diagnostics.push(( + Location::new(start.row(), start.column() - offset), + TabBeforeOperator.into(), + )), + (Whitespace::Many, offset) => diagnostics.push(( + Location::new(start.row(), start.column() - offset), + MultipleSpacesBeforeOperator.into(), + )), + _ => {} } - (Whitespace::Many, offset) => diagnostics.push(( - line_match.start() - offset, - MultipleSpacesBeforeOperator.into(), - )), + } + + match line.trailing_whitespace(&token) { + Whitespace::Tab => diagnostics.push((end, TabAfterOperator.into())), + Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterOperator.into())), _ => {} } } - let after = &line[line_match.end()..]; - let (leading_offset, leading_kind) = Whitespace::leading(after); - match leading_kind { - Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())), - Whitespace::Many => { - diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into())); - } - _ => {} - } - - last_end = Some(line_match.end() 
+ leading_offset); + after_operator = is_operator; } + diagnostics } -#[cfg(not(feature = "logical_lines"))] -pub fn space_around_operator(_line: &str) -> Vec<(usize, DiagnosticKind)> { - vec![] +const fn is_operator_token(token: &Tok) -> bool { + matches!( + token, + Tok::Plus + | Tok::Minus + | Tok::Star + | Tok::Slash + | Tok::Vbar + | Tok::Amper + | Tok::Less + | Tok::Greater + | Tok::Equal + | Tok::Percent + | Tok::NotEqual + | Tok::LessEqual + | Tok::GreaterEqual + | Tok::CircumFlex + | Tok::LeftShift + | Tok::RightShift + | Tok::DoubleStar + | Tok::PlusEqual + | Tok::MinusEqual + | Tok::StarEqual + | Tok::SlashEqual + | Tok::PercentEqual + | Tok::AmperEqual + | Tok::VbarEqual + | Tok::CircumflexEqual + | Tok::LeftShiftEqual + | Tok::RightShiftEqual + | Tok::DoubleStarEqual + | Tok::DoubleSlash + | Tok::DoubleSlashEqual + | Tok::ColonEqual + ) } diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_keywords.rs similarity index 54% rename from crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs rename to crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_keywords.rs index a6eb880b6d..567c5906cf 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_keywords.rs @@ -1,9 +1,7 @@ -#![allow(dead_code, unused_imports, unused_variables)] +use rustpython_parser::ast::Location; -use once_cell::sync::Lazy; -use regex::Regex; - -use crate::rules::pycodestyle::rules::Whitespace; +use super::{LogicalLine, Whitespace}; +use crate::rules::pycodestyle::helpers::is_keyword_token; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; @@ -111,47 +109,40 @@ impl Violation for TabBeforeKeyword { } } -static KEYWORD_REGEX: Lazy = Lazy::new(|| { - 
Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap() -}); - /// E271, E272, E273, E274 -#[cfg(feature = "logical_lines")] -pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> { +pub(crate) fn whitespace_around_keywords(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> { let mut diagnostics = vec![]; - let mut last_end = None; + let mut after_keyword = false; - for line_match in KEYWORD_REGEX.find_iter(line) { - if last_end != Some(line_match.start()) { - let before = &line[..line_match.start()]; - match Whitespace::trailing(before) { - (Whitespace::Tab, offset) => { - diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into())); + for token in line.tokens() { + let is_keyword = is_keyword_token(token.kind()); + + if is_keyword { + let (start, end) = token.range(); + + if !after_keyword { + match line.leading_whitespace(&token) { + (Whitespace::Tab, offset) => diagnostics.push(( + Location::new(start.row(), start.column() - offset), + TabBeforeKeyword.into(), + )), + (Whitespace::Many, offset) => diagnostics.push(( + Location::new(start.row(), start.column() - offset), + MultipleSpacesBeforeKeyword.into(), + )), + _ => {} } - (Whitespace::Many, offset) => diagnostics.push(( - line_match.start() - offset, - MultipleSpacesBeforeKeyword.into(), - )), + } + + match line.trailing_whitespace(&token) { + Whitespace::Tab => diagnostics.push((end, TabAfterKeyword.into())), + Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterKeyword.into())), _ => {} } } - let after = &line[line_match.end()..]; - let (leading_offset, leading_kind) = Whitespace::leading(after); - match leading_kind { - Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())), - Whitespace::Many => { - diagnostics.push((line_match.end(), 
MultipleSpacesAfterKeyword.into())); - } - _ => {} - } - - last_end = Some(line_match.end() + leading_offset); + after_keyword = is_keyword; } + diagnostics } - -#[cfg(not(feature = "logical_lines"))] -pub fn whitespace_around_keywords(_line: &str) -> Vec<(usize, DiagnosticKind)> { - vec![] -} diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs new file mode 100644 index 0000000000..95f8938e22 --- /dev/null +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs @@ -0,0 +1,121 @@ +use rustpython_parser::ast::Location; +use rustpython_parser::Tok; + +use ruff_diagnostics::DiagnosticKind; +use ruff_diagnostics::Violation; +use ruff_macros::{derive_message_formats, violation}; + +use super::LogicalLineTokens; +use crate::rules::pycodestyle::helpers::is_op_token; + +#[violation] +pub struct UnexpectedSpacesAroundKeywordParameterEquals; + +impl Violation for UnexpectedSpacesAroundKeywordParameterEquals { + #[derive_message_formats] + fn message(&self) -> String { + format!("Unexpected spaces around keyword / parameter equals") + } +} + +#[violation] +pub struct MissingWhitespaceAroundParameterEquals; + +impl Violation for MissingWhitespaceAroundParameterEquals { + #[derive_message_formats] + fn message(&self) -> String { + format!("Missing whitespace around parameter equals") + } +} + +fn is_in_def(tokens: &LogicalLineTokens) -> bool { + for token in tokens { + match token.kind() { + Tok::Async | Tok::Indent | Tok::Dedent => continue, + Tok::Def => return true, + _ => return false, + } + } + + false +} + +/// E251, E252 +pub(crate) fn whitespace_around_named_parameter_equals( + tokens: &LogicalLineTokens, +) -> Vec<(Location, DiagnosticKind)> { + let mut diagnostics = vec![]; + let mut parens = 0; + let mut require_space = false; + let mut no_space = false; + let mut 
annotated_func_arg = false; + let mut prev_end: Option = None; + + let in_def = is_in_def(tokens); + + for token in tokens { + let kind = token.kind(); + + if kind == &Tok::NonLogicalNewline { + continue; + } + if no_space { + no_space = false; + if Some(token.start()) != prev_end { + diagnostics.push(( + prev_end.unwrap(), + UnexpectedSpacesAroundKeywordParameterEquals.into(), + )); + } + } + if require_space { + require_space = false; + let start = token.start(); + if Some(start) == prev_end { + diagnostics.push((start, MissingWhitespaceAroundParameterEquals.into())); + } + } + if is_op_token(kind) { + match kind { + Tok::Lpar | Tok::Lsqb => { + parens += 1; + } + Tok::Rpar | Tok::Rsqb => { + parens -= 1; + } + + Tok::Colon if parens == 1 && in_def => { + annotated_func_arg = true; + } + Tok::Comma if parens == 1 => { + annotated_func_arg = false; + } + Tok::Equal if parens > 0 => { + if annotated_func_arg && parens == 1 { + require_space = true; + let start = token.start(); + if Some(start) == prev_end { + diagnostics + .push((start, MissingWhitespaceAroundParameterEquals.into())); + } + } else { + no_space = true; + if Some(token.start()) != prev_end { + diagnostics.push(( + prev_end.unwrap(), + UnexpectedSpacesAroundKeywordParameterEquals.into(), + )); + } + } + } + _ => {} + } + + if parens < 1 { + annotated_func_arg = false; + } + } + prev_end = Some(token.end()); + } + diagnostics +} diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_comment.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs similarity index 85% rename from crates/ruff/src/rules/pycodestyle/rules/whitespace_before_comment.rs rename to crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs index 293ebcd8e5..8ab2bedc88 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_comment.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs @@ -1,8 +1,7 @@ 
-#![allow(dead_code, unused_imports, unused_variables)] - use rustpython_parser::ast::Location; use rustpython_parser::Tok; +use super::LogicalLineTokens; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; @@ -139,25 +138,29 @@ impl Violation for MultipleLeadingHashesForBlockComment { } /// E261, E262, E265, E266 -#[cfg(feature = "logical_lines")] -pub fn whitespace_before_comment( - tokens: &[(Location, &Tok, Location)], +pub(crate) fn whitespace_before_comment( + tokens: &LogicalLineTokens, locator: &Locator, ) -> Vec<(Range, DiagnosticKind)> { let mut diagnostics = vec![]; let mut prev_end = Location::new(0, 0); - for (start, tok, end) in tokens { - if let Tok::Comment(text) = tok { + for token in tokens { + let kind = token.kind(); + + if let Tok::Comment { .. } = kind { + let (start, end) = token.range(); let line = locator.slice(Range::new( Location::new(start.row(), 0), Location::new(start.row(), start.column()), )); + let text = locator.slice(Range::new(start, end)); + let is_inline_comment = !line.trim().is_empty(); if is_inline_comment { if prev_end.row() == start.row() && start.column() < prev_end.column() + 2 { diagnostics.push(( - Range::new(prev_end, *start), + Range::new(prev_end, start), TooFewSpacesBeforeInlineComment.into(), )); } @@ -177,32 +180,23 @@ pub fn whitespace_before_comment( if is_inline_comment { if bad_prefix.is_some() || comment.chars().next().map_or(false, char::is_whitespace) { - diagnostics.push((Range::new(*start, *end), NoSpaceAfterInlineComment.into())); + diagnostics.push((Range::new(start, end), NoSpaceAfterInlineComment.into())); } } else if let Some(bad_prefix) = bad_prefix { if bad_prefix != '!' 
|| start.row() > 1 { if bad_prefix != '#' { - diagnostics - .push((Range::new(*start, *end), NoSpaceAfterBlockComment.into())); + diagnostics.push((Range::new(start, end), NoSpaceAfterBlockComment.into())); } else if !comment.is_empty() { diagnostics.push(( - Range::new(*start, *end), + Range::new(start, end), MultipleLeadingHashesForBlockComment.into(), )); } } } - } else if !matches!(tok, Tok::NonLogicalNewline) { - prev_end = *end; + } else if !matches!(kind, Tok::NonLogicalNewline) { + prev_end = token.end(); } } diagnostics } - -#[cfg(not(feature = "logical_lines"))] -pub fn whitespace_before_comment( - _tokens: &[(Location, &Tok, Location)], - _locator: &Locator, -) -> Vec<(Range, DiagnosticKind)> { - vec![] -} diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_parameters.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs similarity index 55% rename from crates/ruff/src/rules/pycodestyle/rules/whitespace_before_parameters.rs rename to crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs index 79899c9934..10a5a3ab65 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_parameters.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs @@ -1,5 +1,3 @@ -#![allow(dead_code, unused_imports, unused_variables)] - use rustpython_parser::ast::Location; use rustpython_parser::Tok; @@ -7,8 +5,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::types::Range; -use crate::registry::AsRule; -use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token, is_soft_keyword_token}; +use super::LogicalLineTokens; #[violation] pub struct WhitespaceBeforeParameters { @@ -29,28 +26,34 @@ impl AlwaysAutofixableViolation for WhitespaceBeforeParameters { } /// E211 -#[cfg(feature = "logical_lines")] -pub fn 
whitespace_before_parameters( - tokens: &[(Location, &Tok, Location)], +pub(crate) fn whitespace_before_parameters( + tokens: &LogicalLineTokens, autofix: bool, ) -> Vec { let mut diagnostics = vec![]; - let (_, mut prev_token, mut prev_end) = tokens.first().unwrap(); - for (idx, (start, tok, end)) in tokens.iter().enumerate() { - if is_op_token(tok) - && (**tok == Tok::Lpar || **tok == Tok::Lsqb) - && *start != prev_end - && (matches!(prev_token, Tok::Name { .. }) - || matches!(prev_token, Tok::Rpar | Tok::Rsqb | Tok::Rbrace)) - && (idx < 2 || *(tokens[idx - 2].1) != Tok::Class) - && !is_keyword_token(tok) - && !is_soft_keyword_token(tok) + let previous = tokens.first().unwrap(); + + let mut pre_pre_kind: Option<&Tok> = None; + let mut prev_token = previous.kind(); + let mut prev_end = previous.end(); + + for token in tokens { + let kind = token.kind(); + + if matches!(kind, Tok::Lpar | Tok::Lsqb) + && token.start() != prev_end + && matches!( + prev_token, + Tok::Name { .. } | Tok::Rpar | Tok::Rsqb | Tok::Rbrace + ) + && (pre_pre_kind != Some(&Tok::Class)) { let start = Location::new(prev_end.row(), prev_end.column()); + let end = token.end(); let end = Location::new(end.row(), end.column() - 1); let kind: WhitespaceBeforeParameters = WhitespaceBeforeParameters { - bracket: tok.to_string(), + bracket: kind.to_string(), }; let mut diagnostic = Diagnostic::new(kind, Range::new(start, end)); @@ -60,16 +63,9 @@ pub fn whitespace_before_parameters( } diagnostics.push(diagnostic); } - prev_token = *tok; - prev_end = *end; + pre_pre_kind = Some(prev_token); + prev_token = kind; + prev_end = token.end(); } diagnostics } - -#[cfg(not(feature = "logical_lines"))] -pub fn whitespace_before_parameters( - _tokens: &[(Location, &Tok, Location)], - _autofix: bool, -) -> Vec { - vec![] -} diff --git a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace.rs deleted file mode 100644 index 626f7b331b..0000000000 
--- a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace.rs +++ /dev/null @@ -1,102 +0,0 @@ -#![allow(dead_code, unused_imports, unused_variables)] - -use itertools::Itertools; -use rustpython_parser::ast::Location; - -use ruff_diagnostics::Edit; -use ruff_diagnostics::Violation; -use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic}; -use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::types::Range; - -#[violation] -pub struct MissingWhitespace { - pub token: String, -} - -impl AlwaysAutofixableViolation for MissingWhitespace { - #[derive_message_formats] - fn message(&self) -> String { - let MissingWhitespace { token } = self; - format!("Missing whitespace after '{token}'") - } - - fn autofix_title(&self) -> String { - let MissingWhitespace { token } = self; - format!("Added missing whitespace after '{token}'") - } -} - -/// E231 -#[cfg(feature = "logical_lines")] -pub fn missing_whitespace( - line: &str, - row: usize, - autofix: bool, - indent_level: usize, -) -> Vec { - let mut diagnostics = vec![]; - - let mut num_lsqb = 0u32; - let mut num_rsqb = 0u32; - let mut prev_lsqb = None; - let mut prev_lbrace = None; - for (idx, (char, next_char)) in line.chars().tuple_windows().enumerate() { - match char { - '[' => { - num_lsqb += 1; - prev_lsqb = Some(idx); - } - ']' => { - num_rsqb += 1; - } - '{' => { - prev_lbrace = Some(idx); - } - - ',' | ';' | ':' if !next_char.is_whitespace() => { - if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace { - continue; // Slice syntax, no space required - } - if char == ',' && matches!(next_char, ')' | ']') { - continue; // Allow tuple with only one element: (3,) - } - if char == ':' && next_char == '=' { - continue; // Allow assignment expression - } - - let kind = MissingWhitespace { - token: char.to_string(), - }; - - let mut diagnostic = Diagnostic::new( - kind, - Range::new( - Location::new(row, indent_level + idx), - Location::new(row, indent_level + idx), - ), - ); - - 
if autofix { - diagnostic.set_fix(Edit::insertion( - " ".to_string(), - Location::new(row, indent_level + idx + 1), - )); - } - diagnostics.push(diagnostic); - } - _ => {} - } - } - diagnostics -} - -#[cfg(not(feature = "logical_lines"))] -pub fn missing_whitespace( - _line: &str, - _row: usize, - _autofix: bool, - indent_level: usize, -) -> Vec { - vec![] -} diff --git a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_after_keyword.rs b/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_after_keyword.rs deleted file mode 100644 index 7b20f57313..0000000000 --- a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_after_keyword.rs +++ /dev/null @@ -1,51 +0,0 @@ -#![allow(dead_code, unused_imports, unused_variables)] - -use rustpython_parser::ast::Location; -use rustpython_parser::Tok; - -use ruff_diagnostics::DiagnosticKind; -use ruff_diagnostics::Violation; -use ruff_macros::{derive_message_formats, violation}; - -use crate::rules::pycodestyle::helpers::{is_keyword_token, is_singleton_token}; - -#[violation] -pub struct MissingWhitespaceAfterKeyword; - -impl Violation for MissingWhitespaceAfterKeyword { - #[derive_message_formats] - fn message(&self) -> String { - format!("Missing whitespace after keyword") - } -} - -/// E275 -#[cfg(feature = "logical_lines")] -pub fn missing_whitespace_after_keyword( - tokens: &[(Location, &Tok, Location)], -) -> Vec<(Location, DiagnosticKind)> { - let mut diagnostics = vec![]; - - for (tok0, tok1) in tokens.iter().zip(&tokens[1..]) { - if tok0.2 == tok1.0 - && is_keyword_token(tok0.1) - && !is_singleton_token(tok0.1) - && *tok0.1 != Tok::Async - && *tok0.1 != Tok::Await - && !(*tok0.1 == Tok::Except && *tok1.1 == Tok::Star) - && !(*tok0.1 == Tok::Yield && *tok1.1 == Tok::Rpar) - && *tok1.1 != Tok::Colon - && *tok1.1 != Tok::Newline - { - diagnostics.push((tok0.2, MissingWhitespaceAfterKeyword.into())); - } - } - diagnostics -} - -#[cfg(not(feature = "logical_lines"))] -pub fn 
missing_whitespace_after_keyword( - _tokens: &[(Location, &Tok, Location)], -) -> Vec<(Location, DiagnosticKind)> { - vec![] -} diff --git a/crates/ruff/src/rules/pycodestyle/rules/mod.rs b/crates/ruff/src/rules/pycodestyle/rules/mod.rs index 97cdd26a59..9e4f26579d 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/mod.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/mod.rs @@ -8,55 +8,21 @@ pub use compound_statements::{ }; pub use doc_line_too_long::{doc_line_too_long, DocLineTooLong}; pub use errors::{syntax_error, IOError, SyntaxError}; -pub use extraneous_whitespace::{ - extraneous_whitespace, WhitespaceAfterOpenBracket, WhitespaceBeforeCloseBracket, - WhitespaceBeforePunctuation, -}; pub use imports::{ module_import_not_at_top_of_file, multiple_imports_on_one_line, ModuleImportNotAtTopOfFile, MultipleImportsOnOneLine, }; -pub use indentation::{ - indentation, IndentationWithInvalidMultiple, IndentationWithInvalidMultipleComment, - NoIndentedBlock, NoIndentedBlockComment, OverIndented, UnexpectedIndentation, - UnexpectedIndentationComment, -}; + pub use invalid_escape_sequence::{invalid_escape_sequence, InvalidEscapeSequence}; pub use lambda_assignment::{lambda_assignment, LambdaAssignment}; pub use line_too_long::{line_too_long, LineTooLong}; pub use literal_comparisons::{literal_comparisons, NoneComparison, TrueFalseComparison}; pub use missing_newline_at_end_of_file::{no_newline_at_end_of_file, MissingNewlineAtEndOfFile}; -pub use missing_whitespace::{missing_whitespace, MissingWhitespace}; -pub use missing_whitespace_after_keyword::{ - missing_whitespace_after_keyword, MissingWhitespaceAfterKeyword, -}; -pub use missing_whitespace_around_operator::{ - missing_whitespace_around_operator, MissingWhitespaceAroundArithmeticOperator, - MissingWhitespaceAroundBitwiseOrShiftOperator, MissingWhitespaceAroundModuloOperator, - MissingWhitespaceAroundOperator, -}; pub use mixed_spaces_and_tabs::{mixed_spaces_and_tabs, MixedSpacesAndTabs}; pub use 
not_tests::{not_tests, NotInTest, NotIsTest}; -pub use space_around_operator::{ - space_around_operator, MultipleSpacesAfterOperator, MultipleSpacesBeforeOperator, - TabAfterOperator, TabBeforeOperator, -}; pub use tab_indentation::{tab_indentation, TabIndentation}; pub use trailing_whitespace::{trailing_whitespace, BlankLineWithWhitespace, TrailingWhitespace}; pub use type_comparison::{type_comparison, TypeComparison}; -pub use whitespace_around_keywords::{ - whitespace_around_keywords, MultipleSpacesAfterKeyword, MultipleSpacesBeforeKeyword, - TabAfterKeyword, TabBeforeKeyword, -}; -pub use whitespace_around_named_parameter_equals::{ - whitespace_around_named_parameter_equals, MissingWhitespaceAroundParameterEquals, - UnexpectedSpacesAroundKeywordParameterEquals, -}; -pub use whitespace_before_comment::{ - whitespace_before_comment, MultipleLeadingHashesForBlockComment, NoSpaceAfterBlockComment, - NoSpaceAfterInlineComment, TooFewSpacesBeforeInlineComment, -}; -pub use whitespace_before_parameters::{whitespace_before_parameters, WhitespaceBeforeParameters}; mod ambiguous_class_name; mod ambiguous_function_name; @@ -65,81 +31,16 @@ mod bare_except; mod compound_statements; mod doc_line_too_long; mod errors; -mod extraneous_whitespace; mod imports; -mod indentation; mod invalid_escape_sequence; mod lambda_assignment; mod line_too_long; mod literal_comparisons; +#[cfg(feature = "logical_lines")] +pub(crate) mod logical_lines; mod missing_newline_at_end_of_file; -mod missing_whitespace; -mod missing_whitespace_after_keyword; -mod missing_whitespace_around_operator; mod mixed_spaces_and_tabs; mod not_tests; -mod space_around_operator; mod tab_indentation; mod trailing_whitespace; mod type_comparison; -mod whitespace_around_keywords; -mod whitespace_around_named_parameter_equals; -mod whitespace_before_comment; -mod whitespace_before_parameters; - -#[allow(unused)] -enum Whitespace { - None, - Single, - Many, - Tab, -} - -impl Whitespace { - #[allow(dead_code)] - fn 
leading(content: &str) -> (usize, Self) { - let mut offset = 0; - let mut kind = Self::None; - - for c in content.chars() { - if c == '\t' { - kind = Self::Tab; - offset += 1; - } else if c.is_whitespace() { - kind = match kind { - Whitespace::None => Whitespace::Single, - Whitespace::Single | Whitespace::Many => Whitespace::Many, - Whitespace::Tab => Whitespace::Tab, - }; - offset += c.len_utf8(); - } else { - break; - } - } - - (offset, kind) - } - - #[allow(dead_code)] - fn trailing(content: &str) -> (Self, usize) { - let mut count = 0u32; - let mut offset = 0; - - for c in content.chars().rev() { - if c == '\t' { - return (Self::Tab, offset + 1); - } else if c.is_whitespace() { - count += 1; - offset += c.len_utf8(); - } else { - break; - } - } - - match count { - 0 => (Self::None, 0), - 1 => (Self::Single, offset), - _ => (Self::Many, offset), - } - } -} diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_named_parameter_equals.rs b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_named_parameter_equals.rs deleted file mode 100644 index 35afdb0906..0000000000 --- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_named_parameter_equals.rs +++ /dev/null @@ -1,113 +0,0 @@ -#![allow(dead_code, unused_imports, unused_variables)] - -use once_cell::sync::Lazy; -use regex::Regex; -use rustpython_parser::ast::Location; -use rustpython_parser::Tok; - -use ruff_diagnostics::DiagnosticKind; -use ruff_diagnostics::Violation; -use ruff_macros::{derive_message_formats, violation}; - -#[cfg(feature = "logical_lines")] -use crate::rules::pycodestyle::helpers::is_op_token; - -#[violation] -pub struct UnexpectedSpacesAroundKeywordParameterEquals; - -impl Violation for UnexpectedSpacesAroundKeywordParameterEquals { - #[derive_message_formats] - fn message(&self) -> String { - format!("Unexpected spaces around keyword / parameter equals") - } -} - -#[violation] -pub struct MissingWhitespaceAroundParameterEquals; - -impl Violation for 
MissingWhitespaceAroundParameterEquals { - #[derive_message_formats] - fn message(&self) -> String { - format!("Missing whitespace around parameter equals") - } -} - -static STARTSWITH_DEF_REGEX: Lazy = - Lazy::new(|| Regex::new(r"^(async\s+def|def)\b").unwrap()); - -/// E251, E252 -#[cfg(feature = "logical_lines")] -pub fn whitespace_around_named_parameter_equals( - tokens: &[(Location, &Tok, Location)], - line: &str, -) -> Vec<(Location, DiagnosticKind)> { - let mut diagnostics = vec![]; - let mut parens = 0; - let mut require_space = false; - let mut no_space = false; - let mut annotated_func_arg = false; - let mut prev_end: Option<&Location> = None; - - let in_def = STARTSWITH_DEF_REGEX.is_match(line); - - for (start, token, end) in tokens { - if **token == Tok::NonLogicalNewline { - continue; - } - if no_space { - no_space = false; - if Some(start) != prev_end { - diagnostics.push(( - *(prev_end.unwrap()), - UnexpectedSpacesAroundKeywordParameterEquals.into(), - )); - } - } - if require_space { - require_space = false; - if Some(start) == prev_end { - diagnostics.push((*start, MissingWhitespaceAroundParameterEquals.into())); - } - } - if is_op_token(token) { - if **token == Tok::Lpar || **token == Tok::Lsqb { - parens += 1; - } else if **token == Tok::Rpar || **token == Tok::Rsqb { - parens -= 1; - } else if in_def && **token == Tok::Colon && parens == 1 { - annotated_func_arg = true; - } else if parens == 1 && **token == Tok::Comma { - annotated_func_arg = false; - } else if parens > 0 && **token == Tok::Equal { - if annotated_func_arg && parens == 1 { - require_space = true; - if Some(start) == prev_end { - diagnostics.push((*start, MissingWhitespaceAroundParameterEquals.into())); - } - } else { - no_space = true; - if Some(start) != prev_end { - diagnostics.push(( - *(prev_end.unwrap()), - UnexpectedSpacesAroundKeywordParameterEquals.into(), - )); - } - } - } - - if parens < 1 { - annotated_func_arg = false; - } - } - prev_end = Some(end); - } - diagnostics 
-} - -#[cfg(not(feature = "logical_lines"))] -pub fn whitespace_around_named_parameter_equals( - _tokens: &[(Location, &Tok, Location)], - _line: &str, -) -> Vec<(Location, DiagnosticKind)> { - vec![] -}