perf(pycodestyle): Refactor checks to iterate over tokens instead of text (#3736)

Micha Reiser 2023-03-28 10:37:13 +02:00 committed by GitHub
parent 1d724b1495
commit 2fdf98ef4e
22 changed files with 1225 additions and 1102 deletions
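
In broad strokes, the refactor moves the logical-line checks off a normalized copy of the line's text (plus a byte-offset-to-`Location` source map) and onto the tokens of each `LogicalLine`. A minimal sketch of the signature shift, paraphrased from the hunks below (types simplified, bodies elided; not the exact code):

// Before: a check scanned the normalized line text and returned byte offsets
// that the caller translated back to source positions via `LogicalLine::mapping`.
pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> { /* ... */ }

// After: a check iterates the line's tokens and reports `Location`s directly,
// so the normalized text buffer and the offset mapping disappear.
pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> { /* ... */ }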


@ -1,6 +1,3 @@
#![allow(dead_code, unused_imports, unused_variables)]
use itertools::Itertools;
use rustpython_parser::ast::Location; use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult; use rustpython_parser::lexer::LexResult;
@ -9,12 +6,11 @@ use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_ast::types::Range; use ruff_python_ast::types::Range;
use crate::registry::{AsRule, Rule}; use crate::registry::{AsRule, Rule};
use crate::rules::pycodestyle::logical_lines::{LogicalLines, TokenFlags}; use crate::rules::pycodestyle::rules::logical_lines::{
use crate::rules::pycodestyle::rules::{
extraneous_whitespace, indentation, missing_whitespace, missing_whitespace_after_keyword, extraneous_whitespace, indentation, missing_whitespace, missing_whitespace_after_keyword,
missing_whitespace_around_operator, space_around_operator, whitespace_around_keywords, missing_whitespace_around_operator, space_around_operator, whitespace_around_keywords,
whitespace_around_named_parameter_equals, whitespace_before_comment, whitespace_around_named_parameter_equals, whitespace_before_comment,
whitespace_before_parameters, whitespace_before_parameters, LogicalLines, TokenFlags,
}; };
use crate::settings::{flags, Settings}; use crate::settings::{flags, Settings};
@ -57,21 +53,14 @@ pub fn check_logical_lines(
#[cfg(not(feature = "logical_lines"))] #[cfg(not(feature = "logical_lines"))]
let should_fix_whitespace_before_parameters = false; let should_fix_whitespace_before_parameters = false;
let indent_char = stylist.indentation().as_char();
let mut prev_line = None; let mut prev_line = None;
let mut prev_indent_level = None; let mut prev_indent_level = None;
for line in &LogicalLines::from_tokens(tokens, locator) { let indent_char = stylist.indentation().as_char();
// Extract the indentation level.
let Some(start_loc) = line.first_token_location() else { continue; };
let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), *start_loc));
let indent_level = expand_indent(start_line);
let indent_size = 4;
for line in &LogicalLines::from_tokens(tokens, locator) {
if line.flags().contains(TokenFlags::OPERATOR) { if line.flags().contains(TokenFlags::OPERATOR) {
for (index, kind) in space_around_operator(line.text()) { for (location, kind) in space_around_operator(&line) {
if settings.rules.enabled(kind.rule()) { if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic { diagnostics.push(Diagnostic {
kind, kind,
location, location,
@ -86,10 +75,8 @@ pub fn check_logical_lines(
.flags() .flags()
.contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION) .contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION)
{ {
for (index, kind) in extraneous_whitespace(line.text()) { for (location, kind) in extraneous_whitespace(&line) {
if settings.rules.enabled(kind.rule()) { if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic { diagnostics.push(Diagnostic {
kind, kind,
location, location,
@ -101,10 +88,8 @@ pub fn check_logical_lines(
} }
} }
if line.flags().contains(TokenFlags::KEYWORD) { if line.flags().contains(TokenFlags::KEYWORD) {
for (index, kind) in whitespace_around_keywords(line.text()) { for (location, kind) in whitespace_around_keywords(&line) {
if settings.rules.enabled(kind.rule()) { if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic { diagnostics.push(Diagnostic {
kind, kind,
location, location,
@ -115,7 +100,7 @@ pub fn check_logical_lines(
} }
} }
for (location, kind) in missing_whitespace_after_keyword(line.tokens()) { for (location, kind) in missing_whitespace_after_keyword(&line.tokens()) {
if settings.rules.enabled(kind.rule()) { if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic { diagnostics.push(Diagnostic {
kind, kind,
@ -128,7 +113,7 @@ pub fn check_logical_lines(
} }
} }
if line.flags().contains(TokenFlags::COMMENT) { if line.flags().contains(TokenFlags::COMMENT) {
for (range, kind) in whitespace_before_comment(line.tokens(), locator) { for (range, kind) in whitespace_before_comment(&line.tokens(), locator) {
if settings.rules.enabled(kind.rule()) { if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic { diagnostics.push(Diagnostic {
kind, kind,
@ -141,9 +126,7 @@ pub fn check_logical_lines(
} }
} }
if line.flags().contains(TokenFlags::OPERATOR) { if line.flags().contains(TokenFlags::OPERATOR) {
for (location, kind) in for (location, kind) in whitespace_around_named_parameter_equals(&line.tokens()) {
whitespace_around_named_parameter_equals(line.tokens(), line.text())
{
if settings.rules.enabled(kind.rule()) { if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic { diagnostics.push(Diagnostic {
kind, kind,
@ -154,7 +137,7 @@ pub fn check_logical_lines(
}); });
} }
} }
for (location, kind) in missing_whitespace_around_operator(line.tokens()) { for (location, kind) in missing_whitespace_around_operator(&line.tokens()) {
if settings.rules.enabled(kind.rule()) { if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic { diagnostics.push(Diagnostic {
kind, kind,
@ -166,12 +149,7 @@ pub fn check_logical_lines(
} }
} }
for diagnostic in missing_whitespace( for diagnostic in missing_whitespace(&line, should_fix_missing_whitespace) {
line.text(),
start_loc.row(),
should_fix_missing_whitespace,
indent_level,
) {
if settings.rules.enabled(diagnostic.kind.rule()) { if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic); diagnostics.push(diagnostic);
} }
@ -179,16 +157,23 @@ pub fn check_logical_lines(
} }
if line.flags().contains(TokenFlags::BRACKET) { if line.flags().contains(TokenFlags::BRACKET) {
for diagnostic in for diagnostic in whitespace_before_parameters(
whitespace_before_parameters(line.tokens(), should_fix_whitespace_before_parameters) &line.tokens(),
{ should_fix_whitespace_before_parameters,
) {
if settings.rules.enabled(diagnostic.kind.rule()) { if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic); diagnostics.push(diagnostic);
} }
} }
} }
for (index, kind) in indentation( // Extract the indentation level.
let Some(start_loc) = line.first_token_location() else { continue; };
let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), start_loc));
let indent_level = expand_indent(start_line);
let indent_size = 4;
for (location, kind) in indentation(
&line, &line,
prev_line.as_ref(), prev_line.as_ref(),
indent_char, indent_char,
@ -196,8 +181,6 @@ pub fn check_logical_lines(
prev_indent_level, prev_indent_level,
indent_size, indent_size,
) { ) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
if settings.rules.enabled(kind.rule()) { if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic { diagnostics.push(Diagnostic {
kind, kind,
@ -209,7 +192,7 @@ pub fn check_logical_lines(
} }
} }
if !line.is_comment() { if !line.is_comment_only() {
prev_line = Some(line); prev_line = Some(line);
prev_indent_level = Some(indent_level); prev_indent_level = Some(indent_level);
} }
@ -222,7 +205,7 @@ mod tests {
use rustpython_parser::lexer::LexResult; use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode}; use rustpython_parser::{lexer, Mode};
use crate::rules::pycodestyle::logical_lines::LogicalLines; use crate::rules::pycodestyle::rules::logical_lines::LogicalLines;
use ruff_python_ast::source_code::Locator; use ruff_python_ast::source_code::Locator;
#[test] #[test]
@ -235,7 +218,7 @@ z = x + 1"#;
let locator = Locator::new(contents); let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator) let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter() .into_iter()
.map(|line| line.text().to_string()) .map(|line| line.text_trimmed().to_string())
.collect(); .collect();
let expected = vec![ let expected = vec![
"x = 1".to_string(), "x = 1".to_string(),
@ -256,10 +239,10 @@ z = x + 1"#;
let locator = Locator::new(contents); let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator) let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter() .into_iter()
.map(|line| line.text().to_string()) .map(|line| line.text_trimmed().to_string())
.collect(); .collect();
let expected = vec![ let expected = vec![
"x = [1, 2, 3, ]".to_string(), "x = [\n 1,\n 2,\n 3,\n]".to_string(),
"y = 2".to_string(), "y = 2".to_string(),
"z = x + 1".to_string(), "z = x + 1".to_string(),
]; ];
@ -270,9 +253,9 @@ z = x + 1"#;
let locator = Locator::new(contents); let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator) let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter() .into_iter()
.map(|line| line.text().to_string()) .map(|line| line.text_trimmed().to_string())
.collect(); .collect();
let expected = vec!["x = \"xxx\"".to_string()]; let expected = vec!["x = 'abc'".to_string()];
assert_eq!(actual, expected); assert_eq!(actual, expected);
let contents = r#" let contents = r#"
@ -283,7 +266,7 @@ f()"#;
let locator = Locator::new(contents); let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator) let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter() .into_iter()
.map(|line| line.text().to_string()) .map(|line| line.text_trimmed().to_string())
.collect(); .collect();
let expected = vec!["def f():", "x = 1", "f()"]; let expected = vec!["def f():", "x = 1", "f()"];
assert_eq!(actual, expected); assert_eq!(actual, expected);
@ -298,9 +281,15 @@ f()"#;
let locator = Locator::new(contents); let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator) let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter() .into_iter()
.map(|line| line.text().to_string()) .map(|line| line.text_trimmed().to_string())
.collect(); .collect();
let expected = vec!["def f():", "\"xxxxxxxxxxxxxxxxxxxx\"", "", "x = 1", "f()"]; let expected = vec![
"def f():",
"\"\"\"Docstring goes here.\"\"\"",
"",
"x = 1",
"f()",
];
assert_eq!(actual, expected); assert_eq!(actual, expected);
} }
} }


@ -1,7 +1,8 @@
pub mod ast; pub mod ast;
pub mod filesystem; pub mod filesystem;
pub mod imports; pub mod imports;
pub mod logical_lines; #[cfg(feature = "logical_lines")]
pub(crate) mod logical_lines;
pub mod noqa; pub mod noqa;
pub mod physical_lines; pub mod physical_lines;
pub mod tokens; pub mod tokens;


@ -16,7 +16,6 @@ use crate::autofix::fix_file;
use crate::checkers::ast::check_ast; use crate::checkers::ast::check_ast;
use crate::checkers::filesystem::check_file_path; use crate::checkers::filesystem::check_file_path;
use crate::checkers::imports::check_imports; use crate::checkers::imports::check_imports;
use crate::checkers::logical_lines::check_logical_lines;
use crate::checkers::noqa::check_noqa; use crate::checkers::noqa::check_noqa;
use crate::checkers::physical_lines::check_physical_lines; use crate::checkers::physical_lines::check_physical_lines;
use crate::checkers::tokens::check_tokens; use crate::checkers::tokens::check_tokens;
@ -105,7 +104,8 @@ pub fn check_path(
.iter_enabled() .iter_enabled()
.any(|rule_code| rule_code.lint_source().is_logical_lines()) .any(|rule_code| rule_code.lint_source().is_logical_lines())
{ {
diagnostics.extend(check_logical_lines( #[cfg(feature = "logical_lines")]
diagnostics.extend(crate::checkers::logical_lines::check_logical_lines(
&tokens, &tokens,
locator, locator,
stylist, stylist,


@ -15,67 +15,67 @@ ruff_macros::register_rules!(
// pycodestyle errors // pycodestyle errors
rules::pycodestyle::rules::MixedSpacesAndTabs, rules::pycodestyle::rules::MixedSpacesAndTabs,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::IndentationWithInvalidMultiple, rules::pycodestyle::rules::logical_lines::IndentationWithInvalidMultiple,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::NoIndentedBlock, rules::pycodestyle::rules::logical_lines::NoIndentedBlock,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::UnexpectedIndentation, rules::pycodestyle::rules::logical_lines::UnexpectedIndentation,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::IndentationWithInvalidMultipleComment, rules::pycodestyle::rules::logical_lines::IndentationWithInvalidMultipleComment,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::NoIndentedBlockComment, rules::pycodestyle::rules::logical_lines::NoIndentedBlockComment,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::UnexpectedIndentationComment, rules::pycodestyle::rules::logical_lines::UnexpectedIndentationComment,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::OverIndented, rules::pycodestyle::rules::logical_lines::OverIndented,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::WhitespaceAfterOpenBracket, rules::pycodestyle::rules::logical_lines::WhitespaceAfterOpenBracket,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::WhitespaceBeforeCloseBracket, rules::pycodestyle::rules::logical_lines::WhitespaceBeforeCloseBracket,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::WhitespaceBeforePunctuation, rules::pycodestyle::rules::logical_lines::WhitespaceBeforePunctuation,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MultipleSpacesBeforeOperator, rules::pycodestyle::rules::logical_lines::MultipleSpacesBeforeOperator,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MultipleSpacesAfterOperator, rules::pycodestyle::rules::logical_lines::MultipleSpacesAfterOperator,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::TabBeforeOperator, rules::pycodestyle::rules::logical_lines::TabBeforeOperator,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::TabAfterOperator, rules::pycodestyle::rules::logical_lines::TabAfterOperator,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::TooFewSpacesBeforeInlineComment, rules::pycodestyle::rules::logical_lines::TooFewSpacesBeforeInlineComment,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::NoSpaceAfterInlineComment, rules::pycodestyle::rules::logical_lines::NoSpaceAfterInlineComment,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::NoSpaceAfterBlockComment, rules::pycodestyle::rules::logical_lines::NoSpaceAfterBlockComment,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MultipleLeadingHashesForBlockComment, rules::pycodestyle::rules::logical_lines::MultipleLeadingHashesForBlockComment,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MultipleSpacesAfterKeyword, rules::pycodestyle::rules::logical_lines::MultipleSpacesAfterKeyword,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespace, rules::pycodestyle::rules::logical_lines::MissingWhitespace,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAfterKeyword, rules::pycodestyle::rules::logical_lines::MissingWhitespaceAfterKeyword,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MultipleSpacesBeforeKeyword, rules::pycodestyle::rules::logical_lines::MultipleSpacesBeforeKeyword,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAroundOperator, rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundOperator,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAroundArithmeticOperator, rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundArithmeticOperator,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAroundBitwiseOrShiftOperator, rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundBitwiseOrShiftOperator,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAroundModuloOperator, rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundModuloOperator,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::TabAfterKeyword, rules::pycodestyle::rules::logical_lines::TabAfterKeyword,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::UnexpectedSpacesAroundKeywordParameterEquals, rules::pycodestyle::rules::logical_lines::UnexpectedSpacesAroundKeywordParameterEquals,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAroundParameterEquals, rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundParameterEquals,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::WhitespaceBeforeParameters, rules::pycodestyle::rules::logical_lines::WhitespaceBeforeParameters,
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::TabBeforeKeyword, rules::pycodestyle::rules::logical_lines::TabBeforeKeyword,
rules::pycodestyle::rules::MultipleImportsOnOneLine, rules::pycodestyle::rules::MultipleImportsOnOneLine,
rules::pycodestyle::rules::ModuleImportNotAtTopOfFile, rules::pycodestyle::rules::ModuleImportNotAtTopOfFile,
rules::pycodestyle::rules::LineTooLong, rules::pycodestyle::rules::LineTooLong,


@ -1,4 +1,5 @@
use rustpython_parser::ast::{Cmpop, Expr, ExprKind}; use rustpython_parser::ast::{Cmpop, Expr, ExprKind};
#[cfg(feature = "logical_lines")]
use rustpython_parser::Tok; use rustpython_parser::Tok;
use unicode_width::UnicodeWidthStr; use unicode_width::UnicodeWidthStr;
@ -58,6 +59,7 @@ pub fn is_overlong(
true true
} }
#[cfg(feature = "logical_lines")]
pub const fn is_keyword_token(token: &Tok) -> bool { pub const fn is_keyword_token(token: &Tok) -> bool {
matches!( matches!(
token, token,
@ -98,6 +100,7 @@ pub const fn is_keyword_token(token: &Tok) -> bool {
) )
} }
#[cfg(feature = "logical_lines")]
pub const fn is_singleton_token(token: &Tok) -> bool { pub const fn is_singleton_token(token: &Tok) -> bool {
matches!( matches!(
token, token,
@ -105,6 +108,7 @@ pub const fn is_singleton_token(token: &Tok) -> bool {
) )
} }
#[cfg(feature = "logical_lines")]
pub const fn is_op_token(token: &Tok) -> bool { pub const fn is_op_token(token: &Tok) -> bool {
matches!( matches!(
token, token,
@ -157,6 +161,7 @@ pub const fn is_op_token(token: &Tok) -> bool {
) )
} }
#[cfg(feature = "logical_lines")]
pub const fn is_skip_comment_token(token: &Tok) -> bool { pub const fn is_skip_comment_token(token: &Tok) -> bool {
matches!( matches!(
token, token,
@ -164,10 +169,12 @@ pub const fn is_skip_comment_token(token: &Tok) -> bool {
) )
} }
#[cfg(feature = "logical_lines")]
pub const fn is_soft_keyword_token(token: &Tok) -> bool { pub const fn is_soft_keyword_token(token: &Tok) -> bool {
matches!(token, Tok::Match | Tok::Case) matches!(token, Tok::Match | Tok::Case)
} }
#[cfg(feature = "logical_lines")]
pub const fn is_arithmetic_token(token: &Tok) -> bool { pub const fn is_arithmetic_token(token: &Tok) -> bool {
matches!( matches!(
token, token,
@ -175,6 +182,7 @@ pub const fn is_arithmetic_token(token: &Tok) -> bool {
) )
} }
#[cfg(feature = "logical_lines")]
pub const fn is_ws_optional_token(token: &Tok) -> bool { pub const fn is_ws_optional_token(token: &Tok) -> bool {
is_arithmetic_token(token) is_arithmetic_token(token)
|| matches!( || matches!(
@ -188,6 +196,7 @@ pub const fn is_ws_optional_token(token: &Tok) -> bool {
) )
} }
#[cfg(feature = "logical_lines")]
pub const fn is_ws_needed_token(token: &Tok) -> bool { pub const fn is_ws_needed_token(token: &Tok) -> bool {
matches!( matches!(
token, token,
@ -218,6 +227,7 @@ pub const fn is_ws_needed_token(token: &Tok) -> bool {
) )
} }
#[cfg(feature = "logical_lines")]
pub const fn is_unary_token(token: &Tok) -> bool { pub const fn is_unary_token(token: &Tok) -> bool {
matches!( matches!(
token, token,


@ -1,416 +0,0 @@
use bitflags::bitflags;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use std::borrow::Cow;
use std::fmt::{Debug, Formatter};
use std::iter::FusedIterator;
use unicode_width::UnicodeWidthStr;
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range;
use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token};
bitflags! {
#[derive(Default)]
pub struct TokenFlags: u8 {
/// Whether the logical line contains an operator.
const OPERATOR = 0b0000_0001;
/// Whether the logical line contains a bracket.
const BRACKET = 0b0000_0010;
/// Whether the logical line contains a punctuation mark.
const PUNCTUATION = 0b0000_0100;
/// Whether the logical line contains a keyword.
const KEYWORD = 0b0000_1000;
/// Whether the logical line contains a comment.
const COMMENT = 0b0001_0000;
}
}
#[derive(Clone)]
pub struct LogicalLines<'a> {
text: String,
/// start position, token, end position
tokens: Vec<(Location, &'a Tok, Location)>,
mappings: Mappings,
lines: Vec<Line>,
}
impl<'a> LogicalLines<'a> {
pub fn from_tokens(tokens: &'a [LexResult], locator: &Locator) -> Self {
assert!(u32::try_from(tokens.len()).is_ok());
let single_token = tokens.len() == 1;
let mut builder =
LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len());
let mut parens: u32 = 0;
for (start, token, end) in tokens.iter().flatten() {
builder.push_token(*start, token, *end, locator);
match token {
Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
parens += 1;
}
Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
parens -= 1;
}
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(_) if parens == 0 => {
if matches!(token, Tok::Newline) {
builder.finish_line();
}
// Comment-only file or non-logical newline?
else if single_token {
builder.discard_line();
} else {
builder.finish_line();
};
}
_ => {}
}
}
builder.finish()
}
}
impl std::fmt::Debug for LogicalLines<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_list()
.entries(self.into_iter().map(DebugLogicalLine))
.finish()
}
}
impl<'a> IntoIterator for &'a LogicalLines<'a> {
type Item = LogicalLine<'a>;
type IntoIter = LogicalLinesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
LogicalLinesIter {
lines: self,
inner: self.lines.iter(),
}
}
}
#[derive(Debug, Clone)]
struct Line {
flags: TokenFlags,
/// Byte offset of the start of the text of this line.
text_start: u32,
/// Byte offset of the end of the text of this line.
text_end: u32,
mappings_start: u32,
mappings_end: u32,
tokens_start: u32,
tokens_end: u32,
}
#[derive(Debug)]
pub struct LogicalLine<'a> {
lines: &'a LogicalLines<'a>,
line: &'a Line,
}
impl<'a> LogicalLine<'a> {
/// Returns true if this is a comment only line
pub fn is_comment(&self) -> bool {
self.text().is_empty() && self.flags().contains(TokenFlags::COMMENT)
}
/// Returns the text of this line
pub fn text(&self) -> &'a str {
&self.lines.text[self.line.text_start as usize..self.line.text_end as usize]
}
/// Returns the tokens of the line
pub fn tokens(&self) -> &'a [(Location, &'a Tok, Location)] {
&self.lines.tokens[self.line.tokens_start as usize..self.line.tokens_end as usize]
}
/// Returns the [`Location`] of the first token on the line or [`None`].
pub fn first_token_location(&self) -> Option<&Location> {
self.token_locations().first()
}
fn token_offsets(&self) -> &[u32] {
&self.lines.mappings.logical_line_offsets
[self.line.mappings_start as usize..self.line.mappings_end as usize]
}
fn token_locations(&self) -> &[Location] {
&self.lines.mappings.locations
[self.line.mappings_start as usize..self.line.mappings_end as usize]
}
/// Returns the mapping for an offset in the logical line.
///
/// The offset of the closest token and its corresponding location.
pub fn mapping(&self, offset: usize) -> (usize, Location) {
let index = self
.token_offsets()
.binary_search(&(self.line.text_start + u32::try_from(offset).unwrap()))
.unwrap_or_default();
(
(self.token_offsets()[index] - self.line.text_start) as usize,
self.token_locations()[index],
)
}
pub fn is_empty(&self) -> bool {
self.lines.mappings.is_empty()
}
pub const fn flags(&self) -> TokenFlags {
self.line.flags
}
}
struct DebugLogicalLine<'a>(LogicalLine<'a>);
impl Debug for DebugLogicalLine<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LogicalLine")
.field("text", &self.0.text())
.field("flags", &self.0.flags())
.field("tokens", &self.0.tokens())
.finish()
}
}
/// Iterator over the logical lines of a document.
pub struct LogicalLinesIter<'a> {
lines: &'a LogicalLines<'a>,
inner: std::slice::Iter<'a, Line>,
}
impl<'a> Iterator for LogicalLinesIter<'a> {
type Item = LogicalLine<'a>;
fn next(&mut self) -> Option<Self::Item> {
let line = self.inner.next()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
}
impl DoubleEndedIterator for LogicalLinesIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
let line = self.inner.next_back()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
}
impl ExactSizeIterator for LogicalLinesIter<'_> {}
impl FusedIterator for LogicalLinesIter<'_> {}
/// Source map that maps byte positions in the logical line text to the [`Location`] in the
/// original document.
#[derive(Debug, Default, Clone)]
struct Mappings {
/// byte offsets of the logical lines at which tokens start/end.
logical_line_offsets: Vec<u32>,
/// Corresponding [`Location`]s for each byte offset mapping it to the position in the original document.
locations: Vec<Location>,
}
impl Mappings {
fn with_capacity(capacity: usize) -> Self {
Self {
logical_line_offsets: Vec::with_capacity(capacity),
locations: Vec::with_capacity(capacity),
}
}
fn len(&self) -> usize {
self.logical_line_offsets.len()
}
fn is_empty(&self) -> bool {
self.logical_line_offsets.is_empty()
}
fn truncate(&mut self, len: usize) {
self.locations.truncate(len);
self.logical_line_offsets.truncate(len);
}
#[allow(clippy::cast_possible_truncation)]
fn push(&mut self, offset: usize, location: Location) {
self.logical_line_offsets.push(offset as u32);
self.locations.push(location);
}
}
#[derive(Debug, Default)]
struct CurrentLine {
flags: TokenFlags,
text_start: u32,
mappings_start: u32,
tokens_start: u32,
previous_token: Option<Location>,
}
#[derive(Debug, Default)]
pub struct LogicalLinesBuilder<'a> {
text: String,
tokens: Vec<(Location, &'a Tok, Location)>,
mappings: Mappings,
lines: Vec<Line>,
current_line: Option<CurrentLine>,
}
impl<'a> LogicalLinesBuilder<'a> {
fn with_capacity(tokens: usize, string: usize) -> Self {
Self {
tokens: Vec::with_capacity(tokens),
mappings: Mappings::with_capacity(tokens + 1),
text: String::with_capacity(string),
..Self::default()
}
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn push_token(&mut self, start: Location, token: &'a Tok, end: Location, locator: &Locator) {
let tokens_start = self.tokens.len();
self.tokens.push((start, token, end));
let mut line = self.current_line.get_or_insert_with(|| {
let mappings_start = self.mappings.len();
self.mappings.push(self.text.len(), start);
CurrentLine {
flags: TokenFlags::empty(),
text_start: self.text.len() as u32,
mappings_start: mappings_start as u32,
tokens_start: tokens_start as u32,
previous_token: None,
}
});
if matches!(
token,
Tok::Newline | Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent
) {
return;
}
if matches!(token, Tok::Comment(..)) {
line.flags.insert(TokenFlags::COMMENT);
return;
}
if is_op_token(token) {
line.flags.insert(TokenFlags::OPERATOR);
}
if matches!(
token,
Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
) {
line.flags.insert(TokenFlags::BRACKET);
}
if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
line.flags.insert(TokenFlags::PUNCTUATION);
}
if is_keyword_token(token) {
line.flags.insert(TokenFlags::KEYWORD);
}
// TODO(charlie): "Mute" strings.
let text = if let Tok::String { value, .. } = token {
// Replace the content of strings with a non-whitespace sequence because some
// lints search for whitespace in the document, and whitespace inside a string
// would complicate the search.
Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
} else {
Cow::Borrowed(locator.slice(Range {
location: start,
end_location: end,
}))
};
if let Some(prev) = line.previous_token.take() {
if prev.row() != start.row() {
let prev_text = locator.slice(Range {
location: Location::new(prev.row(), prev.column() - 1),
end_location: Location::new(prev.row(), prev.column()),
});
if prev_text == ","
|| ((prev_text != "{" && prev_text != "[" && prev_text != "(")
&& (text != "}" && text != "]" && text != ")"))
{
self.text.push(' ');
}
} else if prev.column() != start.column() {
let prev_text = locator.slice(Range {
location: prev,
end_location: start,
});
self.text.push_str(prev_text);
}
}
line.previous_token = Some(end);
self.text.push_str(&text);
self.mappings.push(self.text.len(), end);
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn finish_line(&mut self) {
if let Some(current) = self.current_line.take() {
self.lines.push(Line {
flags: current.flags,
text_start: current.text_start,
text_end: self.text.len() as u32,
mappings_start: current.mappings_start,
mappings_end: self.mappings.len() as u32,
tokens_start: current.tokens_start,
tokens_end: self.tokens.len() as u32,
});
}
}
fn discard_line(&mut self) {
if let Some(current) = self.current_line.take() {
self.text.truncate(current.text_start as usize);
self.tokens.truncate(current.tokens_start as usize);
self.mappings.truncate(current.mappings_start as usize);
}
}
fn finish(mut self) -> LogicalLines<'a> {
self.finish_line();
LogicalLines {
text: self.text,
tokens: self.tokens,
mappings: self.mappings,
lines: self.lines,
}
}
}
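
For context on what the refactor deletes: checks used to report byte offsets into this normalized `text` buffer, and `mapping()` binary-searched the recorded token offsets to translate an offset back to a source `Location`. A hypothetical round-trip (values invented for illustration):

// Source (one logical line across two physical lines):
//     x = (1,
//          2)
// Normalized text, with the line break collapsed to a single space:
//     "x = (1, 2)"
// The builder records (normalized_offset, source_location) pairs at every
// token boundary, so a check flagging the `2` at normalized offset 8 is
// mapped back to its true position on row 2 of the source.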


@ -3,7 +3,6 @@ pub(crate) mod rules;
pub mod settings; pub mod settings;
pub(crate) mod helpers; pub(crate) mod helpers;
pub(crate) mod logical_lines;
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {


@ -1,8 +1,7 @@
#![allow(dead_code, unused_imports, unused_variables)] use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use once_cell::sync::Lazy;
use regex::Regex;
use super::{LogicalLine, Whitespace};
use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation; use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
@ -101,32 +100,48 @@ impl Violation for WhitespaceBeforePunctuation {
} }
} }
// TODO(charlie): Pycodestyle has a negative lookahead on the end.
static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap());
/// E201, E202, E203 /// E201, E202, E203
#[cfg(feature = "logical_lines")] pub(crate) fn extraneous_whitespace(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![]; let mut diagnostics = vec![];
for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) { let mut last_token: Option<&Tok> = None;
let text = &line[line_match.range()];
let char = text.trim(); for token in line.tokens() {
let found = line_match.start(); let kind = token.kind();
if text.chars().last().unwrap().is_ascii_whitespace() { match kind {
diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into())); Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
} else if line.chars().nth(found - 1).map_or(false, |c| c != ',') { if !matches!(line.trailing_whitespace(&token), Whitespace::None) {
if char == "}" || char == "]" || char == ")" { let end = token.end();
diagnostics.push((found, WhitespaceBeforeCloseBracket.into())); diagnostics.push((
Location::new(end.row(), end.column()),
WhitespaceAfterOpenBracket.into(),
));
}
}
Tok::Rbrace | Tok::Rpar | Tok::Rsqb | Tok::Comma | Tok::Semi | Tok::Colon => {
let diagnostic_kind = if matches!(kind, Tok::Comma | Tok::Semi | Tok::Colon) {
DiagnosticKind::from(WhitespaceBeforePunctuation)
} else { } else {
diagnostics.push((found, WhitespaceBeforePunctuation.into())); DiagnosticKind::from(WhitespaceBeforeCloseBracket)
};
if let (Whitespace::Single | Whitespace::Many | Whitespace::Tab, offset) =
line.leading_whitespace(&token)
{
let start = token.start();
if !matches!(last_token, Some(Tok::Comma)) {
diagnostics.push((
Location::new(start.row(), start.column() - offset),
diagnostic_kind,
));
} }
} }
} }
_ => {}
}
last_token = Some(kind);
}
diagnostics diagnostics
} }
#[cfg(not(feature = "logical_lines"))]
pub fn extraneous_whitespace(_line: &str) -> Vec<(usize, DiagnosticKind)> {
vec![]
}
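
The rewritten check leans on the `Whitespace` helper via `line.trailing_whitespace(&token)` and `line.leading_whitespace(&token)`; the helper's definition sits elsewhere in `logical_lines.rs` and is not part of the hunks shown here. A plausible sketch, assuming only the variants visible above (`None`, `Single`, `Many`, `Tab`):

// Sketch only; the real definition in logical_lines.rs may differ.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum Whitespace {
    None,
    Single,
    Many,
    Tab,
}

impl Whitespace {
    /// Classifies the run of whitespace at the start of `text`.
    fn leading(text: &str) -> Whitespace {
        let mut count = 0usize;
        for c in text.chars() {
            match c {
                '\t' => return Whitespace::Tab,
                c if c.is_whitespace() => count += 1,
                _ => break,
            }
        }
        match count {
            0 => Whitespace::None,
            1 => Whitespace::Single,
            _ => Whitespace::Many,
        }
    }
}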


@ -1,10 +1,10 @@
#![allow(dead_code, unused_imports, unused_variables)]
use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation; use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use crate::rules::pycodestyle::logical_lines::LogicalLine; use super::LogicalLine;
/// ## What it does /// ## What it does
/// Checks for indentation with a non-multiple of 4 spaces. /// Checks for indentation with a non-multiple of 4 spaces.
@ -230,33 +230,36 @@ impl Violation for OverIndented {
} }
/// E111, E114, E112, E113, E115, E116, E117 /// E111, E114, E112, E113, E115, E116, E117
#[cfg(feature = "logical_lines")] pub(crate) fn indentation(
pub fn indentation(
logical_line: &LogicalLine, logical_line: &LogicalLine,
prev_logical_line: Option<&LogicalLine>, prev_logical_line: Option<&LogicalLine>,
indent_char: char, indent_char: char,
indent_level: usize, indent_level: usize,
prev_indent_level: Option<usize>, prev_indent_level: Option<usize>,
indent_size: usize, indent_size: usize,
) -> Vec<(usize, DiagnosticKind)> { ) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![]; let mut diagnostics = vec![];
let location = logical_line.first_token_location().unwrap();
if indent_level % indent_size != 0 { if indent_level % indent_size != 0 {
diagnostics.push(( diagnostics.push((
0, location,
if logical_line.is_comment() { if logical_line.is_comment_only() {
IndentationWithInvalidMultipleComment { indent_size }.into() IndentationWithInvalidMultipleComment { indent_size }.into()
} else { } else {
IndentationWithInvalidMultiple { indent_size }.into() IndentationWithInvalidMultiple { indent_size }.into()
}, },
)); ));
} }
let indent_expect = prev_logical_line.map_or(false, |prev_logical_line| { let indent_expect = prev_logical_line
prev_logical_line.text().ends_with(':') .and_then(|prev_logical_line| prev_logical_line.tokens().trimmed().last())
}); .map_or(false, |t| t.kind() == &Tok::Colon);
if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) { if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) {
diagnostics.push(( diagnostics.push((
0, location,
if logical_line.is_comment() { if logical_line.is_comment_only() {
NoIndentedBlockComment.into() NoIndentedBlockComment.into()
} else { } else {
NoIndentedBlock.into() NoIndentedBlock.into()
@ -266,8 +269,8 @@ pub fn indentation(
&& prev_indent_level.map_or(false, |prev_indent_level| indent_level > prev_indent_level) && prev_indent_level.map_or(false, |prev_indent_level| indent_level > prev_indent_level)
{ {
diagnostics.push(( diagnostics.push((
0, location,
if logical_line.is_comment() { if logical_line.is_comment_only() {
UnexpectedIndentationComment.into() UnexpectedIndentationComment.into()
} else { } else {
UnexpectedIndentation.into() UnexpectedIndentation.into()
@ -278,20 +281,9 @@ pub fn indentation(
let expected_indent_amount = if indent_char == '\t' { 8 } else { 4 }; let expected_indent_amount = if indent_char == '\t' { 8 } else { 4 };
let expected_indent_level = prev_indent_level.unwrap_or(0) + expected_indent_amount; let expected_indent_level = prev_indent_level.unwrap_or(0) + expected_indent_amount;
if indent_level > expected_indent_level { if indent_level > expected_indent_level {
diagnostics.push((0, OverIndented.into())); diagnostics.push((location, OverIndented.into()));
} }
} }
diagnostics diagnostics
} }
#[cfg(not(feature = "logical_lines"))]
pub fn indentation(
_logical_line: &LogicalLine,
_prev_logical_line: Option<&LogicalLine>,
_indent_char: char,
_indent_level: usize,
_prev_indent_level: Option<usize>,
_indent_size: usize,
) -> Vec<(usize, DiagnosticKind)> {
vec![]
}
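
Two inputs that exercise this check, in the spirit of pycodestyle's documentation examples (illustrative; not taken from this diff):

let e111 = "if x:\n   a = 1";        // three-space indent: not a multiple of four (E111)
let e117 = "def f():\n        pass"; // eight spaces after a block opener: over-indented (E117)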


@ -0,0 +1,85 @@
use itertools::Itertools;
use rustpython_parser::Tok;
use super::LogicalLine;
use ruff_diagnostics::Edit;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::types::Range;
#[violation]
pub struct MissingWhitespace {
pub token: String,
}
impl AlwaysAutofixableViolation for MissingWhitespace {
#[derive_message_formats]
fn message(&self) -> String {
let MissingWhitespace { token } = self;
format!("Missing whitespace after {token}")
}
fn autofix_title(&self) -> String {
let MissingWhitespace { token } = self;
format!("Added missing whitespace after {token}")
}
}
/// E231
pub(crate) fn missing_whitespace(line: &LogicalLine, autofix: bool) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
let mut num_lsqb = 0u32;
let mut num_rsqb = 0u32;
let mut prev_lsqb = None;
let mut prev_lbrace = None;
for (token, next_token) in line.tokens().iter().tuple_windows() {
let kind = token.kind();
match kind {
Tok::Lsqb => {
num_lsqb += 1;
prev_lsqb = Some(token.start());
}
Tok::Rsqb => {
num_rsqb += 1;
}
Tok::Lbrace => {
prev_lbrace = Some(token.start());
}
Tok::Comma | Tok::Semi | Tok::Colon => {
let after = line.text_after(&token);
if !after.chars().next().map_or(false, char::is_whitespace) {
match (kind, next_token.kind()) {
(Tok::Colon, _) if num_lsqb > num_rsqb && prev_lsqb > prev_lbrace => {
continue; // Slice syntax, no space required
}
(Tok::Comma, Tok::Rpar | Tok::Rsqb) => {
continue; // Allow tuple with only one element: (3,)
}
(Tok::Colon, Tok::Equal) => {
continue; // Allow assignment expression
}
_ => {}
}
let kind = MissingWhitespace {
token: kind.to_string(),
};
let (start, end) = token.range();
let mut diagnostic = Diagnostic::new(kind, Range::new(start, start));
if autofix {
diagnostic.set_fix(Edit::insertion(" ".to_string(), end));
}
diagnostics.push(diagnostic);
}
}
_ => {}
}
}
diagnostics
}
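
The exemptions in the match above correspond to inputs like these (illustrative; the comments restate the reasoning from the match arms):

let slice = "x[1:2]";         // colon inside a slice: no space required
let single = "t = (3,)";      // single-element tuple: exempt
let walrus = "print(x := 1)"; // assignment expression, `:` followed by `=`: exempt
let flagged = "f(a,b)";       // E231: missing whitespace after `,`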


@ -0,0 +1,44 @@
use itertools::Itertools;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use super::LogicalLineTokens;
use crate::rules::pycodestyle::helpers::{is_keyword_token, is_singleton_token};
#[violation]
pub struct MissingWhitespaceAfterKeyword;
impl Violation for MissingWhitespaceAfterKeyword {
#[derive_message_formats]
fn message(&self) -> String {
format!("Missing whitespace after keyword")
}
}
/// E275
pub(crate) fn missing_whitespace_after_keyword(
tokens: &LogicalLineTokens,
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
for (tok0, tok1) in tokens.iter().tuple_windows() {
let tok0_kind = tok0.kind();
let tok1_kind = tok1.kind();
if is_keyword_token(tok0_kind)
&& !(is_singleton_token(tok0_kind)
|| matches!(tok0_kind, Tok::Async | Tok::Await)
|| tok0_kind == &Tok::Except && tok1_kind == &Tok::Star
|| tok0_kind == &Tok::Yield && tok1_kind == &Tok::Rpar
|| matches!(tok1_kind, Tok::Colon | Tok::Newline))
&& tok0.end() == tok1.start()
{
diagnostics.push((tok0.end(), MissingWhitespaceAfterKeyword.into()));
}
}
diagnostics
}
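
Concretely, token sequences like these hit the branches above (illustrative):

let flagged = "not(x)";              // E275: keyword immediately followed by `(`
let colon_ok = "else:";              // next token is `:`: exempt
let yield_ok = "(yield)";            // `yield` followed by `)`: exempt
let star_ok = "except* ValueError:"; // `except` + `*` (PEP 654): exempt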


@ -1,5 +1,3 @@
#![allow(dead_code, unused_imports, unused_variables)]
use rustpython_parser::ast::Location; use rustpython_parser::ast::Location;
use rustpython_parser::Tok; use rustpython_parser::Tok;
@ -8,9 +6,10 @@ use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use crate::rules::pycodestyle::helpers::{ use crate::rules::pycodestyle::helpers::{
is_arithmetic_token, is_keyword_token, is_op_token, is_singleton_token, is_skip_comment_token, is_arithmetic_token, is_keyword_token, is_op_token, is_skip_comment_token,
is_soft_keyword_token, is_unary_token, is_ws_needed_token, is_ws_optional_token, is_soft_keyword_token, is_unary_token, is_ws_needed_token, is_ws_optional_token,
}; };
use crate::rules::pycodestyle::rules::logical_lines::LogicalLineTokens;
// E225 // E225
#[violation] #[violation]
@ -57,53 +56,53 @@ impl Violation for MissingWhitespaceAroundModuloOperator {
} }
/// E225, E226, E227, E228 /// E225, E226, E227, E228
#[cfg(feature = "logical_lines")]
#[allow(clippy::if_same_then_else)] #[allow(clippy::if_same_then_else)]
pub fn missing_whitespace_around_operator( pub(crate) fn missing_whitespace_around_operator(
tokens: &[(Location, &Tok, Location)], tokens: &LogicalLineTokens,
) -> Vec<(Location, DiagnosticKind)> { ) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![]; let mut diagnostics = vec![];
let mut needs_space_main: Option<bool> = Some(false); let mut needs_space_main: Option<bool> = Some(false);
let mut needs_space_aux: Option<bool> = None; let mut needs_space_aux: Option<bool> = None;
let mut prev_end_aux: Option<&Location> = None; let mut prev_end_aux: Option<Location> = None;
let mut parens = 0u32; let mut parens = 0u32;
let mut prev_type: Option<&Tok> = None; let mut prev_type: Option<&Tok> = None;
let mut prev_end: Option<&Location> = None; let mut prev_end: Option<Location> = None;
for (start, token, end) in tokens { for token in tokens {
if is_skip_comment_token(token) { let kind = token.kind();
if is_skip_comment_token(kind) {
continue; continue;
} }
if **token == Tok::Lpar || **token == Tok::Lambda { match kind {
parens += 1; Tok::Lpar | Tok::Lambda => parens += 1,
} else if **token == Tok::Rpar { Tok::Rpar => parens -= 1,
parens -= 1; _ => {}
} };
let needs_space = (needs_space_main.is_some() && needs_space_main.unwrap()) let needs_space = (needs_space_main.is_some() && needs_space_main.unwrap())
|| needs_space_aux.is_some() || needs_space_aux.is_some()
|| prev_end_aux.is_some(); || prev_end_aux.is_some();
if needs_space { if needs_space {
if Some(start) != prev_end { if Some(token.start()) != prev_end {
if !(needs_space_main.is_some() && needs_space_main.unwrap()) if !(needs_space_main.is_some() && needs_space_main.unwrap())
&& (needs_space_aux.is_none() || !needs_space_aux.unwrap()) && (needs_space_aux.is_none() || !needs_space_aux.unwrap())
{ {
diagnostics.push(( diagnostics.push((
*(prev_end_aux.unwrap()), prev_end_aux.unwrap(),
MissingWhitespaceAroundOperator.into(), MissingWhitespaceAroundOperator.into(),
)); ));
} }
needs_space_main = Some(false); needs_space_main = Some(false);
needs_space_aux = None; needs_space_aux = None;
prev_end_aux = None; prev_end_aux = None;
} else if **token == Tok::Greater } else if kind == &Tok::Greater && matches!(prev_type, Some(Tok::Less | Tok::Minus)) {
&& (prev_type == Some(&Tok::Less) || prev_type == Some(&Tok::Minus))
{
// Tolerate the "<>" operator, even if running Python 3 // Tolerate the "<>" operator, even if running Python 3
// Deal with Python 3's annotated return value "->" // Deal with Python 3's annotated return value "->"
} else if prev_type == Some(&Tok::Slash) } else if prev_type == Some(&Tok::Slash)
&& (**token == Tok::Comma || **token == Tok::Rpar || **token == Tok::Colon) && matches!(kind, Tok::Comma | Tok::Rpar | Tok::Colon)
|| (prev_type == Some(&Tok::Rpar) && **token == Tok::Colon) || (prev_type == Some(&Tok::Rpar) && kind == &Tok::Colon)
{ {
// Tolerate the "/" operator in function definition // Tolerate the "/" operator in function definition
// For more info see PEP570 // For more info see PEP570
@ -111,22 +110,21 @@ pub fn missing_whitespace_around_operator(
if (needs_space_main.is_some() && needs_space_main.unwrap()) if (needs_space_main.is_some() && needs_space_main.unwrap())
|| (needs_space_aux.is_some() && needs_space_aux.unwrap()) || (needs_space_aux.is_some() && needs_space_aux.unwrap())
{ {
diagnostics diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into()));
.push((*(prev_end.unwrap()), MissingWhitespaceAroundOperator.into()));
} else if prev_type != Some(&Tok::DoubleStar) { } else if prev_type != Some(&Tok::DoubleStar) {
if prev_type == Some(&Tok::Percent) { if prev_type == Some(&Tok::Percent) {
diagnostics.push(( diagnostics.push((
*(prev_end_aux.unwrap()), prev_end_aux.unwrap(),
MissingWhitespaceAroundModuloOperator.into(), MissingWhitespaceAroundModuloOperator.into(),
)); ));
} else if !is_arithmetic_token(prev_type.unwrap()) { } else if !is_arithmetic_token(prev_type.unwrap()) {
diagnostics.push(( diagnostics.push((
*(prev_end_aux.unwrap()), prev_end_aux.unwrap(),
MissingWhitespaceAroundBitwiseOrShiftOperator.into(), MissingWhitespaceAroundBitwiseOrShiftOperator.into(),
)); ));
} else { } else {
diagnostics.push(( diagnostics.push((
*(prev_end_aux.unwrap()), prev_end_aux.unwrap(),
MissingWhitespaceAroundArithmeticOperator.into(), MissingWhitespaceAroundArithmeticOperator.into(),
)); ));
} }
@ -135,30 +133,28 @@ pub fn missing_whitespace_around_operator(
needs_space_aux = None; needs_space_aux = None;
prev_end_aux = None; prev_end_aux = None;
} }
} else if (is_op_token(token) || matches!(token, Tok::Name { .. })) && prev_end.is_some() { } else if (is_op_token(kind) || matches!(kind, Tok::Name { .. })) && prev_end.is_some() {
if **token == Tok::Equal && parens > 0 { if kind == &Tok::Equal && parens > 0 {
// Allow keyword args or defaults: foo(bar=None). // Allow keyword args or defaults: foo(bar=None).
} else if is_ws_needed_token(token) { } else if is_ws_needed_token(kind) {
needs_space_main = Some(true); needs_space_main = Some(true);
needs_space_aux = None; needs_space_aux = None;
prev_end_aux = None; prev_end_aux = None;
} else if is_unary_token(token) { } else if is_unary_token(kind) {
// Check if the operator is used as a binary operator // Check if the operator is used as a binary operator
// Allow unary operators: -123, -x, +1. // Allow unary operators: -123, -x, +1.
// Allow argument unpacking: foo(*args, **kwargs) // Allow argument unpacking: foo(*args, **kwargs)
if (prev_type.is_some() if let Some(prev_type) = prev_type {
&& is_op_token(prev_type.unwrap()) if (matches!(prev_type, Tok::Rpar | Tok::Rsqb | Tok::Rbrace))
&& (prev_type == Some(&Tok::Rpar) || (!is_op_token(prev_type) && !is_keyword_token(prev_type))
|| prev_type == Some(&Tok::Rsqb) && (!is_soft_keyword_token(prev_type))
|| prev_type == Some(&Tok::Rbrace)))
|| (!is_op_token(prev_type.unwrap()) && !is_keyword_token(prev_type.unwrap()))
&& (!is_soft_keyword_token(prev_type.unwrap()))
{ {
needs_space_main = None; needs_space_main = None;
needs_space_aux = None; needs_space_aux = None;
prev_end_aux = None; prev_end_aux = None;
} }
} else if is_ws_optional_token(token) { }
} else if is_ws_optional_token(kind) {
needs_space_main = None; needs_space_main = None;
needs_space_aux = None; needs_space_aux = None;
prev_end_aux = None; prev_end_aux = None;
@ -169,28 +165,21 @@ pub fn missing_whitespace_around_operator(
// trailing space matches opening space // trailing space matches opening space
needs_space_main = None; needs_space_main = None;
prev_end_aux = prev_end; prev_end_aux = prev_end;
needs_space_aux = Some(Some(start) != prev_end_aux); needs_space_aux = Some(Some(token.start()) != prev_end_aux);
} else if needs_space_main.is_some() } else if needs_space_main.is_some()
&& needs_space_main.unwrap() && needs_space_main.unwrap()
&& Some(start) == prev_end_aux && Some(token.start()) == prev_end_aux
{ {
// A needed opening space was not found // A needed opening space was not found
diagnostics.push((*(prev_end.unwrap()), MissingWhitespaceAroundOperator.into())); diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into()));
needs_space_main = Some(false); needs_space_main = Some(false);
needs_space_aux = None; needs_space_aux = None;
prev_end_aux = None; prev_end_aux = None;
} }
} }
prev_type = Some(*token); prev_type = Some(kind);
prev_end = Some(end); prev_end = Some(token.end());
} }
diagnostics diagnostics
} }
#[cfg(not(feature = "logical_lines"))]
pub fn missing_whitespace_around_operator(
_tokens: &[(Location, &Tok, Location)],
) -> Vec<(Location, DiagnosticKind)> {
vec![]
}
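
The four diagnostics map onto pycodestyle's classic examples (illustrative; the keyword-argument exemption comes from the `parens > 0` branch above):

let e225 = "i=i+1";             // E225: `=` needs whitespace on both sides
let e226 = "c = (a+b) * (a-b)"; // E226: arithmetic operator without whitespace
let e227 = "c = a&b";           // E227: bitwise/shift operator without whitespace
let e228 = "c = a%b";           // E228: modulo operator without whitespace
let ok = "foo(bar=None)";       // exempt: keyword argument or default value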


@ -0,0 +1,653 @@
use bitflags::bitflags;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use std::fmt::{Debug, Formatter};
use std::iter::FusedIterator;
use std::ops::Deref;
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range;
use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token};
pub(crate) use extraneous_whitespace::{
extraneous_whitespace, WhitespaceAfterOpenBracket, WhitespaceBeforeCloseBracket,
WhitespaceBeforePunctuation,
};
pub(crate) use indentation::{
indentation, IndentationWithInvalidMultiple, IndentationWithInvalidMultipleComment,
NoIndentedBlock, NoIndentedBlockComment, OverIndented, UnexpectedIndentation,
UnexpectedIndentationComment,
};
pub(crate) use missing_whitespace::{missing_whitespace, MissingWhitespace};
pub(crate) use missing_whitespace_after_keyword::{
missing_whitespace_after_keyword, MissingWhitespaceAfterKeyword,
};
pub(crate) use missing_whitespace_around_operator::{
missing_whitespace_around_operator, MissingWhitespaceAroundArithmeticOperator,
MissingWhitespaceAroundBitwiseOrShiftOperator, MissingWhitespaceAroundModuloOperator,
MissingWhitespaceAroundOperator,
};
pub(crate) use space_around_operator::{
space_around_operator, MultipleSpacesAfterOperator, MultipleSpacesBeforeOperator,
TabAfterOperator, TabBeforeOperator,
};
pub(crate) use whitespace_around_keywords::{
whitespace_around_keywords, MultipleSpacesAfterKeyword, MultipleSpacesBeforeKeyword,
TabAfterKeyword, TabBeforeKeyword,
};
pub(crate) use whitespace_around_named_parameter_equals::{
whitespace_around_named_parameter_equals, MissingWhitespaceAroundParameterEquals,
UnexpectedSpacesAroundKeywordParameterEquals,
};
pub(crate) use whitespace_before_comment::{
whitespace_before_comment, MultipleLeadingHashesForBlockComment, NoSpaceAfterBlockComment,
NoSpaceAfterInlineComment, TooFewSpacesBeforeInlineComment,
};
pub(crate) use whitespace_before_parameters::{
whitespace_before_parameters, WhitespaceBeforeParameters,
};
mod extraneous_whitespace;
mod indentation;
mod missing_whitespace;
mod missing_whitespace_after_keyword;
mod missing_whitespace_around_operator;
mod space_around_operator;
mod whitespace_around_keywords;
mod whitespace_around_named_parameter_equals;
mod whitespace_before_comment;
mod whitespace_before_parameters;
bitflags! {
#[derive(Default)]
pub(crate) struct TokenFlags: u8 {
/// Whether the logical line contains an operator.
const OPERATOR = 0b0000_0001;
/// Whether the logical line contains a bracket.
const BRACKET = 0b0000_0010;
/// Whether the logical line contains a punctuation mark.
const PUNCTUATION = 0b0000_0100;
/// Whether the logical line contains a keyword.
const KEYWORD = 0b0000_1000;
/// Whether the logical line contains a comment.
const COMMENT = 0b0001_0000;
}
}
#[derive(Clone)]
pub(crate) struct LogicalLines<'a> {
tokens: Tokens<'a>,
lines: Vec<Line>,
locator: &'a Locator<'a>,
}
impl<'a> LogicalLines<'a> {
pub fn from_tokens(tokens: &'a [LexResult], locator: &'a Locator<'a>) -> Self {
assert!(u32::try_from(tokens.len()).is_ok());
let mut builder = LogicalLinesBuilder::with_capacity(tokens.len());
let mut parens: u32 = 0;
for (start, token, end) in tokens.iter().flatten() {
builder.push_token(*start, token, *end);
match token {
Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
parens += 1;
}
Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
parens -= 1;
}
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment { .. } if parens == 0 => {
builder.finish_line();
}
_ => {}
}
}
builder.finish(locator)
}
}
impl Debug for LogicalLines<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_list()
.entries(self.into_iter().map(DebugLogicalLine))
.finish()
}
}
impl<'a> IntoIterator for &'a LogicalLines<'a> {
type Item = LogicalLine<'a>;
type IntoIter = LogicalLinesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
LogicalLinesIter {
lines: self,
inner: self.lines.iter(),
}
}
}
/// A logical line spans multiple lines in the source document if the line
/// ends with a parenthesized expression (`(..)`, `[..]`, `{..}`) that contains
/// line breaks.
///
/// ## Examples
/// This expression forms one logical line because the array elements are parenthesized.
///
/// ```python
/// a = [
/// 1,
/// 2
/// ]
/// ```
#[derive(Debug)]
pub(crate) struct LogicalLine<'a> {
lines: &'a LogicalLines<'a>,
line: &'a Line,
}
impl<'a> LogicalLine<'a> {
/// Returns `true` if this is a comment-only line
pub fn is_comment_only(&self) -> bool {
self.flags() == TokenFlags::COMMENT && self.tokens().trimmed().is_empty()
}
/// Returns the logical line's text, including comments, indents, dedents, and trailing newlines.
pub fn text(&self) -> &'a str {
self.tokens().text()
}
/// Returns this line's text without any leading or trailing newline, comment, indent, or dedent
#[cfg(test)]
pub fn text_trimmed(&self) -> &'a str {
self.tokens_trimmed().text()
}
#[cfg(test)]
pub fn tokens_trimmed(&self) -> LogicalLineTokens<'a> {
self.tokens().trimmed()
}
/// Returns the text after `token`
pub fn text_after(&self, token: &LogicalLineToken<'a>) -> &str {
debug_assert!(
(self.line.tokens_start..self.line.tokens_end).contains(&token.position),
"Token does not belong to this line"
);
// SAFETY: The line must have at least one token or `token` would not belong to this line.
let last_token = self.tokens().last().unwrap();
self.lines
.locator
.slice(Range::new(token.end(), last_token.end()))
}
/// Returns the text before `token`
pub fn text_before(&self, token: &LogicalLineToken<'a>) -> &str {
debug_assert!(
(self.line.tokens_start..self.line.tokens_end).contains(&token.position),
"Token does not belong to this line"
);
// SAFETY: The line must have at least one token or `token` would not belong to this line.
let first_token = self.tokens().first().unwrap();
self.lines
.locator
.slice(Range::new(first_token.start(), token.start()))
}
/// Returns the whitespace *after* the `token`
pub fn trailing_whitespace(&self, token: &LogicalLineToken<'a>) -> Whitespace {
Whitespace::leading(self.text_after(token))
}
/// Returns the whitespace and whitespace character-length *before* the `token`
pub fn leading_whitespace(&self, token: &LogicalLineToken<'a>) -> (Whitespace, usize) {
Whitespace::trailing(self.text_before(token))
}
/// Returns all tokens of the line, including comments and trailing new lines.
pub fn tokens(&self) -> LogicalLineTokens<'a> {
LogicalLineTokens {
lines: self.lines,
front: self.line.tokens_start,
back: self.line.tokens_end,
}
}
/// Returns the [`Location`] of the first token on the line or [`None`].
pub fn first_token_location(&self) -> Option<Location> {
self.tokens().first().map(|t| t.start())
}
/// Returns the line's flags
pub const fn flags(&self) -> TokenFlags {
self.line.flags
}
}
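A sketch of how a check can drive these accessors (hypothetical helper, not a rule in this commit):

fn commas_followed_by_space(line: &LogicalLine) -> usize {
    line.tokens()
        .iter()
        // `kind()` exposes the underlying lexer token.
        .filter(|token| matches!(token.kind(), Tok::Comma))
        // `trailing_whitespace()` classifies the gap after the token.
        .filter(|token| line.trailing_whitespace(token) != Whitespace::None)
        .count()
}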
/// Helper struct to pretty-print a [`LogicalLine`] with `dbg!`
struct DebugLogicalLine<'a>(LogicalLine<'a>);
impl Debug for DebugLogicalLine<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LogicalLine")
.field("text", &self.0.text())
.field("flags", &self.0.flags())
.field("tokens", &self.0.tokens())
.finish()
}
}
/// Iterator over the logical lines of a document.
pub(crate) struct LogicalLinesIter<'a> {
lines: &'a LogicalLines<'a>,
inner: std::slice::Iter<'a, Line>,
}
impl<'a> Iterator for LogicalLinesIter<'a> {
type Item = LogicalLine<'a>;
fn next(&mut self) -> Option<Self::Item> {
let line = self.inner.next()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
}
impl DoubleEndedIterator for LogicalLinesIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
let line = self.inner.next_back()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
}
impl ExactSizeIterator for LogicalLinesIter<'_> {}
impl FusedIterator for LogicalLinesIter<'_> {}
/// The tokens of a logical line
pub(crate) struct LogicalLineTokens<'a> {
lines: &'a LogicalLines<'a>,
front: u32,
back: u32,
}
impl<'a> LogicalLineTokens<'a> {
pub fn iter(&self) -> LogicalLineTokensIter<'a> {
LogicalLineTokensIter {
tokens: &self.lines.tokens,
front: self.front,
back: self.back,
}
}
pub fn len(&self) -> usize {
(self.back - self.front) as usize
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
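/// Returns these tokens with leading and trailing trivia (newlines, indents,
/// dedents, and comments) removed.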
pub fn trimmed(&self) -> LogicalLineTokens<'a> {
let tokens = &self.lines.tokens[self.front as usize..self.back as usize];
let mut front = self.front;
let mut back = self.back;
let mut iter = tokens.iter();
for (_, kind, _) in iter.by_ref() {
if !matches!(
kind,
Tok::Newline
| Tok::NonLogicalNewline
| Tok::Indent
| Tok::Dedent
| Tok::Comment(..)
) {
break;
}
front += 1;
}
for (_, kind, _) in iter.rev() {
if !matches!(
kind,
Tok::Newline
| Tok::NonLogicalNewline
| Tok::Indent
| Tok::Dedent
| Tok::Comment(..)
) {
break;
}
back -= 1;
}
LogicalLineTokens {
lines: self.lines,
front,
back,
}
}
pub fn text(&self) -> &'a str {
match (self.first(), self.last()) {
(Some(first), Some(last)) => {
let locator = self.lines.locator;
locator.slice(Range::new(first.start(), last.end()))
}
_ => "",
}
}
/// Returns the first token
pub fn first(&self) -> Option<LogicalLineToken<'a>> {
self.iter().next()
}
/// Returns the last token
pub fn last(&self) -> Option<LogicalLineToken<'a>> {
self.iter().next_back()
}
}
impl<'a> IntoIterator for LogicalLineTokens<'a> {
type Item = LogicalLineToken<'a>;
type IntoIter = LogicalLineTokensIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl<'a> IntoIterator for &LogicalLineTokens<'a> {
type Item = LogicalLineToken<'a>;
type IntoIter = LogicalLineTokensIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl Debug for LogicalLineTokens<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_list().entries(self.iter()).finish()
}
}
/// Iterator over the tokens of a [`LogicalLine`]
pub(crate) struct LogicalLineTokensIter<'a> {
tokens: &'a Tokens<'a>,
front: u32,
back: u32,
}
impl<'a> Iterator for LogicalLineTokensIter<'a> {
type Item = LogicalLineToken<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.front < self.back {
let result = Some(LogicalLineToken {
tokens: self.tokens,
position: self.front,
});
self.front += 1;
result
} else {
None
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = (self.back - self.front) as usize;
(len, Some(len))
}
}
impl ExactSizeIterator for LogicalLineTokensIter<'_> {}
impl FusedIterator for LogicalLineTokensIter<'_> {}
impl DoubleEndedIterator for LogicalLineTokensIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.front < self.back {
self.back -= 1;
Some(LogicalLineToken {
position: self.back,
tokens: self.tokens,
})
} else {
None
}
}
}
/// A token of a [`LogicalLine`]
#[derive(Clone)]
pub(crate) struct LogicalLineToken<'a> {
tokens: &'a Tokens<'a>,
position: u32,
}
impl<'a> LogicalLineToken<'a> {
/// Returns the token's kind
pub fn kind(&self) -> &'a Tok {
#[allow(unsafe_code)]
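// SAFETY: `position` is always in bounds because a `LogicalLineToken` is
// only created with indices into its own `Tokens` vector.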
let (_, token, _) = unsafe { *self.tokens.get_unchecked(self.position as usize) };
token
}
/// Returns the token's start location
pub fn start(&self) -> Location {
self.range().0
}
/// Returns the token's end location
pub fn end(&self) -> Location {
self.range().1
}
/// Returns a tuple with the token's `(start, end)` locations
pub fn range(&self) -> (Location, Location) {
#[allow(unsafe_code)]
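// SAFETY: `position` is always in bounds because a `LogicalLineToken` is
// only created with indices into its own `Tokens` vector.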
let &(start, _, end) = unsafe { self.tokens.get_unchecked(self.position as usize) };
(start, end)
}
}
impl Debug for LogicalLineToken<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LogicalLineToken")
.field("kind", &self.kind())
.field("range", &self.range())
.finish()
}
}
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) enum Whitespace {
None,
Single,
Many,
Tab,
}
impl Whitespace {
fn leading(content: &str) -> Self {
let mut count = 0u32;
for c in content.chars() {
if c == '\t' {
return Self::Tab;
} else if matches!(c, '\n' | '\r') {
break;
} else if c.is_whitespace() {
count += 1;
} else {
break;
}
}
match count {
0 => Whitespace::None,
1 => Whitespace::Single,
_ => Whitespace::Many,
}
}
fn trailing(content: &str) -> (Self, usize) {
let mut count = 0;
for c in content.chars().rev() {
if c == '\t' {
return (Self::Tab, count + 1);
} else if matches!(c, '\n' | '\r') {
// Indent
return (Self::None, 0);
} else if c.is_whitespace() {
count += 1;
} else {
break;
}
}
match count {
0 => (Self::None, 0),
1 => (Self::Single, count),
_ => (Self::Many, count),
}
}
}
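A sketch of the classification rules above (hypothetical assertions, not tests from this commit; `assert!` rather than `assert_eq!` because `Whitespace` does not derive `Debug`):

fn whitespace_classification_examples() {
    assert!(Whitespace::leading(" x") == Whitespace::Single);
    assert!(Whitespace::leading("\tx") == Whitespace::Tab);
    assert!(Whitespace::trailing("a  ") == (Whitespace::Many, 2));
    // A newline ends the scan: those spaces are indentation, not trailing
    // whitespace before a token.
    assert!(Whitespace::trailing("a\n  ") == (Whitespace::None, 0));
}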
#[derive(Debug, Default)]
struct CurrentLine {
flags: TokenFlags,
tokens_start: u32,
}
/// Builder for [`LogicalLines`]
#[derive(Debug, Default)]
struct LogicalLinesBuilder<'a> {
tokens: Tokens<'a>,
lines: Vec<Line>,
current_line: Option<CurrentLine>,
}
impl<'a> LogicalLinesBuilder<'a> {
fn with_capacity(tokens: usize) -> Self {
Self {
tokens: Tokens::with_capacity(tokens),
..Self::default()
}
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and that each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn push_token(&mut self, start: Location, token: &'a Tok, end: Location) {
let tokens_start = self.tokens.len();
let line = self.current_line.get_or_insert_with(|| CurrentLine {
flags: TokenFlags::empty(),
tokens_start: tokens_start as u32,
});
if matches!(token, Tok::Comment { .. }) {
line.flags.insert(TokenFlags::COMMENT);
} else if is_op_token(token) {
line.flags.insert(TokenFlags::OPERATOR);
line.flags.set(
TokenFlags::BRACKET,
matches!(
token,
Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
),
);
}
if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
line.flags.insert(TokenFlags::PUNCTUATION);
} else if is_keyword_token(token) {
line.flags.insert(TokenFlags::KEYWORD);
}
self.tokens.push(token, start, end);
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and that each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn finish_line(&mut self) {
if let Some(current) = self.current_line.take() {
self.lines.push(Line {
flags: current.flags,
tokens_start: current.tokens_start,
tokens_end: self.tokens.len() as u32,
});
}
}
fn finish(mut self, locator: &'a Locator<'a>) -> LogicalLines<'a> {
self.finish_line();
LogicalLines {
tokens: self.tokens,
lines: self.lines,
locator,
}
}
}
#[derive(Debug, Clone)]
struct Line {
flags: TokenFlags,
tokens_start: u32,
tokens_end: u32,
}
#[derive(Debug, Clone, Default)]
struct Tokens<'a>(Vec<(Location, &'a Tok, Location)>);
impl<'a> Tokens<'a> {
/// Creates a new token store with space reserved for `capacity` tokens.
fn with_capacity(capacity: usize) -> Self {
Self(Vec::with_capacity(capacity))
}
/// Returns the number of stored tokens.
fn len(&self) -> usize {
self.0.len()
}
/// Adds a new token with the given `kind` and its `start` and `end` locations.
fn push(&mut self, kind: &'a Tok, start: Location, end: Location) {
self.0.push((start, kind, end));
}
}
impl<'a> Deref for Tokens<'a> {
type Target = [(Location, &'a Tok, Location)];
fn deref(&self) -> &Self::Target {
&self.0
}
}

View file

@@ -1,16 +1,10 @@
-#![allow(dead_code, unused_imports, unused_variables)]
-use once_cell::sync::Lazy;
-use regex::Regex;
 use rustpython_parser::ast::Location;
 use rustpython_parser::Tok;

-use crate::rules::pycodestyle::helpers::is_op_token;
-use crate::rules::pycodestyle::rules::Whitespace;
+use super::{LogicalLine, Whitespace};
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_ast::source_code::Locator;

 /// ## What it does
 /// Checks for extraneous tabs before an operator.
@@ -128,46 +122,77 @@ impl Violation for MultipleSpacesAfterOperator
     }
 }

-static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap());
-
 /// E221, E222, E223, E224
-#[cfg(feature = "logical_lines")]
-pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
+pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
-    let mut last_end = None;
+    let mut after_operator = false;

-    for line_match in OPERATOR_REGEX.find_iter(line) {
-        if last_end != Some(line_match.start()) {
-            let before = &line[..line_match.start()];
-            match Whitespace::trailing(before) {
-                (Whitespace::Tab, offset) => {
-                    diagnostics.push((line_match.start() - offset, TabBeforeOperator.into()));
-                }
+    for token in line.tokens() {
+        let is_operator = is_operator_token(token.kind());
+
+        if is_operator {
+            let (start, end) = token.range();
+
+            if !after_operator {
+                match line.leading_whitespace(&token) {
+                    (Whitespace::Tab, offset) => diagnostics.push((
+                        Location::new(start.row(), start.column() - offset),
+                        TabBeforeOperator.into(),
+                    )),
                     (Whitespace::Many, offset) => diagnostics.push((
-                        line_match.start() - offset,
+                        Location::new(start.row(), start.column() - offset),
                         MultipleSpacesBeforeOperator.into(),
                     )),
                     _ => {}
                 }
             }

-        let after = &line[line_match.end()..];
-        let (leading_offset, leading_kind) = Whitespace::leading(after);
-        match leading_kind {
-            Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())),
-            Whitespace::Many => {
-                diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into()));
-            }
+            match line.trailing_whitespace(&token) {
+                Whitespace::Tab => diagnostics.push((end, TabAfterOperator.into())),
+                Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterOperator.into())),
                 _ => {}
             }
-        last_end = Some(line_match.end() + leading_offset);
+        }
+
+        after_operator = is_operator;
     }
+
     diagnostics
 }

-#[cfg(not(feature = "logical_lines"))]
-pub fn space_around_operator(_line: &str) -> Vec<(usize, DiagnosticKind)> {
-    vec![]
+const fn is_operator_token(token: &Tok) -> bool {
+    matches!(
+        token,
+        Tok::Plus
+            | Tok::Minus
+            | Tok::Star
+            | Tok::Slash
+            | Tok::Vbar
+            | Tok::Amper
+            | Tok::Less
+            | Tok::Greater
+            | Tok::Equal
+            | Tok::Percent
+            | Tok::NotEqual
+            | Tok::LessEqual
+            | Tok::GreaterEqual
+            | Tok::CircumFlex
+            | Tok::LeftShift
+            | Tok::RightShift
+            | Tok::DoubleStar
+            | Tok::PlusEqual
+            | Tok::MinusEqual
+            | Tok::StarEqual
+            | Tok::SlashEqual
+            | Tok::PercentEqual
+            | Tok::AmperEqual
+            | Tok::VbarEqual
+            | Tok::CircumflexEqual
+            | Tok::LeftShiftEqual
+            | Tok::RightShiftEqual
+            | Tok::DoubleStarEqual
+            | Tok::DoubleSlash
+            | Tok::DoubleSlashEqual
+            | Tok::ColonEqual
+    )
 }
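A worked example of the new token walk (hypothetical input, not a fixture from this commit):

// For the logical line `x = 1  + 2`:
// - at `=`: a single space on each side, so nothing is flagged;
// - at `+`: `leading_whitespace` returns `(Whitespace::Many, 2)`, so
//   `MultipleSpacesBeforeOperator` is reported two columns before the operator;
// - `after_operator` suppresses the "before" check when the previous token is
//   itself an operator, because that gap was already checked as the previous
//   token's trailing whitespace.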

View file

@@ -1,9 +1,7 @@
-#![allow(dead_code, unused_imports, unused_variables)]
-use once_cell::sync::Lazy;
-use regex::Regex;
+use rustpython_parser::ast::Location;

-use crate::rules::pycodestyle::rules::Whitespace;
+use super::{LogicalLine, Whitespace};
+use crate::rules::pycodestyle::helpers::is_keyword_token;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
@@ -111,47 +109,40 @@ impl Violation for TabBeforeKeyword
     }
 }

-static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
-    Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap()
-});
-
 /// E271, E272, E273, E274
-#[cfg(feature = "logical_lines")]
-pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
+pub(crate) fn whitespace_around_keywords(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
-    let mut last_end = None;
+    let mut after_keyword = false;

-    for line_match in KEYWORD_REGEX.find_iter(line) {
-        if last_end != Some(line_match.start()) {
-            let before = &line[..line_match.start()];
-            match Whitespace::trailing(before) {
-                (Whitespace::Tab, offset) => {
-                    diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into()));
-                }
+    for token in line.tokens() {
+        let is_keyword = is_keyword_token(token.kind());
+
+        if is_keyword {
+            let (start, end) = token.range();
+
+            if !after_keyword {
+                match line.leading_whitespace(&token) {
+                    (Whitespace::Tab, offset) => diagnostics.push((
+                        Location::new(start.row(), start.column() - offset),
+                        TabBeforeKeyword.into(),
+                    )),
                     (Whitespace::Many, offset) => diagnostics.push((
-                        line_match.start() - offset,
+                        Location::new(start.row(), start.column() - offset),
                         MultipleSpacesBeforeKeyword.into(),
                     )),
                     _ => {}
                 }
             }

-        let after = &line[line_match.end()..];
-        let (leading_offset, leading_kind) = Whitespace::leading(after);
-        match leading_kind {
-            Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())),
-            Whitespace::Many => {
-                diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into()));
-            }
+            match line.trailing_whitespace(&token) {
+                Whitespace::Tab => diagnostics.push((end, TabAfterKeyword.into())),
+                Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterKeyword.into())),
                 _ => {}
             }
-        last_end = Some(line_match.end() + leading_offset);
+        }
+
+        after_keyword = is_keyword;
     }
+
     diagnostics
 }

-#[cfg(not(feature = "logical_lines"))]
-pub fn whitespace_around_keywords(_line: &str) -> Vec<(usize, DiagnosticKind)> {
-    vec![]
-}

View file

@@ -0,0 +1,121 @@
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use super::LogicalLineTokens;
use crate::rules::pycodestyle::helpers::is_op_token;
#[violation]
pub struct UnexpectedSpacesAroundKeywordParameterEquals;
impl Violation for UnexpectedSpacesAroundKeywordParameterEquals {
#[derive_message_formats]
fn message(&self) -> String {
format!("Unexpected spaces around keyword / parameter equals")
}
}
#[violation]
pub struct MissingWhitespaceAroundParameterEquals;
impl Violation for MissingWhitespaceAroundParameterEquals {
#[derive_message_formats]
fn message(&self) -> String {
format!("Missing whitespace around parameter equals")
}
}
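/// Returns `true` if the line starts a function definition (`def` or
/// `async def`), skipping any leading indents and dedents. This replaces the
/// old `^(async\s+def|def)\b` regex check.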
fn is_in_def(tokens: &LogicalLineTokens) -> bool {
for token in tokens {
match token.kind() {
Tok::Async | Tok::Indent | Tok::Dedent => continue,
Tok::Def => return true,
_ => return false,
}
}
false
}
/// E251, E252
pub(crate) fn whitespace_around_named_parameter_equals(
tokens: &LogicalLineTokens,
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
let mut parens = 0;
let mut require_space = false;
let mut no_space = false;
let mut annotated_func_arg = false;
let mut prev_end: Option<Location> = None;
let in_def = is_in_def(tokens);
for token in tokens {
let kind = token.kind();
if kind == &Tok::NonLogicalNewline {
continue;
}
if no_space {
no_space = false;
if Some(token.start()) != prev_end {
diagnostics.push((
prev_end.unwrap(),
UnexpectedSpacesAroundKeywordParameterEquals.into(),
));
}
}
if require_space {
require_space = false;
let start = token.start();
if Some(start) == prev_end {
diagnostics.push((start, MissingWhitespaceAroundParameterEquals.into()));
}
}
if is_op_token(kind) {
match kind {
Tok::Lpar | Tok::Lsqb => {
parens += 1;
}
Tok::Rpar | Tok::Rsqb => {
parens -= 1;
}
Tok::Colon if parens == 1 && in_def => {
annotated_func_arg = true;
}
Tok::Comma if parens == 1 => {
annotated_func_arg = false;
}
Tok::Equal if parens > 0 => {
if annotated_func_arg && parens == 1 {
require_space = true;
let start = token.start();
if Some(start) == prev_end {
diagnostics
.push((start, MissingWhitespaceAroundParameterEquals.into()));
}
} else {
no_space = true;
if Some(token.start()) != prev_end {
diagnostics.push((
prev_end.unwrap(),
UnexpectedSpacesAroundKeywordParameterEquals.into(),
));
}
}
}
_ => {}
}
if parens < 1 {
annotated_func_arg = false;
}
}
prev_end = Some(token.end());
}
diagnostics
}
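Expected behavior, sketched (hypothetical inputs, not fixtures from this commit):

// def f(x = 1): ...     -> UnexpectedSpacesAroundKeywordParameterEquals twice
//                          (the space before and the space after `=`)
// def f(x: int=1): ...  -> MissingWhitespaceAroundParameterEquals twice
//                          (annotated argument, so spaces are required)
// f(x=1)                -> no diagnostic (unannotated keyword argument)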

View file

@@ -1,8 +1,7 @@
-#![allow(dead_code, unused_imports, unused_variables)]
-
 use rustpython_parser::ast::Location;
 use rustpython_parser::Tok;

+use super::LogicalLineTokens;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
@@ -139,25 +138,29 @@ impl Violation for MultipleLeadingHashesForBlockComment
 }

 /// E261, E262, E265, E266
-#[cfg(feature = "logical_lines")]
-pub fn whitespace_before_comment(
-    tokens: &[(Location, &Tok, Location)],
+pub(crate) fn whitespace_before_comment(
+    tokens: &LogicalLineTokens,
     locator: &Locator,
 ) -> Vec<(Range, DiagnosticKind)> {
     let mut diagnostics = vec![];
     let mut prev_end = Location::new(0, 0);
-    for (start, tok, end) in tokens {
-        if let Tok::Comment(text) = tok {
+    for token in tokens {
+        let kind = token.kind();
+        if let Tok::Comment { .. } = kind {
+            let (start, end) = token.range();
             let line = locator.slice(Range::new(
                 Location::new(start.row(), 0),
                 Location::new(start.row(), start.column()),
             ));
+            let text = locator.slice(Range::new(start, end));

             let is_inline_comment = !line.trim().is_empty();
             if is_inline_comment {
                 if prev_end.row() == start.row() && start.column() < prev_end.column() + 2 {
                     diagnostics.push((
-                        Range::new(prev_end, *start),
+                        Range::new(prev_end, start),
                         TooFewSpacesBeforeInlineComment.into(),
                     ));
                 }
@@ -177,32 +180,23 @@ pub fn whitespace_before_comment(
             if is_inline_comment {
                 if bad_prefix.is_some() || comment.chars().next().map_or(false, char::is_whitespace)
                 {
-                    diagnostics.push((Range::new(*start, *end), NoSpaceAfterInlineComment.into()));
+                    diagnostics.push((Range::new(start, end), NoSpaceAfterInlineComment.into()));
                 }
             } else if let Some(bad_prefix) = bad_prefix {
                 if bad_prefix != '!' || start.row() > 1 {
                     if bad_prefix != '#' {
-                        diagnostics
-                            .push((Range::new(*start, *end), NoSpaceAfterBlockComment.into()));
+                        diagnostics.push((Range::new(start, end), NoSpaceAfterBlockComment.into()));
                     } else if !comment.is_empty() {
                         diagnostics.push((
-                            Range::new(*start, *end),
+                            Range::new(start, end),
                             MultipleLeadingHashesForBlockComment.into(),
                         ));
                     }
                 }
             }
-        } else if !matches!(tok, Tok::NonLogicalNewline) {
-            prev_end = *end;
+        } else if !matches!(kind, Tok::NonLogicalNewline) {
+            prev_end = token.end();
         }
     }
     diagnostics
 }
-
-#[cfg(not(feature = "logical_lines"))]
-pub fn whitespace_before_comment(
-    _tokens: &[(Location, &Tok, Location)],
-    _locator: &Locator,
-) -> Vec<(Range, DiagnosticKind)> {
-    vec![]
-}

View file

@@ -1,5 +1,3 @@
-#![allow(dead_code, unused_imports, unused_variables)]
-
 use rustpython_parser::ast::Location;
 use rustpython_parser::Tok;
@@ -7,8 +5,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::types::Range;

-use crate::registry::AsRule;
-use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token, is_soft_keyword_token};
+use super::LogicalLineTokens;

 #[violation]
 pub struct WhitespaceBeforeParameters {
@@ -29,28 +26,34 @@ impl AlwaysAutofixableViolation for WhitespaceBeforeParameters
 }

 /// E211
-#[cfg(feature = "logical_lines")]
-pub fn whitespace_before_parameters(
-    tokens: &[(Location, &Tok, Location)],
+pub(crate) fn whitespace_before_parameters(
+    tokens: &LogicalLineTokens,
     autofix: bool,
 ) -> Vec<Diagnostic> {
     let mut diagnostics = vec![];
-    let (_, mut prev_token, mut prev_end) = tokens.first().unwrap();
-    for (idx, (start, tok, end)) in tokens.iter().enumerate() {
-        if is_op_token(tok)
-            && (**tok == Tok::Lpar || **tok == Tok::Lsqb)
-            && *start != prev_end
-            && (matches!(prev_token, Tok::Name { .. })
-                || matches!(prev_token, Tok::Rpar | Tok::Rsqb | Tok::Rbrace))
-            && (idx < 2 || *(tokens[idx - 2].1) != Tok::Class)
-            && !is_keyword_token(tok)
-            && !is_soft_keyword_token(tok)
+    let previous = tokens.first().unwrap();
+
+    let mut pre_pre_kind: Option<&Tok> = None;
+    let mut prev_token = previous.kind();
+    let mut prev_end = previous.end();
+
+    for token in tokens {
+        let kind = token.kind();
+
+        if matches!(kind, Tok::Lpar | Tok::Lsqb)
+            && token.start() != prev_end
+            && matches!(
+                prev_token,
+                Tok::Name { .. } | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
+            )
+            && (pre_pre_kind != Some(&Tok::Class))
         {
             let start = Location::new(prev_end.row(), prev_end.column());
+            let end = token.end();
             let end = Location::new(end.row(), end.column() - 1);

             let kind: WhitespaceBeforeParameters = WhitespaceBeforeParameters {
-                bracket: tok.to_string(),
+                bracket: kind.to_string(),
             };

             let mut diagnostic = Diagnostic::new(kind, Range::new(start, end));
@@ -60,16 +63,9 @@ pub fn whitespace_before_parameters(
             }
             diagnostics.push(diagnostic);
         }
-        prev_token = *tok;
-        prev_end = *end;
+        pre_pre_kind = Some(prev_token);
+        prev_token = kind;
+        prev_end = token.end();
     }
     diagnostics
 }
-
-#[cfg(not(feature = "logical_lines"))]
-pub fn whitespace_before_parameters(
-    _tokens: &[(Location, &Tok, Location)],
-    _autofix: bool,
-) -> Vec<Diagnostic> {
-    vec![]
-}
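A note on the `pre_pre_kind` lookback (explanatory, not from the commit):

// Tracking the token two positions back replaces the old
// `idx < 2 || *(tokens[idx - 2].1) != Tok::Class` index check: the space in
// `class C (object):` stays unflagged, while `foo ()` is still reported.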

View file

@@ -1,102 +0,0 @@
#![allow(dead_code, unused_imports, unused_variables)]
use itertools::Itertools;
use rustpython_parser::ast::Location;
use ruff_diagnostics::Edit;
use ruff_diagnostics::Violation;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::types::Range;
#[violation]
pub struct MissingWhitespace {
pub token: String,
}
impl AlwaysAutofixableViolation for MissingWhitespace {
#[derive_message_formats]
fn message(&self) -> String {
let MissingWhitespace { token } = self;
format!("Missing whitespace after '{token}'")
}
fn autofix_title(&self) -> String {
let MissingWhitespace { token } = self;
format!("Added missing whitespace after '{token}'")
}
}
/// E231
#[cfg(feature = "logical_lines")]
pub fn missing_whitespace(
line: &str,
row: usize,
autofix: bool,
indent_level: usize,
) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
let mut num_lsqb = 0u32;
let mut num_rsqb = 0u32;
let mut prev_lsqb = None;
let mut prev_lbrace = None;
for (idx, (char, next_char)) in line.chars().tuple_windows().enumerate() {
match char {
'[' => {
num_lsqb += 1;
prev_lsqb = Some(idx);
}
']' => {
num_rsqb += 1;
}
'{' => {
prev_lbrace = Some(idx);
}
',' | ';' | ':' if !next_char.is_whitespace() => {
if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
continue; // Slice syntax, no space required
}
if char == ',' && matches!(next_char, ')' | ']') {
continue; // Allow tuple with only one element: (3,)
}
if char == ':' && next_char == '=' {
continue; // Allow assignment expression
}
let kind = MissingWhitespace {
token: char.to_string(),
};
let mut diagnostic = Diagnostic::new(
kind,
Range::new(
Location::new(row, indent_level + idx),
Location::new(row, indent_level + idx),
),
);
if autofix {
diagnostic.set_fix(Edit::insertion(
" ".to_string(),
Location::new(row, indent_level + idx + 1),
));
}
diagnostics.push(diagnostic);
}
_ => {}
}
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn missing_whitespace(
_line: &str,
_row: usize,
_autofix: bool,
indent_level: usize,
) -> Vec<Diagnostic> {
vec![]
}

View file

@@ -1,51 +0,0 @@
#![allow(dead_code, unused_imports, unused_variables)]
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use crate::rules::pycodestyle::helpers::{is_keyword_token, is_singleton_token};
#[violation]
pub struct MissingWhitespaceAfterKeyword;
impl Violation for MissingWhitespaceAfterKeyword {
#[derive_message_formats]
fn message(&self) -> String {
format!("Missing whitespace after keyword")
}
}
/// E275
#[cfg(feature = "logical_lines")]
pub fn missing_whitespace_after_keyword(
tokens: &[(Location, &Tok, Location)],
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
for (tok0, tok1) in tokens.iter().zip(&tokens[1..]) {
if tok0.2 == tok1.0
&& is_keyword_token(tok0.1)
&& !is_singleton_token(tok0.1)
&& *tok0.1 != Tok::Async
&& *tok0.1 != Tok::Await
&& !(*tok0.1 == Tok::Except && *tok1.1 == Tok::Star)
&& !(*tok0.1 == Tok::Yield && *tok1.1 == Tok::Rpar)
&& *tok1.1 != Tok::Colon
&& *tok1.1 != Tok::Newline
{
diagnostics.push((tok0.2, MissingWhitespaceAfterKeyword.into()));
}
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn missing_whitespace_after_keyword(
_tokens: &[(Location, &Tok, Location)],
) -> Vec<(Location, DiagnosticKind)> {
vec![]
}

View file

@@ -8,55 +8,21 @@ pub use compound_statements::{
 };
 pub use doc_line_too_long::{doc_line_too_long, DocLineTooLong};
 pub use errors::{syntax_error, IOError, SyntaxError};
-pub use extraneous_whitespace::{
-    extraneous_whitespace, WhitespaceAfterOpenBracket, WhitespaceBeforeCloseBracket,
-    WhitespaceBeforePunctuation,
-};
 pub use imports::{
     module_import_not_at_top_of_file, multiple_imports_on_one_line, ModuleImportNotAtTopOfFile,
     MultipleImportsOnOneLine,
 };
-pub use indentation::{
-    indentation, IndentationWithInvalidMultiple, IndentationWithInvalidMultipleComment,
-    NoIndentedBlock, NoIndentedBlockComment, OverIndented, UnexpectedIndentation,
-    UnexpectedIndentationComment,
-};
 pub use invalid_escape_sequence::{invalid_escape_sequence, InvalidEscapeSequence};
 pub use lambda_assignment::{lambda_assignment, LambdaAssignment};
 pub use line_too_long::{line_too_long, LineTooLong};
 pub use literal_comparisons::{literal_comparisons, NoneComparison, TrueFalseComparison};
 pub use missing_newline_at_end_of_file::{no_newline_at_end_of_file, MissingNewlineAtEndOfFile};
-pub use missing_whitespace::{missing_whitespace, MissingWhitespace};
-pub use missing_whitespace_after_keyword::{
-    missing_whitespace_after_keyword, MissingWhitespaceAfterKeyword,
-};
-pub use missing_whitespace_around_operator::{
-    missing_whitespace_around_operator, MissingWhitespaceAroundArithmeticOperator,
-    MissingWhitespaceAroundBitwiseOrShiftOperator, MissingWhitespaceAroundModuloOperator,
-    MissingWhitespaceAroundOperator,
-};
 pub use mixed_spaces_and_tabs::{mixed_spaces_and_tabs, MixedSpacesAndTabs};
 pub use not_tests::{not_tests, NotInTest, NotIsTest};
-pub use space_around_operator::{
-    space_around_operator, MultipleSpacesAfterOperator, MultipleSpacesBeforeOperator,
-    TabAfterOperator, TabBeforeOperator,
-};
 pub use tab_indentation::{tab_indentation, TabIndentation};
 pub use trailing_whitespace::{trailing_whitespace, BlankLineWithWhitespace, TrailingWhitespace};
 pub use type_comparison::{type_comparison, TypeComparison};
-pub use whitespace_around_keywords::{
-    whitespace_around_keywords, MultipleSpacesAfterKeyword, MultipleSpacesBeforeKeyword,
-    TabAfterKeyword, TabBeforeKeyword,
-};
-pub use whitespace_around_named_parameter_equals::{
-    whitespace_around_named_parameter_equals, MissingWhitespaceAroundParameterEquals,
-    UnexpectedSpacesAroundKeywordParameterEquals,
-};
-pub use whitespace_before_comment::{
-    whitespace_before_comment, MultipleLeadingHashesForBlockComment, NoSpaceAfterBlockComment,
-    NoSpaceAfterInlineComment, TooFewSpacesBeforeInlineComment,
-};
-pub use whitespace_before_parameters::{whitespace_before_parameters, WhitespaceBeforeParameters};

 mod ambiguous_class_name;
 mod ambiguous_function_name;
@@ -65,81 +31,16 @@ mod bare_except;
 mod compound_statements;
 mod doc_line_too_long;
 mod errors;
-mod extraneous_whitespace;
 mod imports;
-mod indentation;
 mod invalid_escape_sequence;
 mod lambda_assignment;
 mod line_too_long;
 mod literal_comparisons;
+#[cfg(feature = "logical_lines")]
+pub(crate) mod logical_lines;
 mod missing_newline_at_end_of_file;
-mod missing_whitespace;
-mod missing_whitespace_after_keyword;
-mod missing_whitespace_around_operator;
 mod mixed_spaces_and_tabs;
 mod not_tests;
-mod space_around_operator;
 mod tab_indentation;
 mod trailing_whitespace;
 mod type_comparison;
-mod whitespace_around_keywords;
-mod whitespace_around_named_parameter_equals;
-mod whitespace_before_comment;
-mod whitespace_before_parameters;
-
-#[allow(unused)]
-enum Whitespace {
-    None,
-    Single,
-    Many,
-    Tab,
-}
-
-impl Whitespace {
-    #[allow(dead_code)]
-    fn leading(content: &str) -> (usize, Self) {
-        let mut offset = 0;
-        let mut kind = Self::None;
-
-        for c in content.chars() {
-            if c == '\t' {
-                kind = Self::Tab;
-                offset += 1;
-            } else if c.is_whitespace() {
-                kind = match kind {
-                    Whitespace::None => Whitespace::Single,
-                    Whitespace::Single | Whitespace::Many => Whitespace::Many,
-                    Whitespace::Tab => Whitespace::Tab,
-                };
-                offset += c.len_utf8();
-            } else {
-                break;
-            }
-        }
-
-        (offset, kind)
-    }
-
-    #[allow(dead_code)]
-    fn trailing(content: &str) -> (Self, usize) {
-        let mut count = 0u32;
-        let mut offset = 0;
-
-        for c in content.chars().rev() {
-            if c == '\t' {
-                return (Self::Tab, offset + 1);
-            } else if c.is_whitespace() {
-                count += 1;
-                offset += c.len_utf8();
-            } else {
-                break;
-            }
-        }
-
-        match count {
-            0 => (Self::None, 0),
-            1 => (Self::Single, offset),
-            _ => (Self::Many, offset),
-        }
-    }
-}

View file

@@ -1,113 +0,0 @@
#![allow(dead_code, unused_imports, unused_variables)]
use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
#[cfg(feature = "logical_lines")]
use crate::rules::pycodestyle::helpers::is_op_token;
#[violation]
pub struct UnexpectedSpacesAroundKeywordParameterEquals;
impl Violation for UnexpectedSpacesAroundKeywordParameterEquals {
#[derive_message_formats]
fn message(&self) -> String {
format!("Unexpected spaces around keyword / parameter equals")
}
}
#[violation]
pub struct MissingWhitespaceAroundParameterEquals;
impl Violation for MissingWhitespaceAroundParameterEquals {
#[derive_message_formats]
fn message(&self) -> String {
format!("Missing whitespace around parameter equals")
}
}
static STARTSWITH_DEF_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^(async\s+def|def)\b").unwrap());
/// E251, E252
#[cfg(feature = "logical_lines")]
pub fn whitespace_around_named_parameter_equals(
tokens: &[(Location, &Tok, Location)],
line: &str,
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
let mut parens = 0;
let mut require_space = false;
let mut no_space = false;
let mut annotated_func_arg = false;
let mut prev_end: Option<&Location> = None;
let in_def = STARTSWITH_DEF_REGEX.is_match(line);
for (start, token, end) in tokens {
if **token == Tok::NonLogicalNewline {
continue;
}
if no_space {
no_space = false;
if Some(start) != prev_end {
diagnostics.push((
*(prev_end.unwrap()),
UnexpectedSpacesAroundKeywordParameterEquals.into(),
));
}
}
if require_space {
require_space = false;
if Some(start) == prev_end {
diagnostics.push((*start, MissingWhitespaceAroundParameterEquals.into()));
}
}
if is_op_token(token) {
if **token == Tok::Lpar || **token == Tok::Lsqb {
parens += 1;
} else if **token == Tok::Rpar || **token == Tok::Rsqb {
parens -= 1;
} else if in_def && **token == Tok::Colon && parens == 1 {
annotated_func_arg = true;
} else if parens == 1 && **token == Tok::Comma {
annotated_func_arg = false;
} else if parens > 0 && **token == Tok::Equal {
if annotated_func_arg && parens == 1 {
require_space = true;
if Some(start) == prev_end {
diagnostics.push((*start, MissingWhitespaceAroundParameterEquals.into()));
}
} else {
no_space = true;
if Some(start) != prev_end {
diagnostics.push((
*(prev_end.unwrap()),
UnexpectedSpacesAroundKeywordParameterEquals.into(),
));
}
}
}
if parens < 1 {
annotated_func_arg = false;
}
}
prev_end = Some(end);
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn whitespace_around_named_parameter_equals(
_tokens: &[(Location, &Tok, Location)],
_line: &str,
) -> Vec<(Location, DiagnosticKind)> {
vec![]
}