perf(pycodestyle): Introduce TokenKind (#3745)

Micha Reiser 2023-03-28 11:22:39 +02:00 committed by GitHub
parent 2fdf98ef4e
commit 000394f428
14 changed files with 849 additions and 513 deletions
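
The heart of the change: the pycodestyle logical-lines rules previously matched on rustpython_parser::Tok, whose variants carry payloads (identifier names, string contents), so tokens had to be held by reference and compared structurally. The new ruff_python_ast::token_kind::TokenKind mirrors Tok without payloads; it is Copy, converted once per token, and kind checks become cheap discriminant comparisons. A minimal sketch of the pattern with a hypothetical three-token language (not the real definitions, which appear in full in the last file below):

/// A lexer-style token whose variants carry payloads: it cannot be `Copy`,
/// so consumers hold `&Tok` and match through references.
enum Tok {
    Name { name: String },
    Comment(String),
    Colon,
}

/// A payload-free mirror of `Tok`: `Copy`, trivially comparable.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum TokenKind {
    Name,
    Comment,
    Colon,
}

impl TokenKind {
    /// Convert once at the boundary; everything downstream sees only `TokenKind`.
    const fn from_token(token: &Tok) -> Self {
        match token {
            Tok::Name { .. } => TokenKind::Name,
            Tok::Comment(_) => TokenKind::Comment,
            Tok::Colon => TokenKind::Colon,
        }
    }
}

fn main() {
    let tok = Tok::Name {
        name: "x".to_string(),
    };
    // `==` on a `Copy` enum replaces `matches!(token, Tok::Name { .. })` on `&Tok`.
    assert_eq!(TokenKind::from_token(&tok), TokenKind::Name);
}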

View file

@@ -1,10 +1,7 @@
-use rustpython_parser::ast::{Cmpop, Expr, ExprKind};
-#[cfg(feature = "logical_lines")]
-use rustpython_parser::Tok;
-use unicode_width::UnicodeWidthStr;
 use ruff_python_ast::helpers::{create_expr, unparse_expr};
 use ruff_python_ast::source_code::Stylist;
+use rustpython_parser::ast::{Cmpop, Expr, ExprKind};
+use unicode_width::UnicodeWidthStr;

 pub fn is_ambiguous_name(name: &str) -> bool {
     name == "l" || name == "I" || name == "O"
@@ -58,179 +55,3 @@ pub fn is_overlong(
    true
}
#[cfg(feature = "logical_lines")]
pub const fn is_keyword_token(token: &Tok) -> bool {
matches!(
token,
Tok::False
| Tok::True
| Tok::None
| Tok::And
| Tok::As
| Tok::Assert
| Tok::Await
| Tok::Break
| Tok::Class
| Tok::Continue
| Tok::Def
| Tok::Del
| Tok::Elif
| Tok::Else
| Tok::Except
| Tok::Finally
| Tok::For
| Tok::From
| Tok::Global
| Tok::If
| Tok::Import
| Tok::In
| Tok::Is
| Tok::Lambda
| Tok::Nonlocal
| Tok::Not
| Tok::Or
| Tok::Pass
| Tok::Raise
| Tok::Return
| Tok::Try
| Tok::While
| Tok::With
| Tok::Yield
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_singleton_token(token: &Tok) -> bool {
matches!(
token,
Tok::False { .. } | Tok::True { .. } | Tok::None { .. },
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_op_token(token: &Tok) -> bool {
matches!(
token,
Tok::Lpar
| Tok::Rpar
| Tok::Lsqb
| Tok::Rsqb
| Tok::Comma
| Tok::Semi
| Tok::Plus
| Tok::Minus
| Tok::Star
| Tok::Slash
| Tok::Vbar
| Tok::Amper
| Tok::Less
| Tok::Greater
| Tok::Equal
| Tok::Dot
| Tok::Percent
| Tok::Lbrace
| Tok::Rbrace
| Tok::NotEqual
| Tok::LessEqual
| Tok::GreaterEqual
| Tok::Tilde
| Tok::CircumFlex
| Tok::LeftShift
| Tok::RightShift
| Tok::DoubleStar
| Tok::PlusEqual
| Tok::MinusEqual
| Tok::StarEqual
| Tok::SlashEqual
| Tok::PercentEqual
| Tok::AmperEqual
| Tok::VbarEqual
| Tok::CircumflexEqual
| Tok::LeftShiftEqual
| Tok::RightShiftEqual
| Tok::DoubleStarEqual
| Tok::DoubleSlash
| Tok::DoubleSlashEqual
| Tok::At
| Tok::AtEqual
| Tok::Rarrow
| Tok::Ellipsis
| Tok::ColonEqual
| Tok::Colon
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_skip_comment_token(token: &Tok) -> bool {
matches!(
token,
Tok::Newline | Tok::Indent | Tok::Dedent | Tok::NonLogicalNewline | Tok::Comment { .. }
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_soft_keyword_token(token: &Tok) -> bool {
matches!(token, Tok::Match | Tok::Case)
}
#[cfg(feature = "logical_lines")]
pub const fn is_arithmetic_token(token: &Tok) -> bool {
matches!(
token,
Tok::DoubleStar | Tok::Star | Tok::Plus | Tok::Minus | Tok::Slash | Tok::At
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_ws_optional_token(token: &Tok) -> bool {
is_arithmetic_token(token)
|| matches!(
token,
Tok::CircumFlex
| Tok::Amper
| Tok::Vbar
| Tok::LeftShift
| Tok::RightShift
| Tok::Percent
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_ws_needed_token(token: &Tok) -> bool {
matches!(
token,
Tok::DoubleStarEqual
| Tok::StarEqual
| Tok::SlashEqual
| Tok::DoubleSlashEqual
| Tok::PlusEqual
| Tok::MinusEqual
| Tok::NotEqual
| Tok::Less
| Tok::Greater
| Tok::PercentEqual
| Tok::CircumflexEqual
| Tok::AmperEqual
| Tok::VbarEqual
| Tok::EqEqual
| Tok::LessEqual
| Tok::GreaterEqual
| Tok::LeftShiftEqual
| Tok::RightShiftEqual
| Tok::Equal
| Tok::And
| Tok::Or
| Tok::In
| Tok::Is
| Tok::Rarrow
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_unary_token(token: &Tok) -> bool {
matches!(
token,
Tok::Plus | Tok::Minus | Tok::Star | Tok::DoubleStar | Tok::RightShift
)
}
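
All of these feature-gated free functions move onto the new enum as const methods: is_keyword_token(token) becomes kind.is_keyword(), is_op_token becomes is_operator(), is_ws_needed_token becomes is_whitespace_needed(), and so on, as the hunks below show. A small usage sketch, assuming the ruff_python_ast crate from this commit is on the dependency path:

use ruff_python_ast::token_kind::TokenKind;

fn classify(kind: TokenKind) -> &'static str {
    // The predicates formerly in pycodestyle::helpers, now methods on the kind itself.
    if kind.is_keyword() {
        "keyword"
    } else if kind.is_soft_keyword() {
        "soft keyword"
    } else if kind.is_operator() {
        "operator"
    } else {
        "other"
    }
}

fn main() {
    assert_eq!(classify(TokenKind::Def), "keyword");
    assert_eq!(classify(TokenKind::Match), "soft keyword");
    assert_eq!(classify(TokenKind::PlusEqual), "operator");
}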

View file

@@ -1,10 +1,10 @@
 use rustpython_parser::ast::Location;
-use rustpython_parser::Tok;

 use super::{LogicalLine, Whitespace};
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;

 /// ## What it does
 /// Checks for the use of extraneous whitespace after "(".
@@ -103,12 +103,12 @@ impl Violation for WhitespaceBeforePunctuation {
 /// E201, E202, E203
 pub(crate) fn extraneous_whitespace(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
-    let mut last_token: Option<&Tok> = None;
+    let mut last_token: Option<TokenKind> = None;
     for token in line.tokens() {
         let kind = token.kind();
         match kind {
-            Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
+            TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => {
                 if !matches!(line.trailing_whitespace(&token), Whitespace::None) {
                     let end = token.end();
                     diagnostics.push((
@@ -117,18 +117,24 @@ pub(crate) fn extraneous_whitespace(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
                     ));
                 }
             }
-            Tok::Rbrace | Tok::Rpar | Tok::Rsqb | Tok::Comma | Tok::Semi | Tok::Colon => {
-                let diagnostic_kind = if matches!(kind, Tok::Comma | Tok::Semi | Tok::Colon) {
-                    DiagnosticKind::from(WhitespaceBeforePunctuation)
-                } else {
-                    DiagnosticKind::from(WhitespaceBeforeCloseBracket)
-                };
+            TokenKind::Rbrace
+            | TokenKind::Rpar
+            | TokenKind::Rsqb
+            | TokenKind::Comma
+            | TokenKind::Semi
+            | TokenKind::Colon => {
+                let diagnostic_kind =
+                    if matches!(kind, TokenKind::Comma | TokenKind::Semi | TokenKind::Colon) {
+                        DiagnosticKind::from(WhitespaceBeforePunctuation)
+                    } else {
+                        DiagnosticKind::from(WhitespaceBeforeCloseBracket)
+                    };

                 if let (Whitespace::Single | Whitespace::Many | Whitespace::Tab, offset) =
                     line.leading_whitespace(&token)
                 {
-                    let start = token.start();
-                    if !matches!(last_token, Some(Tok::Comma)) {
+                    if !matches!(last_token, Some(TokenKind::Comma)) {
+                        let start = token.start();
                         diagnostics.push((
                             Location::new(start.row(), start.column() - offset),
                             diagnostic_kind,

View file

@@ -1,8 +1,8 @@
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;
 use rustpython_parser::ast::Location;
-use rustpython_parser::Tok;

 use super::LogicalLine;
@@ -253,8 +253,8 @@ pub(crate) fn indentation(
         ));
     }
     let indent_expect = prev_logical_line
-        .and_then(|prev_logical_line| prev_logical_line.tokens().trimmed().last())
-        .map_or(false, |t| t.kind() == &Tok::Colon);
+        .and_then(|prev_logical_line| prev_logical_line.tokens_trimmed().last())
+        .map_or(false, |t| t.kind() == TokenKind::Colon);
     if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) {
         diagnostics.push((

View file

@@ -1,27 +1,36 @@
-use itertools::Itertools;
-use rustpython_parser::Tok;

 use super::LogicalLine;
 use ruff_diagnostics::Edit;
 use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic};
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;
 use ruff_python_ast::types::Range;

 #[violation]
 pub struct MissingWhitespace {
-    pub token: String,
+    pub token: TokenKind,
+}
+
+impl MissingWhitespace {
+    fn token_text(&self) -> char {
+        match self.token {
+            TokenKind::Colon => ':',
+            TokenKind::Semi => ';',
+            TokenKind::Comma => ',',
+            _ => unreachable!(),
+        }
+    }
 }

 impl AlwaysAutofixableViolation for MissingWhitespace {
     #[derive_message_formats]
     fn message(&self) -> String {
-        let MissingWhitespace { token } = self;
-        format!("Missing whitespace after {token}")
+        let token = self.token_text();
+        format!("Missing whitespace after '{token}'")
     }

     fn autofix_title(&self) -> String {
-        let MissingWhitespace { token } = self;
-        format!("Added missing whitespace after {token}")
+        let token = self.token_text();
+        format!("Added missing whitespace after '{token}'")
     }
 }
@@ -29,45 +38,47 @@ impl AlwaysAutofixableViolation for MissingWhitespace {
 pub(crate) fn missing_whitespace(line: &LogicalLine, autofix: bool) -> Vec<Diagnostic> {
     let mut diagnostics = vec![];

-    let mut num_lsqb = 0u32;
-    let mut num_rsqb = 0u32;
+    let mut open_parentheses = 0u32;
     let mut prev_lsqb = None;
     let mut prev_lbrace = None;
+    let mut iter = line.tokens().iter().peekable();

-    for (token, next_token) in line.tokens().iter().tuple_windows() {
+    while let Some(token) = iter.next() {
         let kind = token.kind();
         match kind {
-            Tok::Lsqb => {
-                num_lsqb += 1;
+            TokenKind::Lsqb => {
+                open_parentheses += 1;
                 prev_lsqb = Some(token.start());
             }
-            Tok::Rsqb => {
-                num_rsqb += 1;
+            TokenKind::Rsqb => {
+                open_parentheses += 1;
             }
-            Tok::Lbrace => {
+            TokenKind::Lbrace => {
                 prev_lbrace = Some(token.start());
             }
-            Tok::Comma | Tok::Semi | Tok::Colon => {
+            TokenKind::Comma | TokenKind::Semi | TokenKind::Colon => {
                 let after = line.text_after(&token);
                 if !after.chars().next().map_or(false, char::is_whitespace) {
-                    match (kind, next_token.kind()) {
-                        (Tok::Colon, _) if num_lsqb > num_rsqb && prev_lsqb > prev_lbrace => {
-                            continue; // Slice syntax, no space required
-                        }
-                        (Tok::Comma, Tok::Rpar | Tok::Rsqb) => {
-                            continue; // Allow tuple with only one element: (3,)
-                        }
-                        (Tok::Colon, Tok::Equal) => {
-                            continue; // Allow assignment expression
-                        }
-                        _ => {}
+                    if let Some(next_token) = iter.peek() {
+                        match (kind, next_token.kind()) {
+                            (TokenKind::Colon, _)
+                                if open_parentheses > 0 && prev_lsqb > prev_lbrace =>
+                            {
+                                continue; // Slice syntax, no space required
+                            }
+                            (TokenKind::Comma, TokenKind::Rpar | TokenKind::Rsqb) => {
+                                continue; // Allow tuple with only one element: (3,)
+                            }
+                            (TokenKind::Colon, TokenKind::Equal) => {
+                                continue; // Allow assignment expression
+                            }
+                            _ => {}
+                        }
                     }

-                    let kind = MissingWhitespace {
-                        token: kind.to_string(),
-                    };
+                    let kind = MissingWhitespace { token: kind };

                     let (start, end) = token.range();
                     let mut diagnostic = Diagnostic::new(kind, Range::new(start, start));

View file

@@ -1,13 +1,12 @@
 use itertools::Itertools;
 use rustpython_parser::ast::Location;
-use rustpython_parser::Tok;

 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;

 use super::LogicalLineTokens;
-use crate::rules::pycodestyle::helpers::{is_keyword_token, is_singleton_token};

 #[violation]
 pub struct MissingWhitespaceAfterKeyword;
@@ -29,12 +28,12 @@ pub(crate) fn missing_whitespace_after_keyword(
         let tok0_kind = tok0.kind();
         let tok1_kind = tok1.kind();

-        if is_keyword_token(tok0_kind)
-            && !(is_singleton_token(tok0_kind)
-                || matches!(tok0_kind, Tok::Async | Tok::Await)
-                || tok0_kind == &Tok::Except && tok1_kind == &Tok::Star
-                || tok0_kind == &Tok::Yield && tok1_kind == &Tok::Rpar
-                || matches!(tok1_kind, Tok::Colon | Tok::Newline))
+        if tok0_kind.is_keyword()
+            && !(tok0_kind.is_singleton()
+                || matches!(tok0_kind, TokenKind::Async | TokenKind::Await)
+                || tok0_kind == TokenKind::Except && tok1_kind == TokenKind::Star
+                || tok0_kind == TokenKind::Yield && tok1_kind == TokenKind::Rpar
+                || matches!(tok1_kind, TokenKind::Colon | TokenKind::Newline))
             && tok0.end() == tok1.start()
         {
             diagnostics.push((tok0.end(), MissingWhitespaceAfterKeyword.into()));

View file

@@ -1,14 +1,10 @@
 use rustpython_parser::ast::Location;
-use rustpython_parser::Tok;

 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;

-use crate::rules::pycodestyle::helpers::{
-    is_arithmetic_token, is_keyword_token, is_op_token, is_skip_comment_token,
-    is_soft_keyword_token, is_unary_token, is_ws_needed_token, is_ws_optional_token,
-};
 use crate::rules::pycodestyle::rules::logical_lines::LogicalLineTokens;

 // E225
@@ -66,29 +62,27 @@ pub(crate) fn missing_whitespace_around_operator(
     let mut needs_space_aux: Option<bool> = None;
     let mut prev_end_aux: Option<Location> = None;
     let mut parens = 0u32;
-    let mut prev_type: Option<&Tok> = None;
+    let mut prev_type: Option<TokenKind> = None;
     let mut prev_end: Option<Location> = None;

     for token in tokens {
         let kind = token.kind();

-        if is_skip_comment_token(kind) {
+        if kind.is_skip_comment() {
             continue;
         }

         match kind {
-            Tok::Lpar | Tok::Lambda => parens += 1,
-            Tok::Rpar => parens -= 1,
+            TokenKind::Lpar | TokenKind::Lambda => parens += 1,
+            TokenKind::Rpar => parens -= 1,
             _ => {}
         };

-        let needs_space = (needs_space_main.is_some() && needs_space_main.unwrap())
-            || needs_space_aux.is_some()
-            || prev_end_aux.is_some();
+        let needs_space =
+            needs_space_main == Some(true) || needs_space_aux.is_some() || prev_end_aux.is_some();

         if needs_space {
             if Some(token.start()) != prev_end {
-                if !(needs_space_main.is_some() && needs_space_main.unwrap())
-                    && (needs_space_aux.is_none() || !needs_space_aux.unwrap())
-                {
+                if needs_space_main != Some(true) && needs_space_aux != Some(true) {
                     diagnostics.push((
                         prev_end_aux.unwrap(),
                         MissingWhitespaceAroundOperator.into(),
@@ -97,27 +91,27 @@ pub(crate) fn missing_whitespace_around_operator(
                 needs_space_main = Some(false);
                 needs_space_aux = None;
                 prev_end_aux = None;
-            } else if kind == &Tok::Greater && matches!(prev_type, Some(Tok::Less | Tok::Minus)) {
+            } else if kind == TokenKind::Greater
+                && matches!(prev_type, Some(TokenKind::Less | TokenKind::Minus))
+            {
                 // Tolerate the "<>" operator, even if running Python 3
                 // Deal with Python 3's annotated return value "->"
-            } else if prev_type == Some(&Tok::Slash)
-                && matches!(kind, Tok::Comma | Tok::Rpar | Tok::Colon)
-                || (prev_type == Some(&Tok::Rpar) && kind == &Tok::Colon)
+            } else if prev_type == Some(TokenKind::Slash)
+                && matches!(kind, TokenKind::Comma | TokenKind::Rpar | TokenKind::Colon)
+                || (prev_type == Some(TokenKind::Rpar) && kind == TokenKind::Colon)
             {
                 // Tolerate the "/" operator in function definition
                 // For more info see PEP570
             } else {
-                if (needs_space_main.is_some() && needs_space_main.unwrap())
-                    || (needs_space_aux.is_some() && needs_space_aux.unwrap())
-                {
+                if needs_space_main == Some(true) || needs_space_aux == Some(true) {
                     diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into()));
-                } else if prev_type != Some(&Tok::DoubleStar) {
-                    if prev_type == Some(&Tok::Percent) {
+                } else if prev_type != Some(TokenKind::DoubleStar) {
+                    if prev_type == Some(TokenKind::Percent) {
                         diagnostics.push((
                             prev_end_aux.unwrap(),
                             MissingWhitespaceAroundModuloOperator.into(),
                         ));
-                    } else if !is_arithmetic_token(prev_type.unwrap()) {
+                    } else if !prev_type.unwrap().is_arithmetic() {
                         diagnostics.push((
                             prev_end_aux.unwrap(),
                             MissingWhitespaceAroundBitwiseOrShiftOperator.into(),
@@ -133,28 +127,30 @@ pub(crate) fn missing_whitespace_around_operator(
                 needs_space_aux = None;
                 prev_end_aux = None;
             }
-        } else if (is_op_token(kind) || matches!(kind, Tok::Name { .. })) && prev_end.is_some() {
-            if kind == &Tok::Equal && parens > 0 {
+        } else if (kind.is_operator() || matches!(kind, TokenKind::Name)) && prev_end.is_some() {
+            if kind == TokenKind::Equal && parens > 0 {
                 // Allow keyword args or defaults: foo(bar=None).
-            } else if is_ws_needed_token(kind) {
+            } else if kind.is_whitespace_needed() {
                 needs_space_main = Some(true);
                 needs_space_aux = None;
                 prev_end_aux = None;
-            } else if is_unary_token(kind) {
+            } else if kind.is_unary() {
                 // Check if the operator is used as a binary operator
                 // Allow unary operators: -123, -x, +1.
                 // Allow argument unpacking: foo(*args, **kwargs)
                 if let Some(prev_type) = prev_type {
-                    if (matches!(prev_type, Tok::Rpar | Tok::Rsqb | Tok::Rbrace))
-                        || (!is_op_token(prev_type) && !is_keyword_token(prev_type))
-                            && (!is_soft_keyword_token(prev_type))
+                    if (matches!(
+                        prev_type,
+                        TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace
+                    )) || (!prev_type.is_operator() && !prev_type.is_keyword())
+                        && (!prev_type.is_soft_keyword())
                     {
                         needs_space_main = None;
                         needs_space_aux = None;
                         prev_end_aux = None;
                     }
                 }
-            } else if is_ws_optional_token(kind) {
+            } else if kind.is_whitespace_optional() {
                 needs_space_main = None;
                 needs_space_aux = None;
                 prev_end_aux = None;
@@ -163,13 +159,9 @@ pub(crate) fn missing_whitespace_around_operator(
             if needs_space_main.is_none() {
                 // Surrounding space is optional, but ensure that
                 // trailing space matches opening space
-                needs_space_main = None;
                 prev_end_aux = prev_end;
                 needs_space_aux = Some(Some(token.start()) != prev_end_aux);
-            } else if needs_space_main.is_some()
-                && needs_space_main.unwrap()
-                && Some(token.start()) == prev_end_aux
-            {
+            } else if needs_space_main == Some(true) && Some(token.start()) == prev_end_aux {
                 // A needed opening space was not found
                 diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into()));
                 needs_space_main = Some(false);

View file

@@ -1,16 +1,13 @@
 use bitflags::bitflags;
 use rustpython_parser::ast::Location;
 use rustpython_parser::lexer::LexResult;
-use rustpython_parser::Tok;
 use std::fmt::{Debug, Formatter};
 use std::iter::FusedIterator;
-use std::ops::Deref;

 use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::token_kind::TokenKind;
 use ruff_python_ast::types::Range;

-use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token};

 pub(crate) use extraneous_whitespace::{
     extraneous_whitespace, WhitespaceAfterOpenBracket, WhitespaceBeforeCloseBracket,
     WhitespaceBeforePunctuation,
@@ -73,12 +70,15 @@ bitflags! {
         const KEYWORD = 0b0000_1000;
         /// Whether the logical line contains a comment.
         const COMMENT = 0b0001_0000;
+        /// Whether the logical line contains any non trivia token (no comment, newline, or in/dedent)
+        const NON_TRIVIA = 0b0010_0000;
     }
 }

 #[derive(Clone)]
 pub(crate) struct LogicalLines<'a> {
-    tokens: Tokens<'a>,
+    tokens: Tokens,
     lines: Vec<Line>,
     locator: &'a Locator<'a>,
 }
@@ -91,16 +91,19 @@ impl<'a> LogicalLines<'a> {
         let mut parens: u32 = 0;

         for (start, token, end) in tokens.iter().flatten() {
-            builder.push_token(*start, token, *end);
+            let token_kind = TokenKind::from_token(token);
+            builder.push_token(*start, token_kind, *end);

-            match token {
-                Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
+            match token_kind {
+                TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => {
                     parens += 1;
                 }
-                Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
+                TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
                     parens -= 1;
                 }
-                Tok::Newline | Tok::NonLogicalNewline | Tok::Comment { .. } if parens == 0 => {
+                TokenKind::Newline | TokenKind::NonLogicalNewline | TokenKind::Comment
                    if parens == 0 =>
                {
                     builder.finish_line();
                 }
                 _ => {}
@@ -153,7 +156,7 @@ pub(crate) struct LogicalLine<'a> {
 impl<'a> LogicalLine<'a> {
     /// Returns `true` if this is a comment only line
     pub fn is_comment_only(&self) -> bool {
-        self.flags() == TokenFlags::COMMENT && self.tokens().trimmed().is_empty()
+        self.flags() == TokenFlags::COMMENT
     }

     /// Returns logical line's text including comments, indents, dedent and trailing new lines.
@@ -167,15 +170,53 @@ impl<'a> LogicalLine<'a> {
         self.tokens_trimmed().text()
     }

-    #[cfg(test)]
     pub fn tokens_trimmed(&self) -> LogicalLineTokens<'a> {
-        self.tokens().trimmed()
+        let mut front = self.line.tokens_start as usize;
+        let mut back = self.line.tokens_end as usize;
+
+        let mut kinds = self.lines.tokens.kinds[front..back].iter();
+
+        for kind in kinds.by_ref() {
+            if !matches!(
+                kind,
+                TokenKind::Newline
+                    | TokenKind::NonLogicalNewline
+                    | TokenKind::Indent
+                    | TokenKind::Dedent
+                    | TokenKind::Comment
+            ) {
+                break;
+            }
+            front += 1;
+        }
+
+        for kind in kinds.rev() {
+            if !matches!(
+                kind,
+                TokenKind::Newline
+                    | TokenKind::NonLogicalNewline
+                    | TokenKind::Indent
+                    | TokenKind::Dedent
+                    | TokenKind::Comment
+            ) {
+                break;
+            }
+            back -= 1;
+        }
+
+        LogicalLineTokens {
+            lines: self.lines,
+            front,
+            back,
+        }
     }

     /// Returns the text after `token`
+    #[inline]
     pub fn text_after(&self, token: &LogicalLineToken<'a>) -> &str {
         debug_assert!(
-            (self.line.tokens_start..self.line.tokens_end).contains(&token.position),
+            (self.line.tokens_start as usize..self.line.tokens_end as usize)
+                .contains(&token.position),
             "Token does not belong to this line"
         );
@@ -187,9 +228,11 @@ impl<'a> LogicalLine<'a> {
     }

     /// Returns the text before `token`
+    #[inline]
     pub fn text_before(&self, token: &LogicalLineToken<'a>) -> &str {
         debug_assert!(
-            (self.line.tokens_start..self.line.tokens_end).contains(&token.position),
+            (self.line.tokens_start as usize..self.line.tokens_end as usize)
+                .contains(&token.position),
             "Token does not belong to this line"
         );
@@ -214,8 +257,8 @@ impl<'a> LogicalLine<'a> {
     pub fn tokens(&self) -> LogicalLineTokens<'a> {
         LogicalLineTokens {
             lines: self.lines,
-            front: self.line.tokens_start,
-            back: self.line.tokens_end,
+            front: self.line.tokens_start as usize,
+            back: self.line.tokens_end as usize,
         }
     }
@@ -284,8 +327,8 @@ impl FusedIterator for LogicalLinesIter<'_> {}
 /// The tokens of a logical line
 pub(crate) struct LogicalLineTokens<'a> {
     lines: &'a LogicalLines<'a>,
-    front: u32,
-    back: u32,
+    front: usize,
+    back: usize,
 }

 impl<'a> LogicalLineTokens<'a> {
@@ -297,56 +340,6 @@ impl<'a> LogicalLineTokens<'a> {
         }
     }

-    pub fn len(&self) -> usize {
-        (self.back - self.front) as usize
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    pub fn trimmed(&self) -> LogicalLineTokens<'a> {
-        let tokens = &self.lines.tokens[self.front as usize..self.back as usize];
-        let mut front = self.front;
-        let mut back = self.back;
-
-        let mut iter = tokens.iter();
-
-        for (_, kind, _) in iter.by_ref() {
-            if !matches!(
-                kind,
-                Tok::Newline
-                    | Tok::NonLogicalNewline
-                    | Tok::Indent
-                    | Tok::Dedent
-                    | Tok::Comment(..)
-            ) {
-                break;
-            }
-            front += 1;
-        }
-
-        for (_, kind, _) in iter.rev() {
-            if !matches!(
-                kind,
-                Tok::Newline
-                    | Tok::NonLogicalNewline
-                    | Tok::Indent
-                    | Tok::Dedent
-                    | Tok::Comment(..)
-            ) {
-                break;
-            }
-            back -= 1;
-        }
-
-        LogicalLineTokens {
-            lines: self.lines,
-            front,
-            back,
-        }
-    }
-
     pub fn text(&self) -> &'a str {
         match (self.first(), self.last()) {
             (Some(first), Some(last)) => {
@@ -394,9 +387,9 @@ impl Debug for LogicalLineTokens<'_> {

 /// Iterator over the tokens of a [`LogicalLine`]
 pub(crate) struct LogicalLineTokensIter<'a> {
-    tokens: &'a Tokens<'a>,
-    front: u32,
-    back: u32,
+    tokens: &'a Tokens,
+    front: usize,
+    back: usize,
 }

 impl<'a> Iterator for LogicalLineTokensIter<'a> {
@@ -417,7 +410,7 @@ impl<'a> Iterator for LogicalLineTokensIter<'a> {
     }

     fn size_hint(&self) -> (usize, Option<usize>) {
-        let len = (self.back - self.front) as usize;
+        let len = self.back - self.front;
         (len, Some(len))
     }
 }
@@ -443,35 +436,42 @@ impl DoubleEndedIterator for LogicalLineTokensIter<'_> {}
 /// A token of a [`LogicalLine`]
 #[derive(Clone)]
 pub(crate) struct LogicalLineToken<'a> {
-    tokens: &'a Tokens<'a>,
-    position: u32,
+    tokens: &'a Tokens,
+    position: usize,
 }

 impl<'a> LogicalLineToken<'a> {
     /// Returns the token's kind
-    pub fn kind(&self) -> &'a Tok {
+    #[inline]
+    pub fn kind(&self) -> TokenKind {
         #[allow(unsafe_code)]
-        let (_, token, _) = unsafe { *self.tokens.get_unchecked(self.position as usize) };
-
-        token
+        unsafe {
+            *self.tokens.kinds.get_unchecked(self.position)
+        }
     }

     /// Returns the token's start location
+    #[inline]
     pub fn start(&self) -> Location {
-        self.range().0
+        #[allow(unsafe_code)]
+        unsafe {
+            *self.tokens.starts.get_unchecked(self.position)
+        }
     }

     /// Returns the token's end location
+    #[inline]
     pub fn end(&self) -> Location {
-        self.range().1
+        #[allow(unsafe_code)]
+        unsafe {
+            *self.tokens.ends.get_unchecked(self.position)
+        }
     }

     /// Returns a tuple with the token's `(start, end)` locations
+    #[inline]
     pub fn range(&self) -> (Location, Location) {
-        #[allow(unsafe_code)]
-        let &(start, _, end) = unsafe { self.tokens.get_unchecked(self.position as usize) };
-
-        (start, end)
+        (self.start(), self.end())
     }
 }
@@ -547,13 +547,13 @@ struct CurrentLine {

 /// Builder for [`LogicalLines`]
 #[derive(Debug, Default)]
-struct LogicalLinesBuilder<'a> {
-    tokens: Tokens<'a>,
+struct LogicalLinesBuilder {
+    tokens: Tokens,
     lines: Vec<Line>,
     current_line: Option<CurrentLine>,
 }

-impl<'a> LogicalLinesBuilder<'a> {
+impl LogicalLinesBuilder {
     fn with_capacity(tokens: usize) -> Self {
         Self {
             tokens: Tokens::with_capacity(tokens),
@@ -563,7 +563,7 @@ impl<'a> LogicalLinesBuilder<'a> {
     // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each tokens is at least one character long
     #[allow(clippy::cast_possible_truncation)]
-    fn push_token(&mut self, start: Location, token: &'a Tok, end: Location) {
+    fn push_token(&mut self, start: Location, kind: TokenKind, end: Location) {
         let tokens_start = self.tokens.len();

         let line = self.current_line.get_or_insert_with(|| CurrentLine {
@@ -571,27 +571,44 @@ impl<'a> LogicalLinesBuilder<'a> {
             tokens_start: tokens_start as u32,
         });

-        if matches!(token, Tok::Comment { .. }) {
+        if matches!(kind, TokenKind::Comment) {
             line.flags.insert(TokenFlags::COMMENT);
-        } else if is_op_token(token) {
+        } else if kind.is_operator() {
             line.flags.insert(TokenFlags::OPERATOR);

             line.flags.set(
                 TokenFlags::BRACKET,
                 matches!(
-                    token,
-                    Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
+                    kind,
+                    TokenKind::Lpar
+                        | TokenKind::Lsqb
+                        | TokenKind::Lbrace
+                        | TokenKind::Rpar
+                        | TokenKind::Rsqb
+                        | TokenKind::Rbrace
                 ),
             );
         }

-        if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
+        if matches!(kind, TokenKind::Comma | TokenKind::Semi | TokenKind::Colon) {
             line.flags.insert(TokenFlags::PUNCTUATION);
-        } else if is_keyword_token(token) {
+        } else if kind.is_keyword() {
             line.flags.insert(TokenFlags::KEYWORD);
         }

-        self.tokens.push(token, start, end);
+        line.flags.set(
+            TokenFlags::NON_TRIVIA,
+            !matches!(
+                kind,
+                TokenKind::Comment
+                    | TokenKind::Newline
+                    | TokenKind::NonLogicalNewline
+                    | TokenKind::Dedent
+                    | TokenKind::Indent
+            ),
+        );
+
+        self.tokens.push(kind, start, end);
     }

     // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each tokens is at least one character long
@@ -606,7 +623,7 @@ impl<'a> LogicalLinesBuilder<'a> {
         }
     }

-    fn finish(mut self, locator: &'a Locator<'a>) -> LogicalLines<'a> {
+    fn finish<'a>(mut self, locator: &'a Locator<'a>) -> LogicalLines<'a> {
         self.finish_line();

         LogicalLines {
@@ -625,29 +642,36 @@ struct Line {
 }

 #[derive(Debug, Clone, Default)]
-struct Tokens<'a>(Vec<(Location, &'a Tok, Location)>);
+struct Tokens {
+    /// The token kinds
+    kinds: Vec<TokenKind>,

-impl<'a> Tokens<'a> {
+    /// The start locations
+    starts: Vec<Location>,
+
+    /// The end locations
+    ends: Vec<Location>,
+}
+
+impl Tokens {
     /// Creates new tokens with a reserved size of `capacity`
     fn with_capacity(capacity: usize) -> Self {
-        Self(Vec::with_capacity(capacity))
+        Self {
+            kinds: Vec::with_capacity(capacity),
+            starts: Vec::with_capacity(capacity),
+            ends: Vec::with_capacity(capacity),
+        }
     }

     /// Returns the number of stored tokens.
     fn len(&self) -> usize {
-        self.0.len()
+        self.kinds.len()
     }

     /// Adds a new token with the given `kind` and `start`, `end` location.
-    fn push(&mut self, kind: &'a Tok, start: Location, end: Location) {
-        self.0.push((start, kind, end));
-    }
-}
-
-impl<'a> Deref for Tokens<'a> {
-    type Target = [(Location, &'a Tok, Location)];
-
-    fn deref(&self) -> &Self::Target {
-        &self.0
-    }
+    fn push(&mut self, kind: TokenKind, start: Location, end: Location) {
+        self.kinds.push(kind);
+        self.starts.push(start);
+        self.ends.push(end);
+    }
 }
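
The Tokens change above is an array-of-structs to struct-of-arrays conversion: Vec<(Location, &'a Tok, Location)> becomes three parallel vectors, dropping the 'a lifetime from the whole builder and letting kind-only scans (like tokens_trimmed) walk a dense Vec<TokenKind> without dragging the two Locations through the cache. A self-contained sketch of the layout, with a simplified stand-in for Location (not the crate's type):

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
struct Location {
    row: u32,
    column: u32,
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum TokenKind {
    Name,
    Equal,
    Newline,
}

/// Struct-of-arrays storage: the i-th token is (kinds[i], starts[i], ends[i]).
#[derive(Default)]
struct Tokens {
    kinds: Vec<TokenKind>,
    starts: Vec<Location>,
    ends: Vec<Location>,
}

impl Tokens {
    fn push(&mut self, kind: TokenKind, start: Location, end: Location) {
        self.kinds.push(kind);
        self.starts.push(start);
        self.ends.push(end);
    }
}

fn main() {
    let mut tokens = Tokens::default();
    tokens.push(
        TokenKind::Name,
        Location { row: 1, column: 0 },
        Location { row: 1, column: 1 },
    );
    tokens.push(
        TokenKind::Equal,
        Location { row: 1, column: 2 },
        Location { row: 1, column: 3 },
    );
    // A kind-only query touches a single dense array.
    let equals = tokens.kinds.iter().filter(|k| **k == TokenKind::Equal).count();
    assert_eq!(equals, 1);
}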

View file

@@ -1,10 +1,10 @@
 use rustpython_parser::ast::Location;
-use rustpython_parser::Tok;

 use super::{LogicalLine, Whitespace};
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;

 /// ## What it does
 /// Checks for extraneous tabs before an operator.
@@ -131,7 +131,7 @@ pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
         let is_operator = is_operator_token(token.kind());

         if is_operator {
-            let (start, end) = token.range();
+            let start = token.start();

             if !after_operator {
                 match line.leading_whitespace(&token) {
@@ -148,8 +148,14 @@ pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
             }

             match line.trailing_whitespace(&token) {
-                Whitespace::Tab => diagnostics.push((end, TabAfterOperator.into())),
-                Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterOperator.into())),
+                Whitespace::Tab => {
+                    let end = token.end();
+                    diagnostics.push((end, TabAfterOperator.into()));
+                }
+                Whitespace::Many => {
+                    let end = token.end();
+                    diagnostics.push((end, MultipleSpacesAfterOperator.into()));
+                }
                 _ => {}
             }
         }
@@ -160,39 +166,39 @@ pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
     diagnostics
 }

-const fn is_operator_token(token: &Tok) -> bool {
+const fn is_operator_token(token: TokenKind) -> bool {
     matches!(
         token,
-        Tok::Plus
-            | Tok::Minus
-            | Tok::Star
-            | Tok::Slash
-            | Tok::Vbar
-            | Tok::Amper
-            | Tok::Less
-            | Tok::Greater
-            | Tok::Equal
-            | Tok::Percent
-            | Tok::NotEqual
-            | Tok::LessEqual
-            | Tok::GreaterEqual
-            | Tok::CircumFlex
-            | Tok::LeftShift
-            | Tok::RightShift
-            | Tok::DoubleStar
-            | Tok::PlusEqual
-            | Tok::MinusEqual
-            | Tok::StarEqual
-            | Tok::SlashEqual
-            | Tok::PercentEqual
-            | Tok::AmperEqual
-            | Tok::VbarEqual
-            | Tok::CircumflexEqual
-            | Tok::LeftShiftEqual
-            | Tok::RightShiftEqual
-            | Tok::DoubleStarEqual
-            | Tok::DoubleSlash
-            | Tok::DoubleSlashEqual
-            | Tok::ColonEqual
+        TokenKind::Plus
+            | TokenKind::Minus
+            | TokenKind::Star
+            | TokenKind::Slash
+            | TokenKind::Vbar
+            | TokenKind::Amper
+            | TokenKind::Less
+            | TokenKind::Greater
+            | TokenKind::Equal
+            | TokenKind::Percent
+            | TokenKind::NotEqual
+            | TokenKind::LessEqual
+            | TokenKind::GreaterEqual
+            | TokenKind::CircumFlex
+            | TokenKind::LeftShift
+            | TokenKind::RightShift
+            | TokenKind::DoubleStar
+            | TokenKind::PlusEqual
+            | TokenKind::MinusEqual
+            | TokenKind::StarEqual
+            | TokenKind::SlashEqual
+            | TokenKind::PercentEqual
+            | TokenKind::AmperEqual
+            | TokenKind::VbarEqual
+            | TokenKind::CircumflexEqual
+            | TokenKind::LeftShiftEqual
+            | TokenKind::RightShiftEqual
+            | TokenKind::DoubleStarEqual
+            | TokenKind::DoubleSlash
+            | TokenKind::DoubleSlashEqual
+            | TokenKind::ColonEqual
     )
 }

View file

@@ -1,7 +1,6 @@
 use rustpython_parser::ast::Location;

 use super::{LogicalLine, Whitespace};
-use crate::rules::pycodestyle::helpers::is_keyword_token;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
@@ -115,32 +114,40 @@ pub(crate) fn whitespace_around_keywords(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
     let mut after_keyword = false;

     for token in line.tokens() {
-        let is_keyword = is_keyword_token(token.kind());
+        let is_keyword = token.kind().is_keyword();
         if is_keyword {
-            let (start, end) = token.range();
             if !after_keyword {
                 match line.leading_whitespace(&token) {
-                    (Whitespace::Tab, offset) => diagnostics.push((
-                        Location::new(start.row(), start.column() - offset),
-                        TabBeforeKeyword.into(),
-                    )),
-                    (Whitespace::Many, offset) => diagnostics.push((
-                        Location::new(start.row(), start.column() - offset),
-                        MultipleSpacesBeforeKeyword.into(),
-                    )),
+                    (Whitespace::Tab, offset) => {
+                        let start = token.start();
+                        diagnostics.push((
+                            Location::new(start.row(), start.column() - offset),
+                            TabBeforeKeyword.into(),
+                        ));
+                    }
+                    (Whitespace::Many, offset) => {
+                        let start = token.start();
+                        diagnostics.push((
+                            Location::new(start.row(), start.column() - offset),
+                            MultipleSpacesBeforeKeyword.into(),
+                        ));
+                    }
                     _ => {}
                 }
             }

             match line.trailing_whitespace(&token) {
-                Whitespace::Tab => diagnostics.push((end, TabAfterKeyword.into())),
-                Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterKeyword.into())),
+                Whitespace::Tab => {
+                    let end = token.end();
+                    diagnostics.push((end, TabAfterKeyword.into()));
+                }
+                Whitespace::Many => {
+                    let end = token.end();
+                    diagnostics.push((end, MultipleSpacesAfterKeyword.into()));
+                }
                 _ => {}
             }
         }

         after_keyword = is_keyword;
     }

View file

@@ -1,12 +1,10 @@
-use rustpython_parser::ast::Location;
-use rustpython_parser::Tok;

 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;
+use rustpython_parser::ast::Location;

 use super::LogicalLineTokens;
-use crate::rules::pycodestyle::helpers::is_op_token;

 #[violation]
 pub struct UnexpectedSpacesAroundKeywordParameterEquals;
@@ -31,8 +29,8 @@ impl Violation for MissingWhitespaceAroundParameterEquals {
 fn is_in_def(tokens: &LogicalLineTokens) -> bool {
     for token in tokens {
         match token.kind() {
-            Tok::Async | Tok::Indent | Tok::Dedent => continue,
-            Tok::Def => return true,
+            TokenKind::Async | TokenKind::Indent | TokenKind::Dedent => continue,
+            TokenKind::Def => return true,
             _ => return false,
         }
     }
@@ -45,76 +43,86 @@ pub(crate) fn whitespace_around_named_parameter_equals(
     tokens: &LogicalLineTokens,
 ) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
-    let mut parens = 0;
-    let mut require_space = false;
-    let mut no_space = false;
+    let mut parens = 0u32;
     let mut annotated_func_arg = false;
     let mut prev_end: Option<Location> = None;

     let in_def = is_in_def(tokens);
+    let mut iter = tokens.iter().peekable();

-    for token in tokens {
+    while let Some(token) = iter.next() {
         let kind = token.kind();

-        if kind == &Tok::NonLogicalNewline {
+        if kind == TokenKind::NonLogicalNewline {
             continue;
         }

-        if no_space {
-            no_space = false;
-            if Some(token.start()) != prev_end {
-                diagnostics.push((
-                    prev_end.unwrap(),
-                    UnexpectedSpacesAroundKeywordParameterEquals.into(),
-                ));
-            }
-        }
-
-        if require_space {
-            require_space = false;
-            let start = token.start();
-            if Some(start) == prev_end {
-                diagnostics.push((start, MissingWhitespaceAroundParameterEquals.into()));
-            }
-        }
-
-        if is_op_token(kind) {
-            match kind {
-                Tok::Lpar | Tok::Lsqb => {
-                    parens += 1;
-                }
-                Tok::Rpar | Tok::Rsqb => {
-                    parens -= 1;
-                }
-                Tok::Colon if parens == 1 && in_def => {
-                    annotated_func_arg = true;
-                }
-                Tok::Comma if parens == 1 => {
-                    annotated_func_arg = false;
-                }
-                Tok::Equal if parens > 0 => {
-                    if annotated_func_arg && parens == 1 {
-                        require_space = true;
-                        let start = token.start();
-                        if Some(start) == prev_end {
-                            diagnostics
-                                .push((start, MissingWhitespaceAroundParameterEquals.into()));
-                        }
-                    } else {
-                        no_space = true;
-                        if Some(token.start()) != prev_end {
-                            diagnostics.push((
-                                prev_end.unwrap(),
-                                UnexpectedSpacesAroundKeywordParameterEquals.into(),
-                            ));
-                        }
-                    }
-                }
-                _ => {}
-            }
-            if parens < 1 {
-                annotated_func_arg = false;
-            }
-        }
+        match kind {
+            TokenKind::Lpar | TokenKind::Lsqb => {
+                parens += 1;
+            }
+            TokenKind::Rpar | TokenKind::Rsqb => {
+                parens -= 1;
+                if parens == 0 {
+                    annotated_func_arg = false;
+                }
+            }
+            TokenKind::Colon if parens == 1 && in_def => {
+                annotated_func_arg = true;
+            }
+            TokenKind::Comma if parens == 1 => {
+                annotated_func_arg = false;
+            }
+            TokenKind::Equal if parens > 0 => {
+                if annotated_func_arg && parens == 1 {
+                    let start = token.start();
+                    if Some(start) == prev_end {
+                        diagnostics.push((start, MissingWhitespaceAroundParameterEquals.into()));
+                    }
+
+                    while let Some(next) = iter.peek() {
+                        if next.kind() == TokenKind::NonLogicalNewline {
+                            iter.next();
+                        } else {
+                            let next_start = next.start();
+                            if next_start == token.end() {
+                                diagnostics.push((
+                                    next_start,
+                                    MissingWhitespaceAroundParameterEquals.into(),
+                                ));
+                            }
+                            break;
+                        }
+                    }
+                } else {
+                    if Some(token.start()) != prev_end {
+                        diagnostics.push((
+                            prev_end.unwrap(),
+                            UnexpectedSpacesAroundKeywordParameterEquals.into(),
+                        ));
+                    }
+
+                    while let Some(next) = iter.peek() {
+                        if next.kind() == TokenKind::NonLogicalNewline {
+                            iter.next();
+                        } else {
+                            if next.start() != token.end() {
+                                diagnostics.push((
+                                    token.end(),
+                                    UnexpectedSpacesAroundKeywordParameterEquals.into(),
+                                ));
+                            }
+                            break;
+                        }
+                    }
+                }
+            }
+            _ => {}
+        }

         prev_end = Some(token.end());
     }
     diagnostics

View file

@@ -1,12 +1,11 @@
-use rustpython_parser::ast::Location;
-use rustpython_parser::Tok;

 use super::LogicalLineTokens;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::token_kind::TokenKind;
 use ruff_python_ast::types::Range;
+use rustpython_parser::ast::Location;

 /// ## What it does
 /// Checks if inline comments are separated by at least two spaces.
@@ -147,7 +146,7 @@ pub(crate) fn whitespace_before_comment(
     for token in tokens {
         let kind = token.kind();

-        if let Tok::Comment { .. } = kind {
+        if let TokenKind::Comment = kind {
             let (start, end) = token.range();

             let line = locator.slice(Range::new(
                 Location::new(start.row(), 0),
@@ -194,7 +193,7 @@ pub(crate) fn whitespace_before_comment(
                     }
                 }
             }
-        } else if !matches!(kind, Tok::NonLogicalNewline) {
+        } else if !matches!(kind, TokenKind::NonLogicalNewline) {
             prev_end = token.end();
         }
     }

View file

@@ -1,27 +1,36 @@
-use rustpython_parser::ast::Location;
-use rustpython_parser::Tok;

 use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit};
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;
 use ruff_python_ast::types::Range;
+use rustpython_parser::ast::Location;

 use super::LogicalLineTokens;

 #[violation]
 pub struct WhitespaceBeforeParameters {
-    pub bracket: String,
+    pub bracket: TokenKind,
+}
+
+impl WhitespaceBeforeParameters {
+    fn bracket_text(&self) -> char {
+        match self.bracket {
+            TokenKind::Lpar => '(',
+            TokenKind::Lsqb => '[',
+            _ => unreachable!(),
+        }
+    }
 }

 impl AlwaysAutofixableViolation for WhitespaceBeforeParameters {
     #[derive_message_formats]
     fn message(&self) -> String {
-        let WhitespaceBeforeParameters { bracket } = self;
-        format!("Whitespace before {bracket}")
+        let bracket = self.bracket_text();
+        format!("Whitespace before '{bracket}'")
     }

     fn autofix_title(&self) -> String {
-        let WhitespaceBeforeParameters { bracket } = self;
-        format!("Removed whitespace before {bracket}")
+        let bracket = self.bracket_text();
+        format!("Removed whitespace before '{bracket}'")
     }
 }
@@ -33,28 +42,26 @@ pub(crate) fn whitespace_before_parameters(
     let mut diagnostics = vec![];

     let previous = tokens.first().unwrap();
-    let mut pre_pre_kind: Option<&Tok> = None;
+    let mut pre_pre_kind: Option<TokenKind> = None;
     let mut prev_token = previous.kind();
     let mut prev_end = previous.end();

     for token in tokens {
         let kind = token.kind();

-        if matches!(kind, Tok::Lpar | Tok::Lsqb)
-            && token.start() != prev_end
+        if matches!(kind, TokenKind::Lpar | TokenKind::Lsqb)
             && matches!(
                 prev_token,
-                Tok::Name { .. } | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
+                TokenKind::Name | TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace
             )
-            && (pre_pre_kind != Some(&Tok::Class))
+            && (pre_pre_kind != Some(TokenKind::Class))
+            && token.start() != prev_end
         {
             let start = Location::new(prev_end.row(), prev_end.column());
             let end = token.end();
             let end = Location::new(end.row(), end.column() - 1);

-            let kind: WhitespaceBeforeParameters = WhitespaceBeforeParameters {
-                bracket: kind.to_string(),
-            };
+            let kind: WhitespaceBeforeParameters = WhitespaceBeforeParameters { bracket: kind };

             let mut diagnostic = Diagnostic::new(kind, Range::new(start, end));

View file

@@ -12,6 +12,7 @@ pub mod relocate;
 pub mod scope;
 pub mod source_code;
 pub mod str;
+pub mod token_kind;
 pub mod types;
 pub mod typing;
 pub mod visibility;

View file

@@ -0,0 +1,455 @@
use rustpython_parser::Tok;
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum TokenKind {
/// Token value for a name, commonly known as an identifier.
Name,
/// Token value for an integer.
Int,
/// Token value for a floating point number.
Float,
/// Token value for a complex number.
Complex,
/// Token value for a string.
String,
/// Token value for a comment. These are filtered out of the token stream prior to parsing.
Comment,
/// Token value for a newline.
Newline,
/// Token value for a newline that is not a logical line break. These are filtered out of
/// the token stream prior to parsing.
NonLogicalNewline,
/// Token value for an indent.
Indent,
/// Token value for a dedent.
Dedent,
EndOfFile,
/// Token value for a left parenthesis `(`.
Lpar,
/// Token value for a right parenthesis `)`.
Rpar,
/// Token value for a left square bracket `[`.
Lsqb,
/// Token value for a right square bracket `]`.
Rsqb,
/// Token value for a colon `:`.
Colon,
/// Token value for a comma `,`.
Comma,
/// Token value for a semicolon `;`.
Semi,
/// Token value for plus `+`.
Plus,
/// Token value for minus `-`.
Minus,
/// Token value for star `*`.
Star,
/// Token value for slash `/`.
Slash,
/// Token value for vertical bar `|`.
Vbar,
/// Token value for ampersand `&`.
Amper,
/// Token value for less than `<`.
Less,
/// Token value for greater than `>`.
Greater,
/// Token value for equal `=`.
Equal,
/// Token value for dot `.`.
Dot,
/// Token value for percent `%`.
Percent,
/// Token value for left bracket `{`.
Lbrace,
/// Token value for right bracket `}`.
Rbrace,
/// Token value for double equal `==`.
EqEqual,
/// Token value for not equal `!=`.
NotEqual,
/// Token value for less than or equal `<=`.
LessEqual,
/// Token value for greater than or equal `>=`.
GreaterEqual,
/// Token value for tilde `~`.
Tilde,
/// Token value for caret `^`.
CircumFlex,
/// Token value for left shift `<<`.
LeftShift,
/// Token value for right shift `>>`.
RightShift,
/// Token value for double star `**`.
DoubleStar,
/// Token value for double star equal `**=`.
DoubleStarEqual,
/// Token value for plus equal `+=`.
PlusEqual,
/// Token value for minus equal `-=`.
MinusEqual,
/// Token value for star equal `*=`.
StarEqual,
/// Token value for slash equal `/=`.
SlashEqual,
/// Token value for percent equal `%=`.
PercentEqual,
/// Token value for ampersand equal `&=`.
AmperEqual,
/// Token value for vertical bar equal `|=`.
VbarEqual,
/// Token value for caret equal `^=`.
CircumflexEqual,
/// Token value for left shift equal `<<=`.
LeftShiftEqual,
/// Token value for right shift equal `>>=`.
RightShiftEqual,
/// Token value for double slash `//`.
DoubleSlash,
/// Token value for double slash equal `//=`.
DoubleSlashEqual,
/// Token value for colon equal `:=`.
ColonEqual,
/// Token value for at `@`.
At,
/// Token value for at equal `@=`.
AtEqual,
/// Token value for arrow `->`.
Rarrow,
/// Token value for ellipsis `...`.
Ellipsis,
// Self documenting.
// Keywords (alphabetically):
False,
None,
True,
And,
As,
Assert,
Async,
Await,
Break,
Class,
Continue,
Def,
Del,
Elif,
Else,
Except,
Finally,
For,
From,
Global,
If,
Import,
In,
Is,
Lambda,
Nonlocal,
Not,
Or,
Pass,
Raise,
Return,
Try,
While,
Match,
Case,
With,
Yield,
// RustPython specific.
StartModule,
StartInteractive,
StartExpression,
}
impl TokenKind {
pub const fn is_whitespace_needed(&self) -> bool {
matches!(
self,
TokenKind::DoubleStarEqual
| TokenKind::StarEqual
| TokenKind::SlashEqual
| TokenKind::DoubleSlashEqual
| TokenKind::PlusEqual
| TokenKind::MinusEqual
| TokenKind::NotEqual
| TokenKind::Less
| TokenKind::Greater
| TokenKind::PercentEqual
| TokenKind::CircumflexEqual
| TokenKind::AmperEqual
| TokenKind::VbarEqual
| TokenKind::EqEqual
| TokenKind::LessEqual
| TokenKind::GreaterEqual
| TokenKind::LeftShiftEqual
| TokenKind::RightShiftEqual
| TokenKind::Equal
| TokenKind::And
| TokenKind::Or
| TokenKind::In
| TokenKind::Is
| TokenKind::Rarrow
)
}
pub const fn is_whitespace_optional(&self) -> bool {
self.is_arithmetic()
|| matches!(
self,
TokenKind::CircumFlex
| TokenKind::Amper
| TokenKind::Vbar
| TokenKind::LeftShift
| TokenKind::RightShift
| TokenKind::Percent
)
}
pub const fn is_unary(&self) -> bool {
matches!(
self,
TokenKind::Plus
| TokenKind::Minus
| TokenKind::Star
| TokenKind::DoubleStar
| TokenKind::RightShift
)
}
pub const fn is_keyword(&self) -> bool {
matches!(
self,
TokenKind::False
| TokenKind::True
| TokenKind::None
| TokenKind::And
| TokenKind::As
| TokenKind::Assert
| TokenKind::Await
| TokenKind::Break
| TokenKind::Class
| TokenKind::Continue
| TokenKind::Def
| TokenKind::Del
| TokenKind::Elif
| TokenKind::Else
| TokenKind::Except
| TokenKind::Finally
| TokenKind::For
| TokenKind::From
| TokenKind::Global
| TokenKind::If
| TokenKind::Import
| TokenKind::In
| TokenKind::Is
| TokenKind::Lambda
| TokenKind::Nonlocal
| TokenKind::Not
| TokenKind::Or
| TokenKind::Pass
| TokenKind::Raise
| TokenKind::Return
| TokenKind::Try
| TokenKind::While
| TokenKind::With
| TokenKind::Yield
)
}
pub const fn is_operator(&self) -> bool {
matches!(
self,
TokenKind::Lpar
| TokenKind::Rpar
| TokenKind::Lsqb
| TokenKind::Rsqb
| TokenKind::Comma
| TokenKind::Semi
| TokenKind::Plus
| TokenKind::Minus
| TokenKind::Star
| TokenKind::Slash
| TokenKind::Vbar
| TokenKind::Amper
| TokenKind::Less
| TokenKind::Greater
| TokenKind::Equal
| TokenKind::Dot
| TokenKind::Percent
| TokenKind::Lbrace
| TokenKind::Rbrace
| TokenKind::NotEqual
| TokenKind::LessEqual
| TokenKind::GreaterEqual
| TokenKind::Tilde
| TokenKind::CircumFlex
| TokenKind::LeftShift
| TokenKind::RightShift
| TokenKind::DoubleStar
| TokenKind::PlusEqual
| TokenKind::MinusEqual
| TokenKind::StarEqual
| TokenKind::SlashEqual
| TokenKind::PercentEqual
| TokenKind::AmperEqual
| TokenKind::VbarEqual
| TokenKind::CircumflexEqual
| TokenKind::LeftShiftEqual
| TokenKind::RightShiftEqual
| TokenKind::DoubleStarEqual
| TokenKind::DoubleSlash
| TokenKind::DoubleSlashEqual
| TokenKind::At
| TokenKind::AtEqual
| TokenKind::Rarrow
| TokenKind::Ellipsis
| TokenKind::ColonEqual
| TokenKind::Colon
)
}
pub const fn is_singleton(&self) -> bool {
matches!(self, TokenKind::False | TokenKind::True | TokenKind::None)
}
pub const fn is_skip_comment(&self) -> bool {
matches!(
self,
TokenKind::Newline
| TokenKind::Indent
| TokenKind::Dedent
| TokenKind::NonLogicalNewline
| TokenKind::Comment
)
}
pub const fn is_arithmetic(&self) -> bool {
matches!(
self,
TokenKind::DoubleStar
| TokenKind::Star
| TokenKind::Plus
| TokenKind::Minus
| TokenKind::Slash
| TokenKind::At
)
}
pub const fn is_soft_keyword(&self) -> bool {
matches!(self, TokenKind::Match | TokenKind::Case)
}
pub const fn from_token(token: &Tok) -> Self {
match token {
Tok::Name { .. } => TokenKind::Name,
Tok::Int { .. } => TokenKind::Int,
Tok::Float { .. } => TokenKind::Float,
Tok::Complex { .. } => TokenKind::Complex,
Tok::String { .. } => TokenKind::String,
Tok::Comment(_) => TokenKind::Comment,
Tok::Newline => TokenKind::Newline,
Tok::NonLogicalNewline => TokenKind::NonLogicalNewline,
Tok::Indent => TokenKind::Indent,
Tok::Dedent => TokenKind::Dedent,
Tok::EndOfFile => TokenKind::EndOfFile,
Tok::Lpar => TokenKind::Lpar,
Tok::Rpar => TokenKind::Rpar,
Tok::Lsqb => TokenKind::Lsqb,
Tok::Rsqb => TokenKind::Rsqb,
Tok::Colon => TokenKind::Colon,
Tok::Comma => TokenKind::Comma,
Tok::Semi => TokenKind::Semi,
Tok::Plus => TokenKind::Plus,
Tok::Minus => TokenKind::Minus,
Tok::Star => TokenKind::Star,
Tok::Slash => TokenKind::Slash,
Tok::Vbar => TokenKind::Vbar,
Tok::Amper => TokenKind::Amper,
Tok::Less => TokenKind::Less,
Tok::Greater => TokenKind::Greater,
Tok::Equal => TokenKind::Equal,
Tok::Dot => TokenKind::Dot,
Tok::Percent => TokenKind::Percent,
Tok::Lbrace => TokenKind::Lbrace,
Tok::Rbrace => TokenKind::Rbrace,
Tok::EqEqual => TokenKind::EqEqual,
Tok::NotEqual => TokenKind::NotEqual,
Tok::LessEqual => TokenKind::LessEqual,
Tok::GreaterEqual => TokenKind::GreaterEqual,
Tok::Tilde => TokenKind::Tilde,
Tok::CircumFlex => TokenKind::CircumFlex,
Tok::LeftShift => TokenKind::LeftShift,
Tok::RightShift => TokenKind::RightShift,
Tok::DoubleStar => TokenKind::DoubleStar,
Tok::DoubleStarEqual => TokenKind::DoubleStarEqual,
Tok::PlusEqual => TokenKind::PlusEqual,
Tok::MinusEqual => TokenKind::MinusEqual,
Tok::StarEqual => TokenKind::StarEqual,
Tok::SlashEqual => TokenKind::SlashEqual,
Tok::PercentEqual => TokenKind::PercentEqual,
Tok::AmperEqual => TokenKind::AmperEqual,
Tok::VbarEqual => TokenKind::VbarEqual,
Tok::CircumflexEqual => TokenKind::CircumflexEqual,
Tok::LeftShiftEqual => TokenKind::LeftShiftEqual,
Tok::RightShiftEqual => TokenKind::RightShiftEqual,
Tok::DoubleSlash => TokenKind::DoubleSlash,
Tok::DoubleSlashEqual => TokenKind::DoubleSlashEqual,
Tok::ColonEqual => TokenKind::ColonEqual,
Tok::At => TokenKind::At,
Tok::AtEqual => TokenKind::AtEqual,
Tok::Rarrow => TokenKind::Rarrow,
Tok::Ellipsis => TokenKind::Ellipsis,
Tok::False => TokenKind::False,
Tok::None => TokenKind::None,
Tok::True => TokenKind::True,
Tok::And => TokenKind::And,
Tok::As => TokenKind::As,
Tok::Assert => TokenKind::Assert,
Tok::Async => TokenKind::Async,
Tok::Await => TokenKind::Await,
Tok::Break => TokenKind::Break,
Tok::Class => TokenKind::Class,
Tok::Continue => TokenKind::Continue,
Tok::Def => TokenKind::Def,
Tok::Del => TokenKind::Del,
Tok::Elif => TokenKind::Elif,
Tok::Else => TokenKind::Else,
Tok::Except => TokenKind::Except,
Tok::Finally => TokenKind::Finally,
Tok::For => TokenKind::For,
Tok::From => TokenKind::From,
Tok::Global => TokenKind::Global,
Tok::If => TokenKind::If,
Tok::Import => TokenKind::Import,
Tok::In => TokenKind::In,
Tok::Is => TokenKind::Is,
Tok::Lambda => TokenKind::Lambda,
Tok::Nonlocal => TokenKind::Nonlocal,
Tok::Not => TokenKind::Not,
Tok::Or => TokenKind::Or,
Tok::Pass => TokenKind::Pass,
Tok::Raise => TokenKind::Raise,
Tok::Return => TokenKind::Return,
Tok::Try => TokenKind::Try,
Tok::While => TokenKind::While,
Tok::Match => TokenKind::Match,
Tok::Case => TokenKind::Case,
Tok::With => TokenKind::With,
Tok::Yield => TokenKind::Yield,
Tok::StartModule => TokenKind::StartModule,
Tok::StartInteractive => TokenKind::StartInteractive,
Tok::StartExpression => TokenKind::StartExpression,
}
}
}
impl From<&Tok> for TokenKind {
fn from(value: &Tok) -> Self {
Self::from_token(value)
}
}
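
As LogicalLines::from_tokens above shows, the conversion happens once per lexed token at the boundary; both the const fn and the From impl are available to callers. A short usage sketch, assuming the rustpython_parser and ruff_python_ast versions from this commit:

use ruff_python_ast::token_kind::TokenKind;
use rustpython_parser::Tok;

fn main() {
    let tok = Tok::Colon;
    // Either spelling works; downstream rules only ever see the payload-free kind.
    assert_eq!(TokenKind::from_token(&tok), TokenKind::Colon);
    assert_eq!(TokenKind::from(&tok), TokenKind::Colon);
}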