perf(pycodestyle): Refactor checks to iterate over tokens instead of text (#3736)

Micha Reiser 2023-03-28 10:37:13 +02:00 committed by GitHub
parent 1d724b1495
commit 2fdf98ef4e
22 changed files with 1225 additions and 1102 deletions
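The core of the change is visible in the signatures throughout the diff: checks that previously scanned the rebuilt logical-line text and returned byte offsets, which `check_logical_lines` then mapped back to source positions via `line.mapping(index)`, now walk the line's tokens and return `Location`s directly. A minimal sketch of the two shapes, using toy stand-ins rather than Ruff's real `LogicalLine`, `Location`, and `DiagnosticKind` types:

```rust
// Toy stand-ins; none of these are Ruff's real definitions.
#[derive(Debug, Clone, Copy)]
struct Location { row: usize, column: usize }

#[derive(Debug)]
struct DiagnosticKind(&'static str);

struct Token {
    kind: &'static str,
    start: Location,
    preceded_by_space: bool,
}
struct LogicalLine { tokens: Vec<Token> }

// Before: scan the rebuilt line text and return byte offsets that the caller
// must translate back into source positions through a mapping table.
fn check_text(line_text: &str) -> Vec<(usize, DiagnosticKind)> {
    line_text
        .match_indices(" ,")
        .map(|(offset, _)| (offset, DiagnosticKind("whitespace before ','")))
        .collect()
}

// After: walk the tokens and emit real `Location`s directly.
fn check_tokens(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
    line.tokens
        .iter()
        .filter(|token| token.kind == "," && token.preceded_by_space)
        .map(|token| (token.start, DiagnosticKind("whitespace before ','")))
        .collect()
}

fn main() {
    let line = LogicalLine {
        tokens: vec![Token {
            kind: ",",
            start: Location { row: 1, column: 4 },
            preceded_by_space: true,
        }],
    };
    println!("{:?}", check_text("f(a , b)"));
    println!("{:?}", check_tokens(&line));
}
```

The per-rule diffs below repeat this pattern: drop the regex or character scan over `line.text()`, iterate `line.tokens()`, and delete the offset-to-`Location` post-processing.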

View file

@@ -1,6 +1,3 @@
#![allow(dead_code, unused_imports, unused_variables)]
use itertools::Itertools;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
@@ -9,12 +6,11 @@ use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_ast::types::Range;
use crate::registry::{AsRule, Rule};
use crate::rules::pycodestyle::logical_lines::{LogicalLines, TokenFlags};
use crate::rules::pycodestyle::rules::{
use crate::rules::pycodestyle::rules::logical_lines::{
extraneous_whitespace, indentation, missing_whitespace, missing_whitespace_after_keyword,
missing_whitespace_around_operator, space_around_operator, whitespace_around_keywords,
whitespace_around_named_parameter_equals, whitespace_before_comment,
whitespace_before_parameters,
whitespace_before_parameters, LogicalLines, TokenFlags,
};
use crate::settings::{flags, Settings};
@@ -57,21 +53,14 @@ pub fn check_logical_lines(
#[cfg(not(feature = "logical_lines"))]
let should_fix_whitespace_before_parameters = false;
let indent_char = stylist.indentation().as_char();
let mut prev_line = None;
let mut prev_indent_level = None;
for line in &LogicalLines::from_tokens(tokens, locator) {
// Extract the indentation level.
let Some(start_loc) = line.first_token_location() else { continue; };
let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), *start_loc));
let indent_level = expand_indent(start_line);
let indent_size = 4;
let indent_char = stylist.indentation().as_char();
for line in &LogicalLines::from_tokens(tokens, locator) {
if line.flags().contains(TokenFlags::OPERATOR) {
for (index, kind) in space_around_operator(line.text()) {
for (location, kind) in space_around_operator(&line) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
@@ -86,10 +75,8 @@ pub fn check_logical_lines(
.flags()
.contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION)
{
for (index, kind) in extraneous_whitespace(line.text()) {
for (location, kind) in extraneous_whitespace(&line) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
@@ -101,10 +88,8 @@ pub fn check_logical_lines(
}
}
if line.flags().contains(TokenFlags::KEYWORD) {
for (index, kind) in whitespace_around_keywords(line.text()) {
for (location, kind) in whitespace_around_keywords(&line) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
@@ -115,7 +100,7 @@ pub fn check_logical_lines(
}
}
for (location, kind) in missing_whitespace_after_keyword(line.tokens()) {
for (location, kind) in missing_whitespace_after_keyword(&line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -128,7 +113,7 @@ pub fn check_logical_lines(
}
}
if line.flags().contains(TokenFlags::COMMENT) {
for (range, kind) in whitespace_before_comment(line.tokens(), locator) {
for (range, kind) in whitespace_before_comment(&line.tokens(), locator) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -141,9 +126,7 @@ pub fn check_logical_lines(
}
}
if line.flags().contains(TokenFlags::OPERATOR) {
for (location, kind) in
whitespace_around_named_parameter_equals(line.tokens(), line.text())
{
for (location, kind) in whitespace_around_named_parameter_equals(&line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -154,7 +137,7 @@ pub fn check_logical_lines(
});
}
}
for (location, kind) in missing_whitespace_around_operator(line.tokens()) {
for (location, kind) in missing_whitespace_around_operator(&line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -166,12 +149,7 @@ pub fn check_logical_lines(
}
}
for diagnostic in missing_whitespace(
line.text(),
start_loc.row(),
should_fix_missing_whitespace,
indent_level,
) {
for diagnostic in missing_whitespace(&line, should_fix_missing_whitespace) {
if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic);
}
@@ -179,16 +157,23 @@ pub fn check_logical_lines(
}
if line.flags().contains(TokenFlags::BRACKET) {
for diagnostic in
whitespace_before_parameters(line.tokens(), should_fix_whitespace_before_parameters)
{
for diagnostic in whitespace_before_parameters(
&line.tokens(),
should_fix_whitespace_before_parameters,
) {
if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic);
}
}
}
for (index, kind) in indentation(
// Extract the indentation level.
let Some(start_loc) = line.first_token_location() else { continue; };
let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), start_loc));
let indent_level = expand_indent(start_line);
let indent_size = 4;
for (location, kind) in indentation(
&line,
prev_line.as_ref(),
indent_char,
@@ -196,8 +181,6 @@ pub fn check_logical_lines(
prev_indent_level,
indent_size,
) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -209,7 +192,7 @@ pub fn check_logical_lines(
}
}
if !line.is_comment() {
if !line.is_comment_only() {
prev_line = Some(line);
prev_indent_level = Some(indent_level);
}
@@ -222,7 +205,7 @@ mod tests {
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode};
use crate::rules::pycodestyle::logical_lines::LogicalLines;
use crate::rules::pycodestyle::rules::logical_lines::LogicalLines;
use ruff_python_ast::source_code::Locator;
#[test]
@@ -235,7 +218,7 @@ z = x + 1"#;
let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text().to_string())
.map(|line| line.text_trimmed().to_string())
.collect();
let expected = vec![
"x = 1".to_string(),
@@ -256,10 +239,10 @@ z = x + 1"#;
let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text().to_string())
.map(|line| line.text_trimmed().to_string())
.collect();
let expected = vec![
"x = [1, 2, 3, ]".to_string(),
"x = [\n 1,\n 2,\n 3,\n]".to_string(),
"y = 2".to_string(),
"z = x + 1".to_string(),
];
@@ -270,9 +253,9 @@ z = x + 1"#;
let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text().to_string())
.map(|line| line.text_trimmed().to_string())
.collect();
let expected = vec!["x = \"xxx\"".to_string()];
let expected = vec!["x = 'abc'".to_string()];
assert_eq!(actual, expected);
let contents = r#"
@@ -283,7 +266,7 @@ f()"#;
let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text().to_string())
.map(|line| line.text_trimmed().to_string())
.collect();
let expected = vec!["def f():", "x = 1", "f()"];
assert_eq!(actual, expected);
@@ -298,9 +281,15 @@ f()"#;
let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text().to_string())
.map(|line| line.text_trimmed().to_string())
.collect();
let expected = vec!["def f():", "\"xxxxxxxxxxxxxxxxxxxx\"", "", "x = 1", "f()"];
let expected = vec![
"def f():",
"\"\"\"Docstring goes here.\"\"\"",
"",
"x = 1",
"f()",
];
assert_eq!(actual, expected);
}
}
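A toy reduction of the rewritten driver loop above, with made-up fields rather than Ruff's `LogicalLine`: the indentation bookkeeping now happens immediately before the `indentation` check runs, and comment-only lines (`is_comment_only`) still never become the "previous logical line":

```rust
struct Line {
    indent_level: usize,
    comment_only: bool,
}

fn main() {
    let lines = vec![
        Line { indent_level: 0, comment_only: false }, // def f():
        Line { indent_level: 4, comment_only: true },  // # comment: skipped as "prev"
        Line { indent_level: 4, comment_only: false }, // x = 1
    ];

    let mut prev_indent_level: Option<usize> = None;
    for line in &lines {
        // ... run the per-line checks here, passing `prev_indent_level` ...
        println!("indent {} (prev {:?})", line.indent_level, prev_indent_level);
        if !line.comment_only {
            prev_indent_level = Some(line.indent_level);
        }
    }
}
```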

View file

@@ -1,7 +1,8 @@
pub mod ast;
pub mod filesystem;
pub mod imports;
pub mod logical_lines;
#[cfg(feature = "logical_lines")]
pub(crate) mod logical_lines;
pub mod noqa;
pub mod physical_lines;
pub mod tokens;

View file

@@ -16,7 +16,6 @@ use crate::autofix::fix_file;
use crate::checkers::ast::check_ast;
use crate::checkers::filesystem::check_file_path;
use crate::checkers::imports::check_imports;
use crate::checkers::logical_lines::check_logical_lines;
use crate::checkers::noqa::check_noqa;
use crate::checkers::physical_lines::check_physical_lines;
use crate::checkers::tokens::check_tokens;
@@ -105,7 +104,8 @@ pub fn check_path(
.iter_enabled()
.any(|rule_code| rule_code.lint_source().is_logical_lines())
{
diagnostics.extend(check_logical_lines(
#[cfg(feature = "logical_lines")]
diagnostics.extend(crate::checkers::logical_lines::check_logical_lines(
&tokens,
locator,
stylist,

View file

@@ -15,67 +15,67 @@ ruff_macros::register_rules!(
// pycodestyle errors
rules::pycodestyle::rules::MixedSpacesAndTabs,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::IndentationWithInvalidMultiple,
rules::pycodestyle::rules::logical_lines::IndentationWithInvalidMultiple,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::NoIndentedBlock,
rules::pycodestyle::rules::logical_lines::NoIndentedBlock,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::UnexpectedIndentation,
rules::pycodestyle::rules::logical_lines::UnexpectedIndentation,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::IndentationWithInvalidMultipleComment,
rules::pycodestyle::rules::logical_lines::IndentationWithInvalidMultipleComment,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::NoIndentedBlockComment,
rules::pycodestyle::rules::logical_lines::NoIndentedBlockComment,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::UnexpectedIndentationComment,
rules::pycodestyle::rules::logical_lines::UnexpectedIndentationComment,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::OverIndented,
rules::pycodestyle::rules::logical_lines::OverIndented,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::WhitespaceAfterOpenBracket,
rules::pycodestyle::rules::logical_lines::WhitespaceAfterOpenBracket,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::WhitespaceBeforeCloseBracket,
rules::pycodestyle::rules::logical_lines::WhitespaceBeforeCloseBracket,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::WhitespaceBeforePunctuation,
rules::pycodestyle::rules::logical_lines::WhitespaceBeforePunctuation,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MultipleSpacesBeforeOperator,
rules::pycodestyle::rules::logical_lines::MultipleSpacesBeforeOperator,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MultipleSpacesAfterOperator,
rules::pycodestyle::rules::logical_lines::MultipleSpacesAfterOperator,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::TabBeforeOperator,
rules::pycodestyle::rules::logical_lines::TabBeforeOperator,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::TabAfterOperator,
rules::pycodestyle::rules::logical_lines::TabAfterOperator,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::TooFewSpacesBeforeInlineComment,
rules::pycodestyle::rules::logical_lines::TooFewSpacesBeforeInlineComment,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::NoSpaceAfterInlineComment,
rules::pycodestyle::rules::logical_lines::NoSpaceAfterInlineComment,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::NoSpaceAfterBlockComment,
rules::pycodestyle::rules::logical_lines::NoSpaceAfterBlockComment,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MultipleLeadingHashesForBlockComment,
rules::pycodestyle::rules::logical_lines::MultipleLeadingHashesForBlockComment,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MultipleSpacesAfterKeyword,
rules::pycodestyle::rules::logical_lines::MultipleSpacesAfterKeyword,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespace,
rules::pycodestyle::rules::logical_lines::MissingWhitespace,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAfterKeyword,
rules::pycodestyle::rules::logical_lines::MissingWhitespaceAfterKeyword,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MultipleSpacesBeforeKeyword,
rules::pycodestyle::rules::logical_lines::MultipleSpacesBeforeKeyword,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAroundOperator,
rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundOperator,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAroundArithmeticOperator,
rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundArithmeticOperator,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAroundBitwiseOrShiftOperator,
rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundBitwiseOrShiftOperator,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAroundModuloOperator,
rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundModuloOperator,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::TabAfterKeyword,
rules::pycodestyle::rules::logical_lines::TabAfterKeyword,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::UnexpectedSpacesAroundKeywordParameterEquals,
rules::pycodestyle::rules::logical_lines::UnexpectedSpacesAroundKeywordParameterEquals,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::MissingWhitespaceAroundParameterEquals,
rules::pycodestyle::rules::logical_lines::MissingWhitespaceAroundParameterEquals,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::WhitespaceBeforeParameters,
rules::pycodestyle::rules::logical_lines::WhitespaceBeforeParameters,
#[cfg(feature = "logical_lines")]
rules::pycodestyle::rules::TabBeforeKeyword,
rules::pycodestyle::rules::logical_lines::TabBeforeKeyword,
rules::pycodestyle::rules::MultipleImportsOnOneLine,
rules::pycodestyle::rules::ModuleImportNotAtTopOfFile,
rules::pycodestyle::rules::LineTooLong,

View file

@@ -1,4 +1,5 @@
use rustpython_parser::ast::{Cmpop, Expr, ExprKind};
#[cfg(feature = "logical_lines")]
use rustpython_parser::Tok;
use unicode_width::UnicodeWidthStr;
@@ -58,6 +59,7 @@ pub fn is_overlong(
true
}
#[cfg(feature = "logical_lines")]
pub const fn is_keyword_token(token: &Tok) -> bool {
matches!(
token,
@@ -98,6 +100,7 @@ pub const fn is_keyword_token(token: &Tok) -> bool {
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_singleton_token(token: &Tok) -> bool {
matches!(
token,
@@ -105,6 +108,7 @@ pub const fn is_singleton_token(token: &Tok) -> bool {
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_op_token(token: &Tok) -> bool {
matches!(
token,
@@ -157,6 +161,7 @@ pub const fn is_op_token(token: &Tok) -> bool {
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_skip_comment_token(token: &Tok) -> bool {
matches!(
token,
@@ -164,10 +169,12 @@ pub const fn is_skip_comment_token(token: &Tok) -> bool {
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_soft_keyword_token(token: &Tok) -> bool {
matches!(token, Tok::Match | Tok::Case)
}
#[cfg(feature = "logical_lines")]
pub const fn is_arithmetic_token(token: &Tok) -> bool {
matches!(
token,
@@ -175,6 +182,7 @@ pub const fn is_arithmetic_token(token: &Tok) -> bool {
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_ws_optional_token(token: &Tok) -> bool {
is_arithmetic_token(token)
|| matches!(
@@ -188,6 +196,7 @@ pub const fn is_ws_optional_token(token: &Tok) -> bool {
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_ws_needed_token(token: &Tok) -> bool {
matches!(
token,
@@ -218,6 +227,7 @@ pub const fn is_ws_needed_token(token: &Tok) -> bool {
)
}
#[cfg(feature = "logical_lines")]
pub const fn is_unary_token(token: &Tok) -> bool {
matches!(
token,

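Every helper gated above follows the same shape: a `const fn` predicate built from a single `matches!` over the token enum, compiled only when the `logical_lines` feature is enabled. A self-contained toy with a stand-in enum (the real predicates match over rustpython_parser's `Tok`):

```rust
// Stand-in enum, not rustpython_parser's `Tok`.
enum Tok {
    Plus,
    Match,
    Case,
}

// The shared shape: `const fn` plus a single `matches!`.
const fn is_soft_keyword_token(token: &Tok) -> bool {
    matches!(token, Tok::Match | Tok::Case)
}

fn main() {
    assert!(is_soft_keyword_token(&Tok::Match));
    assert!(!is_soft_keyword_token(&Tok::Plus));
    println!("ok");
}
```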
View file

@@ -1,416 +0,0 @@
use bitflags::bitflags;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use std::borrow::Cow;
use std::fmt::{Debug, Formatter};
use std::iter::FusedIterator;
use unicode_width::UnicodeWidthStr;
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range;
use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token};
bitflags! {
#[derive(Default)]
pub struct TokenFlags: u8 {
/// Whether the logical line contains an operator.
const OPERATOR = 0b0000_0001;
/// Whether the logical line contains a bracket.
const BRACKET = 0b0000_0010;
/// Whether the logical line contains a punctuation mark.
const PUNCTUATION = 0b0000_0100;
/// Whether the logical line contains a keyword.
const KEYWORD = 0b0000_1000;
/// Whether the logical line contains a comment.
const COMMENT = 0b0001_0000;
}
}
#[derive(Clone)]
pub struct LogicalLines<'a> {
text: String,
/// start position, token, end position
tokens: Vec<(Location, &'a Tok, Location)>,
mappings: Mappings,
lines: Vec<Line>,
}
impl<'a> LogicalLines<'a> {
pub fn from_tokens(tokens: &'a [LexResult], locator: &Locator) -> Self {
assert!(u32::try_from(tokens.len()).is_ok());
let single_token = tokens.len() == 1;
let mut builder =
LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len());
let mut parens: u32 = 0;
for (start, token, end) in tokens.iter().flatten() {
builder.push_token(*start, token, *end, locator);
match token {
Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
parens += 1;
}
Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
parens -= 1;
}
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(_) if parens == 0 => {
if matches!(token, Tok::Newline) {
builder.finish_line();
}
// Comment-only file or non-logical newline?
else if single_token {
builder.discard_line();
} else {
builder.finish_line();
};
}
_ => {}
}
}
builder.finish()
}
}
impl std::fmt::Debug for LogicalLines<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_list()
.entries(self.into_iter().map(DebugLogicalLine))
.finish()
}
}
impl<'a> IntoIterator for &'a LogicalLines<'a> {
type Item = LogicalLine<'a>;
type IntoIter = LogicalLinesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
LogicalLinesIter {
lines: self,
inner: self.lines.iter(),
}
}
}
#[derive(Debug, Clone)]
struct Line {
flags: TokenFlags,
/// Byte offset of the start of the text of this line.
text_start: u32,
/// Byte offset of the end of the text of this line.
text_end: u32,
mappings_start: u32,
mappings_end: u32,
tokens_start: u32,
tokens_end: u32,
}
#[derive(Debug)]
pub struct LogicalLine<'a> {
lines: &'a LogicalLines<'a>,
line: &'a Line,
}
impl<'a> LogicalLine<'a> {
/// Returns true if this is a comment-only line
pub fn is_comment(&self) -> bool {
self.text().is_empty() && self.flags().contains(TokenFlags::COMMENT)
}
/// Returns the text of this line
pub fn text(&self) -> &'a str {
&self.lines.text[self.line.text_start as usize..self.line.text_end as usize]
}
/// Returns the tokens of the line
pub fn tokens(&self) -> &'a [(Location, &'a Tok, Location)] {
&self.lines.tokens[self.line.tokens_start as usize..self.line.tokens_end as usize]
}
/// Returns the [`Location`] of the first token on the line or [`None`].
pub fn first_token_location(&self) -> Option<&Location> {
self.token_locations().first()
}
fn token_offsets(&self) -> &[u32] {
&self.lines.mappings.logical_line_offsets
[self.line.mappings_start as usize..self.line.mappings_end as usize]
}
fn token_locations(&self) -> &[Location] {
&self.lines.mappings.locations
[self.line.mappings_start as usize..self.line.mappings_end as usize]
}
/// Returns the mapping for an offset in the logical line.
///
/// The offset of the closest token and its corresponding location.
pub fn mapping(&self, offset: usize) -> (usize, Location) {
let index = self
.token_offsets()
.binary_search(&(self.line.text_start + u32::try_from(offset).unwrap()))
.unwrap_or_default();
(
(self.token_offsets()[index] - self.line.text_start) as usize,
self.token_locations()[index],
)
}
pub fn is_empty(&self) -> bool {
self.lines.mappings.is_empty()
}
pub const fn flags(&self) -> TokenFlags {
self.line.flags
}
}
struct DebugLogicalLine<'a>(LogicalLine<'a>);
impl Debug for DebugLogicalLine<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LogicalLine")
.field("text", &self.0.text())
.field("flags", &self.0.flags())
.field("tokens", &self.0.tokens())
.finish()
}
}
/// Iterator over the logical lines of a document.
pub struct LogicalLinesIter<'a> {
lines: &'a LogicalLines<'a>,
inner: std::slice::Iter<'a, Line>,
}
impl<'a> Iterator for LogicalLinesIter<'a> {
type Item = LogicalLine<'a>;
fn next(&mut self) -> Option<Self::Item> {
let line = self.inner.next()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
}
impl DoubleEndedIterator for LogicalLinesIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
let line = self.inner.next_back()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
}
impl ExactSizeIterator for LogicalLinesIter<'_> {}
impl FusedIterator for LogicalLinesIter<'_> {}
/// Source map that maps byte positions in the logical line text to the [`Location`] in the
/// original document.
#[derive(Debug, Default, Clone)]
struct Mappings {
/// Byte offsets within the logical lines' text at which tokens start/end.
logical_line_offsets: Vec<u32>,
/// Corresponding [`Location`]s for each byte offset mapping it to the position in the original document.
locations: Vec<Location>,
}
impl Mappings {
fn with_capacity(capacity: usize) -> Self {
Self {
logical_line_offsets: Vec::with_capacity(capacity),
locations: Vec::with_capacity(capacity),
}
}
fn len(&self) -> usize {
self.logical_line_offsets.len()
}
fn is_empty(&self) -> bool {
self.logical_line_offsets.is_empty()
}
fn truncate(&mut self, len: usize) {
self.locations.truncate(len);
self.logical_line_offsets.truncate(len);
}
#[allow(clippy::cast_possible_truncation)]
fn push(&mut self, offset: usize, location: Location) {
self.logical_line_offsets.push(offset as u32);
self.locations.push(location);
}
}
#[derive(Debug, Default)]
struct CurrentLine {
flags: TokenFlags,
text_start: u32,
mappings_start: u32,
tokens_start: u32,
previous_token: Option<Location>,
}
#[derive(Debug, Default)]
pub struct LogicalLinesBuilder<'a> {
text: String,
tokens: Vec<(Location, &'a Tok, Location)>,
mappings: Mappings,
lines: Vec<Line>,
current_line: Option<CurrentLine>,
}
impl<'a> LogicalLinesBuilder<'a> {
fn with_capacity(tokens: usize, string: usize) -> Self {
Self {
tokens: Vec::with_capacity(tokens),
mappings: Mappings::with_capacity(tokens + 1),
text: String::with_capacity(string),
..Self::default()
}
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn push_token(&mut self, start: Location, token: &'a Tok, end: Location, locator: &Locator) {
let tokens_start = self.tokens.len();
self.tokens.push((start, token, end));
let mut line = self.current_line.get_or_insert_with(|| {
let mappings_start = self.mappings.len();
self.mappings.push(self.text.len(), start);
CurrentLine {
flags: TokenFlags::empty(),
text_start: self.text.len() as u32,
mappings_start: mappings_start as u32,
tokens_start: tokens_start as u32,
previous_token: None,
}
});
if matches!(
token,
Tok::Newline | Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent
) {
return;
}
if matches!(token, Tok::Comment(..)) {
line.flags.insert(TokenFlags::COMMENT);
return;
}
if is_op_token(token) {
line.flags.insert(TokenFlags::OPERATOR);
}
if matches!(
token,
Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
) {
line.flags.insert(TokenFlags::BRACKET);
}
if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
line.flags.insert(TokenFlags::PUNCTUATION);
}
if is_keyword_token(token) {
line.flags.insert(TokenFlags::KEYWORD);
}
// TODO(charlie): "Mute" strings.
let text = if let Tok::String { value, .. } = token {
// Replace the content of strings with a non-whitespace sequence because some lints
// search for whitespace in the document, and whitespace inside the string
// would complicate the search.
Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
} else {
Cow::Borrowed(locator.slice(Range {
location: start,
end_location: end,
}))
};
if let Some(prev) = line.previous_token.take() {
if prev.row() != start.row() {
let prev_text = locator.slice(Range {
location: Location::new(prev.row(), prev.column() - 1),
end_location: Location::new(prev.row(), prev.column()),
});
if prev_text == ","
|| ((prev_text != "{" && prev_text != "[" && prev_text != "(")
&& (text != "}" && text != "]" && text != ")"))
{
self.text.push(' ');
}
} else if prev.column() != start.column() {
let prev_text = locator.slice(Range {
location: prev,
end_location: start,
});
self.text.push_str(prev_text);
}
}
line.previous_token = Some(end);
self.text.push_str(&text);
self.mappings.push(self.text.len(), end);
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn finish_line(&mut self) {
if let Some(current) = self.current_line.take() {
self.lines.push(Line {
flags: current.flags,
text_start: current.text_start,
text_end: self.text.len() as u32,
mappings_start: current.mappings_start,
mappings_end: self.mappings.len() as u32,
tokens_start: current.tokens_start,
tokens_end: self.tokens.len() as u32,
});
}
}
fn discard_line(&mut self) {
if let Some(current) = self.current_line.take() {
self.text.truncate(current.text_start as usize);
self.tokens.truncate(current.tokens_start as usize);
self.mappings.truncate(current.mappings_start as usize);
}
}
fn finish(mut self) -> LogicalLines<'a> {
self.finish_line();
LogicalLines {
text: self.text,
tokens: self.tokens,
mappings: self.mappings,
lines: self.lines,
}
}
}
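For context on what this deletion removes: the old builder rebuilt a normalized copy of each line's text and kept parallel vectors (`Mappings`) translating byte offsets in that text back to source `Location`s via binary search. A standalone, simplified re-implementation of that removed idea, with a toy `Location`:

```rust
// Toy `Location`; the real one came from rustpython_parser.
#[derive(Debug, Clone, Copy)]
struct Location { row: usize, column: usize }

struct Mappings {
    // Byte offsets into the rebuilt logical-line text where tokens start/end...
    offsets: Vec<u32>,
    // ...and the corresponding positions in the original source.
    locations: Vec<Location>,
}

impl Mappings {
    // Like the removed `LogicalLine::mapping`: binary-search the offset and,
    // as in the original, fall back to index 0 when it is not an exact
    // token boundary (`unwrap_or_default`).
    fn mapping(&self, offset: u32) -> (u32, Location) {
        let index = self.offsets.binary_search(&offset).unwrap_or_default();
        (self.offsets[index], self.locations[index])
    }
}

fn main() {
    let mappings = Mappings {
        offsets: vec![0, 2, 5],
        locations: vec![
            Location { row: 1, column: 0 },
            Location { row: 1, column: 2 },
            Location { row: 2, column: 0 },
        ],
    };
    // Offset 2 in the normalized text maps back to row 1, column 2.
    println!("{:?}", mappings.mapping(2));
}
```

Because every check now receives located tokens, this side table and the offset arithmetic at each call site become unnecessary.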

View file

@@ -3,7 +3,6 @@ pub(crate) mod rules;
pub mod settings;
pub(crate) mod helpers;
pub(crate) mod logical_lines;
#[cfg(test)]
mod tests {

View file

@@ -1,8 +1,7 @@
#![allow(dead_code, unused_imports, unused_variables)]
use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use super::{LogicalLine, Whitespace};
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
@@ -101,32 +100,48 @@ impl Violation for WhitespaceBeforePunctuation {
}
}
// TODO(charlie): Pycodestyle has a negative lookahead on the end.
static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap());
/// E201, E202, E203
#[cfg(feature = "logical_lines")]
pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
pub(crate) fn extraneous_whitespace(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) {
let text = &line[line_match.range()];
let char = text.trim();
let found = line_match.start();
if text.chars().last().unwrap().is_ascii_whitespace() {
diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
} else if line.chars().nth(found - 1).map_or(false, |c| c != ',') {
if char == "}" || char == "]" || char == ")" {
diagnostics.push((found, WhitespaceBeforeCloseBracket.into()));
} else {
diagnostics.push((found, WhitespaceBeforePunctuation.into()));
let mut last_token: Option<&Tok> = None;
for token in line.tokens() {
let kind = token.kind();
match kind {
Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
if !matches!(line.trailing_whitespace(&token), Whitespace::None) {
let end = token.end();
diagnostics.push((
Location::new(end.row(), end.column()),
WhitespaceAfterOpenBracket.into(),
));
}
}
Tok::Rbrace | Tok::Rpar | Tok::Rsqb | Tok::Comma | Tok::Semi | Tok::Colon => {
let diagnostic_kind = if matches!(kind, Tok::Comma | Tok::Semi | Tok::Colon) {
DiagnosticKind::from(WhitespaceBeforePunctuation)
} else {
DiagnosticKind::from(WhitespaceBeforeCloseBracket)
};
if let (Whitespace::Single | Whitespace::Many | Whitespace::Tab, offset) =
line.leading_whitespace(&token)
{
let start = token.start();
if !matches!(last_token, Some(Tok::Comma)) {
diagnostics.push((
Location::new(start.row(), start.column() - offset),
diagnostic_kind,
));
}
}
}
_ => {}
}
last_token = Some(kind);
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn extraneous_whitespace(_line: &str) -> Vec<(usize, DiagnosticKind)> {
vec![]
}
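A toy restatement of the new `extraneous_whitespace` walk, with a stand-in `Kind` and plain booleans in place of Ruff's `Tok` and `Whitespace` queries: inspect each token's surrounding whitespace instead of regex-matching the rebuilt line text, and tolerate the gap a preceding comma leaves, as the old `c != ','` check did:

```rust
#[derive(Debug, Clone, Copy, PartialEq)]
enum Kind { Open, Close, Punct, Comma, Other }

struct Token {
    kind: Kind,
    space_before: bool,
    space_after: bool,
}

fn extraneous_whitespace(tokens: &[Token]) -> Vec<&'static str> {
    let mut diagnostics = Vec::new();
    let mut last: Option<Kind> = None;
    for token in tokens {
        match token.kind {
            // E201: whitespace directly after an opening bracket.
            Kind::Open if token.space_after => diagnostics.push("E201 whitespace after '('"),
            // E202/E203: whitespace before a closer or punctuation, unless a
            // comma produced the gap (tolerates `(3, )`).
            Kind::Close | Kind::Punct | Kind::Comma if token.space_before => {
                if last != Some(Kind::Comma) {
                    diagnostics.push(if token.kind == Kind::Close {
                        "E202 whitespace before ')'"
                    } else {
                        "E203 whitespace before punctuation"
                    });
                }
            }
            _ => {}
        }
        last = Some(token.kind);
    }
    diagnostics
}

fn main() {
    // `( x , y)` yields E201 after '(' and E203 before ','.
    let tokens = [
        Token { kind: Kind::Open, space_before: false, space_after: true },
        Token { kind: Kind::Other, space_before: true, space_after: true },
        Token { kind: Kind::Comma, space_before: true, space_after: true },
        Token { kind: Kind::Other, space_before: true, space_after: false },
        Token { kind: Kind::Close, space_before: false, space_after: false },
    ];
    println!("{:?}", extraneous_whitespace(&tokens));
}
```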

View file

@@ -1,10 +1,10 @@
#![allow(dead_code, unused_imports, unused_variables)]
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use crate::rules::pycodestyle::logical_lines::LogicalLine;
use super::LogicalLine;
/// ## What it does
/// Checks for indentation with a non-multiple of 4 spaces.
@@ -230,33 +230,36 @@ impl Violation for OverIndented {
}
/// E111, E114, E112, E113, E115, E116, E117
#[cfg(feature = "logical_lines")]
pub fn indentation(
pub(crate) fn indentation(
logical_line: &LogicalLine,
prev_logical_line: Option<&LogicalLine>,
indent_char: char,
indent_level: usize,
prev_indent_level: Option<usize>,
indent_size: usize,
) -> Vec<(usize, DiagnosticKind)> {
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
let location = logical_line.first_token_location().unwrap();
if indent_level % indent_size != 0 {
diagnostics.push((
0,
if logical_line.is_comment() {
location,
if logical_line.is_comment_only() {
IndentationWithInvalidMultipleComment { indent_size }.into()
} else {
IndentationWithInvalidMultiple { indent_size }.into()
},
));
}
let indent_expect = prev_logical_line.map_or(false, |prev_logical_line| {
prev_logical_line.text().ends_with(':')
});
let indent_expect = prev_logical_line
.and_then(|prev_logical_line| prev_logical_line.tokens().trimmed().last())
.map_or(false, |t| t.kind() == &Tok::Colon);
if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) {
diagnostics.push((
0,
if logical_line.is_comment() {
location,
if logical_line.is_comment_only() {
NoIndentedBlockComment.into()
} else {
NoIndentedBlock.into()
@@ -266,8 +269,8 @@ pub fn indentation(
&& prev_indent_level.map_or(false, |prev_indent_level| indent_level > prev_indent_level)
{
diagnostics.push((
0,
if logical_line.is_comment() {
location,
if logical_line.is_comment_only() {
UnexpectedIndentationComment.into()
} else {
UnexpectedIndentation.into()
@@ -278,20 +281,9 @@ pub fn indentation(
let expected_indent_amount = if indent_char == '\t' { 8 } else { 4 };
let expected_indent_level = prev_indent_level.unwrap_or(0) + expected_indent_amount;
if indent_level > expected_indent_level {
diagnostics.push((0, OverIndented.into()));
diagnostics.push((location, OverIndented.into()));
}
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn indentation(
_logical_line: &LogicalLine,
_prev_logical_line: Option<&LogicalLine>,
_indent_char: char,
_indent_level: usize,
_prev_indent_level: Option<usize>,
_indent_size: usize,
) -> Vec<(usize, DiagnosticKind)> {
vec![]
}
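The decisions themselves are unchanged; what moved is the reporting position (the first token's `Location` instead of offset `0`) and the `indent_expect` test (the last trimmed token being a `:` rather than the text ending with one). A toy of the E111/E112/E113/E117 decisions with plain integers; the real check reads these values off the `LogicalLine`, and expects a step of 8 rather than `indent_size` for tab indents:

```rust
fn indentation(
    indent_level: usize,
    prev_indent_level: Option<usize>,
    indent_expect: bool, // previous logical line ended with ':'
    indent_size: usize,
) -> Vec<&'static str> {
    let mut diagnostics = Vec::new();
    if indent_level % indent_size != 0 {
        diagnostics.push("E111 indentation is not a multiple of four");
    }
    if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) {
        diagnostics.push("E112 expected an indented block");
    } else if !indent_expect
        && prev_indent_level.map_or(false, |prev| indent_level > prev)
    {
        diagnostics.push("E113 unexpectedly indented");
    }
    if indent_expect {
        // Simplification: the real check uses 8 for tab indents.
        let expected = prev_indent_level.unwrap_or(0) + indent_size;
        if indent_level > expected {
            diagnostics.push("E117 over-indented");
        }
    }
    diagnostics
}

fn main() {
    // After `def f():` at indent 0, a body indented by 8 is over-indented.
    println!("{:?}", indentation(8, Some(0), true, 4));
}
```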

View file

@@ -0,0 +1,85 @@
use itertools::Itertools;
use rustpython_parser::Tok;
use super::LogicalLine;
use ruff_diagnostics::Edit;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::types::Range;
#[violation]
pub struct MissingWhitespace {
pub token: String,
}
impl AlwaysAutofixableViolation for MissingWhitespace {
#[derive_message_formats]
fn message(&self) -> String {
let MissingWhitespace { token } = self;
format!("Missing whitespace after {token}")
}
fn autofix_title(&self) -> String {
let MissingWhitespace { token } = self;
format!("Added missing whitespace after {token}")
}
}
/// E231
pub(crate) fn missing_whitespace(line: &LogicalLine, autofix: bool) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
let mut num_lsqb = 0u32;
let mut num_rsqb = 0u32;
let mut prev_lsqb = None;
let mut prev_lbrace = None;
for (token, next_token) in line.tokens().iter().tuple_windows() {
let kind = token.kind();
match kind {
Tok::Lsqb => {
num_lsqb += 1;
prev_lsqb = Some(token.start());
}
Tok::Rsqb => {
num_rsqb += 1;
}
Tok::Lbrace => {
prev_lbrace = Some(token.start());
}
Tok::Comma | Tok::Semi | Tok::Colon => {
let after = line.text_after(&token);
if !after.chars().next().map_or(false, char::is_whitespace) {
match (kind, next_token.kind()) {
(Tok::Colon, _) if num_lsqb > num_rsqb && prev_lsqb > prev_lbrace => {
continue; // Slice syntax, no space required
}
(Tok::Comma, Tok::Rpar | Tok::Rsqb) => {
continue; // Allow tuple with only one element: (3,)
}
(Tok::Colon, Tok::Equal) => {
continue; // Allow assignment expression
}
_ => {}
}
let kind = MissingWhitespace {
token: kind.to_string(),
};
let (start, end) = token.range();
let mut diagnostic = Diagnostic::new(kind, Range::new(start, start));
if autofix {
diagnostic.set_fix(Edit::insertion(" ".to_string(), end));
}
diagnostics.push(diagnostic);
}
}
_ => {}
}
}
diagnostics
}
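The E231 skip conditions above compress well into a truth table. A toy restatement with a stand-in `Kind`; the real check tracks the bracket counters `num_lsqb`/`num_rsqb` and the `prev_lsqb`/`prev_lbrace` positions itself, and also covers `;`:

```rust
#[derive(Clone, Copy)]
enum Kind { Comma, Colon, Equal, Rpar, Rsqb, Other }

fn needs_space(kind: Kind, next: Kind, in_slice: bool) -> bool {
    match (kind, next) {
        (Kind::Colon, _) if in_slice => false,           // x[1:2], slice syntax
        (Kind::Comma, Kind::Rpar | Kind::Rsqb) => false, // (3,), one-element tuple
        (Kind::Colon, Kind::Equal) => false,             // (x := 1), walrus
        (Kind::Comma | Kind::Colon, _) => true,          // f(a,b), {1:2}: E231
        _ => false,
    }
}

fn main() {
    assert!(!needs_space(Kind::Colon, Kind::Other, true)); // slice colon: skipped
    assert!(needs_space(Kind::Comma, Kind::Other, false)); // f(a,b): flagged
    println!("ok");
}
```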

View file

@@ -0,0 +1,44 @@
use itertools::Itertools;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use super::LogicalLineTokens;
use crate::rules::pycodestyle::helpers::{is_keyword_token, is_singleton_token};
#[violation]
pub struct MissingWhitespaceAfterKeyword;
impl Violation for MissingWhitespaceAfterKeyword {
#[derive_message_formats]
fn message(&self) -> String {
format!("Missing whitespace after keyword")
}
}
/// E275
pub(crate) fn missing_whitespace_after_keyword(
tokens: &LogicalLineTokens,
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
for (tok0, tok1) in tokens.iter().tuple_windows() {
let tok0_kind = tok0.kind();
let tok1_kind = tok1.kind();
if is_keyword_token(tok0_kind)
&& !(is_singleton_token(tok0_kind)
|| matches!(tok0_kind, Tok::Async | Tok::Await)
|| tok0_kind == &Tok::Except && tok1_kind == &Tok::Star
|| tok0_kind == &Tok::Yield && tok1_kind == &Tok::Rpar
|| matches!(tok1_kind, Tok::Colon | Tok::Newline))
&& tok0.end() == tok1.start()
{
diagnostics.push((tok0.end(), MissingWhitespaceAfterKeyword.into()));
}
}
diagnostics
}
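A toy of the E275 pairing, with stand-in types; the real check also exempts singletons, `async`/`await`, `except*`, and `yield)`. It iterates adjacent token pairs and flags a keyword that touches its successor:

```rust
#[derive(Debug, Clone, Copy, PartialEq)]
enum Kind { Keyword, Colon, Newline, Other }

struct Token {
    kind: Kind,
    touches_next: bool, // no gap between this token and the next
}

fn missing_whitespace_after_keyword(tokens: &[Token]) -> usize {
    tokens
        .windows(2)
        .filter(|pair| {
            pair[0].kind == Kind::Keyword
                && pair[0].touches_next
                && !matches!(pair[1].kind, Kind::Colon | Kind::Newline)
        })
        .count()
}

fn main() {
    // `assert(x)`: the keyword touches '(' and yields one diagnostic.
    let tokens = [
        Token { kind: Kind::Keyword, touches_next: true },
        Token { kind: Kind::Other, touches_next: false },
    ];
    println!("{}", missing_whitespace_after_keyword(&tokens));
}
```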

View file

@@ -1,5 +1,3 @@
#![allow(dead_code, unused_imports, unused_variables)]
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
@@ -8,9 +6,10 @@ use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use crate::rules::pycodestyle::helpers::{
is_arithmetic_token, is_keyword_token, is_op_token, is_singleton_token, is_skip_comment_token,
is_arithmetic_token, is_keyword_token, is_op_token, is_skip_comment_token,
is_soft_keyword_token, is_unary_token, is_ws_needed_token, is_ws_optional_token,
};
use crate::rules::pycodestyle::rules::logical_lines::LogicalLineTokens;
// E225
#[violation]
@@ -57,53 +56,53 @@ impl Violation for MissingWhitespaceAroundModuloOperator {
}
/// E225, E226, E227, E228
#[cfg(feature = "logical_lines")]
#[allow(clippy::if_same_then_else)]
pub fn missing_whitespace_around_operator(
tokens: &[(Location, &Tok, Location)],
pub(crate) fn missing_whitespace_around_operator(
tokens: &LogicalLineTokens,
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
let mut needs_space_main: Option<bool> = Some(false);
let mut needs_space_aux: Option<bool> = None;
let mut prev_end_aux: Option<&Location> = None;
let mut prev_end_aux: Option<Location> = None;
let mut parens = 0u32;
let mut prev_type: Option<&Tok> = None;
let mut prev_end: Option<&Location> = None;
let mut prev_end: Option<Location> = None;
for (start, token, end) in tokens {
if is_skip_comment_token(token) {
for token in tokens {
let kind = token.kind();
if is_skip_comment_token(kind) {
continue;
}
if **token == Tok::Lpar || **token == Tok::Lambda {
parens += 1;
} else if **token == Tok::Rpar {
parens -= 1;
}
match kind {
Tok::Lpar | Tok::Lambda => parens += 1,
Tok::Rpar => parens -= 1,
_ => {}
};
let needs_space = (needs_space_main.is_some() && needs_space_main.unwrap())
|| needs_space_aux.is_some()
|| prev_end_aux.is_some();
if needs_space {
if Some(start) != prev_end {
if Some(token.start()) != prev_end {
if !(needs_space_main.is_some() && needs_space_main.unwrap())
&& (needs_space_aux.is_none() || !needs_space_aux.unwrap())
{
diagnostics.push((
*(prev_end_aux.unwrap()),
prev_end_aux.unwrap(),
MissingWhitespaceAroundOperator.into(),
));
}
needs_space_main = Some(false);
needs_space_aux = None;
prev_end_aux = None;
} else if **token == Tok::Greater
&& (prev_type == Some(&Tok::Less) || prev_type == Some(&Tok::Minus))
{
} else if kind == &Tok::Greater && matches!(prev_type, Some(Tok::Less | Tok::Minus)) {
// Tolerate the "<>" operator, even if running Python 3
// Deal with Python 3's annotated return value "->"
} else if prev_type == Some(&Tok::Slash)
&& (**token == Tok::Comma || **token == Tok::Rpar || **token == Tok::Colon)
|| (prev_type == Some(&Tok::Rpar) && **token == Tok::Colon)
&& matches!(kind, Tok::Comma | Tok::Rpar | Tok::Colon)
|| (prev_type == Some(&Tok::Rpar) && kind == &Tok::Colon)
{
// Tolerate the "/" operator in function definition
// For more info see PEP570
@@ -111,22 +110,21 @@ pub fn missing_whitespace_around_operator(
if (needs_space_main.is_some() && needs_space_main.unwrap())
|| (needs_space_aux.is_some() && needs_space_aux.unwrap())
{
diagnostics
.push((*(prev_end.unwrap()), MissingWhitespaceAroundOperator.into()));
diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into()));
} else if prev_type != Some(&Tok::DoubleStar) {
if prev_type == Some(&Tok::Percent) {
diagnostics.push((
*(prev_end_aux.unwrap()),
prev_end_aux.unwrap(),
MissingWhitespaceAroundModuloOperator.into(),
));
} else if !is_arithmetic_token(prev_type.unwrap()) {
diagnostics.push((
*(prev_end_aux.unwrap()),
prev_end_aux.unwrap(),
MissingWhitespaceAroundBitwiseOrShiftOperator.into(),
));
} else {
diagnostics.push((
*(prev_end_aux.unwrap()),
prev_end_aux.unwrap(),
MissingWhitespaceAroundArithmeticOperator.into(),
));
}
@@ -135,30 +133,28 @@ pub fn missing_whitespace_around_operator(
needs_space_aux = None;
prev_end_aux = None;
}
} else if (is_op_token(token) || matches!(token, Tok::Name { .. })) && prev_end.is_some() {
if **token == Tok::Equal && parens > 0 {
} else if (is_op_token(kind) || matches!(kind, Tok::Name { .. })) && prev_end.is_some() {
if kind == &Tok::Equal && parens > 0 {
// Allow keyword args or defaults: foo(bar=None).
} else if is_ws_needed_token(token) {
} else if is_ws_needed_token(kind) {
needs_space_main = Some(true);
needs_space_aux = None;
prev_end_aux = None;
} else if is_unary_token(token) {
} else if is_unary_token(kind) {
// Check if the operator is used as a binary operator
// Allow unary operators: -123, -x, +1.
// Allow argument unpacking: foo(*args, **kwargs)
if (prev_type.is_some()
&& is_op_token(prev_type.unwrap())
&& (prev_type == Some(&Tok::Rpar)
|| prev_type == Some(&Tok::Rsqb)
|| prev_type == Some(&Tok::Rbrace)))
|| (!is_op_token(prev_type.unwrap()) && !is_keyword_token(prev_type.unwrap()))
&& (!is_soft_keyword_token(prev_type.unwrap()))
{
needs_space_main = None;
needs_space_aux = None;
prev_end_aux = None;
if let Some(prev_type) = prev_type {
if (matches!(prev_type, Tok::Rpar | Tok::Rsqb | Tok::Rbrace))
|| (!is_op_token(prev_type) && !is_keyword_token(prev_type))
&& (!is_soft_keyword_token(prev_type))
{
needs_space_main = None;
needs_space_aux = None;
prev_end_aux = None;
}
}
} else if is_ws_optional_token(token) {
} else if is_ws_optional_token(kind) {
needs_space_main = None;
needs_space_aux = None;
prev_end_aux = None;
@@ -169,28 +165,21 @@ pub fn missing_whitespace_around_operator(
// trailing space matches opening space
needs_space_main = None;
prev_end_aux = prev_end;
needs_space_aux = Some(Some(start) != prev_end_aux);
needs_space_aux = Some(Some(token.start()) != prev_end_aux);
} else if needs_space_main.is_some()
&& needs_space_main.unwrap()
&& Some(start) == prev_end_aux
&& Some(token.start()) == prev_end_aux
{
// A needed opening space was not found
diagnostics.push((*(prev_end.unwrap()), MissingWhitespaceAroundOperator.into()));
diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into()));
needs_space_main = Some(false);
needs_space_aux = None;
prev_end_aux = None;
}
}
prev_type = Some(*token);
prev_end = Some(end);
prev_type = Some(kind);
prev_end = Some(token.end());
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn missing_whitespace_around_operator(
_tokens: &[(Location, &Tok, Location)],
) -> Vec<(Location, DiagnosticKind)> {
vec![]
}

View file

@@ -0,0 +1,653 @@
use bitflags::bitflags;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use std::fmt::{Debug, Formatter};
use std::iter::FusedIterator;
use std::ops::Deref;
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range;
use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token};
pub(crate) use extraneous_whitespace::{
extraneous_whitespace, WhitespaceAfterOpenBracket, WhitespaceBeforeCloseBracket,
WhitespaceBeforePunctuation,
};
pub(crate) use indentation::{
indentation, IndentationWithInvalidMultiple, IndentationWithInvalidMultipleComment,
NoIndentedBlock, NoIndentedBlockComment, OverIndented, UnexpectedIndentation,
UnexpectedIndentationComment,
};
pub(crate) use missing_whitespace::{missing_whitespace, MissingWhitespace};
pub(crate) use missing_whitespace_after_keyword::{
missing_whitespace_after_keyword, MissingWhitespaceAfterKeyword,
};
pub(crate) use missing_whitespace_around_operator::{
missing_whitespace_around_operator, MissingWhitespaceAroundArithmeticOperator,
MissingWhitespaceAroundBitwiseOrShiftOperator, MissingWhitespaceAroundModuloOperator,
MissingWhitespaceAroundOperator,
};
pub(crate) use space_around_operator::{
space_around_operator, MultipleSpacesAfterOperator, MultipleSpacesBeforeOperator,
TabAfterOperator, TabBeforeOperator,
};
pub(crate) use whitespace_around_keywords::{
whitespace_around_keywords, MultipleSpacesAfterKeyword, MultipleSpacesBeforeKeyword,
TabAfterKeyword, TabBeforeKeyword,
};
pub(crate) use whitespace_around_named_parameter_equals::{
whitespace_around_named_parameter_equals, MissingWhitespaceAroundParameterEquals,
UnexpectedSpacesAroundKeywordParameterEquals,
};
pub(crate) use whitespace_before_comment::{
whitespace_before_comment, MultipleLeadingHashesForBlockComment, NoSpaceAfterBlockComment,
NoSpaceAfterInlineComment, TooFewSpacesBeforeInlineComment,
};
pub(crate) use whitespace_before_parameters::{
whitespace_before_parameters, WhitespaceBeforeParameters,
};
mod extraneous_whitespace;
mod indentation;
mod missing_whitespace;
mod missing_whitespace_after_keyword;
mod missing_whitespace_around_operator;
mod space_around_operator;
mod whitespace_around_keywords;
mod whitespace_around_named_parameter_equals;
mod whitespace_before_comment;
mod whitespace_before_parameters;
bitflags! {
#[derive(Default)]
pub(crate) struct TokenFlags: u8 {
/// Whether the logical line contains an operator.
const OPERATOR = 0b0000_0001;
/// Whether the logical line contains a bracket.
const BRACKET = 0b0000_0010;
/// Whether the logical line contains a punctuation mark.
const PUNCTUATION = 0b0000_0100;
/// Whether the logical line contains a keyword.
const KEYWORD = 0b0000_1000;
/// Whether the logical line contains a comment.
const COMMENT = 0b0001_0000;
}
}
#[derive(Clone)]
pub(crate) struct LogicalLines<'a> {
tokens: Tokens<'a>,
lines: Vec<Line>,
locator: &'a Locator<'a>,
}
impl<'a> LogicalLines<'a> {
pub fn from_tokens(tokens: &'a [LexResult], locator: &'a Locator<'a>) -> Self {
assert!(u32::try_from(tokens.len()).is_ok());
let mut builder = LogicalLinesBuilder::with_capacity(tokens.len());
let mut parens: u32 = 0;
for (start, token, end) in tokens.iter().flatten() {
builder.push_token(*start, token, *end);
match token {
Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
parens += 1;
}
Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
parens -= 1;
}
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment { .. } if parens == 0 => {
builder.finish_line();
}
_ => {}
}
}
builder.finish(locator)
}
}
impl Debug for LogicalLines<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_list()
.entries(self.into_iter().map(DebugLogicalLine))
.finish()
}
}
impl<'a> IntoIterator for &'a LogicalLines<'a> {
type Item = LogicalLine<'a>;
type IntoIter = LogicalLinesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
LogicalLinesIter {
lines: self,
inner: self.lines.iter(),
}
}
}
/// A logical line spans multiple lines in the source document if the line
/// ends with a parenthesized expression (`(..)`, `[..]`, `{..}`) that contains
/// line breaks.
///
/// ## Examples
/// This expression forms one logical line because the array elements are parenthesized.
///
/// ```python
/// a = [
/// 1,
/// 2
/// ]
/// ```
#[derive(Debug)]
pub(crate) struct LogicalLine<'a> {
lines: &'a LogicalLines<'a>,
line: &'a Line,
}
impl<'a> LogicalLine<'a> {
/// Returns `true` if this is a comment-only line
pub fn is_comment_only(&self) -> bool {
self.flags() == TokenFlags::COMMENT && self.tokens().trimmed().is_empty()
}
/// Returns the logical line's text, including comments, indents, dedents, and trailing newlines.
pub fn text(&self) -> &'a str {
self.tokens().text()
}
/// Returns this line's text without any leading or trailing newline, comment, indent, or dedent
#[cfg(test)]
pub fn text_trimmed(&self) -> &'a str {
self.tokens_trimmed().text()
}
#[cfg(test)]
pub fn tokens_trimmed(&self) -> LogicalLineTokens<'a> {
self.tokens().trimmed()
}
/// Returns the text after `token`
pub fn text_after(&self, token: &LogicalLineToken<'a>) -> &str {
debug_assert!(
(self.line.tokens_start..self.line.tokens_end).contains(&token.position),
"Token does not belong to this line"
);
// SAFETY: The line must have at least one token or `token` would not belong to this line.
let last_token = self.tokens().last().unwrap();
self.lines
.locator
.slice(Range::new(token.end(), last_token.end()))
}
/// Returns the text before `token`
pub fn text_before(&self, token: &LogicalLineToken<'a>) -> &str {
debug_assert!(
(self.line.tokens_start..self.line.tokens_end).contains(&token.position),
"Token does not belong to this line"
);
// SAFETY: The line must have at least one token or `token` would not belong to this line.
let first_token = self.tokens().first().unwrap();
self.lines
.locator
.slice(Range::new(first_token.start(), token.start()))
}
/// Returns the whitespace *after* the `token`
pub fn trailing_whitespace(&self, token: &LogicalLineToken<'a>) -> Whitespace {
Whitespace::leading(self.text_after(token))
}
/// Returns the whitespace and whitespace character-length *before* the `token`
pub fn leading_whitespace(&self, token: &LogicalLineToken<'a>) -> (Whitespace, usize) {
Whitespace::trailing(self.text_before(token))
}
/// Returns all tokens of the line, including comments and trailing newlines.
pub fn tokens(&self) -> LogicalLineTokens<'a> {
LogicalLineTokens {
lines: self.lines,
front: self.line.tokens_start,
back: self.line.tokens_end,
}
}
/// Returns the [`Location`] of the first token on the line or [`None`].
pub fn first_token_location(&self) -> Option<Location> {
self.tokens().first().map(|t| t.start())
}
/// Returns the line's flags
pub const fn flags(&self) -> TokenFlags {
self.line.flags
}
}
/// Helper struct to pretty print [`LogicalLine`] with `dbg`
struct DebugLogicalLine<'a>(LogicalLine<'a>);
impl Debug for DebugLogicalLine<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LogicalLine")
.field("text", &self.0.text())
.field("flags", &self.0.flags())
.field("tokens", &self.0.tokens())
.finish()
}
}
/// Iterator over the logical lines of a document.
pub(crate) struct LogicalLinesIter<'a> {
lines: &'a LogicalLines<'a>,
inner: std::slice::Iter<'a, Line>,
}
impl<'a> Iterator for LogicalLinesIter<'a> {
type Item = LogicalLine<'a>;
fn next(&mut self) -> Option<Self::Item> {
let line = self.inner.next()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
}
impl DoubleEndedIterator for LogicalLinesIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
let line = self.inner.next_back()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
}
impl ExactSizeIterator for LogicalLinesIter<'_> {}
impl FusedIterator for LogicalLinesIter<'_> {}
/// The tokens of a logical line
pub(crate) struct LogicalLineTokens<'a> {
lines: &'a LogicalLines<'a>,
front: u32,
back: u32,
}
impl<'a> LogicalLineTokens<'a> {
pub fn iter(&self) -> LogicalLineTokensIter<'a> {
LogicalLineTokensIter {
tokens: &self.lines.tokens,
front: self.front,
back: self.back,
}
}
pub fn len(&self) -> usize {
(self.back - self.front) as usize
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn trimmed(&self) -> LogicalLineTokens<'a> {
let tokens = &self.lines.tokens[self.front as usize..self.back as usize];
let mut front = self.front;
let mut back = self.back;
let mut iter = tokens.iter();
for (_, kind, _) in iter.by_ref() {
if !matches!(
kind,
Tok::Newline
| Tok::NonLogicalNewline
| Tok::Indent
| Tok::Dedent
| Tok::Comment(..)
) {
break;
}
front += 1;
}
for (_, kind, _) in iter.rev() {
if !matches!(
kind,
Tok::Newline
| Tok::NonLogicalNewline
| Tok::Indent
| Tok::Dedent
| Tok::Comment(..)
) {
break;
}
back -= 1;
}
LogicalLineTokens {
lines: self.lines,
front,
back,
}
}
pub fn text(&self) -> &'a str {
match (self.first(), self.last()) {
(Some(first), Some(last)) => {
let locator = self.lines.locator;
locator.slice(Range::new(first.start(), last.end()))
}
_ => "",
}
}
/// Returns the first token
pub fn first(&self) -> Option<LogicalLineToken<'a>> {
self.iter().next()
}
/// Returns the last token
pub fn last(&self) -> Option<LogicalLineToken<'a>> {
self.iter().next_back()
}
}
impl<'a> IntoIterator for LogicalLineTokens<'a> {
type Item = LogicalLineToken<'a>;
type IntoIter = LogicalLineTokensIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl<'a> IntoIterator for &LogicalLineTokens<'a> {
type Item = LogicalLineToken<'a>;
type IntoIter = LogicalLineTokensIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl Debug for LogicalLineTokens<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_list().entries(self.iter()).finish()
}
}
/// Iterator over the tokens of a [`LogicalLine`]
pub(crate) struct LogicalLineTokensIter<'a> {
tokens: &'a Tokens<'a>,
front: u32,
back: u32,
}
impl<'a> Iterator for LogicalLineTokensIter<'a> {
type Item = LogicalLineToken<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.front < self.back {
let result = Some(LogicalLineToken {
tokens: self.tokens,
position: self.front,
});
self.front += 1;
result
} else {
None
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = (self.back - self.front) as usize;
(len, Some(len))
}
}
impl ExactSizeIterator for LogicalLineTokensIter<'_> {}
impl FusedIterator for LogicalLineTokensIter<'_> {}
impl DoubleEndedIterator for LogicalLineTokensIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.front < self.back {
self.back -= 1;
Some(LogicalLineToken {
position: self.back,
tokens: self.tokens,
})
} else {
None
}
}
}
/// A token of a [`LogicalLine`]
#[derive(Clone)]
pub(crate) struct LogicalLineToken<'a> {
tokens: &'a Tokens<'a>,
position: u32,
}
impl<'a> LogicalLineToken<'a> {
/// Returns the token's kind
pub fn kind(&self) -> &'a Tok {
#[allow(unsafe_code)]
let (_, token, _) = unsafe { *self.tokens.get_unchecked(self.position as usize) };
token
}
/// Returns the token's start location
pub fn start(&self) -> Location {
self.range().0
}
/// Returns the token's end location
pub fn end(&self) -> Location {
self.range().1
}
/// Returns a tuple with the token's `(start, end)` locations
pub fn range(&self) -> (Location, Location) {
#[allow(unsafe_code)]
let &(start, _, end) = unsafe { self.tokens.get_unchecked(self.position as usize) };
(start, end)
}
}
impl Debug for LogicalLineToken<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LogicalLineToken")
.field("kind", &self.kind())
.field("range", &self.range())
.finish()
}
}
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) enum Whitespace {
None,
Single,
Many,
Tab,
}
impl Whitespace {
fn leading(content: &str) -> Self {
let mut count = 0u32;
for c in content.chars() {
if c == '\t' {
return Self::Tab;
} else if matches!(c, '\n' | '\r') {
break;
} else if c.is_whitespace() {
count += 1;
} else {
break;
}
}
match count {
0 => Whitespace::None,
1 => Whitespace::Single,
_ => Whitespace::Many,
}
}
fn trailing(content: &str) -> (Self, usize) {
let mut count = 0;
for c in content.chars().rev() {
if c == '\t' {
return (Self::Tab, count + 1);
} else if matches!(c, '\n' | '\r') {
// Indent
return (Self::None, 0);
} else if c.is_whitespace() {
count += 1;
} else {
break;
}
}
match count {
0 => (Self::None, 0),
1 => (Self::Single, count),
_ => (Self::Many, count),
}
}
}
#[derive(Debug, Default)]
struct CurrentLine {
flags: TokenFlags,
tokens_start: u32,
}
/// Builder for [`LogicalLines`]
#[derive(Debug, Default)]
struct LogicalLinesBuilder<'a> {
tokens: Tokens<'a>,
lines: Vec<Line>,
current_line: Option<CurrentLine>,
}
impl<'a> LogicalLinesBuilder<'a> {
fn with_capacity(tokens: usize) -> Self {
Self {
tokens: Tokens::with_capacity(tokens),
..Self::default()
}
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn push_token(&mut self, start: Location, token: &'a Tok, end: Location) {
let tokens_start = self.tokens.len();
let line = self.current_line.get_or_insert_with(|| CurrentLine {
flags: TokenFlags::empty(),
tokens_start: tokens_start as u32,
});
if matches!(token, Tok::Comment { .. }) {
line.flags.insert(TokenFlags::COMMENT);
} else if is_op_token(token) {
line.flags.insert(TokenFlags::OPERATOR);
line.flags.set(
TokenFlags::BRACKET,
matches!(
token,
Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
),
);
}
if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
line.flags.insert(TokenFlags::PUNCTUATION);
} else if is_keyword_token(token) {
line.flags.insert(TokenFlags::KEYWORD);
}
self.tokens.push(token, start, end);
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn finish_line(&mut self) {
if let Some(current) = self.current_line.take() {
self.lines.push(Line {
flags: current.flags,
tokens_start: current.tokens_start,
tokens_end: self.tokens.len() as u32,
});
}
}
fn finish(mut self, locator: &'a Locator<'a>) -> LogicalLines<'a> {
self.finish_line();
LogicalLines {
tokens: self.tokens,
lines: self.lines,
locator,
}
}
}
#[derive(Debug, Clone)]
struct Line {
flags: TokenFlags,
tokens_start: u32,
tokens_end: u32,
}
#[derive(Debug, Clone, Default)]
struct Tokens<'a>(Vec<(Location, &'a Tok, Location)>);
impl<'a> Tokens<'a> {
/// Creates a new token collection with a reserved capacity of `capacity`
fn with_capacity(capacity: usize) -> Self {
Self(Vec::with_capacity(capacity))
}
/// Returns the number of stored tokens.
fn len(&self) -> usize {
self.0.len()
}
/// Adds a new token with the given `kind` and its `start` and `end` locations.
fn push(&mut self, kind: &'a Tok, start: Location, end: Location) {
self.0.push((start, kind, end));
}
}
impl<'a> Deref for Tokens<'a> {
type Target = [(Location, &'a Tok, Location)];
fn deref(&self) -> &Self::Target {
&self.0
}
}
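The `Whitespace` helpers at the heart of this file port directly to standalone code. This mirrors `Whitespace::leading`, which stops at newlines because what follows a newline is indentation, not an intra-line gap:

```rust
#[derive(Debug, PartialEq)]
enum Whitespace { None, Single, Many, Tab }

// Classify the run of blanks that follows a token.
fn leading(content: &str) -> Whitespace {
    let mut count = 0u32;
    for c in content.chars() {
        match c {
            '\t' => return Whitespace::Tab,
            '\n' | '\r' => break, // newline: indentation, not a gap
            c if c.is_whitespace() => count += 1,
            _ => break,
        }
    }
    match count {
        0 => Whitespace::None,
        1 => Whitespace::Single,
        _ => Whitespace::Many,
    }
}

fn main() {
    assert_eq!(leading("  + 1"), Whitespace::Many);
    assert_eq!(leading("\t+ 1"), Whitespace::Tab);
    assert_eq!(leading("\n    x"), Whitespace::None);
    println!("ok");
}
```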

View file

@@ -1,16 +1,10 @@
#![allow(dead_code, unused_imports, unused_variables)]
use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use crate::rules::pycodestyle::helpers::is_op_token;
use crate::rules::pycodestyle::rules::Whitespace;
use super::{LogicalLine, Whitespace};
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;
/// ## What it does
/// Checks for extraneous tabs before an operator.
@@ -128,46 +122,77 @@ impl Violation for MultipleSpacesAfterOperator {
}
}
static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap());
/// E221, E222, E223, E224
#[cfg(feature = "logical_lines")]
pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
let mut last_end = None;
let mut after_operator = false;
for line_match in OPERATOR_REGEX.find_iter(line) {
if last_end != Some(line_match.start()) {
let before = &line[..line_match.start()];
for token in line.tokens() {
let is_operator = is_operator_token(token.kind());
match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => {
diagnostics.push((line_match.start() - offset, TabBeforeOperator.into()));
if is_operator {
let (start, end) = token.range();
if !after_operator {
match line.leading_whitespace(&token) {
(Whitespace::Tab, offset) => diagnostics.push((
Location::new(start.row(), start.column() - offset),
TabBeforeOperator.into(),
)),
(Whitespace::Many, offset) => diagnostics.push((
Location::new(start.row(), start.column() - offset),
MultipleSpacesBeforeOperator.into(),
)),
_ => {}
}
(Whitespace::Many, offset) => diagnostics.push((
line_match.start() - offset,
MultipleSpacesBeforeOperator.into(),
)),
}
match line.trailing_whitespace(&token) {
Whitespace::Tab => diagnostics.push((end, TabAfterOperator.into())),
Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterOperator.into())),
_ => {}
}
}
let after = &line[line_match.end()..];
let (leading_offset, leading_kind) = Whitespace::leading(after);
match leading_kind {
Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())),
Whitespace::Many => {
diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into()));
}
_ => {}
}
last_end = Some(line_match.end() + leading_offset);
after_operator = is_operator;
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn space_around_operator(_line: &str) -> Vec<(usize, DiagnosticKind)> {
vec![]
const fn is_operator_token(token: &Tok) -> bool {
matches!(
token,
Tok::Plus
| Tok::Minus
| Tok::Star
| Tok::Slash
| Tok::Vbar
| Tok::Amper
| Tok::Less
| Tok::Greater
| Tok::Equal
| Tok::Percent
| Tok::NotEqual
| Tok::LessEqual
| Tok::GreaterEqual
| Tok::CircumFlex
| Tok::LeftShift
| Tok::RightShift
| Tok::DoubleStar
| Tok::PlusEqual
| Tok::MinusEqual
| Tok::StarEqual
| Tok::SlashEqual
| Tok::PercentEqual
| Tok::AmperEqual
| Tok::VbarEqual
| Tok::CircumflexEqual
| Tok::LeftShiftEqual
| Tok::RightShiftEqual
| Tok::DoubleStarEqual
| Tok::DoubleSlash
| Tok::DoubleSlashEqual
| Tok::ColonEqual
)
}
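As a quick reference (taken from pycodestyle's documented cases, not from this diff), the four rules handled above fire on:
// E223 (TabBeforeOperator):            x\t= 1
// E221 (MultipleSpacesBeforeOperator): x  = 1
// E224 (TabAfterOperator):             x =\t1
// E222 (MultipleSpacesAfterOperator):  x =  1
// Okay:                                x = 1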

View file

@ -1,9 +1,7 @@
#![allow(dead_code, unused_imports, unused_variables)]
use rustpython_parser::ast::Location;
use once_cell::sync::Lazy;
use regex::Regex;
use crate::rules::pycodestyle::rules::Whitespace;
use super::{LogicalLine, Whitespace};
use crate::rules::pycodestyle::helpers::is_keyword_token;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
@ -111,47 +109,40 @@ impl Violation for TabBeforeKeyword {
}
}
static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap()
});
/// E271, E272, E273, E274
#[cfg(feature = "logical_lines")]
pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
pub(crate) fn whitespace_around_keywords(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
let mut last_end = None;
let mut after_keyword = false;
for line_match in KEYWORD_REGEX.find_iter(line) {
if last_end != Some(line_match.start()) {
let before = &line[..line_match.start()];
match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => {
diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into()));
for token in line.tokens() {
let is_keyword = is_keyword_token(token.kind());
if is_keyword {
let (start, end) = token.range();
if !after_keyword {
match line.leading_whitespace(&token) {
(Whitespace::Tab, offset) => diagnostics.push((
Location::new(start.row(), start.column() - offset),
TabBeforeKeyword.into(),
)),
(Whitespace::Many, offset) => diagnostics.push((
Location::new(start.row(), start.column() - offset),
MultipleSpacesBeforeKeyword.into(),
)),
_ => {}
}
(Whitespace::Many, offset) => diagnostics.push((
line_match.start() - offset,
MultipleSpacesBeforeKeyword.into(),
)),
}
match line.trailing_whitespace(&token) {
Whitespace::Tab => diagnostics.push((end, TabAfterKeyword.into())),
Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterKeyword.into())),
_ => {}
}
}
let after = &line[line_match.end()..];
let (leading_offset, leading_kind) = Whitespace::leading(after);
match leading_kind {
Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())),
Whitespace::Many => {
diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into()));
}
_ => {}
}
last_end = Some(line_match.end() + leading_offset);
after_keyword = is_keyword;
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn whitespace_around_keywords(_line: &str) -> Vec<(usize, DiagnosticKind)> {
vec![]
}
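The keyword variant covers the analogous pycodestyle cases (again quoted from pycodestyle's docs, not from this diff):
// E274 (TabBeforeKeyword):            True\tand False
// E272 (MultipleSpacesBeforeKeyword): True  and False
// E273 (TabAfterKeyword):             True and\tFalse
// E271 (MultipleSpacesAfterKeyword):  True and  False
// Okay:                               True and False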

View file

@ -0,0 +1,121 @@
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use super::LogicalLineTokens;
use crate::rules::pycodestyle::helpers::is_op_token;
#[violation]
pub struct UnexpectedSpacesAroundKeywordParameterEquals;
impl Violation for UnexpectedSpacesAroundKeywordParameterEquals {
#[derive_message_formats]
fn message(&self) -> String {
format!("Unexpected spaces around keyword / parameter equals")
}
}
#[violation]
pub struct MissingWhitespaceAroundParameterEquals;
impl Violation for MissingWhitespaceAroundParameterEquals {
#[derive_message_formats]
fn message(&self) -> String {
format!("Missing whitespace around parameter equals")
}
}
fn is_in_def(tokens: &LogicalLineTokens) -> bool {
for token in tokens {
match token.kind() {
Tok::Async | Tok::Indent | Tok::Dedent => continue,
Tok::Def => return true,
_ => return false,
}
}
false
}
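The scan above skips only `async`, indent, and dedent tokens before deciding, so it effectively asks whether the logical line's first significant token is `def`:
// is_in_def on the leading tokens of a logical line:
//   def f(x=1): ...        [Def, ...]          -> true
//   async def f(x=1): ...  [Async, Def, ...]   -> true
//       def g(): ...       [Indent, Def, ...]  -> true
//   f(x=1)                 [Name, Lpar, ...]   -> false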
/// E251, E252
pub(crate) fn whitespace_around_named_parameter_equals(
tokens: &LogicalLineTokens,
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
let mut parens = 0;
let mut require_space = false;
let mut no_space = false;
let mut annotated_func_arg = false;
let mut prev_end: Option<Location> = None;
let in_def = is_in_def(tokens);
for token in tokens {
let kind = token.kind();
if kind == &Tok::NonLogicalNewline {
continue;
}
if no_space {
no_space = false;
if Some(token.start()) != prev_end {
diagnostics.push((
prev_end.unwrap(),
UnexpectedSpacesAroundKeywordParameterEquals.into(),
));
}
}
if require_space {
require_space = false;
let start = token.start();
if Some(start) == prev_end {
diagnostics.push((start, MissingWhitespaceAroundParameterEquals.into()));
}
}
if is_op_token(kind) {
match kind {
Tok::Lpar | Tok::Lsqb => {
parens += 1;
}
Tok::Rpar | Tok::Rsqb => {
parens -= 1;
}
Tok::Colon if parens == 1 && in_def => {
annotated_func_arg = true;
}
Tok::Comma if parens == 1 => {
annotated_func_arg = false;
}
Tok::Equal if parens > 0 => {
if annotated_func_arg && parens == 1 {
require_space = true;
let start = token.start();
if Some(start) == prev_end {
diagnostics
.push((start, MissingWhitespaceAroundParameterEquals.into()));
}
} else {
no_space = true;
if Some(token.start()) != prev_end {
diagnostics.push((
prev_end.unwrap(),
UnexpectedSpacesAroundKeywordParameterEquals.into(),
));
}
}
}
_ => {}
}
if parens < 1 {
annotated_func_arg = false;
}
}
prev_end = Some(token.end());
}
diagnostics
}
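For reference, the pycodestyle behavior this token walk reproduces: defaults of unannotated parameters must hug the `=`, while annotated parameters require spaces around it.
// def f(x=1): ...          Okay
// def f(x =1): ...         E251 (UnexpectedSpacesAroundKeywordParameterEquals)
// def f(x= 1): ...         E251
// def f(x: int=1): ...     E252 (MissingWhitespaceAroundParameterEquals)
// def f(x: int = 1): ...   Okay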

View file

@ -1,8 +1,7 @@
#![allow(dead_code, unused_imports, unused_variables)]
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use super::LogicalLineTokens;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
@ -139,25 +138,29 @@ impl Violation for MultipleLeadingHashesForBlockComment {
}
/// E261, E262, E265, E266
#[cfg(feature = "logical_lines")]
pub fn whitespace_before_comment(
tokens: &[(Location, &Tok, Location)],
pub(crate) fn whitespace_before_comment(
tokens: &LogicalLineTokens,
locator: &Locator,
) -> Vec<(Range, DiagnosticKind)> {
let mut diagnostics = vec![];
let mut prev_end = Location::new(0, 0);
for (start, tok, end) in tokens {
if let Tok::Comment(text) = tok {
for token in tokens {
let kind = token.kind();
if let Tok::Comment { .. } = kind {
let (start, end) = token.range();
let line = locator.slice(Range::new(
Location::new(start.row(), 0),
Location::new(start.row(), start.column()),
));
let text = locator.slice(Range::new(start, end));
let is_inline_comment = !line.trim().is_empty();
if is_inline_comment {
if prev_end.row() == start.row() && start.column() < prev_end.column() + 2 {
diagnostics.push((
Range::new(prev_end, *start),
Range::new(prev_end, start),
TooFewSpacesBeforeInlineComment.into(),
));
}
@ -177,32 +180,23 @@ pub fn whitespace_before_comment(
if is_inline_comment {
if bad_prefix.is_some() || comment.chars().next().map_or(false, char::is_whitespace)
{
diagnostics.push((Range::new(*start, *end), NoSpaceAfterInlineComment.into()));
diagnostics.push((Range::new(start, end), NoSpaceAfterInlineComment.into()));
}
} else if let Some(bad_prefix) = bad_prefix {
if bad_prefix != '!' || start.row() > 1 {
if bad_prefix != '#' {
diagnostics
.push((Range::new(*start, *end), NoSpaceAfterBlockComment.into()));
diagnostics.push((Range::new(start, end), NoSpaceAfterBlockComment.into()));
} else if !comment.is_empty() {
diagnostics.push((
Range::new(*start, *end),
Range::new(start, end),
MultipleLeadingHashesForBlockComment.into(),
));
}
}
}
} else if !matches!(tok, Tok::NonLogicalNewline) {
prev_end = *end;
} else if !matches!(kind, Tok::NonLogicalNewline) {
prev_end = token.end();
}
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn whitespace_before_comment(
_tokens: &[(Location, &Tok, Location)],
_locator: &Locator,
) -> Vec<(Range, DiagnosticKind)> {
vec![]
}
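The comment rules exercised here, per pycodestyle's documented examples:
// x = 1  # note        Okay (at least two spaces, then "# ")
// x = 1 # note         E261 (TooFewSpacesBeforeInlineComment)
// x = 1  #note         E262 (NoSpaceAfterInlineComment)
// #note                E265 (NoSpaceAfterBlockComment)
// ## note              E266 (MultipleLeadingHashesForBlockComment)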

View file

@ -1,5 +1,3 @@
#![allow(dead_code, unused_imports, unused_variables)]
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
@ -7,8 +5,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::types::Range;
use crate::registry::AsRule;
use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token, is_soft_keyword_token};
use super::LogicalLineTokens;
#[violation]
pub struct WhitespaceBeforeParameters {
@ -29,28 +26,34 @@ impl AlwaysAutofixableViolation for WhitespaceBeforeParameters {
}
/// E211
#[cfg(feature = "logical_lines")]
pub fn whitespace_before_parameters(
tokens: &[(Location, &Tok, Location)],
pub(crate) fn whitespace_before_parameters(
tokens: &LogicalLineTokens,
autofix: bool,
) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
let (_, mut prev_token, mut prev_end) = tokens.first().unwrap();
for (idx, (start, tok, end)) in tokens.iter().enumerate() {
if is_op_token(tok)
&& (**tok == Tok::Lpar || **tok == Tok::Lsqb)
&& *start != prev_end
&& (matches!(prev_token, Tok::Name { .. })
|| matches!(prev_token, Tok::Rpar | Tok::Rsqb | Tok::Rbrace))
&& (idx < 2 || *(tokens[idx - 2].1) != Tok::Class)
&& !is_keyword_token(tok)
&& !is_soft_keyword_token(tok)
let previous = tokens.first().unwrap();
let mut pre_pre_kind: Option<&Tok> = None;
let mut prev_token = previous.kind();
let mut prev_end = previous.end();
for token in tokens {
let kind = token.kind();
if matches!(kind, Tok::Lpar | Tok::Lsqb)
&& token.start() != prev_end
&& matches!(
prev_token,
Tok::Name { .. } | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
)
&& (pre_pre_kind != Some(&Tok::Class))
{
let start = Location::new(prev_end.row(), prev_end.column());
let end = token.end();
let end = Location::new(end.row(), end.column() - 1);
let kind: WhitespaceBeforeParameters = WhitespaceBeforeParameters {
bracket: tok.to_string(),
bracket: kind.to_string(),
};
let mut diagnostic = Diagnostic::new(kind, Range::new(start, end));
@ -60,16 +63,9 @@ pub fn whitespace_before_parameters(
}
diagnostics.push(diagnostic);
}
prev_token = *tok;
prev_end = *end;
pre_pre_kind = Some(prev_token);
prev_token = kind;
prev_end = token.end();
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn whitespace_before_parameters(
_tokens: &[(Location, &Tok, Location)],
_autofix: bool,
) -> Vec<Diagnostic> {
vec![]
}
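Concretely, with the `pre_pre_kind` guard above, the check matches pycodestyle's E211 cases:
// spam(1)                 Okay
// spam (1)                E211 (WhitespaceBeforeParameters)
// dict['x']               Okay
// dict ['x']              E211
// class Foo (Bar): ...    Okay -- exempt because the token two back
//                         is `class` (the pre_pre_kind check)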

View file

@ -1,102 +0,0 @@
#![allow(dead_code, unused_imports, unused_variables)]
use itertools::Itertools;
use rustpython_parser::ast::Location;
use ruff_diagnostics::Edit;
use ruff_diagnostics::Violation;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::types::Range;
#[violation]
pub struct MissingWhitespace {
pub token: String,
}
impl AlwaysAutofixableViolation for MissingWhitespace {
#[derive_message_formats]
fn message(&self) -> String {
let MissingWhitespace { token } = self;
format!("Missing whitespace after '{token}'")
}
fn autofix_title(&self) -> String {
let MissingWhitespace { token } = self;
format!("Added missing whitespace after '{token}'")
}
}
/// E231
#[cfg(feature = "logical_lines")]
pub fn missing_whitespace(
line: &str,
row: usize,
autofix: bool,
indent_level: usize,
) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
let mut num_lsqb = 0u32;
let mut num_rsqb = 0u32;
let mut prev_lsqb = None;
let mut prev_lbrace = None;
for (idx, (char, next_char)) in line.chars().tuple_windows().enumerate() {
match char {
'[' => {
num_lsqb += 1;
prev_lsqb = Some(idx);
}
']' => {
num_rsqb += 1;
}
'{' => {
prev_lbrace = Some(idx);
}
',' | ';' | ':' if !next_char.is_whitespace() => {
if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
continue; // Slice syntax, no space required
}
if char == ',' && matches!(next_char, ')' | ']') {
continue; // Allow tuple with only one element: (3,)
}
if char == ':' && next_char == '=' {
continue; // Allow assignment expression
}
let kind = MissingWhitespace {
token: char.to_string(),
};
let mut diagnostic = Diagnostic::new(
kind,
Range::new(
Location::new(row, indent_level + idx),
Location::new(row, indent_level + idx),
),
);
if autofix {
diagnostic.set_fix(Edit::insertion(
" ".to_string(),
Location::new(row, indent_level + idx + 1),
));
}
diagnostics.push(diagnostic);
}
_ => {}
}
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn missing_whitespace(
_line: &str,
_row: usize,
_autofix: bool,
indent_level: usize,
) -> Vec<Diagnostic> {
vec![]
}
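For context on the deleted character scan (this commit replaces it with a token-based `missing_whitespace` in the logical-lines module), the E231 cases it encoded:
// f(a,b)       E231 (missing whitespace after ',')
// d = {1:2}    E231 (missing whitespace after ':')
// x[1:2]       Okay -- slice syntax inside brackets
// (3,)         Okay -- single-element tuple
// if x := 1:   Okay -- walrus, ':' immediately followed by '='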

View file

@ -1,51 +0,0 @@
#![allow(dead_code, unused_imports, unused_variables)]
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use crate::rules::pycodestyle::helpers::{is_keyword_token, is_singleton_token};
#[violation]
pub struct MissingWhitespaceAfterKeyword;
impl Violation for MissingWhitespaceAfterKeyword {
#[derive_message_formats]
fn message(&self) -> String {
format!("Missing whitespace after keyword")
}
}
/// E275
#[cfg(feature = "logical_lines")]
pub fn missing_whitespace_after_keyword(
tokens: &[(Location, &Tok, Location)],
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
for (tok0, tok1) in tokens.iter().zip(&tokens[1..]) {
if tok0.2 == tok1.0
&& is_keyword_token(tok0.1)
&& !is_singleton_token(tok0.1)
&& *tok0.1 != Tok::Async
&& *tok0.1 != Tok::Await
&& !(*tok0.1 == Tok::Except && *tok1.1 == Tok::Star)
&& !(*tok0.1 == Tok::Yield && *tok1.1 == Tok::Rpar)
&& *tok1.1 != Tok::Colon
&& *tok1.1 != Tok::Newline
{
diagnostics.push((tok0.2, MissingWhitespaceAfterKeyword.into()));
}
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn missing_whitespace_after_keyword(
_tokens: &[(Location, &Tok, Location)],
) -> Vec<(Location, DiagnosticKind)> {
vec![]
}
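Likewise, the deleted E275 pair-scan (also moved to the token-based module) flags a keyword glued to the next token; these cases are derived from the predicate's conditions above:
// from x import(y)    E275 (MissingWhitespaceAfterKeyword)
// assert(x)           E275
// True and(False)     E275
// print(x)            Okay -- `print` is a Name, not a keyword
// except* ValueError: Okay -- explicit Except/Star carve-out
// (yield)             Okay -- Yield followed by Rpar is exempted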

View file

@ -8,55 +8,21 @@ pub use compound_statements::{
};
pub use doc_line_too_long::{doc_line_too_long, DocLineTooLong};
pub use errors::{syntax_error, IOError, SyntaxError};
pub use extraneous_whitespace::{
extraneous_whitespace, WhitespaceAfterOpenBracket, WhitespaceBeforeCloseBracket,
WhitespaceBeforePunctuation,
};
pub use imports::{
module_import_not_at_top_of_file, multiple_imports_on_one_line, ModuleImportNotAtTopOfFile,
MultipleImportsOnOneLine,
};
pub use indentation::{
indentation, IndentationWithInvalidMultiple, IndentationWithInvalidMultipleComment,
NoIndentedBlock, NoIndentedBlockComment, OverIndented, UnexpectedIndentation,
UnexpectedIndentationComment,
};
pub use invalid_escape_sequence::{invalid_escape_sequence, InvalidEscapeSequence};
pub use lambda_assignment::{lambda_assignment, LambdaAssignment};
pub use line_too_long::{line_too_long, LineTooLong};
pub use literal_comparisons::{literal_comparisons, NoneComparison, TrueFalseComparison};
pub use missing_newline_at_end_of_file::{no_newline_at_end_of_file, MissingNewlineAtEndOfFile};
pub use missing_whitespace::{missing_whitespace, MissingWhitespace};
pub use missing_whitespace_after_keyword::{
missing_whitespace_after_keyword, MissingWhitespaceAfterKeyword,
};
pub use missing_whitespace_around_operator::{
missing_whitespace_around_operator, MissingWhitespaceAroundArithmeticOperator,
MissingWhitespaceAroundBitwiseOrShiftOperator, MissingWhitespaceAroundModuloOperator,
MissingWhitespaceAroundOperator,
};
pub use mixed_spaces_and_tabs::{mixed_spaces_and_tabs, MixedSpacesAndTabs};
pub use not_tests::{not_tests, NotInTest, NotIsTest};
pub use space_around_operator::{
space_around_operator, MultipleSpacesAfterOperator, MultipleSpacesBeforeOperator,
TabAfterOperator, TabBeforeOperator,
};
pub use tab_indentation::{tab_indentation, TabIndentation};
pub use trailing_whitespace::{trailing_whitespace, BlankLineWithWhitespace, TrailingWhitespace};
pub use type_comparison::{type_comparison, TypeComparison};
pub use whitespace_around_keywords::{
whitespace_around_keywords, MultipleSpacesAfterKeyword, MultipleSpacesBeforeKeyword,
TabAfterKeyword, TabBeforeKeyword,
};
pub use whitespace_around_named_parameter_equals::{
whitespace_around_named_parameter_equals, MissingWhitespaceAroundParameterEquals,
UnexpectedSpacesAroundKeywordParameterEquals,
};
pub use whitespace_before_comment::{
whitespace_before_comment, MultipleLeadingHashesForBlockComment, NoSpaceAfterBlockComment,
NoSpaceAfterInlineComment, TooFewSpacesBeforeInlineComment,
};
pub use whitespace_before_parameters::{whitespace_before_parameters, WhitespaceBeforeParameters};
mod ambiguous_class_name;
mod ambiguous_function_name;
@ -65,81 +31,16 @@ mod bare_except;
mod compound_statements;
mod doc_line_too_long;
mod errors;
mod extraneous_whitespace;
mod imports;
mod indentation;
mod invalid_escape_sequence;
mod lambda_assignment;
mod line_too_long;
mod literal_comparisons;
#[cfg(feature = "logical_lines")]
pub(crate) mod logical_lines;
mod missing_newline_at_end_of_file;
mod missing_whitespace;
mod missing_whitespace_after_keyword;
mod missing_whitespace_around_operator;
mod mixed_spaces_and_tabs;
mod not_tests;
mod space_around_operator;
mod tab_indentation;
mod trailing_whitespace;
mod type_comparison;
mod whitespace_around_keywords;
mod whitespace_around_named_parameter_equals;
mod whitespace_before_comment;
mod whitespace_before_parameters;
#[allow(unused)]
enum Whitespace {
None,
Single,
Many,
Tab,
}
impl Whitespace {
#[allow(dead_code)]
fn leading(content: &str) -> (usize, Self) {
let mut offset = 0;
let mut kind = Self::None;
for c in content.chars() {
if c == '\t' {
kind = Self::Tab;
offset += 1;
} else if c.is_whitespace() {
kind = match kind {
Whitespace::None => Whitespace::Single,
Whitespace::Single | Whitespace::Many => Whitespace::Many,
Whitespace::Tab => Whitespace::Tab,
};
offset += c.len_utf8();
} else {
break;
}
}
(offset, kind)
}
#[allow(dead_code)]
fn trailing(content: &str) -> (Self, usize) {
let mut count = 0u32;
let mut offset = 0;
for c in content.chars().rev() {
if c == '\t' {
return (Self::Tab, offset + 1);
} else if c.is_whitespace() {
count += 1;
offset += c.len_utf8();
} else {
break;
}
}
match count {
0 => (Self::None, 0),
1 => (Self::Single, offset),
_ => (Self::Many, offset),
}
}
}
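A sketch of unit tests for the two classifiers above; the expected values are derived from the implementation and are not part of the commit.
#[cfg(test)]
mod tests {
    use super::Whitespace;

    #[test]
    fn classifies_whitespace_runs() {
        // `leading` returns (byte offset consumed, kind).
        assert!(matches!(Whitespace::leading("  x"), (2, Whitespace::Many)));
        assert!(matches!(Whitespace::leading("\tx"), (1, Whitespace::Tab)));
        assert!(matches!(Whitespace::leading("x"), (0, Whitespace::None)));
        // `trailing` returns (kind, byte offset consumed from the end).
        assert!(matches!(Whitespace::trailing("x "), (Whitespace::Single, 1)));
        assert!(matches!(Whitespace::trailing("x\t"), (Whitespace::Tab, 1)));
        assert!(matches!(Whitespace::trailing("x"), (Whitespace::None, 0)));
    }
}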

View file

@ -1,113 +0,0 @@
#![allow(dead_code, unused_imports, unused_variables)]
use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
#[cfg(feature = "logical_lines")]
use crate::rules::pycodestyle::helpers::is_op_token;
#[violation]
pub struct UnexpectedSpacesAroundKeywordParameterEquals;
impl Violation for UnexpectedSpacesAroundKeywordParameterEquals {
#[derive_message_formats]
fn message(&self) -> String {
format!("Unexpected spaces around keyword / parameter equals")
}
}
#[violation]
pub struct MissingWhitespaceAroundParameterEquals;
impl Violation for MissingWhitespaceAroundParameterEquals {
#[derive_message_formats]
fn message(&self) -> String {
format!("Missing whitespace around parameter equals")
}
}
static STARTSWITH_DEF_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^(async\s+def|def)\b").unwrap());
/// E251, E252
#[cfg(feature = "logical_lines")]
pub fn whitespace_around_named_parameter_equals(
tokens: &[(Location, &Tok, Location)],
line: &str,
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
let mut parens = 0;
let mut require_space = false;
let mut no_space = false;
let mut annotated_func_arg = false;
let mut prev_end: Option<&Location> = None;
let in_def = STARTSWITH_DEF_REGEX.is_match(line);
for (start, token, end) in tokens {
if **token == Tok::NonLogicalNewline {
continue;
}
if no_space {
no_space = false;
if Some(start) != prev_end {
diagnostics.push((
*(prev_end.unwrap()),
UnexpectedSpacesAroundKeywordParameterEquals.into(),
));
}
}
if require_space {
require_space = false;
if Some(start) == prev_end {
diagnostics.push((*start, MissingWhitespaceAroundParameterEquals.into()));
}
}
if is_op_token(token) {
if **token == Tok::Lpar || **token == Tok::Lsqb {
parens += 1;
} else if **token == Tok::Rpar || **token == Tok::Rsqb {
parens -= 1;
} else if in_def && **token == Tok::Colon && parens == 1 {
annotated_func_arg = true;
} else if parens == 1 && **token == Tok::Comma {
annotated_func_arg = false;
} else if parens > 0 && **token == Tok::Equal {
if annotated_func_arg && parens == 1 {
require_space = true;
if Some(start) == prev_end {
diagnostics.push((*start, MissingWhitespaceAroundParameterEquals.into()));
}
} else {
no_space = true;
if Some(start) != prev_end {
diagnostics.push((
*(prev_end.unwrap()),
UnexpectedSpacesAroundKeywordParameterEquals.into(),
));
}
}
}
if parens < 1 {
annotated_func_arg = false;
}
}
prev_end = Some(end);
}
diagnostics
}
#[cfg(not(feature = "logical_lines"))]
pub fn whitespace_around_named_parameter_equals(
_tokens: &[(Location, &Tok, Location)],
_line: &str,
) -> Vec<(Location, DiagnosticKind)> {
vec![]
}