diff --git a/crates/ruff/resources/test/fixtures/pycodestyle/W19.py b/crates/ruff/resources/test/fixtures/pycodestyle/W19.py index 2bb2893e8c..f029558825 100644 --- a/crates/ruff/resources/test/fixtures/pycodestyle/W19.py +++ b/crates/ruff/resources/test/fixtures/pycodestyle/W19.py @@ -97,10 +97,10 @@ if length > options.max_line_length: if os.path.exists(os.path.join(path, PEP8_BIN)): cmd = ([os.path.join(path, PEP8_BIN)] + self._pep8_options(targetfile)) -#: W191 +#: W191 - okay ''' multiline string with tab in it''' -#: E101 W191 +#: E101 (W191 okay) '''multiline string with tabs and spaces @@ -142,4 +142,10 @@ def test_keys(self): x = [ 'abc' ] -#: +#: W191 - okay +''' multiline string with tab in it, same lines''' +""" here we're using '''different delimiters'''""" +''' + multiline string with tab in it, different lines +''' +" single line string with tab in it" diff --git a/crates/ruff/src/checkers/physical_lines.rs b/crates/ruff/src/checkers/physical_lines.rs index bd832b55b3..7cbbd9b46c 100644 --- a/crates/ruff/src/checkers/physical_lines.rs +++ b/crates/ruff/src/checkers/physical_lines.rs @@ -4,7 +4,7 @@ use std::path::Path; use ruff_diagnostics::Diagnostic; use ruff_python_ast::newlines::StrExt; -use ruff_python_ast::source_code::{Locator, Stylist}; +use ruff_python_ast::source_code::{Indexer, Locator, Stylist}; use crate::registry::Rule; use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective}; @@ -24,7 +24,7 @@ pub fn check_physical_lines( path: &Path, locator: &Locator, stylist: &Stylist, - commented_lines: &[usize], + indexer: &Indexer, doc_lines: &[usize], settings: &Settings, autofix: flags::Autofix, @@ -55,8 +55,11 @@ pub fn check_physical_lines( let fix_shebang_whitespace = autofix.into() && settings.rules.should_fix(Rule::ShebangLeadingWhitespace); - let mut commented_lines_iter = commented_lines.iter().peekable(); + let mut commented_lines_iter = indexer.commented_lines().iter().peekable(); let mut doc_lines_iter = doc_lines.iter().peekable(); + + let string_lines = indexer.string_ranges(); + for (index, line) in locator.contents().universal_newlines().enumerate() { while commented_lines_iter .next_if(|lineno| &(index + 1) == *lineno) @@ -155,7 +158,7 @@ pub fn check_physical_lines( } if enforce_tab_indentation { - if let Some(diagnostic) = tab_indentation(index, line) { + if let Some(diagnostic) = tab_indentation(index + 1, line, string_lines) { diagnostics.push(diagnostic); } } @@ -186,7 +189,7 @@ mod tests { use rustpython_parser::Mode; use std::path::Path; - use ruff_python_ast::source_code::{Locator, Stylist}; + use ruff_python_ast::source_code::{Indexer, Locator, Stylist}; use crate::registry::Rule; use crate::settings::{flags, Settings}; @@ -198,6 +201,7 @@ mod tests { let line = "'\u{4e9c}' * 2"; // 7 in UTF-32, 9 in UTF-8. let locator = Locator::new(line); let tokens: Vec<_> = lex(line, Mode::Module).collect(); + let indexer: Indexer = tokens.as_slice().into(); let stylist = Stylist::from_tokens(&tokens, &locator); let check_with_max_line_length = |line_length: usize| { @@ -205,7 +209,7 @@ mod tests { Path::new("foo.py"), &locator, &stylist, - &[], + &indexer, &[], &Settings { line_length, diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs index ef16d59c10..d7c05e037f 100644 --- a/crates/ruff/src/linter.rs +++ b/crates/ruff/src/linter.rs @@ -196,13 +196,7 @@ pub fn check_path( .any(|rule_code| rule_code.lint_source().is_physical_lines()) { diagnostics.extend(check_physical_lines( - path, - locator, - stylist, - indexer.commented_lines(), - &doc_lines, - settings, - autofix, + path, locator, stylist, indexer, &doc_lines, settings, autofix, )); } diff --git a/crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs b/crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs index 442fe588a7..b15855b144 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs @@ -16,15 +16,48 @@ impl Violation for TabIndentation { } /// W191 -pub fn tab_indentation(lineno: usize, line: &str) -> Option { +pub fn tab_indentation(lineno: usize, line: &str, string_ranges: &[Range]) -> Option { let indent = leading_space(line); + if let Some(tab_index) = indent.find('\t') { + // If the tab character is within a multi-line string, abort. + if let Ok(range_index) = string_ranges.binary_search_by(|range| { + let start = range.location.row(); + let end = range.end_location.row(); + if start > lineno { + std::cmp::Ordering::Greater + } else if end < lineno { + std::cmp::Ordering::Less + } else { + std::cmp::Ordering::Equal + } + }) { + let string_range = &string_ranges[range_index]; + let start = string_range.location; + let end = string_range.end_location; + + // Tab is contained in the string range by virtue of lines. + if lineno != start.row() && lineno != end.row() { + return None; + } + + let tab_column = line[..tab_index].chars().count(); + + // Tab on first line of the quoted range, following the quote. + if lineno == start.row() && tab_column > start.column() { + return None; + } + + // Tab on last line of the quoted range, preceding the quote. + if lineno == end.row() && tab_column < end.column() { + return None; + } + } - if indent.contains('\t') { Some(Diagnostic::new( TabIndentation, Range::new( - Location::new(lineno + 1, 0), - Location::new(lineno + 1, indent.chars().count()), + Location::new(lineno, 0), + Location::new(lineno, indent.chars().count()), ), )) } else { diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__W191_W19.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__W191_W19.py.snap index d999c03c35..a6bd50f60c 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__W191_W19.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__W191_W19.py.snap @@ -352,48 +352,6 @@ expression: diagnostics fix: edits: [] parent: ~ -- kind: - name: TabIndentation - body: Indentation contains tabs - suggestion: ~ - fixable: false - location: - row: 102 - column: 0 - end_location: - row: 102 - column: 1 - fix: - edits: [] - parent: ~ -- kind: - name: TabIndentation - body: Indentation contains tabs - suggestion: ~ - fixable: false - location: - row: 105 - column: 0 - end_location: - row: 105 - column: 1 - fix: - edits: [] - parent: ~ -- kind: - name: TabIndentation - body: Indentation contains tabs - suggestion: ~ - fixable: false - location: - row: 110 - column: 0 - end_location: - row: 110 - column: 1 - fix: - edits: [] - parent: ~ - kind: name: TabIndentation body: Indentation contains tabs @@ -450,20 +408,6 @@ expression: diagnostics fix: edits: [] parent: ~ -- kind: - name: TabIndentation - body: Indentation contains tabs - suggestion: ~ - fixable: false - location: - row: 136 - column: 0 - end_location: - row: 136 - column: 1 - fix: - edits: [] - parent: ~ - kind: name: TabIndentation body: Indentation contains tabs diff --git a/crates/ruff_python_ast/src/source_code/indexer.rs b/crates/ruff_python_ast/src/source_code/indexer.rs index ecc52c7bd8..d282e12bb0 100644 --- a/crates/ruff_python_ast/src/source_code/indexer.rs +++ b/crates/ruff_python_ast/src/source_code/indexer.rs @@ -5,30 +5,47 @@ use rustpython_parser::ast::Location; use rustpython_parser::lexer::LexResult; use rustpython_parser::Tok; +use crate::types::Range; + pub struct Indexer { commented_lines: Vec, continuation_lines: Vec, + string_ranges: Vec, } impl Indexer { + /// Return a slice of all lines that include a comment. pub fn commented_lines(&self) -> &[usize] { &self.commented_lines } + /// Return a slice of all lines that end with a continuation (backslash). pub fn continuation_lines(&self) -> &[usize] { &self.continuation_lines } + + /// Return a slice of all ranges that include a triple-quoted string. + pub fn string_ranges(&self) -> &[Range] { + &self.string_ranges + } } impl From<&[LexResult]> for Indexer { fn from(lxr: &[LexResult]) -> Self { let mut commented_lines = Vec::new(); let mut continuation_lines = Vec::new(); + let mut string_ranges = Vec::new(); let mut prev: Option<(&Location, &Tok, &Location)> = None; for (start, tok, end) in lxr.iter().flatten() { - if matches!(tok, Tok::Comment(_)) { - commented_lines.push(start.row()); + match tok { + Tok::Comment(..) => commented_lines.push(start.row()), + Tok::String { + triple_quoted: true, + .. + } => string_ranges.push(Range::new(*start, *end)), + _ => (), } + if let Some((.., prev_tok, prev_end)) = prev { if !matches!( prev_tok, @@ -44,16 +61,19 @@ impl From<&[LexResult]> for Indexer { Self { commented_lines, continuation_lines, + string_ranges, } } } #[cfg(test)] mod tests { + use rustpython_parser::ast::Location; use rustpython_parser::lexer::LexResult; use rustpython_parser::{lexer, Mode}; use crate::source_code::Indexer; + use crate::types::Range; #[test] fn continuation() { @@ -114,4 +134,56 @@ import os let indexer: Indexer = lxr.as_slice().into(); assert_eq!(indexer.continuation_lines(), [9, 12]); } + + #[test] + fn string_ranges() { + let contents = r#""this is a single-quoted string""#; + let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); + let indexer: Indexer = lxr.as_slice().into(); + assert_eq!(indexer.string_ranges(), &vec![]); + + let contents = r#" + """ + this is a multiline string + """ + "#; + let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); + let indexer: Indexer = lxr.as_slice().into(); + assert_eq!( + indexer.string_ranges(), + &vec![Range::new(Location::new(2, 12), Location::new(4, 15))] + ); + + let contents = r#" + """ + '''this is a multiline string with multiple delimiter types''' + """ + "#; + let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); + let indexer: Indexer = lxr.as_slice().into(); + assert_eq!( + indexer.string_ranges(), + &vec![Range::new(Location::new(2, 12), Location::new(4, 15))] + ); + + let contents = r#" + """ + this is one + multiline string + """ + """ + and this is + another + """ + "#; + let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); + let indexer: Indexer = lxr.as_slice().into(); + assert_eq!( + indexer.string_ranges(), + &vec![ + Range::new(Location::new(2, 12), Location::new(5, 15)), + Range::new(Location::new(6, 12), Location::new(9, 15)) + ] + ); + } }