Ensure that tab characters aren't in multi-line strings before throwing a violation (#3837)

2025-11-25 22:29:02 +00:00 · 2023-04-06 21:25:40 -05:00 · 2023-04-06 21:25:40 -05:00 · abaf0a198d
commit abaf0a198d
parent 454c6d9c2f
6 changed files with 131 additions and 78 deletions
--- a/crates/ruff/resources/test/fixtures/pycodestyle/W19.py
+++ b/crates/ruff/resources/test/fixtures/pycodestyle/W19.py
@ -97,10 +97,10 @@ if length > options.max_line_length:
 if os.path.exists(os.path.join(path, PEP8_BIN)):
 	cmd = ([os.path.join(path, PEP8_BIN)] +
 	       self._pep8_options(targetfile))
-#: W191
+#: W191 - okay
 '''
 	multiline string with tab in it'''
-#: E101 W191
+#: E101 (W191 okay)
 '''multiline string
 	with tabs
   and spaces
@ -142,4 +142,10 @@ def test_keys(self):
 x = [
 	'abc'
 ]
-#:
+#: W191 - okay
 '''	multiline string with tab in it, same lines'''
 """	here we're using '''different delimiters'''"""
 '''
 	multiline string with tab in it, different lines
 '''
 "	single line string with tab in it"
--- a/crates/ruff/src/checkers/physical_lines.rs
+++ b/crates/ruff/src/checkers/physical_lines.rs
@ -4,7 +4,7 @@ use std::path::Path;
 use ruff_diagnostics::Diagnostic;
 use ruff_python_ast::newlines::StrExt;
-use ruff_python_ast::source_code::{Locator, Stylist};
+use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
 use crate::registry::Rule;
 use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective};
@ -24,7 +24,7 @@ pub fn check_physical_lines(
    path: &Path,
    locator: &Locator,
    stylist: &Stylist,
-    commented_lines: &[usize],
+    indexer: &Indexer,
    doc_lines: &[usize],
    settings: &Settings,
    autofix: flags::Autofix,
@ -55,8 +55,11 @@ pub fn check_physical_lines(
    let fix_shebang_whitespace =
        autofix.into() && settings.rules.should_fix(Rule::ShebangLeadingWhitespace);
-    let mut commented_lines_iter = commented_lines.iter().peekable();
+    let mut commented_lines_iter = indexer.commented_lines().iter().peekable();
    let mut doc_lines_iter = doc_lines.iter().peekable();
    let string_lines = indexer.string_ranges();
    for (index, line) in locator.contents().universal_newlines().enumerate() {
        while commented_lines_iter
            .next_if(|lineno| &(index + 1) == *lineno)
@ -155,7 +158,7 @@ pub fn check_physical_lines(
        }
        if enforce_tab_indentation {
-            if let Some(diagnostic) = tab_indentation(index, line) {
+            if let Some(diagnostic) = tab_indentation(index + 1, line, string_lines) {
                diagnostics.push(diagnostic);
            }
        }
@ -186,7 +189,7 @@ mod tests {
    use rustpython_parser::Mode;
    use std::path::Path;
-    use ruff_python_ast::source_code::{Locator, Stylist};
+    use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
    use crate::registry::Rule;
    use crate::settings::{flags, Settings};
@ -198,6 +201,7 @@ mod tests {
        let line = "'\u{4e9c}' * 2"; // 7 in UTF-32, 9 in UTF-8.
        let locator = Locator::new(line);
        let tokens: Vec<_> = lex(line, Mode::Module).collect();
        let indexer: Indexer = tokens.as_slice().into();
        let stylist = Stylist::from_tokens(&tokens, &locator);
        let check_with_max_line_length = |line_length: usize| {
@ -205,7 +209,7 @@ mod tests {
                Path::new("foo.py"),
                &locator,
                &stylist,
-                &[],
+                &indexer,
                &[],
                &Settings {
                    line_length,
--- a/crates/ruff/src/linter.rs
+++ b/crates/ruff/src/linter.rs
@ -196,13 +196,7 @@ pub fn check_path(
        .any(|rule_code| rule_code.lint_source().is_physical_lines())
    {
        diagnostics.extend(check_physical_lines(
-            path,
+            path, locator, stylist, indexer, &doc_lines, settings, autofix,
            locator,
            stylist,
            indexer.commented_lines(),
            &doc_lines,
            settings,
            autofix,
        ));
    }
--- a/crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs
@ -16,15 +16,48 @@ impl Violation for TabIndentation {
 }
 /// W191
-pub fn tab_indentation(lineno: usize, line: &str) -> Option<Diagnostic> {
+pub fn tab_indentation(lineno: usize, line: &str, string_ranges: &[Range]) -> Option<Diagnostic> {
    let indent = leading_space(line);
    if let Some(tab_index) = indent.find('\t') {
        // If the tab character is within a multi-line string, abort.
        if let Ok(range_index) = string_ranges.binary_search_by(|range| {
            let start = range.location.row();
            let end = range.end_location.row();
            if start > lineno {
                std::cmp::Ordering::Greater
            } else if end < lineno {
                std::cmp::Ordering::Less
            } else {
                std::cmp::Ordering::Equal
            }
        }) {
            let string_range = &string_ranges[range_index];
            let start = string_range.location;
            let end = string_range.end_location;
            // Tab is contained in the string range by virtue of lines.
            if lineno != start.row() && lineno != end.row() {
                return None;
            }
            let tab_column = line[..tab_index].chars().count();
            // Tab on first line of the quoted range, following the quote.
            if lineno == start.row() && tab_column > start.column() {
                return None;
            }
            // Tab on last line of the quoted range, preceding the quote.
            if lineno == end.row() && tab_column < end.column() {
                return None;
            }
        }
    if indent.contains('\t') {
        Some(Diagnostic::new(
            TabIndentation,
            Range::new(
-                Location::new(lineno + 1, 0),
+                Location::new(lineno, 0),
-                Location::new(lineno + 1, indent.chars().count()),
+                Location::new(lineno, indent.chars().count()),
            ),
        ))
    } else {
--- a/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsW191_W19.py.snap
+++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsW191_W19.py.snap
@ -352,48 +352,6 @@ expression: diagnostics
  fix:
    edits: []
  parent: ~
 - kind:
    name: TabIndentation
    body: Indentation contains tabs
    suggestion: ~
    fixable: false
  location:
    row: 102
    column: 0
  end_location:
    row: 102
    column: 1
  fix:
    edits: []
  parent: ~
 - kind:
    name: TabIndentation
    body: Indentation contains tabs
    suggestion: ~
    fixable: false
  location:
    row: 105
    column: 0
  end_location:
    row: 105
    column: 1
  fix:
    edits: []
  parent: ~
 - kind:
    name: TabIndentation
    body: Indentation contains tabs
    suggestion: ~
    fixable: false
  location:
    row: 110
    column: 0
  end_location:
    row: 110
    column: 1
  fix:
    edits: []
  parent: ~
 - kind:
    name: TabIndentation
    body: Indentation contains tabs
@ -450,20 +408,6 @@ expression: diagnostics
  fix:
    edits: []
  parent: ~
 - kind:
    name: TabIndentation
    body: Indentation contains tabs
    suggestion: ~
    fixable: false
  location:
    row: 136
    column: 0
  end_location:
    row: 136
    column: 1
  fix:
    edits: []
  parent: ~
 - kind:
    name: TabIndentation
    body: Indentation contains tabs
--- a/crates/ruff_python_ast/src/source_code/indexer.rs
+++ b/crates/ruff_python_ast/src/source_code/indexer.rs
@ -5,30 +5,47 @@ use rustpython_parser::ast::Location;
 use rustpython_parser::lexer::LexResult;
 use rustpython_parser::Tok;
 use crate::types::Range;
 pub struct Indexer {
    commented_lines: Vec<usize>,
    continuation_lines: Vec<usize>,
    string_ranges: Vec<Range>,
 }
 impl Indexer {
    /// Return a slice of all lines that include a comment.
    pub fn commented_lines(&self) -> &[usize] {
        &self.commented_lines
    }
    /// Return a slice of all lines that end with a continuation (backslash).
    pub fn continuation_lines(&self) -> &[usize] {
        &self.continuation_lines
    }
    /// Return a slice of all ranges that include a triple-quoted string.
    pub fn string_ranges(&self) -> &[Range] {
        &self.string_ranges
    }
 }
 impl From<&[LexResult]> for Indexer {
    fn from(lxr: &[LexResult]) -> Self {
        let mut commented_lines = Vec::new();
        let mut continuation_lines = Vec::new();
        let mut string_ranges = Vec::new();
        let mut prev: Option<(&Location, &Tok, &Location)> = None;
        for (start, tok, end) in lxr.iter().flatten() {
-            if matches!(tok, Tok::Comment(_)) {
+            match tok {
-                commented_lines.push(start.row());
+                Tok::Comment(..) => commented_lines.push(start.row()),
                Tok::String {
                    triple_quoted: true,
                    ..
                } => string_ranges.push(Range::new(*start, *end)),
                _ => (),
            }
            if let Some((.., prev_tok, prev_end)) = prev {
                if !matches!(
                    prev_tok,
@ -44,16 +61,19 @@ impl From<&[LexResult]> for Indexer {
        Self {
            commented_lines,
            continuation_lines,
            string_ranges,
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use rustpython_parser::ast::Location;
    use rustpython_parser::lexer::LexResult;
    use rustpython_parser::{lexer, Mode};
    use crate::source_code::Indexer;
    use crate::types::Range;
    #[test]
    fn continuation() {
@ -114,4 +134,56 @@ import os
        let indexer: Indexer = lxr.as_slice().into();
        assert_eq!(indexer.continuation_lines(), [9, 12]);
    }
    #[test]
    fn string_ranges() {
        let contents = r#""this is a single-quoted string""#;
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer: Indexer = lxr.as_slice().into();
        assert_eq!(indexer.string_ranges(), &vec![]);
        let contents = r#"
            """
            this is a multiline string
            """
            "#;
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer: Indexer = lxr.as_slice().into();
        assert_eq!(
            indexer.string_ranges(),
            &vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
        );
        let contents = r#"
            """
            '''this is a multiline string with multiple delimiter types'''
            """
            "#;
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer: Indexer = lxr.as_slice().into();
        assert_eq!(
            indexer.string_ranges(),
            &vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
        );
        let contents = r#"
            """
            this is one
            multiline string
            """
            """
            and this is
            another
            """
            "#;
        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
        let indexer: Indexer = lxr.as_slice().into();
        assert_eq!(
            indexer.string_ranges(),
            &vec![
                Range::new(Location::new(2, 12), Location::new(5, 15)),
                Range::new(Location::new(6, 12), Location::new(9, 15))
            ]
        );
    }
 }