Ensure that tab characters aren't in multi-line strings before throwing a violation (#3837)

This commit is contained in:
Evan Rittenhouse 2023-04-06 21:25:40 -05:00 committed by GitHub
parent 454c6d9c2f
commit abaf0a198d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 131 additions and 78 deletions

View file

@ -97,10 +97,10 @@ if length > options.max_line_length:
if os.path.exists(os.path.join(path, PEP8_BIN)): if os.path.exists(os.path.join(path, PEP8_BIN)):
cmd = ([os.path.join(path, PEP8_BIN)] + cmd = ([os.path.join(path, PEP8_BIN)] +
self._pep8_options(targetfile)) self._pep8_options(targetfile))
#: W191 #: W191 - okay
''' '''
multiline string with tab in it''' multiline string with tab in it'''
#: E101 W191 #: E101 (W191 okay)
'''multiline string '''multiline string
with tabs with tabs
and spaces and spaces
@ -142,4 +142,10 @@ def test_keys(self):
x = [ x = [
'abc' 'abc'
] ]
#: #: W191 - okay
''' multiline string with tab in it, same lines'''
""" here we're using '''different delimiters'''"""
'''
multiline string with tab in it, different lines
'''
" single line string with tab in it"

View file

@ -4,7 +4,7 @@ use std::path::Path;
use ruff_diagnostics::Diagnostic; use ruff_diagnostics::Diagnostic;
use ruff_python_ast::newlines::StrExt; use ruff_python_ast::newlines::StrExt;
use ruff_python_ast::source_code::{Locator, Stylist}; use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use crate::registry::Rule; use crate::registry::Rule;
use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective}; use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective};
@ -24,7 +24,7 @@ pub fn check_physical_lines(
path: &Path, path: &Path,
locator: &Locator, locator: &Locator,
stylist: &Stylist, stylist: &Stylist,
commented_lines: &[usize], indexer: &Indexer,
doc_lines: &[usize], doc_lines: &[usize],
settings: &Settings, settings: &Settings,
autofix: flags::Autofix, autofix: flags::Autofix,
@ -55,8 +55,11 @@ pub fn check_physical_lines(
let fix_shebang_whitespace = let fix_shebang_whitespace =
autofix.into() && settings.rules.should_fix(Rule::ShebangLeadingWhitespace); autofix.into() && settings.rules.should_fix(Rule::ShebangLeadingWhitespace);
let mut commented_lines_iter = commented_lines.iter().peekable(); let mut commented_lines_iter = indexer.commented_lines().iter().peekable();
let mut doc_lines_iter = doc_lines.iter().peekable(); let mut doc_lines_iter = doc_lines.iter().peekable();
let string_lines = indexer.string_ranges();
for (index, line) in locator.contents().universal_newlines().enumerate() { for (index, line) in locator.contents().universal_newlines().enumerate() {
while commented_lines_iter while commented_lines_iter
.next_if(|lineno| &(index + 1) == *lineno) .next_if(|lineno| &(index + 1) == *lineno)
@ -155,7 +158,7 @@ pub fn check_physical_lines(
} }
if enforce_tab_indentation { if enforce_tab_indentation {
if let Some(diagnostic) = tab_indentation(index, line) { if let Some(diagnostic) = tab_indentation(index + 1, line, string_lines) {
diagnostics.push(diagnostic); diagnostics.push(diagnostic);
} }
} }
@ -186,7 +189,7 @@ mod tests {
use rustpython_parser::Mode; use rustpython_parser::Mode;
use std::path::Path; use std::path::Path;
use ruff_python_ast::source_code::{Locator, Stylist}; use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use crate::registry::Rule; use crate::registry::Rule;
use crate::settings::{flags, Settings}; use crate::settings::{flags, Settings};
@ -198,6 +201,7 @@ mod tests {
let line = "'\u{4e9c}' * 2"; // 7 in UTF-32, 9 in UTF-8. let line = "'\u{4e9c}' * 2"; // 7 in UTF-32, 9 in UTF-8.
let locator = Locator::new(line); let locator = Locator::new(line);
let tokens: Vec<_> = lex(line, Mode::Module).collect(); let tokens: Vec<_> = lex(line, Mode::Module).collect();
let indexer: Indexer = tokens.as_slice().into();
let stylist = Stylist::from_tokens(&tokens, &locator); let stylist = Stylist::from_tokens(&tokens, &locator);
let check_with_max_line_length = |line_length: usize| { let check_with_max_line_length = |line_length: usize| {
@ -205,7 +209,7 @@ mod tests {
Path::new("foo.py"), Path::new("foo.py"),
&locator, &locator,
&stylist, &stylist,
&[], &indexer,
&[], &[],
&Settings { &Settings {
line_length, line_length,

View file

@ -196,13 +196,7 @@ pub fn check_path(
.any(|rule_code| rule_code.lint_source().is_physical_lines()) .any(|rule_code| rule_code.lint_source().is_physical_lines())
{ {
diagnostics.extend(check_physical_lines( diagnostics.extend(check_physical_lines(
path, path, locator, stylist, indexer, &doc_lines, settings, autofix,
locator,
stylist,
indexer.commented_lines(),
&doc_lines,
settings,
autofix,
)); ));
} }

View file

@ -16,15 +16,48 @@ impl Violation for TabIndentation {
} }
/// W191 /// W191
pub fn tab_indentation(lineno: usize, line: &str) -> Option<Diagnostic> { pub fn tab_indentation(lineno: usize, line: &str, string_ranges: &[Range]) -> Option<Diagnostic> {
let indent = leading_space(line); let indent = leading_space(line);
if let Some(tab_index) = indent.find('\t') {
// If the tab character is within a multi-line string, abort.
if let Ok(range_index) = string_ranges.binary_search_by(|range| {
let start = range.location.row();
let end = range.end_location.row();
if start > lineno {
std::cmp::Ordering::Greater
} else if end < lineno {
std::cmp::Ordering::Less
} else {
std::cmp::Ordering::Equal
}
}) {
let string_range = &string_ranges[range_index];
let start = string_range.location;
let end = string_range.end_location;
// Tab is contained in the string range by virtue of lines.
if lineno != start.row() && lineno != end.row() {
return None;
}
let tab_column = line[..tab_index].chars().count();
// Tab on first line of the quoted range, following the quote.
if lineno == start.row() && tab_column > start.column() {
return None;
}
// Tab on last line of the quoted range, preceding the quote.
if lineno == end.row() && tab_column < end.column() {
return None;
}
}
if indent.contains('\t') {
Some(Diagnostic::new( Some(Diagnostic::new(
TabIndentation, TabIndentation,
Range::new( Range::new(
Location::new(lineno + 1, 0), Location::new(lineno, 0),
Location::new(lineno + 1, indent.chars().count()), Location::new(lineno, indent.chars().count()),
), ),
)) ))
} else { } else {

View file

@ -352,48 +352,6 @@ expression: diagnostics
fix: fix:
edits: [] edits: []
parent: ~ parent: ~
- kind:
name: TabIndentation
body: Indentation contains tabs
suggestion: ~
fixable: false
location:
row: 102
column: 0
end_location:
row: 102
column: 1
fix:
edits: []
parent: ~
- kind:
name: TabIndentation
body: Indentation contains tabs
suggestion: ~
fixable: false
location:
row: 105
column: 0
end_location:
row: 105
column: 1
fix:
edits: []
parent: ~
- kind:
name: TabIndentation
body: Indentation contains tabs
suggestion: ~
fixable: false
location:
row: 110
column: 0
end_location:
row: 110
column: 1
fix:
edits: []
parent: ~
- kind: - kind:
name: TabIndentation name: TabIndentation
body: Indentation contains tabs body: Indentation contains tabs
@ -450,20 +408,6 @@ expression: diagnostics
fix: fix:
edits: [] edits: []
parent: ~ parent: ~
- kind:
name: TabIndentation
body: Indentation contains tabs
suggestion: ~
fixable: false
location:
row: 136
column: 0
end_location:
row: 136
column: 1
fix:
edits: []
parent: ~
- kind: - kind:
name: TabIndentation name: TabIndentation
body: Indentation contains tabs body: Indentation contains tabs

View file

@ -5,30 +5,47 @@ use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult; use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok; use rustpython_parser::Tok;
use crate::types::Range;
pub struct Indexer { pub struct Indexer {
commented_lines: Vec<usize>, commented_lines: Vec<usize>,
continuation_lines: Vec<usize>, continuation_lines: Vec<usize>,
string_ranges: Vec<Range>,
} }
impl Indexer { impl Indexer {
/// Return a slice of all lines that include a comment.
pub fn commented_lines(&self) -> &[usize] { pub fn commented_lines(&self) -> &[usize] {
&self.commented_lines &self.commented_lines
} }
/// Return a slice of all lines that end with a continuation (backslash).
pub fn continuation_lines(&self) -> &[usize] { pub fn continuation_lines(&self) -> &[usize] {
&self.continuation_lines &self.continuation_lines
} }
/// Return a slice of all ranges that include a triple-quoted string.
pub fn string_ranges(&self) -> &[Range] {
&self.string_ranges
}
} }
impl From<&[LexResult]> for Indexer { impl From<&[LexResult]> for Indexer {
fn from(lxr: &[LexResult]) -> Self { fn from(lxr: &[LexResult]) -> Self {
let mut commented_lines = Vec::new(); let mut commented_lines = Vec::new();
let mut continuation_lines = Vec::new(); let mut continuation_lines = Vec::new();
let mut string_ranges = Vec::new();
let mut prev: Option<(&Location, &Tok, &Location)> = None; let mut prev: Option<(&Location, &Tok, &Location)> = None;
for (start, tok, end) in lxr.iter().flatten() { for (start, tok, end) in lxr.iter().flatten() {
if matches!(tok, Tok::Comment(_)) { match tok {
commented_lines.push(start.row()); Tok::Comment(..) => commented_lines.push(start.row()),
Tok::String {
triple_quoted: true,
..
} => string_ranges.push(Range::new(*start, *end)),
_ => (),
} }
if let Some((.., prev_tok, prev_end)) = prev { if let Some((.., prev_tok, prev_end)) = prev {
if !matches!( if !matches!(
prev_tok, prev_tok,
@ -44,16 +61,19 @@ impl From<&[LexResult]> for Indexer {
Self { Self {
commented_lines, commented_lines,
continuation_lines, continuation_lines,
string_ranges,
} }
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult; use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode}; use rustpython_parser::{lexer, Mode};
use crate::source_code::Indexer; use crate::source_code::Indexer;
use crate::types::Range;
#[test] #[test]
fn continuation() { fn continuation() {
@ -114,4 +134,56 @@ import os
let indexer: Indexer = lxr.as_slice().into(); let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.continuation_lines(), [9, 12]); assert_eq!(indexer.continuation_lines(), [9, 12]);
} }
#[test]
fn string_ranges() {
let contents = r#""this is a single-quoted string""#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.string_ranges(), &vec![]);
let contents = r#"
"""
this is a multiline string
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(
indexer.string_ranges(),
&vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
);
let contents = r#"
"""
'''this is a multiline string with multiple delimiter types'''
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(
indexer.string_ranges(),
&vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
);
let contents = r#"
"""
this is one
multiline string
"""
"""
and this is
another
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(
indexer.string_ranges(),
&vec![
Range::new(Location::new(2, 12), Location::new(5, 15)),
Range::new(Location::new(6, 12), Location::new(9, 15))
]
);
}
} }