Ensure that tab characters aren't in multi-line strings before throwing a violation (#3837)

This commit is contained in:
Evan Rittenhouse 2023-04-06 21:25:40 -05:00 committed by GitHub
parent 454c6d9c2f
commit abaf0a198d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 131 additions and 78 deletions

View file

@ -97,10 +97,10 @@ if length > options.max_line_length:
if os.path.exists(os.path.join(path, PEP8_BIN)):
cmd = ([os.path.join(path, PEP8_BIN)] +
self._pep8_options(targetfile))
#: W191
#: W191 - okay
'''
multiline string with tab in it'''
#: E101 W191
#: E101 (W191 okay)
'''multiline string
with tabs
and spaces
@ -142,4 +142,10 @@ def test_keys(self):
x = [
'abc'
]
#:
#: W191 - okay
''' multiline string with tab in it, same lines'''
""" here we're using '''different delimiters'''"""
'''
multiline string with tab in it, different lines
'''
" single line string with tab in it"

View file

@ -4,7 +4,7 @@ use std::path::Path;
use ruff_diagnostics::Diagnostic;
use ruff_python_ast::newlines::StrExt;
use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use crate::registry::Rule;
use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective};
@ -24,7 +24,7 @@ pub fn check_physical_lines(
path: &Path,
locator: &Locator,
stylist: &Stylist,
commented_lines: &[usize],
indexer: &Indexer,
doc_lines: &[usize],
settings: &Settings,
autofix: flags::Autofix,
@ -55,8 +55,11 @@ pub fn check_physical_lines(
let fix_shebang_whitespace =
autofix.into() && settings.rules.should_fix(Rule::ShebangLeadingWhitespace);
let mut commented_lines_iter = commented_lines.iter().peekable();
let mut commented_lines_iter = indexer.commented_lines().iter().peekable();
let mut doc_lines_iter = doc_lines.iter().peekable();
let string_lines = indexer.string_ranges();
for (index, line) in locator.contents().universal_newlines().enumerate() {
while commented_lines_iter
.next_if(|lineno| &(index + 1) == *lineno)
@ -155,7 +158,7 @@ pub fn check_physical_lines(
}
if enforce_tab_indentation {
if let Some(diagnostic) = tab_indentation(index, line) {
if let Some(diagnostic) = tab_indentation(index + 1, line, string_lines) {
diagnostics.push(diagnostic);
}
}
@ -186,7 +189,7 @@ mod tests {
use rustpython_parser::Mode;
use std::path::Path;
use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use crate::registry::Rule;
use crate::settings::{flags, Settings};
@ -198,6 +201,7 @@ mod tests {
let line = "'\u{4e9c}' * 2"; // 7 in UTF-32, 9 in UTF-8.
let locator = Locator::new(line);
let tokens: Vec<_> = lex(line, Mode::Module).collect();
let indexer: Indexer = tokens.as_slice().into();
let stylist = Stylist::from_tokens(&tokens, &locator);
let check_with_max_line_length = |line_length: usize| {
@ -205,7 +209,7 @@ mod tests {
Path::new("foo.py"),
&locator,
&stylist,
&[],
&indexer,
&[],
&Settings {
line_length,

View file

@ -196,13 +196,7 @@ pub fn check_path(
.any(|rule_code| rule_code.lint_source().is_physical_lines())
{
diagnostics.extend(check_physical_lines(
path,
locator,
stylist,
indexer.commented_lines(),
&doc_lines,
settings,
autofix,
path, locator, stylist, indexer, &doc_lines, settings, autofix,
));
}

View file

@ -16,15 +16,48 @@ impl Violation for TabIndentation {
}
/// W191
pub fn tab_indentation(lineno: usize, line: &str) -> Option<Diagnostic> {
pub fn tab_indentation(lineno: usize, line: &str, string_ranges: &[Range]) -> Option<Diagnostic> {
let indent = leading_space(line);
if let Some(tab_index) = indent.find('\t') {
// If the tab character is within a multi-line string, abort.
if let Ok(range_index) = string_ranges.binary_search_by(|range| {
let start = range.location.row();
let end = range.end_location.row();
if start > lineno {
std::cmp::Ordering::Greater
} else if end < lineno {
std::cmp::Ordering::Less
} else {
std::cmp::Ordering::Equal
}
}) {
let string_range = &string_ranges[range_index];
let start = string_range.location;
let end = string_range.end_location;
// Tab is contained in the string range by virtue of lines.
if lineno != start.row() && lineno != end.row() {
return None;
}
let tab_column = line[..tab_index].chars().count();
// Tab on first line of the quoted range, following the quote.
if lineno == start.row() && tab_column > start.column() {
return None;
}
// Tab on last line of the quoted range, preceding the quote.
if lineno == end.row() && tab_column < end.column() {
return None;
}
}
if indent.contains('\t') {
Some(Diagnostic::new(
TabIndentation,
Range::new(
Location::new(lineno + 1, 0),
Location::new(lineno + 1, indent.chars().count()),
Location::new(lineno, 0),
Location::new(lineno, indent.chars().count()),
),
))
} else {

View file

@ -352,48 +352,6 @@ expression: diagnostics
fix:
edits: []
parent: ~
- kind:
name: TabIndentation
body: Indentation contains tabs
suggestion: ~
fixable: false
location:
row: 102
column: 0
end_location:
row: 102
column: 1
fix:
edits: []
parent: ~
- kind:
name: TabIndentation
body: Indentation contains tabs
suggestion: ~
fixable: false
location:
row: 105
column: 0
end_location:
row: 105
column: 1
fix:
edits: []
parent: ~
- kind:
name: TabIndentation
body: Indentation contains tabs
suggestion: ~
fixable: false
location:
row: 110
column: 0
end_location:
row: 110
column: 1
fix:
edits: []
parent: ~
- kind:
name: TabIndentation
body: Indentation contains tabs
@ -450,20 +408,6 @@ expression: diagnostics
fix:
edits: []
parent: ~
- kind:
name: TabIndentation
body: Indentation contains tabs
suggestion: ~
fixable: false
location:
row: 136
column: 0
end_location:
row: 136
column: 1
fix:
edits: []
parent: ~
- kind:
name: TabIndentation
body: Indentation contains tabs

View file

@ -5,30 +5,47 @@ use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use crate::types::Range;
pub struct Indexer {
commented_lines: Vec<usize>,
continuation_lines: Vec<usize>,
string_ranges: Vec<Range>,
}
impl Indexer {
/// Return a slice of all lines that include a comment.
pub fn commented_lines(&self) -> &[usize] {
&self.commented_lines
}
/// Return a slice of all lines that end with a continuation (backslash).
pub fn continuation_lines(&self) -> &[usize] {
&self.continuation_lines
}
/// Return a slice of all ranges that include a triple-quoted string.
pub fn string_ranges(&self) -> &[Range] {
&self.string_ranges
}
}
impl From<&[LexResult]> for Indexer {
fn from(lxr: &[LexResult]) -> Self {
let mut commented_lines = Vec::new();
let mut continuation_lines = Vec::new();
let mut string_ranges = Vec::new();
let mut prev: Option<(&Location, &Tok, &Location)> = None;
for (start, tok, end) in lxr.iter().flatten() {
if matches!(tok, Tok::Comment(_)) {
commented_lines.push(start.row());
match tok {
Tok::Comment(..) => commented_lines.push(start.row()),
Tok::String {
triple_quoted: true,
..
} => string_ranges.push(Range::new(*start, *end)),
_ => (),
}
if let Some((.., prev_tok, prev_end)) = prev {
if !matches!(
prev_tok,
@ -44,16 +61,19 @@ impl From<&[LexResult]> for Indexer {
Self {
commented_lines,
continuation_lines,
string_ranges,
}
}
}
#[cfg(test)]
mod tests {
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode};
use crate::source_code::Indexer;
use crate::types::Range;
#[test]
fn continuation() {
@ -114,4 +134,56 @@ import os
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.continuation_lines(), [9, 12]);
}
#[test]
fn string_ranges() {
let contents = r#""this is a single-quoted string""#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.string_ranges(), &vec![]);
let contents = r#"
"""
this is a multiline string
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(
indexer.string_ranges(),
&vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
);
let contents = r#"
"""
'''this is a multiline string with multiple delimiter types'''
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(
indexer.string_ranges(),
&vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
);
let contents = r#"
"""
this is one
multiline string
"""
"""
and this is
another
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(
indexer.string_ranges(),
&vec![
Range::new(Location::new(2, 12), Location::new(5, 15)),
Range::new(Location::new(6, 12), Location::new(9, 15))
]
);
}
}