Mirror of https://github.com/astral-sh/ruff.git (synced 2025-08-03 18:28:24 +00:00)
Ensure that tab characters aren't in multi-line strings before throwing a violation (#3837)
This commit is contained in:
parent 454c6d9c2f
commit abaf0a198d

6 changed files with 131 additions and 78 deletions
@@ -97,10 +97,10 @@ if length > options.max_line_length:
 if os.path.exists(os.path.join(path, PEP8_BIN)):
 cmd = ([os.path.join(path, PEP8_BIN)] +
 self._pep8_options(targetfile))
-#: W191
+#: W191 - okay
 '''
 multiline string with tab in it'''
-#: E101 W191
+#: E101 (W191 okay)
 '''multiline string
 with tabs
 and spaces
@@ -142,4 +142,10 @@ def test_keys(self):
 x = [
 'abc'
 ]
-#:
+#: W191 - okay
+''' multiline string with tab in it, same lines'''
+""" here we're using '''different delimiters'''"""
+'''
+multiline string with tab in it, different lines
+'''
+" single line string with tab in it"
@@ -4,7 +4,7 @@ use std::path::Path;
 
 use ruff_diagnostics::Diagnostic;
 use ruff_python_ast::newlines::StrExt;
-use ruff_python_ast::source_code::{Locator, Stylist};
+use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
 
 use crate::registry::Rule;
 use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective};
@@ -24,7 +24,7 @@ pub fn check_physical_lines(
     path: &Path,
     locator: &Locator,
     stylist: &Stylist,
-    commented_lines: &[usize],
+    indexer: &Indexer,
     doc_lines: &[usize],
     settings: &Settings,
     autofix: flags::Autofix,
@@ -55,8 +55,11 @@ pub fn check_physical_lines(
     let fix_shebang_whitespace =
         autofix.into() && settings.rules.should_fix(Rule::ShebangLeadingWhitespace);
 
-    let mut commented_lines_iter = commented_lines.iter().peekable();
+    let mut commented_lines_iter = indexer.commented_lines().iter().peekable();
     let mut doc_lines_iter = doc_lines.iter().peekable();
+
+    let string_lines = indexer.string_ranges();
+
     for (index, line) in locator.contents().universal_newlines().enumerate() {
         while commented_lines_iter
             .next_if(|lineno| &(index + 1) == *lineno)
@@ -155,7 +158,7 @@ pub fn check_physical_lines(
         }
 
         if enforce_tab_indentation {
-            if let Some(diagnostic) = tab_indentation(index, line) {
+            if let Some(diagnostic) = tab_indentation(index + 1, line, string_lines) {
                 diagnostics.push(diagnostic);
             }
         }
@@ -186,7 +189,7 @@ mod tests {
     use rustpython_parser::Mode;
     use std::path::Path;
 
-    use ruff_python_ast::source_code::{Locator, Stylist};
+    use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
 
     use crate::registry::Rule;
     use crate::settings::{flags, Settings};
@@ -198,6 +201,7 @@ mod tests {
         let line = "'\u{4e9c}' * 2"; // 7 in UTF-32, 9 in UTF-8.
         let locator = Locator::new(line);
         let tokens: Vec<_> = lex(line, Mode::Module).collect();
+        let indexer: Indexer = tokens.as_slice().into();
         let stylist = Stylist::from_tokens(&tokens, &locator);
 
         let check_with_max_line_length = |line_length: usize| {
@@ -205,7 +209,7 @@ mod tests {
                 Path::new("foo.py"),
                 &locator,
                 &stylist,
-                &[],
+                &indexer,
                 &[],
                 &Settings {
                     line_length,
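A note on the hunks above: `universal_newlines().enumerate()` yields a 0-based `index`, while the rows stored in `Location`s (and in the Indexer's string ranges) are 1-based. The caller therefore now passes `index + 1` into `tab_indentation`, and the rule builds its `Range` from `lineno` directly instead of adding 1 a second time. A minimal sketch of that convention (illustrative only, not code from this commit):

```rust
fn main() {
    // Row 2 starts with a tab; `enumerate()` reports it as index 1.
    let source = "x = 1\n\ty = 2\n";
    for (index, line) in source.lines().enumerate() {
        let lineno = index + 1; // 1-based row, matching Location::row()
        println!("row {lineno}: starts with tab = {}", line.starts_with('\t'));
    }
}
```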
|
|
|
@@ -196,13 +196,7 @@ pub fn check_path(
         .any(|rule_code| rule_code.lint_source().is_physical_lines())
     {
         diagnostics.extend(check_physical_lines(
-            path,
-            locator,
-            stylist,
-            indexer.commented_lines(),
-            &doc_lines,
-            settings,
-            autofix,
+            path, locator, stylist, indexer, &doc_lines, settings, autofix,
         ));
     }
 
@@ -16,15 +16,48 @@ impl Violation for TabIndentation {
 }
 
 /// W191
-pub fn tab_indentation(lineno: usize, line: &str) -> Option<Diagnostic> {
+pub fn tab_indentation(lineno: usize, line: &str, string_ranges: &[Range]) -> Option<Diagnostic> {
     let indent = leading_space(line);
-    if indent.contains('\t') {
+    if let Some(tab_index) = indent.find('\t') {
+        // If the tab character is within a multi-line string, abort.
+        if let Ok(range_index) = string_ranges.binary_search_by(|range| {
+            let start = range.location.row();
+            let end = range.end_location.row();
+            if start > lineno {
+                std::cmp::Ordering::Greater
+            } else if end < lineno {
+                std::cmp::Ordering::Less
+            } else {
+                std::cmp::Ordering::Equal
+            }
+        }) {
+            let string_range = &string_ranges[range_index];
+            let start = string_range.location;
+            let end = string_range.end_location;
+
+            // Tab is contained in the string range by virtue of lines.
+            if lineno != start.row() && lineno != end.row() {
+                return None;
+            }
+
+            let tab_column = line[..tab_index].chars().count();
+
+            // Tab on first line of the quoted range, following the quote.
+            if lineno == start.row() && tab_column > start.column() {
+                return None;
+            }
+
+            // Tab on last line of the quoted range, preceding the quote.
+            if lineno == end.row() && tab_column < end.column() {
+                return None;
+            }
+        }
+
         Some(Diagnostic::new(
             TabIndentation,
             Range::new(
-                Location::new(lineno + 1, 0),
-                Location::new(lineno + 1, indent.chars().count()),
+                Location::new(lineno, 0),
+                Location::new(lineno, indent.chars().count()),
             ),
         ))
     } else {
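The early returns above rely on `string_ranges` being sorted by position, so a binary search can locate the one triple-quoted string (if any) whose rows contain `lineno`; the column comparisons only matter when the tab sits on the range's first or last row, since any interior row lies entirely inside the string. A self-contained sketch of that lookup, using `(start_row, end_row)` tuples in place of ruff's `Range` type (the function name here is illustrative, not from this commit):

```rust
use std::cmp::Ordering;

/// Find the index of the range whose row span contains `lineno`, if any.
/// Assumes `ranges` are sorted and non-overlapping, as the Indexer emits them.
fn containing_range(ranges: &[(usize, usize)], lineno: usize) -> Option<usize> {
    ranges
        .binary_search_by(|&(start, end)| {
            if start > lineno {
                Ordering::Greater // range starts after the line
            } else if end < lineno {
                Ordering::Less // range ends before the line
            } else {
                Ordering::Equal // line falls within the range
            }
        })
        .ok()
}

fn main() {
    // Two triple-quoted strings spanning rows 2..=4 and 6..=9.
    let ranges = [(2, 4), (6, 9)];
    assert_eq!(containing_range(&ranges, 3), Some(0)); // interior row: tab is inside the string
    assert_eq!(containing_range(&ranges, 5), None); // between strings: W191 still applies
    assert_eq!(containing_range(&ranges, 9), Some(1)); // last row: the column check decides
}
```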
|
|
|
@@ -352,48 +352,6 @@ expression: diagnostics
   fix:
     edits: []
   parent: ~
-- kind:
-    name: TabIndentation
-    body: Indentation contains tabs
-    suggestion: ~
-    fixable: false
-  location:
-    row: 102
-    column: 0
-  end_location:
-    row: 102
-    column: 1
-  fix:
-    edits: []
-  parent: ~
-- kind:
-    name: TabIndentation
-    body: Indentation contains tabs
-    suggestion: ~
-    fixable: false
-  location:
-    row: 105
-    column: 0
-  end_location:
-    row: 105
-    column: 1
-  fix:
-    edits: []
-  parent: ~
-- kind:
-    name: TabIndentation
-    body: Indentation contains tabs
-    suggestion: ~
-    fixable: false
-  location:
-    row: 110
-    column: 0
-  end_location:
-    row: 110
-    column: 1
-  fix:
-    edits: []
-  parent: ~
 - kind:
     name: TabIndentation
     body: Indentation contains tabs
@@ -450,20 +408,6 @@ expression: diagnostics
   fix:
     edits: []
   parent: ~
-- kind:
-    name: TabIndentation
-    body: Indentation contains tabs
-    suggestion: ~
-    fixable: false
-  location:
-    row: 136
-    column: 0
-  end_location:
-    row: 136
-    column: 1
-  fix:
-    edits: []
-  parent: ~
 - kind:
     name: TabIndentation
     body: Indentation contains tabs
@@ -5,30 +5,47 @@ use rustpython_parser::ast::Location;
 use rustpython_parser::lexer::LexResult;
 use rustpython_parser::Tok;
 
+use crate::types::Range;
+
 pub struct Indexer {
     commented_lines: Vec<usize>,
     continuation_lines: Vec<usize>,
+    string_ranges: Vec<Range>,
 }
 
 impl Indexer {
+    /// Return a slice of all lines that include a comment.
     pub fn commented_lines(&self) -> &[usize] {
         &self.commented_lines
     }
 
+    /// Return a slice of all lines that end with a continuation (backslash).
     pub fn continuation_lines(&self) -> &[usize] {
         &self.continuation_lines
     }
+
+    /// Return a slice of all ranges that include a triple-quoted string.
+    pub fn string_ranges(&self) -> &[Range] {
+        &self.string_ranges
+    }
 }
 
 impl From<&[LexResult]> for Indexer {
     fn from(lxr: &[LexResult]) -> Self {
         let mut commented_lines = Vec::new();
         let mut continuation_lines = Vec::new();
+        let mut string_ranges = Vec::new();
         let mut prev: Option<(&Location, &Tok, &Location)> = None;
         for (start, tok, end) in lxr.iter().flatten() {
-            if matches!(tok, Tok::Comment(_)) {
-                commented_lines.push(start.row());
+            match tok {
+                Tok::Comment(..) => commented_lines.push(start.row()),
+                Tok::String {
+                    triple_quoted: true,
+                    ..
+                } => string_ranges.push(Range::new(*start, *end)),
+                _ => (),
             }
 
             if let Some((.., prev_tok, prev_end)) = prev {
                 if !matches!(
                     prev_tok,
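Because the lexer emits tokens in source order, the ranges pushed above come out sorted by starting row and, for valid Python, non-overlapping, which is the precondition the `binary_search_by` in the W191 rule depends on. A tiny illustration of that invariant with bare row pairs (a sketch under that assumption, not part of the commit):

```rust
fn main() {
    // Row spans as the Indexer would collect them for three triple-quoted strings.
    let string_ranges = [(2, 5), (6, 9), (14, 14)];
    // Each range ends before the next begins, so a binary search over rows is valid.
    assert!(string_ranges.windows(2).all(|w| w[0].1 < w[1].0));
}
```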
|
@@ -44,16 +61,19 @@ impl From<&[LexResult]> for Indexer {
         Self {
             commented_lines,
             continuation_lines,
+            string_ranges,
         }
     }
 }
 
 #[cfg(test)]
 mod tests {
+    use rustpython_parser::ast::Location;
     use rustpython_parser::lexer::LexResult;
     use rustpython_parser::{lexer, Mode};
 
     use crate::source_code::Indexer;
+    use crate::types::Range;
 
     #[test]
     fn continuation() {
@@ -114,4 +134,56 @@ import os
         let indexer: Indexer = lxr.as_slice().into();
         assert_eq!(indexer.continuation_lines(), [9, 12]);
     }
+
+    #[test]
+    fn string_ranges() {
+        let contents = r#""this is a single-quoted string""#;
+        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
+        let indexer: Indexer = lxr.as_slice().into();
+        assert_eq!(indexer.string_ranges(), &vec![]);
+
+        let contents = r#"
+            """
+            this is a multiline string
+            """
+            "#;
+        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
+        let indexer: Indexer = lxr.as_slice().into();
+        assert_eq!(
+            indexer.string_ranges(),
+            &vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
+        );
+
+        let contents = r#"
+            """
+            '''this is a multiline string with multiple delimiter types'''
+            """
+            "#;
+        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
+        let indexer: Indexer = lxr.as_slice().into();
+        assert_eq!(
+            indexer.string_ranges(),
+            &vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
+        );
+
+        let contents = r#"
+            """
+            this is one
+            multiline string
+            """
+            """
+            and this is
+            another
+            """
+            "#;
+        let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
+        let indexer: Indexer = lxr.as_slice().into();
+        assert_eq!(
+            indexer.string_ranges(),
+            &vec![
+                Range::new(Location::new(2, 12), Location::new(5, 15)),
+                Range::new(Location::new(6, 12), Location::new(9, 15))
+            ]
+        );
+    }
 }
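Putting the pieces together: a rough end-to-end sketch of the new data flow (assuming the same crates and imports as the tests above; this is illustrative and not part of the commit). The tab on row 2 only appears inside a triple-quoted string body, so it falls inside one of `indexer.string_ranges()` and W191 should no longer fire for it:

```rust
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode};

use ruff_python_ast::source_code::Indexer;

fn main() {
    let contents = "'''\n\tmultiline string with tab in it'''\nx = 1\n";
    let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
    let indexer: Indexer = lxr.as_slice().into();

    // Does any triple-quoted string cover row 2 (the tab-indented line)?
    let row = 2;
    let in_string = indexer
        .string_ranges()
        .iter()
        .any(|range| range.location.row() <= row && row <= range.end_location.row());
    assert!(in_string);
}
```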
|
|