mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-30 13:51:37 +00:00
Avoid panics for implicitly-concatenated docstrings (#3584)
## Summary In the rare event that a docstring contains an implicit string concatenation, we currently have the potential to panic, because we assume that if a string starts with triple quotes, it _ends_ with triple quotes. But with implicit concatenation, that's not the case: a single `Expr` could start and end with different quote styles, because it can contain multiple string tokens. Supporting these "properly" is pretty hard. In some cases it's hard to even know what the "right" behavior is. So for now, I'm just detecting and warning, which is better than a panic. Closes #3543. Closes #3585.
This commit is contained in:
parent
a5494b8541
commit
4892167217
7 changed files with 135 additions and 25 deletions
|
@ -504,3 +504,12 @@ Testing this incorrectly indented docstring.
|
||||||
x: Test argument.
|
x: Test argument.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def implicit_string_concatenation():
|
||||||
|
"""Toggle the gizmo.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
A value of some sort.
|
||||||
|
|
||||||
|
""""Extra content"
|
||||||
|
|
|
@ -38,6 +38,7 @@ use crate::checkers::ast::deferred::Deferred;
|
||||||
use crate::docstrings::definition::{
|
use crate::docstrings::definition::{
|
||||||
transition_scope, Definition, DefinitionKind, Docstring, Documentable,
|
transition_scope, Definition, DefinitionKind, Docstring, Documentable,
|
||||||
};
|
};
|
||||||
|
use crate::fs::relativize_path;
|
||||||
use crate::registry::{AsRule, Rule};
|
use crate::registry::{AsRule, Rule};
|
||||||
use crate::rules::{
|
use crate::rules::{
|
||||||
flake8_2020, flake8_annotations, flake8_bandit, flake8_blind_except, flake8_boolean_trap,
|
flake8_2020, flake8_annotations, flake8_bandit, flake8_blind_except, flake8_boolean_trap,
|
||||||
|
@ -50,7 +51,7 @@ use crate::rules::{
|
||||||
};
|
};
|
||||||
use crate::settings::types::PythonVersion;
|
use crate::settings::types::PythonVersion;
|
||||||
use crate::settings::{flags, Settings};
|
use crate::settings::{flags, Settings};
|
||||||
use crate::{autofix, docstrings, noqa};
|
use crate::{autofix, docstrings, noqa, warn_user};
|
||||||
|
|
||||||
mod deferred;
|
mod deferred;
|
||||||
|
|
||||||
|
@ -5101,6 +5102,7 @@ impl<'a> Checker<'a> {
|
||||||
}
|
}
|
||||||
overloaded_name = flake8_annotations::helpers::overloaded_name(self, &definition);
|
overloaded_name = flake8_annotations::helpers::overloaded_name(self, &definition);
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.is_stub {
|
if self.is_stub {
|
||||||
if self.settings.rules.enabled(Rule::DocstringInStub) {
|
if self.settings.rules.enabled(Rule::DocstringInStub) {
|
||||||
flake8_pyi::rules::docstring_in_stubs(self, definition.docstring);
|
flake8_pyi::rules::docstring_in_stubs(self, definition.docstring);
|
||||||
|
@ -5130,6 +5132,16 @@ impl<'a> Checker<'a> {
|
||||||
Location::new(expr.location.row(), expr.location.column()),
|
Location::new(expr.location.row(), expr.location.column()),
|
||||||
));
|
));
|
||||||
|
|
||||||
|
if pydocstyle::helpers::should_ignore_docstring(contents) {
|
||||||
|
warn_user!(
|
||||||
|
"Docstring at {}:{}:{} contains implicit string concatenation; ignoring...",
|
||||||
|
relativize_path(self.path),
|
||||||
|
expr.location.row(),
|
||||||
|
expr.location.column() + 1
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
let body = str::raw_contents(contents);
|
let body = str::raw_contents(contents);
|
||||||
let docstring = Docstring {
|
let docstring = Docstring {
|
||||||
kind: definition.kind,
|
kind: definition.kind,
|
||||||
|
|
|
@ -2,15 +2,18 @@
|
||||||
|
|
||||||
use rustpython_parser::ast::{Constant, Expr, ExprKind, Stmt, StmtKind};
|
use rustpython_parser::ast::{Constant, Expr, ExprKind, Stmt, StmtKind};
|
||||||
|
|
||||||
use crate::docstrings::definition::{Definition, DefinitionKind, Documentable};
|
|
||||||
use ruff_python_ast::visibility::{Modifier, VisibleScope};
|
use ruff_python_ast::visibility::{Modifier, VisibleScope};
|
||||||
|
|
||||||
|
use crate::docstrings::definition::{Definition, DefinitionKind, Documentable};
|
||||||
|
|
||||||
/// Extract a docstring from a function or class body.
|
/// Extract a docstring from a function or class body.
|
||||||
pub fn docstring_from(suite: &[Stmt]) -> Option<&Expr> {
|
pub fn docstring_from(suite: &[Stmt]) -> Option<&Expr> {
|
||||||
let stmt = suite.first()?;
|
let stmt = suite.first()?;
|
||||||
|
// Require the docstring to be a standalone expression.
|
||||||
let StmtKind::Expr { value } = &stmt.node else {
|
let StmtKind::Expr { value } = &stmt.node else {
|
||||||
return None;
|
return None;
|
||||||
};
|
};
|
||||||
|
// Only match strings.
|
||||||
if !matches!(
|
if !matches!(
|
||||||
&value.node,
|
&value.node,
|
||||||
ExprKind::Constant {
|
ExprKind::Constant {
|
||||||
|
|
|
@ -3,12 +3,13 @@ use std::collections::BTreeSet;
|
||||||
use ruff_python_ast::cast;
|
use ruff_python_ast::cast;
|
||||||
use ruff_python_ast::helpers::{map_callable, to_call_path};
|
use ruff_python_ast::helpers::{map_callable, to_call_path};
|
||||||
use ruff_python_ast::newlines::StrExt;
|
use ruff_python_ast::newlines::StrExt;
|
||||||
|
use ruff_python_ast::str::is_implicit_concatenation;
|
||||||
|
|
||||||
use crate::checkers::ast::Checker;
|
use crate::checkers::ast::Checker;
|
||||||
use crate::docstrings::definition::{Definition, DefinitionKind};
|
use crate::docstrings::definition::{Definition, DefinitionKind};
|
||||||
|
|
||||||
/// Return the index of the first logical line in a string.
|
/// Return the index of the first logical line in a string.
|
||||||
pub fn logical_line(content: &str) -> Option<usize> {
|
pub(crate) fn logical_line(content: &str) -> Option<usize> {
|
||||||
// Find the first logical line.
|
// Find the first logical line.
|
||||||
let mut logical_line = None;
|
let mut logical_line = None;
|
||||||
for (i, line) in content.universal_newlines().enumerate() {
|
for (i, line) in content.universal_newlines().enumerate() {
|
||||||
|
@ -27,14 +28,14 @@ pub fn logical_line(content: &str) -> Option<usize> {
|
||||||
|
|
||||||
/// Normalize a word by removing all non-alphanumeric characters
|
/// Normalize a word by removing all non-alphanumeric characters
|
||||||
/// and converting it to lowercase.
|
/// and converting it to lowercase.
|
||||||
pub fn normalize_word(first_word: &str) -> String {
|
pub(crate) fn normalize_word(first_word: &str) -> String {
|
||||||
first_word
|
first_word
|
||||||
.replace(|c: char| !c.is_alphanumeric(), "")
|
.replace(|c: char| !c.is_alphanumeric(), "")
|
||||||
.to_lowercase()
|
.to_lowercase()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check decorator list to see if function should be ignored.
|
/// Check decorator list to see if function should be ignored.
|
||||||
pub fn should_ignore_definition(
|
pub(crate) fn should_ignore_definition(
|
||||||
checker: &Checker,
|
checker: &Checker,
|
||||||
definition: &Definition,
|
definition: &Definition,
|
||||||
ignore_decorators: &BTreeSet<String>,
|
ignore_decorators: &BTreeSet<String>,
|
||||||
|
@ -60,3 +61,11 @@ pub fn should_ignore_definition(
|
||||||
}
|
}
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if a docstring should be ignored.
|
||||||
|
pub(crate) fn should_ignore_docstring(contents: &str) -> bool {
|
||||||
|
// Avoid analyzing docstrings that contain implicit string concatenations.
|
||||||
|
// Python does consider these docstrings, but they're almost certainly a
|
||||||
|
// user error, and supporting them "properly" is extremely difficult.
|
||||||
|
is_implicit_concatenation(contents)
|
||||||
|
}
|
||||||
|
|
|
@ -3,11 +3,10 @@ use rustpython_common::format::{
|
||||||
FieldName, FieldNamePart, FieldType, FormatPart, FormatString, FromTemplate,
|
FieldName, FieldNamePart, FieldType, FormatPart, FormatString, FromTemplate,
|
||||||
};
|
};
|
||||||
use rustpython_parser::ast::{Constant, Expr, ExprKind, KeywordData};
|
use rustpython_parser::ast::{Constant, Expr, ExprKind, KeywordData};
|
||||||
use rustpython_parser::{lexer, Mode, Tok};
|
|
||||||
|
|
||||||
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix};
|
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix};
|
||||||
use ruff_macros::{derive_message_formats, violation};
|
use ruff_macros::{derive_message_formats, violation};
|
||||||
use ruff_python_ast::str::{leading_quote, trailing_quote};
|
use ruff_python_ast::str::{is_implicit_concatenation, leading_quote, trailing_quote};
|
||||||
use ruff_python_ast::types::Range;
|
use ruff_python_ast::types::Range;
|
||||||
|
|
||||||
use crate::checkers::ast::Checker;
|
use crate::checkers::ast::Checker;
|
||||||
|
@ -127,13 +126,8 @@ fn try_convert_to_f_string(checker: &Checker, expr: &Expr) -> Option<String> {
|
||||||
|
|
||||||
let contents = checker.locator.slice(value);
|
let contents = checker.locator.slice(value);
|
||||||
|
|
||||||
// Tokenize: we need to avoid trying to fix implicit string concatenations.
|
// Skip implicit string concatenations.
|
||||||
if lexer::lex(contents, Mode::Module)
|
if is_implicit_concatenation(contents) {
|
||||||
.flatten()
|
|
||||||
.filter(|(_, tok, _)| matches!(tok, Tok::String { .. }))
|
|
||||||
.count()
|
|
||||||
> 1
|
|
||||||
{
|
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use rustpython_parser::ast::{Constant, Expr, ExprKind, Keyword};
|
use rustpython_parser::ast::{Constant, Expr, ExprKind, Keyword};
|
||||||
use rustpython_parser::{lexer, Mode, Tok};
|
|
||||||
|
|
||||||
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix};
|
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix};
|
||||||
use ruff_macros::{derive_message_formats, violation};
|
use ruff_macros::{derive_message_formats, violation};
|
||||||
|
use ruff_python_ast::str::is_implicit_concatenation;
|
||||||
use ruff_python_ast::types::Range;
|
use ruff_python_ast::types::Range;
|
||||||
|
|
||||||
use crate::checkers::ast::Checker;
|
use crate::checkers::ast::Checker;
|
||||||
|
@ -112,16 +112,9 @@ pub fn native_literals(
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// rust-python merges adjacent string/bytes literals into one node, but we can't
|
// Skip implicit string concatenations.
|
||||||
// safely remove the outer call in this situation. We're following pyupgrade
|
|
||||||
// here and skip.
|
|
||||||
let arg_code = checker.locator.slice(arg);
|
let arg_code = checker.locator.slice(arg);
|
||||||
if lexer::lex_located(arg_code, Mode::Module, arg.location)
|
if is_implicit_concatenation(arg_code) {
|
||||||
.flatten()
|
|
||||||
.filter(|(_, tok, _)| matches!(tok, Tok::String { .. }))
|
|
||||||
.count()
|
|
||||||
> 1
|
|
||||||
{
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -67,15 +67,71 @@ pub fn is_triple_quote(content: &str) -> bool {
|
||||||
TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
|
TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return `true` if the string expression is an implicit concatenation.
|
||||||
|
///
|
||||||
|
/// ## Examples
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use ruff_python_ast::str::is_implicit_concatenation;
|
||||||
|
///
|
||||||
|
/// assert!(is_implicit_concatenation(r#"'abc' 'def'"#));
|
||||||
|
/// assert!(!is_implicit_concatenation(r#"'abcdef'"#));
|
||||||
|
/// ```
|
||||||
|
pub fn is_implicit_concatenation(content: &str) -> bool {
|
||||||
|
let Some(leading_quote_str) = leading_quote(content) else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
let Some(trailing_quote_str) = trailing_quote(content) else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
// If the trailing quote doesn't match the _expected_ trailing quote, then the string is
|
||||||
|
// implicitly concatenated.
|
||||||
|
//
|
||||||
|
// For example, given:
|
||||||
|
// ```python
|
||||||
|
// u"""abc""" 'def'
|
||||||
|
// ```
|
||||||
|
//
|
||||||
|
// The leading quote would be `u"""`, and the trailing quote would be `'`, but the _expected_
|
||||||
|
// trailing quote would be `"""`. Since `'` does not equal `"""`, we'd return `true`.
|
||||||
|
if trailing_quote_str != trailing_quote(leading_quote_str).unwrap() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search for any trailing quotes _before_ the end of the string.
|
||||||
|
let mut rest = &content[leading_quote_str.len()..content.len() - trailing_quote_str.len()];
|
||||||
|
while let Some(index) = rest.find(trailing_quote_str) {
|
||||||
|
let mut chars = rest[..index].chars().rev();
|
||||||
|
if let Some('\\') = chars.next() {
|
||||||
|
// If the quote is double-escaped, then it's _not_ escaped, so the string is
|
||||||
|
// implicitly concatenated.
|
||||||
|
if let Some('\\') = chars.next() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// If the quote is _not_ escaped, then it's implicitly concatenated.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
rest = &rest[index + trailing_quote_str.len()..];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, we know the string ends with the expected trailing quote, so it's not implicitly
|
||||||
|
// concatenated.
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use crate::str::is_implicit_concatenation;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
|
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
|
||||||
TRIPLE_QUOTE_STR_PREFIXES,
|
TRIPLE_QUOTE_STR_PREFIXES,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_prefixes() {
|
fn prefix_uniqueness() {
|
||||||
let prefixes = TRIPLE_QUOTE_STR_PREFIXES
|
let prefixes = TRIPLE_QUOTE_STR_PREFIXES
|
||||||
.iter()
|
.iter()
|
||||||
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
|
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
|
||||||
|
@ -93,4 +149,38 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn implicit_concatenation() {
|
||||||
|
// Positive cases.
|
||||||
|
assert!(is_implicit_concatenation(r#""abc" "def""#));
|
||||||
|
assert!(is_implicit_concatenation(r#""abc" 'def'"#));
|
||||||
|
assert!(is_implicit_concatenation(r#""""abc""" "def""#));
|
||||||
|
assert!(is_implicit_concatenation(r#"'''abc''' 'def'"#));
|
||||||
|
assert!(is_implicit_concatenation(r#""""abc""" 'def'"#));
|
||||||
|
assert!(is_implicit_concatenation(r#"'''abc''' "def""#));
|
||||||
|
assert!(is_implicit_concatenation(r#""""abc""""def""#));
|
||||||
|
assert!(is_implicit_concatenation(r#"'''abc''''def'"#));
|
||||||
|
assert!(is_implicit_concatenation(r#""""abc"""'def'"#));
|
||||||
|
assert!(is_implicit_concatenation(r#"'''abc'''"def""#));
|
||||||
|
|
||||||
|
// Negative cases.
|
||||||
|
assert!(!is_implicit_concatenation(r#""abc""#));
|
||||||
|
assert!(!is_implicit_concatenation(r#"'abc'"#));
|
||||||
|
assert!(!is_implicit_concatenation(r#""""abc""""#));
|
||||||
|
assert!(!is_implicit_concatenation(r#"'''abc'''"#));
|
||||||
|
assert!(!is_implicit_concatenation(r#""""ab"c""""#));
|
||||||
|
assert!(!is_implicit_concatenation(r#"'''ab'c'''"#));
|
||||||
|
assert!(!is_implicit_concatenation(r#""""ab'c""""#));
|
||||||
|
assert!(!is_implicit_concatenation(r#"'''ab"c'''"#));
|
||||||
|
assert!(!is_implicit_concatenation(r#""""ab'''c""""#));
|
||||||
|
assert!(!is_implicit_concatenation(r#"'''ab"""c'''"#));
|
||||||
|
|
||||||
|
// Positive cases with escaped quotes.
|
||||||
|
assert!(is_implicit_concatenation(r#""abc\\""def""#));
|
||||||
|
assert!(is_implicit_concatenation(r#""abc\\""def""#));
|
||||||
|
|
||||||
|
// Negative cases with escaped quotes.
|
||||||
|
assert!(!is_implicit_concatenation(r#""abc\"def""#));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue