Replace dynamic implicit concatenation detection with parser flag (#6513)

## Summary

In https://github.com/astral-sh/ruff/pull/6512, we added a flag to the
AST to mark implicitly-concatenated string expressions. This PR makes
use of that flag to remove the `is_implicit_concatenation` method.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2023-08-14 10:27:17 -04:00 committed by GitHub
parent 40407dcce5
commit a7cf8f0b77
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 52 additions and 150 deletions

View file

@ -171,7 +171,7 @@ pub(crate) fn definitions(checker: &mut Checker) {
expr.start(), expr.start(),
)); ));
if pydocstyle::helpers::should_ignore_docstring(contents) { if pydocstyle::helpers::should_ignore_docstring(expr) {
#[allow(deprecated)] #[allow(deprecated)]
let location = checker.locator.compute_source_location(expr.start()); let location = checker.locator.compute_source_location(expr.start());
warn_user!( warn_user!(

View file

@ -2,7 +2,7 @@ use std::collections::BTreeSet;
use ruff_python_ast::call_path::from_qualified_name; use ruff_python_ast::call_path::from_qualified_name;
use ruff_python_ast::helpers::map_callable; use ruff_python_ast::helpers::map_callable;
use ruff_python_ast::str::is_implicit_concatenation; use ruff_python_ast::Expr;
use ruff_python_semantic::{Definition, SemanticModel}; use ruff_python_semantic::{Definition, SemanticModel};
use ruff_source_file::UniversalNewlines; use ruff_source_file::UniversalNewlines;
@ -63,9 +63,11 @@ pub(crate) fn should_ignore_definition(
} }
/// Check if a docstring should be ignored. /// Check if a docstring should be ignored.
pub(crate) fn should_ignore_docstring(contents: &str) -> bool { pub(crate) fn should_ignore_docstring(docstring: &Expr) -> bool {
// Avoid analyzing docstrings that contain implicit string concatenations. // Avoid analyzing docstrings that contain implicit string concatenations.
// Python does consider these docstrings, but they're almost certainly a // Python does consider these docstrings, but they're almost certainly a
// user error, and supporting them "properly" is extremely difficult. // user error, and supporting them "properly" is extremely difficult.
is_implicit_concatenation(contents) docstring
.as_constant_expr()
.is_some_and(|constant| constant.value.is_implicit_concatenated())
} }

View file

@ -2,11 +2,10 @@ use std::fmt;
use std::str::FromStr; use std::str::FromStr;
use num_bigint::BigInt; use num_bigint::BigInt;
use ruff_python_ast::{self as ast, Constant, Expr, Keyword, Ranged};
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::is_implicit_concatenation; use ruff_python_ast::{self as ast, Constant, Expr, Keyword, Ranged};
use crate::checkers::ast::Checker; use crate::checkers::ast::Checker;
use crate::registry::AsRule; use crate::registry::AsRule;
@ -182,6 +181,11 @@ pub(crate) fn native_literals(
return; return;
}; };
// Skip implicit string concatenations.
if value.is_implicit_concatenated() {
return;
}
let Ok(arg_literal_type) = LiteralType::try_from(value) else { let Ok(arg_literal_type) = LiteralType::try_from(value) else {
return; return;
}; };
@ -192,13 +196,6 @@ pub(crate) fn native_literals(
let arg_code = checker.locator().slice(arg.range()); let arg_code = checker.locator().slice(arg.range());
// Skip implicit string concatenations.
if matches!(arg_literal_type, LiteralType::Str | LiteralType::Bytes)
&& is_implicit_concatenation(arg_code)
{
return;
}
let mut diagnostic = Diagnostic::new(NativeLiterals { literal_type }, expr.range()); let mut diagnostic = Diagnostic::new(NativeLiterals { literal_type }, expr.range());
if checker.patch(diagnostic.kind.rule()) { if checker.patch(diagnostic.kind.rule()) {
diagnostic.set_fix(Fix::automatic(Edit::range_replacement( diagnostic.set_fix(Fix::automatic(Edit::range_replacement(

View file

@ -184,68 +184,8 @@ pub fn is_triple_quote(content: &str) -> bool {
TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content) TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
} }
/// Return `true` if the string expression is an implicit concatenation.
///
/// ## Examples
///
/// ```rust
/// use ruff_python_ast::str::is_implicit_concatenation;
///
/// assert!(is_implicit_concatenation(r#"'abc' 'def'"#));
/// assert!(!is_implicit_concatenation(r#"'abcdef'"#));
/// ```
pub fn is_implicit_concatenation(content: &str) -> bool {
let Some(leading_quote_str) = leading_quote(content) else {
return false;
};
let Some(trailing_quote_str) = trailing_quote(content) else {
return false;
};
// If the trailing quote doesn't match the _expected_ trailing quote, then the string is
// implicitly concatenated.
//
// For example, given:
// ```python
// u"""abc""" 'def'
// ```
//
// The leading quote would be `u"""`, and the trailing quote would be `'`, but the _expected_
// trailing quote would be `"""`. Since `'` does not equal `"""`, we'd return `true`.
if trailing_quote_str != trailing_quote(leading_quote_str).unwrap() {
return true;
}
// Search for any trailing quotes _before_ the end of the string.
let mut rest = &content[leading_quote_str.len()..content.len() - trailing_quote_str.len()];
while let Some(index) = rest.find(trailing_quote_str) {
let mut chars = rest[..index].chars().rev();
if let Some('\\') = chars.next() {
if chars.next() == Some('\\') {
// Either `\\'` or `\\\'` need to test one more character
// If the quote is preceded by `//` then it is not escaped, instead the backslash is escaped.
if chars.next() != Some('\\') {
return true;
}
}
} else {
// If the quote is _not_ escaped, then it's implicitly concatenated.
return true;
}
rest = &rest[index + trailing_quote_str.len()..];
}
// Otherwise, we know the string ends with the expected trailing quote, so it's not implicitly
// concatenated.
false
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::str::is_implicit_concatenation;
use super::{ use super::{
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
TRIPLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_STR_PREFIXES,
@ -270,39 +210,4 @@ mod tests {
} }
} }
} }
#[test]
fn implicit_concatenation() {
// Positive cases.
assert!(is_implicit_concatenation(r#""abc" "def""#));
assert!(is_implicit_concatenation(r#""abc" 'def'"#));
assert!(is_implicit_concatenation(r#""""abc""" "def""#));
assert!(is_implicit_concatenation(r#"'''abc''' 'def'"#));
assert!(is_implicit_concatenation(r#""""abc""" 'def'"#));
assert!(is_implicit_concatenation(r#"'''abc''' "def""#));
assert!(is_implicit_concatenation(r#""""abc""""def""#));
assert!(is_implicit_concatenation(r#"'''abc''''def'"#));
assert!(is_implicit_concatenation(r#""""abc"""'def'"#));
assert!(is_implicit_concatenation(r#"'''abc'''"def""#));
// Negative cases.
assert!(!is_implicit_concatenation(r#""abc""#));
assert!(!is_implicit_concatenation(r#"'abc'"#));
assert!(!is_implicit_concatenation(r#""""abc""""#));
assert!(!is_implicit_concatenation(r#"'''abc'''"#));
assert!(!is_implicit_concatenation(r#""""ab"c""""#));
assert!(!is_implicit_concatenation(r#"'''ab'c'''"#));
assert!(!is_implicit_concatenation(r#""""ab'c""""#));
assert!(!is_implicit_concatenation(r#"'''ab"c'''"#));
assert!(!is_implicit_concatenation(r#""""ab'''c""""#));
assert!(!is_implicit_concatenation(r#"'''ab"""c'''"#));
// Positive cases with escaped quotes.
assert!(is_implicit_concatenation(r#""abc\\""def""#));
assert!(is_implicit_concatenation(r#""abc\\""def""#));
// Negative cases with escaped quotes.
assert!(!is_implicit_concatenation(r#""abc\"def""#));
assert!(!is_implicit_concatenation(r#"'\\\' ""'"#));
}
} }

View file

@ -1,13 +1,13 @@
use std::iter; use std::iter;
use ruff_python_ast::{ use ruff_python_ast::{
Constant, Expr, ExprAttribute, ExprBinOp, ExprConstant, ExprUnaryOp, Operator, UnaryOp, Constant, Expr, ExprAttribute, ExprBinOp, ExprConstant, ExprUnaryOp, Operator, StringConstant,
UnaryOp,
}; };
use smallvec::SmallVec; use smallvec::SmallVec;
use ruff_formatter::{format_args, write, FormatOwnedWithRule, FormatRefWithRule}; use ruff_formatter::{format_args, write, FormatOwnedWithRule, FormatRefWithRule};
use ruff_python_ast::node::{AnyNodeRef, AstNode}; use ruff_python_ast::node::{AnyNodeRef, AstNode};
use ruff_python_ast::str::is_implicit_concatenation;
use crate::comments::{trailing_comments, trailing_node_comments}; use crate::comments::{trailing_comments, trailing_node_comments};
use crate::expression::expr_constant::ExprConstantLayout; use crate::expression::expr_constant::ExprConstantLayout;
@ -157,8 +157,11 @@ impl FormatExprBinOp {
fn layout<'a>(bin_op: &'a ExprBinOp, context: &PyFormatContext) -> BinOpLayout<'a> { fn layout<'a>(bin_op: &'a ExprBinOp, context: &PyFormatContext) -> BinOpLayout<'a> {
if let Some( if let Some(
constant @ ExprConstant { constant @ ExprConstant {
value: Constant::Str(_), value:
range, Constant::Str(StringConstant {
implicit_concatenated: true,
..
}),
.. ..
}, },
) = bin_op.left.as_constant_expr() ) = bin_op.left.as_constant_expr()
@ -169,7 +172,6 @@ impl FormatExprBinOp {
&& context.node_level().is_parenthesized() && context.node_level().is_parenthesized()
&& !comments.has_dangling_comments(constant) && !comments.has_dangling_comments(constant)
&& !comments.has_dangling_comments(bin_op) && !comments.has_dangling_comments(bin_op)
&& is_implicit_concatenation(&context.source()[*range])
{ {
BinOpLayout::LeftString(constant) BinOpLayout::LeftString(constant)
} else { } else {

View file

@ -1,9 +1,7 @@
use ruff_python_ast::{Constant, ExprConstant, Ranged};
use ruff_text_size::{TextLen, TextRange};
use ruff_formatter::FormatRuleWithOptions; use ruff_formatter::FormatRuleWithOptions;
use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::str::is_implicit_concatenation; use ruff_python_ast::{Constant, ExprConstant, Ranged};
use ruff_text_size::{TextLen, TextRange};
use crate::expression::number::{FormatComplex, FormatFloat, FormatInt}; use crate::expression::number::{FormatComplex, FormatFloat, FormatInt};
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
@ -80,10 +78,9 @@ impl NeedsParentheses for ExprConstant {
_parent: AnyNodeRef, _parent: AnyNodeRef,
context: &PyFormatContext, context: &PyFormatContext,
) -> OptionalParentheses { ) -> OptionalParentheses {
if self.value.is_str() || self.value.is_bytes() { if self.value.is_implicit_concatenated() {
let contents = context.locator().slice(self.range());
// Don't wrap triple quoted strings // Don't wrap triple quoted strings
if is_multiline_string(self, context.source()) || !is_implicit_concatenation(contents) { if is_multiline_string(self, context.source()) {
OptionalParentheses::Never OptionalParentheses::Never
} else { } else {
OptionalParentheses::Multiline OptionalParentheses::Multiline

View file

@ -4,7 +4,6 @@ use bitflags::bitflags;
use ruff_formatter::{format_args, write, FormatError}; use ruff_formatter::{format_args, write, FormatError};
use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::str::is_implicit_concatenation;
use ruff_python_ast::{self as ast, ExprConstant, ExprFString, Ranged}; use ruff_python_ast::{self as ast, ExprConstant, ExprFString, Ranged};
use ruff_python_parser::lexer::{lex_starts_at, LexicalError, LexicalErrorType}; use ruff_python_parser::lexer::{lex_starts_at, LexicalError, LexicalErrorType};
use ruff_python_parser::{Mode, Tok}; use ruff_python_parser::{Mode, Tok};
@ -49,6 +48,17 @@ impl<'a> AnyString<'a> {
} }
} }
} }
/// Returns `true` if the string is implicitly concatenated.
fn implicit_concatenated(&self) -> bool {
match self {
Self::Constant(ExprConstant { value, .. }) => value.is_implicit_concatenated(),
Self::FString(ExprFString {
implicit_concatenated,
..
}) => *implicit_concatenated,
}
}
} }
impl Ranged for AnyString<'_> { impl Ranged for AnyString<'_> {
@ -103,14 +113,11 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
match self.layout { match self.layout {
StringLayout::Default => { StringLayout::Default => {
let string_range = self.string.range(); if self.string.implicit_concatenated() {
let string_content = f.context().locator().slice(string_range);
if is_implicit_concatenation(string_content) {
in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f) in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
} else { } else {
FormatStringPart::new( FormatStringPart::new(
string_range, self.string.range(),
self.string.quoting(&f.context().locator()), self.string.quoting(&f.context().locator()),
&f.context().locator(), &f.context().locator(),
f.options().quote_style(), f.options().quote_style(),

View file

@ -1,10 +1,8 @@
use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions}; use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions};
use ruff_python_ast::helpers::is_compound_statement; use ruff_python_ast::helpers::is_compound_statement;
use ruff_python_ast::str::is_implicit_concatenation; use ruff_python_ast::{self as ast, Ranged, Stmt, Suite};
use ruff_python_ast::{self as ast, Expr, Ranged, Stmt, Suite};
use ruff_python_ast::{Constant, ExprConstant}; use ruff_python_ast::{Constant, ExprConstant};
use ruff_python_trivia::{lines_after_ignoring_trivia, lines_before}; use ruff_python_trivia::{lines_after_ignoring_trivia, lines_before};
use ruff_source_file::Locator;
use crate::comments::{leading_comments, trailing_comments}; use crate::comments::{leading_comments, trailing_comments};
use crate::context::{NodeLevel, WithNodeLevel}; use crate::context::{NodeLevel, WithNodeLevel};
@ -78,7 +76,7 @@ impl FormatRule<Suite, PyFormatContext<'_>> for FormatSuite {
write!(f, [first.format()])?; write!(f, [first.format()])?;
} }
SuiteKind::Function => { SuiteKind::Function => {
if let Some(constant) = get_docstring(first, &f.context().locator()) { if let Some(constant) = get_docstring(first) {
write!( write!(
f, f,
[ [
@ -95,7 +93,7 @@ impl FormatRule<Suite, PyFormatContext<'_>> for FormatSuite {
} }
} }
SuiteKind::Class => { SuiteKind::Class => {
if let Some(constant) = get_docstring(first, &f.context().locator()) { if let Some(constant) = get_docstring(first) {
if !comments.has_leading_comments(first) if !comments.has_leading_comments(first)
&& lines_before(first.start(), source) > 1 && lines_before(first.start(), source) > 1
{ {
@ -257,27 +255,21 @@ const fn is_import_definition(stmt: &Stmt) -> bool {
} }
/// Checks if the statement is a simple string that can be formatted as a docstring /// Checks if the statement is a simple string that can be formatted as a docstring
fn get_docstring<'a>(stmt: &'a Stmt, locator: &Locator) -> Option<&'a ExprConstant> { fn get_docstring(stmt: &Stmt) -> Option<&ExprConstant> {
let Stmt::Expr(ast::StmtExpr { value, .. }) = stmt else { let stmt_expr = stmt.as_expr_stmt()?;
return None; let expr_constant = stmt_expr.value.as_constant_expr()?;
}; if matches!(
expr_constant.value,
let Expr::Constant(constant) = value.as_ref() else { Constant::Str(ast::StringConstant {
return None; implicit_concatenated: false,
};
if let ExprConstant {
value: Constant::Str(..),
range,
.. ..
} = constant })
{ ) {
if is_implicit_concatenation(locator.slice(*range)) { Some(expr_constant)
return None; } else {
}
return Some(constant);
}
None None
} }
}
impl FormatRuleWithOptions<Suite, PyFormatContext<'_>> for FormatSuite { impl FormatRuleWithOptions<Suite, PyFormatContext<'_>> for FormatSuite {
type Options = SuiteKind; type Options = SuiteKind;