Replace dynamic implicit concatenation detection with parser flag (#6513)

## Summary

In https://github.com/astral-sh/ruff/pull/6512, we added a flag to the
AST to mark implicitly-concatenated string expressions. This PR makes
use of that flag to remove the `is_implicit_concatenation` method.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2023-08-14 10:27:17 -04:00 committed by GitHub
parent 40407dcce5
commit a7cf8f0b77
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 52 additions and 150 deletions

View file

@ -171,7 +171,7 @@ pub(crate) fn definitions(checker: &mut Checker) {
expr.start(),
));
if pydocstyle::helpers::should_ignore_docstring(contents) {
if pydocstyle::helpers::should_ignore_docstring(expr) {
#[allow(deprecated)]
let location = checker.locator.compute_source_location(expr.start());
warn_user!(

View file

@ -2,7 +2,7 @@ use std::collections::BTreeSet;
use ruff_python_ast::call_path::from_qualified_name;
use ruff_python_ast::helpers::map_callable;
use ruff_python_ast::str::is_implicit_concatenation;
use ruff_python_ast::Expr;
use ruff_python_semantic::{Definition, SemanticModel};
use ruff_source_file::UniversalNewlines;
@ -63,9 +63,11 @@ pub(crate) fn should_ignore_definition(
}
/// Check if a docstring should be ignored.
pub(crate) fn should_ignore_docstring(contents: &str) -> bool {
pub(crate) fn should_ignore_docstring(docstring: &Expr) -> bool {
// Avoid analyzing docstrings that contain implicit string concatenations.
// Python does consider these docstrings, but they're almost certainly a
// user error, and supporting them "properly" is extremely difficult.
is_implicit_concatenation(contents)
docstring
.as_constant_expr()
.is_some_and(|constant| constant.value.is_implicit_concatenated())
}

View file

@ -2,11 +2,10 @@ use std::fmt;
use std::str::FromStr;
use num_bigint::BigInt;
use ruff_python_ast::{self as ast, Constant, Expr, Keyword, Ranged};
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::is_implicit_concatenation;
use ruff_python_ast::{self as ast, Constant, Expr, Keyword, Ranged};
use crate::checkers::ast::Checker;
use crate::registry::AsRule;
@ -182,6 +181,11 @@ pub(crate) fn native_literals(
return;
};
// Skip implicit string concatenations.
if value.is_implicit_concatenated() {
return;
}
let Ok(arg_literal_type) = LiteralType::try_from(value) else {
return;
};
@ -192,13 +196,6 @@ pub(crate) fn native_literals(
let arg_code = checker.locator().slice(arg.range());
// Skip implicit string concatenations.
if matches!(arg_literal_type, LiteralType::Str | LiteralType::Bytes)
&& is_implicit_concatenation(arg_code)
{
return;
}
let mut diagnostic = Diagnostic::new(NativeLiterals { literal_type }, expr.range());
if checker.patch(diagnostic.kind.rule()) {
diagnostic.set_fix(Fix::automatic(Edit::range_replacement(

View file

@ -184,68 +184,8 @@ pub fn is_triple_quote(content: &str) -> bool {
TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
}
/// Return `true` if the string expression is an implicit concatenation.
///
/// ## Examples
///
/// ```rust
/// use ruff_python_ast::str::is_implicit_concatenation;
///
/// assert!(is_implicit_concatenation(r#"'abc' 'def'"#));
/// assert!(!is_implicit_concatenation(r#"'abcdef'"#));
/// ```
pub fn is_implicit_concatenation(content: &str) -> bool {
let Some(leading_quote_str) = leading_quote(content) else {
return false;
};
let Some(trailing_quote_str) = trailing_quote(content) else {
return false;
};
// If the trailing quote doesn't match the _expected_ trailing quote, then the string is
// implicitly concatenated.
//
// For example, given:
// ```python
// u"""abc""" 'def'
// ```
//
// The leading quote would be `u"""`, and the trailing quote would be `'`, but the _expected_
// trailing quote would be `"""`. Since `'` does not equal `"""`, we'd return `true`.
if trailing_quote_str != trailing_quote(leading_quote_str).unwrap() {
return true;
}
// Search for any trailing quotes _before_ the end of the string.
let mut rest = &content[leading_quote_str.len()..content.len() - trailing_quote_str.len()];
while let Some(index) = rest.find(trailing_quote_str) {
let mut chars = rest[..index].chars().rev();
if let Some('\\') = chars.next() {
if chars.next() == Some('\\') {
// Either `\\'` or `\\\'` need to test one more character
// If the quote is preceded by `//` then it is not escaped, instead the backslash is escaped.
if chars.next() != Some('\\') {
return true;
}
}
} else {
// If the quote is _not_ escaped, then it's implicitly concatenated.
return true;
}
rest = &rest[index + trailing_quote_str.len()..];
}
// Otherwise, we know the string ends with the expected trailing quote, so it's not implicitly
// concatenated.
false
}
#[cfg(test)]
mod tests {
use crate::str::is_implicit_concatenation;
use super::{
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
TRIPLE_QUOTE_STR_PREFIXES,
@ -270,39 +210,4 @@ mod tests {
}
}
}
#[test]
fn implicit_concatenation() {
// Positive cases.
assert!(is_implicit_concatenation(r#""abc" "def""#));
assert!(is_implicit_concatenation(r#""abc" 'def'"#));
assert!(is_implicit_concatenation(r#""""abc""" "def""#));
assert!(is_implicit_concatenation(r#"'''abc''' 'def'"#));
assert!(is_implicit_concatenation(r#""""abc""" 'def'"#));
assert!(is_implicit_concatenation(r#"'''abc''' "def""#));
assert!(is_implicit_concatenation(r#""""abc""""def""#));
assert!(is_implicit_concatenation(r#"'''abc''''def'"#));
assert!(is_implicit_concatenation(r#""""abc"""'def'"#));
assert!(is_implicit_concatenation(r#"'''abc'''"def""#));
// Negative cases.
assert!(!is_implicit_concatenation(r#""abc""#));
assert!(!is_implicit_concatenation(r#"'abc'"#));
assert!(!is_implicit_concatenation(r#""""abc""""#));
assert!(!is_implicit_concatenation(r#"'''abc'''"#));
assert!(!is_implicit_concatenation(r#""""ab"c""""#));
assert!(!is_implicit_concatenation(r#"'''ab'c'''"#));
assert!(!is_implicit_concatenation(r#""""ab'c""""#));
assert!(!is_implicit_concatenation(r#"'''ab"c'''"#));
assert!(!is_implicit_concatenation(r#""""ab'''c""""#));
assert!(!is_implicit_concatenation(r#"'''ab"""c'''"#));
// Positive cases with escaped quotes.
assert!(is_implicit_concatenation(r#""abc\\""def""#));
assert!(is_implicit_concatenation(r#""abc\\""def""#));
// Negative cases with escaped quotes.
assert!(!is_implicit_concatenation(r#""abc\"def""#));
assert!(!is_implicit_concatenation(r#"'\\\' ""'"#));
}
}

View file

@ -1,13 +1,13 @@
use std::iter;
use ruff_python_ast::{
Constant, Expr, ExprAttribute, ExprBinOp, ExprConstant, ExprUnaryOp, Operator, UnaryOp,
Constant, Expr, ExprAttribute, ExprBinOp, ExprConstant, ExprUnaryOp, Operator, StringConstant,
UnaryOp,
};
use smallvec::SmallVec;
use ruff_formatter::{format_args, write, FormatOwnedWithRule, FormatRefWithRule};
use ruff_python_ast::node::{AnyNodeRef, AstNode};
use ruff_python_ast::str::is_implicit_concatenation;
use crate::comments::{trailing_comments, trailing_node_comments};
use crate::expression::expr_constant::ExprConstantLayout;
@ -157,8 +157,11 @@ impl FormatExprBinOp {
fn layout<'a>(bin_op: &'a ExprBinOp, context: &PyFormatContext) -> BinOpLayout<'a> {
if let Some(
constant @ ExprConstant {
value: Constant::Str(_),
range,
value:
Constant::Str(StringConstant {
implicit_concatenated: true,
..
}),
..
},
) = bin_op.left.as_constant_expr()
@ -169,7 +172,6 @@ impl FormatExprBinOp {
&& context.node_level().is_parenthesized()
&& !comments.has_dangling_comments(constant)
&& !comments.has_dangling_comments(bin_op)
&& is_implicit_concatenation(&context.source()[*range])
{
BinOpLayout::LeftString(constant)
} else {

View file

@ -1,9 +1,7 @@
use ruff_python_ast::{Constant, ExprConstant, Ranged};
use ruff_text_size::{TextLen, TextRange};
use ruff_formatter::FormatRuleWithOptions;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::str::is_implicit_concatenation;
use ruff_python_ast::{Constant, ExprConstant, Ranged};
use ruff_text_size::{TextLen, TextRange};
use crate::expression::number::{FormatComplex, FormatFloat, FormatInt};
use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
@ -80,10 +78,9 @@ impl NeedsParentheses for ExprConstant {
_parent: AnyNodeRef,
context: &PyFormatContext,
) -> OptionalParentheses {
if self.value.is_str() || self.value.is_bytes() {
let contents = context.locator().slice(self.range());
if self.value.is_implicit_concatenated() {
// Don't wrap triple quoted strings
if is_multiline_string(self, context.source()) || !is_implicit_concatenation(contents) {
if is_multiline_string(self, context.source()) {
OptionalParentheses::Never
} else {
OptionalParentheses::Multiline

View file

@ -4,7 +4,6 @@ use bitflags::bitflags;
use ruff_formatter::{format_args, write, FormatError};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::str::is_implicit_concatenation;
use ruff_python_ast::{self as ast, ExprConstant, ExprFString, Ranged};
use ruff_python_parser::lexer::{lex_starts_at, LexicalError, LexicalErrorType};
use ruff_python_parser::{Mode, Tok};
@ -49,6 +48,17 @@ impl<'a> AnyString<'a> {
}
}
}
/// Returns `true` if the string is implicitly concatenated.
fn implicit_concatenated(&self) -> bool {
match self {
Self::Constant(ExprConstant { value, .. }) => value.is_implicit_concatenated(),
Self::FString(ExprFString {
implicit_concatenated,
..
}) => *implicit_concatenated,
}
}
}
impl Ranged for AnyString<'_> {
@ -103,14 +113,11 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
match self.layout {
StringLayout::Default => {
let string_range = self.string.range();
let string_content = f.context().locator().slice(string_range);
if is_implicit_concatenation(string_content) {
if self.string.implicit_concatenated() {
in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
} else {
FormatStringPart::new(
string_range,
self.string.range(),
self.string.quoting(&f.context().locator()),
&f.context().locator(),
f.options().quote_style(),

View file

@ -1,10 +1,8 @@
use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions};
use ruff_python_ast::helpers::is_compound_statement;
use ruff_python_ast::str::is_implicit_concatenation;
use ruff_python_ast::{self as ast, Expr, Ranged, Stmt, Suite};
use ruff_python_ast::{self as ast, Ranged, Stmt, Suite};
use ruff_python_ast::{Constant, ExprConstant};
use ruff_python_trivia::{lines_after_ignoring_trivia, lines_before};
use ruff_source_file::Locator;
use crate::comments::{leading_comments, trailing_comments};
use crate::context::{NodeLevel, WithNodeLevel};
@ -78,7 +76,7 @@ impl FormatRule<Suite, PyFormatContext<'_>> for FormatSuite {
write!(f, [first.format()])?;
}
SuiteKind::Function => {
if let Some(constant) = get_docstring(first, &f.context().locator()) {
if let Some(constant) = get_docstring(first) {
write!(
f,
[
@ -95,7 +93,7 @@ impl FormatRule<Suite, PyFormatContext<'_>> for FormatSuite {
}
}
SuiteKind::Class => {
if let Some(constant) = get_docstring(first, &f.context().locator()) {
if let Some(constant) = get_docstring(first) {
if !comments.has_leading_comments(first)
&& lines_before(first.start(), source) > 1
{
@ -257,27 +255,21 @@ const fn is_import_definition(stmt: &Stmt) -> bool {
}
/// Checks if the statement is a simple string that can be formatted as a docstring
fn get_docstring<'a>(stmt: &'a Stmt, locator: &Locator) -> Option<&'a ExprConstant> {
let Stmt::Expr(ast::StmtExpr { value, .. }) = stmt else {
return None;
};
let Expr::Constant(constant) = value.as_ref() else {
return None;
};
if let ExprConstant {
value: Constant::Str(..),
range,
fn get_docstring(stmt: &Stmt) -> Option<&ExprConstant> {
let stmt_expr = stmt.as_expr_stmt()?;
let expr_constant = stmt_expr.value.as_constant_expr()?;
if matches!(
expr_constant.value,
Constant::Str(ast::StringConstant {
implicit_concatenated: false,
..
} = constant
{
if is_implicit_concatenation(locator.slice(*range)) {
return None;
}
return Some(constant);
}
})
) {
Some(expr_constant)
} else {
None
}
}
impl FormatRuleWithOptions<Suite, PyFormatContext<'_>> for FormatSuite {
type Options = SuiteKind;