Join implicit concatenated strings when they fit on a line (#13663)

This commit is contained in:
Micha Reiser 2024-10-24 11:52:22 +02:00 committed by GitHub
parent e402e27a09
commit 73ee72b665
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 3907 additions and 363 deletions

View file

@ -6,6 +6,7 @@ use crate::comments::SourceComment;
use crate::expression::maybe_parenthesize_expression;
use crate::expression::parentheses::Parenthesize;
use crate::preview::is_join_implicit_concatenated_string_enabled;
use crate::{has_skip_comment, prelude::*};
#[derive(Default)]
@ -29,12 +30,18 @@ impl FormatNodeRule<StmtAssert> for FormatStmtAssert {
)?;
if let Some(msg) = msg {
let parenthesize = if is_join_implicit_concatenated_string_enabled(f.context()) {
Parenthesize::IfBreaksParenthesized
} else {
Parenthesize::IfBreaks
};
write!(
f,
[
token(","),
space(),
maybe_parenthesize_expression(msg, item, Parenthesize::IfBreaks),
maybe_parenthesize_expression(msg, item, parenthesize),
]
)?;
}

View file

@ -1,6 +1,6 @@
use ruff_formatter::{format_args, write, FormatError};
use ruff_formatter::{format_args, write, FormatError, RemoveSoftLinesBuffer};
use ruff_python_ast::{
AnyNodeRef, Expr, ExprAttribute, ExprCall, Operator, StmtAssign, TypeParams,
AnyNodeRef, Expr, ExprAttribute, ExprCall, Operator, StmtAssign, StringLike, TypeParams,
};
use crate::builders::parenthesize_if_expands;
@ -16,7 +16,11 @@ use crate::expression::{
can_omit_optional_parentheses, has_own_parentheses, has_parentheses,
maybe_parenthesize_expression,
};
use crate::preview::is_join_implicit_concatenated_string_enabled;
use crate::statement::trailing_semicolon;
use crate::string::implicit::{
FormatImplicitConcatenatedStringExpanded, FormatImplicitConcatenatedStringFlat,
};
use crate::{has_skip_comment, prelude::*};
#[derive(Default)]
@ -281,8 +285,11 @@ impl Format<PyFormatContext<'_>> for FormatStatementsLastExpression<'_> {
match self {
FormatStatementsLastExpression::LeftToRight { value, statement } => {
let can_inline_comment = should_inline_comments(value, *statement, f.context());
let format_implicit_flat = StringLike::try_from(*value).ok().and_then(|string| {
FormatImplicitConcatenatedStringFlat::new(string, f.context())
});
if !can_inline_comment {
if !can_inline_comment && format_implicit_flat.is_none() {
return maybe_parenthesize_expression(
value,
*statement,
@ -301,28 +308,149 @@ impl Format<PyFormatContext<'_>> for FormatStatementsLastExpression<'_> {
) {
let group_id = f.group_id("optional_parentheses");
let f = &mut WithNodeLevel::new(NodeLevel::Expression(Some(group_id)), f);
best_fit_parenthesize(&format_with(|f| {
// Special case for implicit concatenated strings in assignment value positions.
// The special handling is necessary to prevent an instability where an assignment has
// a trailing own line comment and the implicit concatenated string fits on the line,
// but only if the comment doesn't get inlined.
//
// ```python
// ____aaa = (
// "aaaaaaaaaaaaaaaaaaaaa" "aaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbvvvvvvvvvvvvvvv"
// ) # c
// ```
//
// Without the special handling, this would get formatted to:
// ```python
// ____aaa = (
// "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbvvvvvvvvvvvvvvv"
// ) # c
// ```
//
// However, this now gets reformatted again because Ruff now takes the `BestFit` layout for the string
// because the value is no longer an implicit concatenated string.
// ```python
// ____aaa = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbvvvvvvvvvvvvvvv" # c
// ```
//
// The special handling here ensures that the implicit concatenated string only gets
// joined **if** it fits with the trailing comment inlined. Otherwise, keep the multiline
// formatting.
if let Some(flat) = format_implicit_flat {
inline_comments.mark_formatted();
let string = flat.string();
value.format().with_options(Parentheses::Never).fmt(f)?;
let flat = format_with(|f| {
if string.is_fstring() {
let mut buffer = RemoveSoftLinesBuffer::new(&mut *f);
if !inline_comments.is_empty() {
// If the expressions exceeds the line width, format the comments in the parentheses
if_group_breaks(&inline_comments).fmt(f)?;
write!(buffer, [flat])
} else {
flat.fmt(f)
}
})
.memoized();
// F-String containing an expression with a magic trailing comma, a comment, or a
// multiline debug expression should never be joined. Use the default layout.
// ```python
// aaaa = f"abcd{[
// 1,
// 2,
// ]}" "more"
// ```
if string.is_fstring() && flat.inspect(f)?.will_break() {
inline_comments.mark_unformatted();
return write!(
f,
[maybe_parenthesize_expression(
value,
*statement,
Parenthesize::IfBreaks,
)]
);
}
Ok(())
}))
.with_group_id(Some(group_id))
.fmt(f)?;
let expanded = format_with(|f| {
let f =
&mut WithNodeLevel::new(NodeLevel::Expression(Some(group_id)), f);
if !inline_comments.is_empty() {
// If the line fits into the line width, format the comments after the parenthesized expression
if_group_fits_on_line(&inline_comments)
write!(f, [FormatImplicitConcatenatedStringExpanded::new(string)])
});
// Join the implicit concatenated string if it fits on a single line
// ```python
// a = "testmorelong" # comment
// ```
let single_line = format_with(|f| write!(f, [flat, inline_comments]));
// Parenthesize the string but join the implicit concatenated string and inline the comment.
// ```python
// a = (
// "testmorelong" # comment
// )
// ```
let joined_parenthesized = format_with(|f| {
group(&format_args![
token("("),
soft_block_indent(&format_args![flat, inline_comments]),
token(")"),
])
.with_group_id(Some(group_id))
.should_expand(true)
.fmt(f)
});
// Keep the implicit concatenated string multiline and don't inline the comment.
// ```python
// a = (
// "test"
// "more"
// "long"
// ) # comment
// ```
let implicit_expanded = format_with(|f| {
group(&format_args![
token("("),
block_indent(&expanded),
token(")"),
inline_comments,
])
.with_group_id(Some(group_id))
.should_expand(true)
.fmt(f)
});
// We can't use `optional_parentheses` here because the `inline_comments` contains
// a `expand_parent` which results in an instability because the next format
// collapses the parentheses.
// We can't use `parenthesize_if_expands` because it defaults to
// the *flat* layout when the expanded layout doesn't fit.
best_fitting![single_line, joined_parenthesized, implicit_expanded]
.with_mode(BestFittingMode::AllLines)
.fmt(f)?;
} else {
best_fit_parenthesize(&format_once(|f| {
inline_comments.mark_formatted();
value.format().with_options(Parentheses::Never).fmt(f)?;
if !inline_comments.is_empty() {
// If the expressions exceeds the line width, format the comments in the parentheses
if_group_breaks(&inline_comments).fmt(f)?;
}
Ok(())
}))
.with_group_id(Some(group_id))
.fmt(f)?;
if !inline_comments.is_empty() {
// If the line fits into the line width, format the comments after the parenthesized expression
if_group_fits_on_line(&inline_comments)
.with_group_id(Some(group_id))
.fmt(f)?;
}
}
Ok(())
@ -339,10 +467,14 @@ impl Format<PyFormatContext<'_>> for FormatStatementsLastExpression<'_> {
statement,
} => {
let should_inline_comments = should_inline_comments(value, *statement, f.context());
let format_implicit_flat = StringLike::try_from(*value).ok().and_then(|string| {
FormatImplicitConcatenatedStringFlat::new(string, f.context())
});
// Use the normal `maybe_parenthesize_layout` for splittable `value`s.
if !should_inline_comments
&& !should_non_inlineable_use_best_fit(value, *statement, f.context())
&& format_implicit_flat.is_none()
{
return write!(
f,
@ -364,7 +496,7 @@ impl Format<PyFormatContext<'_>> for FormatStatementsLastExpression<'_> {
let expression_comments = comments.leading_dangling_trailing(*value);
// Don't inline comments for attribute and call expressions for black compatibility
let inline_comments = if should_inline_comments {
let inline_comments = if should_inline_comments || format_implicit_flat.is_some() {
OptionalParenthesesInlinedComments::new(
&expression_comments,
*statement,
@ -396,13 +528,14 @@ impl Format<PyFormatContext<'_>> for FormatStatementsLastExpression<'_> {
// Prevent inline comments to be formatted as part of the expression.
inline_comments.mark_formatted();
let mut last_target = before_operator.memoized();
let last_target = before_operator.memoized();
let last_target_breaks = last_target.inspect(f)?.will_break();
// Don't parenthesize the `value` if it is known that the target will break.
// This is mainly a performance optimisation that avoids unnecessary memoization
// and using the costly `BestFitting` layout if it is already known that only the last variant
// can ever fit because the left breaks.
if last_target.inspect(f)?.will_break() {
if format_implicit_flat.is_none() && last_target_breaks {
return write!(
f,
[
@ -416,13 +549,29 @@ impl Format<PyFormatContext<'_>> for FormatStatementsLastExpression<'_> {
);
}
let format_value = value.format().with_options(Parentheses::Never).memoized();
let format_value = format_with(|f| {
if let Some(format_implicit_flat) = format_implicit_flat.as_ref() {
if format_implicit_flat.string().is_fstring() {
// Remove any soft line breaks emitted by the f-string formatting.
// This is important when formatting f-strings as part of an assignment right side
// because `best_fit_parenthesize` will otherwise still try to break inner
// groups if wrapped in a `group(..).should_expand(true)`
let mut buffer = RemoveSoftLinesBuffer::new(&mut *f);
write!(buffer, [format_implicit_flat])
} else {
format_implicit_flat.fmt(f)
}
} else {
value.format().with_options(Parentheses::Never).fmt(f)
}
})
.memoized();
// Tries to fit the `left` and the `value` on a single line:
// ```python
// a = b = c
// ```
let format_flat = format_with(|f| {
let single_line = format_with(|f| {
write!(
f,
[
@ -443,19 +592,21 @@ impl Format<PyFormatContext<'_>> for FormatStatementsLastExpression<'_> {
// c
// )
// ```
let format_parenthesize_value = format_with(|f| {
write!(
f,
[
last_target,
space(),
operator,
space(),
token("("),
block_indent(&format_args![format_value, inline_comments]),
token(")")
]
)
let flat_target_parenthesize_value = format_with(|f| {
write!(f, [last_target, space(), operator, space(), token("("),])?;
if is_join_implicit_concatenated_string_enabled(f.context()) {
group(&soft_block_indent(&format_args![
format_value,
inline_comments
]))
.should_expand(true)
.fmt(f)?;
} else {
block_indent(&format_args![format_value, inline_comments]).fmt(f)?;
}
token(")").fmt(f)
});
// Fall back to parenthesizing (or splitting) the last target part if we can't make the value
@ -466,17 +617,16 @@ impl Format<PyFormatContext<'_>> for FormatStatementsLastExpression<'_> {
// "bbbbb"
// ] = c
// ```
let format_split_left = format_with(|f| {
let split_target_flat_value = format_with(|f| {
if is_join_implicit_concatenated_string_enabled(f.context()) {
group(&last_target).should_expand(true).fmt(f)?;
} else {
last_target.fmt(f)?;
}
write!(
f,
[
last_target,
space(),
operator,
space(),
format_value,
inline_comments
]
[space(), operator, space(), format_value, inline_comments]
)
});
@ -486,7 +636,7 @@ impl Format<PyFormatContext<'_>> for FormatStatementsLastExpression<'_> {
// For attribute chains that contain any parenthesized value: Try expanding the parenthesized value first.
if value.is_call_expr() || value.is_subscript_expr() || value.is_attribute_expr() {
best_fitting![
format_flat,
single_line,
// Avoid parenthesizing the call expression if the `(` fit on the line
format_args![
last_target,
@ -495,12 +645,165 @@ impl Format<PyFormatContext<'_>> for FormatStatementsLastExpression<'_> {
space(),
group(&format_value).should_expand(true),
],
format_parenthesize_value,
format_split_left
flat_target_parenthesize_value,
split_target_flat_value
]
.fmt(f)
} else if let Some(format_implicit_flat) = &format_implicit_flat {
// F-String containing an expression with a magic trailing comma, a comment, or a
// multiline debug expression should never be joined. Use the default layout.
//
// ```python
// aaaa = f"abcd{[
// 1,
// 2,
// ]}" "more"
// ```
if format_implicit_flat.string().is_fstring()
&& format_value.inspect(f)?.will_break()
{
inline_comments.mark_unformatted();
return write!(
f,
[
before_operator,
space(),
operator,
space(),
maybe_parenthesize_expression(
value,
*statement,
Parenthesize::IfBreaks
)
]
);
}
let group_id = f.group_id("optional_parentheses");
let format_expanded = format_with(|f| {
let f = &mut WithNodeLevel::new(NodeLevel::Expression(Some(group_id)), f);
FormatImplicitConcatenatedStringExpanded::new(
StringLike::try_from(*value).unwrap(),
)
.fmt(f)
})
.memoized();
// Keep the target flat, parenthesize the value, and keep it multiline.
//
// ```python
// Literal[ "a", "b"] = (
// "looooooooooooooooooooooooooooooong"
// "string"
// ) # comment
// ```
let flat_target_value_parenthesized_multiline = format_with(|f| {
write!(
f,
[
last_target,
space(),
operator,
space(),
token("("),
group(&soft_block_indent(&format_expanded))
.with_group_id(Some(group_id))
.should_expand(true),
token(")"),
inline_comments
]
)
});
// Expand the parent and parenthesize the joined string with the inlined comment.
//
// ```python
// Literal[
// "a",
// "b",
// ] = (
// "not that long string" # comment
// )
// ```
let split_target_value_parenthesized_flat = format_with(|f| {
write!(
f,
[
group(&last_target).should_expand(true),
space(),
operator,
space(),
token("("),
group(&soft_block_indent(&format_args![
format_value,
inline_comments
]))
.should_expand(true),
token(")")
]
)
});
// The most expanded variant: Expand both the target and the string.
//
// ```python
// Literal[
// "a",
// "b",
// ] = (
// "looooooooooooooooooooooooooooooong"
// "string"
// ) # comment
// ```
let split_target_value_parenthesized_multiline = format_with(|f| {
write!(
f,
[
group(&last_target).should_expand(true),
space(),
operator,
space(),
token("("),
group(&soft_block_indent(&format_expanded))
.with_group_id(Some(group_id))
.should_expand(true),
token(")"),
inline_comments
]
)
});
// This is only a perf optimisation. No point in trying all the "flat-target"
// variants if we know that the last target must break.
if last_target_breaks {
best_fitting![
split_target_flat_value,
split_target_value_parenthesized_flat,
split_target_value_parenthesized_multiline,
]
.with_mode(BestFittingMode::AllLines)
.fmt(f)
} else {
best_fitting![
single_line,
flat_target_parenthesize_value,
flat_target_value_parenthesized_multiline,
split_target_flat_value,
split_target_value_parenthesized_flat,
split_target_value_parenthesized_multiline,
]
.with_mode(BestFittingMode::AllLines)
.fmt(f)
}
} else {
best_fitting![format_flat, format_parenthesize_value, format_split_left].fmt(f)
best_fitting![
single_line,
flat_target_parenthesize_value,
split_target_flat_value
]
.fmt(f)
}
}
}
@ -556,6 +859,12 @@ impl<'a> OptionalParenthesesInlinedComments<'a> {
comment.mark_formatted();
}
}
fn mark_unformatted(&self) {
for comment in self.expression {
comment.mark_unformatted();
}
}
}
impl Format<PyFormatContext<'_>> for OptionalParenthesesInlinedComments<'_> {

View file

@ -138,7 +138,7 @@ impl FormatRule<Suite, PyFormatContext<'_>> for FormatSuite {
SuiteKind::Function | SuiteKind::Class | SuiteKind::TopLevel => {
if let Some(docstring) =
DocstringStmt::try_from_statement(first, self.kind, source_type)
DocstringStmt::try_from_statement(first, self.kind, f.context())
{
SuiteChildStatement::Docstring(docstring)
} else {
@ -179,7 +179,7 @@ impl FormatRule<Suite, PyFormatContext<'_>> for FormatSuite {
// Insert a newline after a module level docstring, but treat
// it as a docstring otherwise. See: https://github.com/psf/black/pull/3932.
self.kind == SuiteKind::TopLevel
&& DocstringStmt::try_from_statement(first.statement(), self.kind, source_type)
&& DocstringStmt::try_from_statement(first.statement(), self.kind, f.context())
.is_some()
};
@ -785,37 +785,23 @@ impl<'a> DocstringStmt<'a> {
fn try_from_statement(
stmt: &'a Stmt,
suite_kind: SuiteKind,
source_type: PySourceType,
context: &PyFormatContext,
) -> Option<DocstringStmt<'a>> {
// Notebooks don't have a concept of modules, therefore, don't recognise the first string as the module docstring.
if source_type.is_ipynb() && suite_kind == SuiteKind::TopLevel {
if context.options().source_type().is_ipynb() && suite_kind == SuiteKind::TopLevel {
return None;
}
let Stmt::Expr(ast::StmtExpr { value, .. }) = stmt else {
return None;
};
match value.as_ref() {
Expr::StringLiteral(ast::ExprStringLiteral { value, .. })
if !value.is_implicit_concatenated() =>
{
Some(DocstringStmt {
docstring: stmt,
suite_kind,
})
}
_ => None,
}
Self::is_docstring_statement(stmt.as_expr_stmt()?, context).then_some(DocstringStmt {
docstring: stmt,
suite_kind,
})
}
pub(crate) fn is_docstring_statement(stmt: &StmtExpr, source_type: PySourceType) -> bool {
if source_type.is_ipynb() {
return false;
}
pub(crate) fn is_docstring_statement(stmt: &StmtExpr, context: &PyFormatContext) -> bool {
if let Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) = stmt.value.as_ref() {
!value.is_implicit_concatenated()
|| !value.iter().any(|literal| context.comments().has(literal))
} else {
false
}