Avoid multiline expression if format specifier is present (#11123)

## Summary

This PR fixes the bug where the formatter would format an f-string and
could potentially change the AST.

For a triple-quoted f-string, the element can't be formatted into
multiline if it has a format specifier because otherwise the newline
would be treated as part of the format specifier.

Given the following f-string:
```python
f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
    variable:.3f} ddddddddddddddd eeeeeeee"""
```

The formatter sees that the f-string is already multiline so it assumes
that it can contain line breaks i.e., broken into multiple lines. But,
in this specific case we can't format it as:

```python
f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
    variable:.3f
} ddddddddddddddd eeeeeeee"""
```
                     
Because the format specifier string would become ".3f\n", which is not
the original string (`.3f`).

If the original source code already contained a newline, they'll be
preserved. For example:
```python
f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
    variable:.3f
} ddddddddddddddd eeeeeeee"""
```

The above will be formatted as:
```py
f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {variable:.3f
} ddddddddddddddd eeeeeeee"""
```

Note that the newline after `.3f` is part of the format specifier which
needs to be preserved.
The Python version is irrelevant in this case.

fixes: #10040 

## Test Plan

Add some test cases to verify this behavior.
This commit is contained in:
Dhruv Manilawala 2024-04-26 19:04:38 +05:30 committed by GitHub
parent 16a1f3cbcc
commit 77a72ecd38
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 220 additions and 31 deletions

View file

@ -129,6 +129,10 @@ x = f"{ # comment 12
{'x': 1, 'y': 2} }"
x = f"{ # comment 13
{'x': 1, 'y': 2} = }"
# But, if there's a format specifier or a conversion flag then we don't need to add
# any whitespace at the end
x = f"aaaaa { {'aaaaaa', 'bbbbbbb', 'ccccccccc'}!s} bbbbbb"
x = f"aaaaa { {'aaaaaa', 'bbbbbbb', 'ccccccccc'}:.3f} bbbbbb"
# But, in this case, we would split the expression itself because it exceeds the line
# length limit so we need not add the extra space.
@ -207,6 +211,29 @@ f"{ # comment 15
}" # comment 19
# comment 20
# Single-quoted f-strings with a format specificer can be multiline
f"aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
variable:.3f} ddddddddddddddd eeeeeeee"
# But, if it's triple-quoted then we can't or the format specificer will have a
# trailing newline
f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
variable:.3f} ddddddddddddddd eeeeeeee"""
# But, we can break the ones which don't have a format specifier
f"""fooooooooooooooooooo barrrrrrrrrrrrrrrrrrr {
xxxxxxxxxxxxxxx:.3f} aaaaaaaaaaaaaaaaa { xxxxxxxxxxxxxxxxxxxx } bbbbbbbbbbbb"""
# Throw in a random comment in it but surpise, this is not a comment but just a text
# which is part of the format specifier
aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {
aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
# comment
} cccccccccc"""
aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {
aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
# comment} cccccccccc"""
# Conversion flags
#
# This is not a valid Python code because of the additional whitespace between the `!`

View file

@ -211,7 +211,7 @@ impl<'fmt, 'ast, 'buf> JoinCommaSeparatedBuilder<'fmt, 'ast, 'buf> {
if let FStringState::InsideExpressionElement(context) =
self.fmt.context().f_string_state()
{
if context.layout().is_flat() {
if !context.can_contain_line_breaks() {
return Ok(());
}
}

View file

@ -1,5 +1,5 @@
use crate::comments::Comments;
use crate::other::f_string::FStringContext;
use crate::other::f_string_element::FStringExpressionElementContext;
use crate::PyFormatOptions;
use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode};
use ruff_python_ast::str::Quote;
@ -128,13 +128,13 @@ impl Debug for PyFormatContext<'_> {
}
}
#[derive(Copy, Clone, Debug, Default)]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) enum FStringState {
/// The formatter is inside an f-string expression element i.e., between the
/// curly brace in `f"foo {x}"`.
///
/// The containing `FStringContext` is the surrounding f-string context.
InsideExpressionElement(FStringContext),
InsideExpressionElement(FStringExpressionElementContext),
/// The formatter is outside an f-string.
#[default]
Outside,

View file

@ -135,7 +135,7 @@ impl FStringLayout {
}
}
pub(crate) const fn is_flat(self) -> bool {
matches!(self, Self::Flat)
pub(crate) const fn is_multiline(self) -> bool {
matches!(self, FStringLayout::Multiline)
}
}

View file

@ -64,15 +64,63 @@ impl Format<PyFormatContext<'_>> for FormatFStringLiteralElement<'_> {
}
}
/// Context representing an f-string expression element.
#[derive(Clone, Copy, Debug)]
pub(crate) struct FStringExpressionElementContext {
/// The context of the parent f-string containing this expression element.
parent_context: FStringContext,
/// Indicates whether this expression element has format specifier or not.
has_format_spec: bool,
}
impl FStringExpressionElementContext {
/// Returns the [`FStringContext`] containing this expression element.
pub(crate) fn f_string(self) -> FStringContext {
self.parent_context
}
/// Returns `true` if the expression element can contain line breaks.
pub(crate) fn can_contain_line_breaks(self) -> bool {
self.parent_context.layout().is_multiline()
// For a triple-quoted f-string, the element can't be formatted into multiline if it
// has a format specifier because otherwise the newline would be treated as part of the
// format specifier.
//
// Given the following f-string:
// ```python
// f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {variable:.3f} ddddddddddddddd eeeeeeee"""
// ```
//
// We can't format it as:
// ```python
// f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
// variable:.3f
// } ddddddddddddddd eeeeeeee"""
// ```
//
// Here, the format specifier string would become ".3f\n", which is not what we want.
// But, if the original source code already contained a newline, they'll be preserved.
//
// The Python version is irrelevant in this case.
&& !(self.parent_context.kind().is_triple_quoted() && self.has_format_spec)
}
}
/// Formats an f-string expression element.
pub(crate) struct FormatFStringExpressionElement<'a> {
element: &'a FStringExpressionElement,
context: FStringContext,
context: FStringExpressionElementContext,
}
impl<'a> FormatFStringExpressionElement<'a> {
pub(crate) fn new(element: &'a FStringExpressionElement, context: FStringContext) -> Self {
Self { element, context }
Self {
element,
context: FStringExpressionElementContext {
parent_context: context,
has_format_spec: element.format_spec.is_some(),
},
}
}
}
@ -153,10 +201,10 @@ impl Format<PyFormatContext<'_>> for FormatFStringExpressionElement<'_> {
// added to maintain consistency.
Expr::Dict(_) | Expr::DictComp(_) | Expr::Set(_) | Expr::SetComp(_) => {
Some(format_with(|f| {
if self.context.layout().is_flat() {
space().fmt(f)
} else {
if self.context.can_contain_line_breaks() {
soft_line_break_or_space().fmt(f)
} else {
space().fmt(f)
}
}))
}
@ -183,12 +231,9 @@ impl Format<PyFormatContext<'_>> for FormatFStringExpressionElement<'_> {
token(":").fmt(f)?;
f.join()
.entries(
format_spec
.elements
.iter()
.map(|element| FormatFStringElement::new(element, self.context)),
)
.entries(format_spec.elements.iter().map(|element| {
FormatFStringElement::new(element, self.context.f_string())
}))
.finish()?;
// These trailing comments can only occur if the format specifier is
@ -205,7 +250,11 @@ impl Format<PyFormatContext<'_>> for FormatFStringExpressionElement<'_> {
trailing_comments(comments.trailing(self.element)).fmt(f)?;
}
bracket_spacing.fmt(f)
if conversion.is_none() && format_spec.is_none() {
bracket_spacing.fmt(f)?;
}
Ok(())
});
let open_parenthesis_comments = if dangling_item_comments.is_empty() {
@ -219,16 +268,16 @@ impl Format<PyFormatContext<'_>> for FormatFStringExpressionElement<'_> {
{
let mut f = WithNodeLevel::new(NodeLevel::ParenthesizedExpression, f);
if self.context.layout().is_flat() {
let mut buffer = RemoveSoftLinesBuffer::new(&mut *f);
write!(buffer, [open_parenthesis_comments, item])?;
} else {
if self.context.can_contain_line_breaks() {
group(&format_args![
open_parenthesis_comments,
soft_block_indent(&item)
])
.fmt(&mut f)?;
} else {
let mut buffer = RemoveSoftLinesBuffer::new(&mut *f);
write!(buffer, [open_parenthesis_comments, item])?;
}
}

View file

@ -60,7 +60,7 @@ impl StringNormalizer {
// The reason to preserve the quotes is based on the assumption that
// the original f-string is valid in terms of quoting, and we don't
// want to change that to make it invalid.
if (context.kind().is_triple_quoted() && !string.kind().is_triple_quoted())
if (context.f_string().kind().is_triple_quoted() && !string.kind().is_triple_quoted())
|| self.target_version.supports_pep_701()
{
self.quoting

View file

@ -135,6 +135,10 @@ x = f"{ # comment 12
{'x': 1, 'y': 2} }"
x = f"{ # comment 13
{'x': 1, 'y': 2} = }"
# But, if there's a format specifier or a conversion flag then we don't need to add
# any whitespace at the end
x = f"aaaaa { {'aaaaaa', 'bbbbbbb', 'ccccccccc'}!s} bbbbbb"
x = f"aaaaa { {'aaaaaa', 'bbbbbbb', 'ccccccccc'}:.3f} bbbbbb"
# But, in this case, we would split the expression itself because it exceeds the line
# length limit so we need not add the extra space.
@ -213,6 +217,29 @@ f"{ # comment 15
}" # comment 19
# comment 20
# Single-quoted f-strings with a format specificer can be multiline
f"aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
variable:.3f} ddddddddddddddd eeeeeeee"
# But, if it's triple-quoted then we can't or the format specificer will have a
# trailing newline
f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
variable:.3f} ddddddddddddddd eeeeeeee"""
# But, we can break the ones which don't have a format specifier
f"""fooooooooooooooooooo barrrrrrrrrrrrrrrrrrr {
xxxxxxxxxxxxxxx:.3f} aaaaaaaaaaaaaaaaa { xxxxxxxxxxxxxxxxxxxx } bbbbbbbbbbbb"""
# Throw in a random comment in it but surpise, this is not a comment but just a text
# which is part of the format specifier
aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {
aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
# comment
} cccccccccc"""
aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {
aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
# comment} cccccccccc"""
# Conversion flags
#
# This is not a valid Python code because of the additional whitespace between the `!`
@ -442,6 +469,10 @@ x = f"{ # comment 12
}"
x = f"{ # comment 13
{'x': 1, 'y': 2} = }"
# But, if there's a format specifier or a conversion flag then we don't need to add
# any whitespace at the end
x = f"aaaaa { {'aaaaaa', 'bbbbbbb', 'ccccccccc'}!s} bbbbbb"
x = f"aaaaa { {'aaaaaa', 'bbbbbbb', 'ccccccccc'}:.3f} bbbbbb"
# But, in this case, we would split the expression itself because it exceeds the line
# length limit so we need not add the extra space.
@ -526,6 +557,28 @@ f"{ # comment 15
}" # comment 19
# comment 20
# Single-quoted f-strings with a format specificer can be multiline
f"aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
variable:.3f
} ddddddddddddddd eeeeeeee"
# But, if it's triple-quoted then we can't or the format specificer will have a
# trailing newline
f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {variable:.3f} ddddddddddddddd eeeeeeee"""
# But, we can break the ones which don't have a format specifier
f"""fooooooooooooooooooo barrrrrrrrrrrrrrrrrrr {xxxxxxxxxxxxxxx:.3f} aaaaaaaaaaaaaaaaa {
xxxxxxxxxxxxxxxxxxxx
} bbbbbbbbbbbb"""
# Throw in a random comment in it but surpise, this is not a comment but just a text
# which is part of the format specifier
aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
# comment
} cccccccccc"""
aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
# comment} cccccccccc"""
# Conversion flags
#
# This is not a valid Python code because of the additional whitespace between the `!`
@ -744,6 +797,10 @@ x = f"{ # comment 12
{'x': 1, 'y': 2} }"
x = f"{ # comment 13
{'x': 1, 'y': 2} = }"
# But, if there's a format specifier or a conversion flag then we don't need to add
# any whitespace at the end
x = f"aaaaa { {'aaaaaa', 'bbbbbbb', 'ccccccccc'}!s} bbbbbb"
x = f"aaaaa { {'aaaaaa', 'bbbbbbb', 'ccccccccc'}:.3f} bbbbbb"
# But, in this case, we would split the expression itself because it exceeds the line
# length limit so we need not add the extra space.
@ -820,6 +877,29 @@ f"{ # comment 15
}" # comment 19
# comment 20
# Single-quoted f-strings with a format specificer can be multiline
f"aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
variable:.3f} ddddddddddddddd eeeeeeee"
# But, if it's triple-quoted then we can't or the format specificer will have a
# trailing newline
f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
variable:.3f} ddddddddddddddd eeeeeeee"""
# But, we can break the ones which don't have a format specifier
f"""fooooooooooooooooooo barrrrrrrrrrrrrrrrrrr {
xxxxxxxxxxxxxxx:.3f} aaaaaaaaaaaaaaaaa { xxxxxxxxxxxxxxxxxxxx } bbbbbbbbbbbb"""
# Throw in a random comment in it but surpise, this is not a comment but just a text
# which is part of the format specifier
aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {
aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
# comment
} cccccccccc"""
aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {
aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
# comment} cccccccccc"""
# Conversion flags
#
# This is not a valid Python code because of the additional whitespace between the `!`
@ -1014,8 +1094,8 @@ hello {
+}"
x = f"{ # comment 13
{'x': 1, 'y': 2} = }"
@@ -135,7 +143,11 @@
# But, if there's a format specifier or a conversion flag then we don't need to add
@@ -139,7 +147,11 @@
}"
# And, split the expression itself because it exceeds the line length.
xxxxxxx = f"{
@ -1028,7 +1108,7 @@ hello {
}"
# Quotes
@@ -148,13 +160,13 @@
@@ -152,13 +164,13 @@
# Here, the formatter will remove the escapes which is correct because they aren't allowed
# pre 3.12. This means we can assume that the f-string is used in the context of 3.12.
@ -1044,7 +1124,7 @@ hello {
# But if the inner string is also triple-quoted then we should preserve the existing quotes.
f"""test {'''inner'''}"""
@@ -167,38 +179,42 @@
@@ -171,63 +183,66 @@
f"aaaaaaa {['aaaaaaaaaaaaaaa', 'bbbbbbbbbbbbb', 'ccccccccccccccccc', 'ddddddddddddddd', 'eeeeeeeeeeeeee']} aaaaaaa"
# And, if the trailing comma is already present, we still need to remove it.
@ -1106,7 +1186,40 @@ hello {
# comment 18
}" # comment 19
# comment 20
@@ -208,24 +224,21 @@
# Single-quoted f-strings with a format specificer can be multiline
f"aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
- variable:.3f} ddddddddddddddd eeeeeeee"
+ variable:.3f
+} ddddddddddddddd eeeeeeee"
# But, if it's triple-quoted then we can't or the format specificer will have a
# trailing newline
-f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {
- variable:.3f} ddddddddddddddd eeeeeeee"""
+f"""aaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbb ccccccccccc {variable:.3f} ddddddddddddddd eeeeeeee"""
# But, we can break the ones which don't have a format specifier
-f"""fooooooooooooooooooo barrrrrrrrrrrrrrrrrrr {
- xxxxxxxxxxxxxxx:.3f} aaaaaaaaaaaaaaaaa { xxxxxxxxxxxxxxxxxxxx } bbbbbbbbbbbb"""
+f"""fooooooooooooooooooo barrrrrrrrrrrrrrrrrrr {xxxxxxxxxxxxxxx:.3f} aaaaaaaaaaaaaaaaa {
+ xxxxxxxxxxxxxxxxxxxx
+} bbbbbbbbbbbb"""
# Throw in a random comment in it but surpise, this is not a comment but just a text
# which is part of the format specifier
-aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {
- aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
+aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
# comment
} cccccccccc"""
-aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {
- aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
+aaaaaaaaaaa = f"""asaaaaaaaaaaaaaaaa {aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd:.3f
# comment} cccccccccc"""
# Conversion flags
@@ -235,24 +250,21 @@
# This is not a valid Python code because of the additional whitespace between the `!`
# and conversion type. But, our parser isn't strict about this. This should probably be
# removed once we have a strict parser.
@ -1138,7 +1251,7 @@ hello {
x = f"""
{ # comment 22
@@ -234,19 +247,19 @@
@@ -261,19 +273,19 @@
# Here, the debug expression is in a nested f-string so we should start preserving
# whitespaces from that point onwards. This means we should format the outer f-string.
@ -1166,7 +1279,7 @@ hello {
# comment 27
# comment 28
} woah {x}"
@@ -260,19 +273,19 @@
@@ -287,19 +299,19 @@
if indent2:
foo = f"""hello world
hello {