Improve with statement comment handling and expression breaking (#6621)

## Summary

The motivating code here was:

```python
with test as (
    # test
foo):
    pass
```

Which we were formatting as:

```python
with test as
# test
(foo):
    pass
```

`with` statements are oddly difficult. This PR makes a bunch of subtle
modifications and adds a more extensive test suite. For example, we now
only preserve parentheses if there's more than one `WithItem` _or_ a
trailing comma; before, we always preserved.

Our formatting is_not_ the same as Black, but here's a diff of our
formatted code vs. Black's for the `with.py` test suite. The primary
difference is that we tend to break parentheses when they contain
comments rather than move them to the end of the life (this is a
consistent difference that we make across the codebase):

```diff
diff --git a/crates/ruff_python_formatter/foo.py b/crates/ruff_python_formatter/foo.py
index 85e761080..31625c876 100644
--- a/crates/ruff_python_formatter/foo.py
+++ b/crates/ruff_python_formatter/foo.py
@@ -1,6 +1,4 @@
-with (
-    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-), aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:
+with aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:
     ...
     # trailing
 
@@ -16,28 +14,33 @@ with (
     # trailing
 
 
-with a, b:  # a  # comma  # c  # colon
+with (
+    a,  # a  # comma
+    b,  # c
+):  # colon
     ...
 
 
 with (
-    a as  # a  # as
-    # own line
-    b,  # b  # comma
+    a as (  # a  # as
+        # own line
+        b
+    ),  # b  # comma
     c,  # c
 ):  # colon
     ...  # body
     # body trailing own
 
-with (
-    a as  # a  # as
+with a as (  # a  # as
     # own line
-    bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb  # b
-):
+    bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+):  # b
     pass
 
 
-with (a,):  # magic trailing comma
+with (
+    a,
+):  # magic trailing comma
     ...
 
 
@@ -47,6 +50,7 @@ with a:  # should remove brackets
 with aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb as c:
     ...
 
+
 with (
     # leading comment
     a
@@ -74,8 +78,7 @@ with (
 with (
     a  # trailing same line comment
     # trailing own line comment
-    as b
-):
+) as b:
     ...
 
 with (
@@ -87,7 +90,9 @@ with (
 with (
     a
     # trailing own line comment
-) as b:  # trailing as same line comment  # trailing b same line comment
+) as (  # trailing as same line comment
+    b
+):  # trailing b same line comment
     ...
 
 with (
@@ -124,18 +129,24 @@ with (  # comment
     ...
 
 with (  # outer comment
-    CtxManager1() as example1,  # inner comment
+    (  # inner comment
+        CtxManager1()
+    ) as example1,
     CtxManager2() as example2,
     CtxManager3() as example3,
 ):
     ...
 
-with CtxManager() as example:  # outer comment
+with (  # outer comment
+    CtxManager()
+) as example:
     ...
 
 with (  # outer comment
     CtxManager()
-) as example, CtxManager2() as example2:  # inner comment
+) as example, (  # inner comment
+    CtxManager2()
+) as example2:
     ...
 
 with (  # outer comment
@@ -145,7 +156,9 @@ with (  # outer comment
     ...
 
 with (  # outer comment
-    (CtxManager1()),  # inner comment
+    (  # inner comment
+        CtxManager1()
+    ),
     CtxManager2(),
 ) as example:
     ...
@@ -179,7 +192,9 @@ with (
 ):
     pass
 
-with a as (b):  # foo
+with a as (  # foo
+    b
+):
     pass
 
 with f(
@@ -209,17 +224,13 @@ with f(
 ) as b, c as d:
     pass
 
-with (
-    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
-) as b:
+with aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb as b:
     pass
 
 with aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb as b:
     pass
 
-with (
-    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
-) as b, c as d:
+with aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb as b, c as d:
     pass
 
 with (
@@ -230,6 +241,8 @@ with (
     pass
 
 with (
-    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
-) as b, c as d:
+    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+    + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb as b,
+    c as d,
+):
     pass
```

Closes https://github.com/astral-sh/ruff/issues/6600.
## Test Plan

Before:

| project      | similarity index |
|--------------|------------------|
| cpython      | 0.75473          |
| django       | 0.99804          |
| transformers | 0.99618          |
| twine        | 0.99876          |
| typeshed     | 0.74292          |
| warehouse    | 0.99601          |
| zulip        | 0.99727          |

After:

| project      | similarity index |
|--------------|------------------|
| cpython      | 0.75473          |
| django       | 0.99804          |
| transformers | 0.99618          |
| twine        | 0.99876          |
| typeshed     | 0.74292          |
| warehouse    | 0.99601          |
| zulip        | 0.99727          |

`cargo test`
This commit is contained in:
Charlie Marsh 2023-08-17 23:30:38 -04:00 committed by GitHub
parent 26bba11be6
commit 1811312722
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 472 additions and 98 deletions

View file

@ -1,11 +1,10 @@
use ruff_formatter::{format_args, write, Argument, Arguments};
use ruff_python_ast::Ranged;
use ruff_python_trivia::{SimpleToken, SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::{TextRange, TextSize};
use crate::context::{NodeLevel, WithNodeLevel};
use crate::other::commas::has_magic_trailing_comma;
use crate::prelude::*;
use crate::MagicTrailingComma;
/// Adds parentheses and indents `content` if it doesn't fit on a line.
pub(crate) fn parenthesize_if_expands<'ast, T>(content: &T) -> ParenthesizeIfExpands<'_, 'ast>
@ -194,26 +193,11 @@ impl<'fmt, 'ast, 'buf> JoinCommaSeparatedBuilder<'fmt, 'ast, 'buf> {
pub(crate) fn finish(&mut self) -> FormatResult<()> {
self.result.and_then(|_| {
if let Some(last_end) = self.entries.position() {
let magic_trailing_comma = match self.fmt.options().magic_trailing_comma() {
MagicTrailingComma::Respect => {
let first_token = SimpleTokenizer::new(
self.fmt.context().source(),
TextRange::new(last_end, self.sequence_end),
)
.skip_trivia()
// Skip over any closing parentheses belonging to the expression
.find(|token| token.kind() != SimpleTokenKind::RParen);
matches!(
first_token,
Some(SimpleToken {
kind: SimpleTokenKind::Comma,
..
})
)
}
MagicTrailingComma::Ignore => false,
};
let magic_trailing_comma = has_magic_trailing_comma(
TextRange::new(last_end, self.sequence_end),
self.fmt.options(),
self.fmt.context(),
);
// If there is a single entry, only keep the magic trailing comma, don't add it if
// it wasn't there -- unless the trailing comma behavior is set to one-or-more.

View file

@ -247,12 +247,11 @@ pub(crate) struct FormatDanglingOpenParenthesisComments<'a> {
impl Format<PyFormatContext<'_>> for FormatDanglingOpenParenthesisComments<'_> {
fn fmt(&self, f: &mut Formatter<PyFormatContext>) -> FormatResult<()> {
let mut comments = self
for comment in self
.comments
.iter()
.filter(|comment| comment.is_unformatted());
if let Some(comment) = comments.next() {
.filter(|comment| comment.is_unformatted())
{
debug_assert!(
comment.line_position().is_end_of_line(),
"Expected dangling comment to be at the end of the line"
@ -261,16 +260,11 @@ impl Format<PyFormatContext<'_>> for FormatDanglingOpenParenthesisComments<'_> {
write!(
f,
[
line_suffix(&format_args![space(), space(), format_comment(comment)]),
line_suffix(&format_args!(space(), space(), format_comment(comment))),
expand_parent()
]
)?;
comment.mark_formatted();
debug_assert!(
comments.next().is_none(),
"Expected at most one dangling comment"
);
}
Ok(())

View file

@ -1130,9 +1130,8 @@ fn handle_with_item_comment<'a>(
if comment.end() < as_token.start() {
// If before the `as` keyword, then it must be a trailing comment of the context expression.
CommentPlacement::trailing(context_expr, comment)
}
// Trailing end of line comment coming after the `as` keyword`.
else if comment.line_position().is_end_of_line() {
} else if comment.line_position().is_end_of_line() {
// Trailing end of line comment coming after the `as` keyword`.
CommentPlacement::dangling(comment.enclosing_node(), comment)
} else {
CommentPlacement::leading(optional_vars, comment)

View file

@ -11,7 +11,7 @@ use std::str::FromStr;
serde(default)
)]
pub struct PyFormatOptions {
/// Whether we're in a `.py` file or `.pyi` file, which have different rules
/// Whether we're in a `.py` file or `.pyi` file, which have different rules.
source_type: PySourceType,
/// Specifies the indent style:
@ -27,7 +27,7 @@ pub struct PyFormatOptions {
/// The preferred quote style to use (single vs double quotes).
quote_style: QuoteStyle,
/// Whether to expand lists or elements if they have a trailing comma such as `(a, b,)`
/// Whether to expand lists or elements if they have a trailing comma such as `(a, b,)`.
magic_trailing_comma: MagicTrailingComma,
}

View file

@ -0,0 +1,31 @@
use ruff_python_trivia::{SimpleToken, SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::TextRange;
use crate::prelude::*;
use crate::{MagicTrailingComma, PyFormatOptions};
/// Returns `true` if the range ends with a magic trailing comma (and the magic trailing comma
/// should be respected).
pub(crate) fn has_magic_trailing_comma(
range: TextRange,
options: &PyFormatOptions,
context: &PyFormatContext,
) -> bool {
match options.magic_trailing_comma() {
MagicTrailingComma::Respect => {
let first_token = SimpleTokenizer::new(context.source(), range)
.skip_trivia()
// Skip over any closing parentheses belonging to the expression
.find(|token| token.kind() != SimpleTokenKind::RParen);
matches!(
first_token,
Some(SimpleToken {
kind: SimpleTokenKind::Comma,
..
})
)
}
MagicTrailingComma::Ignore => false,
}
}

View file

@ -1,5 +1,6 @@
pub(crate) mod alias;
pub(crate) mod arguments;
pub(crate) mod commas;
pub(crate) mod comprehension;
pub(crate) mod decorator;
pub(crate) mod elif_else_clause;

View file

@ -1,10 +1,9 @@
use ruff_formatter::{write, Buffer, FormatResult};
use ruff_python_ast::WithItem;
use ruff_formatter::{write, Buffer, FormatResult};
use crate::comments::{leading_comments, trailing_comments, SourceComment};
use crate::comments::SourceComment;
use crate::expression::maybe_parenthesize_expression;
use crate::expression::parentheses::Parenthesize;
use crate::expression::parentheses::{parenthesized, Parentheses, Parenthesize};
use crate::prelude::*;
use crate::{FormatNodeRule, PyFormatter};
@ -22,28 +21,33 @@ impl FormatNodeRule<WithItem> for FormatWithItem {
let comments = f.context().comments().clone();
let trailing_as_comments = comments.dangling_comments(item);
maybe_parenthesize_expression(context_expr, item, Parenthesize::IfRequired).fmt(f)?;
write!(
f,
[maybe_parenthesize_expression(
context_expr,
item,
Parenthesize::IfRequired
)]
)?;
if let Some(optional_vars) = optional_vars {
write!(
f,
[space(), text("as"), trailing_comments(trailing_as_comments)]
)?;
let leading_var_comments = comments.leading_comments(optional_vars.as_ref());
if leading_var_comments.is_empty() {
write!(f, [space(), optional_vars.format()])?;
write!(f, [space(), text("as"), space()])?;
if trailing_as_comments.is_empty() {
write!(f, [optional_vars.format()])?;
} else {
write!(
f,
[
// Otherwise the comment would end up on the same line as the `as`
hard_line_break(),
leading_comments(leading_var_comments),
optional_vars.format()
]
[parenthesized(
"(",
&optional_vars.format().with_options(Parentheses::Never),
")",
)
.with_dangling_comments(trailing_as_comments)]
)?;
}
}
Ok(())
}

View file

@ -4,13 +4,15 @@ use ruff_python_ast::{Ranged, StmtWith};
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::TextRange;
use crate::builders::parenthesize_if_expands;
use crate::comments::SourceComment;
use crate::expression::parentheses::{
in_parentheses_only_soft_line_break_or_space, optional_parentheses, parenthesized,
};
use crate::other::commas;
use crate::prelude::*;
use crate::statement::clause::{clause_header, ClauseHeader};
use crate::FormatNodeRule;
use crate::{FormatNodeRule, PyFormatOptions};
#[derive(Default)]
pub struct FormatStmtWith;
@ -66,8 +68,8 @@ impl FormatNodeRule<StmtWith> for FormatStmtWith {
parenthesized("(", &joined, ")")
.with_dangling_comments(parenthesized_comments)
.fmt(f)?;
} else if are_with_items_parenthesized(item, f.context())? {
optional_parentheses(&format_with(|f| {
} else if should_parenthesize(item, f.options(), f.context())? {
parenthesize_if_expands(&format_with(|f| {
let mut joiner =
f.join_comma_separated(item.body.first().unwrap().start());
@ -81,6 +83,16 @@ impl FormatNodeRule<StmtWith> for FormatStmtWith {
joiner.finish()
}))
.fmt(f)?;
} else if let [item] = item.items.as_slice() {
// This is similar to `maybe_parenthesize_expression`, but we're not dealing with an
// expression here, it's a `WithItem`.
if comments.has_leading_comments(item)
|| comments.has_trailing_own_line_comments(item)
{
optional_parentheses(&item.format()).fmt(f)?;
} else {
item.format().fmt(f)?;
}
} else {
f.join_with(format_args![text(","), space()])
.entries(item.items.iter().formatted())
@ -105,14 +117,50 @@ impl FormatNodeRule<StmtWith> for FormatStmtWith {
}
}
fn are_with_items_parenthesized(with: &StmtWith, context: &PyFormatContext) -> FormatResult<bool> {
let first_with_item = with
.items
.first()
.ok_or(FormatError::syntax_error("Expected at least one with item"))?;
let before_first_with_item = TextRange::new(with.start(), first_with_item.start());
/// Returns `true` if the `with` items should be parenthesized, if at least one item expands.
///
/// Black parenthesizes `with` items if there's more than one item and they're already
/// parenthesized, _or_ there's a single item with a trailing comma.
fn should_parenthesize(
with: &StmtWith,
options: &PyFormatOptions,
context: &PyFormatContext,
) -> FormatResult<bool> {
if has_magic_trailing_comma(with, options, context) {
return Ok(true);
}
let mut tokenizer = SimpleTokenizer::new(context.source(), before_first_with_item)
if are_with_items_parenthesized(with, context)? {
return Ok(true);
}
Ok(false)
}
fn has_magic_trailing_comma(
with: &StmtWith,
options: &PyFormatOptions,
context: &PyFormatContext,
) -> bool {
let Some(last_item) = with.items.last() else {
return false;
};
commas::has_magic_trailing_comma(
TextRange::new(last_item.end(), with.end()),
options,
context,
)
}
fn are_with_items_parenthesized(with: &StmtWith, context: &PyFormatContext) -> FormatResult<bool> {
let [first_item, _, ..] = with.items.as_slice() else {
return Ok(false);
};
let before_first_item = TextRange::new(with.start(), first_item.start());
let mut tokenizer = SimpleTokenizer::new(context.source(), before_first_item)
.skip_trivia()
.skip_while(|t| t.kind() == SimpleTokenKind::Async);