Parenthesize with statements (#5758)

<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

This PR improves the parentheses handling for with items to get closer
to black's formatting.

### Case 1:

```python
# Black / Input
with (
    [
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        "bbbbbbbbbb",
        "cccccccccccccccccccccccccccccccccccccccccc",
        dddddddddddddddddddddddddddddddd,
    ] as example1,
    aaaaaaaaaaaaaaaaaaaaaaaaaa
    + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
    + cccccccccccccccccccccccccccc
    + ddddddddddddddddd as example2,
    CtxManager2() as example2,
    CtxManager2() as example2,
    CtxManager2() as example2,
):
    ...

# Before
with (
    [
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        "bbbbbbbbbb",
        "cccccccccccccccccccccccccccccccccccccccccc",
        dddddddddddddddddddddddddddddddd,
    ] as example1,
    (
        aaaaaaaaaaaaaaaaaaaaaaaaaa
        + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
        + cccccccccccccccccccccccccccc
        + ddddddddddddddddd
    ) as example2,
    CtxManager2() as example2,
    CtxManager2() as example2,
    CtxManager2() as example2,
):
    ...
```

Notice how Ruff wraps the binary expression in an extra set of
parentheses


### Case 2:
Black does not expand the with-items if the with has no parentheses:

```python
# Black / Input
with aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb as c:
    ...

# Before
with (
    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb as c
):
    ...
```

Or 

```python
# Black / Input
with [
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
    "bbbbbbbbbb",
    "cccccccccccccccccccccccccccccccccccccccccc",
    dddddddddddddddddddddddddddddddd,
] as example1, aaaaaaaaaaaaaaaaaaaaaaaaaa * bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb * cccccccccccccccccccccccccccc + ddddddddddddddddd as example2, CtxManager222222222222222() as example2:
    ...

# Before (Same as Case 1)
with (
    [
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        "bbbbbbbbbb",
        "cccccccccccccccccccccccccccccccccccccccccc",
        dddddddddddddddddddddddddddddddd,
    ] as example1,
    (
        aaaaaaaaaaaaaaaaaaaaaaaaaa
        * bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
        * cccccccccccccccccccccccccccc
        + ddddddddddddddddd
    ) as example2,
    CtxManager222222222222222() as example2,
):
    ...

```
## Test Plan

I added new snapshot tests

Improves the django similarity index from 0.973 to 0.977
This commit is contained in:
Micha Reiser 2023-07-15 17:03:09 +02:00 committed by GitHub
parent e1c119fde3
commit 3cda89ecaf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 443 additions and 141 deletions

View file

@ -227,10 +227,23 @@ impl<'fmt, 'ast, 'buf> JoinCommaSeparatedBuilder<'fmt, 'ast, 'buf> {
) -> &mut Self
where
T: Ranged,
{
self.entry_with_line_separator(node, content, soft_line_break_or_space())
}
pub(crate) fn entry_with_line_separator<N, Separator>(
&mut self,
node: &N,
content: &dyn Format<PyFormatContext<'ast>>,
separator: Separator,
) -> &mut Self
where
N: Ranged,
Separator: Format<PyFormatContext<'ast>>,
{
self.result = self.result.and_then(|_| {
if self.end_of_last_entry.is_some() {
write!(self.fmt, [text(","), soft_line_break_or_space()])?;
write!(self.fmt, [text(","), separator])?;
}
self.end_of_last_entry = Some(node.end());

View file

@ -40,6 +40,7 @@ pub(super) fn place_comment<'a>(
handle_expr_if_comment,
handle_comprehension_comment,
handle_trailing_expression_starred_star_end_of_line_comment,
handle_with_item_comment,
];
for handler in HANDLERS {
comment = match handler(comment, locator) {
@ -1232,6 +1233,50 @@ fn handle_trailing_expression_starred_star_end_of_line_comment<'a>(
CommentPlacement::leading(starred.as_any_node_ref(), comment)
}
/// Handles trailing own line comments before the `as` keyword of a with item and
/// end of line comments that are on the same line as the `as` keyword:
///
/// ```python
/// with (
/// a
/// # trailing a own line comment
/// as # trailing as same line comment
/// b
// ): ...
/// ```
fn handle_with_item_comment<'a>(
comment: DecoratedComment<'a>,
locator: &Locator,
) -> CommentPlacement<'a> {
if !comment.enclosing_node().is_with_item() {
return CommentPlacement::Default(comment);
}
// Needs to be a with item with an `as` expression.
let (Some(context_expr), Some(optional_vars)) =
(comment.preceding_node(), comment.following_node())
else {
return CommentPlacement::Default(comment);
};
let as_token = find_only_token_in_range(
TextRange::new(context_expr.end(), optional_vars.start()),
locator,
TokenKind::As,
);
// If before the `as` keyword, then it must be a trailing comment of the context expression.
if comment.end() < as_token.start() {
CommentPlacement::trailing(context_expr, comment)
}
// Trailing end of line comment coming after the `as` keyword`.
else if comment.line_position().is_end_of_line() {
CommentPlacement::dangling(comment.enclosing_node(), comment)
} else {
CommentPlacement::Default(comment)
}
}
/// Looks for a token in the range that contains no other tokens except for parentheses outside
/// the expression ranges
fn find_only_token_in_range(range: TextRange, locator: &Locator, token_kind: TokenKind) -> Token {

View file

@ -15,24 +15,27 @@ impl FormatNodeRule<ExprAwait> for FormatExprAwait {
fn fmt_fields(&self, item: &ExprAwait, f: &mut PyFormatter) -> FormatResult<()> {
let ExprAwait { range: _, value } = item;
let format_value = format_with(|f: &mut PyFormatter| {
if f.context().node_level().is_parenthesized() {
value.format().fmt(f)
} else {
maybe_parenthesize_expression(value, item, Parenthesize::Optional).fmt(f)
}
});
write!(f, [text("await"), space(), format_value])
write!(
f,
[
text("await"),
space(),
maybe_parenthesize_expression(value, item, Parenthesize::IfRequired)
]
)
}
}
impl NeedsParentheses for ExprAwait {
fn needs_parentheses(
&self,
_parent: AnyNodeRef,
parent: AnyNodeRef,
_context: &PyFormatContext,
) -> OptionalParentheses {
OptionalParentheses::Multiline
if parent.is_expr_await() {
OptionalParentheses::Always
} else {
OptionalParentheses::Multiline
}
}
}

View file

@ -176,9 +176,13 @@ impl FormatRule<Operator, PyFormatContext<'_>> for FormatOperator {
impl NeedsParentheses for ExprBinOp {
fn needs_parentheses(
&self,
_parent: AnyNodeRef,
parent: AnyNodeRef,
_context: &PyFormatContext,
) -> OptionalParentheses {
OptionalParentheses::Multiline
if parent.is_expr_await() && !self.op.is_pow() {
OptionalParentheses::Always
} else {
OptionalParentheses::Multiline
}
}
}

View file

@ -159,22 +159,31 @@ impl Format<PyFormatContext<'_>> for MaybeParenthesizeExpression<'_> {
parenthesize,
} = self;
let parenthesize = match parenthesize {
Parenthesize::Optional => {
is_expression_parenthesized(AnyNodeRef::from(*expression), f.context().source())
let comments = f.context().comments();
let preserve_parentheses = parenthesize.is_optional()
&& is_expression_parenthesized(AnyNodeRef::from(*expression), f.context().source());
let has_comments = comments.has_leading_comments(*expression)
|| comments.has_trailing_own_line_comments(*expression);
if preserve_parentheses || has_comments {
return expression.format().with_options(Parentheses::Always).fmt(f);
}
let needs_parentheses = expression.needs_parentheses(*parent, f.context());
let needs_parentheses = match parenthesize {
Parenthesize::IfRequired => {
if !needs_parentheses.is_always() && f.context().node_level().is_parenthesized() {
OptionalParentheses::Never
} else {
needs_parentheses
}
}
Parenthesize::IfBreaks => false,
Parenthesize::Optional | Parenthesize::IfBreaks => needs_parentheses,
};
let parentheses =
if parenthesize || f.context().comments().has_leading_comments(*expression) {
OptionalParentheses::Always
} else {
expression.needs_parentheses(*parent, f.context())
};
match parentheses {
OptionalParentheses::Multiline => {
match needs_parentheses {
OptionalParentheses::Multiline if *parenthesize != Parenthesize::IfRequired => {
if can_omit_optional_parentheses(expression, f.context()) {
optional_parentheses(&expression.format().with_options(Parentheses::Never))
.fmt(f)
@ -186,7 +195,7 @@ impl Format<PyFormatContext<'_>> for MaybeParenthesizeExpression<'_> {
OptionalParentheses::Always => {
expression.format().with_options(Parentheses::Always).fmt(f)
}
OptionalParentheses::Never => {
OptionalParentheses::Never | OptionalParentheses::Multiline => {
expression.format().with_options(Parentheses::Never).fmt(f)
}
}

View file

@ -19,6 +19,12 @@ pub(crate) enum OptionalParentheses {
Never,
}
impl OptionalParentheses {
pub(crate) const fn is_always(self) -> bool {
matches!(self, OptionalParentheses::Always)
}
}
pub(crate) trait NeedsParentheses {
/// Determines if this object needs optional parentheses or if it is safe to omit the parentheses.
fn needs_parentheses(
@ -36,6 +42,17 @@ pub(crate) enum Parenthesize {
/// Parenthesizes the expression only if it doesn't fit on a line.
IfBreaks,
/// Only adds parentheses if absolutely necessary:
/// * The expression is not enclosed by another parenthesized expression and it expands over multiple lines
/// * The expression has leading or trailing comments. Adding parentheses is desired to prevent the comments from wandering.
IfRequired,
}
impl Parenthesize {
pub(crate) const fn is_optional(self) -> bool {
matches!(self, Parenthesize::Optional)
}
}
/// Whether it is necessary to add parentheses around an expression.

View file

@ -280,15 +280,30 @@ if True:
#[test]
fn quick_test() {
let src = r#"
if a * [
bbbbbbbbbbbbbbbbbbbbbb,
cccccccccccccccccccccccccccccdddddddddddddddddddddddddd,
] + a * e * [
ffff,
gggg,
hhhhhhhhhhhhhh,
] * c:
pass
with (
[
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
"bbbbbbbbbb",
"cccccccccccccccccccccccccccccccccccccccccc",
dddddddddddddddddddddddddddddddd,
] as example1,
aaaaaaaaaaaaaaaaaaaaaaaaaa
+ bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+ cccccccccccccccccccccccccccc
+ ddddddddddddddddd as example2,
CtxManager2() as example2,
CtxManager2() as example2,
CtxManager2() as example2,
):
...
with [
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
"bbbbbbbbbb",
"cccccccccccccccccccccccccccccccccccccccccc",
dddddddddddddddddddddddddddddddd,
] as example1, aaaaaaaaaaaaaaaaaaaaaaaaaa * bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb * cccccccccccccccccccccccccccc + ddddddddddddddddd as example2, CtxManager222222222222222() as example2:
...
"#;
// Tokenize once

View file

@ -1,9 +1,12 @@
use rustpython_parser::ast::WithItem;
use ruff_formatter::{write, Buffer, FormatResult};
use crate::comments::trailing_comments;
use crate::expression::maybe_parenthesize_expression;
use crate::expression::parentheses::Parenthesize;
use crate::prelude::*;
use crate::{FormatNodeRule, PyFormatter};
use ruff_formatter::{write, Buffer, FormatResult};
use rustpython_parser::ast::WithItem;
#[derive(Default)]
pub struct FormatWithItem;
@ -16,20 +19,27 @@ impl FormatNodeRule<WithItem> for FormatWithItem {
optional_vars,
} = item;
let inner = format_with(|f| {
let comments = f.context().comments().clone();
let trailing_as_comments = comments.dangling_comments(item);
maybe_parenthesize_expression(context_expr, item, Parenthesize::IfRequired).fmt(f)?;
if let Some(optional_vars) = optional_vars {
write!(
f,
[maybe_parenthesize_expression(
context_expr,
item,
Parenthesize::IfBreaks
)]
[
space(),
text("as"),
trailing_comments(trailing_as_comments),
space(),
optional_vars.format(),
]
)?;
if let Some(optional_vars) = optional_vars {
write!(f, [space(), text("as"), space(), optional_vars.format()])?;
}
Ok(())
});
write!(f, [group(&inner)])
}
Ok(())
}
fn fmt_dangling_comments(&self, _node: &WithItem, _f: &mut PyFormatter) -> FormatResult<()> {
Ok(())
}
}

View file

@ -1,11 +1,15 @@
use ruff_formatter::{write, Buffer, FormatResult};
use ruff_python_ast::node::AnyNodeRef;
use ruff_text_size::TextRange;
use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem};
use crate::builders::parenthesize_if_expands;
use ruff_formatter::{format_args, write, FormatError};
use ruff_python_ast::node::AnyNodeRef;
use crate::comments::trailing_comments;
use crate::expression::parentheses::{
in_parentheses_only_soft_line_break_or_space, optional_parentheses,
};
use crate::prelude::*;
use crate::trivia::{SimpleTokenizer, TokenKind};
use crate::FormatNodeRule;
pub(super) enum AnyStatementWith<'a> {
@ -68,22 +72,39 @@ impl Format<PyFormatContext<'_>> for AnyStatementWith<'_> {
let comments = f.context().comments().clone();
let dangling_comments = comments.dangling_comments(self);
let joined_items = format_with(|f| {
f.join_comma_separated(self.body().first().unwrap().start())
.nodes(self.items().iter())
.finish()
});
write!(
f,
[
self.is_async()
.then_some(format_args![text("async"), space()]),
text("with"),
space()
]
)?;
if self.is_async() {
write!(f, [text("async"), space()])?;
if are_with_items_parenthesized(self, f.context())? {
optional_parentheses(&format_with(|f| {
let mut joiner = f.join_comma_separated(self.body().first().unwrap().start());
for item in self.items() {
joiner.entry_with_line_separator(
item,
&item.format(),
in_parentheses_only_soft_line_break_or_space(),
);
}
joiner.finish()
}))
.fmt(f)?;
} else {
f.join_with(format_args![text(","), space()])
.entries(self.items().iter().formatted())
.finish()?;
}
write!(
f,
[
text("with"),
space(),
group(&parenthesize_if_expands(&joined_items)),
text(":"),
trailing_comments(dangling_comments),
block_indent(&self.body().format())
@ -92,6 +113,34 @@ impl Format<PyFormatContext<'_>> for AnyStatementWith<'_> {
}
}
fn are_with_items_parenthesized(
with: &AnyStatementWith,
context: &PyFormatContext,
) -> FormatResult<bool> {
let first_with_item = with.items().first().ok_or(FormatError::SyntaxError)?;
let before_first_with_item = TextRange::new(with.start(), first_with_item.start());
let mut tokenizer = SimpleTokenizer::new(context.source(), before_first_with_item)
.skip_trivia()
.skip_while(|t| t.kind() == TokenKind::Async);
let with_keyword = tokenizer.next().ok_or(FormatError::SyntaxError)?;
debug_assert_eq!(
with_keyword.kind(),
TokenKind::With,
"Expected with keyword but at {with_keyword:?}"
);
match tokenizer.next() {
Some(left_paren) => {
debug_assert_eq!(left_paren.kind(), TokenKind::LParen);
Ok(true)
}
None => Ok(false),
}
}
#[derive(Default)]
pub struct FormatStmtWith;

View file

@ -193,9 +193,18 @@ pub(crate) enum TokenKind {
/// `in`
In,
/// `as`
As,
/// `match`
Match,
/// `with`
With,
/// `async`
Async,
/// Any other non trivia token.
Other,
@ -272,10 +281,13 @@ impl<'a> SimpleTokenizer<'a> {
fn to_keyword_or_other(&self, range: TextRange) -> TokenKind {
let source = &self.source[range];
match source {
"if" => TokenKind::If,
"as" => TokenKind::As,
"async" => TokenKind::Async,
"else" => TokenKind::Else,
"if" => TokenKind::If,
"in" => TokenKind::In,
"match" => TokenKind::Match, // Match is a soft keyword that depends on the context but we can always lex it as a keyword and leave it to the caller (parser) to decide if it should be handled as an identifier or keyword.
"with" => TokenKind::With,
// ...,
_ => TokenKind::Other, // Potentially an identifier, but only if it isn't a string prefix. We can ignore this for now https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
}