Format all attribute dot comments manually (#6825)

## Summary

This PR modifies our formatting of comments around the `.` in an
attribute. Specifically, the goal here is to avoid _reordering_
comments, and the net effect is that we generally leave comments
where-they-are when dealing with comments between around the dot (which
you can also think of as comments between attributes).

All comments around the dot are now treated as dangling and formatted
manually, with the exception of end-of-line or parenthesized comments on
the value, like those marked as trailing here, which remain trailing:

```python
(
    (
        a # trailing end-of-line
        # trailing own-line
    ) # dangling before dot end-of-line
    .b # trailing end-of-line
)
```

Closes https://github.com/astral-sh/ruff/issues/6823.

## Test Plan

`cargo test`

Before:

| project      | similarity index |
|--------------|------------------|
| cpython      | 0.76050          |
| django       | 0.99820          |
| transformers | 0.99800          |
| twine        | 0.99876          |
| typeshed     | 0.99953          |
| warehouse    | 0.99615          |
| zulip        | 0.99729          |

After:

| project      | similarity index |
|--------------|------------------|
| cpython      | 0.76050          |
| django       | 0.99820   |
| transformers | 0.99800          |
| twine        | 0.99876          |
| typeshed     | 0.99953          |
| warehouse    | 0.99615          |
| zulip        | 0.99729          |
This commit is contained in:
Charlie Marsh 2023-08-24 23:50:56 -04:00 committed by GitHub
parent 6f23469e00
commit 474e8fbcd4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 216 additions and 128 deletions

View file

@ -109,3 +109,19 @@ x6 = (
# regression: https://github.com/astral-sh/ruff/issues/6181
(#
()).a
(
(
a # trailing end-of-line
# trailing own-line
) # dangling before dot end-of-line
.b # trailing end-of-line
)
(
(
a
)
# dangling before dot own-line
.b
)

View file

@ -3,7 +3,9 @@ use std::cmp::Ordering;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::whitespace::indentation;
use ruff_python_ast::{self as ast, Comprehension, Expr, MatchCase, Parameters, Ranged};
use ruff_python_trivia::{indentation_at_offset, SimpleToken, SimpleTokenKind, SimpleTokenizer};
use ruff_python_trivia::{
find_only_token_in_range, indentation_at_offset, SimpleToken, SimpleTokenKind, SimpleTokenizer,
};
use ruff_source_file::Locator;
use ruff_text_size::{TextLen, TextRange};
@ -177,7 +179,9 @@ fn handle_enclosed_comment<'a>(
AnyNodeRef::Comprehension(comprehension) => {
handle_comprehension_comment(comment, comprehension, locator)
}
AnyNodeRef::ExprAttribute(attribute) => handle_attribute_comment(comment, attribute),
AnyNodeRef::ExprAttribute(attribute) => {
handle_attribute_comment(comment, attribute, locator)
}
AnyNodeRef::ExprBinOp(binary_expression) => {
handle_trailing_binary_expression_left_or_operator_comment(
comment,
@ -354,7 +358,7 @@ fn is_first_statement_in_body(statement: AnyNodeRef, has_body: AnyNodeRef) -> bo
| AnyNodeRef::ExceptHandlerExceptHandler(ast::ExceptHandlerExceptHandler {
body, ..
})
| AnyNodeRef::MatchCase(ast::MatchCase { body, .. })
| AnyNodeRef::MatchCase(MatchCase { body, .. })
| AnyNodeRef::StmtFunctionDef(ast::StmtFunctionDef { body, .. })
| AnyNodeRef::StmtClassDef(ast::StmtClassDef { body, .. }) => {
are_same_optional(statement, body.first())
@ -973,47 +977,88 @@ fn handle_dict_unpacking_comment<'a>(
/// Own line comments coming after the node are always dangling comments
/// ```python
/// (
/// a
/// # trailing a comment
/// . # dangling comment
/// # or this
/// a # trailing comment on `a`
/// # dangling comment on the attribute
/// . # dangling comment on the attribute
/// # dangling comment on the attribute
/// b
/// )
/// ```
fn handle_attribute_comment<'a>(
comment: DecoratedComment<'a>,
attribute: &'a ast::ExprAttribute,
locator: &Locator,
) -> CommentPlacement<'a> {
if comment.preceding_node().is_none() {
// ```text
// ( value) . attr
// ^^^^ we're in this range
// ```
return CommentPlacement::leading(attribute.value.as_ref(), comment);
}
// ```text
// value . attr
// ^^^^^^^ we're in this range
// If the comment is parenthesized, use the parentheses to either attach it as a trailing
// comment on the value or a dangling comment on the attribute.
// For example, treat this as trailing:
// ```python
// (
// (
// value
// # comment
// )
// .attribute
// )
// ```
debug_assert!(
TextRange::new(attribute.value.end(), attribute.attr.start())
.contains(comment.slice().start())
);
if comment.line_position().is_end_of_line() {
// Attach as trailing comment to a. The specific placement is only relevant for fluent style
// ```python
// x322 = (
// a
// . # end-of-line dot comment 2
// b
// )
// ```
CommentPlacement::trailing(attribute.value.as_ref(), comment)
} else {
CommentPlacement::dangling(attribute, comment)
//
// However, treat this as dangling:
// ```python
// (
// (value)
// # comment
// .attribute
// )
// ```
if let Some(right_paren) = SimpleTokenizer::starts_at(attribute.value.end(), locator.contents())
.skip_trivia()
.take_while(|token| token.kind == SimpleTokenKind::RParen)
.last()
{
return if comment.start() < right_paren.start() {
CommentPlacement::trailing(attribute.value.as_ref(), comment)
} else {
CommentPlacement::dangling(comment.enclosing_node(), comment)
};
}
// If the comment precedes the `.`, treat it as trailing _if_ it's on the same line as the
// value. For example, treat this as trailing:
// ```python
// (
// value # comment
// .attribute
// )
// ```
//
// However, treat this as dangling:
// ```python
// (
// value
// # comment
// .attribute
// )
// ```
if comment.line_position().is_end_of_line() {
let dot_token = find_only_token_in_range(
TextRange::new(attribute.value.end(), attribute.attr.start()),
SimpleTokenKind::Dot,
locator.contents(),
);
if comment.end() < dot_token.start() {
return CommentPlacement::trailing(attribute.value.as_ref(), comment);
}
}
CommentPlacement::dangling(comment.enclosing_node(), comment)
}
/// Assign comments between `if` and `test` and `else` and `orelse` as leading to the respective
@ -1050,7 +1095,7 @@ fn handle_expr_if_comment<'a>(
let if_token = find_only_token_in_range(
TextRange::new(body.end(), test.start()),
SimpleTokenKind::If,
locator,
locator.contents(),
);
// Between `if` and `test`
if if_token.range.start() < comment.slice().start() && comment.slice().start() < test.start() {
@ -1060,7 +1105,7 @@ fn handle_expr_if_comment<'a>(
let else_token = find_only_token_in_range(
TextRange::new(test.end(), orelse.start()),
SimpleTokenKind::Else,
locator,
locator.contents(),
);
// Between `else` and `orelse`
if else_token.range.start() < comment.slice().start()
@ -1132,7 +1177,7 @@ fn handle_with_item_comment<'a>(
let as_token = find_only_token_in_range(
TextRange::new(context_expr.end(), optional_vars.start()),
SimpleTokenKind::As,
locator,
locator.contents(),
);
if comment.end() < as_token.start() {
@ -1231,7 +1276,7 @@ fn handle_named_expr_comment<'a>(
let colon_equal = find_only_token_in_range(
TextRange::new(target.end(), value.start()),
SimpleTokenKind::ColonEqual,
locator,
locator.contents(),
);
if comment.end() < colon_equal.start() {
@ -1244,23 +1289,6 @@ fn handle_named_expr_comment<'a>(
}
}
/// Looks for a token in the range that contains no other tokens except for parentheses outside
/// the expression ranges
fn find_only_token_in_range(
range: TextRange,
token_kind: SimpleTokenKind,
locator: &Locator,
) -> SimpleToken {
let mut tokens = SimpleTokenizer::new(locator.contents(), range)
.skip_trivia()
.skip_while(|token| token.kind == SimpleTokenKind::RParen);
let token = tokens.next().expect("Expected a token");
debug_assert_eq!(token.kind(), token_kind);
let mut tokens = tokens.skip_while(|token| token.kind == SimpleTokenKind::LParen);
debug_assert_eq!(tokens.next(), None);
token
}
/// Attach an end-of-line comment immediately following an open bracket as a dangling comment on
/// enclosing node.
///
@ -1433,7 +1461,7 @@ fn handle_comprehension_comment<'a>(
let in_token = find_only_token_in_range(
TextRange::new(comprehension.target.end(), comprehension.iter.start()),
SimpleTokenKind::In,
locator,
locator.contents(),
);
// Comments between the target and the `in`
@ -1496,7 +1524,7 @@ fn handle_comprehension_comment<'a>(
let if_token = find_only_token_in_range(
TextRange::new(last_end, if_node.start()),
SimpleTokenKind::If,
locator,
locator.contents(),
);
if is_own_line {
if last_end < comment.slice().start() && comment.slice().start() < if_token.start() {

View file

@ -1,8 +1,10 @@
use ruff_formatter::{write, FormatRuleWithOptions};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::{Constant, Expr, ExprAttribute, ExprConstant};
use ruff_python_ast::{Constant, Expr, ExprAttribute, ExprConstant, Ranged};
use ruff_python_trivia::{find_only_token_in_range, SimpleTokenKind};
use ruff_text_size::TextRange;
use crate::comments::{leading_comments, trailing_comments, SourceComment};
use crate::comments::{dangling_comments, trailing_comments, SourceComment};
use crate::expression::parentheses::{
is_expression_parenthesized, NeedsParentheses, OptionalParentheses, Parentheses,
};
@ -44,13 +46,6 @@ impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
})
);
let comments = f.context().comments().clone();
let dangling_comments = comments.dangling(item);
let leading_attribute_comments_start = dangling_comments
.partition_point(|comment| comment.line_position().is_end_of_line());
let (trailing_dot_comments, leading_attribute_comments) =
dangling_comments.split_at(leading_attribute_comments_start);
if needs_parentheses {
value.format().with_options(Parentheses::Always).fmt(f)?;
} else if call_chain_layout == CallChainLayout::Fluent {
@ -88,55 +83,54 @@ impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
value.format().fmt(f)?;
}
if comments.has_trailing_own_line(value.as_ref()) {
hard_line_break().fmt(f)?;
}
if call_chain_layout == CallChainLayout::Fluent {
// Fluent style has line breaks before the dot
// ```python
// blogs3 = (
// Blog.objects.filter(
// entry__headline__contains="Lennon",
// )
// .filter(
// entry__pub_date__year=2008,
// )
// .filter(
// entry__pub_date__year=2008,
// )
// )
// ```
write!(
f,
[
(!leading_attribute_comments.is_empty()).then_some(hard_line_break()),
leading_comments(leading_attribute_comments),
text("."),
trailing_comments(trailing_dot_comments),
attr.format()
]
)
// Identify dangling comments before and after the dot:
// ```python
// (
// (
// a
// ) # `before_dot_end_of_line`
// # `before_dot_own_line`
// . # `after_dot_end_of_line`
// # `after_dot_own_line`
// b
// )
// ```
let comments = f.context().comments().clone();
let dangling = comments.dangling(item);
let (before_dot, after_dot) = if dangling.is_empty() {
(dangling, dangling)
} else {
// Regular style
// ```python
// blogs2 = Blog.objects.filter(
// entry__headline__contains="Lennon",
// ).filter(
// entry__pub_date__year=2008,
// )
// ```
write!(
f,
[
text("."),
trailing_comments(trailing_dot_comments),
(!leading_attribute_comments.is_empty()).then_some(hard_line_break()),
leading_comments(leading_attribute_comments),
attr.format()
]
let dot_token = find_only_token_in_range(
TextRange::new(item.value.end(), item.attr.start()),
SimpleTokenKind::Dot,
f.context().source(),
);
dangling.split_at(
dangling.partition_point(|comment| comment.start() < dot_token.start()),
)
}
};
let (before_dot_end_of_line, before_dot_own_line) = before_dot.split_at(
before_dot.partition_point(|comment| comment.line_position().is_end_of_line()),
);
let (after_dot_end_of_line, after_dot_own_line) = after_dot.split_at(
after_dot.partition_point(|comment| comment.line_position().is_end_of_line()),
);
write!(
f,
[
trailing_comments(before_dot_end_of_line),
(!before_dot.is_empty()).then_some(hard_line_break()),
dangling_comments(before_dot_own_line),
text("."),
trailing_comments(after_dot_end_of_line),
(!after_dot.is_empty()).then_some(hard_line_break()),
dangling_comments(after_dot_own_line),
attr.format()
]
)
});
let is_call_chain_root = self.call_chain_layout == CallChainLayout::Default
@ -169,13 +163,7 @@ impl NeedsParentheses for ExprAttribute {
== CallChainLayout::Fluent
{
OptionalParentheses::Multiline
} else if context
.comments()
.dangling(self)
.iter()
.any(|comment| comment.line_position().is_own_line())
|| context.comments().has_trailing_own_line(self)
{
} else if context.comments().has_dangling(self) {
OptionalParentheses::Always
} else {
self.value.needs_parentheses(self.into(), context)

View file

@ -115,6 +115,22 @@ x6 = (
# regression: https://github.com/astral-sh/ruff/issues/6181
(#
()).a
(
(
a # trailing end-of-line
# trailing own-line
) # dangling before dot end-of-line
.b # trailing end-of-line
)
(
(
a
)
# dangling before dot own-line
.b
)
```
## Output
@ -124,27 +140,28 @@ from argparse import Namespace
a = Namespace()
(
a.
a
# comment
b # trailing comment
.b # trailing comment
)
(
a.
a
# comment
b # trailing dot comment # trailing identifier comment
.b # trailing dot comment # trailing identifier comment
)
(
a.
a
# comment
b # trailing identifier comment
.b # trailing identifier comment
)
(
a. # trailing dot comment
a
# comment
. # trailing dot comment
# in between
b # trailing identifier comment
)
@ -157,8 +174,9 @@ a.aaaaaaaaaaaaaaaaaaaaa.lllllllllllllllllllllllllllloooooooooong.chaaaaaaaaaaaaa
# chain if and only if we need them, that is if there are own line comments inside
# the chain.
x1 = (
a.b. # comment 2
a.b
# comment 1
. # comment 2
# comment 3
c.d
)
@ -169,27 +187,33 @@ x21 = (
a.b # trailing name end-of-line
)
x22 = (
a.
a
# outermost leading own line
b # outermost trailing end-of-line
.b # outermost trailing end-of-line
)
x31 = (
a.
a
# own line between nodes 1
.b
)
x321 = (
a. # end-of-line dot comment
b
)
x321 = a.b # end-of-line dot comment
x322 = a.b.c # end-of-line dot comment 2
x322 = (
a. # end-of-line dot comment 2
b.c
)
x331 = (
a.
# own line between nodes 3
b
)
x332 = (
"".
""
# own line between nodes
find
.find
)
x8 = (a + a).b
@ -207,6 +231,20 @@ x6 = (
( #
()
).a
(
(
a # trailing end-of-line
# trailing own-line
) # dangling before dot end-of-line
.b # trailing end-of-line
)
(
(a)
# dangling before dot own-line
.b
)
```

View file

@ -19,6 +19,24 @@ pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<SimpleToke
.next()
}
/// Returns the only non-trivia, non-closing parenthesis token in `range`.
///
/// Includes debug assertions that the range only contains that single token.
pub fn find_only_token_in_range(
range: TextRange,
token_kind: SimpleTokenKind,
code: &str,
) -> SimpleToken {
let mut tokens = SimpleTokenizer::new(code, range)
.skip_trivia()
.skip_while(|token| token.kind == SimpleTokenKind::RParen);
let token = tokens.next().expect("Expected a token");
debug_assert_eq!(token.kind(), token_kind);
let mut tokens = tokens.skip_while(|token| token.kind == SimpleTokenKind::LParen);
debug_assert_eq!(tokens.next(), None);
token
}
/// Returns the number of newlines between `offset` and the first non whitespace character in the source code.
pub fn lines_before(offset: TextSize, code: &str) -> u32 {
let mut cursor = Cursor::new(&code[TextRange::up_to(offset)]);