Format all attribute dot comments manually (#6825)

## Summary This PR modifies our formatting of comments around the `.` in an attribute. Specifically, the goal here is to avoid _reordering_ comments, and the net effect is that we generally leave comments where-they-are when dealing with comments between around the dot (which you can also think of as comments between attributes). All comments around the dot are now treated as dangling and formatted manually, with the exception of end-of-line or parenthesized comments on the value, like those marked as trailing here, which remain trailing: ```python ( ( a # trailing end-of-line # trailing own-line ) # dangling before dot end-of-line .b # trailing end-of-line ) ``` Closes https://github.com/astral-sh/ruff/issues/6823. ## Test Plan `cargo test` Before: | project | similarity index | |--------------|------------------| | cpython | 0.76050 | | django | 0.99820 | | transformers | 0.99800 | | twine | 0.99876 | | typeshed | 0.99953 | | warehouse | 0.99615 | | zulip | 0.99729 | After: | project | similarity index | |--------------|------------------| | cpython | 0.76050 | | django | 0.99820 | | transformers | 0.99800 | | twine | 0.99876 | | typeshed | 0.99953 | | warehouse | 0.99615 | | zulip | 0.99729 |
2025-08-04 10:48:32 +00:00 · 2023-08-24 23:50:56 -04:00 · 2023-08-24 23:50:56 -04:00 · 474e8fbcd4
commit 474e8fbcd4
parent 6f23469e00
5 changed files with 216 additions and 128 deletions
--- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/attribute.py
+++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/attribute.py
@ -109,3 +109,19 @@ x6 = (
 # regression: https://github.com/astral-sh/ruff/issues/6181
 (#
 ()).a
+
+(
+    (
+        a # trailing end-of-line
+        # trailing own-line
+    ) # dangling before dot end-of-line
+    .b # trailing end-of-line
+)
+
+(
+    (
+        a
+    )
+            # dangling before dot own-line
+    .b
+)
--- a/crates/ruff_python_formatter/src/comments/placement.rs
+++ b/crates/ruff_python_formatter/src/comments/placement.rs
@ -3,7 +3,9 @@ use std::cmp::Ordering;
 use ruff_python_ast::node::AnyNodeRef;
 use ruff_python_ast::whitespace::indentation;
 use ruff_python_ast::{self as ast, Comprehension, Expr, MatchCase, Parameters, Ranged};
-use ruff_python_trivia::{indentation_at_offset, SimpleToken, SimpleTokenKind, SimpleTokenizer};
+use ruff_python_trivia::{
+    find_only_token_in_range, indentation_at_offset, SimpleToken, SimpleTokenKind, SimpleTokenizer,
+};
 use ruff_source_file::Locator;
 use ruff_text_size::{TextLen, TextRange};

@ -177,7 +179,9 @@ fn handle_enclosed_comment<'a>(
        AnyNodeRef::Comprehension(comprehension) => {
            handle_comprehension_comment(comment, comprehension, locator)
        }
-        AnyNodeRef::ExprAttribute(attribute) => handle_attribute_comment(comment, attribute),
+        AnyNodeRef::ExprAttribute(attribute) => {
+            handle_attribute_comment(comment, attribute, locator)
+        }
        AnyNodeRef::ExprBinOp(binary_expression) => {
            handle_trailing_binary_expression_left_or_operator_comment(
                comment,
@ -354,7 +358,7 @@ fn is_first_statement_in_body(statement: AnyNodeRef, has_body: AnyNodeRef) -> bo
        | AnyNodeRef::ExceptHandlerExceptHandler(ast::ExceptHandlerExceptHandler {
            body, ..
        })
-        | AnyNodeRef::MatchCase(ast::MatchCase { body, .. })
+        | AnyNodeRef::MatchCase(MatchCase { body, .. })
        | AnyNodeRef::StmtFunctionDef(ast::StmtFunctionDef { body, .. })
        | AnyNodeRef::StmtClassDef(ast::StmtClassDef { body, .. }) => {
            are_same_optional(statement, body.first())
@ -973,47 +977,88 @@ fn handle_dict_unpacking_comment<'a>(
 /// Own line comments coming after the node are always dangling comments
 /// ```python
 /// (
-///      a
-///      # trailing a comment
-///      . # dangling comment
-///      # or this
+///      a  # trailing comment on `a`
+///      # dangling comment on the attribute
+///      . # dangling comment on the attribute
+///      # dangling comment on the attribute
 ///      b
 /// )
 /// ```
 fn handle_attribute_comment<'a>(
    comment: DecoratedComment<'a>,
    attribute: &'a ast::ExprAttribute,
+    locator: &Locator,
 ) -> CommentPlacement<'a> {
    if comment.preceding_node().is_none() {
        // ```text
        // (    value)   .   attr
        //  ^^^^ we're in this range
        // ```
-
        return CommentPlacement::leading(attribute.value.as_ref(), comment);
    }

-    // ```text
-    // value   .   attr
-    //      ^^^^^^^ we're in this range
+    // If the comment is parenthesized, use the parentheses to either attach it as a trailing
+    // comment on the value or a dangling comment on the attribute.
+    // For example, treat this as trailing:
+    // ```python
+    // (
+    //     (
+    //         value
+    //         # comment
+    //     )
+    //     .attribute
+    // )
    // ```
-    debug_assert!(
-        TextRange::new(attribute.value.end(), attribute.attr.start())
-            .contains(comment.slice().start())
-    );
-    if comment.line_position().is_end_of_line() {
-        // Attach as trailing comment to a. The specific placement is only relevant for fluent style
-        // ```python
-        // x322 = (
-        //     a
-        //     . # end-of-line dot comment 2
-        //     b
-        // )
-        // ```
-        CommentPlacement::trailing(attribute.value.as_ref(), comment)
-    } else {
-        CommentPlacement::dangling(attribute, comment)
+    //
+    // However, treat this as dangling:
+    // ```python
+    // (
+    //     (value)
+    //     # comment
+    //     .attribute
+    // )
+    // ```
+    if let Some(right_paren) = SimpleTokenizer::starts_at(attribute.value.end(), locator.contents())
+        .skip_trivia()
+        .take_while(|token| token.kind == SimpleTokenKind::RParen)
+        .last()
+    {
+        return if comment.start() < right_paren.start() {
+            CommentPlacement::trailing(attribute.value.as_ref(), comment)
+        } else {
+            CommentPlacement::dangling(comment.enclosing_node(), comment)
+        };
    }
+
+    // If the comment precedes the `.`, treat it as trailing _if_ it's on the same line as the
+    // value. For example, treat this as trailing:
+    // ```python
+    // (
+    //     value  # comment
+    //     .attribute
+    // )
+    // ```
+    //
+    // However, treat this as dangling:
+    // ```python
+    // (
+    //     value
+    //     # comment
+    //     .attribute
+    // )
+    // ```
+    if comment.line_position().is_end_of_line() {
+        let dot_token = find_only_token_in_range(
+            TextRange::new(attribute.value.end(), attribute.attr.start()),
+            SimpleTokenKind::Dot,
+            locator.contents(),
+        );
+        if comment.end() < dot_token.start() {
+            return CommentPlacement::trailing(attribute.value.as_ref(), comment);
+        }
+    }
+
+    CommentPlacement::dangling(comment.enclosing_node(), comment)
 }

 /// Assign comments between `if` and `test` and `else` and `orelse` as leading to the respective
@ -1050,7 +1095,7 @@ fn handle_expr_if_comment<'a>(
    let if_token = find_only_token_in_range(
        TextRange::new(body.end(), test.start()),
        SimpleTokenKind::If,
-        locator,
+        locator.contents(),
    );
    // Between `if` and `test`
    if if_token.range.start() < comment.slice().start() && comment.slice().start() < test.start() {
@ -1060,7 +1105,7 @@ fn handle_expr_if_comment<'a>(
    let else_token = find_only_token_in_range(
        TextRange::new(test.end(), orelse.start()),
        SimpleTokenKind::Else,
-        locator,
+        locator.contents(),
    );
    // Between `else` and `orelse`
    if else_token.range.start() < comment.slice().start()
@ -1132,7 +1177,7 @@ fn handle_with_item_comment<'a>(
    let as_token = find_only_token_in_range(
        TextRange::new(context_expr.end(), optional_vars.start()),
        SimpleTokenKind::As,
-        locator,
+        locator.contents(),
    );

    if comment.end() < as_token.start() {
@ -1231,7 +1276,7 @@ fn handle_named_expr_comment<'a>(
    let colon_equal = find_only_token_in_range(
        TextRange::new(target.end(), value.start()),
        SimpleTokenKind::ColonEqual,
-        locator,
+        locator.contents(),
    );

    if comment.end() < colon_equal.start() {
@ -1244,23 +1289,6 @@ fn handle_named_expr_comment<'a>(
    }
 }

-/// Looks for a token in the range that contains no other tokens except for parentheses outside
-/// the expression ranges
-fn find_only_token_in_range(
-    range: TextRange,
-    token_kind: SimpleTokenKind,
-    locator: &Locator,
-) -> SimpleToken {
-    let mut tokens = SimpleTokenizer::new(locator.contents(), range)
-        .skip_trivia()
-        .skip_while(|token| token.kind == SimpleTokenKind::RParen);
-    let token = tokens.next().expect("Expected a token");
-    debug_assert_eq!(token.kind(), token_kind);
-    let mut tokens = tokens.skip_while(|token| token.kind == SimpleTokenKind::LParen);
-    debug_assert_eq!(tokens.next(), None);
-    token
-}
-
 /// Attach an end-of-line comment immediately following an open bracket as a dangling comment on
 /// enclosing node.
 ///
@ -1433,7 +1461,7 @@ fn handle_comprehension_comment<'a>(
    let in_token = find_only_token_in_range(
        TextRange::new(comprehension.target.end(), comprehension.iter.start()),
        SimpleTokenKind::In,
-        locator,
+        locator.contents(),
    );

    // Comments between the target and the `in`
@ -1496,7 +1524,7 @@ fn handle_comprehension_comment<'a>(
        let if_token = find_only_token_in_range(
            TextRange::new(last_end, if_node.start()),
            SimpleTokenKind::If,
-            locator,
+            locator.contents(),
        );
        if is_own_line {
            if last_end < comment.slice().start() && comment.slice().start() < if_token.start() {
--- a/crates/ruff_python_formatter/src/expression/expr_attribute.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_attribute.rs
@ -1,8 +1,10 @@
 use ruff_formatter::{write, FormatRuleWithOptions};
 use ruff_python_ast::node::AnyNodeRef;
-use ruff_python_ast::{Constant, Expr, ExprAttribute, ExprConstant};
+use ruff_python_ast::{Constant, Expr, ExprAttribute, ExprConstant, Ranged};
+use ruff_python_trivia::{find_only_token_in_range, SimpleTokenKind};
+use ruff_text_size::TextRange;

-use crate::comments::{leading_comments, trailing_comments, SourceComment};
+use crate::comments::{dangling_comments, trailing_comments, SourceComment};
 use crate::expression::parentheses::{
    is_expression_parenthesized, NeedsParentheses, OptionalParentheses, Parentheses,
 };
@ -44,13 +46,6 @@ impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
                })
            );

-            let comments = f.context().comments().clone();
-            let dangling_comments = comments.dangling(item);
-            let leading_attribute_comments_start = dangling_comments
-                .partition_point(|comment| comment.line_position().is_end_of_line());
-            let (trailing_dot_comments, leading_attribute_comments) =
-                dangling_comments.split_at(leading_attribute_comments_start);
-
            if needs_parentheses {
                value.format().with_options(Parentheses::Always).fmt(f)?;
            } else if call_chain_layout == CallChainLayout::Fluent {
@ -88,55 +83,54 @@ impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
                value.format().fmt(f)?;
            }

-            if comments.has_trailing_own_line(value.as_ref()) {
-                hard_line_break().fmt(f)?;
-            }
-
-            if call_chain_layout == CallChainLayout::Fluent {
-                // Fluent style has line breaks before the dot
-                // ```python
-                // blogs3 = (
-                //     Blog.objects.filter(
-                //         entry__headline__contains="Lennon",
-                //     )
-                //     .filter(
-                //         entry__pub_date__year=2008,
-                //     )
-                //     .filter(
-                //         entry__pub_date__year=2008,
-                //     )
-                // )
-                // ```
-                write!(
-                    f,
-                    [
-                        (!leading_attribute_comments.is_empty()).then_some(hard_line_break()),
-                        leading_comments(leading_attribute_comments),
-                        text("."),
-                        trailing_comments(trailing_dot_comments),
-                        attr.format()
-                    ]
-                )
+            // Identify dangling comments before and after the dot:
+            // ```python
+            // (
+            //      (
+            //          a
+            //      )  # `before_dot_end_of_line`
+            //      # `before_dot_own_line`
+            //      .  # `after_dot_end_of_line`
+            //      # `after_dot_own_line`
+            //      b
+            // )
+            // ```
+            let comments = f.context().comments().clone();
+            let dangling = comments.dangling(item);
+            let (before_dot, after_dot) = if dangling.is_empty() {
+                (dangling, dangling)
            } else {
-                // Regular style
-                // ```python
-                // blogs2 = Blog.objects.filter(
-                //     entry__headline__contains="Lennon",
-                // ).filter(
-                //     entry__pub_date__year=2008,
-                // )
-                // ```
-                write!(
-                    f,
-                    [
-                        text("."),
-                        trailing_comments(trailing_dot_comments),
-                        (!leading_attribute_comments.is_empty()).then_some(hard_line_break()),
-                        leading_comments(leading_attribute_comments),
-                        attr.format()
-                    ]
+                let dot_token = find_only_token_in_range(
+                    TextRange::new(item.value.end(), item.attr.start()),
+                    SimpleTokenKind::Dot,
+                    f.context().source(),
+                );
+                dangling.split_at(
+                    dangling.partition_point(|comment| comment.start() < dot_token.start()),
                )
-            }
+            };
+
+            let (before_dot_end_of_line, before_dot_own_line) = before_dot.split_at(
+                before_dot.partition_point(|comment| comment.line_position().is_end_of_line()),
+            );
+
+            let (after_dot_end_of_line, after_dot_own_line) = after_dot.split_at(
+                after_dot.partition_point(|comment| comment.line_position().is_end_of_line()),
+            );
+
+            write!(
+                f,
+                [
+                    trailing_comments(before_dot_end_of_line),
+                    (!before_dot.is_empty()).then_some(hard_line_break()),
+                    dangling_comments(before_dot_own_line),
+                    text("."),
+                    trailing_comments(after_dot_end_of_line),
+                    (!after_dot.is_empty()).then_some(hard_line_break()),
+                    dangling_comments(after_dot_own_line),
+                    attr.format()
+                ]
+            )
        });

        let is_call_chain_root = self.call_chain_layout == CallChainLayout::Default
@ -169,13 +163,7 @@ impl NeedsParentheses for ExprAttribute {
            == CallChainLayout::Fluent
        {
            OptionalParentheses::Multiline
-        } else if context
-            .comments()
-            .dangling(self)
-            .iter()
-            .any(|comment| comment.line_position().is_own_line())
-            || context.comments().has_trailing_own_line(self)
-        {
+        } else if context.comments().has_dangling(self) {
            OptionalParentheses::Always
        } else {
            self.value.needs_parentheses(self.into(), context)
--- a/crates/ruff_python_formatter/tests/snapshots/format@expression__attribute.py.snap
+++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__attribute.py.snap
@ -115,6 +115,22 @@ x6 = (
 # regression: https://github.com/astral-sh/ruff/issues/6181
 (#
 ()).a
+
+(
+    (
+        a # trailing end-of-line
+        # trailing own-line
+    ) # dangling before dot end-of-line
+    .b # trailing end-of-line
+)
+
+(
+    (
+        a
+    )
+            # dangling before dot own-line
+    .b
+)
 ```

 ## Output
@ -124,27 +140,28 @@ from argparse import Namespace
 a = Namespace()

 (
-    a.
+    a
    # comment
-    b  # trailing comment
+    .b  # trailing comment
 )

 (
-    a.
+    a
    # comment
-    b  # trailing dot comment  # trailing identifier comment
+    .b  # trailing dot comment  # trailing identifier comment
 )

 (
-    a.
+    a
    # comment
-    b  # trailing identifier comment
+    .b  # trailing identifier comment
 )


 (
-    a.  # trailing dot comment
+    a
    # comment
+    .  # trailing dot comment
    # in between
    b  # trailing identifier comment
 )
@ -157,8 +174,9 @@ a.aaaaaaaaaaaaaaaaaaaaa.lllllllllllllllllllllllllllloooooooooong.chaaaaaaaaaaaaa
 # chain if and only if we need them, that is if there are own line comments inside
 # the chain.
 x1 = (
-    a.b.  # comment 2
+    a.b
    # comment 1
+    .  # comment 2
    # comment 3
    c.d
 )
@ -169,27 +187,33 @@ x21 = (
    a.b  # trailing name end-of-line
 )
 x22 = (
-    a.
+    a
    # outermost leading own line
-    b  # outermost trailing end-of-line
+    .b  # outermost trailing end-of-line
 )

 x31 = (
-    a.
+    a
    # own line between nodes 1
+    .b
+)
+x321 = (
+    a.  # end-of-line dot comment
    b
 )
-x321 = a.b  # end-of-line dot comment
-x322 = a.b.c  # end-of-line dot comment 2
+x322 = (
+    a.  # end-of-line dot comment 2
+    b.c
+)
 x331 = (
    a.
    # own line between nodes 3
    b
 )
 x332 = (
-    "".
+    ""
    # own line between nodes
-    find
+    .find
 )

 x8 = (a + a).b
@ -207,6 +231,20 @@ x6 = (
 (  #
    ()
 ).a
+
+(
+    (
+        a  # trailing end-of-line
+        # trailing own-line
+    )  # dangling before dot end-of-line
+    .b  # trailing end-of-line
+)
+
+(
+    (a)
+    # dangling before dot own-line
+    .b
+)
 ```


--- a/crates/ruff_python_trivia/src/tokenizer.rs
+++ b/crates/ruff_python_trivia/src/tokenizer.rs
@ -19,6 +19,24 @@ pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<SimpleToke
        .next()
 }

+/// Returns the only non-trivia, non-closing parenthesis token in `range`.
+///
+/// Includes debug assertions that the range only contains that single token.
+pub fn find_only_token_in_range(
+    range: TextRange,
+    token_kind: SimpleTokenKind,
+    code: &str,
+) -> SimpleToken {
+    let mut tokens = SimpleTokenizer::new(code, range)
+        .skip_trivia()
+        .skip_while(|token| token.kind == SimpleTokenKind::RParen);
+    let token = tokens.next().expect("Expected a token");
+    debug_assert_eq!(token.kind(), token_kind);
+    let mut tokens = tokens.skip_while(|token| token.kind == SimpleTokenKind::LParen);
+    debug_assert_eq!(tokens.next(), None);
+    token
+}
+
 /// Returns the number of newlines between `offset` and the first non whitespace character in the source code.
 pub fn lines_before(offset: TextSize, code: &str) -> u32 {
    let mut cursor = Cursor::new(&code[TextRange::up_to(offset)]);