Add dangling comment handling to dictionary key-value pairs (#7495)

## Summary This PR fixes a formatting instability by changing the comment handling around the `:` in a dictionary to mirror that of the `:` in a lambda: we treat comments around the `:` as dangling, then format them after the `:`. Closes https://github.com/astral-sh/ruff/issues/7458. ## Test Plan `cargo test` No change in similarity. Before: | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99956 | 2587 | 404 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99983 | 3496 | 18 | | warehouse | 0.99929 | 648 | 16 | | zulip | 0.99969 | 1437 | 21 | After: | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99956 | 2587 | 404 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99983 | 3496 | 18 | | warehouse | 0.99929 | 648 | 16 | | zulip | 0.99969 | 1437 | 21 |
2025-12-09 19:29:19 +00:00 · 2023-09-19 15:17:21 -04:00 · 2023-09-19 15:17:21 -04:00 · e07670ad97
commit e07670ad97
parent b792140579
7 changed files with 567 additions and 77 deletions
--- a/crates/ruff_python_formatter/src/comments/placement.rs
+++ b/crates/ruff_python_formatter/src/comments/placement.rs
@ -121,51 +121,67 @@ fn handle_parenthesized_comment<'a>(
    //
    // For now, we _can_ assert, but to do so, we stop lexing when we hit a token that precedes an
    // identifier.
-    if comment.line_position().is_end_of_line() {
-        let range = TextRange::new(preceding.end(), comment.start());
-        let tokenizer = SimpleTokenizer::new(locator.contents(), range);
-        if tokenizer
-            .skip_trivia()
-            .take_while(|token| {
-                !matches!(
-                    token.kind,
-                    SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
-                )
-            })
-            .any(|token| {
-                debug_assert!(
-                    !matches!(token.kind, SimpleTokenKind::Bogus),
-                    "Unexpected token between nodes: `{:?}`",
-                    locator.slice(range)
-                );

-                token.kind() == SimpleTokenKind::LParen
-            })
-        {
-            return CommentPlacement::leading(following, comment);
-        }
-    } else {
-        let range = TextRange::new(comment.end(), following.start());
-        let tokenizer = SimpleTokenizer::new(locator.contents(), range);
-        if tokenizer
-            .skip_trivia()
-            .take_while(|token| {
-                !matches!(
-                    token.kind,
-                    SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
-                )
-            })
-            .any(|token| {
-                debug_assert!(
-                    !matches!(token.kind, SimpleTokenKind::Bogus),
-                    "Unexpected token between nodes: `{:?}`",
-                    locator.slice(range)
-                );
-                token.kind() == SimpleTokenKind::RParen
-            })
-        {
-            return CommentPlacement::trailing(preceding, comment);
-        }
+    // Search for comments that to the right of a parenthesized node, e.g.:
+    // ```python
+    // [
+    //     x  # comment,
+    //     (
+    //         y,
+    //     ),
+    // ]
+    // ```
+    let range = TextRange::new(preceding.end(), comment.start());
+    let tokenizer = SimpleTokenizer::new(locator.contents(), range);
+    if tokenizer
+        .skip_trivia()
+        .take_while(|token| {
+            !matches!(
+                token.kind,
+                SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
+            )
+        })
+        .any(|token| {
+            debug_assert!(
+                !matches!(token.kind, SimpleTokenKind::Bogus),
+                "Unexpected token between nodes: `{:?}`",
+                locator.slice(range)
+            );
+            token.kind() == SimpleTokenKind::LParen
+        })
+    {
+        return CommentPlacement::leading(following, comment);
+    }
+
+    // Search for comments that to the right of a parenthesized node, e.g.:
+    // ```python
+    // [
+    //     (
+    //         x  # comment,
+    //     ),
+    //     y
+    // ]
+    // ```
+    let range = TextRange::new(comment.end(), following.start());
+    let tokenizer = SimpleTokenizer::new(locator.contents(), range);
+    if tokenizer
+        .skip_trivia()
+        .take_while(|token| {
+            !matches!(
+                token.kind,
+                SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
+            )
+        })
+        .any(|token| {
+            debug_assert!(
+                !matches!(token.kind, SimpleTokenKind::Bogus),
+                "Unexpected token between nodes: `{:?}`",
+                locator.slice(range)
+            );
+            token.kind() == SimpleTokenKind::RParen
+        })
+    {
+        return CommentPlacement::trailing(preceding, comment);
    }

    CommentPlacement::Default(comment)
@ -214,6 +230,9 @@ fn handle_enclosed_comment<'a>(
        AnyNodeRef::ExprUnaryOp(unary_op) => handle_unary_op_comment(comment, unary_op, locator),
        AnyNodeRef::ExprNamedExpr(_) => handle_named_expr_comment(comment, locator),
        AnyNodeRef::ExprDict(_) => handle_dict_unpacking_comment(comment, locator)
+            .or_else(|comment| handle_bracketed_end_of_line_comment(comment, locator))
+            .or_else(|comment| handle_key_value_comment(comment, locator)),
+        AnyNodeRef::ExprDictComp(_) => handle_key_value_comment(comment, locator)
            .or_else(|comment| handle_bracketed_end_of_line_comment(comment, locator)),
        AnyNodeRef::ExprIfExp(expr_if) => handle_expr_if_comment(comment, expr_if, locator),
        AnyNodeRef::ExprSlice(expr_slice) => {
@ -272,8 +291,7 @@ fn handle_enclosed_comment<'a>(
        | AnyNodeRef::ExprSet(_)
        | AnyNodeRef::ExprGeneratorExp(_)
        | AnyNodeRef::ExprListComp(_)
-        | AnyNodeRef::ExprSetComp(_)
-        | AnyNodeRef::ExprDictComp(_) => handle_bracketed_end_of_line_comment(comment, locator),
+        | AnyNodeRef::ExprSetComp(_) => handle_bracketed_end_of_line_comment(comment, locator),
        AnyNodeRef::ExprTuple(tuple) if is_tuple_parenthesized(tuple, locator.contents()) => {
            handle_bracketed_end_of_line_comment(comment, locator)
        }
@ -1207,7 +1225,7 @@ fn handle_dict_unpacking_comment<'a>(
    .skip_while(|token| token.kind == SimpleTokenKind::RParen);

    // if the remaining tokens from the previous node are exactly `**`,
-    // re-assign the comment to the one that follows the stars
+    // re-assign the comment to the one that follows the stars.
    if tokens.any(|token| token.kind == SimpleTokenKind::DoubleStar) {
        CommentPlacement::leading(following, comment)
    } else {
@ -1215,6 +1233,52 @@ fn handle_dict_unpacking_comment<'a>(
    }
 }

+/// Handles comments around the `:` in a key-value pair:
+///
+/// ```python
+/// {
+///     key  # dangling
+///     :  # dangling
+///     # dangling
+///     value
+/// }
+/// ```
+fn handle_key_value_comment<'a>(
+    comment: DecoratedComment<'a>,
+    locator: &Locator,
+) -> CommentPlacement<'a> {
+    debug_assert!(matches!(
+        comment.enclosing_node(),
+        AnyNodeRef::ExprDict(_) | AnyNodeRef::ExprDictComp(_)
+    ));
+
+    let (Some(following), Some(preceding)) = (comment.following_node(), comment.preceding_node())
+    else {
+        return CommentPlacement::Default(comment);
+    };
+
+    // Ensure that the comment is between the key and the value by detecting the colon:
+    // ```python
+    // {
+    //     key  # comment
+    //     : value
+    // }
+    // ```
+    // This prevents against detecting comments on starred expressions as key-value comments.
+    let tokens = SimpleTokenizer::new(
+        locator.contents(),
+        TextRange::new(preceding.end(), following.start()),
+    );
+    if tokens
+        .skip_trivia()
+        .any(|token| token.kind == SimpleTokenKind::Colon)
+    {
+        CommentPlacement::dangling(comment.enclosing_node(), comment)
+    } else {
+        CommentPlacement::Default(comment)
+    }
+}
+
 /// Handle comments between a function call and its arguments. For example, attach the following as
 /// dangling on the call:
 /// ```python
--- a/crates/ruff_python_formatter/src/expression/expr_dict.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_dict.rs
@ -3,7 +3,7 @@ use ruff_python_ast::node::AnyNodeRef;
 use ruff_python_ast::{Expr, ExprDict};
 use ruff_text_size::{Ranged, TextRange};

-use crate::comments::{leading_comments, SourceComment};
+use crate::comments::{dangling_comments, leading_comments, SourceComment};
 use crate::expression::parentheses::{
    empty_parenthesized, parenthesized, NeedsParentheses, OptionalParentheses,
 };
@ -25,15 +25,35 @@ impl FormatNodeRule<ExprDict> for FormatExprDict {
        let comments = f.context().comments().clone();
        let dangling = comments.dangling(item);

-        if values.is_empty() {
+        let (Some(key), Some(value)) = (keys.first(), values.first()) else {
            return empty_parenthesized("{", dangling, "}").fmt(f);
-        }
+        };
+
+        // Dangling comments can either appear after the open bracket, or around the key-value
+        // pairs:
+        // ```python
+        // {  # open_parenthesis_comments
+        //     x:  # key_value_comments
+        //     y
+        // }
+        // ```
+        let (open_parenthesis_comments, key_value_comments) = dangling.split_at(
+            dangling
+                .partition_point(|comment| comment.end() < KeyValuePair::new(key, value).start()),
+        );

        let format_pairs = format_with(|f| {
            let mut joiner = f.join_comma_separated(item.end());

+            let mut key_value_comments = key_value_comments;
            for (key, value) in keys.iter().zip(values) {
-                let key_value_pair = KeyValuePair { key, value };
+                let mut key_value_pair = KeyValuePair::new(key, value);
+
+                let partition = key_value_comments
+                    .partition_point(|comment| comment.start() < key_value_pair.end());
+                key_value_pair = key_value_pair.with_comments(&key_value_comments[..partition]);
+                key_value_comments = &key_value_comments[partition..];
+
                joiner.entry(&key_value_pair, &key_value_pair);
            }

@ -41,7 +61,7 @@ impl FormatNodeRule<ExprDict> for FormatExprDict {
        });

        parenthesized("{", &format_pairs, "}")
-            .with_dangling_comments(dangling)
+            .with_dangling_comments(open_parenthesis_comments)
            .fmt(f)
    }

@ -69,6 +89,21 @@ impl NeedsParentheses for ExprDict {
 struct KeyValuePair<'a> {
    key: &'a Option<Expr>,
    value: &'a Expr,
+    comments: &'a [SourceComment],
+}
+
+impl<'a> KeyValuePair<'a> {
+    fn new(key: &'a Option<Expr>, value: &'a Expr) -> Self {
+        Self {
+            key,
+            value,
+            comments: &[],
+        }
+    }
+
+    fn with_comments(self, comments: &'a [SourceComment]) -> Self {
+        Self { comments, ..self }
+    }
 }

 impl Ranged for KeyValuePair<'_> {
@ -86,12 +121,18 @@ impl Format<PyFormatContext<'_>> for KeyValuePair<'_> {
        if let Some(key) = self.key {
            write!(
                f,
-                [group(&format_args![
-                    key.format(),
-                    token(":"),
-                    space(),
-                    self.value.format()
-                ])]
+                [group(&format_with(|f| {
+                    key.format().fmt(f)?;
+                    token(":").fmt(f)?;
+
+                    if self.comments.is_empty() {
+                        space().fmt(f)?;
+                    } else {
+                        dangling_comments(self.comments).fmt(f)?;
+                    }
+
+                    self.value.format().fmt(f)
+                }))]
            )
        } else {
            // TODO(charlie): Make these dangling comments on the `ExprDict`, and identify them
--- a/crates/ruff_python_formatter/src/expression/expr_dict_comp.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_dict_comp.rs
@ -1,10 +1,9 @@
-use ruff_formatter::prelude::{
-    format_args, format_with, group, soft_line_break_or_space, space, token,
-};
 use ruff_formatter::write;
 use ruff_python_ast::node::AnyNodeRef;
 use ruff_python_ast::ExprDictComp;
+use ruff_text_size::Ranged;

+use crate::comments::dangling_comments;
 use crate::expression::parentheses::{parenthesized, NeedsParentheses, OptionalParentheses};
 use crate::prelude::*;

@ -20,30 +19,43 @@ impl FormatNodeRule<ExprDictComp> for FormatExprDictComp {
            generators,
        } = item;

-        let joined = format_with(|f| {
-            f.join_with(soft_line_break_or_space())
-                .entries(generators.iter().formatted())
-                .finish()
-        });
-
        let comments = f.context().comments().clone();
        let dangling = comments.dangling(item);

+        // Dangling comments can either appear after the open bracket, or around the key-value
+        // pairs:
+        // ```python
+        // {  # open_parenthesis_comments
+        //     x:  # key_value_comments
+        //     y
+        //     for (x, y) in z
+        // }
+        // ```
+        let (open_parenthesis_comments, key_value_comments) =
+            dangling.split_at(dangling.partition_point(|comment| comment.end() < key.start()));
+
        write!(
            f,
            [parenthesized(
                "{",
-                &group(&format_args!(
-                    group(&key.format()),
-                    token(":"),
-                    space(),
-                    value.format(),
-                    soft_line_break_or_space(),
-                    joined
-                )),
+                &group(&format_with(|f| {
+                    write!(f, [group(&key.format()), token(":")])?;
+
+                    if key_value_comments.is_empty() {
+                        space().fmt(f)?;
+                    } else {
+                        dangling_comments(key_value_comments).fmt(f)?;
+                    }
+
+                    write!(f, [value.format(), soft_line_break_or_space()])?;
+
+                    f.join_with(soft_line_break_or_space())
+                        .entries(generators.iter().formatted())
+                        .finish()
+                })),
                "}"
            )
-            .with_dangling_comments(dangling)]
+            .with_dangling_comments(open_parenthesis_comments)]
        )
    }
 }