Add dangling comment handling to dictionary key-value pairs (#7495)

## Summary

This PR fixes a formatting instability by changing the comment handling
around the `:` in a dictionary to mirror that of the `:` in a lambda: we
treat comments around the `:` as dangling, then format them after the
`:`.

Closes https://github.com/astral-sh/ruff/issues/7458.

## Test Plan

`cargo test`

No change in similarity.

Before:

| project | similarity index | total files | changed files |

|--------------|------------------:|------------------:|------------------:|
| cpython | 0.76083 | 1789 | 1631 |
| django | 0.99983 | 2760 | 36 |
| transformers | 0.99956 | 2587 | 404 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99983 | 3496 | 18 |
| warehouse | 0.99929 | 648 | 16 |
| zulip | 0.99969 | 1437 | 21 |

After:

| project | similarity index | total files | changed files |

|--------------|------------------:|------------------:|------------------:|
| cpython | 0.76083 | 1789 | 1631 |
| django | 0.99983 | 2760 | 36 |
| transformers | 0.99956 | 2587 | 404 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99983 | 3496 | 18 |
| warehouse | 0.99929 | 648 | 16 |
| zulip | 0.99969 | 1437 | 21 |
This commit is contained in:
Charlie Marsh 2023-09-19 15:17:21 -04:00 committed by GitHub
parent b792140579
commit e07670ad97
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 567 additions and 77 deletions

View file

@ -121,51 +121,67 @@ fn handle_parenthesized_comment<'a>(
//
// For now, we _can_ assert, but to do so, we stop lexing when we hit a token that precedes an
// identifier.
if comment.line_position().is_end_of_line() {
let range = TextRange::new(preceding.end(), comment.start());
let tokenizer = SimpleTokenizer::new(locator.contents(), range);
if tokenizer
.skip_trivia()
.take_while(|token| {
!matches!(
token.kind,
SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
)
})
.any(|token| {
debug_assert!(
!matches!(token.kind, SimpleTokenKind::Bogus),
"Unexpected token between nodes: `{:?}`",
locator.slice(range)
);
token.kind() == SimpleTokenKind::LParen
})
{
return CommentPlacement::leading(following, comment);
}
} else {
let range = TextRange::new(comment.end(), following.start());
let tokenizer = SimpleTokenizer::new(locator.contents(), range);
if tokenizer
.skip_trivia()
.take_while(|token| {
!matches!(
token.kind,
SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
)
})
.any(|token| {
debug_assert!(
!matches!(token.kind, SimpleTokenKind::Bogus),
"Unexpected token between nodes: `{:?}`",
locator.slice(range)
);
token.kind() == SimpleTokenKind::RParen
})
{
return CommentPlacement::trailing(preceding, comment);
}
// Search for comments that to the right of a parenthesized node, e.g.:
// ```python
// [
// x # comment,
// (
// y,
// ),
// ]
// ```
let range = TextRange::new(preceding.end(), comment.start());
let tokenizer = SimpleTokenizer::new(locator.contents(), range);
if tokenizer
.skip_trivia()
.take_while(|token| {
!matches!(
token.kind,
SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
)
})
.any(|token| {
debug_assert!(
!matches!(token.kind, SimpleTokenKind::Bogus),
"Unexpected token between nodes: `{:?}`",
locator.slice(range)
);
token.kind() == SimpleTokenKind::LParen
})
{
return CommentPlacement::leading(following, comment);
}
// Search for comments that to the right of a parenthesized node, e.g.:
// ```python
// [
// (
// x # comment,
// ),
// y
// ]
// ```
let range = TextRange::new(comment.end(), following.start());
let tokenizer = SimpleTokenizer::new(locator.contents(), range);
if tokenizer
.skip_trivia()
.take_while(|token| {
!matches!(
token.kind,
SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
)
})
.any(|token| {
debug_assert!(
!matches!(token.kind, SimpleTokenKind::Bogus),
"Unexpected token between nodes: `{:?}`",
locator.slice(range)
);
token.kind() == SimpleTokenKind::RParen
})
{
return CommentPlacement::trailing(preceding, comment);
}
CommentPlacement::Default(comment)
@ -214,6 +230,9 @@ fn handle_enclosed_comment<'a>(
AnyNodeRef::ExprUnaryOp(unary_op) => handle_unary_op_comment(comment, unary_op, locator),
AnyNodeRef::ExprNamedExpr(_) => handle_named_expr_comment(comment, locator),
AnyNodeRef::ExprDict(_) => handle_dict_unpacking_comment(comment, locator)
.or_else(|comment| handle_bracketed_end_of_line_comment(comment, locator))
.or_else(|comment| handle_key_value_comment(comment, locator)),
AnyNodeRef::ExprDictComp(_) => handle_key_value_comment(comment, locator)
.or_else(|comment| handle_bracketed_end_of_line_comment(comment, locator)),
AnyNodeRef::ExprIfExp(expr_if) => handle_expr_if_comment(comment, expr_if, locator),
AnyNodeRef::ExprSlice(expr_slice) => {
@ -272,8 +291,7 @@ fn handle_enclosed_comment<'a>(
| AnyNodeRef::ExprSet(_)
| AnyNodeRef::ExprGeneratorExp(_)
| AnyNodeRef::ExprListComp(_)
| AnyNodeRef::ExprSetComp(_)
| AnyNodeRef::ExprDictComp(_) => handle_bracketed_end_of_line_comment(comment, locator),
| AnyNodeRef::ExprSetComp(_) => handle_bracketed_end_of_line_comment(comment, locator),
AnyNodeRef::ExprTuple(tuple) if is_tuple_parenthesized(tuple, locator.contents()) => {
handle_bracketed_end_of_line_comment(comment, locator)
}
@ -1207,7 +1225,7 @@ fn handle_dict_unpacking_comment<'a>(
.skip_while(|token| token.kind == SimpleTokenKind::RParen);
// if the remaining tokens from the previous node are exactly `**`,
// re-assign the comment to the one that follows the stars
// re-assign the comment to the one that follows the stars.
if tokens.any(|token| token.kind == SimpleTokenKind::DoubleStar) {
CommentPlacement::leading(following, comment)
} else {
@ -1215,6 +1233,52 @@ fn handle_dict_unpacking_comment<'a>(
}
}
/// Handles comments around the `:` in a key-value pair:
///
/// ```python
/// {
/// key # dangling
/// : # dangling
/// # dangling
/// value
/// }
/// ```
fn handle_key_value_comment<'a>(
comment: DecoratedComment<'a>,
locator: &Locator,
) -> CommentPlacement<'a> {
debug_assert!(matches!(
comment.enclosing_node(),
AnyNodeRef::ExprDict(_) | AnyNodeRef::ExprDictComp(_)
));
let (Some(following), Some(preceding)) = (comment.following_node(), comment.preceding_node())
else {
return CommentPlacement::Default(comment);
};
// Ensure that the comment is between the key and the value by detecting the colon:
// ```python
// {
// key # comment
// : value
// }
// ```
// This prevents against detecting comments on starred expressions as key-value comments.
let tokens = SimpleTokenizer::new(
locator.contents(),
TextRange::new(preceding.end(), following.start()),
);
if tokens
.skip_trivia()
.any(|token| token.kind == SimpleTokenKind::Colon)
{
CommentPlacement::dangling(comment.enclosing_node(), comment)
} else {
CommentPlacement::Default(comment)
}
}
/// Handle comments between a function call and its arguments. For example, attach the following as
/// dangling on the call:
/// ```python

View file

@ -3,7 +3,7 @@ use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::{Expr, ExprDict};
use ruff_text_size::{Ranged, TextRange};
use crate::comments::{leading_comments, SourceComment};
use crate::comments::{dangling_comments, leading_comments, SourceComment};
use crate::expression::parentheses::{
empty_parenthesized, parenthesized, NeedsParentheses, OptionalParentheses,
};
@ -25,15 +25,35 @@ impl FormatNodeRule<ExprDict> for FormatExprDict {
let comments = f.context().comments().clone();
let dangling = comments.dangling(item);
if values.is_empty() {
let (Some(key), Some(value)) = (keys.first(), values.first()) else {
return empty_parenthesized("{", dangling, "}").fmt(f);
}
};
// Dangling comments can either appear after the open bracket, or around the key-value
// pairs:
// ```python
// { # open_parenthesis_comments
// x: # key_value_comments
// y
// }
// ```
let (open_parenthesis_comments, key_value_comments) = dangling.split_at(
dangling
.partition_point(|comment| comment.end() < KeyValuePair::new(key, value).start()),
);
let format_pairs = format_with(|f| {
let mut joiner = f.join_comma_separated(item.end());
let mut key_value_comments = key_value_comments;
for (key, value) in keys.iter().zip(values) {
let key_value_pair = KeyValuePair { key, value };
let mut key_value_pair = KeyValuePair::new(key, value);
let partition = key_value_comments
.partition_point(|comment| comment.start() < key_value_pair.end());
key_value_pair = key_value_pair.with_comments(&key_value_comments[..partition]);
key_value_comments = &key_value_comments[partition..];
joiner.entry(&key_value_pair, &key_value_pair);
}
@ -41,7 +61,7 @@ impl FormatNodeRule<ExprDict> for FormatExprDict {
});
parenthesized("{", &format_pairs, "}")
.with_dangling_comments(dangling)
.with_dangling_comments(open_parenthesis_comments)
.fmt(f)
}
@ -69,6 +89,21 @@ impl NeedsParentheses for ExprDict {
struct KeyValuePair<'a> {
key: &'a Option<Expr>,
value: &'a Expr,
comments: &'a [SourceComment],
}
impl<'a> KeyValuePair<'a> {
fn new(key: &'a Option<Expr>, value: &'a Expr) -> Self {
Self {
key,
value,
comments: &[],
}
}
fn with_comments(self, comments: &'a [SourceComment]) -> Self {
Self { comments, ..self }
}
}
impl Ranged for KeyValuePair<'_> {
@ -86,12 +121,18 @@ impl Format<PyFormatContext<'_>> for KeyValuePair<'_> {
if let Some(key) = self.key {
write!(
f,
[group(&format_args![
key.format(),
token(":"),
space(),
self.value.format()
])]
[group(&format_with(|f| {
key.format().fmt(f)?;
token(":").fmt(f)?;
if self.comments.is_empty() {
space().fmt(f)?;
} else {
dangling_comments(self.comments).fmt(f)?;
}
self.value.format().fmt(f)
}))]
)
} else {
// TODO(charlie): Make these dangling comments on the `ExprDict`, and identify them

View file

@ -1,10 +1,9 @@
use ruff_formatter::prelude::{
format_args, format_with, group, soft_line_break_or_space, space, token,
};
use ruff_formatter::write;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::ExprDictComp;
use ruff_text_size::Ranged;
use crate::comments::dangling_comments;
use crate::expression::parentheses::{parenthesized, NeedsParentheses, OptionalParentheses};
use crate::prelude::*;
@ -20,30 +19,43 @@ impl FormatNodeRule<ExprDictComp> for FormatExprDictComp {
generators,
} = item;
let joined = format_with(|f| {
f.join_with(soft_line_break_or_space())
.entries(generators.iter().formatted())
.finish()
});
let comments = f.context().comments().clone();
let dangling = comments.dangling(item);
// Dangling comments can either appear after the open bracket, or around the key-value
// pairs:
// ```python
// { # open_parenthesis_comments
// x: # key_value_comments
// y
// for (x, y) in z
// }
// ```
let (open_parenthesis_comments, key_value_comments) =
dangling.split_at(dangling.partition_point(|comment| comment.end() < key.start()));
write!(
f,
[parenthesized(
"{",
&group(&format_args!(
group(&key.format()),
token(":"),
space(),
value.format(),
soft_line_break_or_space(),
joined
)),
&group(&format_with(|f| {
write!(f, [group(&key.format()), token(":")])?;
if key_value_comments.is_empty() {
space().fmt(f)?;
} else {
dangling_comments(key_value_comments).fmt(f)?;
}
write!(f, [value.format(), soft_line_break_or_space()])?;
f.join_with(soft_line_break_or_space())
.entries(generators.iter().formatted())
.finish()
})),
"}"
)
.with_dangling_comments(dangling)]
.with_dangling_comments(open_parenthesis_comments)]
)
}
}