Add dangling comment handling to dictionary key-value pairs (#7495)

## Summary

This PR fixes a formatting instability by changing the comment handling
around the `:` in a dictionary to mirror that of the `:` in a lambda: we
treat comments around the `:` as dangling, then format them after the
`:`.

Closes https://github.com/astral-sh/ruff/issues/7458.

## Test Plan

`cargo test`

No change in similarity.

Before:

| project | similarity index | total files | changed files |

|--------------|------------------:|------------------:|------------------:|
| cpython | 0.76083 | 1789 | 1631 |
| django | 0.99983 | 2760 | 36 |
| transformers | 0.99956 | 2587 | 404 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99983 | 3496 | 18 |
| warehouse | 0.99929 | 648 | 16 |
| zulip | 0.99969 | 1437 | 21 |

After:

| project | similarity index | total files | changed files |

|--------------|------------------:|------------------:|------------------:|
| cpython | 0.76083 | 1789 | 1631 |
| django | 0.99983 | 2760 | 36 |
| transformers | 0.99956 | 2587 | 404 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99983 | 3496 | 18 |
| warehouse | 0.99929 | 648 | 16 |
| zulip | 0.99969 | 1437 | 21 |
This commit is contained in:
Charlie Marsh 2023-09-19 15:17:21 -04:00 committed by GitHub
parent b792140579
commit e07670ad97
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 567 additions and 77 deletions

View file

@ -121,51 +121,67 @@ fn handle_parenthesized_comment<'a>(
//
// For now, we _can_ assert, but to do so, we stop lexing when we hit a token that precedes an
// identifier.
if comment.line_position().is_end_of_line() {
let range = TextRange::new(preceding.end(), comment.start());
let tokenizer = SimpleTokenizer::new(locator.contents(), range);
if tokenizer
.skip_trivia()
.take_while(|token| {
!matches!(
token.kind,
SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
)
})
.any(|token| {
debug_assert!(
!matches!(token.kind, SimpleTokenKind::Bogus),
"Unexpected token between nodes: `{:?}`",
locator.slice(range)
);
token.kind() == SimpleTokenKind::LParen
})
{
return CommentPlacement::leading(following, comment);
}
} else {
let range = TextRange::new(comment.end(), following.start());
let tokenizer = SimpleTokenizer::new(locator.contents(), range);
if tokenizer
.skip_trivia()
.take_while(|token| {
!matches!(
token.kind,
SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
)
})
.any(|token| {
debug_assert!(
!matches!(token.kind, SimpleTokenKind::Bogus),
"Unexpected token between nodes: `{:?}`",
locator.slice(range)
);
token.kind() == SimpleTokenKind::RParen
})
{
return CommentPlacement::trailing(preceding, comment);
}
// Search for comments that to the right of a parenthesized node, e.g.:
// ```python
// [
// x # comment,
// (
// y,
// ),
// ]
// ```
let range = TextRange::new(preceding.end(), comment.start());
let tokenizer = SimpleTokenizer::new(locator.contents(), range);
if tokenizer
.skip_trivia()
.take_while(|token| {
!matches!(
token.kind,
SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
)
})
.any(|token| {
debug_assert!(
!matches!(token.kind, SimpleTokenKind::Bogus),
"Unexpected token between nodes: `{:?}`",
locator.slice(range)
);
token.kind() == SimpleTokenKind::LParen
})
{
return CommentPlacement::leading(following, comment);
}
// Search for comments that to the right of a parenthesized node, e.g.:
// ```python
// [
// (
// x # comment,
// ),
// y
// ]
// ```
let range = TextRange::new(comment.end(), following.start());
let tokenizer = SimpleTokenizer::new(locator.contents(), range);
if tokenizer
.skip_trivia()
.take_while(|token| {
!matches!(
token.kind,
SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class
)
})
.any(|token| {
debug_assert!(
!matches!(token.kind, SimpleTokenKind::Bogus),
"Unexpected token between nodes: `{:?}`",
locator.slice(range)
);
token.kind() == SimpleTokenKind::RParen
})
{
return CommentPlacement::trailing(preceding, comment);
}
CommentPlacement::Default(comment)
@ -214,6 +230,9 @@ fn handle_enclosed_comment<'a>(
AnyNodeRef::ExprUnaryOp(unary_op) => handle_unary_op_comment(comment, unary_op, locator),
AnyNodeRef::ExprNamedExpr(_) => handle_named_expr_comment(comment, locator),
AnyNodeRef::ExprDict(_) => handle_dict_unpacking_comment(comment, locator)
.or_else(|comment| handle_bracketed_end_of_line_comment(comment, locator))
.or_else(|comment| handle_key_value_comment(comment, locator)),
AnyNodeRef::ExprDictComp(_) => handle_key_value_comment(comment, locator)
.or_else(|comment| handle_bracketed_end_of_line_comment(comment, locator)),
AnyNodeRef::ExprIfExp(expr_if) => handle_expr_if_comment(comment, expr_if, locator),
AnyNodeRef::ExprSlice(expr_slice) => {
@ -272,8 +291,7 @@ fn handle_enclosed_comment<'a>(
| AnyNodeRef::ExprSet(_)
| AnyNodeRef::ExprGeneratorExp(_)
| AnyNodeRef::ExprListComp(_)
| AnyNodeRef::ExprSetComp(_)
| AnyNodeRef::ExprDictComp(_) => handle_bracketed_end_of_line_comment(comment, locator),
| AnyNodeRef::ExprSetComp(_) => handle_bracketed_end_of_line_comment(comment, locator),
AnyNodeRef::ExprTuple(tuple) if is_tuple_parenthesized(tuple, locator.contents()) => {
handle_bracketed_end_of_line_comment(comment, locator)
}
@ -1207,7 +1225,7 @@ fn handle_dict_unpacking_comment<'a>(
.skip_while(|token| token.kind == SimpleTokenKind::RParen);
// if the remaining tokens from the previous node are exactly `**`,
// re-assign the comment to the one that follows the stars
// re-assign the comment to the one that follows the stars.
if tokens.any(|token| token.kind == SimpleTokenKind::DoubleStar) {
CommentPlacement::leading(following, comment)
} else {
@ -1215,6 +1233,52 @@ fn handle_dict_unpacking_comment<'a>(
}
}
/// Handles comments around the `:` in a key-value pair:
///
/// ```python
/// {
/// key # dangling
/// : # dangling
/// # dangling
/// value
/// }
/// ```
fn handle_key_value_comment<'a>(
comment: DecoratedComment<'a>,
locator: &Locator,
) -> CommentPlacement<'a> {
debug_assert!(matches!(
comment.enclosing_node(),
AnyNodeRef::ExprDict(_) | AnyNodeRef::ExprDictComp(_)
));
let (Some(following), Some(preceding)) = (comment.following_node(), comment.preceding_node())
else {
return CommentPlacement::Default(comment);
};
// Ensure that the comment is between the key and the value by detecting the colon:
// ```python
// {
// key # comment
// : value
// }
// ```
// This prevents against detecting comments on starred expressions as key-value comments.
let tokens = SimpleTokenizer::new(
locator.contents(),
TextRange::new(preceding.end(), following.start()),
);
if tokens
.skip_trivia()
.any(|token| token.kind == SimpleTokenKind::Colon)
{
CommentPlacement::dangling(comment.enclosing_node(), comment)
} else {
CommentPlacement::Default(comment)
}
}
/// Handle comments between a function call and its arguments. For example, attach the following as
/// dangling on the call:
/// ```python