mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 18:58:04 +00:00
Avoid reordering mixed-indent-level comments after branches (#7609)
## Summary Given: ```python if True: if True: if True: pass #a #b #c else: pass ``` When determining the placement of the various comments, we compute the indentation depth of each comment, and then compare it to the depth of the previous statement. It turns out this can lead to reordering comments, e.g., above, `#b` is assigned as a trailing comment of `pass`, and so gets reordered above `#a`. This PR modifies the logic such that when we compute the indentation depth of `#b`, we limit it to at most the indentation depth of `#a`. In other words, when analyzing comments at the end of branches, we don't let successive comments go any _deeper_ than their preceding comments. Closes https://github.com/astral-sh/ruff/issues/7602. ## Test Plan `cargo test` No change in similarity. Before: | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99963 | 2587 | 319 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99979 | 3496 | 22 | | warehouse | 0.99967 | 648 | 15 | | zulip | 0.99972 | 1437 | 21 | After: | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99963 | 2587 | 319 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99979 | 3496 | 22 | | warehouse | 0.99967 | 648 | 15 | | zulip | 0.99972 | 1437 | 21 |
This commit is contained in:
parent
19010f276e
commit
1a4f2a9baf
3 changed files with 240 additions and 17 deletions
|
@ -208,6 +208,56 @@ else:
|
|||
pass
|
||||
|
||||
|
||||
# Regression test for: https://github.com/astral-sh/ruff/issues/7602
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
|
||||
#a
|
||||
#b
|
||||
#c
|
||||
else:
|
||||
pass
|
||||
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
# b
|
||||
|
||||
# a
|
||||
# c
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
# Same indent
|
||||
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
|
||||
#a
|
||||
#b
|
||||
#c
|
||||
else:
|
||||
pass
|
||||
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
|
||||
# a
|
||||
# b
|
||||
# c
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
# Regression test for https://github.com/astral-sh/ruff/issues/5337
|
||||
if parent_body:
|
||||
if current_body:
|
||||
|
|
|
@ -8,7 +8,7 @@ use ruff_python_trivia::{
|
|||
SimpleToken, SimpleTokenKind, SimpleTokenizer,
|
||||
};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange};
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
|
||||
use crate::comments::visitor::{CommentPlacement, DecoratedComment};
|
||||
use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection};
|
||||
|
@ -587,11 +587,11 @@ fn handle_own_line_comment_between_branches<'a>(
|
|||
|
||||
// It depends on the indentation level of the comment if it is a leading comment for the
|
||||
// following branch or if it a trailing comment of the previous body's last statement.
|
||||
let comment_indentation = indentation_at_offset(comment.start(), locator)
|
||||
.unwrap_or_default()
|
||||
.len();
|
||||
let comment_indentation = own_line_comment_indentation(preceding, &comment, locator);
|
||||
|
||||
let preceding_indentation = indentation(locator, &preceding).unwrap_or_default().len();
|
||||
let preceding_indentation = indentation(locator, &preceding)
|
||||
.unwrap_or_default()
|
||||
.text_len();
|
||||
|
||||
// Compare to the last statement in the body
|
||||
match comment_indentation.cmp(&preceding_indentation) {
|
||||
|
@ -662,18 +662,16 @@ fn handle_own_line_comment_between_branches<'a>(
|
|||
/// Determine where to attach an own line comment after a branch depending on its indentation
|
||||
fn handle_own_line_comment_after_branch<'a>(
|
||||
comment: DecoratedComment<'a>,
|
||||
preceding_node: AnyNodeRef<'a>,
|
||||
preceding: AnyNodeRef<'a>,
|
||||
locator: &Locator,
|
||||
) -> CommentPlacement<'a> {
|
||||
let Some(last_child) = last_child_in_body(preceding_node) else {
|
||||
let Some(last_child) = last_child_in_body(preceding) else {
|
||||
return CommentPlacement::Default(comment);
|
||||
};
|
||||
|
||||
// We only care about the length because indentations with mixed spaces and tabs are only valid if
|
||||
// the indent-level doesn't depend on the tab width (the indent level must be the same if the tab width is 1 or 8).
|
||||
let comment_indentation = indentation_at_offset(comment.start(), locator)
|
||||
.unwrap_or_default()
|
||||
.len();
|
||||
let comment_indentation = own_line_comment_indentation(preceding, &comment, locator);
|
||||
|
||||
// Keep the comment on the entire statement in case it's a trailing comment
|
||||
// ```python
|
||||
|
@ -684,9 +682,9 @@ fn handle_own_line_comment_after_branch<'a>(
|
|||
// # Trailing if comment
|
||||
// ```
|
||||
// Here we keep the comment a trailing comment of the `if`
|
||||
let preceding_indentation = indentation_at_offset(preceding_node.start(), locator)
|
||||
let preceding_indentation = indentation_at_offset(preceding.start(), locator)
|
||||
.unwrap_or_default()
|
||||
.len();
|
||||
.text_len();
|
||||
if comment_indentation == preceding_indentation {
|
||||
return CommentPlacement::Default(comment);
|
||||
}
|
||||
|
@ -697,7 +695,7 @@ fn handle_own_line_comment_after_branch<'a>(
|
|||
loop {
|
||||
let child_indentation = indentation(locator, &last_child_in_parent)
|
||||
.unwrap_or_default()
|
||||
.len();
|
||||
.text_len();
|
||||
|
||||
// There a three cases:
|
||||
// ```python
|
||||
|
@ -749,6 +747,80 @@ fn handle_own_line_comment_after_branch<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
/// Determine the indentation level of an own-line comment, defined as the minimum indentation of
|
||||
/// all comments between the preceding node and the comment, including the comment itself. In
|
||||
/// other words, we don't allow successive comments to ident _further_ than any preceding comments.
|
||||
///
|
||||
/// For example, given:
|
||||
/// ```python
|
||||
/// if True:
|
||||
/// pass
|
||||
/// # comment
|
||||
/// ```
|
||||
///
|
||||
/// The indentation would be 4, as the comment is indented by 4 spaces.
|
||||
///
|
||||
/// Given:
|
||||
/// ```python
|
||||
/// if True:
|
||||
/// pass
|
||||
/// # comment
|
||||
/// else:
|
||||
/// pass
|
||||
/// ```
|
||||
///
|
||||
/// The indentation would be 0, as the comment is not indented at all.
|
||||
///
|
||||
/// Given:
|
||||
/// ```python
|
||||
/// if True:
|
||||
/// pass
|
||||
/// # comment
|
||||
/// # comment
|
||||
/// ```
|
||||
///
|
||||
/// Both comments would be marked as indented at 4 spaces, as the indentation of the first comment
|
||||
/// is used for the second comment.
|
||||
///
|
||||
/// This logic avoids pathological cases like:
|
||||
/// ```python
|
||||
/// try:
|
||||
/// if True:
|
||||
/// if True:
|
||||
/// pass
|
||||
///
|
||||
/// # a
|
||||
/// # b
|
||||
/// # c
|
||||
/// except Exception:
|
||||
/// pass
|
||||
/// ```
|
||||
///
|
||||
/// If we don't use the minimum indentation of any preceding comments, we would mark `# b` as
|
||||
/// indented to the same depth as `pass`, which could in turn lead to us treating it as a trailing
|
||||
/// comment of `pass`, despite there being a comment between them that "resets" the indentation.
|
||||
fn own_line_comment_indentation(
|
||||
preceding: AnyNodeRef,
|
||||
comment: &DecoratedComment,
|
||||
locator: &Locator,
|
||||
) -> TextSize {
|
||||
let tokenizer = SimpleTokenizer::new(
|
||||
locator.contents(),
|
||||
TextRange::new(locator.full_line_end(preceding.end()), comment.end()),
|
||||
);
|
||||
|
||||
tokenizer
|
||||
.filter_map(|token| {
|
||||
if token.kind() == SimpleTokenKind::Comment {
|
||||
indentation_at_offset(token.start(), locator).map(TextLen::text_len)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.min()
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Attaches comments for the positional-only parameters separator `/` or the keywords-only
|
||||
/// parameters separator `*` as dangling comments to the enclosing [`Parameters`] node.
|
||||
///
|
||||
|
|
|
@ -214,6 +214,56 @@ else:
|
|||
pass
|
||||
|
||||
|
||||
# Regression test for: https://github.com/astral-sh/ruff/issues/7602
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
|
||||
#a
|
||||
#b
|
||||
#c
|
||||
else:
|
||||
pass
|
||||
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
# b
|
||||
|
||||
# a
|
||||
# c
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
# Same indent
|
||||
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
|
||||
#a
|
||||
#b
|
||||
#c
|
||||
else:
|
||||
pass
|
||||
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
|
||||
# a
|
||||
# b
|
||||
# c
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
# Regression test for https://github.com/astral-sh/ruff/issues/5337
|
||||
if parent_body:
|
||||
if current_body:
|
||||
|
@ -444,17 +494,68 @@ else:
|
|||
pass
|
||||
|
||||
|
||||
# Regression test for: https://github.com/astral-sh/ruff/issues/7602
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
|
||||
# a
|
||||
# b
|
||||
# c
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
# b
|
||||
|
||||
# a
|
||||
# c
|
||||
else:
|
||||
pass
|
||||
|
||||
# Same indent
|
||||
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
|
||||
# a
|
||||
# b
|
||||
# c
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
if True:
|
||||
if True:
|
||||
if True:
|
||||
pass
|
||||
|
||||
# a
|
||||
# b
|
||||
# c
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
# Regression test for https://github.com/astral-sh/ruff/issues/5337
|
||||
if parent_body:
|
||||
if current_body:
|
||||
child_in_body()
|
||||
last_child_in_current_body() # may or may not have children on its own
|
||||
# e
|
||||
# f
|
||||
# c
|
||||
# d
|
||||
# a
|
||||
# b
|
||||
# c
|
||||
# d
|
||||
# e
|
||||
# f
|
||||
```
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue