[ruff] Mark fixes for unsorted-dunder-all and unsorted-dunder-slots as unsafe when there are complex comments in the sequence (RUF022, RUF023) (#14560)

2025-08-03 18:28:24 +00:00 · 2024-11-24 12:49:29 +00:00 · 2024-11-24 12:49:29 +00:00 · ac23c99744
commit ac23c99744
parent e5c7d87461
5 changed files with 219 additions and 87 deletions
--- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs
+++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs
@ -6,6 +6,8 @@
 use std::borrow::Cow;
 use std::cmp::Ordering;

+use itertools::Itertools;
+
 use ruff_python_ast as ast;
 use ruff_python_codegen::Stylist;
 use ruff_python_parser::{TokenKind, Tokens};
@ -314,6 +316,52 @@ impl<'a> SortClassification<'a> {
    }
 }

+/// The complexity of the comments in a multiline sequence.
+///
+/// A sequence like this has "simple" comments: it's unambiguous
+/// which item each comment refers to, so there's no "risk" in sorting it:
+///
+/// ```py
+/// __all__ = [
+///     "foo",  # comment1
+///     "bar",  # comment2
+/// ]
+/// ```
+///
+/// This sequence has complex comments: we can't safely autofix the sort here,
+/// as the own-line comments might be used to create sections in `__all__`:
+///
+/// ```py
+/// __all__ = [
+///     # fooey things
+///     "foo1",
+///     "foo2",
+///     # barey things
+///     "bar1",
+///     "foobar",
+/// ]
+/// ```
+///
+/// This sequence also has complex comments -- it's ambiguous which item
+/// each comment should belong to:
+///
+/// ```py
+/// __all__ = [
+///     "foo1", "foo", "barfoo",  # fooey things
+///     "baz", bazz2", "fbaz",  # barrey things
+/// ]
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub(super) enum CommentComplexity {
+    Simple,
+    Complex,
+}
+
+impl CommentComplexity {
+    pub(super) const fn is_complex(self) -> bool {
+        matches!(self, CommentComplexity::Complex)
+    }
+}
+
 // An instance of this struct encapsulates an analysis
 /// of a multiline Python tuple/list that represents an
 /// `__all__`/`__slots__`/etc. definition or augmentation.
@ -328,6 +376,24 @@ impl<'a> MultilineStringSequenceValue<'a> {
        self.items.len()
    }

+    /// Determine the [`CommentComplexity`] of this multiline string sequence.
+    pub(super) fn comment_complexity(&self) -> CommentComplexity {
+        if self.items.iter().tuple_windows().any(|(first, second)| {
+            first.has_own_line_comments()
+                || first
+                    .end_of_line_comments
+                    .is_some_and(|end_line_comment| second.start() < end_line_comment.end())
+        }) || self
+            .items
+            .last()
+            .is_some_and(StringSequenceItem::has_own_line_comments)
+        {
+            CommentComplexity::Complex
+        } else {
+            CommentComplexity::Simple
+        }
+    }
+
    /// Analyse the source range for a multiline Python tuple/list that
    /// represents an `__all__`/`__slots__`/etc. definition or augmentation.
    /// Return `None` if the analysis fails for whatever reason.
@ -792,6 +858,10 @@ impl<'a> StringSequenceItem<'a> {
    fn with_no_comments(value: &'a str, element_range: TextRange) -> Self {
        Self::new(value, vec![], element_range, None)
    }
+
+    fn has_own_line_comments(&self) -> bool {
+        !self.preceding_comment_ranges.is_empty()
+    }
 }

 impl Ranged for StringSequenceItem<'_> {
--- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs
+++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs
@ -1,4 +1,4 @@
-use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
+use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast as ast;
 use ruff_source_file::LineRanges;
@ -54,19 +54,40 @@ use crate::rules::ruff::rules::sequence_sorting::{
 /// ```
 ///
 /// ## Fix safety
-/// This rule's fix is marked as always being safe, in that
+/// This rule's fix is marked as unsafe if there are any comments that take up
+/// a whole line by themselves inside the `__all__` definition, for example:
+/// ```py
+/// __all__ = [
+///     # eggy things
+///     "duck_eggs",
+///     "chicken_eggs",
+///     # hammy things
+///     "country_ham",
+///     "parma_ham",
+/// ]
+/// ```
+///
+/// This is a common pattern used to delimit categories within a module's API,
+/// but it would be out of the scope of this rule to attempt to maintain these
+/// categories when alphabetically sorting the items of `__all__`.
+///
+/// The fix is also marked as unsafe if there are more than two `__all__` items
+/// on a single line and that line also has a trailing comment, since here it
+/// is impossible to accurately gauge which item the comment should be moved
+/// with when sorting `__all__`:
+/// ```py
+/// __all__ = [
+///     "a", "c", "e",  # a comment
+///     "b", "d", "f",  # a second  comment
+/// ]
+/// ```
+///
+/// Other than this, the rule's fix is marked as always being safe, in that
 /// it should very rarely alter the semantics of any Python code.
 /// However, note that (although it's rare) the value of `__all__`
 /// could be read by code elsewhere that depends on the exact
 /// iteration order of the items in `__all__`, in which case this
 /// rule's fix could theoretically cause breakage.
-///
-/// Note also that for multiline `__all__` definitions
-/// that include comments on their own line, it can be hard
-/// to tell where the comments should be moved to when sorting
-/// the contents of `__all__`. While this rule's fix will
-/// never delete a comment, it might *sometimes* move a
-/// comment to an unexpected location.
 #[violation]
 pub struct UnsortedDunderAll;

@ -208,37 +229,42 @@ fn create_fix(
    let locator = checker.locator();
    let is_multiline = locator.contains_line_break(range);

-    let sorted_source_code = {
-        // The machinery in the `MultilineDunderAllValue` is actually
-        // sophisticated enough that it would work just as well for
-        // single-line `__all__` definitions, and we could reduce
-        // the number of lines of code in this file by doing that.
-        // Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
-        // must process every token in an `__all__` definition as
-        // part of its analysis, and this is quite slow. For
-        // single-line `__all__` definitions, it's also unnecessary,
-        // as it's impossible to have comments in between the
-        // `__all__` elements if the `__all__` definition is all on
-        // a single line. Therefore, as an optimisation, we do the
-        // bare minimum of token-processing for single-line `__all__`
-        // definitions:
-        if is_multiline {
-            let value = MultilineStringSequenceValue::from_source_range(
-                range,
-                kind,
-                locator,
-                checker.tokens(),
-                string_items,
-            )?;
-            assert_eq!(value.len(), elts.len());
-            value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist())
+    // The machinery in the `MultilineDunderAllValue` is actually
+    // sophisticated enough that it would work just as well for
+    // single-line `__all__` definitions, and we could reduce
+    // the number of lines of code in this file by doing that.
+    // Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
+    // must process every token in an `__all__` definition as
+    // part of its analysis, and this is quite slow. For
+    // single-line `__all__` definitions, it's also unnecessary,
+    // as it's impossible to have comments in between the
+    // `__all__` elements if the `__all__` definition is all on
+    // a single line. Therefore, as an optimisation, we do the
+    // bare minimum of token-processing for single-line `__all__`
+    // definitions:
+    let (sorted_source_code, applicability) = if is_multiline {
+        let value = MultilineStringSequenceValue::from_source_range(
+            range,
+            kind,
+            locator,
+            checker.tokens(),
+            string_items,
+        )?;
+        assert_eq!(value.len(), elts.len());
+        let applicability = if value.comment_complexity().is_complex() {
+            Applicability::Unsafe
        } else {
-            sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE)
-        }
+            Applicability::Safe
+        };
+        let sorted_source =
+            value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist());
+        (sorted_source, applicability)
+    } else {
+        let sorted_source =
+            sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE);
+        (sorted_source, Applicability::Safe)
    };

-    Some(Fix::safe_edit(Edit::range_replacement(
-        sorted_source_code,
-        range,
-    )))
+    let edit = Edit::range_replacement(sorted_source_code, range);
+    Some(Fix::applicable_edit(edit, applicability))
 }
--- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs
+++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs
@ -11,8 +11,8 @@ use ruff_text_size::{Ranged, TextRange};

 use crate::checkers::ast::Checker;
 use crate::rules::ruff::rules::sequence_sorting::{
-    sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind,
-    SortClassification, SortingStyle,
+    sort_single_line_elements_sequence, CommentComplexity, MultilineStringSequenceValue,
+    SequenceKind, SortClassification, SortingStyle,
 };
 use crate::Locator;

@ -37,11 +37,44 @@ use crate::Locator;
 /// ```
 ///
 /// ## Fix safety
-/// This rule's fix is marked as unsafe whenever Ruff can detect that code
-/// elsewhere in the same file reads the `__slots__` variable in some way.
-/// This is because the order of the items in `__slots__` may have semantic
-/// significance if the `__slots__` of a class is being iterated over, or
-/// being assigned to another value.
+/// This rule's fix is marked as unsafe in three situations.
+///
+/// Firstly, the fix is unsafe if there are any comments that take up
+/// a whole line by themselves inside the `__slots__` definition, for example:
+/// ```py
+/// class Foo:
+///     __slots__ = [
+///         # eggy things
+///         "duck_eggs",
+///         "chicken_eggs",
+///         # hammy things
+///         "country_ham",
+///         "parma_ham",
+///     ]
+/// ```
+///
+/// This is a common pattern used to delimit categories within a class's slots,
+/// but it would be out of the scope of this rule to attempt to maintain these
+/// categories when applying a natural sort to the items of `__slots__`.
+///
+/// Secondly, the fix is also marked as unsafe if there are more than two
+/// `__slots__` items on a single line and that line also has a trailing
+/// comment, since here it is impossible to accurately gauge which item the
+/// comment should be moved with when sorting `__slots__`:
+/// ```py
+/// class Bar:
+///     __slots__ = [
+///         "a", "c", "e",  # a comment
+///         "b", "d", "f",  # a second  comment
+///     ]
+/// ```
+///
+/// Lastly, this rule's fix is marked as unsafe whenever Ruff can detect that
+/// code elsewhere in the same file reads the `__slots__` variable in some way
+/// and the `__slots__` variable is not assigned to a set. This is because the
+/// order of the items in `__slots__` may have semantic significance if the
+/// `__slots__` of a class is being iterated over, or being assigned to another
+/// value.
 ///
 /// In the vast majority of other cases, this rule's fix is unlikely to
 /// cause breakage; as such, Ruff will otherwise mark this rule's fix as
@ -49,12 +82,6 @@ use crate::Locator;
 /// could still be read by code outside of the module in which the
 /// `__slots__` definition occurs, in which case this rule's fix could
 /// theoretically cause breakage.
-///
-/// Additionally, note that for multiline `__slots__` definitions that
-/// include comments on their own line, it can be hard to tell where the
-/// comments should be moved to when sorting the contents of `__slots__`.
-/// While this rule's fix will never delete a comment, it might *sometimes*
-/// move a comment to an unexpected location.
 #[violation]
 pub struct UnsortedDunderSlots {
    class_name: ast::name::Name,
@ -122,15 +149,17 @@ pub(crate) fn sort_dunder_slots(checker: &Checker, binding: &Binding) -> Option<
    );

    if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification {
-        if let Some(sorted_source_code) = display.generate_sorted_source_code(&items, checker) {
+        if let Some((sorted_source_code, comment_complexity)) =
+            display.generate_sorted_source_code(&items, checker)
+        {
            let edit = Edit::range_replacement(sorted_source_code, display.range());
-
-            let applicability = if display.kind.is_set_literal() || binding.is_unused() {
-                Applicability::Safe
-            } else {
+            let applicability = if comment_complexity.is_complex()
+                || (binding.is_used() && !display.kind.is_set_literal())
+            {
                Applicability::Unsafe
+            } else {
+                Applicability::Safe
            };
-
            diagnostic.set_fix(Fix::applicable_edit(edit, applicability));
        }
    }
@ -219,7 +248,11 @@ impl<'a> StringLiteralDisplay<'a> {
        Some(result)
    }

-    fn generate_sorted_source_code(&self, elements: &[&str], checker: &Checker) -> Option<String> {
+    fn generate_sorted_source_code(
+        &self,
+        elements: &[&str],
+        checker: &Checker,
+    ) -> Option<(String, CommentComplexity)> {
        let locator = checker.locator();

        let multiline_classification = if locator.contains_line_break(self.range()) {
@ -238,26 +271,31 @@ impl<'a> StringLiteralDisplay<'a> {
                    elements,
                )?;
                assert_eq!(analyzed_sequence.len(), self.elts.len());
-                Some(analyzed_sequence.into_sorted_source_code(
+                let comment_complexity = analyzed_sequence.comment_complexity();
+                let sorted_code = analyzed_sequence.into_sorted_source_code(
                    SORTING_STYLE,
                    locator,
                    checker.stylist(),
-                ))
+                );
+                Some((sorted_code, comment_complexity))
            }
            // Sorting multiline dicts is unsupported
            (DisplayKind::Dict { .. }, MultilineClassification::Multiline) => None,
            (DisplayKind::Sequence(sequence_kind), MultilineClassification::SingleLine) => {
-                Some(sort_single_line_elements_sequence(
+                let sorted_code = sort_single_line_elements_sequence(
                    *sequence_kind,
                    &self.elts,
                    elements,
                    locator,
                    SORTING_STYLE,
-                ))
+                );
+                Some((sorted_code, CommentComplexity::Simple))
+            }
+            (DisplayKind::Dict { items }, MultilineClassification::SingleLine) => {
+                let sorted_code =
+                    sort_single_line_elements_dict(&self.elts, elements, items, locator);
+                Some((sorted_code, CommentComplexity::Simple))
            }
-            (DisplayKind::Dict { items }, MultilineClassification::SingleLine) => Some(
-                sort_single_line_elements_dict(&self.elts, elements, items, locator),
-            ),
        }
    }
 }
--- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linterrulesrufftestsRUF022_RUF022.py.snap
+++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linterrulesrufftestsRUF022_RUF022.py.snap
@ -1,6 +1,5 @@
 ---
 source: crates/ruff_linter/src/rules/ruff/mod.rs
-snapshot_kind: text
 ---
 RUF022.py:5:11: RUF022 [*] `__all__` is not sorted
  |
@ -267,7 +266,7 @@ RUF022.py:32:11: RUF022 [*] `__all__` is not sorted
   |
   = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 30 30 | ####################################
 31 31 | 
 32 32 | __all__ = (
@ -302,7 +301,7 @@ RUF022.py:40:11: RUF022 [*] `__all__` is not sorted
   |
   = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 38 38 | )
 39 39 | 
 40 40 | __all__ = [
@ -423,7 +422,7 @@ RUF022.py:91:11: RUF022 [*] `__all__` is not sorted
   |
   = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 88  88  | ##########################################
 89  89  | 
 90  90  | # comment0
@ -464,7 +463,7 @@ RUF022.py:101:11: RUF022 [*] `__all__` is not sorted
    |
    = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 99  99  | # comment7
 100 100 | 
 101 101 | __all__ = [  # comment0
@ -591,7 +590,7 @@ RUF022.py:125:28: RUF022 [*] `__all__` is not sorted
    |
    = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 123 123 |            "register_error", "lookup_error"]
 124 124 | 
 125 125 | __all__: tuple[str, ...] = (  # a comment about the opening paren
@ -658,7 +657,7 @@ RUF022.py:145:16: RUF022 [*] `__all__` is not sorted
    |
    = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 143 143 | )
 144 144 | 
 145 145 | __all__.extend((  # comment0
@ -690,7 +689,7 @@ RUF022.py:155:5: RUF022 [*] `__all__` is not sorted
    |
    = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 153 153 | __all__.extend(  # comment0
 154 154 |     # comment1
 155 155 |     (  # comment2
@ -723,7 +722,7 @@ RUF022.py:164:16: RUF022 [*] `__all__` is not sorted
    |
    = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 162 162 | )  # comment2
 163 163 | 
 164 164 | __all__.extend([  # comment0
@ -755,7 +754,7 @@ RUF022.py:174:5: RUF022 [*] `__all__` is not sorted
    |
    = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 172 172 | __all__.extend(  # comment0
 173 173 |     # comment1
 174 174 |     [  # comment2
@ -785,7 +784,7 @@ RUF022.py:183:11: RUF022 [*] `__all__` is not sorted
    |
    = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 180 180 |     ]  # comment4
 181 181 | )  # comment2
 182 182 | 
@ -1014,7 +1013,7 @@ RUF022.py:243:11: RUF022 [*] `__all__` is not sorted
    |
    = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 241 241 |                 ]
 242 242 | 
 243 243 | __all__ = (
@ -1056,7 +1055,7 @@ RUF022.py:253:11: RUF022 [*] `__all__` is not sorted
    |
    = help: Apply an isort-style sorting to `__all__`

-ℹ Safe fix
+ℹ Unsafe fix
 251 251 | )
 252 252 | 
 253 253 | __all__ = (  # comment about the opening paren
--- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linterrulesrufftestsRUF023_RUF023.py.snap
+++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linterrulesrufftestsRUF023_RUF023.py.snap
@ -1,6 +1,5 @@
 ---
 source: crates/ruff_linter/src/rules/ruff/mod.rs
-snapshot_kind: text
 ---
 RUF023.py:6:17: RUF023 [*] `Klass.__slots__` is not sorted
  |
@ -205,7 +204,7 @@ RUF023.py:33:17: RUF023 [*] `Klass3.__slots__` is not sorted
   |
   = help: Apply a natural sort to `Klass3.__slots__`

-ℹ Safe fix
+ℹ Unsafe fix
 31 31 | 
 32 32 | class Klass3:
 33 33 |     __slots__ = (
@ -240,7 +239,7 @@ RUF023.py:40:17: RUF023 [*] `Klass3.__slots__` is not sorted
   |
   = help: Apply a natural sort to `Klass3.__slots__`

-ℹ Safe fix
+ℹ Unsafe fix
 38 38 |         "a0"
 39 39 |     )
 40 40 |     __slots__ = [
@ -275,7 +274,7 @@ RUF023.py:54:17: RUF023 [*] `Klass4.__slots__` is not sorted
   |
   = help: Apply a natural sort to `Klass4.__slots__`

-ℹ Safe fix
+ℹ Unsafe fix
 51 51 | 
 52 52 | class Klass4:
 53 53 |     # comment0
@ -316,7 +315,7 @@ RUF023.py:64:17: RUF023 [*] `Klass4.__slots__` is not sorted
   |
   = help: Apply a natural sort to `Klass4.__slots__`

-ℹ Safe fix
+ℹ Unsafe fix
 62 62 |     # comment7
 63 63 | 
 64 64 |     __slots__ = [  # comment0
@ -377,7 +376,7 @@ RUF023.py:75:17: RUF023 [*] `PurePath.__slots__` is not sorted
    |
    = help: Apply a natural sort to `PurePath.__slots__`

-ℹ Safe fix
+ℹ Unsafe fix
 73  73  | # from cpython/Lib/pathlib/__init__.py
 74  74  | class PurePath:
 75  75  |     __slots__ = (
@ -460,7 +459,7 @@ RUF023.py:113:17: RUF023 [*] `ArgumentDescriptor.__slots__` is not sorted
    |
    = help: Apply a natural sort to `ArgumentDescriptor.__slots__`

-ℹ Safe fix
+ℹ Unsafe fix
 111 111 | # From cpython/Lib/pickletools.py
 112 112 | class ArgumentDescriptor(object):
 113 113 |     __slots__ = (
@ -649,7 +648,7 @@ RUF023.py:181:17: RUF023 [*] `BezierBuilder4.__slots__` is not sorted
    |
    = help: Apply a natural sort to `BezierBuilder4.__slots__`

-ℹ Safe fix
+ℹ Unsafe fix
 179 179 | 
 180 180 | class BezierBuilder4:
 181 181 |     __slots__ = (