[ruff] Mark fixes for unsorted-dunder-all and unsorted-dunder-slots as unsafe when there are complex comments in the sequence (RUF022, RUF023) (#14560)
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Blocked by required conditions
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / ecosystem (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / benchmarks (push) Blocked by required conditions

This commit is contained in:
Alex Waygood 2024-11-24 12:49:29 +00:00 committed by GitHub
parent e5c7d87461
commit ac23c99744
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 219 additions and 87 deletions

View file

@ -6,6 +6,8 @@
use std::borrow::Cow;
use std::cmp::Ordering;
use itertools::Itertools;
use ruff_python_ast as ast;
use ruff_python_codegen::Stylist;
use ruff_python_parser::{TokenKind, Tokens};
@ -314,6 +316,52 @@ impl<'a> SortClassification<'a> {
}
}
/// The complexity of the comments in a multiline sequence.
///
/// A sequence like this has "simple" comments: it's unambiguous
/// which item each comment refers to, so there's no "risk" in sorting it:
///
/// ```py
/// __all__ = [
/// "foo", # comment1
/// "bar", # comment2
/// ]
/// ```
///
/// This sequence has complex comments: we can't safely autofix the sort here,
/// as the own-line comments might be used to create sections in `__all__`:
///
/// ```py
/// __all__ = [
/// # fooey things
/// "foo1",
/// "foo2",
/// # barey things
/// "bar1",
/// "foobar",
/// ]
/// ```
///
/// This sequence also has complex comments -- it's ambiguous which item
/// each comment should belong to:
///
/// ```py
/// __all__ = [
/// "foo1", "foo", "barfoo", # fooey things
/// "baz", bazz2", "fbaz", # barrey things
/// ]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub(super) enum CommentComplexity {
Simple,
Complex,
}
impl CommentComplexity {
pub(super) const fn is_complex(self) -> bool {
matches!(self, CommentComplexity::Complex)
}
}
// An instance of this struct encapsulates an analysis
/// of a multiline Python tuple/list that represents an
/// `__all__`/`__slots__`/etc. definition or augmentation.
@ -328,6 +376,24 @@ impl<'a> MultilineStringSequenceValue<'a> {
self.items.len()
}
/// Determine the [`CommentComplexity`] of this multiline string sequence.
pub(super) fn comment_complexity(&self) -> CommentComplexity {
if self.items.iter().tuple_windows().any(|(first, second)| {
first.has_own_line_comments()
|| first
.end_of_line_comments
.is_some_and(|end_line_comment| second.start() < end_line_comment.end())
}) || self
.items
.last()
.is_some_and(StringSequenceItem::has_own_line_comments)
{
CommentComplexity::Complex
} else {
CommentComplexity::Simple
}
}
/// Analyse the source range for a multiline Python tuple/list that
/// represents an `__all__`/`__slots__`/etc. definition or augmentation.
/// Return `None` if the analysis fails for whatever reason.
@ -792,6 +858,10 @@ impl<'a> StringSequenceItem<'a> {
fn with_no_comments(value: &'a str, element_range: TextRange) -> Self {
Self::new(value, vec![], element_range, None)
}
fn has_own_line_comments(&self) -> bool {
!self.preceding_comment_ranges.is_empty()
}
}
impl Ranged for StringSequenceItem<'_> {

View file

@ -1,4 +1,4 @@
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_source_file::LineRanges;
@ -54,19 +54,40 @@ use crate::rules::ruff::rules::sequence_sorting::{
/// ```
///
/// ## Fix safety
/// This rule's fix is marked as always being safe, in that
/// This rule's fix is marked as unsafe if there are any comments that take up
/// a whole line by themselves inside the `__all__` definition, for example:
/// ```py
/// __all__ = [
/// # eggy things
/// "duck_eggs",
/// "chicken_eggs",
/// # hammy things
/// "country_ham",
/// "parma_ham",
/// ]
/// ```
///
/// This is a common pattern used to delimit categories within a module's API,
/// but it would be out of the scope of this rule to attempt to maintain these
/// categories when alphabetically sorting the items of `__all__`.
///
/// The fix is also marked as unsafe if there are more than two `__all__` items
/// on a single line and that line also has a trailing comment, since here it
/// is impossible to accurately gauge which item the comment should be moved
/// with when sorting `__all__`:
/// ```py
/// __all__ = [
/// "a", "c", "e", # a comment
/// "b", "d", "f", # a second comment
/// ]
/// ```
///
/// Other than this, the rule's fix is marked as always being safe, in that
/// it should very rarely alter the semantics of any Python code.
/// However, note that (although it's rare) the value of `__all__`
/// could be read by code elsewhere that depends on the exact
/// iteration order of the items in `__all__`, in which case this
/// rule's fix could theoretically cause breakage.
///
/// Note also that for multiline `__all__` definitions
/// that include comments on their own line, it can be hard
/// to tell where the comments should be moved to when sorting
/// the contents of `__all__`. While this rule's fix will
/// never delete a comment, it might *sometimes* move a
/// comment to an unexpected location.
#[violation]
pub struct UnsortedDunderAll;
@ -208,37 +229,42 @@ fn create_fix(
let locator = checker.locator();
let is_multiline = locator.contains_line_break(range);
let sorted_source_code = {
// The machinery in the `MultilineDunderAllValue` is actually
// sophisticated enough that it would work just as well for
// single-line `__all__` definitions, and we could reduce
// the number of lines of code in this file by doing that.
// Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
// must process every token in an `__all__` definition as
// part of its analysis, and this is quite slow. For
// single-line `__all__` definitions, it's also unnecessary,
// as it's impossible to have comments in between the
// `__all__` elements if the `__all__` definition is all on
// a single line. Therefore, as an optimisation, we do the
// bare minimum of token-processing for single-line `__all__`
// definitions:
if is_multiline {
let value = MultilineStringSequenceValue::from_source_range(
range,
kind,
locator,
checker.tokens(),
string_items,
)?;
assert_eq!(value.len(), elts.len());
value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist())
// The machinery in the `MultilineDunderAllValue` is actually
// sophisticated enough that it would work just as well for
// single-line `__all__` definitions, and we could reduce
// the number of lines of code in this file by doing that.
// Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
// must process every token in an `__all__` definition as
// part of its analysis, and this is quite slow. For
// single-line `__all__` definitions, it's also unnecessary,
// as it's impossible to have comments in between the
// `__all__` elements if the `__all__` definition is all on
// a single line. Therefore, as an optimisation, we do the
// bare minimum of token-processing for single-line `__all__`
// definitions:
let (sorted_source_code, applicability) = if is_multiline {
let value = MultilineStringSequenceValue::from_source_range(
range,
kind,
locator,
checker.tokens(),
string_items,
)?;
assert_eq!(value.len(), elts.len());
let applicability = if value.comment_complexity().is_complex() {
Applicability::Unsafe
} else {
sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE)
}
Applicability::Safe
};
let sorted_source =
value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist());
(sorted_source, applicability)
} else {
let sorted_source =
sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE);
(sorted_source, Applicability::Safe)
};
Some(Fix::safe_edit(Edit::range_replacement(
sorted_source_code,
range,
)))
let edit = Edit::range_replacement(sorted_source_code, range);
Some(Fix::applicable_edit(edit, applicability))
}

View file

@ -11,8 +11,8 @@ use ruff_text_size::{Ranged, TextRange};
use crate::checkers::ast::Checker;
use crate::rules::ruff::rules::sequence_sorting::{
sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind,
SortClassification, SortingStyle,
sort_single_line_elements_sequence, CommentComplexity, MultilineStringSequenceValue,
SequenceKind, SortClassification, SortingStyle,
};
use crate::Locator;
@ -37,11 +37,44 @@ use crate::Locator;
/// ```
///
/// ## Fix safety
/// This rule's fix is marked as unsafe whenever Ruff can detect that code
/// elsewhere in the same file reads the `__slots__` variable in some way.
/// This is because the order of the items in `__slots__` may have semantic
/// significance if the `__slots__` of a class is being iterated over, or
/// being assigned to another value.
/// This rule's fix is marked as unsafe in three situations.
///
/// Firstly, the fix is unsafe if there are any comments that take up
/// a whole line by themselves inside the `__slots__` definition, for example:
/// ```py
/// class Foo:
/// __slots__ = [
/// # eggy things
/// "duck_eggs",
/// "chicken_eggs",
/// # hammy things
/// "country_ham",
/// "parma_ham",
/// ]
/// ```
///
/// This is a common pattern used to delimit categories within a class's slots,
/// but it would be out of the scope of this rule to attempt to maintain these
/// categories when applying a natural sort to the items of `__slots__`.
///
/// Secondly, the fix is also marked as unsafe if there are more than two
/// `__slots__` items on a single line and that line also has a trailing
/// comment, since here it is impossible to accurately gauge which item the
/// comment should be moved with when sorting `__slots__`:
/// ```py
/// class Bar:
/// __slots__ = [
/// "a", "c", "e", # a comment
/// "b", "d", "f", # a second comment
/// ]
/// ```
///
/// Lastly, this rule's fix is marked as unsafe whenever Ruff can detect that
/// code elsewhere in the same file reads the `__slots__` variable in some way
/// and the `__slots__` variable is not assigned to a set. This is because the
/// order of the items in `__slots__` may have semantic significance if the
/// `__slots__` of a class is being iterated over, or being assigned to another
/// value.
///
/// In the vast majority of other cases, this rule's fix is unlikely to
/// cause breakage; as such, Ruff will otherwise mark this rule's fix as
@ -49,12 +82,6 @@ use crate::Locator;
/// could still be read by code outside of the module in which the
/// `__slots__` definition occurs, in which case this rule's fix could
/// theoretically cause breakage.
///
/// Additionally, note that for multiline `__slots__` definitions that
/// include comments on their own line, it can be hard to tell where the
/// comments should be moved to when sorting the contents of `__slots__`.
/// While this rule's fix will never delete a comment, it might *sometimes*
/// move a comment to an unexpected location.
#[violation]
pub struct UnsortedDunderSlots {
class_name: ast::name::Name,
@ -122,15 +149,17 @@ pub(crate) fn sort_dunder_slots(checker: &Checker, binding: &Binding) -> Option<
);
if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification {
if let Some(sorted_source_code) = display.generate_sorted_source_code(&items, checker) {
if let Some((sorted_source_code, comment_complexity)) =
display.generate_sorted_source_code(&items, checker)
{
let edit = Edit::range_replacement(sorted_source_code, display.range());
let applicability = if display.kind.is_set_literal() || binding.is_unused() {
Applicability::Safe
} else {
let applicability = if comment_complexity.is_complex()
|| (binding.is_used() && !display.kind.is_set_literal())
{
Applicability::Unsafe
} else {
Applicability::Safe
};
diagnostic.set_fix(Fix::applicable_edit(edit, applicability));
}
}
@ -219,7 +248,11 @@ impl<'a> StringLiteralDisplay<'a> {
Some(result)
}
fn generate_sorted_source_code(&self, elements: &[&str], checker: &Checker) -> Option<String> {
fn generate_sorted_source_code(
&self,
elements: &[&str],
checker: &Checker,
) -> Option<(String, CommentComplexity)> {
let locator = checker.locator();
let multiline_classification = if locator.contains_line_break(self.range()) {
@ -238,26 +271,31 @@ impl<'a> StringLiteralDisplay<'a> {
elements,
)?;
assert_eq!(analyzed_sequence.len(), self.elts.len());
Some(analyzed_sequence.into_sorted_source_code(
let comment_complexity = analyzed_sequence.comment_complexity();
let sorted_code = analyzed_sequence.into_sorted_source_code(
SORTING_STYLE,
locator,
checker.stylist(),
))
);
Some((sorted_code, comment_complexity))
}
// Sorting multiline dicts is unsupported
(DisplayKind::Dict { .. }, MultilineClassification::Multiline) => None,
(DisplayKind::Sequence(sequence_kind), MultilineClassification::SingleLine) => {
Some(sort_single_line_elements_sequence(
let sorted_code = sort_single_line_elements_sequence(
*sequence_kind,
&self.elts,
elements,
locator,
SORTING_STYLE,
))
);
Some((sorted_code, CommentComplexity::Simple))
}
(DisplayKind::Dict { items }, MultilineClassification::SingleLine) => {
let sorted_code =
sort_single_line_elements_dict(&self.elts, elements, items, locator);
Some((sorted_code, CommentComplexity::Simple))
}
(DisplayKind::Dict { items }, MultilineClassification::SingleLine) => Some(
sort_single_line_elements_dict(&self.elts, elements, items, locator),
),
}
}
}

View file

@ -1,6 +1,5 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
snapshot_kind: text
---
RUF022.py:5:11: RUF022 [*] `__all__` is not sorted
|
@ -267,7 +266,7 @@ RUF022.py:32:11: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
30 30 | ####################################
31 31 |
32 32 | __all__ = (
@ -302,7 +301,7 @@ RUF022.py:40:11: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
38 38 | )
39 39 |
40 40 | __all__ = [
@ -423,7 +422,7 @@ RUF022.py:91:11: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
88 88 | ##########################################
89 89 |
90 90 | # comment0
@ -464,7 +463,7 @@ RUF022.py:101:11: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
99 99 | # comment7
100 100 |
101 101 | __all__ = [ # comment0
@ -591,7 +590,7 @@ RUF022.py:125:28: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
123 123 | "register_error", "lookup_error"]
124 124 |
125 125 | __all__: tuple[str, ...] = ( # a comment about the opening paren
@ -658,7 +657,7 @@ RUF022.py:145:16: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
143 143 | )
144 144 |
145 145 | __all__.extend(( # comment0
@ -690,7 +689,7 @@ RUF022.py:155:5: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
153 153 | __all__.extend( # comment0
154 154 | # comment1
155 155 | ( # comment2
@ -723,7 +722,7 @@ RUF022.py:164:16: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
162 162 | ) # comment2
163 163 |
164 164 | __all__.extend([ # comment0
@ -755,7 +754,7 @@ RUF022.py:174:5: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
172 172 | __all__.extend( # comment0
173 173 | # comment1
174 174 | [ # comment2
@ -785,7 +784,7 @@ RUF022.py:183:11: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
180 180 | ] # comment4
181 181 | ) # comment2
182 182 |
@ -1014,7 +1013,7 @@ RUF022.py:243:11: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
241 241 | ]
242 242 |
243 243 | __all__ = (
@ -1056,7 +1055,7 @@ RUF022.py:253:11: RUF022 [*] `__all__` is not sorted
|
= help: Apply an isort-style sorting to `__all__`
Safe fix
Unsafe fix
251 251 | )
252 252 |
253 253 | __all__ = ( # comment about the opening paren

View file

@ -1,6 +1,5 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
snapshot_kind: text
---
RUF023.py:6:17: RUF023 [*] `Klass.__slots__` is not sorted
|
@ -205,7 +204,7 @@ RUF023.py:33:17: RUF023 [*] `Klass3.__slots__` is not sorted
|
= help: Apply a natural sort to `Klass3.__slots__`
Safe fix
Unsafe fix
31 31 |
32 32 | class Klass3:
33 33 | __slots__ = (
@ -240,7 +239,7 @@ RUF023.py:40:17: RUF023 [*] `Klass3.__slots__` is not sorted
|
= help: Apply a natural sort to `Klass3.__slots__`
Safe fix
Unsafe fix
38 38 | "a0"
39 39 | )
40 40 | __slots__ = [
@ -275,7 +274,7 @@ RUF023.py:54:17: RUF023 [*] `Klass4.__slots__` is not sorted
|
= help: Apply a natural sort to `Klass4.__slots__`
Safe fix
Unsafe fix
51 51 |
52 52 | class Klass4:
53 53 | # comment0
@ -316,7 +315,7 @@ RUF023.py:64:17: RUF023 [*] `Klass4.__slots__` is not sorted
|
= help: Apply a natural sort to `Klass4.__slots__`
Safe fix
Unsafe fix
62 62 | # comment7
63 63 |
64 64 | __slots__ = [ # comment0
@ -377,7 +376,7 @@ RUF023.py:75:17: RUF023 [*] `PurePath.__slots__` is not sorted
|
= help: Apply a natural sort to `PurePath.__slots__`
Safe fix
Unsafe fix
73 73 | # from cpython/Lib/pathlib/__init__.py
74 74 | class PurePath:
75 75 | __slots__ = (
@ -460,7 +459,7 @@ RUF023.py:113:17: RUF023 [*] `ArgumentDescriptor.__slots__` is not sorted
|
= help: Apply a natural sort to `ArgumentDescriptor.__slots__`
Safe fix
Unsafe fix
111 111 | # From cpython/Lib/pickletools.py
112 112 | class ArgumentDescriptor(object):
113 113 | __slots__ = (
@ -649,7 +648,7 @@ RUF023.py:181:17: RUF023 [*] `BezierBuilder4.__slots__` is not sorted
|
= help: Apply a natural sort to `BezierBuilder4.__slots__`
Safe fix
Unsafe fix
179 179 |
180 180 | class BezierBuilder4:
181 181 | __slots__ = (