[refurb] New rule to suggest min/max over sorted() (FURB192) (#10868)

## Summary

Fixes #10463

Add `FURB192` which detects violations like this:

```python
# Bad
a = sorted(l)[0]

# Good
a = min(l)
```

There is a caveat that @Skylion007 has pointed out, which is that
violations with `reverse=True` technically aren't compatible with this
change, in the edge case where the unstable behavior is intended. For
example:

```python
from operator import itemgetter
data = [('red', 1), ('blue', 1), ('red', 2), ('blue', 2)]

min(data, key=itemgetter(0))  # ('blue', 1)
sorted(data, key=itemgetter(0))[0]  # ('blue', 1)
sorted(data, key=itemgetter(0), reverse=True)[-1]  # ('blue, 2')
```

This seems like a rare edge case, but I can make the `reverse=True`
fixes unsafe if that's best.

## Test Plan

This is unit tested.

## References

https://github.com/dosisod/refurb/pull/333/files

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
This commit is contained in:
Ottavio Hartman 2024-04-22 21:13:57 -04:00 committed by GitHub
parent 925c7f8dd3
commit 111bbc61f6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 485 additions and 0 deletions

View file

@ -0,0 +1,48 @@
# Errors
sorted(l)[0]
sorted(l)[-1]
sorted(l, reverse=False)[-1]
sorted(l, key=lambda x: x)[0]
sorted(l, key=key_fn)[0]
sorted([1, 2, 3])[0]
# Unsafe
sorted(l, key=key_fn, reverse=True)[-1]
sorted(l, reverse=True)[0]
sorted(l, reverse=True)[-1]
# Non-errors
sorted(l, reverse=foo())[0]
sorted(l)[1]
sorted(get_list())[1]
sorted()[0]
sorted(l)[1]
sorted(l)[-2]
b = True
sorted(l, reverse=b)[0]
sorted(l, invalid_kwarg=True)[0]
def sorted():
pass
sorted(l)[0]

View file

@ -125,6 +125,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::PotentialIndexError) {
pylint::rules::potential_index_error(checker, value, slice);
}
if checker.enabled(Rule::SortedMinMax) {
refurb::rules::sorted_min_max(checker, subscript);
}
pandas_vet::rules::subscript(checker, value, expr);
}

View file

@ -1075,6 +1075,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Refurb, "180") => (RuleGroup::Preview, rules::refurb::rules::MetaClassABCMeta),
(Refurb, "181") => (RuleGroup::Preview, rules::refurb::rules::HashlibDigestHex),
(Refurb, "187") => (RuleGroup::Preview, rules::refurb::rules::ListReverseCopy),
(Refurb, "192") => (RuleGroup::Preview, rules::refurb::rules::SortedMinMax),
// flake8-logging
(Flake8Logging, "001") => (RuleGroup::Stable, rules::flake8_logging::rules::DirectLoggerInstantiation),

View file

@ -42,6 +42,7 @@ mod tests {
#[test_case(Rule::HashlibDigestHex, Path::new("FURB181.py"))]
#[test_case(Rule::ListReverseCopy, Path::new("FURB187.py"))]
#[test_case(Rule::WriteWholeFile, Path::new("FURB103.py"))]
#[test_case(Rule::SortedMinMax, Path::new("FURB192.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(

View file

@ -21,6 +21,7 @@ pub(crate) use reimplemented_starmap::*;
pub(crate) use repeated_append::*;
pub(crate) use single_item_membership_test::*;
pub(crate) use slice_copy::*;
pub(crate) use sorted_min_max::*;
pub(crate) use type_none_comparison::*;
pub(crate) use unnecessary_enumerate::*;
pub(crate) use unnecessary_from_float::*;
@ -50,6 +51,7 @@ mod reimplemented_starmap;
mod repeated_append;
mod single_item_membership_test;
mod slice_copy;
mod sorted_min_max;
mod type_none_comparison;
mod unnecessary_enumerate;
mod unnecessary_from_float;

View file

@ -0,0 +1,238 @@
use ruff_diagnostics::Diagnostic;
use ruff_diagnostics::Edit;
use ruff_diagnostics::Fix;
use ruff_diagnostics::FixAvailability;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::Number;
use ruff_python_ast::{self as ast, Expr};
use ruff_text_size::Ranged;
use crate::checkers::ast::Checker;
/// ## What it does
/// Checks for uses of `sorted()` to retrieve the minimum or maximum value in
/// a sequence.
///
/// ## Why is this bad?
/// Using `sorted()` to compute the minimum or maximum value in a sequence is
/// inefficient and less readable than using `min()` or `max()` directly.
///
/// ## Example
/// ```python
/// nums = [3, 1, 4, 1, 5]
/// lowest = sorted(nums)[0]
/// highest = sorted(nums)[-1]
/// highest = sorted(nums, reverse=True)[0]
/// ```
///
/// Use instead:
/// ```python
/// nums = [3, 1, 4, 1, 5]
/// lowest = min(nums)
/// highest = max(nums)
/// ```
///
/// ## Fix safety
/// In some cases, migrating to `min` or `max` can lead to a change in behavior,
/// notably when breaking ties.
///
/// As an example, `sorted(data, key=itemgetter(0), reverse=True)[0]` will return
/// the _last_ "minimum" element in the list, if there are multiple elements with
/// the same key. However, `min(data, key=itemgetter(0))` will return the _first_
/// "minimum" element in the list in the same scenario.
///
/// AS such, this rule's fix is marked as unsafe when the `reverse` keyword is used.
///
/// ## References
/// - [Python documentation: `min`](https://docs.python.org/3/library/functions.html#min)
/// - [Python documentation: `max`](https://docs.python.org/3/library/functions.html#max)
#[violation]
pub struct SortedMinMax {
min_max: MinMax,
}
impl Violation for SortedMinMax {
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;
#[derive_message_formats]
fn message(&self) -> String {
let Self { min_max } = self;
match min_max {
MinMax::Min => {
format!("Prefer `min` over `sorted()` to compute the minimum value in a sequence")
}
MinMax::Max => {
format!("Prefer `max` over `sorted()` to compute the maximum value in a sequence")
}
}
}
fn fix_title(&self) -> Option<String> {
let Self { min_max } = self;
match min_max {
MinMax::Min => Some("Replace with `min`".to_string()),
MinMax::Max => Some("Replace with `max`".to_string()),
}
}
}
/// FURB192
pub(crate) fn sorted_min_max(checker: &mut Checker, subscript: &ast::ExprSubscript) {
if subscript.ctx.is_store() || subscript.ctx.is_del() {
return;
}
let ast::ExprSubscript { slice, value, .. } = &subscript;
// Early return if index is not unary or a number literal.
if !(slice.is_number_literal_expr() || slice.is_unary_op_expr()) {
return;
}
// Early return if the value is not a call expression.
let Expr::Call(ast::ExprCall {
func, arguments, ..
}) = value.as_ref()
else {
return;
};
// Check if the index is either 0 or -1.
let index = match slice.as_ref() {
// [0]
Expr::NumberLiteral(ast::ExprNumberLiteral {
value: Number::Int(index),
..
}) if *index == 0 => Index::First,
// [-1]
Expr::UnaryOp(ast::ExprUnaryOp {
op: ast::UnaryOp::USub,
operand,
..
}) => {
match operand.as_ref() {
// [-1]
Expr::NumberLiteral(ast::ExprNumberLiteral {
value: Number::Int(index),
..
}) if *index == 1 => Index::Last,
_ => return,
}
}
_ => return,
};
// Check if the value is a call to `sorted()`.
if !checker.semantic().match_builtin_expr(func, "sorted") {
return;
}
// Check if the call to `sorted()` has a single argument.
let [list_expr] = arguments.args.as_ref() else {
return;
};
let mut reverse_keyword = None;
let mut key_keyword_expr = None;
// Check if the call to `sorted()` has the `reverse` and `key` keywords.
for keyword in arguments.keywords.iter() {
// If the call contains `**kwargs`, return.
let Some(arg) = keyword.arg.as_ref() else {
return;
};
match arg.as_str() {
"reverse" => {
reverse_keyword = Some(keyword);
}
"key" => {
key_keyword_expr = Some(keyword);
}
_ => {
// If unexpected keyword is found, return.
return;
}
}
}
let is_reversed = if let Some(reverse_keyword) = reverse_keyword {
match reverse_keyword.value.as_boolean_literal_expr() {
Some(ast::ExprBooleanLiteral { value, .. }) => *value,
// If the value is not a boolean literal, we can't determine if it is reversed.
_ => return,
}
} else {
// No reverse keyword, so it is not reversed.
false
};
// Determine whether the operation is computing a minimum or maximum value.
let min_max = match (index, is_reversed) {
(Index::First, false) => MinMax::Min,
(Index::First, true) => MinMax::Max,
(Index::Last, false) => MinMax::Max,
(Index::Last, true) => MinMax::Min,
};
let mut diagnostic = Diagnostic::new(SortedMinMax { min_max }, subscript.range());
if checker.semantic().has_builtin_binding(min_max.as_str()) {
diagnostic.set_fix({
let replacement = if let Some(key) = key_keyword_expr {
format!(
"{min_max}({}, {})",
checker.locator().slice(list_expr),
checker.locator().slice(key),
)
} else {
format!("{min_max}({})", checker.locator().slice(list_expr))
};
let replacement = Edit::range_replacement(replacement, subscript.range());
if is_reversed {
Fix::unsafe_edit(replacement)
} else {
Fix::safe_edit(replacement)
}
});
}
checker.diagnostics.push(diagnostic);
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum MinMax {
/// E.g., `min(nums)`
Min,
/// E.g., `max(nums)`
Max,
}
impl MinMax {
fn as_str(self) -> &'static str {
match self {
Self::Min => "min",
Self::Max => "max",
}
}
}
impl std::fmt::Display for MinMax {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Min => write!(f, "min"),
Self::Max => write!(f, "max"),
}
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum Index {
/// E.g., `sorted(nums)[0]`
First,
/// E.g., `sorted(nums)[-1]`
Last,
}

View file

@ -0,0 +1,190 @@
---
source: crates/ruff_linter/src/rules/refurb/mod.rs
---
FURB192.py:3:1: FURB192 [*] Prefer `min` over `sorted()` to compute the minimum value in a sequence
|
1 | # Errors
2 |
3 | sorted(l)[0]
| ^^^^^^^^^^^^ FURB192
4 |
5 | sorted(l)[-1]
|
= help: Replace with `min`
Safe fix
1 1 | # Errors
2 2 |
3 |-sorted(l)[0]
3 |+min(l)
4 4 |
5 5 | sorted(l)[-1]
6 6 |
FURB192.py:5:1: FURB192 [*] Prefer `max` over `sorted()` to compute the maximum value in a sequence
|
3 | sorted(l)[0]
4 |
5 | sorted(l)[-1]
| ^^^^^^^^^^^^^ FURB192
6 |
7 | sorted(l, reverse=False)[-1]
|
= help: Replace with `max`
Safe fix
2 2 |
3 3 | sorted(l)[0]
4 4 |
5 |-sorted(l)[-1]
5 |+max(l)
6 6 |
7 7 | sorted(l, reverse=False)[-1]
8 8 |
FURB192.py:7:1: FURB192 [*] Prefer `max` over `sorted()` to compute the maximum value in a sequence
|
5 | sorted(l)[-1]
6 |
7 | sorted(l, reverse=False)[-1]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB192
8 |
9 | sorted(l, key=lambda x: x)[0]
|
= help: Replace with `max`
Safe fix
4 4 |
5 5 | sorted(l)[-1]
6 6 |
7 |-sorted(l, reverse=False)[-1]
7 |+max(l)
8 8 |
9 9 | sorted(l, key=lambda x: x)[0]
10 10 |
FURB192.py:9:1: FURB192 [*] Prefer `min` over `sorted()` to compute the minimum value in a sequence
|
7 | sorted(l, reverse=False)[-1]
8 |
9 | sorted(l, key=lambda x: x)[0]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB192
10 |
11 | sorted(l, key=key_fn)[0]
|
= help: Replace with `min`
Safe fix
6 6 |
7 7 | sorted(l, reverse=False)[-1]
8 8 |
9 |-sorted(l, key=lambda x: x)[0]
9 |+min(l, key=lambda x: x)
10 10 |
11 11 | sorted(l, key=key_fn)[0]
12 12 |
FURB192.py:11:1: FURB192 [*] Prefer `min` over `sorted()` to compute the minimum value in a sequence
|
9 | sorted(l, key=lambda x: x)[0]
10 |
11 | sorted(l, key=key_fn)[0]
| ^^^^^^^^^^^^^^^^^^^^^^^^ FURB192
12 |
13 | sorted([1, 2, 3])[0]
|
= help: Replace with `min`
Safe fix
8 8 |
9 9 | sorted(l, key=lambda x: x)[0]
10 10 |
11 |-sorted(l, key=key_fn)[0]
11 |+min(l, key=key_fn)
12 12 |
13 13 | sorted([1, 2, 3])[0]
14 14 |
FURB192.py:13:1: FURB192 [*] Prefer `min` over `sorted()` to compute the minimum value in a sequence
|
11 | sorted(l, key=key_fn)[0]
12 |
13 | sorted([1, 2, 3])[0]
| ^^^^^^^^^^^^^^^^^^^^ FURB192
14 |
15 | # Unsafe
|
= help: Replace with `min`
Safe fix
10 10 |
11 11 | sorted(l, key=key_fn)[0]
12 12 |
13 |-sorted([1, 2, 3])[0]
13 |+min([1, 2, 3])
14 14 |
15 15 | # Unsafe
16 16 |
FURB192.py:17:1: FURB192 [*] Prefer `min` over `sorted()` to compute the minimum value in a sequence
|
15 | # Unsafe
16 |
17 | sorted(l, key=key_fn, reverse=True)[-1]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB192
18 |
19 | sorted(l, reverse=True)[0]
|
= help: Replace with `min`
Unsafe fix
14 14 |
15 15 | # Unsafe
16 16 |
17 |-sorted(l, key=key_fn, reverse=True)[-1]
17 |+min(l, key=key_fn)
18 18 |
19 19 | sorted(l, reverse=True)[0]
20 20 |
FURB192.py:19:1: FURB192 [*] Prefer `max` over `sorted()` to compute the maximum value in a sequence
|
17 | sorted(l, key=key_fn, reverse=True)[-1]
18 |
19 | sorted(l, reverse=True)[0]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB192
20 |
21 | sorted(l, reverse=True)[-1]
|
= help: Replace with `max`
Unsafe fix
16 16 |
17 17 | sorted(l, key=key_fn, reverse=True)[-1]
18 18 |
19 |-sorted(l, reverse=True)[0]
19 |+max(l)
20 20 |
21 21 | sorted(l, reverse=True)[-1]
22 22 |
FURB192.py:21:1: FURB192 [*] Prefer `min` over `sorted()` to compute the minimum value in a sequence
|
19 | sorted(l, reverse=True)[0]
20 |
21 | sorted(l, reverse=True)[-1]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB192
22 |
23 | # Non-errors
|
= help: Replace with `min`
Unsafe fix
18 18 |
19 19 | sorted(l, reverse=True)[0]
20 20 |
21 |-sorted(l, reverse=True)[-1]
21 |+min(l)
22 22 |
23 23 | # Non-errors
24 24 |

2
ruff.schema.json generated
View file

@ -3077,6 +3077,8 @@
"FURB180",
"FURB181",
"FURB187",
"FURB19",
"FURB192",
"G",
"G0",
"G00",