Implement UnnecessaryListAllocationForFirstElement (#5549)

## Summary

Fixes #5503. Ready for final review as the `mkdocs` issue involving SSH
keys is fixed.

Note that this will only throw on a `Name` - it will be refactorable
once we have a type-checker. This means that this is the only sort of
input that will throw.
```python
x = range(10)
list(x)[0]
```

I thought it'd be confusing if we supported direct function results.
Consider this example, assuming we support direct results:
```python
# throws
list(range(10))[0]

def createRange(bound):
    return range(bound)

# "why doesn't this throw, but a direct `range(10)` call does?"
list(createRange(10))[0]
```
If it's necessary, I can go through the list of built-ins and find those
which produce iterables, then add them to the throwing list.

## Test Plan

Added a new fixture, then ran `cargo t`
This commit is contained in:
Evan Rittenhouse 2023-07-10 11:32:41 -05:00 committed by GitHub
parent 3562d809b2
commit 28fe2d334a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 630 additions and 1 deletions

View file

@ -0,0 +1,44 @@
x = range(10)
# RUF015
list(x)[0]
list(x)[:1]
list(x)[:1:1]
list(x)[:1:2]
tuple(x)[0]
tuple(x)[:1]
tuple(x)[:1:1]
tuple(x)[:1:2]
list(i for i in x)[0]
list(i for i in x)[:1]
list(i for i in x)[:1:1]
list(i for i in x)[:1:2]
[i for i in x][0]
[i for i in x][:1]
[i for i in x][:1:1]
[i for i in x][:1:2]
# OK (not indexing (solely) the first element)
list(x)
list(x)[1]
list(x)[-1]
list(x)[1:]
list(x)[:3:2]
list(x)[::2]
list(x)[::]
[i for i in x]
[i for i in x][1]
[i for i in x][-1]
[i for i in x][1:]
[i for i in x][:3:2]
[i for i in x][::2]
[i for i in x][::]
# OK (doesn't mirror the underlying list)
[i + 1 for i in x][0]
[i for i in x if i > 5][0]
[(i, i + 1) for i in x][0]
# OK (multiple generators)
y = range(10)
[i + j for i in x for j in y][0]

View file

@ -2142,7 +2142,7 @@ where
// Pre-visit.
match expr {
Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
subscript @ Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
// Ex) Optional[...], Union[...]
if self.any_enabled(&[
Rule::FutureRewritableTypeAnnotation,
@ -2225,6 +2225,9 @@ where
if self.enabled(Rule::UncapitalizedEnvironmentVariables) {
flake8_simplify::rules::use_capital_environment_variables(self, expr);
}
if self.enabled(Rule::UnnecessaryIterableAllocationForFirstElement) {
ruff::rules::unnecessary_iterable_allocation_for_first_element(self, subscript);
}
pandas_vet::rules::subscript(self, value, expr);
}

View file

@ -779,6 +779,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "013") => (RuleGroup::Unspecified, rules::ruff::rules::ImplicitOptional),
#[cfg(feature = "unreachable-code")]
(Ruff, "014") => (RuleGroup::Nursery, rules::ruff::rules::UnreachableCode),
(Ruff, "015") => (RuleGroup::Unspecified, rules::ruff::rules::UnnecessaryIterableAllocationForFirstElement),
(Ruff, "100") => (RuleGroup::Unspecified, rules::ruff::rules::UnusedNOQA),
(Ruff, "200") => (RuleGroup::Unspecified, rules::ruff::rules::InvalidPyprojectToml),

View file

@ -30,6 +30,10 @@ mod tests {
#[test_case(Rule::MutableDataclassDefault, Path::new("RUF008.py"))]
#[test_case(Rule::PairwiseOverZipped, Path::new("RUF007.py"))]
#[test_case(Rule::StaticKeyDictComprehension, Path::new("RUF011.py"))]
#[test_case(
Rule::UnnecessaryIterableAllocationForFirstElement,
Path::new("RUF015.py")
)]
#[cfg_attr(
feature = "unreachable-code",
test_case(Rule::UnreachableCode, Path::new("RUF014.py"))

View file

@ -9,6 +9,7 @@ pub(crate) use mutable_class_default::*;
pub(crate) use mutable_dataclass_default::*;
pub(crate) use pairwise_over_zipped::*;
pub(crate) use static_key_dict_comprehension::*;
pub(crate) use unnecessary_iterable_allocation_for_first_element::*;
#[cfg(feature = "unreachable-code")]
pub(crate) use unreachable::*;
pub(crate) use unused_noqa::*;
@ -26,6 +27,7 @@ mod mutable_class_default;
mod mutable_dataclass_default;
mod pairwise_over_zipped;
mod static_key_dict_comprehension;
mod unnecessary_iterable_allocation_for_first_element;
#[cfg(feature = "unreachable-code")]
pub(crate) mod unreachable;
mod unused_noqa;

View file

@ -0,0 +1,236 @@
use num_bigint::BigInt;
use num_traits::{One, Zero};
use rustpython_parser::ast::{self, Comprehension, Constant, Expr};
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_semantic::SemanticModel;
use crate::checkers::ast::Checker;
use crate::registry::AsRule;
/// ## What it does
/// Checks for uses of `list(...)[0]` that can be replaced with
/// `next(iter(...))`.
///
/// ## Why is this bad?
/// Calling `list(...)` will create a new list of the entire collection, which
/// can be very expensive for large collections. If you only need the first
/// element of the collection, you can use `next(iter(...))` to lazily fetch
/// the first element without creating a new list.
///
/// Note that migrating from `list(...)[0]` to `next(iter(...))` can change
/// the behavior of your program in two ways:
///
/// 1. First, `list(...)` will eagerly evaluate the entire collection, while
/// `next(iter(...))` will only evaluate the first element. As such, any
/// side effects that occur during iteration will be delayed.
/// 2. Second, `list(...)[0]` will raise `IndexError` if the collection is
/// empty, while `next(iter(...))` will raise `StopIteration`.
///
/// ## Example
/// ```python
/// head = list(range(1000000000000))[0]
/// ```
///
/// Use instead:
/// ```python
/// head = next(iter(range(1000000000000)))
/// ```
///
/// ## References
/// - [Iterators and Iterables in Python: Run Efficient Iterations](https://realpython.com/python-iterators-iterables/#when-to-use-an-iterator-in-python)
#[violation]
pub(crate) struct UnnecessaryIterableAllocationForFirstElement {
iterable: String,
subscript_kind: HeadSubscriptKind,
}
impl AlwaysAutofixableViolation for UnnecessaryIterableAllocationForFirstElement {
#[derive_message_formats]
fn message(&self) -> String {
let UnnecessaryIterableAllocationForFirstElement {
iterable,
subscript_kind,
} = self;
match subscript_kind {
HeadSubscriptKind::Index => {
format!("Prefer `next(iter({iterable}))` over `list({iterable})[0]`")
}
HeadSubscriptKind::Slice => {
format!("Prefer `[next(iter({iterable}))]` over `list({iterable})[:1]`")
}
}
}
fn autofix_title(&self) -> String {
let UnnecessaryIterableAllocationForFirstElement {
iterable,
subscript_kind,
} = self;
match subscript_kind {
HeadSubscriptKind::Index => format!("Replace with `next(iter({iterable}))`"),
HeadSubscriptKind::Slice => format!("Replace with `[next(iter({iterable}))]"),
}
}
}
/// RUF015
pub(crate) fn unnecessary_iterable_allocation_for_first_element(
checker: &mut Checker,
subscript: &Expr,
) {
let Expr::Subscript(ast::ExprSubscript {
value,
slice,
range,
..
}) = subscript
else {
return;
};
let Some(subscript_kind) = classify_subscript(slice) else {
return;
};
let Some(iterable) = iterable_name(value, checker.semantic()) else {
return;
};
let mut diagnostic = Diagnostic::new(
UnnecessaryIterableAllocationForFirstElement {
iterable: iterable.to_string(),
subscript_kind,
},
*range,
);
if checker.patch(diagnostic.kind.rule()) {
let replacement = match subscript_kind {
HeadSubscriptKind::Index => format!("next(iter({iterable}))"),
HeadSubscriptKind::Slice => format!("[next(iter({iterable}))]"),
};
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(replacement, *range)));
}
checker.diagnostics.push(diagnostic);
}
/// A subscript slice that represents the first element of a list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HeadSubscriptKind {
/// The subscript is an index (e.g., `[0]`).
Index,
/// The subscript is a slice (e.g., `[:1]`).
Slice,
}
/// Check that the slice [`Expr`] is functionally equivalent to slicing into the first element. The
/// first `bool` checks that the element is in fact first, the second checks if it's a slice or an
/// index.
fn classify_subscript(expr: &Expr) -> Option<HeadSubscriptKind> {
match expr {
Expr::Constant(ast::ExprConstant {
value: Constant::Int(value),
..
}) if value.is_zero() => Some(HeadSubscriptKind::Index),
Expr::Slice(ast::ExprSlice {
step, lower, upper, ..
}) => {
// Avoid, e.g., `list(...)[:2]`
let upper = upper.as_ref()?;
let upper = as_int(upper)?;
if !upper.is_one() {
return None;
}
// Avoid, e.g., `list(...)[2:]`.
if let Some(lower) = lower.as_ref() {
let lower = as_int(lower)?;
if !lower.is_zero() {
return None;
}
}
// Avoid, e.g., `list(...)[::-1]`
if let Some(step) = step.as_ref() {
let step = as_int(step)?;
if step < upper {
return None;
}
}
Some(HeadSubscriptKind::Slice)
}
_ => None,
}
}
/// Fetch the name of the iterable from an expression if the expression returns an unmodified list
/// which can be sliced into.
fn iterable_name<'a>(expr: &'a Expr, model: &SemanticModel) -> Option<&'a str> {
match expr {
Expr::Call(ast::ExprCall { func, args, .. }) => {
let ast::ExprName { id, .. } = func.as_name_expr()?;
if !matches!(id.as_str(), "tuple" | "list") {
return None;
}
if !model.is_builtin(id.as_str()) {
return None;
}
match args.first() {
Some(Expr::Name(ast::ExprName { id: arg_name, .. })) => Some(arg_name.as_str()),
Some(Expr::GeneratorExp(ast::ExprGeneratorExp {
elt, generators, ..
})) => generator_iterable(elt, generators),
_ => None,
}
}
Expr::ListComp(ast::ExprListComp {
elt, generators, ..
}) => generator_iterable(elt, generators),
_ => None,
}
}
/// Given a comprehension, returns the name of the iterable over which it iterates, if it's
/// a simple comprehension (e.g., `x` for `[i for i in x]`).
fn generator_iterable<'a>(elt: &'a Expr, generators: &'a Vec<Comprehension>) -> Option<&'a str> {
// If the `elt` field is anything other than a [`Expr::Name`], we can't be sure that it
// doesn't modify the elements of the underlying iterator (e.g., `[i + 1 for i in x][0]`).
if !elt.is_name_expr() {
return None;
}
// If there's more than 1 generator, we can't safely say that it fits the diagnostic conditions
// (e.g., `[(i, j) for i in x for j in y][0]`).
let [generator] = generators.as_slice() else {
return None;
};
// Ignore if there's an `if` statement in the comprehension, since it filters the list.
if !generator.ifs.is_empty() {
return None;
}
let ast::ExprName { id, .. } = generator.iter.as_name_expr()?;
Some(id.as_str())
}
/// If an expression is a constant integer, returns the value of that integer; otherwise,
/// returns `None`.
fn as_int(expr: &Expr) -> Option<&BigInt> {
if let Expr::Constant(ast::ExprConstant {
value: Constant::Int(value),
..
}) = expr
{
Some(value)
} else {
None
}
}

View file

@ -0,0 +1,338 @@
---
source: crates/ruff/src/rules/ruff/mod.rs
---
RUF015.py:4:1: RUF015 [*] Prefer `next(iter(x))` over `list(x)[0]`
|
3 | # RUF015
4 | list(x)[0]
| ^^^^^^^^^^ RUF015
5 | list(x)[:1]
6 | list(x)[:1:1]
|
= help: Replace with `next(iter(x))`
Suggested fix
1 1 | x = range(10)
2 2 |
3 3 | # RUF015
4 |-list(x)[0]
4 |+next(iter(x))
5 5 | list(x)[:1]
6 6 | list(x)[:1:1]
7 7 | list(x)[:1:2]
RUF015.py:5:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
3 | # RUF015
4 | list(x)[0]
5 | list(x)[:1]
| ^^^^^^^^^^^ RUF015
6 | list(x)[:1:1]
7 | list(x)[:1:2]
|
= help: Replace with `[next(iter(x))]
Suggested fix
2 2 |
3 3 | # RUF015
4 4 | list(x)[0]
5 |-list(x)[:1]
5 |+[next(iter(x))]
6 6 | list(x)[:1:1]
7 7 | list(x)[:1:2]
8 8 | tuple(x)[0]
RUF015.py:6:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
4 | list(x)[0]
5 | list(x)[:1]
6 | list(x)[:1:1]
| ^^^^^^^^^^^^^ RUF015
7 | list(x)[:1:2]
8 | tuple(x)[0]
|
= help: Replace with `[next(iter(x))]
Suggested fix
3 3 | # RUF015
4 4 | list(x)[0]
5 5 | list(x)[:1]
6 |-list(x)[:1:1]
6 |+[next(iter(x))]
7 7 | list(x)[:1:2]
8 8 | tuple(x)[0]
9 9 | tuple(x)[:1]
RUF015.py:7:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
5 | list(x)[:1]
6 | list(x)[:1:1]
7 | list(x)[:1:2]
| ^^^^^^^^^^^^^ RUF015
8 | tuple(x)[0]
9 | tuple(x)[:1]
|
= help: Replace with `[next(iter(x))]
Suggested fix
4 4 | list(x)[0]
5 5 | list(x)[:1]
6 6 | list(x)[:1:1]
7 |-list(x)[:1:2]
7 |+[next(iter(x))]
8 8 | tuple(x)[0]
9 9 | tuple(x)[:1]
10 10 | tuple(x)[:1:1]
RUF015.py:8:1: RUF015 [*] Prefer `next(iter(x))` over `list(x)[0]`
|
6 | list(x)[:1:1]
7 | list(x)[:1:2]
8 | tuple(x)[0]
| ^^^^^^^^^^^ RUF015
9 | tuple(x)[:1]
10 | tuple(x)[:1:1]
|
= help: Replace with `next(iter(x))`
Suggested fix
5 5 | list(x)[:1]
6 6 | list(x)[:1:1]
7 7 | list(x)[:1:2]
8 |-tuple(x)[0]
8 |+next(iter(x))
9 9 | tuple(x)[:1]
10 10 | tuple(x)[:1:1]
11 11 | tuple(x)[:1:2]
RUF015.py:9:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
7 | list(x)[:1:2]
8 | tuple(x)[0]
9 | tuple(x)[:1]
| ^^^^^^^^^^^^ RUF015
10 | tuple(x)[:1:1]
11 | tuple(x)[:1:2]
|
= help: Replace with `[next(iter(x))]
Suggested fix
6 6 | list(x)[:1:1]
7 7 | list(x)[:1:2]
8 8 | tuple(x)[0]
9 |-tuple(x)[:1]
9 |+[next(iter(x))]
10 10 | tuple(x)[:1:1]
11 11 | tuple(x)[:1:2]
12 12 | list(i for i in x)[0]
RUF015.py:10:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
8 | tuple(x)[0]
9 | tuple(x)[:1]
10 | tuple(x)[:1:1]
| ^^^^^^^^^^^^^^ RUF015
11 | tuple(x)[:1:2]
12 | list(i for i in x)[0]
|
= help: Replace with `[next(iter(x))]
Suggested fix
7 7 | list(x)[:1:2]
8 8 | tuple(x)[0]
9 9 | tuple(x)[:1]
10 |-tuple(x)[:1:1]
10 |+[next(iter(x))]
11 11 | tuple(x)[:1:2]
12 12 | list(i for i in x)[0]
13 13 | list(i for i in x)[:1]
RUF015.py:11:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
9 | tuple(x)[:1]
10 | tuple(x)[:1:1]
11 | tuple(x)[:1:2]
| ^^^^^^^^^^^^^^ RUF015
12 | list(i for i in x)[0]
13 | list(i for i in x)[:1]
|
= help: Replace with `[next(iter(x))]
Suggested fix
8 8 | tuple(x)[0]
9 9 | tuple(x)[:1]
10 10 | tuple(x)[:1:1]
11 |-tuple(x)[:1:2]
11 |+[next(iter(x))]
12 12 | list(i for i in x)[0]
13 13 | list(i for i in x)[:1]
14 14 | list(i for i in x)[:1:1]
RUF015.py:12:1: RUF015 [*] Prefer `next(iter(x))` over `list(x)[0]`
|
10 | tuple(x)[:1:1]
11 | tuple(x)[:1:2]
12 | list(i for i in x)[0]
| ^^^^^^^^^^^^^^^^^^^^^ RUF015
13 | list(i for i in x)[:1]
14 | list(i for i in x)[:1:1]
|
= help: Replace with `next(iter(x))`
Suggested fix
9 9 | tuple(x)[:1]
10 10 | tuple(x)[:1:1]
11 11 | tuple(x)[:1:2]
12 |-list(i for i in x)[0]
12 |+next(iter(x))
13 13 | list(i for i in x)[:1]
14 14 | list(i for i in x)[:1:1]
15 15 | list(i for i in x)[:1:2]
RUF015.py:13:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
11 | tuple(x)[:1:2]
12 | list(i for i in x)[0]
13 | list(i for i in x)[:1]
| ^^^^^^^^^^^^^^^^^^^^^^ RUF015
14 | list(i for i in x)[:1:1]
15 | list(i for i in x)[:1:2]
|
= help: Replace with `[next(iter(x))]
Suggested fix
10 10 | tuple(x)[:1:1]
11 11 | tuple(x)[:1:2]
12 12 | list(i for i in x)[0]
13 |-list(i for i in x)[:1]
13 |+[next(iter(x))]
14 14 | list(i for i in x)[:1:1]
15 15 | list(i for i in x)[:1:2]
16 16 | [i for i in x][0]
RUF015.py:14:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
12 | list(i for i in x)[0]
13 | list(i for i in x)[:1]
14 | list(i for i in x)[:1:1]
| ^^^^^^^^^^^^^^^^^^^^^^^^ RUF015
15 | list(i for i in x)[:1:2]
16 | [i for i in x][0]
|
= help: Replace with `[next(iter(x))]
Suggested fix
11 11 | tuple(x)[:1:2]
12 12 | list(i for i in x)[0]
13 13 | list(i for i in x)[:1]
14 |-list(i for i in x)[:1:1]
14 |+[next(iter(x))]
15 15 | list(i for i in x)[:1:2]
16 16 | [i for i in x][0]
17 17 | [i for i in x][:1]
RUF015.py:15:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
13 | list(i for i in x)[:1]
14 | list(i for i in x)[:1:1]
15 | list(i for i in x)[:1:2]
| ^^^^^^^^^^^^^^^^^^^^^^^^ RUF015
16 | [i for i in x][0]
17 | [i for i in x][:1]
|
= help: Replace with `[next(iter(x))]
Suggested fix
12 12 | list(i for i in x)[0]
13 13 | list(i for i in x)[:1]
14 14 | list(i for i in x)[:1:1]
15 |-list(i for i in x)[:1:2]
15 |+[next(iter(x))]
16 16 | [i for i in x][0]
17 17 | [i for i in x][:1]
18 18 | [i for i in x][:1:1]
RUF015.py:16:1: RUF015 [*] Prefer `next(iter(x))` over `list(x)[0]`
|
14 | list(i for i in x)[:1:1]
15 | list(i for i in x)[:1:2]
16 | [i for i in x][0]
| ^^^^^^^^^^^^^^^^^ RUF015
17 | [i for i in x][:1]
18 | [i for i in x][:1:1]
|
= help: Replace with `next(iter(x))`
Suggested fix
13 13 | list(i for i in x)[:1]
14 14 | list(i for i in x)[:1:1]
15 15 | list(i for i in x)[:1:2]
16 |-[i for i in x][0]
16 |+next(iter(x))
17 17 | [i for i in x][:1]
18 18 | [i for i in x][:1:1]
19 19 | [i for i in x][:1:2]
RUF015.py:17:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
15 | list(i for i in x)[:1:2]
16 | [i for i in x][0]
17 | [i for i in x][:1]
| ^^^^^^^^^^^^^^^^^^ RUF015
18 | [i for i in x][:1:1]
19 | [i for i in x][:1:2]
|
= help: Replace with `[next(iter(x))]
Suggested fix
14 14 | list(i for i in x)[:1:1]
15 15 | list(i for i in x)[:1:2]
16 16 | [i for i in x][0]
17 |-[i for i in x][:1]
17 |+[next(iter(x))]
18 18 | [i for i in x][:1:1]
19 19 | [i for i in x][:1:2]
20 20 |
RUF015.py:18:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
16 | [i for i in x][0]
17 | [i for i in x][:1]
18 | [i for i in x][:1:1]
| ^^^^^^^^^^^^^^^^^^^^ RUF015
19 | [i for i in x][:1:2]
|
= help: Replace with `[next(iter(x))]
Suggested fix
15 15 | list(i for i in x)[:1:2]
16 16 | [i for i in x][0]
17 17 | [i for i in x][:1]
18 |-[i for i in x][:1:1]
18 |+[next(iter(x))]
19 19 | [i for i in x][:1:2]
20 20 |
21 21 | # OK (not indexing (solely) the first element)
RUF015.py:19:1: RUF015 [*] Prefer `[next(iter(x))]` over `list(x)[:1]`
|
17 | [i for i in x][:1]
18 | [i for i in x][:1:1]
19 | [i for i in x][:1:2]
| ^^^^^^^^^^^^^^^^^^^^ RUF015
20 |
21 | # OK (not indexing (solely) the first element)
|
= help: Replace with `[next(iter(x))]
Suggested fix
16 16 | [i for i in x][0]
17 17 | [i for i in x][:1]
18 18 | [i for i in x][:1:1]
19 |-[i for i in x][:1:2]
19 |+[next(iter(x))]
20 20 |
21 21 | # OK (not indexing (solely) the first element)
22 22 | list(x)

1
ruff.schema.json generated
View file

@ -2403,6 +2403,7 @@
"RUF012",
"RUF013",
"RUF014",
"RUF015",
"RUF1",
"RUF10",
"RUF100",