Detect items that hash to same value in duplicate dictionaries (#14065)

## Summary

Closes https://github.com/astral-sh/ruff/issues/12772.
This commit is contained in:
Charlie Marsh 2024-11-03 14:16:34 -05:00 committed by GitHub
parent 71a122f060
commit f09dc8b67c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 170 additions and 84 deletions

View file

@ -58,3 +58,13 @@ x = {
t={"x":"test123", "x":("test123")}
t={"x":("test123"), "x":"test123"}
# Regression test for: https://github.com/astral-sh/ruff/issues/12772
x = {
1: "abc",
1: "def",
True: "ghi",
0: "foo",
0: "bar",
False: "baz",
}

View file

@ -1,8 +1,9 @@
use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet};
use std::collections::hash_map::Entry;
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::comparable::ComparableExpr;
use ruff_python_ast::comparable::{ComparableExpr, HashableExpr};
use ruff_python_ast::parenthesize::parenthesized_range;
use ruff_python_ast::{self as ast, Expr};
use ruff_text_size::Ranged;
@ -44,6 +45,7 @@ use crate::registry::Rule;
#[violation]
pub struct MultiValueRepeatedKeyLiteral {
name: SourceCodeSnippet,
existing: SourceCodeSnippet,
}
impl Violation for MultiValueRepeatedKeyLiteral {
@ -51,16 +53,26 @@ impl Violation for MultiValueRepeatedKeyLiteral {
#[derive_message_formats]
fn message(&self) -> String {
let MultiValueRepeatedKeyLiteral { name } = self;
if let Some(name) = name.full_display() {
format!("Dictionary key literal `{name}` repeated")
} else {
format!("Dictionary key literal repeated")
let MultiValueRepeatedKeyLiteral { name, existing } = self;
match (name.full_display(), existing.full_display()) {
(Some(name), None) => {
format!("Dictionary key literal `{name}` repeated")
}
(Some(name), Some(existing)) => {
if name == existing {
format!("Dictionary key literal `{name}` repeated")
} else {
format!(
"Dictionary key literal `{name}` repeated (`{name}` hashes to the same value as `{existing}`)"
)
}
}
_ => format!("Dictionary key literal repeated"),
}
}
fn fix_title(&self) -> Option<String> {
let MultiValueRepeatedKeyLiteral { name } = self;
let MultiValueRepeatedKeyLiteral { name, .. } = self;
if let Some(name) = name.full_display() {
Some(format!("Remove repeated key literal `{name}`"))
} else {
@ -129,7 +141,7 @@ impl Violation for MultiValueRepeatedKeyVariable {
/// F601, F602
pub(crate) fn repeated_keys(checker: &mut Checker, dict: &ast::ExprDict) {
// Generate a map from key to (index, value).
let mut seen: FxHashMap<ComparableExpr, FxHashSet<ComparableExpr>> =
let mut seen: FxHashMap<HashableExpr, (&Expr, FxHashSet<ComparableExpr>)> =
FxHashMap::with_capacity_and_hasher(dict.len(), FxBuildHasher);
// Detect duplicate keys.
@ -138,86 +150,92 @@ pub(crate) fn repeated_keys(checker: &mut Checker, dict: &ast::ExprDict) {
continue;
};
let comparable_key = ComparableExpr::from(key);
let comparable_key = HashableExpr::from(key);
let comparable_value = ComparableExpr::from(value);
let Some(seen_values) = seen.get_mut(&comparable_key) else {
seen.insert(comparable_key, FxHashSet::from_iter([comparable_value]));
continue;
};
match key {
Expr::StringLiteral(_)
| Expr::BytesLiteral(_)
| Expr::NumberLiteral(_)
| Expr::BooleanLiteral(_)
| Expr::NoneLiteral(_)
| Expr::EllipsisLiteral(_)
| Expr::Tuple(_)
| Expr::FString(_) => {
if checker.enabled(Rule::MultiValueRepeatedKeyLiteral) {
let mut diagnostic = Diagnostic::new(
MultiValueRepeatedKeyLiteral {
name: SourceCodeSnippet::from_str(checker.locator().slice(key)),
},
key.range(),
);
if !seen_values.insert(comparable_value) {
diagnostic.set_fix(Fix::unsafe_edit(Edit::deletion(
parenthesized_range(
dict.value(i - 1).into(),
dict.into(),
checker.comment_ranges(),
checker.locator().contents(),
)
.unwrap_or_else(|| dict.value(i - 1).range())
.end(),
parenthesized_range(
dict.value(i).into(),
dict.into(),
checker.comment_ranges(),
checker.locator().contents(),
)
.unwrap_or_else(|| dict.value(i).range())
.end(),
)));
match seen.entry(comparable_key) {
Entry::Vacant(entry) => {
entry.insert((key, FxHashSet::from_iter([comparable_value])));
}
Entry::Occupied(mut entry) => {
let (seen_key, seen_values) = entry.get_mut();
match key {
Expr::StringLiteral(_)
| Expr::BytesLiteral(_)
| Expr::NumberLiteral(_)
| Expr::BooleanLiteral(_)
| Expr::NoneLiteral(_)
| Expr::EllipsisLiteral(_)
| Expr::Tuple(_)
| Expr::FString(_) => {
if checker.enabled(Rule::MultiValueRepeatedKeyLiteral) {
let mut diagnostic = Diagnostic::new(
MultiValueRepeatedKeyLiteral {
name: SourceCodeSnippet::from_str(checker.locator().slice(key)),
existing: SourceCodeSnippet::from_str(
checker.locator().slice(*seen_key),
),
},
key.range(),
);
if !seen_values.insert(comparable_value) {
diagnostic.set_fix(Fix::unsafe_edit(Edit::deletion(
parenthesized_range(
dict.value(i - 1).into(),
dict.into(),
checker.comment_ranges(),
checker.locator().contents(),
)
.unwrap_or_else(|| dict.value(i - 1).range())
.end(),
parenthesized_range(
dict.value(i).into(),
dict.into(),
checker.comment_ranges(),
checker.locator().contents(),
)
.unwrap_or_else(|| dict.value(i).range())
.end(),
)));
}
checker.diagnostics.push(diagnostic);
}
}
checker.diagnostics.push(diagnostic);
Expr::Name(_) => {
if checker.enabled(Rule::MultiValueRepeatedKeyVariable) {
let mut diagnostic = Diagnostic::new(
MultiValueRepeatedKeyVariable {
name: SourceCodeSnippet::from_str(checker.locator().slice(key)),
},
key.range(),
);
let comparable_value: ComparableExpr = dict.value(i).into();
if !seen_values.insert(comparable_value) {
diagnostic.set_fix(Fix::unsafe_edit(Edit::deletion(
parenthesized_range(
dict.value(i - 1).into(),
dict.into(),
checker.comment_ranges(),
checker.locator().contents(),
)
.unwrap_or_else(|| dict.value(i - 1).range())
.end(),
parenthesized_range(
dict.value(i).into(),
dict.into(),
checker.comment_ranges(),
checker.locator().contents(),
)
.unwrap_or_else(|| dict.value(i).range())
.end(),
)));
}
checker.diagnostics.push(diagnostic);
}
}
_ => {}
}
}
Expr::Name(_) => {
if checker.enabled(Rule::MultiValueRepeatedKeyVariable) {
let mut diagnostic = Diagnostic::new(
MultiValueRepeatedKeyVariable {
name: SourceCodeSnippet::from_str(checker.locator().slice(key)),
},
key.range(),
);
let comparable_value: ComparableExpr = dict.value(i).into();
if !seen_values.insert(comparable_value) {
diagnostic.set_fix(Fix::unsafe_edit(Edit::deletion(
parenthesized_range(
dict.value(i - 1).into(),
dict.into(),
checker.comment_ranges(),
checker.locator().contents(),
)
.unwrap_or_else(|| dict.value(i - 1).range())
.end(),
parenthesized_range(
dict.value(i).into(),
dict.into(),
checker.comment_ranges(),
checker.locator().contents(),
)
.unwrap_or_else(|| dict.value(i).range())
.end(),
)));
}
checker.diagnostics.push(diagnostic);
}
}
_ => {}
}
}
}

View file

@ -23,7 +23,18 @@ F601.py:6:5: F601 Dictionary key literal `("a", "b")` repeated
|
= help: Remove repeated key literal `("a", "b")`
F601.py:9:5: F601 Dictionary key literal `1` repeated
F601.py:8:5: F601 Dictionary key literal `1` repeated (`1` hashes to the same value as `1.0`)
|
6 | ("a", "b"): 4,
7 | 1.0: 2,
8 | 1: 0,
| ^ F601
9 | 1: 3,
10 | b"123": 1,
|
= help: Remove repeated key literal `1`
F601.py:9:5: F601 Dictionary key literal `1` repeated (`1` hashes to the same value as `1.0`)
|
7 | 1.0: 2,
8 | 1: 0,
@ -299,6 +310,7 @@ F601.py:58:19: F601 [*] Dictionary key literal `"x"` repeated
58 |+t={"x":"test123"}
59 59 |
60 60 | t={"x":("test123"), "x":"test123"}
61 61 |
F601.py:60:21: F601 [*] Dictionary key literal `"x"` repeated
|
@ -306,6 +318,8 @@ F601.py:60:21: F601 [*] Dictionary key literal `"x"` repeated
59 |
60 | t={"x":("test123"), "x":"test123"}
| ^^^ F601
61 |
62 | # Regression test for: https://github.com/astral-sh/ruff/issues/12772
|
= help: Remove repeated key literal `"x"`
@ -315,5 +329,49 @@ F601.py:60:21: F601 [*] Dictionary key literal `"x"` repeated
59 59 |
60 |-t={"x":("test123"), "x":"test123"}
60 |+t={"x":("test123")}
61 61 |
62 62 | # Regression test for: https://github.com/astral-sh/ruff/issues/12772
63 63 | x = {
F601.py:65:5: F601 Dictionary key literal `1` repeated
|
63 | x = {
64 | 1: "abc",
65 | 1: "def",
| ^ F601
66 | True: "ghi",
67 | 0: "foo",
|
= help: Remove repeated key literal `1`
F601.py:66:5: F601 Dictionary key literal `True` repeated (`True` hashes to the same value as `1`)
|
64 | 1: "abc",
65 | 1: "def",
66 | True: "ghi",
| ^^^^ F601
67 | 0: "foo",
68 | 0: "bar",
|
= help: Remove repeated key literal `True`
F601.py:68:5: F601 Dictionary key literal `0` repeated
|
66 | True: "ghi",
67 | 0: "foo",
68 | 0: "bar",
| ^ F601
69 | False: "baz",
70 | }
|
= help: Remove repeated key literal `0`
F601.py:69:5: F601 Dictionary key literal `False` repeated (`False` hashes to the same value as `0`)
|
67 | 0: "foo",
68 | 0: "bar",
69 | False: "baz",
| ^^^^^ F601
70 | }
|
= help: Remove repeated key literal `False`