mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-19 01:50:38 +00:00
Replace NoHashHasher
usages with FxHashMap
(#6049)
## Summary I had always assumed that `NoHashHasher` would be faster when using integer keys, but benchmarking shows otherwise: ``` linter/default-rules/numpy/globals.py time: [66.544 µs 66.606 µs 66.678 µs] thrpt: [44.253 MiB/s 44.300 MiB/s 44.342 MiB/s] change: time: [-0.1843% +0.1087% +0.3718%] (p = 0.46 > 0.05) thrpt: [-0.3704% -0.1086% +0.1847%] No change in performance detected. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high mild linter/default-rules/pydantic/types.py time: [1.3787 ms 1.3811 ms 1.3837 ms] thrpt: [18.431 MiB/s 18.466 MiB/s 18.498 MiB/s] change: time: [-0.4827% -0.1074% +0.1927%] (p = 0.56 > 0.05) thrpt: [-0.1924% +0.1075% +0.4850%] No change in performance detected. linter/default-rules/numpy/ctypeslib.py time: [624.82 µs 625.96 µs 627.17 µs] thrpt: [26.550 MiB/s 26.601 MiB/s 26.650 MiB/s] change: time: [-0.7071% -0.4908% -0.2736%] (p = 0.00 < 0.05) thrpt: [+0.2744% +0.4932% +0.7122%] Change within noise threshold. linter/default-rules/large/dataset.py time: [3.1585 ms 3.1634 ms 3.1685 ms] thrpt: [12.840 MiB/s 12.861 MiB/s 12.880 MiB/s] change: time: [-1.5338% -1.3463% -1.1476%] (p = 0.00 < 0.05) thrpt: [+1.1610% +1.3647% +1.5577%] Performance has improved. linter/all-rules/numpy/globals.py time: [140.17 µs 140.37 µs 140.58 µs] thrpt: [20.989 MiB/s 21.020 MiB/s 21.051 MiB/s] change: time: [-0.1066% +0.3140% +0.7479%] (p = 0.14 > 0.05) thrpt: [-0.7423% -0.3130% +0.1067%] No change in performance detected. Found 3 outliers among 100 measurements (3.00%) 2 (2.00%) high mild 1 (1.00%) high severe linter/all-rules/pydantic/types.py time: [2.7030 ms 2.7069 ms 2.7112 ms] thrpt: [9.4064 MiB/s 9.4216 MiB/s 9.4351 MiB/s] change: time: [-0.6721% -0.4874% -0.2974%] (p = 0.00 < 0.05) thrpt: [+0.2982% +0.4898% +0.6766%] Change within noise threshold. Found 14 outliers among 100 measurements (14.00%) 12 (12.00%) high mild 2 (2.00%) high severe linter/all-rules/numpy/ctypeslib.py time: [1.4709 ms 1.4727 ms 1.4749 ms] thrpt: [11.290 MiB/s 11.306 MiB/s 11.320 MiB/s] change: time: [-1.1617% -0.9766% -0.8094%] (p = 0.00 < 0.05) thrpt: [+0.8160% +0.9862% +1.1754%] Change within noise threshold. Found 12 outliers among 100 measurements (12.00%) 9 (9.00%) high mild 3 (3.00%) high severe linter/all-rules/large/dataset.py time: [5.8086 ms 5.8163 ms 5.8240 ms] thrpt: [6.9854 MiB/s 6.9946 MiB/s 7.0038 MiB/s] change: time: [-1.5651% -1.3536% -1.1584%] (p = 0.00 < 0.05) thrpt: [+1.1720% +1.3721% +1.5900%] Performance has improved. ``` My guess is that `NoHashHasher` underperforms because the keys are not randomly distributed... Anyway, it's a ~1% (significant) performance gain on some of the above, plus we get to remove a dependency.
This commit is contained in:
parent
b7e7346081
commit
ed72c027a3
8 changed files with 13 additions and 30 deletions
8
Cargo.lock
generated
8
Cargo.lock
generated
|
@ -1295,12 +1295,6 @@ dependencies = [
|
||||||
"static_assertions",
|
"static_assertions",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "nohash-hasher"
|
|
||||||
version = "0.2.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nom"
|
name = "nom"
|
||||||
version = "7.1.3"
|
version = "7.1.3"
|
||||||
|
@ -1895,7 +1889,6 @@ dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"memchr",
|
"memchr",
|
||||||
"natord",
|
"natord",
|
||||||
"nohash-hasher",
|
|
||||||
"num-bigint",
|
"num-bigint",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
@ -2164,7 +2157,6 @@ version = "0.0.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.3.3",
|
"bitflags 2.3.3",
|
||||||
"is-macro",
|
"is-macro",
|
||||||
"nohash-hasher",
|
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"ruff_index",
|
"ruff_index",
|
||||||
"ruff_python_ast",
|
"ruff_python_ast",
|
||||||
|
|
|
@ -26,7 +26,6 @@ is-macro = { version = "0.2.2" }
|
||||||
itertools = { version = "0.10.5" }
|
itertools = { version = "0.10.5" }
|
||||||
log = { version = "0.4.17" }
|
log = { version = "0.4.17" }
|
||||||
memchr = "2.5.0"
|
memchr = "2.5.0"
|
||||||
nohash-hasher = { version = "0.2.0" }
|
|
||||||
num-bigint = { version = "0.4.3" }
|
num-bigint = { version = "0.4.3" }
|
||||||
num-traits = { version = "0.2.15" }
|
num-traits = { version = "0.2.15" }
|
||||||
once_cell = { version = "1.17.1" }
|
once_cell = { version = "1.17.1" }
|
||||||
|
|
|
@ -45,7 +45,6 @@ libcst = { workspace = true }
|
||||||
log = { workspace = true }
|
log = { workspace = true }
|
||||||
memchr = { workspace = true }
|
memchr = { workspace = true }
|
||||||
natord = { version = "1.0.9" }
|
natord = { version = "1.0.9" }
|
||||||
nohash-hasher = { workspace = true }
|
|
||||||
num-bigint = { workspace = true }
|
num-bigint = { workspace = true }
|
||||||
num-traits = { workspace = true }
|
num-traits = { workspace = true }
|
||||||
once_cell = { workspace = true }
|
once_cell = { workspace = true }
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
use std::collections::BTreeSet;
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use nohash_hasher::IntSet;
|
|
||||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||||
use rustc_hash::FxHashMap;
|
use rustc_hash::{FxHashMap, FxHashSet};
|
||||||
|
|
||||||
use ruff_diagnostics::{Diagnostic, Edit, Fix, IsolationLevel};
|
use ruff_diagnostics::{Diagnostic, Edit, Fix, IsolationLevel};
|
||||||
use ruff_python_ast::source_code::Locator;
|
use ruff_python_ast::source_code::Locator;
|
||||||
|
@ -47,7 +46,7 @@ fn apply_fixes<'a>(
|
||||||
let mut output = String::with_capacity(locator.len());
|
let mut output = String::with_capacity(locator.len());
|
||||||
let mut last_pos: Option<TextSize> = None;
|
let mut last_pos: Option<TextSize> = None;
|
||||||
let mut applied: BTreeSet<&Edit> = BTreeSet::default();
|
let mut applied: BTreeSet<&Edit> = BTreeSet::default();
|
||||||
let mut isolated: IntSet<u32> = IntSet::default();
|
let mut isolated: FxHashSet<u32> = FxHashSet::default();
|
||||||
let mut fixed = FxHashMap::default();
|
let mut fixed = FxHashMap::default();
|
||||||
let mut source_map = SourceMap::default();
|
let mut source_map = SourceMap::default();
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,6 @@ ruff_index = { path = "../ruff_index" }
|
||||||
|
|
||||||
bitflags = { workspace = true }
|
bitflags = { workspace = true }
|
||||||
is-macro = { workspace = true }
|
is-macro = { workspace = true }
|
||||||
nohash-hasher = { workspace = true }
|
|
||||||
num-traits = { workspace = true }
|
num-traits = { workspace = true }
|
||||||
rustc-hash = { workspace = true }
|
rustc-hash = { workspace = true }
|
||||||
rustpython-parser = { workspace = true }
|
rustpython-parser = { workspace = true }
|
||||||
|
|
|
@ -275,8 +275,6 @@ bitflags! {
|
||||||
#[newtype_index]
|
#[newtype_index]
|
||||||
pub struct BindingId;
|
pub struct BindingId;
|
||||||
|
|
||||||
impl nohash_hasher::IsEnabled for BindingId {}
|
|
||||||
|
|
||||||
/// The bindings in a program.
|
/// The bindings in a program.
|
||||||
///
|
///
|
||||||
/// Bindings are indexed by [`BindingId`]
|
/// Bindings are indexed by [`BindingId`]
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use bitflags::bitflags;
|
use bitflags::bitflags;
|
||||||
use nohash_hasher::{BuildNoHashHasher, IntMap};
|
|
||||||
use ruff_text_size::TextRange;
|
use ruff_text_size::TextRange;
|
||||||
|
use rustc_hash::FxHashMap;
|
||||||
use rustpython_parser::ast::{Expr, Ranged, Stmt};
|
use rustpython_parser::ast::{Expr, Ranged, Stmt};
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
|
|
||||||
|
@ -74,7 +73,7 @@ pub struct SemanticModel<'a> {
|
||||||
///
|
///
|
||||||
/// In this case, the binding created by `x = 1` shadows the binding created by `import x`,
|
/// In this case, the binding created by `x = 1` shadows the binding created by `import x`,
|
||||||
/// despite the fact that they're in different scopes.
|
/// despite the fact that they're in different scopes.
|
||||||
pub shadowed_bindings: HashMap<BindingId, BindingId, BuildNoHashHasher<BindingId>>,
|
pub shadowed_bindings: FxHashMap<BindingId, BindingId>,
|
||||||
|
|
||||||
/// Map from binding index to indexes of bindings that annotate it (in the same scope).
|
/// Map from binding index to indexes of bindings that annotate it (in the same scope).
|
||||||
///
|
///
|
||||||
|
@ -97,7 +96,7 @@ pub struct SemanticModel<'a> {
|
||||||
/// In this case, we _do_ store the binding created by `x: int` directly on the scope, and not
|
/// In this case, we _do_ store the binding created by `x: int` directly on the scope, and not
|
||||||
/// as a delayed annotation. Annotations are thus treated as bindings only when they are the
|
/// as a delayed annotation. Annotations are thus treated as bindings only when they are the
|
||||||
/// first binding in a scope; any annotations that follow are treated as "delayed" annotations.
|
/// first binding in a scope; any annotations that follow are treated as "delayed" annotations.
|
||||||
delayed_annotations: HashMap<BindingId, Vec<BindingId>, BuildNoHashHasher<BindingId>>,
|
delayed_annotations: FxHashMap<BindingId, Vec<BindingId>>,
|
||||||
|
|
||||||
/// Map from binding ID to the IDs of all scopes in which it is declared a `global` or
|
/// Map from binding ID to the IDs of all scopes in which it is declared a `global` or
|
||||||
/// `nonlocal`.
|
/// `nonlocal`.
|
||||||
|
@ -112,7 +111,7 @@ pub struct SemanticModel<'a> {
|
||||||
///
|
///
|
||||||
/// In this case, the binding created by `x = 1` is rebound within the scope created by `f`
|
/// In this case, the binding created by `x = 1` is rebound within the scope created by `f`
|
||||||
/// by way of the `global x` statement.
|
/// by way of the `global x` statement.
|
||||||
rebinding_scopes: HashMap<BindingId, Vec<ScopeId>, BuildNoHashHasher<BindingId>>,
|
rebinding_scopes: FxHashMap<BindingId, Vec<ScopeId>>,
|
||||||
|
|
||||||
/// Flags for the semantic model.
|
/// Flags for the semantic model.
|
||||||
pub flags: SemanticModelFlags,
|
pub flags: SemanticModelFlags,
|
||||||
|
@ -137,9 +136,9 @@ impl<'a> SemanticModel<'a> {
|
||||||
resolved_references: ResolvedReferences::default(),
|
resolved_references: ResolvedReferences::default(),
|
||||||
unresolved_references: UnresolvedReferences::default(),
|
unresolved_references: UnresolvedReferences::default(),
|
||||||
globals: GlobalsArena::default(),
|
globals: GlobalsArena::default(),
|
||||||
shadowed_bindings: IntMap::default(),
|
shadowed_bindings: FxHashMap::default(),
|
||||||
delayed_annotations: IntMap::default(),
|
delayed_annotations: FxHashMap::default(),
|
||||||
rebinding_scopes: IntMap::default(),
|
rebinding_scopes: FxHashMap::default(),
|
||||||
flags: SemanticModelFlags::new(path),
|
flags: SemanticModelFlags::new(path),
|
||||||
handled_exceptions: Vec::default(),
|
handled_exceptions: Vec::default(),
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
use bitflags::bitflags;
|
|
||||||
use nohash_hasher::{BuildNoHashHasher, IntMap};
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::ops::{Deref, DerefMut};
|
use std::ops::{Deref, DerefMut};
|
||||||
|
|
||||||
|
use bitflags::bitflags;
|
||||||
use rustc_hash::FxHashMap;
|
use rustc_hash::FxHashMap;
|
||||||
use rustpython_parser::ast;
|
use rustpython_parser::ast;
|
||||||
|
|
||||||
|
@ -37,7 +35,7 @@ pub struct Scope<'a> {
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// In this case, the binding created by `x = 2` shadows the binding created by `x = 1`.
|
/// In this case, the binding created by `x = 2` shadows the binding created by `x = 1`.
|
||||||
shadowed_bindings: HashMap<BindingId, BindingId, BuildNoHashHasher<BindingId>>,
|
shadowed_bindings: FxHashMap<BindingId, BindingId>,
|
||||||
|
|
||||||
/// Index into the globals arena, if the scope contains any globally-declared symbols.
|
/// Index into the globals arena, if the scope contains any globally-declared symbols.
|
||||||
globals_id: Option<GlobalsId>,
|
globals_id: Option<GlobalsId>,
|
||||||
|
@ -53,7 +51,7 @@ impl<'a> Scope<'a> {
|
||||||
parent: None,
|
parent: None,
|
||||||
star_imports: Vec::default(),
|
star_imports: Vec::default(),
|
||||||
bindings: FxHashMap::default(),
|
bindings: FxHashMap::default(),
|
||||||
shadowed_bindings: IntMap::default(),
|
shadowed_bindings: FxHashMap::default(),
|
||||||
globals_id: None,
|
globals_id: None,
|
||||||
flags: ScopeFlags::empty(),
|
flags: ScopeFlags::empty(),
|
||||||
}
|
}
|
||||||
|
@ -65,7 +63,7 @@ impl<'a> Scope<'a> {
|
||||||
parent: Some(parent),
|
parent: Some(parent),
|
||||||
star_imports: Vec::default(),
|
star_imports: Vec::default(),
|
||||||
bindings: FxHashMap::default(),
|
bindings: FxHashMap::default(),
|
||||||
shadowed_bindings: IntMap::default(),
|
shadowed_bindings: FxHashMap::default(),
|
||||||
globals_id: None,
|
globals_id: None,
|
||||||
flags: ScopeFlags::empty(),
|
flags: ScopeFlags::empty(),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue