Replace NoHashHasher usages with FxHashMap (#6049)

## Summary

I had always assumed that `NoHashHasher` would be faster when using
integer keys, but benchmarking shows otherwise:

```
linter/default-rules/numpy/globals.py
                        time:   [66.544 µs 66.606 µs 66.678 µs]
                        thrpt:  [44.253 MiB/s 44.300 MiB/s 44.342 MiB/s]
                 change:
                        time:   [-0.1843% +0.1087% +0.3718%] (p = 0.46 > 0.05)
                        thrpt:  [-0.3704% -0.1086% +0.1847%]
                        No change in performance detected.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) high mild
linter/default-rules/pydantic/types.py
                        time:   [1.3787 ms 1.3811 ms 1.3837 ms]
                        thrpt:  [18.431 MiB/s 18.466 MiB/s 18.498 MiB/s]
                 change:
                        time:   [-0.4827% -0.1074% +0.1927%] (p = 0.56 > 0.05)
                        thrpt:  [-0.1924% +0.1075% +0.4850%]
                        No change in performance detected.
linter/default-rules/numpy/ctypeslib.py
                        time:   [624.82 µs 625.96 µs 627.17 µs]
                        thrpt:  [26.550 MiB/s 26.601 MiB/s 26.650 MiB/s]
                 change:
                        time:   [-0.7071% -0.4908% -0.2736%] (p = 0.00 < 0.05)
                        thrpt:  [+0.2744% +0.4932% +0.7122%]
                        Change within noise threshold.
linter/default-rules/large/dataset.py
                        time:   [3.1585 ms 3.1634 ms 3.1685 ms]
                        thrpt:  [12.840 MiB/s 12.861 MiB/s 12.880 MiB/s]
                 change:
                        time:   [-1.5338% -1.3463% -1.1476%] (p = 0.00 < 0.05)
                        thrpt:  [+1.1610% +1.3647% +1.5577%]
                        Performance has improved.

linter/all-rules/numpy/globals.py
                        time:   [140.17 µs 140.37 µs 140.58 µs]
                        thrpt:  [20.989 MiB/s 21.020 MiB/s 21.051 MiB/s]
                 change:
                        time:   [-0.1066% +0.3140% +0.7479%] (p = 0.14 > 0.05)
                        thrpt:  [-0.7423% -0.3130% +0.1067%]
                        No change in performance detected.
Found 3 outliers among 100 measurements (3.00%)
  2 (2.00%) high mild
  1 (1.00%) high severe
linter/all-rules/pydantic/types.py
                        time:   [2.7030 ms 2.7069 ms 2.7112 ms]
                        thrpt:  [9.4064 MiB/s 9.4216 MiB/s 9.4351 MiB/s]
                 change:
                        time:   [-0.6721% -0.4874% -0.2974%] (p = 0.00 < 0.05)
                        thrpt:  [+0.2982% +0.4898% +0.6766%]
                        Change within noise threshold.
Found 14 outliers among 100 measurements (14.00%)
  12 (12.00%) high mild
  2 (2.00%) high severe
linter/all-rules/numpy/ctypeslib.py
                        time:   [1.4709 ms 1.4727 ms 1.4749 ms]
                        thrpt:  [11.290 MiB/s 11.306 MiB/s 11.320 MiB/s]
                 change:
                        time:   [-1.1617% -0.9766% -0.8094%] (p = 0.00 < 0.05)
                        thrpt:  [+0.8160% +0.9862% +1.1754%]
                        Change within noise threshold.
Found 12 outliers among 100 measurements (12.00%)
  9 (9.00%) high mild
  3 (3.00%) high severe
linter/all-rules/large/dataset.py
                        time:   [5.8086 ms 5.8163 ms 5.8240 ms]
                        thrpt:  [6.9854 MiB/s 6.9946 MiB/s 7.0038 MiB/s]
                 change:
                        time:   [-1.5651% -1.3536% -1.1584%] (p = 0.00 < 0.05)
                        thrpt:  [+1.1720% +1.3721% +1.5900%]
                        Performance has improved.
```

My guess is that `NoHashHasher` underperforms because the keys are not
randomly distributed...

Anyway, it's a ~1% (significant) performance gain on some of the above,
plus we get to remove a dependency.
This commit is contained in:
Charlie Marsh 2023-07-24 19:41:57 -04:00 committed by GitHub
parent b7e7346081
commit ed72c027a3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 13 additions and 30 deletions

View file

@ -275,8 +275,6 @@ bitflags! {
#[newtype_index]
pub struct BindingId;
impl nohash_hasher::IsEnabled for BindingId {}
/// The bindings in a program.
///
/// Bindings are indexed by [`BindingId`]

View file

@ -1,9 +1,8 @@
use std::collections::HashMap;
use std::path::Path;
use bitflags::bitflags;
use nohash_hasher::{BuildNoHashHasher, IntMap};
use ruff_text_size::TextRange;
use rustc_hash::FxHashMap;
use rustpython_parser::ast::{Expr, Ranged, Stmt};
use smallvec::SmallVec;
@ -74,7 +73,7 @@ pub struct SemanticModel<'a> {
///
/// In this case, the binding created by `x = 1` shadows the binding created by `import x`,
/// despite the fact that they're in different scopes.
pub shadowed_bindings: HashMap<BindingId, BindingId, BuildNoHashHasher<BindingId>>,
pub shadowed_bindings: FxHashMap<BindingId, BindingId>,
/// Map from binding index to indexes of bindings that annotate it (in the same scope).
///
@ -97,7 +96,7 @@ pub struct SemanticModel<'a> {
/// In this case, we _do_ store the binding created by `x: int` directly on the scope, and not
/// as a delayed annotation. Annotations are thus treated as bindings only when they are the
/// first binding in a scope; any annotations that follow are treated as "delayed" annotations.
delayed_annotations: HashMap<BindingId, Vec<BindingId>, BuildNoHashHasher<BindingId>>,
delayed_annotations: FxHashMap<BindingId, Vec<BindingId>>,
/// Map from binding ID to the IDs of all scopes in which it is declared a `global` or
/// `nonlocal`.
@ -112,7 +111,7 @@ pub struct SemanticModel<'a> {
///
/// In this case, the binding created by `x = 1` is rebound within the scope created by `f`
/// by way of the `global x` statement.
rebinding_scopes: HashMap<BindingId, Vec<ScopeId>, BuildNoHashHasher<BindingId>>,
rebinding_scopes: FxHashMap<BindingId, Vec<ScopeId>>,
/// Flags for the semantic model.
pub flags: SemanticModelFlags,
@ -137,9 +136,9 @@ impl<'a> SemanticModel<'a> {
resolved_references: ResolvedReferences::default(),
unresolved_references: UnresolvedReferences::default(),
globals: GlobalsArena::default(),
shadowed_bindings: IntMap::default(),
delayed_annotations: IntMap::default(),
rebinding_scopes: IntMap::default(),
shadowed_bindings: FxHashMap::default(),
delayed_annotations: FxHashMap::default(),
rebinding_scopes: FxHashMap::default(),
flags: SemanticModelFlags::new(path),
handled_exceptions: Vec::default(),
}

View file

@ -1,8 +1,6 @@
use bitflags::bitflags;
use nohash_hasher::{BuildNoHashHasher, IntMap};
use std::collections::HashMap;
use std::ops::{Deref, DerefMut};
use bitflags::bitflags;
use rustc_hash::FxHashMap;
use rustpython_parser::ast;
@ -37,7 +35,7 @@ pub struct Scope<'a> {
/// ```
///
/// In this case, the binding created by `x = 2` shadows the binding created by `x = 1`.
shadowed_bindings: HashMap<BindingId, BindingId, BuildNoHashHasher<BindingId>>,
shadowed_bindings: FxHashMap<BindingId, BindingId>,
/// Index into the globals arena, if the scope contains any globally-declared symbols.
globals_id: Option<GlobalsId>,
@ -53,7 +51,7 @@ impl<'a> Scope<'a> {
parent: None,
star_imports: Vec::default(),
bindings: FxHashMap::default(),
shadowed_bindings: IntMap::default(),
shadowed_bindings: FxHashMap::default(),
globals_id: None,
flags: ScopeFlags::empty(),
}
@ -65,7 +63,7 @@ impl<'a> Scope<'a> {
parent: Some(parent),
star_imports: Vec::default(),
bindings: FxHashMap::default(),
shadowed_bindings: IntMap::default(),
shadowed_bindings: FxHashMap::default(),
globals_id: None,
flags: ScopeFlags::empty(),
}