mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-09 13:18:52 +00:00
Cache name resolutions in the semantic model (#6047)
## Summary This PR stores the mapping from `ExprName` node to resolved `BindingId`, which lets us skip scope lookups in `resolve_call_path`. It's enabled by #6045, since that PR ensures that when we analyze a node (and thus call `resolve_call_path`), we'll have already visited its `ExprName` elements. In more detail: imagine that we're traversing over `foo.bar()`. When we read `foo`, it will be an `ExprName`, which we'll then resolve to a binding via `handle_node_load`. With this change, we then store that binding in a map. Later, if we call `collect_call_path` on `foo.bar`, we'll identify `foo` (the "head" of the attribute) and grab the resolved binding in that map. _Almost_ all names are now resolved in advance, though it's not a strict requirement, and some rules break that pattern (e.g., if we're analyzing arguments, and they need to inspect their annotations, which are visited in a deferred manner). This improves performance by 4-6% on the all-rules benchmark. It looks like it hurts performance (1-2% drop) in the default-rules benchmark, presumedly because those rules don't call `resolve_call_path` nearly as much, and so we're paying for these extra writes. Here's the benchmark data: ``` linter/default-rules/numpy/globals.py time: [67.270 µs 67.380 µs 67.489 µs] thrpt: [43.720 MiB/s 43.792 MiB/s 43.863 MiB/s] change: time: [+0.4747% +0.7752% +1.0626%] (p = 0.00 < 0.05) thrpt: [-1.0514% -0.7693% -0.4724%] Change within noise threshold. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high severe linter/default-rules/pydantic/types.py time: [1.4067 ms 1.4105 ms 1.4146 ms] thrpt: [18.028 MiB/s 18.081 MiB/s 18.129 MiB/s] change: time: [+1.3152% +1.6953% +2.0414%] (p = 0.00 < 0.05) thrpt: [-2.0006% -1.6671% -1.2981%] Performance has regressed. linter/default-rules/numpy/ctypeslib.py time: [637.67 µs 638.96 µs 640.28 µs] thrpt: [26.006 MiB/s 26.060 MiB/s 26.113 MiB/s] change: time: [+1.5859% +1.8109% +2.0353%] (p = 0.00 < 0.05) thrpt: [-1.9947% -1.7787% -1.5611%] Performance has regressed. linter/default-rules/large/dataset.py time: [3.2289 ms 3.2336 ms 3.2383 ms] thrpt: [12.563 MiB/s 12.581 MiB/s 12.599 MiB/s] change: time: [+0.8029% +0.9898% +1.1740%] (p = 0.00 < 0.05) thrpt: [-1.1604% -0.9801% -0.7965%] Change within noise threshold. linter/all-rules/numpy/globals.py time: [134.05 µs 134.15 µs 134.26 µs] thrpt: [21.977 MiB/s 21.995 MiB/s 22.012 MiB/s] change: time: [-4.4571% -4.1175% -3.8268%] (p = 0.00 < 0.05) thrpt: [+3.9791% +4.2943% +4.6651%] Performance has improved. Found 8 outliers among 100 measurements (8.00%) 2 (2.00%) low mild 3 (3.00%) high mild 3 (3.00%) high severe linter/all-rules/pydantic/types.py time: [2.5627 ms 2.5669 ms 2.5720 ms] thrpt: [9.9158 MiB/s 9.9354 MiB/s 9.9516 MiB/s] change: time: [-5.8304% -5.6374% -5.4452%] (p = 0.00 < 0.05) thrpt: [+5.7587% +5.9742% +6.1914%] Performance has improved. Found 7 outliers among 100 measurements (7.00%) 6 (6.00%) high mild 1 (1.00%) high severe linter/all-rules/numpy/ctypeslib.py time: [1.3949 ms 1.3956 ms 1.3964 ms] thrpt: [11.925 MiB/s 11.931 MiB/s 11.937 MiB/s] change: time: [-6.2496% -6.0856% -5.9293%] (p = 0.00 < 0.05) thrpt: [+6.3030% +6.4799% +6.6662%] Performance has improved. Found 7 outliers among 100 measurements (7.00%) 3 (3.00%) high mild 4 (4.00%) high severe linter/all-rules/large/dataset.py time: [5.5951 ms 5.6019 ms 5.6093 ms] thrpt: [7.2527 MiB/s 7.2623 MiB/s 7.2711 MiB/s] change: time: [-5.1781% -4.9783% -4.8070%] (p = 0.00 < 0.05) thrpt: [+5.0497% +5.2391% +5.4608%] Performance has improved. ``` Still playing with this (the concepts need better names, documentation, etc.), but opening up for feedback.
This commit is contained in:
parent
0638a26347
commit
e15b9c5572
2 changed files with 85 additions and 40 deletions
|
@ -1,10 +1,10 @@
|
|||
use std::path::Path;
|
||||
|
||||
use bitflags::bitflags;
|
||||
use ruff_python_ast::{Expr, Ranged, Stmt};
|
||||
use ruff_text_size::TextRange;
|
||||
use ruff_python_ast::{self as ast, Expr, Ranged, Stmt};
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
use rustc_hash::FxHashMap;
|
||||
use smallvec::SmallVec;
|
||||
use smallvec::smallvec;
|
||||
|
||||
use ruff_python_ast::call_path::{collect_call_path, from_unqualified_name, CallPath};
|
||||
use ruff_python_ast::helpers::from_relative_import;
|
||||
|
@ -118,6 +118,10 @@ pub struct SemanticModel<'a> {
|
|||
|
||||
/// Exceptions that have been handled by the current scope.
|
||||
pub handled_exceptions: Vec<Exceptions>,
|
||||
|
||||
/// Map from [`ast::ExprName`] node (represented as a [`NameId`]) to the [`Binding`] to which
|
||||
/// it resolved (represented as a [`BindingId`]).
|
||||
resolved_names: FxHashMap<NameId, BindingId>,
|
||||
}
|
||||
|
||||
impl<'a> SemanticModel<'a> {
|
||||
|
@ -141,6 +145,7 @@ impl<'a> SemanticModel<'a> {
|
|||
rebinding_scopes: FxHashMap::default(),
|
||||
flags: SemanticModelFlags::new(path),
|
||||
handled_exceptions: Vec::default(),
|
||||
resolved_names: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -270,29 +275,32 @@ impl<'a> SemanticModel<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Resolve a `load` reference to `symbol` at `range`.
|
||||
pub fn resolve_load(&mut self, symbol: &str, range: TextRange) -> ReadResult {
|
||||
/// Resolve a `load` reference to an [`ast::ExprName`].
|
||||
pub fn resolve_load(&mut self, name: &ast::ExprName) -> ReadResult {
|
||||
// PEP 563 indicates that if a forward reference can be resolved in the module scope, we
|
||||
// should prefer it over local resolutions.
|
||||
if self.in_forward_reference() {
|
||||
if let Some(binding_id) = self.scopes.global().get(symbol) {
|
||||
if let Some(binding_id) = self.scopes.global().get(name.id.as_str()) {
|
||||
if !self.bindings[binding_id].is_unbound() {
|
||||
// Mark the binding as used.
|
||||
let reference_id =
|
||||
self.resolved_references
|
||||
.push(ScopeId::global(), range, self.flags);
|
||||
.push(ScopeId::global(), name.range, self.flags);
|
||||
self.bindings[binding_id].references.push(reference_id);
|
||||
|
||||
// Mark any submodule aliases as used.
|
||||
if let Some(binding_id) =
|
||||
self.resolve_submodule(symbol, ScopeId::global(), binding_id)
|
||||
self.resolve_submodule(name.id.as_str(), ScopeId::global(), binding_id)
|
||||
{
|
||||
let reference_id =
|
||||
self.resolved_references
|
||||
.push(ScopeId::global(), range, self.flags);
|
||||
let reference_id = self.resolved_references.push(
|
||||
ScopeId::global(),
|
||||
name.range,
|
||||
self.flags,
|
||||
);
|
||||
self.bindings[binding_id].references.push(reference_id);
|
||||
}
|
||||
|
||||
self.resolved_names.insert(name.into(), binding_id);
|
||||
return ReadResult::Resolved(binding_id);
|
||||
}
|
||||
}
|
||||
|
@ -310,7 +318,7 @@ impl<'a> SemanticModel<'a> {
|
|||
// def __init__(self):
|
||||
// print(__class__)
|
||||
// ```
|
||||
if seen_function && matches!(symbol, "__class__") {
|
||||
if seen_function && matches!(name.id.as_str(), "__class__") {
|
||||
return ReadResult::ImplicitGlobal;
|
||||
}
|
||||
// Do not allow usages of class symbols unless it is the immediate parent, e.g.:
|
||||
|
@ -332,18 +340,20 @@ impl<'a> SemanticModel<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
if let Some(binding_id) = scope.get(symbol) {
|
||||
if let Some(binding_id) = scope.get(name.id.as_str()) {
|
||||
// Mark the binding as used.
|
||||
let reference_id = self
|
||||
.resolved_references
|
||||
.push(self.scope_id, range, self.flags);
|
||||
let reference_id =
|
||||
self.resolved_references
|
||||
.push(self.scope_id, name.range, self.flags);
|
||||
self.bindings[binding_id].references.push(reference_id);
|
||||
|
||||
// Mark any submodule aliases as used.
|
||||
if let Some(binding_id) = self.resolve_submodule(symbol, scope_id, binding_id) {
|
||||
if let Some(binding_id) =
|
||||
self.resolve_submodule(name.id.as_str(), scope_id, binding_id)
|
||||
{
|
||||
let reference_id =
|
||||
self.resolved_references
|
||||
.push(self.scope_id, range, self.flags);
|
||||
.push(self.scope_id, name.range, self.flags);
|
||||
self.bindings[binding_id].references.push(reference_id);
|
||||
}
|
||||
|
||||
|
@ -383,7 +393,7 @@ impl<'a> SemanticModel<'a> {
|
|||
// The `x` in `print(x)` should be treated as unresolved.
|
||||
BindingKind::Deletion | BindingKind::UnboundException(None) => {
|
||||
self.unresolved_references.push(
|
||||
range,
|
||||
name.range,
|
||||
self.exceptions(),
|
||||
UnresolvedReferenceFlags::empty(),
|
||||
);
|
||||
|
@ -409,24 +419,30 @@ impl<'a> SemanticModel<'a> {
|
|||
// Mark the binding as used.
|
||||
let reference_id =
|
||||
self.resolved_references
|
||||
.push(self.scope_id, range, self.flags);
|
||||
.push(self.scope_id, name.range, self.flags);
|
||||
self.bindings[binding_id].references.push(reference_id);
|
||||
|
||||
// Mark any submodule aliases as used.
|
||||
if let Some(binding_id) =
|
||||
self.resolve_submodule(symbol, scope_id, binding_id)
|
||||
self.resolve_submodule(name.id.as_str(), scope_id, binding_id)
|
||||
{
|
||||
let reference_id =
|
||||
self.resolved_references
|
||||
.push(self.scope_id, range, self.flags);
|
||||
let reference_id = self.resolved_references.push(
|
||||
self.scope_id,
|
||||
name.range,
|
||||
self.flags,
|
||||
);
|
||||
self.bindings[binding_id].references.push(reference_id);
|
||||
}
|
||||
|
||||
self.resolved_names.insert(name.into(), binding_id);
|
||||
return ReadResult::Resolved(binding_id);
|
||||
}
|
||||
|
||||
// Otherwise, treat it as resolved.
|
||||
_ => return ReadResult::Resolved(binding_id),
|
||||
_ => {
|
||||
// Otherwise, treat it as resolved.
|
||||
self.resolved_names.insert(name.into(), binding_id);
|
||||
return ReadResult::Resolved(binding_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -446,7 +462,7 @@ impl<'a> SemanticModel<'a> {
|
|||
// print(__qualname__)
|
||||
// ```
|
||||
if index == 0 && scope.kind.is_class() {
|
||||
if matches!(symbol, "__module__" | "__qualname__") {
|
||||
if matches!(name.id.as_str(), "__module__" | "__qualname__") {
|
||||
return ReadResult::ImplicitGlobal;
|
||||
}
|
||||
}
|
||||
|
@ -457,14 +473,14 @@ impl<'a> SemanticModel<'a> {
|
|||
|
||||
if import_starred {
|
||||
self.unresolved_references.push(
|
||||
range,
|
||||
name.range,
|
||||
self.exceptions(),
|
||||
UnresolvedReferenceFlags::WILDCARD_IMPORT,
|
||||
);
|
||||
ReadResult::WildcardImport
|
||||
} else {
|
||||
self.unresolved_references.push(
|
||||
range,
|
||||
name.range,
|
||||
self.exceptions(),
|
||||
UnresolvedReferenceFlags::empty(),
|
||||
);
|
||||
|
@ -585,13 +601,30 @@ impl<'a> SemanticModel<'a> {
|
|||
///
|
||||
/// ...then `resolve_call_path(${python_version})` will resolve to `sys.version_info`.
|
||||
pub fn resolve_call_path(&'a self, value: &'a Expr) -> Option<CallPath<'a>> {
|
||||
let call_path = collect_call_path(value)?;
|
||||
let (head, tail) = call_path.split_first()?;
|
||||
let binding = self.find_binding(head)?;
|
||||
/// Return the [`ast::ExprName`] at the head of the expression, if any.
|
||||
const fn match_head(value: &Expr) -> Option<&ast::ExprName> {
|
||||
match value {
|
||||
Expr::Attribute(ast::ExprAttribute { value, .. }) => match_head(value),
|
||||
Expr::Name(name) => Some(name),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
// If the name was already resolved, look it up; otherwise, search for the symbol.
|
||||
let head = match_head(value)?;
|
||||
let binding = self
|
||||
.resolved_names
|
||||
.get(&head.into())
|
||||
.map(|id| self.binding(*id))
|
||||
.or_else(|| self.find_binding(&head.id))?;
|
||||
|
||||
match &binding.kind {
|
||||
BindingKind::Import(Import {
|
||||
qualified_name: name,
|
||||
}) => {
|
||||
let call_path = collect_call_path(value)?;
|
||||
let (_, tail) = call_path.split_first()?;
|
||||
|
||||
let mut source_path: CallPath = from_unqualified_name(name);
|
||||
source_path.extend_from_slice(tail);
|
||||
Some(source_path)
|
||||
|
@ -599,6 +632,9 @@ impl<'a> SemanticModel<'a> {
|
|||
BindingKind::SubmoduleImport(SubmoduleImport {
|
||||
qualified_name: name,
|
||||
}) => {
|
||||
let call_path = collect_call_path(value)?;
|
||||
let (_, tail) = call_path.split_first()?;
|
||||
|
||||
let name = name.split('.').next().unwrap_or(name);
|
||||
let mut source_path: CallPath = from_unqualified_name(name);
|
||||
source_path.extend_from_slice(tail);
|
||||
|
@ -607,6 +643,9 @@ impl<'a> SemanticModel<'a> {
|
|||
BindingKind::FromImport(FromImport {
|
||||
qualified_name: name,
|
||||
}) => {
|
||||
let call_path = collect_call_path(value)?;
|
||||
let (_, tail) = call_path.split_first()?;
|
||||
|
||||
if name.starts_with('.') {
|
||||
let mut source_path = from_relative_import(self.module_path?, name);
|
||||
if source_path.is_empty() {
|
||||
|
@ -621,12 +660,7 @@ impl<'a> SemanticModel<'a> {
|
|||
Some(source_path)
|
||||
}
|
||||
}
|
||||
BindingKind::Builtin => {
|
||||
let mut source_path: CallPath = SmallVec::with_capacity(1 + call_path.len());
|
||||
source_path.push("");
|
||||
source_path.extend_from_slice(call_path.as_slice());
|
||||
Some(source_path)
|
||||
}
|
||||
BindingKind::Builtin => Some(smallvec!["", head.id.as_str()]),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
@ -1504,3 +1538,14 @@ impl Ranged for ImportedName {
|
|||
self.range
|
||||
}
|
||||
}
|
||||
|
||||
/// A unique identifier for an [`ast::ExprName`]. No two names can even appear at the same location
|
||||
/// in the source code, so the starting offset is a cheap and sufficient unique identifier.
|
||||
#[derive(Debug, Hash, PartialEq, Eq)]
|
||||
struct NameId(TextSize);
|
||||
|
||||
impl From<&ast::ExprName> for NameId {
|
||||
fn from(name: &ast::ExprName) -> Self {
|
||||
Self(name.start())
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue