Track top-level module imports in the semantic model (#9775)

## Summary

This is a simple idea to avoid unnecessary work in the linter,
especially for rules that run on all name and/or all attribute nodes.
Imagine a rule like the NumPy deprecation check. If the user never
imported `numpy`, we should be able to skip that rule entirely --
whereas today, we do a `resolve_call_path` check on _every_ name in the
file. It turns out that there's basically a finite set of modules that
we care about, so we now track imports on those modules as explicit
flags on the semantic model. In rules that can _only_ ever trigger if
those modules were imported, we add a dedicated and extremely cheap
check to the top of the rule.

We could consider generalizing this to all modules, but I would expect
that not to be much faster than `resolve_call_path`, which is just a
hash map lookup on `TextSize` anyway.

It would also be nice to make this declarative, such that rules could
declare the modules they care about and the analyzers could call the rules
as appropriate. But I don't think such a design should block merging
this.
This commit is contained in:
Charlie Marsh 2024-02-02 11:37:20 -08:00 committed by GitHub
parent c3ca34543f
commit e50603caf6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 395 additions and 100 deletions

View file

@ -13,7 +13,7 @@ use ruff_text_size::Ranged;
use crate::analyze::type_inference::{PythonType, ResolvedPythonType};
use crate::model::SemanticModel;
use crate::{Binding, BindingKind};
use crate::{Binding, BindingKind, Modules};
#[derive(Debug, Copy, Clone)]
pub enum Callable {
@ -101,18 +101,22 @@ impl std::fmt::Display for ModuleMember {
/// Returns the PEP 585 standard library generic variant for a `typing` module reference, if such
/// a variant exists.
pub fn to_pep585_generic(expr: &Expr, semantic: &SemanticModel) -> Option<ModuleMember> {
semantic.resolve_call_path(expr).and_then(|call_path| {
let [module, member] = call_path.as_slice() else {
return None;
};
as_pep_585_generic(module, member).map(|(module, member)| {
if module.is_empty() {
ModuleMember::BuiltIn(member)
} else {
ModuleMember::Member(module, member)
}
semantic
.seen_module(Modules::TYPING | Modules::TYPING_EXTENSIONS)
.then(|| semantic.resolve_call_path(expr))
.flatten()
.and_then(|call_path| {
let [module, member] = call_path.as_slice() else {
return None;
};
as_pep_585_generic(module, member).map(|(module, member)| {
if module.is_empty() {
ModuleMember::BuiltIn(member)
} else {
ModuleMember::Member(module, member)
}
})
})
})
}
/// Return whether a given expression uses a PEP 585 standard library generic.

View file

@ -120,6 +120,9 @@ pub struct SemanticModel<'a> {
/// Flags for the semantic model.
pub flags: SemanticModelFlags,
/// Modules that have been seen by the semantic model.
pub seen: Modules,
/// Exceptions that have been handled by the current scope.
pub handled_exceptions: Vec<Exceptions>,
@ -149,6 +152,7 @@ impl<'a> SemanticModel<'a> {
delayed_annotations: FxHashMap::default(),
rebinding_scopes: FxHashMap::default(),
flags: SemanticModelFlags::new(path),
seen: Modules::empty(),
handled_exceptions: Vec::default(),
resolved_names: FxHashMap::default(),
}
@ -1080,6 +1084,40 @@ impl<'a> SemanticModel<'a> {
.filter_map(move |id| self.nodes[id].as_expression())
}
/// Flag a Python module as "seen" by the semantic model, so that later callers can cheaply
/// rule out the need to resolve symbols from modules that never appeared.
///
/// Only the fixed set of module names listed below is tracked; any other name is a no-op.
pub fn add_module(&mut self, module: &str) {
    // Translate the module name into its tracking flag, bailing out for untracked modules.
    let flag = match module {
        "collections" => Modules::COLLECTIONS,
        "datetime" => Modules::DATETIME,
        "django" => Modules::DJANGO,
        "logging" => Modules::LOGGING,
        "mock" => Modules::MOCK,
        "numpy" => Modules::NUMPY,
        "os" => Modules::OS,
        "pandas" => Modules::PANDAS,
        "pytest" => Modules::PYTEST,
        "re" => Modules::RE,
        "six" => Modules::SIX,
        "subprocess" => Modules::SUBPROCESS,
        "tarfile" => Modules::TARFILE,
        "trio" => Modules::TRIO,
        "typing" => Modules::TYPING,
        "typing_extensions" => Modules::TYPING_EXTENSIONS,
        _ => return,
    };
    self.seen.insert(flag);
}
/// Return `true` if any of the given [`Modules`] was "seen" anywhere in the semantic model. This
/// is used as a fast path to avoid unnecessary work when resolving symbols.
///
/// `module` may combine several flags (e.g., `Modules::TYPING | Modules::TYPING_EXTENSIONS`), in
/// which case having seen any one of them is sufficient.
///
/// Callers should still verify that the module is available in the current scope, as visiting
/// an import of the relevant module _anywhere_ in the file will cause this method to return
/// `true`.
pub fn seen_module(&self, module: Modules) -> bool {
self.seen.intersects(module)
}
/// Set the [`Globals`] for the current [`Scope`].
pub fn set_globals(&mut self, globals: Globals<'a>) {
// If any global bindings don't already exist in the global scope, add them.
@ -1297,16 +1335,6 @@ impl<'a> SemanticModel<'a> {
exceptions
}
/// Return `true` if the module at the given path was seen anywhere in the semantic model.
/// This includes both direct imports (`import trio`) and member imports (`from trio import
/// TrioTask`).
pub fn seen(&self, module: &[&str]) -> bool {
self.bindings
.iter()
.filter_map(Binding::as_any_import)
.any(|import| import.call_path().starts_with(module))
}
/// Generate a [`Snapshot`] of the current semantic model.
pub fn snapshot(&self) -> Snapshot {
Snapshot {
@ -1532,6 +1560,29 @@ impl ShadowedBinding {
}
}
bitflags! {
    /// A select list of Python modules that the semantic model can explicitly track.
    ///
    /// Each module occupies one bit, so several modules can be combined with `|` and tested in a
    /// single operation. The default value is the empty set.
    #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
    pub struct Modules: u16 {
        const COLLECTIONS = 1 << 0;
        const DATETIME = 1 << 1;
        const DJANGO = 1 << 2;
        const LOGGING = 1 << 3;
        const MOCK = 1 << 4;
        const NUMPY = 1 << 5;
        const OS = 1 << 6;
        const PANDAS = 1 << 7;
        const PYTEST = 1 << 8;
        const RE = 1 << 9;
        const SIX = 1 << 10;
        const SUBPROCESS = 1 << 11;
        const TARFILE = 1 << 12;
        const TRIO = 1 << 13;
        const TYPING = 1 << 14;
        // NOTE: this uses the last available bit of the `u16`; widen the backing integer before
        // adding another module.
        const TYPING_EXTENSIONS = 1 << 15;
    }
}
bitflags! {
/// Flags indicating the current model state.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]