red-knot: Don't refer to Module instances as IDs (#11649)

This commit is contained in:
Alex Waygood 2024-05-31 21:04:47 +01:00 committed by GitHub
parent 16a926d138
commit d62a617938
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 51 additions and 40 deletions

View file

@ -12,7 +12,10 @@ use crate::files::FileId;
use crate::symbols::Dependency; use crate::symbols::Dependency;
use crate::FxDashMap; use crate::FxDashMap;
/// ID uniquely identifying a module. /// Representation of a Python module.
///
/// The inner type wrapped by this struct is a unique identifier for the module
/// that is used by the struct's methods to lazily query information about the module.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct Module(u32); pub struct Module(u32);
@ -100,7 +103,8 @@ impl Module {
/// A module name, e.g. `foo.bar`. /// A module name, e.g. `foo.bar`.
/// ///
/// Always normalized to the absolute form (never a relative module name). /// Always normalized to the absolute form
/// (never a relative module name, i.e., never `.foo`).
#[derive(Clone, Debug, Eq, PartialEq, Hash)] #[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct ModuleName(smol_str::SmolStr); pub struct ModuleName(smol_str::SmolStr);
@ -250,9 +254,11 @@ pub struct ModuleData {
// Queries // Queries
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
/// Resolves a module name to a module id /// Resolves a module name to a module.
/// TODO: This would not work with Salsa because `ModuleName` isn't an ingredient and, therefore, cannot be used as part of a query. ///
/// For this to work with salsa, it would be necessary to intern all `ModuleName`s. /// TODO: This would not work with Salsa because `ModuleName` isn't an ingredient
/// and, therefore, cannot be used as part of a query.
/// For this to work with salsa, it would be necessary to intern all `ModuleName`s.
#[tracing::instrument(level = "debug", skip(db))] #[tracing::instrument(level = "debug", skip(db))]
pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Option<Module>> { pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Option<Module>> {
let jar: &SemanticJar = db.jar()?; let jar: &SemanticJar = db.jar()?;
@ -274,7 +280,7 @@ pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Opti
let file_id = db.file_id(&normalized); let file_id = db.file_id(&normalized);
let path = ModulePath::new(root_path.clone(), file_id); let path = ModulePath::new(root_path.clone(), file_id);
let id = Module( let module = Module(
modules modules
.next_module_id .next_module_id
.fetch_add(1, std::sync::atomic::Ordering::Relaxed), .fetch_add(1, std::sync::atomic::Ordering::Relaxed),
@ -282,7 +288,7 @@ pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Opti
modules modules
.modules .modules
.insert(id, Arc::from(ModuleData { name, path, kind })); .insert(module, Arc::from(ModuleData { name, path, kind }));
// A path can map to multiple modules because of symlinks: // A path can map to multiple modules because of symlinks:
// ``` // ```
@ -291,33 +297,33 @@ pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Opti
// ``` // ```
// Here, both `foo` and `bar` resolve to the same module but through different paths. // Here, both `foo` and `bar` resolve to the same module but through different paths.
// That's why we need to insert the absolute path and not the normalized path here. // That's why we need to insert the absolute path and not the normalized path here.
let absolute_id = if absolute_path == normalized { let absolute_file_id = if absolute_path == normalized {
file_id file_id
} else { } else {
db.file_id(&absolute_path) db.file_id(&absolute_path)
}; };
modules.by_file.insert(absolute_id, id); modules.by_file.insert(absolute_file_id, module);
entry.insert_entry(id); entry.insert_entry(module);
Ok(Some(id)) Ok(Some(module))
} }
} }
} }
/// Resolves the module id for the given path. /// Resolves the module for the given path.
/// ///
/// Returns `None` if the path is not a module in `sys.path`. /// Returns `None` if the path is not a module locatable via `sys.path`.
#[tracing::instrument(level = "debug", skip(db))] #[tracing::instrument(level = "debug", skip(db))]
pub fn path_to_module(db: &dyn SemanticDb, path: &Path) -> QueryResult<Option<Module>> { pub fn path_to_module(db: &dyn SemanticDb, path: &Path) -> QueryResult<Option<Module>> {
let file = db.file_id(path); let file = db.file_id(path);
file_to_module(db, file) file_to_module(db, file)
} }
/// Resolves the module id for the file with the given id. /// Resolves the module for the file with the given id.
/// ///
/// Returns `None` if the file is not a module in `sys.path`. /// Returns `None` if the file is not a module locatable via `sys.path`.
#[tracing::instrument(level = "debug", skip(db))] #[tracing::instrument(level = "debug", skip(db))]
pub fn file_to_module(db: &dyn SemanticDb, file: FileId) -> QueryResult<Option<Module>> { pub fn file_to_module(db: &dyn SemanticDb, file: FileId) -> QueryResult<Option<Module>> {
let jar: &SemanticJar = db.jar()?; let jar: &SemanticJar = db.jar()?;
@ -344,12 +350,12 @@ pub fn file_to_module(db: &dyn SemanticDb, file: FileId) -> QueryResult<Option<M
// Resolve the module name to see if Python would resolve the name to the same path. // Resolve the module name to see if Python would resolve the name to the same path.
// If it doesn't, then that means that multiple modules have the same in different // If it doesn't, then that means that multiple modules have the same in different
// root paths, but that the module corresponding to the past path is in a lower priority path, // root paths, but that the module corresponding to the past path is in a lower priority search path,
// in which case we ignore it. // in which case we ignore it.
let Some(module_id) = resolve_module(db, module_name)? else { let Some(module) = resolve_module(db, module_name)? else {
return Ok(None); return Ok(None);
}; };
let module_path = module_id.path(db)?; let module_path = module.path(db)?;
if module_path.root() == &root_path { if module_path.root() == &root_path {
let Ok(normalized) = path.canonicalize() else { let Ok(normalized) = path.canonicalize() else {
@ -369,7 +375,7 @@ pub fn file_to_module(db: &dyn SemanticDb, file: FileId) -> QueryResult<Option<M
} }
// Path has been inserted by `resolved` // Path has been inserted by `resolved`
Ok(Some(module_id)) Ok(Some(module))
} else { } else {
// This path is for a module with the same name but in a module search path with a lower priority. // This path is for a module with the same name but in a module search path with a lower priority.
// Ignore it. // Ignore it.
@ -388,19 +394,22 @@ pub fn set_module_search_paths(db: &mut dyn SemanticDb, search_paths: Vec<Module
jar.module_resolver = ModuleResolver::new(search_paths); jar.module_resolver = ModuleResolver::new(search_paths);
} }
/// Adds a module to the resolver. /// Adds a module located at `path` to the resolver.
/// ///
/// Returns `None` if the path doesn't resolve to a module. /// Returns `None` if the path doesn't resolve to a module.
/// ///
/// Returns `Some` with the id of the module and the ids of the modules that need re-resolving /// Returns `Some(module, other_modules)`, where `module` is the resolved module
/// because they were part of a namespace package and might now resolve differently. /// with file location `path`, and `other_modules` is a `Vec` of `ModuleData` instances.
/// Each element in `other_modules` provides information regarding a single module that needs
/// re-resolving because it was part of a namespace package and might now resolve differently.
///
/// Note: This won't work with salsa because `Path` is not an ingredient. /// Note: This won't work with salsa because `Path` is not an ingredient.
pub fn add_module(db: &mut dyn SemanticDb, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)> { pub fn add_module(db: &mut dyn SemanticDb, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)> {
// No locking is required because we're holding a mutable reference to `modules`. // No locking is required because we're holding a mutable reference to `modules`.
// TODO This needs tests // TODO This needs tests
// Note: Intentionally by-pass caching here. Module should not be in the cache yet. // Note: Intentionally bypass caching here. Module should not be in the cache yet.
let module = path_to_module(db, path).ok()??; let module = path_to_module(db, path).ok()??;
// The code below is to handle the addition of `__init__.py` files. // The code below is to handle the addition of `__init__.py` files.
@ -424,15 +433,15 @@ pub fn add_module(db: &mut dyn SemanticDb, path: &Path) -> Option<(Module, Vec<A
let jar: &mut SemanticJar = db.jar_mut(); let jar: &mut SemanticJar = db.jar_mut();
let modules = &mut jar.module_resolver; let modules = &mut jar.module_resolver;
modules.by_file.retain(|_, id| { modules.by_file.retain(|_, module| {
if modules if modules
.modules .modules
.get(id) .get(module)
.unwrap() .unwrap()
.name .name
.starts_with(&parent_name) .starts_with(&parent_name)
{ {
to_remove.push(*id); to_remove.push(*module);
false false
} else { } else {
true true
@ -441,8 +450,8 @@ pub fn add_module(db: &mut dyn SemanticDb, path: &Path) -> Option<(Module, Vec<A
// TODO remove need for this vec // TODO remove need for this vec
let mut removed = Vec::with_capacity(to_remove.len()); let mut removed = Vec::with_capacity(to_remove.len());
for id in &to_remove { for module in &to_remove {
removed.push(modules.remove_module_by_id(*id)); removed.push(modules.remove_module(*module));
} }
Some((module, removed)) Some((module, removed))
@ -455,10 +464,10 @@ pub struct ModuleResolver {
// Locking: Locking is done by acquiring a (write) lock on `by_name`. This is because `by_name` is the primary // Locking: Locking is done by acquiring a (write) lock on `by_name`. This is because `by_name` is the primary
// lookup method. Acquiring locks in any other ordering can result in deadlocks. // lookup method. Acquiring locks in any other ordering can result in deadlocks.
/// Resolves a module name to it's module id. /// Looks up a module by name
by_name: FxDashMap<ModuleName, Module>, by_name: FxDashMap<ModuleName, Module>,
/// All known modules, indexed by the module id. /// A map of all known modules to data about those modules
modules: FxDashMap<Module, Arc<ModuleData>>, modules: FxDashMap<Module, Arc<ModuleData>>,
/// Lookup from absolute path to module. /// Lookup from absolute path to module.
@ -479,24 +488,26 @@ impl ModuleResolver {
} }
/// Remove a module from the inner cache /// Remove a module from the inner cache
pub(crate) fn remove_module(&mut self, file_id: FileId) { pub(crate) fn remove_module_by_file(&mut self, file_id: FileId) {
// No locking is required because we're holding a mutable reference to `self`. // No locking is required because we're holding a mutable reference to `self`.
let Some((_, id)) = self.by_file.remove(&file_id) else { let Some((_, module)) = self.by_file.remove(&file_id) else {
return; return;
}; };
self.remove_module_by_id(id); self.remove_module(module);
} }
fn remove_module_by_id(&mut self, id: Module) -> Arc<ModuleData> { fn remove_module(&mut self, module: Module) -> Arc<ModuleData> {
let (_, module) = self.modules.remove(&id).unwrap(); let (_, module_data) = self.modules.remove(&module).unwrap();
self.by_name.remove(&module.name).unwrap(); self.by_name.remove(&module_data.name).unwrap();
// It's possible that multiple paths map to the same id. Search all other paths referencing the same module id. // It's possible that multiple paths map to the same module.
self.by_file.retain(|_, current_id| *current_id != id); // Search all other paths referencing the same module.
self.by_file
.retain(|_, current_module| *current_module != module);
module module_data
} }
} }

View file

@ -42,7 +42,7 @@ impl Program {
let (source, semantic, lint) = self.jars_mut(); let (source, semantic, lint) = self.jars_mut();
for change in aggregated_changes.iter() { for change in aggregated_changes.iter() {
semantic.module_resolver.remove_module(change.id); semantic.module_resolver.remove_module_by_file(change.id);
semantic.symbol_tables.remove(&change.id); semantic.symbol_tables.remove(&change.id);
source.sources.remove(&change.id); source.sources.remove(&change.id);
source.parsed.remove(&change.id); source.parsed.remove(&change.id);