Remove salsa::report_untracked_read when finding the dynamic module resolution paths (#12509)

This commit is contained in:
Micha Reiser 2024-07-29 11:31:29 +02:00 committed by GitHub
parent e18b4e42d3
commit 2f54d05d97
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 242 additions and 67 deletions

2
Cargo.lock generated
View file

@ -2173,6 +2173,8 @@ dependencies = [
"filetime", "filetime",
"ignore", "ignore",
"insta", "insta",
"matchit",
"path-slash",
"ruff_cache", "ruff_cache",
"ruff_notebook", "ruff_notebook",
"ruff_python_ast", "ruff_python_ast",

View file

@ -4,7 +4,7 @@ use std::iter::FusedIterator;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use rustc_hash::{FxBuildHasher, FxHashSet}; use rustc_hash::{FxBuildHasher, FxHashSet};
use ruff_db::files::{File, FilePath}; use ruff_db::files::{File, FilePath, FileRootKind};
use ruff_db::program::{Program, SearchPathSettings, TargetVersion}; use ruff_db::program::{Program, SearchPathSettings, TargetVersion};
use ruff_db::system::{DirectoryEntry, System, SystemPath, SystemPathBuf}; use ruff_db::system::{DirectoryEntry, System, SystemPath, SystemPathBuf};
use ruff_db::vendored::VendoredPath; use ruff_db::vendored::VendoredPath;
@ -139,24 +139,33 @@ fn try_resolve_module_resolution_settings(
} }
let system = db.system(); let system = db.system();
let files = db.files();
let mut static_search_paths = vec![]; let mut static_search_paths = vec![];
for path in extra_paths.iter().cloned() { for path in extra_paths {
static_search_paths.push(SearchPath::extra(system, path)?); files.try_add_root(db.upcast(), path, FileRootKind::LibrarySearchPath);
static_search_paths.push(SearchPath::extra(system, path.clone())?);
} }
static_search_paths.push(SearchPath::first_party(system, workspace_root.clone())?); static_search_paths.push(SearchPath::first_party(system, workspace_root.clone())?);
static_search_paths.push(if let Some(custom_typeshed) = custom_typeshed.as_ref() { static_search_paths.push(if let Some(custom_typeshed) = custom_typeshed.as_ref() {
files.try_add_root(
db.upcast(),
custom_typeshed,
FileRootKind::LibrarySearchPath,
);
SearchPath::custom_stdlib(db, custom_typeshed.clone())? SearchPath::custom_stdlib(db, custom_typeshed.clone())?
} else { } else {
SearchPath::vendored_stdlib() SearchPath::vendored_stdlib()
}); });
if let Some(site_packages) = site_packages { if let Some(site_packages) = site_packages {
files.try_add_root(db.upcast(), site_packages, FileRootKind::LibrarySearchPath);
static_search_paths.push(SearchPath::site_packages(system, site_packages.clone())?); static_search_paths.push(SearchPath::site_packages(system, site_packages.clone())?);
} };
// TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step // TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step
@ -197,31 +206,32 @@ pub(crate) fn module_resolution_settings(db: &dyn Db) -> ModuleResolutionSetting
/// due to editable installations of third-party packages. /// due to editable installations of third-party packages.
#[salsa::tracked(return_ref)] #[salsa::tracked(return_ref)]
pub(crate) fn editable_install_resolution_paths(db: &dyn Db) -> Vec<SearchPath> { pub(crate) fn editable_install_resolution_paths(db: &dyn Db) -> Vec<SearchPath> {
// This query needs to be re-executed each time a `.pth` file let settings = module_resolution_settings(db);
// is added, modified or removed from the `site-packages` directory. let static_search_paths = &settings.static_search_paths;
// However, we don't use Salsa queries to read the source text of `.pth` files;
// we use the APIs on the `System` trait directly. As such, for now we simply ask
// Salsa to recompute this query on each new revision.
//
// TODO: add some kind of watcher for the `site-packages` directory that looks
// for `site-packages/*.pth` files being added/modified/removed; get rid of this.
// When doing so, also make the test
// `deleting_pth_file_on_which_module_resolution_depends_invalidates_cache()`
// more principled!
db.report_untracked_read();
let static_search_paths = &module_resolution_settings(db).static_search_paths;
let site_packages = static_search_paths let site_packages = static_search_paths
.iter() .iter()
.find(|path| path.is_site_packages()); .find(|path| path.is_site_packages());
let mut dynamic_paths = Vec::default(); let Some(site_packages) = site_packages else {
return Vec::new();
};
if let Some(site_packages) = site_packages {
let site_packages = site_packages let site_packages = site_packages
.as_system_path() .as_system_path()
.expect("Expected site-packages never to be a VendoredPath!"); .expect("Expected site-packages never to be a VendoredPath!");
let mut dynamic_paths = Vec::default();
// This query needs to be re-executed each time a `.pth` file
// is added, modified or removed from the `site-packages` directory.
// However, we don't use Salsa queries to read the source text of `.pth` files;
// we use the APIs on the `System` trait directly. As such, add a dependency on the
// `site-packages` directory's revision.
if let Some(site_packages_root) = db.files().root(db.upcast(), site_packages) {
let _ = site_packages_root.revision(db.upcast());
}
// As well as modules installed directly into `site-packages`, // As well as modules installed directly into `site-packages`,
// the directory may also contain `.pth` files. // the directory may also contain `.pth` files.
// Each `.pth` file in `site-packages` may contain one or more lines // Each `.pth` file in `site-packages` may contain one or more lines
@ -255,7 +265,6 @@ pub(crate) fn editable_install_resolution_paths(db: &dyn Db) -> Vec<SearchPath>
} }
} }
} }
}
dynamic_paths dynamic_paths
} }
@ -397,9 +406,6 @@ pub(crate) struct ModuleResolutionSettings {
target_version: TargetVersion, target_version: TargetVersion,
/// Search paths that have been statically determined purely from reading Ruff's configuration settings. /// Search paths that have been statically determined purely from reading Ruff's configuration settings.
/// These shouldn't ever change unless the config settings themselves change. /// These shouldn't ever change unless the config settings themselves change.
///
/// Note that `site-packages` *is included* as a search path in this sequence,
/// but it is also stored separately so that we're able to find editable installs later.
static_search_paths: Vec<SearchPath>, static_search_paths: Vec<SearchPath>,
} }
@ -1599,18 +1605,7 @@ not_a_directory
.remove_file(site_packages.join("_foo.pth")) .remove_file(site_packages.join("_foo.pth"))
.unwrap(); .unwrap();
// Why are we touching a random file in the path that's been editably installed, File::sync_path(&mut db, &site_packages.join("_foo.pth"));
// rather than the `.pth` file, when the `.pth` file is the one that has been deleted?
// It's because the `.pth` file isn't directly tracked as a dependency by Salsa
// currently (we don't use `system_path_to_file()` to get the file, and we don't use
// `source_text()` to read the source of the file). Instead of using these APIs which
// would automatically add the existence and contents of the file as a Salsa-tracked
// dependency, we use `.report_untracked_read()` to force Salsa to re-parse all
// `.pth` files on each new "revision". Making a random modification to a tracked
// Salsa file forces a new revision.
//
// TODO: get rid of the `.report_untracked_read()` call...
File::sync_path(&mut db, SystemPath::new("/x/src/foo.py"));
assert_eq!(resolve_module(&db, foo_module_name.clone()), None); assert_eq!(resolve_module(&db, foo_module_name.clone()), None);
} }

View file

@ -24,7 +24,9 @@ countme = { workspace = true }
dashmap = { workspace = true } dashmap = { workspace = true }
filetime = { workspace = true } filetime = { workspace = true }
ignore = { workspace = true, optional = true } ignore = { workspace = true, optional = true }
matchit = { workspace = true }
salsa = { workspace = true } salsa = { workspace = true }
path-slash = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }
rustc-hash = { workspace = true } rustc-hash = { workspace = true }
zip = { workspace = true } zip = { workspace = true }

View file

@ -15,6 +15,10 @@ impl FileRevision {
Self(value) Self(value)
} }
/// Creates a revision from the current time as reported by `filetime::FileTime::now()`.
pub fn now() -> Self {
    Self::from(filetime::FileTime::now())
}
pub const fn zero() -> Self { pub const fn zero() -> Self {
Self(0) Self(0)
} }

View file

@ -4,15 +4,18 @@ use countme::Count;
use dashmap::mapref::entry::Entry; use dashmap::mapref::entry::Entry;
use salsa::Setter; use salsa::Setter;
pub use file_root::{FileRoot, FileRootKind};
pub use path::FilePath; pub use path::FilePath;
use ruff_notebook::{Notebook, NotebookError}; use ruff_notebook::{Notebook, NotebookError};
use crate::file_revision::FileRevision; use crate::file_revision::FileRevision;
use crate::files::file_root::FileRoots;
use crate::files::private::FileStatus; use crate::files::private::FileStatus;
use crate::system::{Metadata, SystemPath, SystemPathBuf, SystemVirtualPath, SystemVirtualPathBuf}; use crate::system::{Metadata, SystemPath, SystemPathBuf, SystemVirtualPath, SystemVirtualPathBuf};
use crate::vendored::{VendoredPath, VendoredPathBuf}; use crate::vendored::{VendoredPath, VendoredPathBuf};
use crate::{Db, FxDashMap}; use crate::{Db, FxDashMap};
mod file_root;
mod path; mod path;
/// Interns a file system path and returns a salsa `File` ingredient. /// Interns a file system path and returns a salsa `File` ingredient.
@ -54,6 +57,9 @@ struct FilesInner {
/// Lookup table that maps vendored files to the salsa [`File`] ingredients. /// Lookup table that maps vendored files to the salsa [`File`] ingredients.
vendored_by_path: FxDashMap<VendoredPathBuf, File>, vendored_by_path: FxDashMap<VendoredPathBuf, File>,
/// Lookup table that maps file paths to their [`FileRoot`].
roots: std::sync::RwLock<FileRoots>,
} }
impl Files { impl Files {
@ -72,6 +78,7 @@ impl Files {
.system_by_path .system_by_path
.entry(absolute.clone()) .entry(absolute.clone())
.or_insert_with(|| { .or_insert_with(|| {
// TODO: Set correct durability according to source root.
let metadata = db.system().path_metadata(path); let metadata = db.system().path_metadata(path);
match metadata { match metadata {
@ -161,6 +168,33 @@ impl Files {
Some(file) Some(file)
} }
/// Returns the root that most closely encloses `path`, or `None` if no registered root contains it.
///
/// `path` is resolved against the system's current directory before the lookup.
/// Roots may be nested; in that case the closest enclosing root wins.
pub fn root(&self, db: &dyn Db, path: &SystemPath) -> Option<FileRoot> {
    let absolute_path = SystemPath::absolute(path, db.system().current_directory());
    let registered = self.inner.roots.read().unwrap();

    registered.at(&absolute_path)
}
/// Registers a root for `path` and returns it.
///
/// `path` is resolved against the system's current directory first. If a root for the
/// resulting path already exists, that root is returned unchanged: no new root is created
/// and the existing root's kind is not updated to `kind`.
pub fn try_add_root(&self, db: &dyn Db, path: &SystemPath, kind: FileRootKind) -> FileRoot {
    let absolute_path = SystemPath::absolute(path, db.system().current_directory());
    let mut registered = self.inner.roots.write().unwrap();

    registered.try_add(db, absolute_path, kind)
}
/// Bumps the revision of the root enclosing `path` to [`FileRevision::now`].
///
/// Does nothing when `path` isn't enclosed by any registered root.
pub fn touch_root(db: &mut dyn Db, path: &SystemPath) {
    let Some(enclosing_root) = db.files().root(db, path) else {
        return;
    };

    enclosing_root.set_revision(db).to(FileRevision::now());
}
/// Refreshes the state of all known files under `path` recursively. /// Refreshes the state of all known files under `path` recursively.
/// ///
/// The most common use case is to update the [`Files`] state after removing or moving a directory. /// The most common use case is to update the [`Files`] state after removing or moving a directory.
@ -180,6 +214,14 @@ impl Files {
file.sync(db); file.sync(db);
} }
} }
let roots = inner.roots.read().unwrap();
for root in roots.all() {
if root.path(db).starts_with(&path) {
root.set_revision(db).to(FileRevision::now());
}
}
} }
/// Refreshes the state of all known files. /// Refreshes the state of all known files.
@ -197,6 +239,12 @@ impl Files {
let file = entry.value(); let file = entry.value();
file.sync(db); file.sync(db);
} }
let roots = inner.roots.read().unwrap();
for root in roots.all() {
root.set_revision(db).to(FileRevision::now());
}
} }
} }
@ -309,6 +357,7 @@ impl File {
} }
fn sync_system_path(db: &mut dyn Db, path: &SystemPath, file: Option<File>) { fn sync_system_path(db: &mut dyn Db, path: &SystemPath, file: Option<File>) {
Files::touch_root(db, path);
let Some(file) = file.or_else(|| db.files().try_system(db, path)) else { let Some(file) = file.or_else(|| db.files().try_system(db, path)) else {
return; return;
}; };

View file

@ -0,0 +1,125 @@
use std::fmt::Formatter;
use path_slash::PathExt;
use crate::file_revision::FileRevision;
use crate::system::{SystemPath, SystemPathBuf};
use crate::Db;
/// A root path for files tracked by the database.
///
/// We currently create roots for:
/// * static module resolution paths
/// * the workspace root
///
/// The main use of file roots is to determine a file's durability. A root can also be used
/// to make a salsa query depend on whether any file inside the root has changed, without
/// writing manual invalidation logic: the query reads the root's `revision`.
#[salsa::input]
pub struct FileRoot {
    /// The path of the root. The path of a root is guaranteed to never change.
    #[return_ref]
    path_buf: SystemPathBuf,

    /// The kind of the root at the time of its creation.
    ///
    /// The kind is not updated when the same path is registered again with a different kind.
    kind_at_time_of_creation: FileRootKind,

    /// A revision that changes when the contents of the source root change.
    ///
    /// The revision changes whenever a file is added, removed, or changed inside this source root.
    pub revision: FileRevision,
}
impl FileRoot {
pub fn path(self, db: &dyn Db) -> &SystemPath {
self.path_buf(db)
}
pub fn durability(self, db: &dyn Db) -> salsa::Durability {
match self.kind_at_time_of_creation(db) {
FileRootKind::Workspace => salsa::Durability::LOW,
FileRootKind::LibrarySearchPath => salsa::Durability::HIGH,
}
}
}
/// The kind of a [`FileRoot`]; it determines the durability reported by `FileRoot::durability`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum FileRootKind {
    /// The root of a workspace. Roots of this kind have `salsa::Durability::LOW`.
    Workspace,

    /// A non-workspace module resolution search path. Roots of this kind have
    /// `salsa::Durability::HIGH`.
    LibrarySearchPath,
}
/// The collection of all registered [`FileRoot`]s, with a path-based lookup.
#[derive(Default)]
pub(super) struct FileRoots {
    /// Router from `/`-separated paths to roots. Each root is registered both under its own
    /// path and under a catch-all route for everything nested below it.
    by_path: matchit::Router<FileRoot>,

    /// Every root that has been added, in insertion order.
    roots: Vec<FileRoot>,
}
impl FileRoots {
    /// Tries to add a new root for `path` and returns the root.
    ///
    /// The root isn't added nor is the file root's kind updated if a root for `path` already exists;
    /// the existing root is returned instead.
    ///
    /// # Panics
    ///
    /// Panics if `path` can't be converted to a `/`-separated string or if the router rejects
    /// the routes generated for the new root.
    pub(super) fn try_add(
        &mut self,
        db: &dyn Db,
        path: SystemPathBuf,
        kind: FileRootKind,
    ) -> FileRoot {
        // Normalize the path to use `/` separators, which is what the router matches on.
        let normalized_path = path
            .as_std_path()
            .to_slash()
            .expect("a `SystemPath` is UTF-8 and always converts to a slash path");

        if let Ok(existing) = self.by_path.at(&normalized_path) {
            // The router also matches paths nested below an existing root (see the catch-all
            // route below). Only reuse the root if it was registered for exactly this path.
            if existing.value.path(db) == &*path {
                return *existing.value;
            }
        }

        // Escape the '{' and '}' characters, which matchit uses for routing parameters.
        let mut route = normalized_path.replace('{', "{{").replace('}', "}}");

        // Create the new source root.
        let root = FileRoot::new(db, path, kind, FileRevision::now());

        // Insert a route that matches the root itself.
        self.by_path
            .insert(route.clone(), root)
            .expect("the route for a new root must not conflict with an existing route");

        // Insert a route that matches all subdirectories and files.
        route.push_str("/{*filepath}");
        self.by_path
            .insert(route, root)
            .expect("the catch-all route for a new root must not conflict with an existing route");

        self.roots.push(root);

        root
    }

    /// Returns the closest root for `path` or `None` if no root contains `path`.
    pub(super) fn at(&self, path: &SystemPath) -> Option<FileRoot> {
        let normalized_path = path
            .as_std_path()
            .to_slash()
            .expect("a `SystemPath` is UTF-8 and always converts to a slash path");

        let entry = self.by_path.at(&normalized_path).ok()?;
        Some(*entry.value)
    }

    /// Returns an iterator over all roots, in the order in which they were added.
    pub(super) fn all(&self) -> impl Iterator<Item = FileRoot> + '_ {
        self.roots.iter().copied()
    }
}
impl std::fmt::Debug for FileRoots {
    /// Formats as a `FileRoots(..)` tuple containing only the list of roots; the router is omitted.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut tuple = f.debug_tuple("FileRoots");
        tuple.field(&self.roots);
        tuple.finish()
    }
}
impl PartialEq for FileRoots {
    /// Two collections are equal when their lists of roots are equal; the router is not compared.
    fn eq(&self, other: &Self) -> bool {
        self.roots == other.roots
    }
}

View file

@ -76,9 +76,8 @@ where
let event = events.iter().find(|event| { let event = events.iter().find(|event| {
if let salsa::EventKind::WillExecute { database_key } = event.kind { if let salsa::EventKind::WillExecute { database_key } = event.kind {
dbg!(db db.lookup_ingredient(database_key.ingredient_index())
.lookup_ingredient(database_key.ingredient_index()) .debug_name()
.debug_name())
== query_name == query_name
&& database_key.key_index() == input.as_id() && database_key.key_index() == input.as_id()
} else { } else {
@ -190,7 +189,6 @@ fn const_query_was_not_run_fails_if_query_was_run() {
assert_eq!(len(&db), 5); assert_eq!(len(&db), 5);
let events = db.take_salsa_events(); let events = db.take_salsa_events();
dbg!(&events);
assert_const_function_query_was_not_run(&db, len, &events); assert_const_function_query_was_not_run(&db, len, &events);
} }