Fix file watching on macOS if a module-search path is a symlink (#12634)

This commit is contained in:
Micha Reiser 2024-08-03 09:24:07 +02:00 committed by GitHub
parent 38e178e914
commit 341a25eec1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 158 additions and 91 deletions

1
Cargo.lock generated
View file

@ -2005,6 +2005,7 @@ dependencies = [
"notify",
"red_knot_module_resolver",
"red_knot_python_semantic",
"ruff_cache",
"ruff_db",
"ruff_python_ast",
"rustc-hash 2.0.0",

View file

@ -1184,8 +1184,9 @@ mod unix {
ModuleName::new_static("bar.baz").unwrap(),
)
.expect("Expected bar.baz to exist in site-packages.");
let baz_site_packages =
let baz_site_packages_path =
case.workspace_path(".venv/lib/python3.12/site-packages/bar/baz.py");
let baz_site_packages = case.system_file(&baz_site_packages_path).unwrap();
let baz_original = case.root_path().join("site-packages/bar/baz.py");
let baz_original_file = case.system_file(&baz_original).unwrap();
@ -1195,12 +1196,12 @@ mod unix {
);
assert_eq!(
source_text(case.db(), baz.file()).as_str(),
source_text(case.db(), baz_site_packages).as_str(),
"def baz(): ..."
);
assert_eq!(
baz.file().path(case.db()).as_system_path(),
Some(&*baz_site_packages)
Some(&*baz_original)
);
// Write to the symlink target.
@ -1212,7 +1213,7 @@ mod unix {
case.db_mut().apply_changes(changes);
assert_eq!(
source_text(case.db(), baz.file()).as_str(),
source_text(case.db(), baz_original_file).as_str(),
"def baz(): print('Version 2')"
);
@ -1224,7 +1225,7 @@ mod unix {
// it doesn't seem worth doing considering that as prominent tools like PyCharm don't support it.
// Pyright does support it, thanks to chokidar.
assert_ne!(
source_text(case.db(), baz_original_file).as_str(),
source_text(case.db(), baz_site_packages).as_str(),
"def baz(): print('Version 2')"
);

View file

@ -382,22 +382,27 @@ enum SearchPathInner {
pub(crate) struct SearchPath(Arc<SearchPathInner>);
impl SearchPath {
fn directory_path(system: &dyn System, root: SystemPathBuf) -> SearchPathResult<SystemPathBuf> {
let canonicalized = system.canonicalize_path(&root).unwrap_or(root);
if system.is_directory(&canonicalized) {
Ok(canonicalized)
} else {
Err(SearchPathValidationError::NotADirectory(canonicalized))
}
}
/// Create a new "Extra" search path
pub(crate) fn extra(system: &dyn System, root: SystemPathBuf) -> SearchPathResult<Self> {
if system.is_directory(&root) {
Ok(Self(Arc::new(SearchPathInner::Extra(root))))
} else {
Err(SearchPathValidationError::NotADirectory(root))
}
Ok(Self(Arc::new(SearchPathInner::Extra(
Self::directory_path(system, root)?,
))))
}
/// Create a new first-party search path, pointing to the user code we were directly invoked on
pub(crate) fn first_party(system: &dyn System, root: SystemPathBuf) -> SearchPathResult<Self> {
if system.is_directory(&root) {
Ok(Self(Arc::new(SearchPathInner::FirstParty(root))))
} else {
Err(SearchPathValidationError::NotADirectory(root))
}
Ok(Self(Arc::new(SearchPathInner::FirstParty(
Self::directory_path(system, root)?,
))))
}
/// Create a new standard-library search path pointing to a custom directory on disk
@ -408,12 +413,13 @@ impl SearchPath {
typeshed.to_path_buf(),
));
}
let stdlib = typeshed.join("stdlib");
if !system.is_directory(&stdlib) {
return Err(SearchPathValidationError::NoStdlibSubdirectory(
typeshed.to_path_buf(),
));
}
let stdlib =
Self::directory_path(system, typeshed.join("stdlib")).map_err(|err| match err {
SearchPathValidationError::NotADirectory(path) => {
SearchPathValidationError::NoStdlibSubdirectory(path)
}
err => err,
})?;
let typeshed_versions =
system_path_to_file(db.upcast(), stdlib.join("VERSIONS")).map_err(|err| match err {
FileError::NotFound => SearchPathValidationError::NoVersionsFile(typeshed),
@ -444,20 +450,16 @@ impl SearchPath {
system: &dyn System,
root: SystemPathBuf,
) -> SearchPathResult<Self> {
if system.is_directory(&root) {
Ok(Self(Arc::new(SearchPathInner::SitePackages(root))))
} else {
Err(SearchPathValidationError::NotADirectory(root))
}
Ok(Self(Arc::new(SearchPathInner::SitePackages(
Self::directory_path(system, root)?,
))))
}
/// Create a new search path pointing to an editable installation
pub(crate) fn editable(system: &dyn System, root: SystemPathBuf) -> SearchPathResult<Self> {
if system.is_directory(&root) {
Ok(Self(Arc::new(SearchPathInner::Editable(root))))
} else {
Err(SearchPathValidationError::NotADirectory(root))
}
Ok(Self(Arc::new(SearchPathInner::Editable(
Self::directory_path(system, root)?,
))))
}
#[must_use]

View file

@ -15,6 +15,7 @@ license.workspace = true
red_knot_module_resolver = { workspace = true }
red_knot_python_semantic = { workspace = true }
ruff_cache = { workspace = true }
ruff_db = { workspace = true, features = ["os", "cache"] }
ruff_python_ast = { workspace = true }

View file

@ -1,20 +1,28 @@
use std::fmt::{Formatter, Write};
use std::hash::Hasher;
use tracing::info;
use red_knot_module_resolver::system_module_search_paths;
use ruff_cache::{CacheKey, CacheKeyHasher};
use ruff_db::system::{SystemPath, SystemPathBuf};
use ruff_db::Upcast;
use crate::db::RootDatabase;
use crate::watch::Watcher;
use ruff_db::system::SystemPathBuf;
use rustc_hash::FxHashSet;
use std::fmt::{Formatter, Write};
use tracing::info;
/// Wrapper around a [`Watcher`] that watches the relevant paths of a workspace.
pub struct WorkspaceWatcher {
watcher: Watcher,
/// The paths that need to be watched. This includes paths for which setting up file watching failed.
watched_paths: FxHashSet<SystemPathBuf>,
watched_paths: Vec<SystemPathBuf>,
/// Paths that should be watched but setting up the watcher failed for some reason.
/// This should be rare.
errored_paths: Vec<SystemPathBuf>,
/// True if registering a watcher for any path failed.
has_errored_paths: bool,
/// Cache key over the paths that need watching. It allows short-circuiting if the paths haven't changed.
cache_key: Option<u64>,
}
impl WorkspaceWatcher {
@ -22,8 +30,9 @@ impl WorkspaceWatcher {
pub fn new(watcher: Watcher, db: &RootDatabase) -> Self {
let mut watcher = Self {
watcher,
watched_paths: FxHashSet::default(),
errored_paths: Vec::new(),
watched_paths: Vec::new(),
cache_key: None,
has_errored_paths: false,
};
watcher.update(db);
@ -32,53 +41,83 @@ impl WorkspaceWatcher {
}
pub fn update(&mut self, db: &RootDatabase) {
let new_watch_paths = db.workspace().paths_to_watch(db);
let search_paths: Vec<_> = system_module_search_paths(db.upcast()).collect();
let workspace_path = db.workspace().root(db).to_path_buf();
let mut added_folders = new_watch_paths.difference(&self.watched_paths).peekable();
let mut removed_folders = self.watched_paths.difference(&new_watch_paths).peekable();
let new_cache_key = Self::compute_cache_key(&workspace_path, &search_paths);
if added_folders.peek().is_none() && removed_folders.peek().is_none() {
if self.cache_key == Some(new_cache_key) {
return;
}
for added_folder in added_folders {
// Log a warning. It's not worth aborting if registering a single folder fails because
// Ruff otherwise stills works as expected.
if let Err(error) = self.watcher.watch(added_folder) {
// TODO: Log a user-facing warning.
tracing::warn!("Failed to setup watcher for path '{added_folder}': {error}. You have to restart Ruff after making changes to files under this path or you might see stale results.");
self.errored_paths.push(added_folder.clone());
// Unregister all watch paths because ordering is important for linux because
// it only emits an event for the last added watcher if a subtree is covered by multiple watchers.
// A path can be covered by multiple watchers if a subdirectory symlinks to a path that's covered by another watch path:
// ```text
// - bar
// - baz.py
// - workspace
// - bar -> /bar
// - foo.py
// ```
for path in self.watched_paths.drain(..) {
if let Err(error) = self.watcher.unwatch(&path) {
info!("Failed to remove the file watcher for the path '{path}: {error}.");
}
}
for removed_path in removed_folders {
if let Some(index) = self
.errored_paths
.iter()
.position(|path| path == removed_path)
{
self.errored_paths.swap_remove(index);
continue;
}
self.has_errored_paths = false;
if let Err(error) = self.watcher.unwatch(removed_path) {
info!("Failed to remove the file watcher for the path '{removed_path}: {error}.");
let workspace_path = workspace_path
.as_utf8_path()
.canonicalize_utf8()
.map(SystemPathBuf::from_utf8_path_buf)
.unwrap_or(workspace_path);
// Find the non-overlapping module search paths and filter out paths that are already covered by the workspace.
// Module search paths are already canonicalized.
let unique_module_paths = ruff_db::system::deduplicate_nested_paths(
search_paths
.into_iter()
.filter(|path| !path.starts_with(&workspace_path)),
)
.map(SystemPath::to_path_buf);
// Now add the new paths, first starting with the workspace path and then
// adding the library search paths.
for path in std::iter::once(workspace_path).chain(unique_module_paths) {
// Log a warning. It's not worth aborting if registering a single folder fails because
// Ruff otherwise stills works as expected.
if let Err(error) = self.watcher.watch(&path) {
// TODO: Log a user-facing warning.
tracing::warn!("Failed to setup watcher for path '{path}': {error}. You have to restart Ruff after making changes to files under this path or you might see stale results.");
self.has_errored_paths = true;
} else {
self.watched_paths.push(path);
}
}
info!(
"Set up file watchers for {}",
DisplayWatchedPaths {
paths: &new_watch_paths
paths: &self.watched_paths
}
);
self.watched_paths = new_watch_paths;
self.cache_key = Some(new_cache_key);
}
fn compute_cache_key(workspace_root: &SystemPath, search_paths: &[&SystemPath]) -> u64 {
let mut cache_key_hasher = CacheKeyHasher::new();
search_paths.cache_key(&mut cache_key_hasher);
workspace_root.cache_key(&mut cache_key_hasher);
cache_key_hasher.finish()
}
/// Returns `true` if setting up watching for any path failed.
pub fn has_errored_paths(&self) -> bool {
!self.errored_paths.is_empty()
self.has_errored_paths
}
pub fn flush(&self) {
@ -91,7 +130,7 @@ impl WorkspaceWatcher {
}
struct DisplayWatchedPaths<'a> {
paths: &'a FxHashSet<SystemPathBuf>,
paths: &'a [SystemPathBuf],
}
impl std::fmt::Display for DisplayWatchedPaths<'_> {

View file

@ -4,7 +4,6 @@ use std::{collections::BTreeMap, sync::Arc};
use rustc_hash::{FxBuildHasher, FxHashSet};
pub use metadata::{PackageMetadata, WorkspaceMetadata};
use red_knot_module_resolver::system_module_search_paths;
use ruff_db::{
files::{system_path_to_file, File},
system::{walk_directory::WalkState, SystemPath, SystemPathBuf},
@ -242,17 +241,6 @@ impl Workspace {
FxHashSet::default()
}
}
/// Returns the paths that should be watched.
///
/// The paths that require watching might change with every revision.
pub fn paths_to_watch(self, db: &dyn Db) -> FxHashSet<SystemPathBuf> {
ruff_db::system::deduplicate_nested_paths(
std::iter::once(self.root(db)).chain(system_module_search_paths(db.upcast())),
)
.map(SystemPath::to_path_buf)
.collect()
}
}
#[salsa::tracked]

View file

@ -43,6 +43,16 @@ pub trait System: Debug {
/// This function will traverse symbolic links to query information about the destination file.
fn path_metadata(&self, path: &SystemPath) -> Result<Metadata>;
/// Returns the canonical, absolute form of a path with all intermediate components normalized
/// and symbolic links resolved.
///
/// # Errors
/// This function will return an error in the following situations, but is not limited to just these cases:
/// * `path` does not exist.
/// * A non-final component in `path` is not a directory.
/// * the symlink target path is not valid Unicode.
fn canonicalize_path(&self, path: &SystemPath) -> Result<SystemPathBuf>;
/// Reads the content of the file at `path` into a [`String`].
fn read_to_string(&self, path: &SystemPath) -> Result<String>;

View file

@ -94,6 +94,10 @@ impl MemoryFileSystem {
metadata(self, path.as_ref())
}
pub fn canonicalize(&self, path: impl AsRef<SystemPath>) -> SystemPathBuf {
SystemPathBuf::from_utf8_path_buf(self.normalize_path(path))
}
pub fn is_file(&self, path: impl AsRef<SystemPath>) -> bool {
let by_path = self.inner.by_path.read().unwrap();
let normalized = self.normalize_path(path.as_ref());

View file

@ -63,6 +63,12 @@ impl System for OsSystem {
})
}
fn canonicalize_path(&self, path: &SystemPath) -> Result<SystemPathBuf> {
path.as_utf8_path()
.canonicalize_utf8()
.map(SystemPathBuf::from_utf8_path_buf)
}
fn read_to_string(&self, path: &SystemPath) -> Result<String> {
std::fs::read_to_string(path.as_std_path())
}

View file

@ -733,26 +733,30 @@ impl ruff_cache::CacheKey for SystemVirtualPathBuf {
/// let paths = vec![SystemPath::new("/a/b/c"), SystemPath::new("/a/b"), SystemPath::new("/a/beta"), SystemPath::new("/a/b/c")];
/// assert_eq!(deduplicate_nested_paths(paths).collect::<Vec<_>>(), &[SystemPath::new("/a/b"), SystemPath::new("/a/beta")]);
/// ```
pub fn deduplicate_nested_paths<'a, I>(paths: I) -> DeduplicatedNestedPathsIter<'a>
pub fn deduplicate_nested_paths<P, I>(paths: I) -> DeduplicatedNestedPathsIter<P>
where
I: IntoIterator<Item = &'a SystemPath>,
I: IntoIterator<Item = P>,
P: AsRef<SystemPath>,
{
DeduplicatedNestedPathsIter::new(paths)
}
pub struct DeduplicatedNestedPathsIter<'a> {
inner: std::vec::IntoIter<&'a SystemPath>,
next: Option<&'a SystemPath>,
pub struct DeduplicatedNestedPathsIter<P> {
inner: std::vec::IntoIter<P>,
next: Option<P>,
}
impl<'a> DeduplicatedNestedPathsIter<'a> {
impl<P> DeduplicatedNestedPathsIter<P>
where
P: AsRef<SystemPath>,
{
fn new<I>(paths: I) -> Self
where
I: IntoIterator<Item = &'a SystemPath>,
I: IntoIterator<Item = P>,
{
let mut paths = paths.into_iter().collect::<Vec<_>>();
// Sort the path to ensure that e.g. `/a/b/c`, comes right after `/a/b`.
paths.sort_unstable();
paths.sort_unstable_by(|left, right| left.as_ref().cmp(right.as_ref()));
let mut iter = paths.into_iter();
@ -763,15 +767,18 @@ impl<'a> DeduplicatedNestedPathsIter<'a> {
}
}
impl<'a> Iterator for DeduplicatedNestedPathsIter<'a> {
type Item = &'a SystemPath;
impl<P> Iterator for DeduplicatedNestedPathsIter<P>
where
P: AsRef<SystemPath>,
{
type Item = P;
fn next(&mut self) -> Option<Self::Item> {
let current = self.next.take()?;
for next in self.inner.by_ref() {
// Skip all paths that have the same prefix as the current path
if !next.starts_with(current) {
if !next.as_ref().starts_with(current.as_ref()) {
self.next = Some(next);
break;
}

View file

@ -7,7 +7,8 @@ use ruff_python_trivia::textwrap;
use crate::files::File;
use crate::system::{
DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath, SystemVirtualPath,
DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath, SystemPathBuf,
SystemVirtualPath,
};
use crate::Db;
@ -140,6 +141,13 @@ impl System for TestSystem {
TestSystemInner::Stub(fs) => Ok(Box::new(fs.read_directory(path)?)),
}
}
fn canonicalize_path(&self, path: &SystemPath) -> Result<SystemPathBuf> {
match &self.inner {
TestSystemInner::System(fs) => fs.canonicalize_path(path),
TestSystemInner::Stub(fs) => Ok(fs.canonicalize(path)),
}
}
}
/// Extension trait for databases that use [`TestSystem`].