[red-knot] Improved file watching (#12382)

Micha Reiser 2024-07-23 08:18:59 +02:00 committed by GitHub
parent a9f8bd59b2
commit 40d9324f5a
16 changed files with 1476 additions and 381 deletions

Cargo.lock (generated)

@ -1866,6 +1866,7 @@ dependencies = [
"ruff_python_ast",
"rustc-hash 2.0.0",
"salsa",
"tempfile",
"tracing",
"tracing-subscriber",
"tracing-tree",


@ -31,6 +31,9 @@ tracing = { workspace = true }
tracing-subscriber = { workspace = true }
tracing-tree = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
[lints]
workspace = true


@ -5,16 +5,17 @@ use salsa::{Cancelled, Database, DbWithJar};
use red_knot_module_resolver::{vendored_typeshed_stubs, Db as ResolverDb, Jar as ResolverJar};
use red_knot_python_semantic::{Db as SemanticDb, Jar as SemanticJar};
use ruff_db::files::{system_path_to_file, File, Files};
use ruff_db::files::{File, Files};
use ruff_db::program::{Program, ProgramSettings};
use ruff_db::system::System;
use ruff_db::vendored::VendoredFileSystem;
use ruff_db::{Db as SourceDb, Jar as SourceJar, Upcast};
use crate::lint::{lint_semantic, lint_syntax, unwind_if_cancelled, Diagnostics};
use crate::watch::{FileChangeKind, FileWatcherChange};
use crate::workspace::{check_file, Package, Workspace, WorkspaceMetadata};
mod changes;
pub trait Db: DbWithJar<Jar> + SemanticDb + Upcast<dyn SemanticDb> {}
#[salsa::jar(db=Db)]
@ -59,58 +60,6 @@ impl RootDatabase {
self.workspace.unwrap()
}
#[tracing::instrument(level = "debug", skip(self, changes))]
pub fn apply_changes(&mut self, changes: Vec<FileWatcherChange>) {
let workspace = self.workspace();
let workspace_path = workspace.root(self).to_path_buf();
// TODO: Optimize change tracking by only reloading a package if a file that is part of the package was changed.
let mut structural_change = false;
for change in changes {
if matches!(
change.path.file_name(),
Some(".gitignore" | ".ignore" | "ruff.toml" | ".ruff.toml" | "pyproject.toml")
) {
// Changes to ignore files or settings can change the workspace structure or add/remove files
// from packages.
structural_change = true;
} else {
match change.kind {
FileChangeKind::Created => {
// Reload the package when a new file was added. This is necessary because the file might be excluded
// by a gitignore.
if workspace.package(self, &change.path).is_some() {
structural_change = true;
}
}
FileChangeKind::Modified => {}
FileChangeKind::Deleted => {
if let Some(package) = workspace.package(self, &change.path) {
if let Some(file) = system_path_to_file(self, &change.path) {
package.remove_file(self, file);
}
}
}
}
}
File::touch_path(self, &change.path);
}
if structural_change {
match WorkspaceMetadata::from_path(&workspace_path, self.system()) {
Ok(metadata) => {
tracing::debug!("Reload workspace after structural change.");
// TODO: Handle changes in the program settings.
workspace.reload(self, metadata);
}
Err(error) => {
tracing::error!("Failed to load workspace, keep old workspace: {error}");
}
}
}
}
/// Checks all open files in the workspace and its dependencies.
pub fn check(&self) -> Result<Vec<String>, Cancelled> {
self.with_db(|db| db.workspace().check(db))
@ -152,18 +101,29 @@ impl Upcast<dyn SemanticDb> for RootDatabase {
fn upcast(&self) -> &(dyn SemanticDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn SemanticDb + 'static) {
self
}
}
impl Upcast<dyn SourceDb> for RootDatabase {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
self
}
}
impl Upcast<dyn ResolverDb> for RootDatabase {
fn upcast(&self) -> &(dyn ResolverDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn ResolverDb + 'static) {
self
}
}
impl ResolverDb for RootDatabase {}


@ -0,0 +1,190 @@
use rustc_hash::FxHashSet;
use ruff_db::files::{system_path_to_file, File, Files};
use ruff_db::system::walk_directory::WalkState;
use ruff_db::system::SystemPath;
use ruff_db::Db;
use crate::db::RootDatabase;
use crate::watch;
use crate::watch::{CreatedKind, DeletedKind};
use crate::workspace::WorkspaceMetadata;
impl RootDatabase {
#[tracing::instrument(level = "debug", skip(self, changes))]
pub fn apply_changes(&mut self, changes: Vec<watch::ChangeEvent>) {
let workspace = self.workspace();
let workspace_path = workspace.root(self).to_path_buf();
let mut workspace_change = false;
// Packages that need reloading
let mut changed_packages = FxHashSet::default();
// Paths that were added
let mut added_paths = FxHashSet::default();
// Deduplicate the `sync` calls. Many file watchers emit multiple events for the same path.
let mut synced_files = FxHashSet::default();
let mut synced_recursively = FxHashSet::default();
let mut sync_path = |db: &mut RootDatabase, path: &SystemPath| {
if synced_files.insert(path.to_path_buf()) {
File::sync_path(db, path);
}
};
let mut sync_recursively = |db: &mut RootDatabase, path: &SystemPath| {
if synced_recursively.insert(path.to_path_buf()) {
Files::sync_recursively(db, path);
}
};
for change in changes {
if let Some(path) = change.path() {
if matches!(
path.file_name(),
Some(".gitignore" | ".ignore" | "ruff.toml" | ".ruff.toml" | "pyproject.toml")
) {
// Changes to ignore files or settings can change the workspace structure or add/remove files
// from packages.
if let Some(package) = workspace.package(self, path) {
changed_packages.insert(package);
} else {
workspace_change = true;
}
continue;
}
}
match change {
watch::ChangeEvent::Changed { path, kind: _ } => sync_path(self, &path),
watch::ChangeEvent::Created { kind, path } => {
match kind {
CreatedKind::File => sync_path(self, &path),
CreatedKind::Directory | CreatedKind::Any => {
sync_recursively(self, &path);
}
}
if self.system().is_file(&path) {
// Add the parent directory because `walkdir` always visits explicitly passed files
// even if they match an exclude filter.
added_paths.insert(path.parent().unwrap().to_path_buf());
} else {
added_paths.insert(path);
}
}
watch::ChangeEvent::Deleted { kind, path } => {
let is_file = match kind {
DeletedKind::File => true,
DeletedKind::Directory => {
// file watchers emit an event for every deleted file. No need to scan the entire dir.
continue;
}
DeletedKind::Any => self
.files
.try_system(self, &path)
.is_some_and(|file| file.exists(self)),
};
if is_file {
sync_path(self, &path);
if let Some(package) = workspace.package(self, &path) {
if let Some(file) = self.files().try_system(self, &path) {
package.remove_file(self, file);
}
}
} else {
sync_recursively(self, &path);
// TODO: Remove after converting `package.files()` to a salsa query.
if let Some(package) = workspace.package(self, &path) {
changed_packages.insert(package);
} else {
workspace_change = true;
}
}
}
watch::ChangeEvent::Rescan => {
workspace_change = true;
Files::sync_all(self);
break;
}
}
}
if workspace_change {
match WorkspaceMetadata::from_path(&workspace_path, self.system()) {
Ok(metadata) => {
tracing::debug!("Reload workspace after structural change.");
// TODO: Handle changes in the program settings.
workspace.reload(self, metadata);
}
Err(error) => {
tracing::error!("Failed to load workspace, keep old workspace: {error}");
}
}
return;
}
let mut added_paths = added_paths.into_iter().filter(|path| {
let Some(package) = workspace.package(self, path) else {
return false;
};
// Skip packages that need reloading
!changed_packages.contains(&package)
});
// Use directory walking to discover newly added files.
if let Some(path) = added_paths.next() {
let mut walker = self.system().walk_directory(&path);
for extra_path in added_paths {
walker = walker.add(&extra_path);
}
let added_paths = std::sync::Mutex::new(Vec::default());
walker.run(|| {
Box::new(|entry| {
let Ok(entry) = entry else {
return WalkState::Continue;
};
if !entry.file_type().is_file() {
return WalkState::Continue;
}
let mut paths = added_paths.lock().unwrap();
paths.push(entry.into_path());
WalkState::Continue
})
});
for path in added_paths.into_inner().unwrap() {
let package = workspace.package(self, &path);
let file = system_path_to_file(self, &path);
if let (Some(package), Some(file)) = (package, file) {
package.add_file(self, file);
}
}
}
// Reload the files of all packages that were affected by the changes.
for package in changed_packages {
package.reload_files(self);
}
}
}
#[cfg(test)]
mod tests {}
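
For orientation, a minimal sketch (not part of this commit) of how a change event reaches `apply_changes`: the watcher hands a batch of `watch::ChangeEvent`s to the database, which cancels pending queries and syncs the touched files. The path and the database setup are placeholders.

use red_knot::db::RootDatabase;
use red_knot::watch::{ChangeEvent, ChangedKind};
use ruff_db::system::SystemPath;

// Hedged sketch: feed a synthetic content-change event into the database.
fn notify_content_change(db: &mut RootDatabase) {
    let event = ChangeEvent::Changed {
        path: SystemPath::new("/workspace/foo.py").to_path_buf(),
        kind: ChangedKind::FileContent,
    };
    // Cancels pending salsa queries and re-reads the state of the touched file.
    db.apply_changes(vec![event]);
}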


@ -11,8 +11,8 @@ use tracing_subscriber::{Layer, Registry};
use tracing_tree::time::Uptime;
use red_knot::db::RootDatabase;
use red_knot::watch::FileWatcher;
use red_knot::watch::FileWatcherChange;
use red_knot::watch;
use red_knot::watch::Watcher;
use red_knot::workspace::WorkspaceMetadata;
use ruff_db::program::{ProgramSettings, SearchPathSettings};
use ruff_db::system::{OsSystem, System, SystemPathBuf};
@ -57,6 +57,13 @@ struct Args {
#[clap(flatten)]
verbosity: Verbosity,
#[arg(
long,
help = "Run in watch mode by re-running whenever files change",
short = 'W'
)]
watch: bool,
}
#[allow(
@ -72,6 +79,7 @@ pub fn main() -> anyhow::Result<()> {
extra_search_path: extra_paths,
target_version,
verbosity,
watch,
} = Args::parse_from(std::env::args().collect::<Vec<_>>());
let verbosity = verbosity.level();
@ -117,125 +125,120 @@ pub fn main() -> anyhow::Result<()> {
}
})?;
let file_changes_notifier = main_loop.file_changes_notifier();
// Watch for file changes and re-trigger the analysis.
let mut file_watcher = FileWatcher::new(move |changes| {
file_changes_notifier.notify(changes);
})?;
file_watcher.watch_folder(db.workspace().root(&db).as_std_path())?;
if watch {
main_loop.watch(&mut db)?;
} else {
main_loop.run(&mut db);
println!("{}", countme::get_all());
}
Ok(())
}
struct MainLoop {
verbosity: Option<VerbosityLevel>,
orchestrator: crossbeam_channel::Sender<OrchestratorMessage>,
/// Sender that can be used to send messages to the main loop.
sender: crossbeam_channel::Sender<MainLoopMessage>,
/// Receiver for the messages sent **to** the main loop.
receiver: crossbeam_channel::Receiver<MainLoopMessage>,
/// The file system watcher, if running in watch mode.
watcher: Option<Watcher>,
verbosity: Option<VerbosityLevel>,
}
impl MainLoop {
fn new(verbosity: Option<VerbosityLevel>) -> (Self, MainLoopCancellationToken) {
let (orchestrator_sender, orchestrator_receiver) = crossbeam_channel::bounded(1);
let (main_loop_sender, main_loop_receiver) = crossbeam_channel::bounded(1);
let mut orchestrator = Orchestrator {
receiver: orchestrator_receiver,
main_loop: main_loop_sender.clone(),
revision: 0,
};
std::thread::spawn(move || {
orchestrator.run();
});
let (sender, receiver) = crossbeam_channel::bounded(10);
(
Self {
sender: sender.clone(),
receiver,
watcher: None,
verbosity,
orchestrator: orchestrator_sender,
receiver: main_loop_receiver,
},
MainLoopCancellationToken {
sender: main_loop_sender,
},
MainLoopCancellationToken { sender },
)
}
fn file_changes_notifier(&self) -> FileChangesNotifier {
FileChangesNotifier {
sender: self.orchestrator.clone(),
}
fn watch(mut self, db: &mut RootDatabase) -> anyhow::Result<()> {
let sender = self.sender.clone();
let mut watcher = watch::directory_watcher(move |event| {
sender.send(MainLoopMessage::ApplyChanges(event)).unwrap();
})?;
watcher.watch(db.workspace().root(db))?;
self.watcher = Some(watcher);
self.run(db);
Ok(())
}
#[allow(clippy::print_stderr)]
fn run(self, db: &mut RootDatabase) {
self.orchestrator.send(OrchestratorMessage::Run).unwrap();
// Schedule the first check.
self.sender.send(MainLoopMessage::CheckWorkspace).unwrap();
let mut revision = 0usize;
for message in &self.receiver {
tracing::trace!("Main Loop: Tick");
match message {
MainLoopMessage::CheckWorkspace { revision } => {
MainLoopMessage::CheckWorkspace => {
let db = db.snapshot();
let orchestrator = self.orchestrator.clone();
let sender = self.sender.clone();
// Spawn a new task that checks the workspace. This needs to be done in a separate thread
// to prevent blocking the main loop here.
rayon::spawn(move || {
if let Ok(result) = db.check() {
orchestrator
.send(OrchestratorMessage::CheckCompleted {
diagnostics: result,
revision,
})
.unwrap();
// Send the result back to the main loop for printing.
sender
.send(MainLoopMessage::CheckCompleted { result, revision })
.ok();
}
});
}
MainLoopMessage::CheckCompleted {
result,
revision: check_revision,
} => {
if check_revision == revision {
eprintln!("{}", result.join("\n"));
if self.verbosity == Some(VerbosityLevel::Trace) {
eprintln!("{}", countme::get_all());
}
}
if self.watcher.is_none() {
return self.exit();
}
}
MainLoopMessage::ApplyChanges(changes) => {
revision += 1;
// Automatically cancels any pending queries and waits for them to complete.
db.apply_changes(changes);
}
MainLoopMessage::CheckCompleted(diagnostics) => {
eprintln!("{}", diagnostics.join("\n"));
if self.verbosity == Some(VerbosityLevel::Trace) {
eprintln!("{}", countme::get_all());
}
self.sender.send(MainLoopMessage::CheckWorkspace).unwrap();
}
MainLoopMessage::Exit => {
return self.exit();
}
}
}
}
#[allow(clippy::print_stderr, clippy::unused_self)]
fn exit(self) {
if self.verbosity == Some(VerbosityLevel::Trace) {
eprintln!("Exit");
eprintln!("{}", countme::get_all());
}
return;
}
}
}
}
}
impl Drop for MainLoop {
fn drop(&mut self) {
self.orchestrator
.send(OrchestratorMessage::Shutdown)
.unwrap();
}
}
#[derive(Debug, Clone)]
struct FileChangesNotifier {
sender: crossbeam_channel::Sender<OrchestratorMessage>,
}
impl FileChangesNotifier {
fn notify(&self, changes: Vec<FileWatcherChange>) {
self.sender
.send(OrchestratorMessage::FileChanges(changes))
.unwrap();
}
}
@ -250,115 +253,16 @@ impl MainLoopCancellationToken {
}
}
struct Orchestrator {
/// Sends messages to the main loop.
main_loop: crossbeam_channel::Sender<MainLoopMessage>,
/// Receives messages from the main loop.
receiver: crossbeam_channel::Receiver<OrchestratorMessage>,
revision: usize,
}
impl Orchestrator {
#[allow(clippy::print_stderr)]
fn run(&mut self) {
while let Ok(message) = self.receiver.recv() {
match message {
OrchestratorMessage::Run => {
self.main_loop
.send(MainLoopMessage::CheckWorkspace {
revision: self.revision,
})
.unwrap();
}
OrchestratorMessage::CheckCompleted {
diagnostics,
revision,
} => {
// Only take the diagnostics if they are for the latest revision.
if self.revision == revision {
self.main_loop
.send(MainLoopMessage::CheckCompleted(diagnostics))
.unwrap();
} else {
tracing::debug!("Discarding diagnostics for outdated revision {revision} (current: {}).", self.revision);
}
}
OrchestratorMessage::FileChanges(changes) => {
// Request cancellation, but wait until all analysis tasks have completed to
// avoid stale messages in the next main loop.
self.revision += 1;
self.debounce_changes(changes);
}
OrchestratorMessage::Shutdown => {
return self.shutdown();
}
}
}
}
fn debounce_changes(&self, mut changes: Vec<FileWatcherChange>) {
loop {
// Consume possibly incoming file change messages before running a new analysis, but don't wait for more than 100ms.
crossbeam_channel::select! {
recv(self.receiver) -> message => {
match message {
Ok(OrchestratorMessage::Shutdown) => {
return self.shutdown();
}
Ok(OrchestratorMessage::FileChanges(file_changes)) => {
changes.extend(file_changes);
}
Ok(OrchestratorMessage::CheckCompleted { .. })=> {
// disregard any outdated completion message.
}
Ok(OrchestratorMessage::Run) => unreachable!("The orchestrator is already running."),
Err(_) => {
// There are no more senders, no point in waiting for more messages
return;
}
}
},
default(std::time::Duration::from_millis(10)) => {
// No more file changes after 10 ms, send the changes and schedule a new analysis
self.main_loop.send(MainLoopMessage::ApplyChanges(changes)).unwrap();
self.main_loop.send(MainLoopMessage::CheckWorkspace { revision: self.revision}).unwrap();
return;
}
}
}
}
#[allow(clippy::unused_self)]
fn shutdown(&self) {
tracing::trace!("Shutting down orchestrator.");
}
}
/// Message sent from the orchestrator to the main loop.
#[derive(Debug)]
enum MainLoopMessage {
CheckWorkspace { revision: usize },
CheckCompleted(Vec<String>),
ApplyChanges(Vec<FileWatcherChange>),
Exit,
}
#[derive(Debug)]
enum OrchestratorMessage {
Run,
Shutdown,
CheckWorkspace,
CheckCompleted {
diagnostics: Vec<String>,
result: Vec<String>,
revision: usize,
},
FileChanges(Vec<FileWatcherChange>),
ApplyChanges(Vec<watch::ChangeEvent>),
Exit,
}
fn setup_tracing(verbosity: Option<VerbosityLevel>) {


@ -1,111 +1,92 @@
use std::path::Path;
use anyhow::Context;
use notify::event::{CreateKind, ModifyKind, RemoveKind};
use notify::{recommended_watcher, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
use ruff_db::system::{SystemPath, SystemPathBuf};
pub use watcher::{directory_watcher, EventHandler, Watcher};
pub struct FileWatcher {
watcher: RecommendedWatcher,
mod watcher;
/// Classification of a file system change event.
///
/// ## Renaming a path
/// Renaming a path creates a [`ChangeEvent::Deleted`] event for the old path and/or a [`ChangeEvent::Created`] for the new location.
/// Whether both events are created or just one of them depends on where the path was moved from and to:
///
/// * Inside the watched directory: Both events are created.
/// * From a watched directory to a non-watched directory: Only a [`ChangeEvent::Deleted`] event is created.
/// * From a non-watched directory to a watched directory: Only a [`ChangeEvent::Created`] event is created.
///
/// ## Renaming a directory
/// It's up to the file watcher implementation to aggregate the rename event for a directory into a single rename
/// event instead of emitting an event for each file or subdirectory in that path.
#[derive(Debug, PartialEq, Eq)]
pub enum ChangeEvent {
/// A new path was created
Created {
path: SystemPathBuf,
kind: CreatedKind,
},
/// The content or metadata of a path was changed.
Changed {
path: SystemPathBuf,
kind: ChangedKind,
},
/// A path was deleted.
Deleted {
path: SystemPathBuf,
kind: DeletedKind,
},
/// The file watcher failed to observe some changes and now is out of sync with the file system.
///
/// This can happen if many files are changed at once. The consumer should rescan all files to catch up
/// with the file system.
Rescan,
}
pub trait EventHandler: Send + 'static {
fn handle(&self, changes: Vec<FileWatcherChange>);
}
impl ChangeEvent {
pub fn file_name(&self) -> Option<&str> {
self.path().and_then(|path| path.file_name())
}
impl<F> EventHandler for F
where
F: Fn(Vec<FileWatcherChange>) + Send + 'static,
{
fn handle(&self, changes: Vec<FileWatcherChange>) {
let f = self;
f(changes);
pub fn path(&self) -> Option<&SystemPath> {
match self {
ChangeEvent::Created { path, .. }
| ChangeEvent::Changed { path, .. }
| ChangeEvent::Deleted { path, .. } => Some(path),
ChangeEvent::Rescan => None,
}
}
}
impl FileWatcher {
pub fn new<E>(handler: E) -> anyhow::Result<Self>
where
E: EventHandler,
{
Self::from_handler(Box::new(handler))
}
/// Classification of an event that creates a new path.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum CreatedKind {
/// A file was created.
File,
fn from_handler(handler: Box<dyn EventHandler>) -> anyhow::Result<Self> {
let watcher = recommended_watcher(move |event: notify::Result<Event>| {
match event {
Ok(event) => {
// TODO verify that this handles all events correctly
let change_kind = match event.kind {
EventKind::Create(CreateKind::File) => FileChangeKind::Created,
EventKind::Modify(ModifyKind::Name(notify::event::RenameMode::From)) => {
FileChangeKind::Deleted
}
EventKind::Modify(ModifyKind::Name(notify::event::RenameMode::To)) => {
FileChangeKind::Created
}
EventKind::Modify(ModifyKind::Name(notify::event::RenameMode::Any)) => {
// TODO Introduce a better catch all event for cases that we don't understand.
FileChangeKind::Created
}
EventKind::Modify(ModifyKind::Name(notify::event::RenameMode::Both)) => {
todo!("Handle both create and delete event.");
}
EventKind::Modify(_) => FileChangeKind::Modified,
EventKind::Remove(RemoveKind::File) => FileChangeKind::Deleted,
_ => {
return;
}
};
/// A directory was created.
Directory,
let mut changes = Vec::new();
for path in event.paths {
if let Some(fs_path) = SystemPath::from_std_path(&path) {
changes
.push(FileWatcherChange::new(fs_path.to_path_buf(), change_kind));
}
}
if !changes.is_empty() {
handler.handle(changes);
}
}
// TODO proper error handling
Err(err) => {
panic!("Error: {err}");
}
}
})
.context("Failed to create file watcher.")?;
Ok(Self { watcher })
}
pub fn watch_folder(&mut self, path: &Path) -> anyhow::Result<()> {
self.watcher.watch(path, RecursiveMode::Recursive)?;
Ok(())
}
/// A file, directory, or any other kind of path was created.
Any,
}
#[derive(Clone, Debug)]
pub struct FileWatcherChange {
pub path: SystemPathBuf,
#[allow(unused)]
pub kind: FileChangeKind,
}
/// Classification of an event related to a content or metadata change.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ChangedKind {
/// The content of a file was changed.
FileContent,
impl FileWatcherChange {
pub fn new(path: SystemPathBuf, kind: FileChangeKind) -> Self {
Self { path, kind }
}
/// The metadata of a file was changed.
FileMetadata,
/// Either the content or metadata of a path was changed.
Any,
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum FileChangeKind {
Created,
Modified,
Deleted,
pub enum DeletedKind {
File,
Directory,
Any,
}
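
A consumer typically matches on the event to decide how much state to refresh. A rough sketch of that dispatch, loosely mirroring `apply_changes` above (the `db` parameter and the exact mapping are illustrative, not the project's code):

use red_knot::db::RootDatabase;
use red_knot::watch::{ChangeEvent, CreatedKind, DeletedKind};
use ruff_db::files::{File, Files};

// Hedged sketch: pick the cheapest sync that covers each event.
fn refresh(db: &mut RootDatabase, event: ChangeEvent) {
    match event {
        // Single-file events only need the one path re-checked.
        ChangeEvent::Changed { path, .. }
        | ChangeEvent::Created { path, kind: CreatedKind::File }
        | ChangeEvent::Deleted { path, kind: DeletedKind::File } => File::sync_path(db, &path),
        // Directory-level or ambiguous events refresh everything below the path.
        ChangeEvent::Created { path, .. } | ChangeEvent::Deleted { path, .. } => {
            Files::sync_recursively(db, &path);
        }
        // The watcher lost events; refresh every known file.
        ChangeEvent::Rescan => Files::sync_all(db),
    }
}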


@ -0,0 +1,393 @@
use notify::event::{CreateKind, MetadataKind, ModifyKind, RemoveKind, RenameMode};
use notify::{recommended_watcher, EventKind, RecommendedWatcher, RecursiveMode, Watcher as _};
use ruff_db::system::{SystemPath, SystemPathBuf};
use crate::watch::{ChangeEvent, ChangedKind, CreatedKind, DeletedKind};
/// Creates a new watcher observing file system changes.
///
/// The watcher debounces events, but guarantees to send all changes eventually (even if the file system keeps changing).
pub fn directory_watcher<H>(handler: H) -> notify::Result<Watcher>
where
H: EventHandler,
{
let (sender, receiver) = crossbeam::channel::bounded(20);
let debouncer = std::thread::Builder::new()
.name("watcher::debouncer".to_string())
.spawn(move || {
// Wait for the next set of changes
for message in &receiver {
let event = match message {
DebouncerMessage::Event(event) => event,
DebouncerMessage::Flush => {
continue;
}
DebouncerMessage::Exit => {
return;
}
};
let mut debouncer = Debouncer::default();
debouncer.add_result(event);
// Debounce any new incoming changes:
// * Take any new incoming change events and merge them with the previous change events
// * If there are no new incoming change events after 10 ms, flush the changes and wait for the next notify event.
// * Flush no later than after 3s.
loop {
let start = std::time::Instant::now();
crossbeam::select! {
recv(receiver) -> message => {
match message {
Ok(DebouncerMessage::Event(event)) => {
debouncer.add_result(event);
// Ensure that we flush the changes eventually.
if start.elapsed() > std::time::Duration::from_secs(3) {
break;
}
}
Ok(DebouncerMessage::Flush) => {
break;
}
Ok(DebouncerMessage::Exit) => {
return;
},
Err(_) => {
// There are no more senders. There's no point in waiting for more messages
return;
}
}
},
default(std::time::Duration::from_millis(10)) => {
break;
}
}
}
// No new changes arrived within the debounce window (or a flush was requested); hand the batched events to the handler.
let events = debouncer.into_events();
if !events.is_empty() {
handler.handle(events);
}
}
})
.unwrap();
let debouncer_sender = sender.clone();
let watcher =
recommended_watcher(move |event| sender.send(DebouncerMessage::Event(event)).unwrap())?;
Ok(Watcher {
watcher,
debouncer_sender,
debouncer_thread: Some(debouncer),
})
}
#[derive(Debug)]
enum DebouncerMessage {
/// A new file system event.
Event(notify::Result<notify::Event>),
Flush,
/// Exit the debouncer thread.
Exit,
}
pub struct Watcher {
watcher: RecommendedWatcher,
debouncer_sender: crossbeam::channel::Sender<DebouncerMessage>,
debouncer_thread: Option<std::thread::JoinHandle<()>>,
}
impl Watcher {
/// Sets up file watching for `path`.
pub fn watch(&mut self, path: &SystemPath) -> notify::Result<()> {
self.watcher
.watch(path.as_std_path(), RecursiveMode::Recursive)
}
/// Stops file watching for `path`.
pub fn unwatch(&mut self, path: &SystemPath) -> notify::Result<()> {
self.watcher.unwatch(path.as_std_path())
}
/// Stops the file watcher.
///
/// Pending events will be discarded.
///
/// The call blocks until the watcher has stopped.
pub fn stop(mut self) {
self.set_stop();
if let Some(debouncher) = self.debouncer_thread.take() {
debouncher.join().unwrap();
}
}
/// Flushes any pending events.
pub fn flush(&self) {
self.debouncer_sender.send(DebouncerMessage::Flush).unwrap();
}
fn set_stop(&mut self) {
self.debouncer_sender.send(DebouncerMessage::Exit).ok();
}
}
impl Drop for Watcher {
fn drop(&mut self) {
self.set_stop();
}
}
#[derive(Default)]
struct Debouncer {
events: Vec<ChangeEvent>,
rescan_event: Option<ChangeEvent>,
}
impl Debouncer {
#[tracing::instrument(level = "trace", skip(self))]
fn add_result(&mut self, result: notify::Result<notify::Event>) {
match result {
Ok(event) => self.add_event(event),
Err(error) => self.add_error(error),
}
}
#[allow(clippy::unused_self, clippy::needless_pass_by_value)]
fn add_error(&mut self, error: notify::Error) {
// Micha: I skimmed through some of notify's source code and it seems the most common errors
// are IO errors. All other errors should really only happen when adding or removing a watched folder.
// It's not clear what an upstream handler should do in the case of an IOError (other than logging it).
// That's what we do for now as well.
tracing::warn!("File watcher error: {error:?}.");
}
fn add_event(&mut self, event: notify::Event) {
if self.rescan_event.is_some() {
// We're already in a rescan state, ignore all other events
return;
}
// If the file watcher is out of sync or we observed too many changes, trigger a full rescan
if event.need_rescan() || self.events.len() > 10000 {
self.events = Vec::new();
self.rescan_event = Some(ChangeEvent::Rescan);
return;
}
let kind = event.kind;
let path = match SystemPathBuf::from_path_buf(event.paths.into_iter().next().unwrap()) {
Ok(path) => path,
Err(path) => {
tracing::debug!(
"Ignore change to non-UTF8 path '{path}': {kind:?}",
path = path.display()
);
// Ignore non-UTF8 paths because they aren't handled by the rest of the system.
return;
}
};
let event = match kind {
EventKind::Create(create) => {
let kind = match create {
CreateKind::File => CreatedKind::File,
CreateKind::Folder => CreatedKind::Directory,
CreateKind::Any | CreateKind::Other => {
CreatedKind::from(FileType::from_path(&path))
}
};
ChangeEvent::Created { path, kind }
}
EventKind::Modify(modify) => match modify {
ModifyKind::Metadata(metadata) => {
if FileType::from_path(&path) != FileType::File {
// Only interested in file metadata events.
return;
}
match metadata {
MetadataKind::Any | MetadataKind::Permissions | MetadataKind::Other => {
ChangeEvent::Changed {
path,
kind: ChangedKind::FileMetadata,
}
}
MetadataKind::AccessTime
| MetadataKind::WriteTime
| MetadataKind::Ownership
| MetadataKind::Extended => {
// We're not interested in these metadata changes
return;
}
}
}
ModifyKind::Data(_) => ChangeEvent::Changed {
kind: ChangedKind::FileMetadata,
path,
},
ModifyKind::Name(rename) => match rename {
RenameMode::From => {
// TODO: notify_debouncer_full matches the `RenameMode::From` and `RenameMode::To` events.
// Matching the from and to event would have the added advantage that we know the
// type of the path that was renamed, allowing `apply_changes` to avoid traversing the
// entire package.
// https://github.com/notify-rs/notify/blob/128bf6230c03d39dbb7f301ff7b20e594e34c3a2/notify-debouncer-full/src/lib.rs#L293-L297
ChangeEvent::Deleted {
kind: DeletedKind::Any,
path,
}
}
RenameMode::To => ChangeEvent::Created {
kind: CreatedKind::from(FileType::from_path(&path)),
path,
},
RenameMode::Both => {
// Both is only emitted when moving a path from within a watched directory
// to another watched directory. The event is not emitted if the `to` or `from` path
// lies outside the watched directory. However, the `To` and `From` events are always emitted.
// That's why we ignore `Both` and instead rely on `To` and `From`.
return;
}
RenameMode::Other => {
// Skip over any other rename events
return;
}
RenameMode::Any => {
// Guess the action based on the current file system state
if path.as_std_path().exists() {
let file_type = FileType::from_path(&path);
ChangeEvent::Created {
kind: file_type.into(),
path,
}
} else {
ChangeEvent::Deleted {
kind: DeletedKind::Any,
path,
}
}
}
},
ModifyKind::Other => {
// Skip other modification events that are not content or metadata related
return;
}
ModifyKind::Any => {
if !path.as_std_path().is_file() {
return;
}
ChangeEvent::Changed {
path,
kind: ChangedKind::Any,
}
}
},
EventKind::Access(_) => {
// We're not interested in any access events
return;
}
EventKind::Remove(kind) => {
let kind = match kind {
RemoveKind::File => DeletedKind::File,
RemoveKind::Folder => DeletedKind::Directory,
RemoveKind::Any | RemoveKind::Other => DeletedKind::Any,
};
ChangeEvent::Deleted { path, kind }
}
EventKind::Other => {
// Skip over meta events
return;
}
EventKind::Any => {
tracing::debug!("Skip any FS event for {path}.");
return;
}
};
self.events.push(event);
}
fn into_events(self) -> Vec<ChangeEvent> {
if let Some(rescan_event) = self.rescan_event {
vec![rescan_event]
} else {
self.events
}
}
}
pub trait EventHandler: Send + 'static {
fn handle(&self, changes: Vec<ChangeEvent>);
}
impl<F> EventHandler for F
where
F: Fn(Vec<ChangeEvent>) + Send + 'static,
{
fn handle(&self, changes: Vec<ChangeEvent>) {
let f = self;
f(changes);
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum FileType {
/// The event is related to a file.
File,
/// The event is related to a directory.
Directory,
/// It's unknown whether the event is related to a file or a directory or if it is any other file type.
Any,
}
impl FileType {
fn from_path(path: &SystemPath) -> FileType {
match path.as_std_path().metadata() {
Ok(metadata) if metadata.is_file() => FileType::File,
Ok(metadata) if metadata.is_dir() => FileType::Directory,
Ok(_) | Err(_) => FileType::Any,
}
}
}
impl From<FileType> for CreatedKind {
fn from(value: FileType) -> Self {
match value {
FileType::File => Self::File,
FileType::Directory => Self::Directory,
FileType::Any => Self::Any,
}
}
}
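
A brief usage sketch of the watcher API added here; the channel, path, and shutdown sequence are illustrative placeholders rather than prescribed usage:

use red_knot::watch::{directory_watcher, ChangeEvent};
use ruff_db::system::SystemPath;

fn watch_workspace() -> notify::Result<()> {
    let (sender, receiver) = crossbeam::channel::unbounded::<Vec<ChangeEvent>>();
    // The handler runs on the debouncer thread and receives already-debounced batches.
    let mut watcher = directory_watcher(move |events| sender.send(events).unwrap())?;
    watcher.watch(SystemPath::new("/path/to/workspace"))?;
    // ... later: force out any buffered events, drain them, and shut the watcher down.
    watcher.flush();
    for batch in receiver.try_iter() {
        println!("{batch:?}");
    }
    watcher.stop();
    Ok(())
}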


@ -117,6 +117,7 @@ impl Workspace {
self.package_tree(db).values().copied()
}
#[tracing::instrument(skip_all)]
pub fn reload(self, db: &mut dyn Db, metadata: WorkspaceMetadata) {
assert_eq!(self.root(db), metadata.root());
@ -139,6 +140,7 @@ impl Workspace {
self.set_package_tree(db).to(new_packages);
}
#[tracing::instrument(level = "debug", skip_all)]
pub fn update_package(self, db: &mut dyn Db, metadata: PackageMetadata) -> anyhow::Result<()> {
let path = metadata.root().to_path_buf();
@ -157,7 +159,7 @@ impl Workspace {
pub fn package(self, db: &dyn Db, path: &SystemPath) -> Option<Package> {
let packages = self.package_tree(db);
let (package_path, package) = packages.range(..path.to_path_buf()).next_back()?;
let (package_path, package) = packages.range(..=path.to_path_buf()).next_back()?;
if path.starts_with(package_path) {
Some(*package)
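
The switch from an exclusive `..` to an inclusive `..=` bound matters when the queried path is a package root itself: with the exclusive bound, the package's own entry was skipped. The same longest-prefix lookup can be shown with a plain `BTreeMap`, using string prefixes as a stand-in for path components (standalone sketch, not project code):

use std::collections::BTreeMap;

fn package_for<'a>(packages: &'a BTreeMap<String, &'static str>, path: &str) -> Option<&'a str> {
    // Take the closest root that sorts at or before `path`...
    let (root, name) = packages.range(..=path.to_string()).next_back()?;
    // ...and accept it only if it is actually a prefix of (or equal to) `path`.
    path.starts_with(root.as_str()).then_some(*name)
}

fn main() {
    let mut packages = BTreeMap::new();
    packages.insert("/ws/app".to_string(), "app");
    packages.insert("/ws/lib".to_string(), "lib");
    assert_eq!(package_for(&packages, "/ws/lib/foo.py"), Some("lib"));
    // An exclusive `..` bound would miss the entry when the query equals the root.
    assert_eq!(package_for(&packages, "/ws/app"), Some("app"));
}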
@ -252,6 +254,7 @@ impl Package {
self.file_set(db)
}
#[tracing::instrument(level = "debug", skip(db))]
pub fn remove_file(self, db: &mut dyn Db, file: File) -> bool {
let mut files_arc = self.file_set(db).clone();
@ -266,6 +269,22 @@ impl Package {
removed
}
#[tracing::instrument(level = "debug", skip(db))]
pub fn add_file(self, db: &mut dyn Db, file: File) -> bool {
let mut files_arc = self.file_set(db).clone();
// Set a dummy value. Salsa will cancel any pending queries and remove its own reference to `files`
// so that the reference count of `files` drops to 1.
self.set_file_set(db).to(Arc::new(FxHashSet::default()));
let files = Arc::get_mut(&mut files_arc).unwrap();
let added = files.insert(file);
self.set_file_set(db).to(files_arc);
added
}
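
The dummy-value swap above exists so that `Arc::get_mut` can succeed: once the salsa input is overwritten, salsa drops its reference to the old set and the local clone becomes the only owner. The pattern in isolation, with `HashSet` standing in for the salsa-managed file set (standalone sketch):

use std::collections::HashSet;
use std::sync::Arc;

fn add(shared: &mut Arc<HashSet<u32>>, value: u32) -> bool {
    // Clone the handle, then replace the original so the clone is the sole owner.
    let mut local = Arc::clone(shared);
    *shared = Arc::new(HashSet::new()); // stands in for `set_file_set(db).to(...)`
    let added = Arc::get_mut(&mut local)
        .expect("the local clone is now the only reference")
        .insert(value);
    // Publish the mutated set again.
    *shared = local;
    added
}

fn main() {
    let mut files = Arc::new(HashSet::new());
    assert!(add(&mut files, 1));
    assert!(!add(&mut files, 1));
    assert!(files.contains(&1));
}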
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn check(self, db: &dyn Db) -> Vec<String> {
let mut result = Vec::new();
for file in self.files(db) {
@ -286,9 +305,14 @@ impl Package {
let root = self.root(db);
assert_eq!(root, metadata.root());
let files = discover_package_files(db, root);
self.reload_files(db);
self.set_name(db).to(metadata.name);
}
#[tracing::instrument(level = "debug", skip(db))]
pub fn reload_files(self, db: &mut dyn Db) {
let files = discover_package_files(db, self.root(db));
self.set_file_set(db).to(Arc::new(files));
}
}


@ -0,0 +1,590 @@
#![allow(clippy::disallowed_names)]
use std::time::Duration;
use anyhow::{anyhow, Context};
use red_knot::db::RootDatabase;
use red_knot::watch;
use red_knot::watch::{directory_watcher, Watcher};
use red_knot::workspace::WorkspaceMetadata;
use red_knot_module_resolver::{resolve_module, ModuleName};
use ruff_db::files::system_path_to_file;
use ruff_db::program::{ProgramSettings, SearchPathSettings, TargetVersion};
use ruff_db::source::source_text;
use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf};
use ruff_db::Upcast;
struct TestCase {
db: RootDatabase,
watcher: Option<Watcher>,
changes_receiver: crossbeam::channel::Receiver<Vec<watch::ChangeEvent>>,
temp_dir: tempfile::TempDir,
}
impl TestCase {
fn workspace_path(&self, relative: impl AsRef<SystemPath>) -> SystemPathBuf {
SystemPath::absolute(relative, self.db.workspace().root(&self.db))
}
fn root_path(&self) -> &SystemPath {
SystemPath::from_std_path(self.temp_dir.path()).unwrap()
}
fn db(&self) -> &RootDatabase {
&self.db
}
fn db_mut(&mut self) -> &mut RootDatabase {
&mut self.db
}
fn stop_watch(&mut self) -> Vec<watch::ChangeEvent> {
if let Some(watcher) = self.watcher.take() {
// Give the watcher some time to catch up.
std::thread::sleep(Duration::from_millis(10));
watcher.flush();
watcher.stop();
}
let mut all_events = Vec::new();
for events in &self.changes_receiver {
all_events.extend(events);
}
all_events
}
}
fn setup<I, P>(workspace_files: I) -> anyhow::Result<TestCase>
where
I: IntoIterator<Item = (P, &'static str)>,
P: AsRef<SystemPath>,
{
let temp_dir = tempfile::tempdir()?;
let workspace_path = temp_dir.path().join("workspace");
std::fs::create_dir_all(&workspace_path).with_context(|| {
format!(
"Failed to create workspace directory '{}'",
workspace_path.display()
)
})?;
let workspace_path = SystemPath::from_std_path(&workspace_path).ok_or_else(|| {
anyhow!(
"Workspace root '{}' in temp directory is not a valid UTF-8 path.",
workspace_path.display()
)
})?;
let workspace_path = SystemPathBuf::from_utf8_path_buf(
workspace_path
.as_utf8_path()
.canonicalize_utf8()
.with_context(|| "Failed to canonzialize workspace path.")?,
);
for (relative_path, content) in workspace_files {
let relative_path = relative_path.as_ref();
let absolute_path = workspace_path.join(relative_path);
if let Some(parent) = absolute_path.parent() {
std::fs::create_dir_all(parent).with_context(|| {
format!("Failed to create parent directory for file '{relative_path}'.",)
})?;
}
std::fs::write(absolute_path.as_std_path(), content)
.with_context(|| format!("Failed to write file '{relative_path}'"))?;
}
let system = OsSystem::new(&workspace_path);
let workspace = WorkspaceMetadata::from_path(&workspace_path, &system)?;
let settings = ProgramSettings {
target_version: TargetVersion::default(),
search_paths: SearchPathSettings {
extra_paths: vec![],
workspace_root: workspace.root().to_path_buf(),
custom_typeshed: None,
site_packages: None,
},
};
let db = RootDatabase::new(workspace, settings, system);
let (sender, receiver) = crossbeam::channel::unbounded();
let mut watcher = directory_watcher(move |events| sender.send(events).unwrap())
.with_context(|| "Failed to create directory watcher")?;
watcher
.watch(&workspace_path)
.with_context(|| "Failed to set up watcher for workspace directory.")?;
let test_case = TestCase {
db,
changes_receiver: receiver,
watcher: Some(watcher),
temp_dir,
};
Ok(test_case)
}
#[test]
fn new_file() -> anyhow::Result<()> {
let mut case = setup([("bar.py", "")])?;
let foo_path = case.workspace_path("foo.py");
assert_eq!(system_path_to_file(case.db(), &foo_path), None);
std::fs::write(foo_path.as_std_path(), "print('Hello')")?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
let foo = system_path_to_file(case.db(), &foo_path).expect("foo.py to exist.");
let package = case
.db()
.workspace()
.package(case.db(), &foo_path)
.expect("foo.py to belong to a package.");
assert!(package.contains_file(case.db(), foo));
Ok(())
}
#[test]
fn new_ignored_file() -> anyhow::Result<()> {
let mut case = setup([("bar.py", ""), (".ignore", "foo.py")])?;
let foo_path = case.workspace_path("foo.py");
assert_eq!(system_path_to_file(case.db(), &foo_path), None);
std::fs::write(foo_path.as_std_path(), "print('Hello')")?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
let foo = system_path_to_file(case.db(), &foo_path).expect("foo.py to exist.");
let package = case
.db()
.workspace()
.package(case.db(), &foo_path)
.expect("foo.py to belong to a package.");
assert!(!package.contains_file(case.db(), foo));
Ok(())
}
#[test]
fn changed_file() -> anyhow::Result<()> {
let foo_source = "print('Hello, world!')";
let mut case = setup([("foo.py", foo_source)])?;
let foo_path = case.workspace_path("foo.py");
let foo = system_path_to_file(case.db(), &foo_path).ok_or_else(|| anyhow!("Foo not found"))?;
assert_eq!(source_text(case.db(), foo).as_str(), foo_source);
std::fs::write(foo_path.as_std_path(), "print('Version 2')")?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
assert_eq!(source_text(case.db(), foo).as_str(), "print('Version 2')");
Ok(())
}
#[cfg(unix)]
#[test]
fn changed_metadata() -> anyhow::Result<()> {
use std::os::unix::fs::PermissionsExt;
let mut case = setup([("foo.py", "")])?;
let foo_path = case.workspace_path("foo.py");
let foo = system_path_to_file(case.db(), &foo_path).ok_or_else(|| anyhow!("Foo not found"))?;
assert_eq!(
foo.permissions(case.db()),
Some(
std::fs::metadata(foo_path.as_std_path())
.unwrap()
.permissions()
.mode()
)
);
std::fs::set_permissions(
foo_path.as_std_path(),
std::fs::Permissions::from_mode(0o777),
)
.with_context(|| "Failed to set file permissions.")?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
assert_eq!(
foo.permissions(case.db()),
Some(
std::fs::metadata(foo_path.as_std_path())
.unwrap()
.permissions()
.mode()
)
);
Ok(())
}
#[test]
fn deleted_file() -> anyhow::Result<()> {
let foo_source = "print('Hello, world!')";
let mut case = setup([("foo.py", foo_source)])?;
let foo_path = case.workspace_path("foo.py");
let foo = system_path_to_file(case.db(), &foo_path).ok_or_else(|| anyhow!("Foo not found"))?;
let Some(package) = case.db().workspace().package(case.db(), &foo_path) else {
panic!("Expected foo.py to belong to a package.");
};
assert!(foo.exists(case.db()));
assert!(package.contains_file(case.db(), foo));
std::fs::remove_file(foo_path.as_std_path())?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
assert!(!foo.exists(case.db()));
assert!(!package.contains_file(case.db(), foo));
Ok(())
}
/// Tests the case where a file is moved from inside a watched directory to a directory that is not watched.
///
/// This matches the behavior of deleting a file in VS code.
#[test]
fn move_file_to_trash() -> anyhow::Result<()> {
let foo_source = "print('Hello, world!')";
let mut case = setup([("foo.py", foo_source)])?;
let foo_path = case.workspace_path("foo.py");
let trash_path = case.root_path().join(".trash");
std::fs::create_dir_all(trash_path.as_std_path())?;
let foo = system_path_to_file(case.db(), &foo_path).ok_or_else(|| anyhow!("Foo not found"))?;
let Some(package) = case.db().workspace().package(case.db(), &foo_path) else {
panic!("Expected foo.py to belong to a package.");
};
assert!(foo.exists(case.db()));
assert!(package.contains_file(case.db(), foo));
std::fs::rename(
foo_path.as_std_path(),
trash_path.join("foo.py").as_std_path(),
)?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
assert!(!foo.exists(case.db()));
assert!(!package.contains_file(case.db(), foo));
Ok(())
}
/// Move a file from a non-workspace (non-watched) location into the workspace.
#[test]
fn move_file_to_workspace() -> anyhow::Result<()> {
let mut case = setup([("bar.py", "")])?;
let foo_path = case.root_path().join("foo.py");
std::fs::write(foo_path.as_std_path(), "")?;
let foo_in_workspace_path = case.workspace_path("foo.py");
assert!(system_path_to_file(case.db(), &foo_path).is_some());
assert!(case
.db()
.workspace()
.package(case.db(), &foo_path)
.is_none());
std::fs::rename(foo_path.as_std_path(), foo_in_workspace_path.as_std_path())?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
let foo_in_workspace = system_path_to_file(case.db(), &foo_in_workspace_path)
.ok_or_else(|| anyhow!("Foo not found"))?;
let Some(package) = case
.db()
.workspace()
.package(case.db(), &foo_in_workspace_path)
else {
panic!("Expected foo.py to belong to a package.");
};
assert!(foo_in_workspace.exists(case.db()));
assert!(package.contains_file(case.db(), foo_in_workspace));
Ok(())
}
/// Rename a workspace file.
#[test]
fn rename_file() -> anyhow::Result<()> {
let mut case = setup([("foo.py", "")])?;
let foo_path = case.workspace_path("foo.py");
let bar_path = case.workspace_path("bar.py");
let foo = system_path_to_file(case.db(), &foo_path).ok_or_else(|| anyhow!("Foo not found"))?;
let Some(package) = case.db().workspace().package(case.db(), &foo_path) else {
panic!("Expected foo.py to belong to a package.");
};
std::fs::rename(foo_path.as_std_path(), bar_path.as_std_path())?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
assert!(!foo.exists(case.db()));
assert!(!package.contains_file(case.db(), foo));
let bar = system_path_to_file(case.db(), &bar_path).ok_or_else(|| anyhow!("Bar not found"))?;
let Some(package) = case.db().workspace().package(case.db(), &bar_path) else {
panic!("Expected bar.py to belong to a package.");
};
assert!(bar.exists(case.db()));
assert!(package.contains_file(case.db(), bar));
Ok(())
}
#[test]
fn directory_moved_to_workspace() -> anyhow::Result<()> {
let mut case = setup([("bar.py", "import sub.a")])?;
let sub_original_path = case.root_path().join("sub");
let init_original_path = sub_original_path.join("__init__.py");
let a_original_path = sub_original_path.join("a.py");
std::fs::create_dir(sub_original_path.as_std_path())
.with_context(|| "Failed to create sub directory")?;
std::fs::write(init_original_path.as_std_path(), "")
.with_context(|| "Failed to create __init__.py")?;
std::fs::write(a_original_path.as_std_path(), "").with_context(|| "Failed to create a.py")?;
let sub_a_module = resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap());
assert_eq!(sub_a_module, None);
let sub_new_path = case.workspace_path("sub");
std::fs::rename(sub_original_path.as_std_path(), sub_new_path.as_std_path())
.with_context(|| "Failed to move sub directory")?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
let init_file = system_path_to_file(case.db(), sub_new_path.join("__init__.py"))
.expect("__init__.py to exist");
let a_file = system_path_to_file(case.db(), sub_new_path.join("a.py")).expect("a.py to exist");
// `import sub.a` should now resolve
assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_some());
let package = case
.db()
.workspace()
.package(case.db(), &sub_new_path)
.expect("sub to belong to a package");
assert!(package.contains_file(case.db(), init_file));
assert!(package.contains_file(case.db(), a_file));
Ok(())
}
#[test]
fn directory_moved_to_trash() -> anyhow::Result<()> {
let mut case = setup([
("bar.py", "import sub.a"),
("sub/__init__.py", ""),
("sub/a.py", ""),
])?;
assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_some(),);
let sub_path = case.workspace_path("sub");
let package = case
.db()
.workspace()
.package(case.db(), &sub_path)
.expect("sub to belong to a package");
let init_file =
system_path_to_file(case.db(), sub_path.join("__init__.py")).expect("__init__.py to exist");
let a_file = system_path_to_file(case.db(), sub_path.join("a.py")).expect("a.py to exist");
assert!(package.contains_file(case.db(), init_file));
assert!(package.contains_file(case.db(), a_file));
std::fs::create_dir(case.root_path().join(".trash").as_std_path())?;
let trashed_sub = case.root_path().join(".trash/sub");
std::fs::rename(sub_path.as_std_path(), trashed_sub.as_std_path())
.with_context(|| "Failed to move the sub directory to the trash")?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
// `import sub.a` should no longer resolve
assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_none());
assert!(!init_file.exists(case.db()));
assert!(!a_file.exists(case.db()));
assert!(!package.contains_file(case.db(), init_file));
assert!(!package.contains_file(case.db(), a_file));
Ok(())
}
#[test]
fn directory_renamed() -> anyhow::Result<()> {
let mut case = setup([
("bar.py", "import sub.a"),
("sub/__init__.py", ""),
("sub/a.py", ""),
])?;
assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_some());
assert!(resolve_module(
case.db().upcast(),
ModuleName::new_static("foo.baz").unwrap()
)
.is_none());
let sub_path = case.workspace_path("sub");
let package = case
.db()
.workspace()
.package(case.db(), &sub_path)
.expect("sub to belong to a package");
let sub_init =
system_path_to_file(case.db(), sub_path.join("__init__.py")).expect("__init__.py to exist");
let sub_a = system_path_to_file(case.db(), sub_path.join("a.py")).expect("a.py to exist");
assert!(package.contains_file(case.db(), sub_init));
assert!(package.contains_file(case.db(), sub_a));
let foo_baz = case.workspace_path("foo/baz");
std::fs::create_dir(case.workspace_path("foo").as_std_path())?;
std::fs::rename(sub_path.as_std_path(), foo_baz.as_std_path())
.with_context(|| "Failed to move the sub directory")?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
// `import sub.a` should no longer resolve
assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_none());
// `import foo.baz` should now resolve
assert!(resolve_module(
case.db().upcast(),
ModuleName::new_static("foo.baz").unwrap()
)
.is_some());
// The old paths are no longer tracked
assert!(!sub_init.exists(case.db()));
assert!(!sub_a.exists(case.db()));
assert!(!package.contains_file(case.db(), sub_init));
assert!(!package.contains_file(case.db(), sub_a));
let foo_baz_init =
system_path_to_file(case.db(), foo_baz.join("__init__.py")).expect("__init__.py to exist");
let foo_baz_a = system_path_to_file(case.db(), foo_baz.join("a.py")).expect("a.py to exist");
// The new paths are synced
assert!(foo_baz_init.exists(case.db()));
assert!(foo_baz_a.exists(case.db()));
assert!(package.contains_file(case.db(), foo_baz_init));
assert!(package.contains_file(case.db(), foo_baz_a));
Ok(())
}
#[test]
fn directory_deleted() -> anyhow::Result<()> {
let mut case = setup([
("bar.py", "import sub.a"),
("sub/__init__.py", ""),
("sub/a.py", ""),
])?;
assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_some(),);
let sub_path = case.workspace_path("sub");
let package = case
.db()
.workspace()
.package(case.db(), &sub_path)
.expect("sub to belong to a package");
let init_file =
system_path_to_file(case.db(), sub_path.join("__init__.py")).expect("__init__.py to exist");
let a_file = system_path_to_file(case.db(), sub_path.join("a.py")).expect("a.py to exist");
assert!(package.contains_file(case.db(), init_file));
assert!(package.contains_file(case.db(), a_file));
std::fs::remove_dir_all(sub_path.as_std_path())
.with_context(|| "Failed to remove the sub directory")?;
let changes = case.stop_watch();
case.db_mut().apply_changes(changes);
// `import sub.a` should no longer resolve
assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_none());
assert!(!init_file.exists(case.db()));
assert!(!a_file.exists(case.db()));
assert!(!package.contains_file(case.db(), init_file));
assert!(!package.contains_file(case.db(), a_file));
Ok(())
}


@ -76,6 +76,9 @@ pub(crate) mod tests {
fn upcast(&self) -> &(dyn ruff_db::Db + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn ruff_db::Db + 'static) {
self
}
}
impl ruff_db::Db for TestDb {


@ -30,9 +30,8 @@ pub(crate) fn resolve_module_query<'db>(
db: &'db dyn Db,
module_name: internal::ModuleNameIngredient<'db>,
) -> Option<Module> {
let _span = tracing::trace_span!("resolve_module", ?module_name).entered();
let name = module_name.name(db);
let _span = tracing::trace_span!("resolve_module", %name).entered();
let (search_path, module_file, kind) = resolve_name(db, name)?;
@ -1225,7 +1224,7 @@ mod tests {
// Delete `bar.py`
db.memory_file_system().remove_file(&bar_path).unwrap();
bar.touch(&mut db);
bar.sync(&mut db);
// Re-query the foo module. The foo module should still be cached because `bar.py` isn't relevant
// for resolving `foo`.
@ -1277,7 +1276,7 @@ mod tests {
db.memory_file_system().remove_file(&foo_init_path)?;
db.memory_file_system()
.remove_directory(foo_init_path.parent().unwrap())?;
File::touch_path(&mut db, &foo_init_path);
File::sync_path(&mut db, &foo_init_path);
let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve");
assert_eq!(&src.join("foo.py"), foo_module.file().path(&db));
@ -1405,7 +1404,7 @@ mod tests {
db.memory_file_system()
.remove_file(&src_functools_path)
.unwrap();
File::touch_path(&mut db, &src_functools_path);
File::sync_path(&mut db, &src_functools_path);
let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap();
assert_eq!(functools_module.search_path(), &stdlib);
assert_eq!(
@ -1617,7 +1616,7 @@ not_a_directory
// Salsa file forces a new revision.
//
// TODO: get rid of the `.report_untracked_read()` call...
File::touch_path(&mut db, SystemPath::new("/x/src/foo.py"));
File::sync_path(&mut db, SystemPath::new("/x/src/foo.py"));
assert_eq!(resolve_module(&db, foo_module_name.clone()), None);
}
@ -1645,8 +1644,8 @@ not_a_directory
.remove_file(src_path.join("foo.py"))
.unwrap();
db.memory_file_system().remove_directory(&src_path).unwrap();
File::touch_path(&mut db, &src_path.join("foo.py"));
File::touch_path(&mut db, &src_path);
File::sync_path(&mut db, &src_path.join("foo.py"));
File::sync_path(&mut db, &src_path);
assert_eq!(resolve_module(&db, foo_module_name.clone()), None);
}


@ -33,10 +33,7 @@ pub struct Jar(
);
/// Database giving access to semantic information about a Python program.
pub trait Db:
SourceDb + ResolverDb + DbWithJar<Jar> + Upcast<dyn SourceDb> + Upcast<dyn ResolverDb>
{
}
pub trait Db: SourceDb + ResolverDb + DbWithJar<Jar> + Upcast<dyn ResolverDb> {}
#[cfg(test)]
pub(crate) mod tests {
@ -120,12 +117,18 @@ pub(crate) mod tests {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
self
}
}
impl Upcast<dyn ResolverDb> for TestDb {
fn upcast(&self) -> &(dyn ResolverDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn ResolverDb + 'static) {
self
}
}
impl red_knot_module_resolver::Db for TestDb {}


@ -144,7 +144,7 @@ fn benchmark_incremental(criterion: &mut Criterion) {
)
.unwrap();
case.bar.touch(&mut case.db);
case.bar.sync(&mut case.db);
case
},
|case| {


@ -5,8 +5,8 @@ use dashmap::mapref::entry::Entry;
use crate::file_revision::FileRevision;
use crate::files::private::FileStatus;
use crate::system::SystemPath;
use crate::vendored::VendoredPath;
use crate::system::{SystemPath, SystemPathBuf};
use crate::vendored::{VendoredPath, VendoredPathBuf};
use crate::{Db, FxDashMap};
pub use path::FilePath;
use ruff_notebook::{Notebook, NotebookError};
@ -24,10 +24,7 @@ pub fn system_path_to_file(db: &dyn Db, path: impl AsRef<SystemPath>) -> Option<
// exist anymore so that Salsa can track that the caller of this function depends on the existence of
// that file. This function filters out files that don't exist, but Salsa will know that it must
// re-run the calling query whenever the `file`'s status changes (because of the `.status` call here).
match file.status(db) {
FileStatus::Exists => Some(file),
FileStatus::Deleted => None,
}
file.exists(db).then_some(file)
}
/// Interns a vendored file path. Returns `Some` if the vendored file for `path` exists and `None` otherwise.
@ -44,11 +41,14 @@ pub struct Files {
#[derive(Default)]
struct FilesInner {
/// Lookup table that maps [`FilePath`]s to salsa interned [`File`] instances.
/// Lookup table that maps [`SystemPathBuf`]s to salsa interned [`File`] instances.
///
/// The map also stores entries for files that don't exist on the file system. This is necessary
/// so that queries that depend on the existence of a file are re-executed when the file is created.
files_by_path: FxDashMap<FilePath, File>,
system_by_path: FxDashMap<SystemPathBuf, File>,
/// Lookup table that maps vendored files to the salsa [`File`] ingredients.
vendored_by_path: FxDashMap<VendoredPathBuf, File>,
}
impl Files {
@ -61,11 +61,10 @@ impl Files {
#[tracing::instrument(level = "trace", skip(self, db), ret)]
fn system(&self, db: &dyn Db, path: &SystemPath) -> File {
let absolute = SystemPath::absolute(path, db.system().current_directory());
let absolute = FilePath::System(absolute);
*self
.inner
.files_by_path
.system_by_path
.entry(absolute.clone())
.or_insert_with(|| {
let metadata = db.system().path_metadata(path);
@ -73,7 +72,7 @@ impl Files {
match metadata {
Ok(metadata) if metadata.file_type().is_file() => File::new(
db,
absolute,
FilePath::System(absolute),
metadata.permissions(),
metadata.revision(),
FileStatus::Exists,
@ -81,7 +80,7 @@ impl Files {
),
_ => File::new(
db,
absolute,
FilePath::System(absolute),
None,
FileRevision::zero(),
FileStatus::Deleted,
@ -92,11 +91,11 @@ impl Files {
}
/// Tries to look up the file for the given system path, returns `None` if no such file exists yet
fn try_system(&self, db: &dyn Db, path: &SystemPath) -> Option<File> {
pub fn try_system(&self, db: &dyn Db, path: &SystemPath) -> Option<File> {
let absolute = SystemPath::absolute(path, db.system().current_directory());
self.inner
.files_by_path
.get(&FilePath::System(absolute))
.system_by_path
.get(&absolute)
.map(|entry| *entry.value())
}
@ -104,11 +103,7 @@ impl Files {
/// exists and `None` otherwise.
#[tracing::instrument(level = "trace", skip(self, db), ret)]
fn vendored(&self, db: &dyn Db, path: &VendoredPath) -> Option<File> {
let file = match self
.inner
.files_by_path
.entry(FilePath::Vendored(path.to_path_buf()))
{
let file = match self.inner.vendored_by_path.entry(path.to_path_buf()) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
let metadata = db.vendored().metadata(path).ok()?;
@ -131,6 +126,44 @@ impl Files {
Some(file)
}
/// Refreshes the state of all known files under `path` recursively.
///
/// The most common use case is to update the [`Files`] state after removing or moving a directory.
///
/// # Performance
/// Refreshing the state of every file under `path` is expensive. It requires iterating over all known files
/// and making system calls to get the latest status of each file in `path`.
/// That's why [`File::sync_path`] and [`File::sync`] are preferred if it is known that the path is a file.
#[tracing::instrument(level = "debug", skip(db))]
pub fn sync_recursively(db: &mut dyn Db, path: &SystemPath) {
let path = SystemPath::absolute(path, db.system().current_directory());
let inner = Arc::clone(&db.files().inner);
for entry in inner.system_by_path.iter_mut() {
if entry.key().starts_with(&path) {
let file = entry.value();
file.sync(db);
}
}
}
/// Refreshes the state of all known files.
///
/// This is a last-resort method that should only be used when more granular updates aren't possible
/// (for example, because the file watcher failed to observe some changes). Use responsibly!
///
/// # Performance
/// Refreshing the state of every file is expensive. It requires iterating over all known files and
/// issuing a system call to get the latest status of each file.
#[tracing::instrument(level = "debug", skip(db))]
pub fn sync_all(db: &mut dyn Db) {
let inner = Arc::clone(&db.files().inner);
for entry in inner.system_by_path.iter_mut() {
let file = entry.value();
file.sync(db);
}
}
/// Creates a salsa like snapshot. The instances share
/// the same path-to-file mapping.
pub fn snapshot(&self) -> Self {
@ -144,7 +177,7 @@ impl std::fmt::Debug for Files {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut map = f.debug_map();
for entry in self.inner.files_by_path.iter() {
for entry in self.inner.system_by_path.iter() {
map.entry(entry.key(), entry.value());
}
map.finish()
@ -219,18 +252,20 @@ impl File {
}
/// Refreshes the file metadata by querying the file system if needed.
/// TODO: The API should instead take all observed changes from the file system directly
/// and then apply the VfsFile status accordingly. But for now, this is sufficient.
pub fn touch_path(db: &mut dyn Db, path: &SystemPath) {
Self::touch_impl(db, path, None);
#[tracing::instrument(level = "debug", skip(db))]
pub fn sync_path(db: &mut dyn Db, path: &SystemPath) {
let absolute = SystemPath::absolute(path, db.system().current_directory());
Self::sync_impl(db, &absolute, None);
}
pub fn touch(self, db: &mut dyn Db) {
/// Syncs the [`File`]'s state with the state of the file on the system.
#[tracing::instrument(level = "debug", skip(db))]
pub fn sync(self, db: &mut dyn Db) {
let path = self.path(db).clone();
match path {
FilePath::System(system) => {
Self::touch_impl(db, &system, Some(self));
Self::sync_impl(db, &system, Some(self));
}
FilePath::Vendored(_) => {
// Readonly, can never be out of date.
@ -238,23 +273,31 @@ impl File {
}
}
/// Private method providing the implementation for [`Self::touch_path`] and [`Self::touch`].
fn touch_impl(db: &mut dyn Db, path: &SystemPath, file: Option<File>) {
let metadata = db.system().path_metadata(path);
let (status, revision) = match metadata {
Ok(metadata) if metadata.file_type().is_file() => {
(FileStatus::Exists, metadata.revision())
}
_ => (FileStatus::Deleted, FileRevision::zero()),
};
/// Private method providing the implementation for [`Self::sync_path`] and [`Self::sync`].
fn sync_impl(db: &mut dyn Db, path: &SystemPath, file: Option<File>) {
let Some(file) = file.or_else(|| db.files().try_system(db, path)) else {
return;
};
let metadata = db.system().path_metadata(path);
let (status, revision, permission) = match metadata {
Ok(metadata) if metadata.file_type().is_file() => (
FileStatus::Exists,
metadata.revision(),
metadata.permissions(),
),
_ => (FileStatus::Deleted, FileRevision::zero(), None),
};
file.set_status(db).to(status);
file.set_revision(db).to(revision);
file.set_permissions(db).to(permission);
}
/// Returns `true` if the file exists.
pub fn exists(self, db: &dyn Db) -> bool {
self.status(db) == FileStatus::Exists
}
}


@ -34,6 +34,7 @@ pub trait Db: DbWithJar<Jar> {
/// Trait for upcasting a reference to a base trait object.
pub trait Upcast<T: ?Sized> {
fn upcast(&self) -> &T;
fn upcast_mut(&mut self) -> &mut T;
}
#[cfg(test)]


@ -145,7 +145,7 @@ pub trait DbWithTestSystem: Db + Sized {
.write_file(path, content);
if result.is_ok() {
File::touch_path(self, path);
File::sync_path(self, path);
}
result