diff --git a/.editorconfig b/.editorconfig index 843bd1f3ad..23a805b49f 100644 --- a/.editorconfig +++ b/.editorconfig @@ -17,4 +17,7 @@ indent_size = 4 trim_trailing_whitespace = false [*.md] -max_line_length = 100 \ No newline at end of file +max_line_length = 100 + +[*.toml] +indent_size = 4 \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index ad7567d88d..adb7cb7c7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -170,6 +170,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.22.0" @@ -829,6 +835,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bda8e21c04aca2ae33ffc2fd8c23134f3cac46db123ba97bd9d3f3b8a4a85e1" +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "dyn-clone" version = "1.0.17" @@ -1358,6 +1370,7 @@ dependencies = [ "pest", "pest_derive", "regex", + "ron", "serde", "similar", "walkdir", @@ -2269,6 +2282,7 @@ dependencies = [ "ruff_text_size", "rustc-hash 2.0.0", "salsa", + "serde", "smallvec", "static_assertions", "tempfile", @@ -2354,7 +2368,9 @@ dependencies = [ "anyhow", "crossbeam", "glob", + "insta", "notify", + "pep440_rs 0.7.2", "rayon", "red_knot_python_semantic", "red_knot_vendored", @@ -2364,6 +2380,9 @@ dependencies = [ "ruff_text_size", "rustc-hash 2.0.0", "salsa", + "serde", + "thiserror 2.0.3", + "toml", "tracing", ] @@ -2455,6 +2474,17 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "ron" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88073939a61e5b7680558e6be56b419e208420c2adb92be54921fa6b72283f1a" +dependencies = [ + "base64 0.13.1", + "bitflags 1.3.2", + "serde", +] + [[package]] name = "ruff" version = "0.7.4" @@ -2556,7 +2586,9 @@ dependencies = [ "camino", "countme", "dashmap 6.1.0", + "dunce", "filetime", + "glob", "ignore", "insta", "matchit", @@ -3906,7 +3938,7 @@ version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a" dependencies = [ - "base64", + "base64 0.22.0", "flate2", "log", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index 3060bec445..1a931ae14f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,7 @@ criterion = { version = "0.5.1", default-features = false } crossbeam = { version = "0.8.4" } dashmap = { version = "6.0.1" } dir-test = { version = "0.3.0" } +dunce = { version = "1.0.5" } drop_bomb = { version = "0.1.5" } env_logger = { version = "0.11.0" } etcetera = { version = "0.8.0" } @@ -81,7 +82,7 @@ hashbrown = { version = "0.15.0", default-features = false, features = [ ignore = { version = "0.4.22" } imara-diff = { version = "0.1.5" } imperative = { version = "1.0.4" } -indexmap = {version = "2.6.0" } +indexmap = { version = "2.6.0" } indicatif = { version = "0.17.8" } indoc = { version = "2.0.4" } insta = { version = "1.35.1" } diff --git a/_typos.toml b/_typos.toml index 748c29e66c..48e4a4c375 100644 --- a/_typos.toml +++ b/_typos.toml @@ -1,6 +1,11 @@ [files] # https://github.com/crate-ci/typos/issues/868 -extend-exclude = ["crates/red_knot_vendored/vendor/**/*", "**/resources/**/*", "**/snapshots/**/*"] +extend-exclude = [ + "crates/red_knot_vendored/vendor/**/*", + "**/resources/**/*", + "**/snapshots/**/*", + "crates/red_knot_workspace/src/workspace/pyproject/package_name.rs" +] [default.extend-words] "arange" = "arange" # e.g. `numpy.arange` diff --git a/crates/red_knot/Cargo.toml b/crates/red_knot/Cargo.toml index 1b3fdfa346..07715be94e 100644 --- a/crates/red_knot/Cargo.toml +++ b/crates/red_knot/Cargo.toml @@ -34,6 +34,7 @@ tracing-tree = { workspace = true } [dev-dependencies] filetime = { workspace = true } tempfile = { workspace = true } +ruff_db = { workspace = true, features = ["testing"] } [lints] workspace = true diff --git a/crates/red_knot/src/main.rs b/crates/red_knot/src/main.rs index 7e18843bfd..173652746c 100644 --- a/crates/red_knot/src/main.rs +++ b/crates/red_knot/src/main.rs @@ -183,10 +183,10 @@ fn run() -> anyhow::Result { let system = OsSystem::new(cwd.clone()); let cli_configuration = args.to_configuration(&cwd); - let workspace_metadata = WorkspaceMetadata::from_path( + let workspace_metadata = WorkspaceMetadata::discover( system.current_directory(), &system, - Some(cli_configuration.clone()), + Some(&cli_configuration), )?; // TODO: Use the `program_settings` to compute the key for the database's persistent diff --git a/crates/red_knot/tests/file_watching.rs b/crates/red_knot/tests/file_watching.rs index 8067214235..5c2880e0b0 100644 --- a/crates/red_knot/tests/file_watching.rs +++ b/crates/red_knot/tests/file_watching.rs @@ -6,7 +6,7 @@ use std::time::Duration; use anyhow::{anyhow, Context}; use red_knot_python_semantic::{resolve_module, ModuleName, Program, PythonVersion, SitePackages}; -use red_knot_workspace::db::RootDatabase; +use red_knot_workspace::db::{Db, RootDatabase}; use red_knot_workspace::watch; use red_knot_workspace::watch::{directory_watcher, WorkspaceWatcher}; use red_knot_workspace::workspace::settings::{Configuration, SearchPathConfiguration}; @@ -14,6 +14,7 @@ use red_knot_workspace::workspace::WorkspaceMetadata; use ruff_db::files::{system_path_to_file, File, FileError}; use ruff_db::source::source_text; use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf}; +use ruff_db::testing::setup_logging; use ruff_db::Upcast; struct TestCase { @@ -69,7 +70,6 @@ impl TestCase { Some(all_events) } - #[cfg(unix)] fn take_watch_changes(&self) -> Vec { self.try_take_watch_changes(Duration::from_secs(10)) .expect("Expected watch changes but observed none") @@ -110,8 +110,8 @@ impl TestCase { ) -> anyhow::Result<()> { let program = Program::get(self.db()); - self.configuration.search_paths = configuration.clone(); - let new_settings = configuration.into_settings(self.db.workspace().root(&self.db)); + let new_settings = configuration.to_settings(self.db.workspace().root(&self.db)); + self.configuration.search_paths = configuration; program.update_search_paths(&mut self.db, &new_settings)?; @@ -204,7 +204,9 @@ where .as_utf8_path() .canonicalize_utf8() .with_context(|| "Failed to canonicalize root path.")?, - ); + ) + .simplified() + .to_path_buf(); let workspace_path = root_path.join("workspace"); @@ -241,8 +243,7 @@ where search_paths, }; - let workspace = - WorkspaceMetadata::from_path(&workspace_path, &system, Some(configuration.clone()))?; + let workspace = WorkspaceMetadata::discover(&workspace_path, &system, Some(&configuration))?; let db = RootDatabase::new(workspace, system)?; @@ -1311,3 +1312,138 @@ mod unix { Ok(()) } } + +#[test] +fn nested_packages_delete_root() -> anyhow::Result<()> { + let mut case = setup(|root: &SystemPath, workspace_root: &SystemPath| { + std::fs::write( + workspace_root.join("pyproject.toml").as_std_path(), + r#" + [project] + name = "inner" + "#, + )?; + + std::fs::write( + root.join("pyproject.toml").as_std_path(), + r#" + [project] + name = "outer" + "#, + )?; + + Ok(()) + })?; + + assert_eq!( + case.db().workspace().root(case.db()), + &*case.workspace_path("") + ); + + std::fs::remove_file(case.workspace_path("pyproject.toml").as_std_path())?; + + let changes = case.stop_watch(); + + case.apply_changes(changes); + + // It should now pick up the outer workspace. + assert_eq!(case.db().workspace().root(case.db()), case.root_path()); + + Ok(()) +} + +#[test] +fn added_package() -> anyhow::Result<()> { + let _ = setup_logging(); + let mut case = setup([ + ( + "pyproject.toml", + r#" + [project] + name = "inner" + + [tool.knot.workspace] + members = ["packages/*"] + "#, + ), + ( + "packages/a/pyproject.toml", + r#" + [project] + name = "a" + "#, + ), + ])?; + + assert_eq!(case.db().workspace().packages(case.db()).len(), 2); + + std::fs::create_dir(case.workspace_path("packages/b").as_std_path()) + .context("failed to create folder for package 'b'")?; + + // It seems that the file watcher won't pick up on file changes shortly after the folder + // was created... I suspect this is because most file watchers don't support recursive + // file watching. Instead, file-watching libraries manually implement recursive file watching + // by setting a watcher for each directory. But doing this obviously "lags" behind. + case.take_watch_changes(); + + std::fs::write( + case.workspace_path("packages/b/pyproject.toml") + .as_std_path(), + r#" + [project] + name = "b" + "#, + ) + .context("failed to write pyproject.toml for package b")?; + + let changes = case.stop_watch(); + + case.apply_changes(changes); + + assert_eq!(case.db().workspace().packages(case.db()).len(), 3); + + Ok(()) +} + +#[test] +fn removed_package() -> anyhow::Result<()> { + let mut case = setup([ + ( + "pyproject.toml", + r#" + [project] + name = "inner" + + [tool.knot.workspace] + members = ["packages/*"] + "#, + ), + ( + "packages/a/pyproject.toml", + r#" + [project] + name = "a" + "#, + ), + ( + "packages/b/pyproject.toml", + r#" + [project] + name = "b" + "#, + ), + ])?; + + assert_eq!(case.db().workspace().packages(case.db()).len(), 3); + + std::fs::remove_dir_all(case.workspace_path("packages/b").as_std_path()) + .context("failed to remove package 'b'")?; + + let changes = case.stop_watch(); + + case.apply_changes(changes); + + assert_eq!(case.db().workspace().packages(case.db()).len(), 2); + + Ok(()) +} diff --git a/crates/red_knot_python_semantic/Cargo.toml b/crates/red_knot_python_semantic/Cargo.toml index a16cd4dd55..af735f99e1 100644 --- a/crates/red_knot_python_semantic/Cargo.toml +++ b/crates/red_knot_python_semantic/Cargo.toml @@ -33,6 +33,7 @@ thiserror = { workspace = true } tracing = { workspace = true } rustc-hash = { workspace = true } hashbrown = { workspace = true } +serde = { workspace = true, optional = true } smallvec = { workspace = true } static_assertions = { workspace = true } test-case = { workspace = true } diff --git a/crates/red_knot_python_semantic/src/program.rs b/crates/red_knot_python_semantic/src/program.rs index 7671dabb95..39f369b46c 100644 --- a/crates/red_knot_python_semantic/src/program.rs +++ b/crates/red_knot_python_semantic/src/program.rs @@ -54,6 +54,7 @@ impl Program { } #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] pub struct ProgramSettings { pub target_version: PythonVersion, pub search_paths: SearchPathSettings, @@ -61,6 +62,7 @@ pub struct ProgramSettings { /// Configures the search paths for module resolution. #[derive(Eq, PartialEq, Debug, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] pub struct SearchPathSettings { /// List of user-provided paths that should take first priority in the module resolution. /// Examples in other type checkers are mypy's MYPYPATH environment variable, @@ -91,6 +93,7 @@ impl SearchPathSettings { } #[derive(Debug, Clone, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] pub enum SitePackages { Derived { venv_path: SystemPathBuf, diff --git a/crates/red_knot_python_semantic/src/python_version.rs b/crates/red_knot_python_semantic/src/python_version.rs index 58d15d76f9..b8fe608f98 100644 --- a/crates/red_knot_python_semantic/src/python_version.rs +++ b/crates/red_knot_python_semantic/src/python_version.rs @@ -5,6 +5,7 @@ use std::fmt; /// Unlike the `TargetVersion` enums in the CLI crates, /// this does not necessarily represent a Python version that we actually support. #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize))] pub struct PythonVersion { pub major: u8, pub minor: u8, diff --git a/crates/red_knot_python_semantic/src/site_packages.rs b/crates/red_knot_python_semantic/src/site_packages.rs index b6b65890d1..890162dc90 100644 --- a/crates/red_knot_python_semantic/src/site_packages.rs +++ b/crates/red_knot_python_semantic/src/site_packages.rs @@ -732,7 +732,20 @@ mod tests { let system = TestSystem::default(); assert!(matches!( VirtualEnvironment::new("/.venv", &system), - Err(SitePackagesDiscoveryError::VenvDirIsNotADirectory(_)) + Err(SitePackagesDiscoveryError::VenvDirCanonicalizationError(..)) + )); + } + + #[test] + fn reject_venv_that_is_not_a_directory() { + let system = TestSystem::default(); + system + .memory_file_system() + .write_file("/.venv", "") + .unwrap(); + assert!(matches!( + VirtualEnvironment::new("/.venv", &system), + Err(SitePackagesDiscoveryError::VenvDirIsNotADirectory(..)) )); } diff --git a/crates/red_knot_server/src/session.rs b/crates/red_knot_server/src/session.rs index de01e227d1..ca4e129959 100644 --- a/crates/red_knot_server/src/session.rs +++ b/crates/red_knot_server/src/session.rs @@ -68,7 +68,7 @@ impl Session { let system = LSPSystem::new(index.clone()); // TODO(dhruvmanila): Get the values from the client settings - let metadata = WorkspaceMetadata::from_path(system_path, &system, None)?; + let metadata = WorkspaceMetadata::discover(system_path, &system, None)?; // TODO(micha): Handle the case where the program settings are incorrect more gracefully. workspaces.insert(path, RootDatabase::new(metadata, system)?); } diff --git a/crates/red_knot_server/src/system.rs b/crates/red_knot_server/src/system.rs index 4d8c18785b..e2d4438567 100644 --- a/crates/red_knot_server/src/system.rs +++ b/crates/red_knot_server/src/system.rs @@ -7,8 +7,8 @@ use lsp_types::Url; use ruff_db::file_revision::FileRevision; use ruff_db::system::walk_directory::WalkDirectoryBuilder; use ruff_db::system::{ - DirectoryEntry, FileType, Metadata, OsSystem, Result, System, SystemPath, SystemPathBuf, - SystemVirtualPath, SystemVirtualPathBuf, + DirectoryEntry, FileType, GlobError, Metadata, OsSystem, PatternError, Result, System, + SystemPath, SystemPathBuf, SystemVirtualPath, SystemVirtualPathBuf, }; use ruff_notebook::{Notebook, NotebookError}; @@ -198,6 +198,16 @@ impl System for LSPSystem { self.os_system.walk_directory(path) } + fn glob( + &self, + pattern: &str, + ) -> std::result::Result< + Box>>, + PatternError, + > { + self.os_system.glob(pattern) + } + fn as_any(&self) -> &dyn Any { self } diff --git a/crates/red_knot_wasm/src/lib.rs b/crates/red_knot_wasm/src/lib.rs index 8c144052c4..63b67240ad 100644 --- a/crates/red_knot_wasm/src/lib.rs +++ b/crates/red_knot_wasm/src/lib.rs @@ -3,15 +3,15 @@ use std::any::Any; use js_sys::Error; use wasm_bindgen::prelude::*; -use red_knot_workspace::db::RootDatabase; +use red_knot_workspace::db::{Db, RootDatabase}; use red_knot_workspace::workspace::settings::Configuration; use red_knot_workspace::workspace::WorkspaceMetadata; use ruff_db::diagnostic::Diagnostic; use ruff_db::files::{system_path_to_file, File}; use ruff_db::system::walk_directory::WalkDirectoryBuilder; use ruff_db::system::{ - DirectoryEntry, MemoryFileSystem, Metadata, System, SystemPath, SystemPathBuf, - SystemVirtualPath, + DirectoryEntry, GlobError, MemoryFileSystem, Metadata, PatternError, System, SystemPath, + SystemPathBuf, SystemVirtualPath, }; use ruff_notebook::Notebook; @@ -42,10 +42,10 @@ impl Workspace { #[wasm_bindgen(constructor)] pub fn new(root: &str, settings: &Settings) -> Result { let system = WasmSystem::new(SystemPath::new(root)); - let workspace = WorkspaceMetadata::from_path( + let workspace = WorkspaceMetadata::discover( SystemPath::new(root), &system, - Some(Configuration { + Some(&Configuration { target_version: Some(settings.target_version.into()), ..Configuration::default() }), @@ -226,7 +226,7 @@ impl System for WasmSystem { } fn canonicalize_path(&self, path: &SystemPath) -> ruff_db::system::Result { - Ok(self.fs.canonicalize(path)) + self.fs.canonicalize(path) } fn read_to_string(&self, path: &SystemPath) -> ruff_db::system::Result { @@ -272,6 +272,13 @@ impl System for WasmSystem { self.fs.walk_directory(path) } + fn glob( + &self, + pattern: &str, + ) -> Result>>, PatternError> { + Ok(Box::new(self.fs.glob(pattern)?)) + } + fn as_any(&self) -> &dyn Any { self } diff --git a/crates/red_knot_workspace/Cargo.toml b/crates/red_knot_workspace/Cargo.toml index 944aab459a..c91906d982 100644 --- a/crates/red_knot_workspace/Cargo.toml +++ b/crates/red_knot_workspace/Cargo.toml @@ -15,22 +15,29 @@ license.workspace = true red_knot_python_semantic = { workspace = true } ruff_cache = { workspace = true } -ruff_db = { workspace = true, features = ["os", "cache"] } -ruff_python_ast = { workspace = true } +ruff_db = { workspace = true, features = ["os", "cache", "serde"] } +ruff_python_ast = { workspace = true, features = ["serde"] } ruff_text_size = { workspace = true } red_knot_vendored = { workspace = true } anyhow = { workspace = true } crossbeam = { workspace = true } +glob = { workspace = true } notify = { workspace = true } +pep440_rs = { workspace = true } rayon = { workspace = true } rustc-hash = { workspace = true } salsa = { workspace = true } +serde = { workspace = true } +thiserror = { workspace = true } +toml = { workspace = true } tracing = { workspace = true } [dev-dependencies] +red_knot_python_semantic = { workspace = true, features = ["serde"] } ruff_db = { workspace = true, features = ["testing"] } glob = { workspace = true } +insta = { workspace = true, features = ["redactions", "ron"] } [features] default = ["zstd"] diff --git a/crates/red_knot_workspace/src/db.rs b/crates/red_knot_workspace/src/db.rs index 1b81ff835a..780eb9be81 100644 --- a/crates/red_knot_workspace/src/db.rs +++ b/crates/red_knot_workspace/src/db.rs @@ -15,7 +15,9 @@ use ruff_db::{Db as SourceDb, Upcast}; mod changes; #[salsa::db] -pub trait Db: SemanticDb + Upcast {} +pub trait Db: SemanticDb + Upcast { + fn workspace(&self) -> Workspace; +} #[salsa::db] pub struct RootDatabase { @@ -45,11 +47,6 @@ impl RootDatabase { Ok(db) } - pub fn workspace(&self) -> Workspace { - // SAFETY: The workspace is always initialized in `new`. - self.workspace.unwrap() - } - /// Checks all open files in the workspace and its dependencies. pub fn check(&self) -> Result>, Cancelled> { self.with_db(|db| db.workspace().check(db)) @@ -153,7 +150,11 @@ impl salsa::Database for RootDatabase { } #[salsa::db] -impl Db for RootDatabase {} +impl Db for RootDatabase { + fn workspace(&self) -> Workspace { + self.workspace.unwrap() + } +} #[cfg(test)] pub(crate) mod tests { @@ -168,6 +169,7 @@ pub(crate) mod tests { use ruff_db::{Db as SourceDb, Upcast}; use crate::db::Db; + use crate::workspace::{Workspace, WorkspaceMetadata}; #[salsa::db] pub(crate) struct TestDb { @@ -176,17 +178,23 @@ pub(crate) mod tests { files: Files, system: TestSystem, vendored: VendoredFileSystem, + workspace: Option, } impl TestDb { - pub(crate) fn new() -> Self { - Self { + pub(crate) fn new(workspace: WorkspaceMetadata) -> Self { + let mut db = Self { storage: salsa::Storage::default(), system: TestSystem::default(), vendored: red_knot_vendored::file_system().clone(), files: Files::default(), events: Arc::default(), - } + workspace: None, + }; + + let workspace = Workspace::from_metadata(&db, workspace); + db.workspace = Some(workspace); + db } } @@ -254,7 +262,11 @@ pub(crate) mod tests { } #[salsa::db] - impl Db for TestDb {} + impl Db for TestDb { + fn workspace(&self) -> Workspace { + self.workspace.unwrap() + } + } #[salsa::db] impl salsa::Database for TestDb { diff --git a/crates/red_knot_workspace/src/db/changes.rs b/crates/red_knot_workspace/src/db/changes.rs index 30a37dc17b..a5ee5b3936 100644 --- a/crates/red_knot_workspace/src/db/changes.rs +++ b/crates/red_knot_workspace/src/db/changes.rs @@ -1,16 +1,15 @@ +use crate::db::{Db, RootDatabase}; +use crate::watch; +use crate::watch::{ChangeEvent, CreatedKind, DeletedKind}; +use crate::workspace::settings::Configuration; +use crate::workspace::{Workspace, WorkspaceMetadata}; use red_knot_python_semantic::Program; use ruff_db::files::{system_path_to_file, File, Files}; use ruff_db::system::walk_directory::WalkState; use ruff_db::system::SystemPath; -use ruff_db::Db; +use ruff_db::Db as _; use rustc_hash::FxHashSet; -use crate::db::RootDatabase; -use crate::watch; -use crate::watch::{CreatedKind, DeletedKind}; -use crate::workspace::settings::Configuration; -use crate::workspace::WorkspaceMetadata; - impl RootDatabase { #[tracing::instrument(level = "debug", skip(self, changes, base_configuration))] pub fn apply_changes( @@ -18,7 +17,7 @@ impl RootDatabase { changes: Vec, base_configuration: Option<&Configuration>, ) { - let workspace = self.workspace(); + let mut workspace = self.workspace(); let workspace_path = workspace.root(self).to_path_buf(); let program = Program::get(self); let custom_stdlib_versions_path = program @@ -58,6 +57,12 @@ impl RootDatabase { // Changes to ignore files or settings can change the workspace structure or add/remove files // from packages. if let Some(package) = workspace.package(self, path) { + if package.root(self) == workspace.root(self) + || matches!(change, ChangeEvent::Deleted { .. }) + { + workspace_change = true; + } + changed_packages.insert(package); } else { workspace_change = true; @@ -151,18 +156,22 @@ impl RootDatabase { } if workspace_change { - match WorkspaceMetadata::from_path( - &workspace_path, - self.system(), - base_configuration.cloned(), - ) { + match WorkspaceMetadata::discover(&workspace_path, self.system(), base_configuration) { Ok(metadata) => { - tracing::debug!("Reloading workspace after structural change"); - // TODO: Handle changes in the program settings. - workspace.reload(self, metadata); + if metadata.root() == workspace.root(self) { + tracing::debug!("Reloading workspace after structural change"); + // TODO: Handle changes in the program settings. + workspace.reload(self, metadata); + } else { + tracing::debug!("Replace workspace after structural change"); + workspace = Workspace::from_metadata(self, metadata); + self.workspace = Some(workspace); + } } Err(error) => { - tracing::error!("Failed to load workspace, keep old workspace: {error}"); + tracing::error!( + "Failed to load workspace, keeping old workspace configuration: {error}" + ); } } @@ -227,6 +236,3 @@ impl RootDatabase { } } } - -#[cfg(test)] -mod tests {} diff --git a/crates/red_knot_workspace/src/watch/workspace_watcher.rs b/crates/red_knot_workspace/src/watch/workspace_watcher.rs index 1e653d131a..a76f09e585 100644 --- a/crates/red_knot_workspace/src/watch/workspace_watcher.rs +++ b/crates/red_knot_workspace/src/watch/workspace_watcher.rs @@ -6,9 +6,9 @@ use tracing::info; use red_knot_python_semantic::system_module_search_paths; use ruff_cache::{CacheKey, CacheKeyHasher}; use ruff_db::system::{SystemPath, SystemPathBuf}; -use ruff_db::Upcast; +use ruff_db::{Db as _, Upcast}; -use crate::db::RootDatabase; +use crate::db::{Db, RootDatabase}; use crate::watch::Watcher; /// Wrapper around a [`Watcher`] that watches the relevant paths of a workspace. @@ -68,10 +68,9 @@ impl WorkspaceWatcher { self.has_errored_paths = false; - let workspace_path = workspace_path - .as_utf8_path() - .canonicalize_utf8() - .map(SystemPathBuf::from_utf8_path_buf) + let workspace_path = db + .system() + .canonicalize_path(&workspace_path) .unwrap_or(workspace_path); // Find the non-overlapping module search paths and filter out paths that are already covered by the workspace. diff --git a/crates/red_knot_workspace/src/workspace.rs b/crates/red_knot_workspace/src/workspace.rs index 9d2978d547..98ba10fe6a 100644 --- a/crates/red_knot_workspace/src/workspace.rs +++ b/crates/red_knot_workspace/src/workspace.rs @@ -1,26 +1,28 @@ -use rustc_hash::{FxBuildHasher, FxHashSet}; -use salsa::{Durability, Setter as _}; -use std::borrow::Cow; -use std::{collections::BTreeMap, sync::Arc}; - use crate::db::Db; use crate::db::RootDatabase; use crate::workspace::files::{Index, Indexed, IndexedIter, PackageFiles}; -pub use metadata::{PackageMetadata, WorkspaceMetadata}; +pub use metadata::{PackageMetadata, WorkspaceDiscoveryError, WorkspaceMetadata}; use red_knot_python_semantic::types::check_types; use red_knot_python_semantic::SearchPathSettings; use ruff_db::diagnostic::{Diagnostic, ParseDiagnostic, Severity}; use ruff_db::parsed::parsed_module; use ruff_db::source::{source_text, SourceTextError}; +use ruff_db::system::FileType; use ruff_db::{ files::{system_path_to_file, File}, system::{walk_directory::WalkState, SystemPath, SystemPathBuf}, }; use ruff_python_ast::{name::Name, PySourceType}; use ruff_text_size::TextRange; +use rustc_hash::{FxBuildHasher, FxHashSet}; +use salsa::{Durability, Setter as _}; +use std::borrow::Cow; +use std::iter::FusedIterator; +use std::{collections::BTreeMap, sync::Arc}; mod files; mod metadata; +mod pyproject; pub mod settings; /// The project workspace as a Salsa ingredient. @@ -81,7 +83,7 @@ pub struct Workspace { /// The (first-party) packages in this workspace. #[return_ref] - package_tree: BTreeMap, + package_tree: PackageTree, /// The unresolved search path configuration. #[return_ref] @@ -106,7 +108,6 @@ pub struct Package { } impl Workspace { - /// Discovers the closest workspace at `path` and returns its metadata. pub fn from_metadata(db: &dyn Db, metadata: WorkspaceMetadata) -> Self { let mut packages = BTreeMap::new(); @@ -114,10 +115,12 @@ impl Workspace { packages.insert(package.root.clone(), Package::from_metadata(db, package)); } + let program_settings = metadata.settings.program; + Workspace::builder( metadata.root, - packages, - metadata.settings.program.search_paths, + PackageTree(packages), + program_settings.search_paths, ) .durability(Durability::MEDIUM) .open_fileset_durability(Durability::LOW) @@ -128,15 +131,11 @@ impl Workspace { self.root_buf(db) } - pub fn packages(self, db: &dyn Db) -> impl Iterator + '_ { - self.package_tree(db).values().copied() - } - pub fn reload(self, db: &mut dyn Db, metadata: WorkspaceMetadata) { tracing::debug!("Reloading workspace"); assert_eq!(self.root(db), metadata.root()); - let mut old_packages = self.package_tree(db).clone(); + let mut old_packages = self.package_tree(db).0.clone(); let mut new_packages = BTreeMap::new(); for package_metadata in metadata.packages { @@ -157,13 +156,13 @@ impl Workspace { .to(metadata.settings.program.search_paths); } - self.set_package_tree(db).to(new_packages); + self.set_package_tree(db).to(PackageTree(new_packages)); } pub fn update_package(self, db: &mut dyn Db, metadata: PackageMetadata) -> anyhow::Result<()> { let path = metadata.root().to_path_buf(); - if let Some(package) = self.package_tree(db).get(&path).copied() { + if let Some(package) = self.package_tree(db).get(&path) { package.update(db, metadata); Ok(()) } else { @@ -171,20 +170,17 @@ impl Workspace { } } + pub fn packages(self, db: &dyn Db) -> &PackageTree { + self.package_tree(db) + } + /// Returns the closest package to which the first-party `path` belongs. /// /// Returns `None` if the `path` is outside of any package or if `file` isn't a first-party file /// (e.g. third-party dependencies or `excluded`). - pub fn package(self, db: &dyn Db, path: &SystemPath) -> Option { + pub fn package(self, db: &dyn Db, path: impl AsRef) -> Option { let packages = self.package_tree(db); - - let (package_path, package) = packages.range(..=path.to_path_buf()).next_back()?; - - if path.starts_with(package_path) { - Some(*package) - } else { - None - } + packages.get(path.as_ref()) } /// Checks all open files in the workspace and its dependencies. @@ -342,7 +338,7 @@ impl Package { let _entered = tracing::debug_span!("index_package_files", package = %self.name(db)).entered(); - let files = discover_package_files(db, self.root(db)); + let files = discover_package_files(db, self); tracing::info!("Found {} files in package `{}`", files.len(), self.name(db)); vacant.set(files) } @@ -407,23 +403,33 @@ pub(super) fn check_file(db: &dyn Db, file: File) -> Vec> { diagnostics } -fn discover_package_files(db: &dyn Db, path: &SystemPath) -> FxHashSet { +fn discover_package_files(db: &dyn Db, package: Package) -> FxHashSet { let paths = std::sync::Mutex::new(Vec::new()); + let packages = db.workspace().packages(db); - db.system().walk_directory(path).run(|| { + db.system().walk_directory(package.root(db)).run(|| { Box::new(|entry| { match entry { Ok(entry) => { // Skip over any non python files to avoid creating too many entries in `Files`. - if entry.file_type().is_file() - && entry - .path() - .extension() - .and_then(PySourceType::try_from_extension) - .is_some() - { - let mut paths = paths.lock().unwrap(); - paths.push(entry.into_path()); + match entry.file_type() { + FileType::File => { + if entry + .path() + .extension() + .and_then(PySourceType::try_from_extension) + .is_some() + { + let mut paths = paths.lock().unwrap(); + paths.push(entry.into_path()); + } + } + FileType::Directory | FileType::Symlink => { + // Don't traverse into nested packages (the workspace-package is an ancestor of all other packages) + if packages.get(entry.path()) != Some(package) { + return WalkState::Skip; + } + } } } Err(error) => { @@ -464,6 +470,7 @@ impl<'a> WorkspaceFiles<'a> { WorkspaceFiles::PackageFiles( workspace .packages(db) + .iter() .map(|package| package.files(db)) .collect(), ) @@ -545,20 +552,78 @@ impl Diagnostic for IOErrorDiagnostic { } } +#[derive(Debug, Eq, PartialEq, Clone)] +pub struct PackageTree(BTreeMap); + +impl PackageTree { + pub fn get(&self, path: &SystemPath) -> Option { + let (package_path, package) = self.0.range(..=path.to_path_buf()).next_back()?; + + if path.starts_with(package_path) { + Some(*package) + } else { + None + } + } + + // The package table should never be empty, that's why `is_empty` makes little sense + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn iter(&self) -> PackageTreeIter { + PackageTreeIter(self.0.values()) + } +} + +impl<'a> IntoIterator for &'a PackageTree { + type Item = Package; + type IntoIter = PackageTreeIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +pub struct PackageTreeIter<'a>(std::collections::btree_map::Values<'a, SystemPathBuf, Package>); + +impl Iterator for PackageTreeIter<'_> { + type Item = Package; + + fn next(&mut self) -> Option { + self.0.next().copied() + } + + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } + + fn last(mut self) -> Option { + self.0.next_back().copied() + } +} + +impl ExactSizeIterator for PackageTreeIter<'_> {} +impl FusedIterator for PackageTreeIter<'_> {} + #[cfg(test)] mod tests { use crate::db::tests::TestDb; - use crate::workspace::check_file; + use crate::workspace::{check_file, WorkspaceMetadata}; use red_knot_python_semantic::types::check_types; use ruff_db::diagnostic::Diagnostic; use ruff_db::files::system_path_to_file; use ruff_db::source::source_text; - use ruff_db::system::{DbWithTestSystem, SystemPath}; + use ruff_db::system::{DbWithTestSystem, SystemPath, SystemPathBuf}; use ruff_db::testing::assert_function_query_was_not_run; + use ruff_python_ast::name::Name; #[test] fn check_file_skips_type_checking_when_file_cant_be_read() -> ruff_db::system::Result<()> { - let mut db = TestDb::new(); + let workspace = + WorkspaceMetadata::single_package(Name::new_static("test"), SystemPathBuf::from("/")); + let mut db = TestDb::new(workspace); let path = SystemPath::new("test.py"); db.write_file(path, "x = 10")?; diff --git a/crates/red_knot_workspace/src/workspace/files.rs b/crates/red_knot_workspace/src/workspace/files.rs index 634cf9ba8a..e434fb7f72 100644 --- a/crates/red_knot_workspace/src/workspace/files.rs +++ b/crates/red_knot_workspace/src/workspace/files.rs @@ -232,21 +232,28 @@ impl Drop for IndexedMut<'_> { mod tests { use rustc_hash::FxHashSet; + use crate::db::tests::TestDb; + use crate::db::Db; + use crate::workspace::files::Index; + use crate::workspace::WorkspaceMetadata; use ruff_db::files::system_path_to_file; use ruff_db::system::{DbWithTestSystem, SystemPathBuf}; use ruff_python_ast::name::Name; - use crate::db::tests::TestDb; - use crate::workspace::files::Index; - use crate::workspace::Package; - #[test] fn re_entrance() -> anyhow::Result<()> { - let mut db = TestDb::new(); + let metadata = WorkspaceMetadata::single_package( + Name::new_static("test"), + SystemPathBuf::from("/test"), + ); + let mut db = TestDb::new(metadata); db.write_file("test.py", "")?; - let package = Package::new(&db, Name::new("test"), SystemPathBuf::from("/test")); + let package = db + .workspace() + .package(&db, "/test") + .expect("test package to exist"); let file = system_path_to_file(&db, "test.py").unwrap(); diff --git a/crates/red_knot_workspace/src/workspace/metadata.rs b/crates/red_knot_workspace/src/workspace/metadata.rs index a56f93e19b..7321c62dee 100644 --- a/crates/red_knot_workspace/src/workspace/metadata.rs +++ b/crates/red_knot_workspace/src/workspace/metadata.rs @@ -1,67 +1,191 @@ -use crate::workspace::settings::{Configuration, WorkspaceSettings}; -use ruff_db::system::{System, SystemPath, SystemPathBuf}; +use ruff_db::system::{GlobError, System, SystemPath, SystemPathBuf}; use ruff_python_ast::name::Name; +use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet}; +use thiserror::Error; -#[derive(Debug)] +use crate::workspace::pyproject::{PyProject, PyProjectError, Workspace}; +use crate::workspace::settings::{Configuration, WorkspaceSettings}; + +#[derive(Debug, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] pub struct WorkspaceMetadata { pub(super) root: SystemPathBuf, /// The (first-party) packages in this workspace. pub(super) packages: Vec, + /// The resolved settings for this workspace. pub(super) settings: WorkspaceSettings, } /// A first-party package in a workspace. -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] pub struct PackageMetadata { pub(super) name: Name, /// The path to the root directory of the package. pub(super) root: SystemPathBuf, - // TODO: Add the loaded package configuration (not the nested ruff settings) + + pub(super) configuration: Configuration, } impl WorkspaceMetadata { + /// Creates a workspace that consists of a single package located at `root`. + pub fn single_package(name: Name, root: SystemPathBuf) -> Self { + let package = PackageMetadata { + name, + root: root.clone(), + configuration: Configuration::default(), + }; + + let packages = vec![package]; + let settings = packages[0] + .configuration + .to_workspace_settings(&root, &packages); + + Self { + root, + packages, + settings, + } + } + /// Discovers the closest workspace at `path` and returns its metadata. - pub fn from_path( + /// + /// 1. Traverse upwards in the `path`'s ancestor chain and find the first `pyproject.toml`. + /// 1. If the `pyproject.toml` contains no `knot.workspace` table, then keep traversing the `path`'s ancestor + /// chain until we find one or reach the root. + /// 1. If we've found a workspace, then resolve the workspace's members and assert that the closest + /// package (the first found package without a `knot.workspace` table is a member. If not, create + /// a single package workspace for the closest package. + /// 1. If there's no `pyrpoject.toml` with a `knot.workspace` table, then create a single-package workspace. + /// 1. If no ancestor directory contains any `pyproject.toml`, create an ad-hoc workspace for `path` + /// that consists of a single package and uses the default settings. + pub fn discover( path: &SystemPath, system: &dyn System, - base_configuration: Option, - ) -> anyhow::Result { - assert!( - system.is_directory(path), - "Workspace root path must be a directory" - ); - tracing::debug!("Searching for workspace in '{path}'"); + base_configuration: Option<&Configuration>, + ) -> Result { + tracing::debug!("Searching for a workspace in '{path}'"); - let root = path.to_path_buf(); - - // TODO: Discover package name from `pyproject.toml`. - let package_name: Name = path.file_name().unwrap_or("").into(); - - let package = PackageMetadata { - name: package_name, - root: root.clone(), - }; - - // TODO: Load the configuration from disk. - let mut configuration = Configuration::default(); - - if let Some(base_configuration) = base_configuration { - configuration.extend(base_configuration); + if !system.is_directory(path) { + return Err(WorkspaceDiscoveryError::NotADirectory(path.to_path_buf())); } - // TODO: Respect the package configurations when resolving settings (e.g. for the target version). - let settings = configuration.into_workspace_settings(&root); + let mut closest_package: Option = None; - let workspace = WorkspaceMetadata { - root, - packages: vec![package], - settings, + for ancestor in path.ancestors() { + let pyproject_path = ancestor.join("pyproject.toml"); + if let Ok(pyproject_str) = system.read_to_string(&pyproject_path) { + let pyproject = PyProject::from_str(&pyproject_str).map_err(|error| { + WorkspaceDiscoveryError::InvalidPyProject { + path: pyproject_path, + source: Box::new(error), + } + })?; + + let workspace_table = pyproject.workspace().cloned(); + let package = PackageMetadata::from_pyproject( + pyproject, + ancestor.to_path_buf(), + base_configuration, + ); + + if let Some(workspace_table) = workspace_table { + let workspace_root = ancestor; + tracing::debug!("Found workspace at '{}'", workspace_root); + + match collect_packages( + package, + &workspace_table, + closest_package, + base_configuration, + system, + )? { + CollectedPackagesOrStandalone::Packages(mut packages) => { + let mut by_name = + FxHashMap::with_capacity_and_hasher(packages.len(), FxBuildHasher); + + let mut workspace_package = None; + + for package in &packages { + if let Some(conflicting) = by_name.insert(package.name(), package) { + return Err(WorkspaceDiscoveryError::DuplicatePackageNames { + name: package.name().clone(), + first: conflicting.root().to_path_buf(), + second: package.root().to_path_buf(), + }); + } + + if package.root() == workspace_root { + workspace_package = Some(package); + } else if !package.root().starts_with(workspace_root) { + return Err(WorkspaceDiscoveryError::PackageOutsideWorkspace { + package_name: package.name().clone(), + package_root: package.root().to_path_buf(), + workspace_root: workspace_root.to_path_buf(), + }); + } + } + + let workspace_package = workspace_package + .expect("workspace package to be part of the workspace's packages"); + + let settings = workspace_package + .configuration + .to_workspace_settings(workspace_root, &packages); + + packages.sort_unstable_by(|a, b| a.root().cmp(b.root())); + + return Ok(Self { + root: workspace_root.to_path_buf(), + packages, + settings, + }); + } + CollectedPackagesOrStandalone::Standalone(package) => { + closest_package = Some(package); + break; + } + } + } + + // Not a workspace itself, keep looking for an enclosing workspace. + if closest_package.is_none() { + closest_package = Some(package); + } + } + } + + // No workspace found, but maybe a pyproject.toml was found. + let package = if let Some(enclosing_package) = closest_package { + tracing::debug!("Single package workspace at '{}'", enclosing_package.root()); + + enclosing_package + } else { + tracing::debug!("The ancestor directories contain no `pyproject.toml`. Falling back to a virtual project."); + + // Create a package with a default configuration + PackageMetadata { + name: path.file_name().unwrap_or("root").into(), + root: path.to_path_buf(), + // TODO create the configuration from the pyproject toml + configuration: base_configuration.cloned().unwrap_or_default(), + } }; - Ok(workspace) + let root = package.root().to_path_buf(); + let packages = vec![package]; + let settings = packages[0] + .configuration + .to_workspace_settings(&root, &packages); + + Ok(Self { + root, + packages, + settings, + }) } pub fn root(&self) -> &SystemPath { @@ -78,6 +202,30 @@ impl WorkspaceMetadata { } impl PackageMetadata { + pub(crate) fn from_pyproject( + pyproject: PyProject, + root: SystemPathBuf, + base_configuration: Option<&Configuration>, + ) -> Self { + let name = pyproject.project.and_then(|project| project.name); + let name = name + .map(|name| Name::new(&*name)) + .unwrap_or_else(|| Name::new(root.file_name().unwrap_or("root"))); + + // TODO: load configuration from pyrpoject.toml + let mut configuration = Configuration::default(); + + if let Some(base_configuration) = base_configuration { + configuration.extend(base_configuration.clone()); + } + + PackageMetadata { + name, + root, + configuration, + } + } + pub fn name(&self) -> &Name { &self.name } @@ -86,3 +234,577 @@ impl PackageMetadata { &self.root } } + +fn collect_packages( + workspace_package: PackageMetadata, + workspace_table: &Workspace, + closest_package: Option, + base_configuration: Option<&Configuration>, + system: &dyn System, +) -> Result { + let workspace_root = workspace_package.root().to_path_buf(); + let mut member_paths = FxHashSet::default(); + + for glob in workspace_table.members() { + let full_glob = workspace_package.root().join(glob); + + let matches = system.glob(full_glob.as_str()).map_err(|error| { + WorkspaceDiscoveryError::InvalidMembersPattern { + raw_glob: glob.clone(), + source: error, + } + })?; + + for result in matches { + let path = result?; + let normalized = SystemPath::absolute(path, &workspace_root); + + // Skip over non-directory entry. E.g.finder might end up creating a `.DS_STORE` file + // that ends up matching `/projects/*`. + if system.is_directory(&normalized) { + member_paths.insert(normalized); + } else { + tracing::debug!("Ignoring non-directory workspace member '{normalized}'"); + } + } + } + + // The workspace root is always a member. Don't re-add it + let mut packages = vec![workspace_package]; + member_paths.remove(&workspace_root); + + // Add the package that is closest to the current working directory except + // if that package isn't a workspace member, then fallback to creating a single + // package workspace. + if let Some(closest_package) = closest_package { + // the closest `pyproject.toml` isn't a member of this workspace because it is + // explicitly included or simply not listed. + // Create a standalone workspace. + if !member_paths.remove(closest_package.root()) + || workspace_table.is_excluded(closest_package.root(), &workspace_root)? + { + tracing::debug!( + "Ignoring workspace '{workspace_root}' because package '{package}' is not a member", + package = closest_package.name() + ); + return Ok(CollectedPackagesOrStandalone::Standalone(closest_package)); + } + + tracing::debug!("adding package '{}'", closest_package.name()); + packages.push(closest_package); + } + + // Add all remaining member paths + for member_path in member_paths { + if workspace_table.is_excluded(&member_path, workspace_root.as_path())? { + tracing::debug!("Ignoring excluded member '{member_path}'"); + continue; + } + + let pyproject_path = member_path.join("pyproject.toml"); + + let pyproject_str = match system.read_to_string(&pyproject_path) { + Ok(pyproject_str) => pyproject_str, + + Err(error) => { + if error.kind() == std::io::ErrorKind::NotFound + && member_path + .file_name() + .is_some_and(|name| name.starts_with('.')) + { + tracing::debug!( + "Ignore member '{member_path}' because it has no pyproject.toml and is hidden", + ); + continue; + } + + return Err(WorkspaceDiscoveryError::MemberFailedToReadPyProject { + package_root: member_path, + source: error, + }); + } + }; + + let pyproject = PyProject::from_str(&pyproject_str).map_err(|error| { + WorkspaceDiscoveryError::InvalidPyProject { + source: Box::new(error), + path: pyproject_path, + } + })?; + + if pyproject.workspace().is_some() { + return Err(WorkspaceDiscoveryError::NestedWorkspaces { + package_root: member_path, + }); + } + + let package = PackageMetadata::from_pyproject(pyproject, member_path, base_configuration); + + tracing::debug!( + "Adding package '{}' at '{}'", + package.name(), + package.root() + ); + + packages.push(package); + } + + Ok(CollectedPackagesOrStandalone::Packages(packages)) +} + +enum CollectedPackagesOrStandalone { + Packages(Vec), + Standalone(PackageMetadata), +} + +#[derive(Debug, Error)] +pub enum WorkspaceDiscoveryError { + #[error("workspace path '{0}' is not a directory")] + NotADirectory(SystemPathBuf), + + #[error("nested workspaces aren't supported but the package located at '{package_root}' defines a `knot.workspace` table")] + NestedWorkspaces { package_root: SystemPathBuf }, + + #[error("the workspace contains two packages named '{name}': '{first}' and '{second}'")] + DuplicatePackageNames { + name: Name, + first: SystemPathBuf, + second: SystemPathBuf, + }, + + #[error("the package '{package_name}' located at '{package_root}' is outside the workspace's root directory '{workspace_root}'")] + PackageOutsideWorkspace { + workspace_root: SystemPathBuf, + package_name: Name, + package_root: SystemPathBuf, + }, + + #[error( + "failed to read the `pyproject.toml` for the package located at '{package_root}': {source}" + )] + MemberFailedToReadPyProject { + package_root: SystemPathBuf, + source: std::io::Error, + }, + + #[error("{path} is not a valid `pyproject.toml`: {source}")] + InvalidPyProject { + source: Box, + path: SystemPathBuf, + }, + + #[error("invalid glob '{raw_glob}' in `tool.knot.workspace.members`: {source}")] + InvalidMembersPattern { + source: glob::PatternError, + raw_glob: String, + }, + + #[error("failed to match member glob: {error}")] + FailedToMatchGlob { + #[from] + error: GlobError, + }, +} + +#[cfg(test)] +mod tests { + //! Integration tests for workspace discovery + + use crate::snapshot_workspace; + use anyhow::Context; + use insta::assert_ron_snapshot; + use ruff_db::system::{SystemPathBuf, TestSystem}; + + use crate::workspace::{WorkspaceDiscoveryError, WorkspaceMetadata}; + + #[test] + fn package_without_pyproject() -> anyhow::Result<()> { + let system = TestSystem::default(); + let root = SystemPathBuf::from("/app"); + + system + .memory_file_system() + .write_files([(root.join("foo.py"), ""), (root.join("bar.py"), "")]) + .context("Failed to write files")?; + + let workspace = WorkspaceMetadata::discover(&root, &system, None) + .context("Failed to discover workspace")?; + + assert_eq!(workspace.root(), &*root); + + snapshot_workspace!(workspace); + + Ok(()) + } + + #[test] + fn single_package() -> anyhow::Result<()> { + let system = TestSystem::default(); + let root = SystemPathBuf::from("/app"); + + system + .memory_file_system() + .write_files([ + ( + root.join("pyproject.toml"), + r#" + [project] + name = "backend" + "#, + ), + (root.join("db/__init__.py"), ""), + ]) + .context("Failed to write files")?; + + let workspace = WorkspaceMetadata::discover(&root, &system, None) + .context("Failed to discover workspace")?; + + assert_eq!(workspace.root(), &*root); + snapshot_workspace!(workspace); + + // Discovering the same package from a subdirectory should give the same result + let from_src = WorkspaceMetadata::discover(&root.join("db"), &system, None) + .context("Failed to discover workspace from src sub-directory")?; + + assert_eq!(from_src, workspace); + + Ok(()) + } + + #[test] + fn workspace_members() -> anyhow::Result<()> { + let system = TestSystem::default(); + let root = SystemPathBuf::from("/app"); + + system + .memory_file_system() + .write_files([ + ( + root.join("pyproject.toml"), + r#" + [project] + name = "workspace-root" + + [tool.knot.workspace] + members = ["packages/*"] + exclude = ["packages/excluded"] + "#, + ), + ( + root.join("packages/a/pyproject.toml"), + r#" + [project] + name = "member-a" + "#, + ), + ( + root.join("packages/x/pyproject.toml"), + r#" + [project] + name = "member-x" + "#, + ), + ]) + .context("Failed to write files")?; + + let workspace = WorkspaceMetadata::discover(&root, &system, None) + .context("Failed to discover workspace")?; + + assert_eq!(workspace.root(), &*root); + + snapshot_workspace!(workspace); + + // Discovering the same package from a member should give the same result + let from_src = WorkspaceMetadata::discover(&root.join("packages/a"), &system, None) + .context("Failed to discover workspace from src sub-directory")?; + + assert_eq!(from_src, workspace); + + Ok(()) + } + + #[test] + fn workspace_excluded() -> anyhow::Result<()> { + let system = TestSystem::default(); + let root = SystemPathBuf::from("/app"); + + system + .memory_file_system() + .write_files([ + ( + root.join("pyproject.toml"), + r#" + [project] + name = "workspace-root" + + [tool.knot.workspace] + members = ["packages/*"] + exclude = ["packages/excluded"] + "#, + ), + ( + root.join("packages/a/pyproject.toml"), + r#" + [project] + name = "member-a" + "#, + ), + ( + root.join("packages/excluded/pyproject.toml"), + r#" + [project] + name = "member-x" + "#, + ), + ]) + .context("Failed to write files")?; + + let workspace = WorkspaceMetadata::discover(&root, &system, None) + .context("Failed to discover workspace")?; + + assert_eq!(workspace.root(), &*root); + snapshot_workspace!(workspace); + + // Discovering the `workspace` for `excluded` should discover a single-package workspace + let excluded_workspace = + WorkspaceMetadata::discover(&root.join("packages/excluded"), &system, None) + .context("Failed to discover workspace from src sub-directory")?; + + assert_ne!(excluded_workspace, workspace); + + Ok(()) + } + + #[test] + fn workspace_non_unique_member_names() -> anyhow::Result<()> { + let system = TestSystem::default(); + let root = SystemPathBuf::from("/app"); + + system + .memory_file_system() + .write_files([ + ( + root.join("pyproject.toml"), + r#" + [project] + name = "workspace-root" + + [tool.knot.workspace] + members = ["packages/*"] + "#, + ), + ( + root.join("packages/a/pyproject.toml"), + r#" + [project] + name = "a" + "#, + ), + ( + root.join("packages/b/pyproject.toml"), + r#" + [project] + name = "a" + "#, + ), + ]) + .context("Failed to write files")?; + + let error = WorkspaceMetadata::discover(&root, &system, None).expect_err( + "Discovery should error because the workspace contains two packages with the same names.", + ); + + assert_error_eq(&error, "the workspace contains two packages named 'a': '/app/packages/a' and '/app/packages/b'"); + + Ok(()) + } + + #[test] + fn nested_workspaces() -> anyhow::Result<()> { + let system = TestSystem::default(); + let root = SystemPathBuf::from("/app"); + + system + .memory_file_system() + .write_files([ + ( + root.join("pyproject.toml"), + r#" + [project] + name = "workspace-root" + + [tool.knot.workspace] + members = ["packages/*"] + "#, + ), + ( + root.join("packages/a/pyproject.toml"), + r#" + [project] + name = "nested-workspace" + + [tool.knot.workspace] + members = ["packages/*"] + "#, + ), + ]) + .context("Failed to write files")?; + + let error = WorkspaceMetadata::discover(&root, &system, None).expect_err( + "Discovery should error because the workspace has a package that itself is a workspace", + ); + + assert_error_eq(&error, "nested workspaces aren't supported but the package located at '/app/packages/a' defines a `knot.workspace` table"); + + Ok(()) + } + + #[test] + fn member_missing_pyproject_toml() -> anyhow::Result<()> { + let system = TestSystem::default(); + let root = SystemPathBuf::from("/app"); + + system + .memory_file_system() + .write_files([ + ( + root.join("pyproject.toml"), + r#" + [project] + name = "workspace-root" + + [tool.knot.workspace] + members = ["packages/*"] + "#, + ), + (root.join("packages/a/test.py"), ""), + ]) + .context("Failed to write files")?; + + let error = WorkspaceMetadata::discover(&root, &system, None) + .expect_err("Discovery should error because member `a` has no `pypyroject.toml`"); + + assert_error_eq(&error, "failed to read the `pyproject.toml` for the package located at '/app/packages/a': No such file or directory"); + + Ok(()) + } + + /// Folders that match the members pattern but don't have a pyproject.toml + /// aren't valid members and discovery fails. However, don't fail + /// if the folder name indicates that it is a hidden folder that might + /// have been crated by another tool + #[test] + fn member_pattern_matching_hidden_folder() -> anyhow::Result<()> { + let system = TestSystem::default(); + let root = SystemPathBuf::from("/app"); + + system + .memory_file_system() + .write_files([ + ( + root.join("pyproject.toml"), + r#" + [project] + name = "workspace-root" + + [tool.knot.workspace] + members = ["packages/*"] + "#, + ), + (root.join("packages/.hidden/a.py"), ""), + ]) + .context("Failed to write files")?; + + let workspace = WorkspaceMetadata::discover(&root, &system, None)?; + + snapshot_workspace!(workspace); + + Ok(()) + } + + #[test] + fn member_pattern_matching_file() -> anyhow::Result<()> { + let system = TestSystem::default(); + let root = SystemPathBuf::from("/app"); + + system + .memory_file_system() + .write_files([ + ( + root.join("pyproject.toml"), + r#" + [project] + name = "workspace-root" + + [tool.knot.workspace] + members = ["packages/*"] + "#, + ), + (root.join("packages/.DS_STORE"), ""), + ]) + .context("Failed to write files")?; + + let workspace = WorkspaceMetadata::discover(&root, &system, None)?; + + snapshot_workspace!(&workspace); + + Ok(()) + } + + #[test] + fn workspace_root_not_an_ancestor_of_member() -> anyhow::Result<()> { + let system = TestSystem::default(); + let root = SystemPathBuf::from("/app"); + + system + .memory_file_system() + .write_files([ + ( + root.join("pyproject.toml"), + r#" + [project] + name = "workspace-root" + + [tool.knot.workspace] + members = ["../packages/*"] + "#, + ), + ( + root.join("../packages/a/pyproject.toml"), + r#" + [project] + name = "a" + "#, + ), + ]) + .context("Failed to write files")?; + + let error = WorkspaceMetadata::discover(&root, &system, None).expect_err( + "Discovery should error because member `a` is outside the workspace's directory`", + ); + + assert_error_eq(&error, "the package 'a' located at '/packages/a' is outside the workspace's root directory '/app'"); + + Ok(()) + } + + #[track_caller] + fn assert_error_eq(error: &WorkspaceDiscoveryError, message: &str) { + assert_eq!(error.to_string().replace('\\', "/"), message); + } + + /// Snapshots a workspace but with all paths using unix separators. + #[macro_export] + macro_rules! snapshot_workspace { + ($workspace:expr) => {{ + assert_ron_snapshot!($workspace,{ + ".root" => insta::dynamic_redaction(|content, _content_path| { + content.as_str().unwrap().replace("\\", "/") + }), + ".packages[].root" => insta::dynamic_redaction(|content, _content_path| { + content.as_str().unwrap().replace("\\", "/") + }), + }); + }}; +} +} diff --git a/crates/red_knot_workspace/src/workspace/pyproject.rs b/crates/red_knot_workspace/src/workspace/pyproject.rs new file mode 100644 index 0000000000..9f74774863 --- /dev/null +++ b/crates/red_knot_workspace/src/workspace/pyproject.rs @@ -0,0 +1,108 @@ +mod package_name; + +use pep440_rs::{Version, VersionSpecifiers}; +use serde::Deserialize; +use thiserror::Error; + +use crate::workspace::metadata::WorkspaceDiscoveryError; +pub(crate) use package_name::PackageName; +use ruff_db::system::SystemPath; + +/// A `pyproject.toml` as specified in PEP 517. +#[derive(Deserialize, Debug, Default, Clone)] +#[serde(rename_all = "kebab-case")] +pub(crate) struct PyProject { + /// PEP 621-compliant project metadata. + pub project: Option, + /// Tool-specific metadata. + pub tool: Option, +} + +impl PyProject { + pub(crate) fn workspace(&self) -> Option<&Workspace> { + self.tool + .as_ref() + .and_then(|tool| tool.knot.as_ref()) + .and_then(|knot| knot.workspace.as_ref()) + } +} + +#[derive(Error, Debug)] +pub enum PyProjectError { + #[error(transparent)] + TomlSyntax(#[from] toml::de::Error), +} + +impl PyProject { + pub(crate) fn from_str(content: &str) -> Result { + toml::from_str(content).map_err(PyProjectError::TomlSyntax) + } +} + +/// PEP 621 project metadata (`project`). +/// +/// See . +#[derive(Deserialize, Debug, Clone, PartialEq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "kebab-case")] +pub(crate) struct Project { + /// The name of the project + /// + /// Note: Intentionally option to be more permissive during deserialization. + /// `PackageMetadata::from_pyproject` reports missing names. + pub name: Option, + /// The version of the project + pub version: Option, + /// The Python versions this project is compatible with. + pub requires_python: Option, +} + +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +pub(crate) struct Tool { + pub knot: Option, +} + +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "kebab-case", deny_unknown_fields)] +pub(crate) struct Knot { + pub(crate) workspace: Option, +} + +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "kebab-case", deny_unknown_fields)] +pub(crate) struct Workspace { + pub(crate) members: Option>, + pub(crate) exclude: Option>, +} + +impl Workspace { + pub(crate) fn members(&self) -> &[String] { + self.members.as_deref().unwrap_or_default() + } + + pub(crate) fn exclude(&self) -> &[String] { + self.exclude.as_deref().unwrap_or_default() + } + + pub(crate) fn is_excluded( + &self, + path: &SystemPath, + workspace_root: &SystemPath, + ) -> Result { + for exclude in self.exclude() { + let full_glob = + glob::Pattern::new(workspace_root.join(exclude).as_str()).map_err(|error| { + WorkspaceDiscoveryError::InvalidMembersPattern { + raw_glob: exclude.clone(), + source: error, + } + })?; + + if full_glob.matches_path(path.as_std_path()) { + return Ok(true); + } + } + + Ok(false) + } +} diff --git a/crates/red_knot_workspace/src/workspace/pyproject/package_name.rs b/crates/red_knot_workspace/src/workspace/pyproject/package_name.rs new file mode 100644 index 0000000000..f797720f34 --- /dev/null +++ b/crates/red_knot_workspace/src/workspace/pyproject/package_name.rs @@ -0,0 +1,140 @@ +use serde::{Deserialize, Deserializer, Serialize}; +use std::ops::Deref; +use thiserror::Error; + +/// The normalized name of a package. +/// +/// Converts the name to lowercase and collapses runs of `-`, `_`, and `.` down to a single `-`. +/// For example, `---`, `.`, and `__` are all converted to a single `-`. +/// +/// See: +#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] +pub(crate) struct PackageName(String); + +impl PackageName { + /// Create a validated, normalized package name. + pub(crate) fn new(name: String) -> Result { + if name.is_empty() { + return Err(InvalidPackageNameError::Empty); + } + + if name.starts_with(['-', '_', '.']) { + return Err(InvalidPackageNameError::NonAlphanumericStart( + name.chars().next().unwrap(), + )); + } + + if name.ends_with(['-', '_', '.']) { + return Err(InvalidPackageNameError::NonAlphanumericEnd( + name.chars().last().unwrap(), + )); + } + + let Some(start) = name.find(|c: char| { + !c.is_ascii() || c.is_ascii_uppercase() || matches!(c, '-' | '_' | '.') + }) else { + return Ok(Self(name)); + }; + + let (already_normalized, maybe_normalized) = name.split_at(start); + + let mut normalized = String::with_capacity(name.len()); + normalized.push_str(already_normalized); + let mut last = None; + + for c in maybe_normalized.chars() { + if !c.is_ascii() { + return Err(InvalidPackageNameError::InvalidCharacter(c)); + } + + if c.is_ascii_uppercase() { + normalized.push(c.to_ascii_lowercase()); + } else if matches!(c, '-' | '_' | '.') { + if matches!(last, Some('-' | '_' | '.')) { + // Only keep a single instance of `-`, `_` and `.` + } else { + normalized.push('-'); + } + } else { + normalized.push(c); + } + + last = Some(c); + } + + Ok(Self(normalized)) + } + + /// Returns the underlying package name. + pub(crate) fn as_str(&self) -> &str { + &self.0 + } +} + +impl From for String { + fn from(value: PackageName) -> Self { + value.0 + } +} + +impl<'de> Deserialize<'de> for PackageName { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Self::new(s).map_err(serde::de::Error::custom) + } +} + +impl std::fmt::Display for PackageName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl Deref for PackageName { + type Target = str; + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +#[derive(Error, Debug)] +pub(crate) enum InvalidPackageNameError { + #[error("name must start with letter or number but it starts with '{0}'")] + NonAlphanumericStart(char), + #[error("name must end with letter or number but it ends with '{0}'")] + NonAlphanumericEnd(char), + #[error("valid name consists only of ASCII letters and numbers, period, underscore and hyphen but name contains '{0}'" + )] + InvalidCharacter(char), + #[error("name must not be empty")] + Empty, +} + +#[cfg(test)] +mod tests { + use super::PackageName; + + #[test] + fn normalize() { + let inputs = [ + "friendly-bard", + "Friendly-Bard", + "FRIENDLY-BARD", + "friendly.bard", + "friendly_bard", + "friendly--bard", + "friendly-.bard", + "FrIeNdLy-._.-bArD", + ]; + + for input in inputs { + assert_eq!( + PackageName::new(input.to_string()).unwrap(), + PackageName::new("friendly-bard".to_string()).unwrap(), + ); + } + } +} diff --git a/crates/red_knot_workspace/src/workspace/settings.rs b/crates/red_knot_workspace/src/workspace/settings.rs index 38a633b07d..44464d2d33 100644 --- a/crates/red_knot_workspace/src/workspace/settings.rs +++ b/crates/red_knot_workspace/src/workspace/settings.rs @@ -1,10 +1,12 @@ +use crate::workspace::PackageMetadata; use red_knot_python_semantic::{ProgramSettings, PythonVersion, SearchPathSettings, SitePackages}; use ruff_db::system::{SystemPath, SystemPathBuf}; /// The resolved configurations. /// /// The main difference to [`Configuration`] is that default values are filled in. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] pub struct WorkspaceSettings { pub(super) program: ProgramSettings, } @@ -16,7 +18,8 @@ impl WorkspaceSettings { } /// The configuration for the workspace or a package. -#[derive(Debug, Default, Clone)] +#[derive(Debug, Default, Clone, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] pub struct Configuration { pub target_version: Option, pub search_paths: SearchPathConfiguration, @@ -29,17 +32,22 @@ impl Configuration { self.search_paths.extend(with.search_paths); } - pub fn into_workspace_settings(self, workspace_root: &SystemPath) -> WorkspaceSettings { + pub fn to_workspace_settings( + &self, + workspace_root: &SystemPath, + _packages: &[PackageMetadata], + ) -> WorkspaceSettings { WorkspaceSettings { program: ProgramSettings { target_version: self.target_version.unwrap_or_default(), - search_paths: self.search_paths.into_settings(workspace_root), + search_paths: self.search_paths.to_settings(workspace_root), }, } } } #[derive(Debug, Default, Clone, Eq, PartialEq)] +#[cfg_attr(test, derive(serde::Serialize))] pub struct SearchPathConfiguration { /// List of user-provided paths that should take first priority in the module resolution. /// Examples in other type checkers are mypy's MYPYPATH environment variable, @@ -59,15 +67,19 @@ pub struct SearchPathConfiguration { } impl SearchPathConfiguration { - pub fn into_settings(self, workspace_root: &SystemPath) -> SearchPathSettings { - let site_packages = self.site_packages.unwrap_or(SitePackages::Known(vec![])); + pub fn to_settings(&self, workspace_root: &SystemPath) -> SearchPathSettings { + let site_packages = self + .site_packages + .clone() + .unwrap_or(SitePackages::Known(vec![])); SearchPathSettings { - extra_paths: self.extra_paths.unwrap_or_default(), + extra_paths: self.extra_paths.clone().unwrap_or_default(), src_root: self + .clone() .src_root .unwrap_or_else(|| workspace_root.to_path_buf()), - custom_typeshed: self.custom_typeshed, + custom_typeshed: self.custom_typeshed.clone(), site_packages, } } diff --git a/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__member_pattern_matching_file.snap b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__member_pattern_matching_file.snap new file mode 100644 index 0000000000..9e6a35d1a5 --- /dev/null +++ b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__member_pattern_matching_file.snap @@ -0,0 +1,36 @@ +--- +source: crates/red_knot_workspace/src/workspace/metadata.rs +expression: "&workspace" +--- +WorkspaceMetadata( + root: "/app", + packages: [ + PackageMetadata( + name: Name("workspace-root"), + root: "/app", + configuration: Configuration( + target_version: None, + search_paths: SearchPathConfiguration( + extra_paths: None, + src_root: None, + custom_typeshed: None, + site_packages: None, + ), + ), + ), + ], + settings: WorkspaceSettings( + program: ProgramSettings( + target_version: PythonVersion( + major: 3, + minor: 8, + ), + search_paths: SearchPathSettings( + extra_paths: [], + src_root: "/app", + custom_typeshed: None, + site_packages: Known([]), + ), + ), + ), +) diff --git a/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__member_pattern_matching_hidden_folder.snap b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__member_pattern_matching_hidden_folder.snap new file mode 100644 index 0000000000..04b4caf15a --- /dev/null +++ b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__member_pattern_matching_hidden_folder.snap @@ -0,0 +1,36 @@ +--- +source: crates/red_knot_workspace/src/workspace/metadata.rs +expression: workspace +--- +WorkspaceMetadata( + root: "/app", + packages: [ + PackageMetadata( + name: Name("workspace-root"), + root: "/app", + configuration: Configuration( + target_version: None, + search_paths: SearchPathConfiguration( + extra_paths: None, + src_root: None, + custom_typeshed: None, + site_packages: None, + ), + ), + ), + ], + settings: WorkspaceSettings( + program: ProgramSettings( + target_version: PythonVersion( + major: 3, + minor: 8, + ), + search_paths: SearchPathSettings( + extra_paths: [], + src_root: "/app", + custom_typeshed: None, + site_packages: Known([]), + ), + ), + ), +) diff --git a/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__package_without_pyproject.snap b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__package_without_pyproject.snap new file mode 100644 index 0000000000..152d9f3ee0 --- /dev/null +++ b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__package_without_pyproject.snap @@ -0,0 +1,36 @@ +--- +source: crates/red_knot_workspace/src/workspace/metadata.rs +expression: workspace +--- +WorkspaceMetadata( + root: "/app", + packages: [ + PackageMetadata( + name: Name("app"), + root: "/app", + configuration: Configuration( + target_version: None, + search_paths: SearchPathConfiguration( + extra_paths: None, + src_root: None, + custom_typeshed: None, + site_packages: None, + ), + ), + ), + ], + settings: WorkspaceSettings( + program: ProgramSettings( + target_version: PythonVersion( + major: 3, + minor: 8, + ), + search_paths: SearchPathSettings( + extra_paths: [], + src_root: "/app", + custom_typeshed: None, + site_packages: Known([]), + ), + ), + ), +) diff --git a/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__single_package.snap b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__single_package.snap new file mode 100644 index 0000000000..4cb77791d8 --- /dev/null +++ b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__single_package.snap @@ -0,0 +1,36 @@ +--- +source: crates/red_knot_workspace/src/workspace/metadata.rs +expression: workspace +--- +WorkspaceMetadata( + root: "/app", + packages: [ + PackageMetadata( + name: Name("backend"), + root: "/app", + configuration: Configuration( + target_version: None, + search_paths: SearchPathConfiguration( + extra_paths: None, + src_root: None, + custom_typeshed: None, + site_packages: None, + ), + ), + ), + ], + settings: WorkspaceSettings( + program: ProgramSettings( + target_version: PythonVersion( + major: 3, + minor: 8, + ), + search_paths: SearchPathSettings( + extra_paths: [], + src_root: "/app", + custom_typeshed: None, + site_packages: Known([]), + ), + ), + ), +) diff --git a/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__workspace_excluded.snap b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__workspace_excluded.snap new file mode 100644 index 0000000000..374c648fc0 --- /dev/null +++ b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__workspace_excluded.snap @@ -0,0 +1,49 @@ +--- +source: crates/red_knot_workspace/src/workspace/metadata.rs +expression: workspace +--- +WorkspaceMetadata( + root: "/app", + packages: [ + PackageMetadata( + name: Name("workspace-root"), + root: "/app", + configuration: Configuration( + target_version: None, + search_paths: SearchPathConfiguration( + extra_paths: None, + src_root: None, + custom_typeshed: None, + site_packages: None, + ), + ), + ), + PackageMetadata( + name: Name("member-a"), + root: "/app/packages/a", + configuration: Configuration( + target_version: None, + search_paths: SearchPathConfiguration( + extra_paths: None, + src_root: None, + custom_typeshed: None, + site_packages: None, + ), + ), + ), + ], + settings: WorkspaceSettings( + program: ProgramSettings( + target_version: PythonVersion( + major: 3, + minor: 8, + ), + search_paths: SearchPathSettings( + extra_paths: [], + src_root: "/app", + custom_typeshed: None, + site_packages: Known([]), + ), + ), + ), +) diff --git a/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__workspace_members.snap b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__workspace_members.snap new file mode 100644 index 0000000000..fb2496e020 --- /dev/null +++ b/crates/red_knot_workspace/src/workspace/snapshots/red_knot_workspace__workspace__metadata__tests__workspace_members.snap @@ -0,0 +1,62 @@ +--- +source: crates/red_knot_workspace/src/workspace/metadata.rs +expression: workspace +--- +WorkspaceMetadata( + root: "/app", + packages: [ + PackageMetadata( + name: Name("workspace-root"), + root: "/app", + configuration: Configuration( + target_version: None, + search_paths: SearchPathConfiguration( + extra_paths: None, + src_root: None, + custom_typeshed: None, + site_packages: None, + ), + ), + ), + PackageMetadata( + name: Name("member-a"), + root: "/app/packages/a", + configuration: Configuration( + target_version: None, + search_paths: SearchPathConfiguration( + extra_paths: None, + src_root: None, + custom_typeshed: None, + site_packages: None, + ), + ), + ), + PackageMetadata( + name: Name("member-x"), + root: "/app/packages/x", + configuration: Configuration( + target_version: None, + search_paths: SearchPathConfiguration( + extra_paths: None, + src_root: None, + custom_typeshed: None, + site_packages: None, + ), + ), + ), + ], + settings: WorkspaceSettings( + program: ProgramSettings( + target_version: PythonVersion( + major: 3, + minor: 8, + ), + search_paths: SearchPathSettings( + extra_paths: [], + src_root: "/app", + custom_typeshed: None, + site_packages: Known([]), + ), + ), + ), +) diff --git a/crates/red_knot_workspace/tests/check.rs b/crates/red_knot_workspace/tests/check.rs index 94b9c57316..8d2b5c9c4a 100644 --- a/crates/red_knot_workspace/tests/check.rs +++ b/crates/red_knot_workspace/tests/check.rs @@ -9,7 +9,7 @@ use ruff_python_ast::visitor::source_order::SourceOrderVisitor; use ruff_python_ast::{self as ast, Alias, Expr, Parameter, ParameterWithDefault, Stmt}; fn setup_db(workspace_root: &SystemPath, system: TestSystem) -> anyhow::Result { - let workspace = WorkspaceMetadata::from_path(workspace_root, &system, None)?; + let workspace = WorkspaceMetadata::discover(workspace_root, &system, None)?; RootDatabase::new(workspace, system) } diff --git a/crates/ruff_benchmark/benches/red_knot.rs b/crates/ruff_benchmark/benches/red_knot.rs index 37cddef5a0..e7d15e7fef 100644 --- a/crates/ruff_benchmark/benches/red_knot.rs +++ b/crates/ruff_benchmark/benches/red_knot.rs @@ -2,7 +2,7 @@ use rayon::ThreadPoolBuilder; use red_knot_python_semantic::PythonVersion; -use red_knot_workspace::db::RootDatabase; +use red_knot_workspace::db::{Db, RootDatabase}; use red_knot_workspace::watch::{ChangeEvent, ChangedKind}; use red_knot_workspace::workspace::settings::Configuration; use red_knot_workspace::workspace::WorkspaceMetadata; @@ -75,10 +75,10 @@ fn setup_case() -> Case { .unwrap(); let src_root = SystemPath::new("/src"); - let metadata = WorkspaceMetadata::from_path( + let metadata = WorkspaceMetadata::discover( src_root, &system, - Some(Configuration { + Some(&Configuration { target_version: Some(PythonVersion::PY312), ..Configuration::default() }), diff --git a/crates/ruff_db/Cargo.toml b/crates/ruff_db/Cargo.toml index 3410fe7449..cef0d11f8f 100644 --- a/crates/ruff_db/Cargo.toml +++ b/crates/ruff_db/Cargo.toml @@ -22,7 +22,9 @@ ruff_text_size = { workspace = true } camino = { workspace = true } countme = { workspace = true } dashmap = { workspace = true } +dunce = { workspace = true } filetime = { workspace = true } +glob = { workspace = true } ignore = { workspace = true, optional = true } matchit = { workspace = true } salsa = { workspace = true } diff --git a/crates/ruff_db/src/system.rs b/crates/ruff_db/src/system.rs index 35a916fc47..1d0bcf5e0b 100644 --- a/crates/ruff_db/src/system.rs +++ b/crates/ruff_db/src/system.rs @@ -1,9 +1,12 @@ -use std::fmt::Debug; - +pub use glob::PatternError; pub use memory_fs::MemoryFileSystem; #[cfg(feature = "os")] pub use os::OsSystem; use ruff_notebook::{Notebook, NotebookError}; +use std::error::Error; +use std::fmt::Debug; +use std::path::{Path, PathBuf}; +use std::{fmt, io}; pub use test::{DbWithTestSystem, TestSystem}; use walk_directory::WalkDirectoryBuilder; @@ -51,6 +54,10 @@ pub trait System: Debug { /// * `path` does not exist. /// * A non-final component in `path` is not a directory. /// * the symlink target path is not valid Unicode. + /// + /// ## Windows long-paths + /// Unlike `std::fs::canonicalize`, this function does remove UNC prefixes if possible. + /// See [dunce::canonicalize] for more information. fn canonicalize_path(&self, path: &SystemPath) -> Result; /// Reads the content of the file at `path` into a [`String`]. @@ -126,6 +133,19 @@ pub trait System: Debug { /// yields a single entry for that file. fn walk_directory(&self, path: &SystemPath) -> WalkDirectoryBuilder; + /// Return an iterator that produces all the `Path`s that match the given + /// pattern using default match options, which may be absolute or relative to + /// the current working directory. + /// + /// This may return an error if the pattern is invalid. + fn glob( + &self, + pattern: &str, + ) -> std::result::Result< + Box>>, + PatternError, + >; + fn as_any(&self) -> &dyn std::any::Any; fn as_any_mut(&mut self) -> &mut dyn std::any::Any; @@ -204,3 +224,59 @@ impl DirectoryEntry { self.file_type } } + +/// A glob iteration error. +/// +/// This is typically returned when a particular path cannot be read +/// to determine if its contents match the glob pattern. This is possible +/// if the program lacks the appropriate permissions, for example. +#[derive(Debug)] +pub struct GlobError { + path: PathBuf, + error: GlobErrorKind, +} + +impl GlobError { + /// The Path that the error corresponds to. + pub fn path(&self) -> &Path { + &self.path + } + + pub fn kind(&self) -> &GlobErrorKind { + &self.error + } +} + +impl Error for GlobError {} + +impl fmt::Display for GlobError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self.error { + GlobErrorKind::IOError(error) => { + write!( + f, + "attempting to read `{}` resulted in an error: {error}", + self.path.display(), + ) + } + GlobErrorKind::NonUtf8Path => { + write!(f, "`{}` is not a valid UTF-8 path", self.path.display(),) + } + } + } +} + +impl From for GlobError { + fn from(value: glob::GlobError) -> Self { + Self { + path: value.path().to_path_buf(), + error: GlobErrorKind::IOError(value.into_error()), + } + } +} + +#[derive(Debug)] +pub enum GlobErrorKind { + IOError(io::Error), + NonUtf8Path, +} diff --git a/crates/ruff_db/src/system/memory_fs.rs b/crates/ruff_db/src/system/memory_fs.rs index 5b584de114..ba3157cfb0 100644 --- a/crates/ruff_db/src/system/memory_fs.rs +++ b/crates/ruff_db/src/system/memory_fs.rs @@ -9,13 +9,13 @@ use rustc_hash::FxHashMap; use ruff_notebook::{Notebook, NotebookError}; use crate::system::{ - walk_directory, DirectoryEntry, FileType, Metadata, Result, SystemPath, SystemPathBuf, - SystemVirtualPath, SystemVirtualPathBuf, + walk_directory, DirectoryEntry, FileType, GlobError, GlobErrorKind, Metadata, Result, + SystemPath, SystemPathBuf, SystemVirtualPath, SystemVirtualPathBuf, }; use super::walk_directory::{ - DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration, WalkDirectoryVisitor, - WalkDirectoryVisitorBuilder, WalkState, + DirectoryWalker, ErrorKind, WalkDirectoryBuilder, WalkDirectoryConfiguration, + WalkDirectoryVisitor, WalkDirectoryVisitorBuilder, WalkState, }; /// File system that stores all content in memory. @@ -94,8 +94,11 @@ impl MemoryFileSystem { metadata(self, path.as_ref()) } - pub fn canonicalize(&self, path: impl AsRef) -> SystemPathBuf { - SystemPathBuf::from_utf8_path_buf(self.normalize_path(path)) + pub fn canonicalize(&self, path: impl AsRef) -> Result { + let path = path.as_ref(); + // Mimic the behavior of a real FS where canonicalize errors if the `path` doesn't exist + self.metadata(path)?; + Ok(SystemPathBuf::from_utf8_path_buf(self.normalize_path(path))) } pub fn is_file(&self, path: impl AsRef) -> bool { @@ -230,6 +233,46 @@ impl MemoryFileSystem { WalkDirectoryBuilder::new(path, MemoryWalker { fs: self.clone() }) } + pub fn glob( + &self, + pattern: &str, + ) -> std::result::Result< + impl Iterator>, + glob::PatternError, + > { + // Very naive implementation that iterates over all files and collects all that match the given pattern. + + let normalized = self.normalize_path(pattern); + let pattern = glob::Pattern::new(normalized.as_str())?; + let matches = std::sync::Mutex::new(Vec::new()); + + self.walk_directory("/").standard_filters(false).run(|| { + Box::new(|entry| { + match entry { + Ok(entry) => { + if pattern.matches_path(entry.path().as_std_path()) { + matches.lock().unwrap().push(Ok(entry.into_path())); + } + } + Err(error) => match error.kind { + ErrorKind::Loop { .. } => { + unreachable!("Loops aren't possible in the memory file system because it doesn't support symlinks.") + } + ErrorKind::Io { err, path } => { + matches.lock().unwrap().push(Err(GlobError { path: path.expect("walk_directory to always set a path").into_std_path_buf(), error: GlobErrorKind::IOError(err)})); + } + ErrorKind::NonUtf8Path { path } => { + matches.lock().unwrap().push(Err(GlobError { path, error: GlobErrorKind::NonUtf8Path})); + } + }, + } + WalkState::Continue + }) + }); + + Ok(matches.into_inner().unwrap().into_iter()) + } + pub fn remove_file(&self, path: impl AsRef) -> Result<()> { fn remove_file(fs: &MemoryFileSystem, path: &SystemPath) -> Result<()> { let mut by_path = fs.inner.by_path.write().unwrap(); @@ -629,7 +672,7 @@ impl DirectoryWalker for MemoryWalker { visitor.visit(Err(walk_directory::Error { depth: Some(depth), kind: walk_directory::ErrorKind::Io { - path: None, + path: Some(path.clone()), err: error, }, })); @@ -676,6 +719,7 @@ fn now() -> FileTime { #[cfg(test)] mod tests { use std::io::ErrorKind; + use std::time::Duration; use crate::system::walk_directory::tests::DirectoryEntryToString; @@ -1149,4 +1193,26 @@ mod tests { Ok(()) } + + #[test] + fn glob() -> std::io::Result<()> { + let root = SystemPath::new("/src"); + let fs = MemoryFileSystem::with_current_directory(root); + + fs.write_files([ + (root.join("foo.py"), "print('foo')"), + (root.join("a/bar.py"), "print('bar')"), + (root.join("a/.baz.py"), "print('baz')"), + ])?; + + let mut matches = fs.glob("/src/a/**").unwrap().flatten().collect::>(); + matches.sort_unstable(); + + assert_eq!(matches, vec![root.join("a/.baz.py"), root.join("a/bar.py")]); + + let matches = fs.glob("**/bar.py").unwrap().flatten().collect::>(); + assert_eq!(matches, vec![root.join("a/bar.py")]); + + Ok(()) + } } diff --git a/crates/ruff_db/src/system/os.rs b/crates/ruff_db/src/system/os.rs index 6652c4a383..843558f8f6 100644 --- a/crates/ruff_db/src/system/os.rs +++ b/crates/ruff_db/src/system/os.rs @@ -6,8 +6,8 @@ use filetime::FileTime; use ruff_notebook::{Notebook, NotebookError}; use crate::system::{ - DirectoryEntry, FileType, Metadata, Result, System, SystemPath, SystemPathBuf, - SystemVirtualPath, + DirectoryEntry, FileType, GlobError, GlobErrorKind, Metadata, Result, System, SystemPath, + SystemPathBuf, SystemVirtualPath, }; use super::walk_directory::{ @@ -64,9 +64,11 @@ impl System for OsSystem { } fn canonicalize_path(&self, path: &SystemPath) -> Result { - path.as_utf8_path() - .canonicalize_utf8() - .map(SystemPathBuf::from_utf8_path_buf) + path.as_utf8_path().canonicalize_utf8().map(|path| { + SystemPathBuf::from_utf8_path_buf(path) + .simplified() + .to_path_buf() + }) } fn read_to_string(&self, path: &SystemPath) -> Result { @@ -104,6 +106,30 @@ impl System for OsSystem { WalkDirectoryBuilder::new(path, OsDirectoryWalker {}) } + fn glob( + &self, + pattern: &str, + ) -> std::result::Result< + Box>>, + glob::PatternError, + > { + glob::glob(pattern).map(|inner| { + let iterator = inner.map(|result| { + let path = result?; + + let system_path = SystemPathBuf::from_path_buf(path).map_err(|path| GlobError { + path, + error: GlobErrorKind::NonUtf8Path, + })?; + + Ok(system_path) + }); + + let boxed: Box> = Box::new(iterator); + boxed + }) + } + fn as_any(&self) -> &dyn Any { self } diff --git a/crates/ruff_db/src/system/path.rs b/crates/ruff_db/src/system/path.rs index 346009183d..a883cad738 100644 --- a/crates/ruff_db/src/system/path.rs +++ b/crates/ruff_db/src/system/path.rs @@ -1,12 +1,8 @@ -// TODO support untitled files for the LSP use case. Wrap a `str` and `String` -// The main question is how `as_std_path` would work for untitled files, that can only exist in the LSP case -// but there's no compile time guarantee that a [`OsSystem`] never gets an untitled file path. - use camino::{Utf8Path, Utf8PathBuf}; use std::borrow::Borrow; use std::fmt::Formatter; use std::ops::Deref; -use std::path::{Path, StripPrefixError}; +use std::path::{Path, PathBuf, StripPrefixError}; /// A slice of a path on [`System`](super::System) (akin to [`str`]). /// @@ -23,6 +19,32 @@ impl SystemPath { unsafe { &*(path as *const Utf8Path as *const SystemPath) } } + /// Takes any path, and when possible, converts Windows UNC paths to regular paths. + /// If the path can't be converted, it's returned unmodified. + /// + /// On non-Windows this is no-op. + /// + /// `\\?\C:\Windows` will be converted to `C:\Windows`, + /// but `\\?\C:\COM` will be left as-is (due to a reserved filename). + /// + /// Use this to pass arbitrary paths to programs that may not be UNC-aware. + /// + /// It's generally safe to pass UNC paths to legacy programs, because + /// these paths contain a reserved prefix, so will gracefully fail + /// if used with legacy APIs that don't support UNC. + /// + /// This function does not perform any I/O. + /// + /// Currently paths with unpaired surrogates aren't converted even if they + /// could be, due to limitations of Rust's `OsStr` API. + /// + /// To check if a path remained as UNC, use `path.as_os_str().as_encoded_bytes().starts_with(b"\\\\")`. + #[inline] + pub fn simplified(&self) -> &SystemPath { + // SAFETY: simplified only trims the path, that means the returned path must be a valid UTF-8 path. + SystemPath::from_std_path(dunce::simplified(self.as_std_path())).unwrap() + } + /// Extracts the file extension, if possible. /// /// The extension is: @@ -123,6 +145,39 @@ impl SystemPath { self.0.parent().map(SystemPath::new) } + /// Produces an iterator over `SystemPath` and its ancestors. + /// + /// The iterator will yield the `SystemPath` that is returned if the [`parent`] method is used zero + /// or more times. That means, the iterator will yield `&self`, `&self.parent().unwrap()`, + /// `&self.parent().unwrap().parent().unwrap()` and so on. If the [`parent`] method returns + /// [`None`], the iterator will do likewise. The iterator will always yield at least one value, + /// namely `&self`. + /// + /// # Examples + /// + /// ``` + /// use ruff_db::system::SystemPath; + /// + /// let mut ancestors = SystemPath::new("/foo/bar").ancestors(); + /// assert_eq!(ancestors.next(), Some(SystemPath::new("/foo/bar"))); + /// assert_eq!(ancestors.next(), Some(SystemPath::new("/foo"))); + /// assert_eq!(ancestors.next(), Some(SystemPath::new("/"))); + /// assert_eq!(ancestors.next(), None); + /// + /// let mut ancestors = SystemPath::new("../foo/bar").ancestors(); + /// assert_eq!(ancestors.next(), Some(SystemPath::new("../foo/bar"))); + /// assert_eq!(ancestors.next(), Some(SystemPath::new("../foo"))); + /// assert_eq!(ancestors.next(), Some(SystemPath::new(".."))); + /// assert_eq!(ancestors.next(), Some(SystemPath::new(""))); + /// assert_eq!(ancestors.next(), None); + /// ``` + /// + /// [`parent`]: SystemPath::parent + #[inline] + pub fn ancestors(&self) -> impl Iterator { + self.0.ancestors().map(SystemPath::new) + } + /// Produces an iterator over the [`camino::Utf8Component`]s of the path. /// /// When parsing the path, there is a small amount of normalization: @@ -473,6 +528,10 @@ impl SystemPathBuf { self.0 } + pub fn into_std_path_buf(self) -> PathBuf { + self.0.into_std_path_buf() + } + #[inline] pub fn as_path(&self) -> &SystemPath { SystemPath::new(&self.0) diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs index 4194c90dc0..5b318a113a 100644 --- a/crates/ruff_db/src/system/test.rs +++ b/crates/ruff_db/src/system/test.rs @@ -1,14 +1,14 @@ +use glob::PatternError; +use ruff_notebook::{Notebook, NotebookError}; +use ruff_python_trivia::textwrap; use std::any::Any; use std::panic::RefUnwindSafe; use std::sync::Arc; -use ruff_notebook::{Notebook, NotebookError}; -use ruff_python_trivia::textwrap; - use crate::files::File; use crate::system::{ - DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath, SystemPathBuf, - SystemVirtualPath, + DirectoryEntry, GlobError, MemoryFileSystem, Metadata, Result, System, SystemPath, + SystemPathBuf, SystemVirtualPath, }; use crate::Db; @@ -121,6 +121,22 @@ impl System for TestSystem { } } + fn glob( + &self, + pattern: &str, + ) -> std::result::Result< + Box>>, + PatternError, + > { + match &self.inner { + TestSystemInner::Stub(fs) => { + let iterator = fs.glob(pattern)?; + Ok(Box::new(iterator)) + } + TestSystemInner::System(system) => system.glob(pattern), + } + } + fn as_any(&self) -> &dyn Any { self } @@ -142,7 +158,7 @@ impl System for TestSystem { fn canonicalize_path(&self, path: &SystemPath) -> Result { match &self.inner { TestSystemInner::System(fs) => fs.canonicalize_path(path), - TestSystemInner::Stub(fs) => Ok(fs.canonicalize(path)), + TestSystemInner::Stub(fs) => fs.canonicalize(path), } } }