diff --git a/crates/red_knot/src/args.rs b/crates/red_knot/src/args.rs index b28642bcc9..7b4a0319f4 100644 --- a/crates/red_knot/src/args.rs +++ b/crates/red_knot/src/args.rs @@ -32,6 +32,13 @@ pub(crate) enum Command { #[derive(Debug, Parser)] pub(crate) struct CheckCommand { + /// List of files or directories to check. + #[clap( + help = "List of files or directories to check [default: the project root]", + value_name = "PATH" + )] + pub paths: Vec, + /// Run the command within the given project directory. /// /// All `pyproject.toml` files will be discovered by walking up the directory tree from the given project directory, @@ -76,7 +83,7 @@ pub(crate) struct CheckCommand { #[arg(long)] pub(crate) exit_zero: bool, - /// Run in watch mode by re-running whenever files change. + /// Watch files for changes and recheck files related to the changed files. #[arg(long, short = 'W')] pub(crate) watch: bool, } diff --git a/crates/red_knot/src/main.rs b/crates/red_knot/src/main.rs index 3f41f5413f..07ee47f03e 100644 --- a/crates/red_knot/src/main.rs +++ b/crates/red_knot/src/main.rs @@ -107,6 +107,12 @@ fn run_check(args: CheckCommand) -> anyhow::Result { .transpose()? .unwrap_or_else(|| cwd.clone()); + let check_paths: Vec<_> = args + .paths + .iter() + .map(|path| SystemPath::absolute(path, &cwd)) + .collect(); + let system = OsSystem::new(cwd); let watch = args.watch; let exit_zero = args.exit_zero; @@ -118,6 +124,10 @@ fn run_check(args: CheckCommand) -> anyhow::Result { let mut db = ProjectDatabase::new(project_metadata, system)?; + if !check_paths.is_empty() { + db.project().set_included_paths(&mut db, check_paths); + } + let (main_loop, main_loop_cancellation_token) = MainLoop::new(cli_options); // Listen to Ctrl+C and abort the watch mode. 
@@ -255,14 +265,43 @@ impl MainLoop { Severity::Error }; - let failed = result - .iter() - .any(|diagnostic| diagnostic.severity() >= min_error_severity); - if check_revision == revision { + if db.project().files(db).is_empty() { + tracing::warn!("No python files found under the given path(s)"); + } + let mut stdout = stdout().lock(); - for diagnostic in result { - writeln!(stdout, "{}", diagnostic.display(db, &display_config))?; + + if result.is_empty() { + writeln!(stdout, "All checks passed!")?; + + if self.watcher.is_none() { + return Ok(ExitStatus::Success); + } + } else { + let mut failed = false; + let diagnostics_count = result.len(); + + for diagnostic in result { + writeln!(stdout, "{}", diagnostic.display(db, &display_config))?; + + failed |= diagnostic.severity() >= min_error_severity; + } + + writeln!( + stdout, + "Found {} diagnostic{}", + diagnostics_count, + if diagnostics_count > 1 { "s" } else { "" } + )?; + + if self.watcher.is_none() { + return Ok(if failed { + ExitStatus::Failure + } else { + ExitStatus::Success + }); + } } } else { tracing::debug!( @@ -270,14 +309,6 @@ impl MainLoop { ); } - if self.watcher.is_none() { - return Ok(if failed { - ExitStatus::Failure - } else { - ExitStatus::Success - }); - } - tracing::trace!("Counts after last check:\n{}", countme::get_all()); } @@ -322,7 +353,9 @@ impl MainLoopCancellationToken { enum MainLoopMessage { CheckWorkspace, CheckCompleted { + /// The diagnostics that were found during the check. 
result: Vec>, + revision: u64, }, ApplyChanges(Vec), diff --git a/crates/red_knot/tests/cli.rs b/crates/red_knot/tests/cli.rs index 3ac2d260bf..09ce6ab6bc 100644 --- a/crates/red_knot/tests/cli.rs +++ b/crates/red_knot/tests/cli.rs @@ -28,7 +28,7 @@ fn config_override() -> anyhow::Result<()> { ), ])?; - assert_cmd_snapshot!(case.command(), @r###" + assert_cmd_snapshot!(case.command(), @r" success: false exit_code: 1 ----- stdout ----- @@ -40,16 +40,18 @@ fn config_override() -> anyhow::Result<()> { | ^^^^^^^^^^^^ Type `` has no attribute `last_exc` | + Found 1 diagnostic ----- stderr ----- - "###); + "); assert_cmd_snapshot!(case.command().arg("--python-version").arg("3.12"), @r" - success: true - exit_code: 0 - ----- stdout ----- + success: true + exit_code: 0 + ----- stdout ----- + All checks passed! - ----- stderr ----- + ----- stderr ----- "); Ok(()) @@ -98,7 +100,7 @@ fn cli_arguments_are_relative_to_the_current_directory() -> anyhow::Result<()> { ])?; // Make sure that the CLI fails when the `libs` directory is not in the search path. - assert_cmd_snapshot!(case.command().current_dir(case.root().join("child")), @r###" + assert_cmd_snapshot!(case.command().current_dir(case.root().join("child")), @r" success: false exit_code: 1 ----- stdout ----- @@ -111,16 +113,18 @@ fn cli_arguments_are_relative_to_the_current_directory() -> anyhow::Result<()> { 4 | stat = add(10, 15) | + Found 1 diagnostic ----- stderr ----- - "###); + "); assert_cmd_snapshot!(case.command().current_dir(case.root().join("child")).arg("--extra-search-path").arg("../libs"), @r" - success: true - exit_code: 0 - ----- stdout ----- + success: true + exit_code: 0 + ----- stdout ----- + All checks passed! 
- ----- stderr ----- + ----- stderr ----- "); Ok(()) @@ -168,11 +172,12 @@ fn paths_in_configuration_files_are_relative_to_the_project_root() -> anyhow::Re ])?; assert_cmd_snapshot!(case.command().current_dir(case.root().join("child")), @r" - success: true - exit_code: 0 - ----- stdout ----- + success: true + exit_code: 0 + ----- stdout ----- + All checks passed! - ----- stderr ----- + ----- stderr ----- "); Ok(()) @@ -195,7 +200,7 @@ fn configuration_rule_severity() -> anyhow::Result<()> { // Assert that there's a possibly unresolved reference diagnostic // and that division-by-zero has a severity of error by default. - assert_cmd_snapshot!(case.command(), @r###" + assert_cmd_snapshot!(case.command(), @r" success: false exit_code: 1 ----- stdout ----- @@ -217,9 +222,10 @@ fn configuration_rule_severity() -> anyhow::Result<()> { | - Name `x` used when possibly not defined | + Found 2 diagnostics ----- stderr ----- - "###); + "); case.write_file( "pyproject.toml", @@ -230,7 +236,7 @@ fn configuration_rule_severity() -> anyhow::Result<()> { "#, )?; - assert_cmd_snapshot!(case.command(), @r###" + assert_cmd_snapshot!(case.command(), @r" success: true exit_code: 0 ----- stdout ----- @@ -243,9 +249,10 @@ fn configuration_rule_severity() -> anyhow::Result<()> { 4 | for a in range(0, y): | + Found 1 diagnostic ----- stderr ----- - "###); + "); Ok(()) } @@ -269,7 +276,7 @@ fn cli_rule_severity() -> anyhow::Result<()> { // Assert that there's a possibly unresolved reference diagnostic // and that division-by-zero has a severity of error by default. 
- assert_cmd_snapshot!(case.command(), @r###" + assert_cmd_snapshot!(case.command(), @r" success: false exit_code: 1 ----- stdout ----- @@ -302,9 +309,10 @@ fn cli_rule_severity() -> anyhow::Result<()> { | - Name `x` used when possibly not defined | + Found 3 diagnostics ----- stderr ----- - "###); + "); assert_cmd_snapshot!( case @@ -315,7 +323,7 @@ fn cli_rule_severity() -> anyhow::Result<()> { .arg("division-by-zero") .arg("--warn") .arg("unresolved-import"), - @r###" + @r" success: true exit_code: 0 ----- stdout ----- @@ -339,9 +347,10 @@ fn cli_rule_severity() -> anyhow::Result<()> { 6 | for a in range(0, y): | + Found 2 diagnostics ----- stderr ----- - "### + " ); Ok(()) @@ -365,7 +374,7 @@ fn cli_rule_severity_precedence() -> anyhow::Result<()> { // Assert that there's a possibly unresolved reference diagnostic // and that division-by-zero has a severity of error by default. - assert_cmd_snapshot!(case.command(), @r###" + assert_cmd_snapshot!(case.command(), @r" success: false exit_code: 1 ----- stdout ----- @@ -387,9 +396,10 @@ fn cli_rule_severity_precedence() -> anyhow::Result<()> { | - Name `x` used when possibly not defined | + Found 2 diagnostics ----- stderr ----- - "###); + "); assert_cmd_snapshot!( case @@ -401,7 +411,7 @@ fn cli_rule_severity_precedence() -> anyhow::Result<()> { // Override the error severity with warning .arg("--ignore") .arg("possibly-unresolved-reference"), - @r###" + @r" success: true exit_code: 0 ----- stdout ----- @@ -414,9 +424,10 @@ fn cli_rule_severity_precedence() -> anyhow::Result<()> { 4 | for a in range(0, y): | + Found 1 diagnostic ----- stderr ----- - "### + " ); Ok(()) @@ -436,7 +447,7 @@ fn configuration_unknown_rules() -> anyhow::Result<()> { ("test.py", "print(10)"), ])?; - assert_cmd_snapshot!(case.command(), @r###" + assert_cmd_snapshot!(case.command(), @r#" success: true exit_code: 0 ----- stdout ----- @@ -448,9 +459,10 @@ fn configuration_unknown_rules() -> anyhow::Result<()> { | --------------- Unknown lint 
rule `division-by-zer` | + Found 1 diagnostic ----- stderr ----- - "###); + "#); Ok(()) } @@ -460,15 +472,16 @@ fn configuration_unknown_rules() -> anyhow::Result<()> { fn cli_unknown_rules() -> anyhow::Result<()> { let case = TestCase::with_file("test.py", "print(10)")?; - assert_cmd_snapshot!(case.command().arg("--ignore").arg("division-by-zer"), @r###" + assert_cmd_snapshot!(case.command().arg("--ignore").arg("division-by-zer"), @r" success: true exit_code: 0 ----- stdout ----- warning: unknown-rule: Unknown lint rule `division-by-zer` + Found 1 diagnostic ----- stderr ----- - "###); + "); Ok(()) } @@ -477,7 +490,7 @@ fn cli_unknown_rules() -> anyhow::Result<()> { fn exit_code_only_warnings() -> anyhow::Result<()> { let case = TestCase::with_file("test.py", r"print(x) # [unresolved-reference]")?; - assert_cmd_snapshot!(case.command(), @r###" + assert_cmd_snapshot!(case.command(), @r" success: true exit_code: 0 ----- stdout ----- @@ -488,9 +501,10 @@ fn exit_code_only_warnings() -> anyhow::Result<()> { | - Name `x` used when not defined | + Found 1 diagnostic ----- stderr ----- - "###); + "); Ok(()) } @@ -505,7 +519,7 @@ fn exit_code_only_info() -> anyhow::Result<()> { "#, )?; - assert_cmd_snapshot!(case.command(), @r###" + assert_cmd_snapshot!(case.command(), @r" success: true exit_code: 0 ----- stdout ----- @@ -517,9 +531,10 @@ fn exit_code_only_info() -> anyhow::Result<()> { | -------------- info: Revealed type is `Literal[1]` | + Found 1 diagnostic ----- stderr ----- - "###); + "); Ok(()) } @@ -534,7 +549,7 @@ fn exit_code_only_info_and_error_on_warning_is_true() -> anyhow::Result<()> { "#, )?; - assert_cmd_snapshot!(case.command().arg("--error-on-warning"), @r###" + assert_cmd_snapshot!(case.command().arg("--error-on-warning"), @r" success: true exit_code: 0 ----- stdout ----- @@ -546,9 +561,10 @@ fn exit_code_only_info_and_error_on_warning_is_true() -> anyhow::Result<()> { | -------------- info: Revealed type is `Literal[1]` | + Found 1 diagnostic ----- 
stderr ----- - "###); + "); Ok(()) } @@ -557,7 +573,7 @@ fn exit_code_only_info_and_error_on_warning_is_true() -> anyhow::Result<()> { fn exit_code_no_errors_but_error_on_warning_is_true() -> anyhow::Result<()> { let case = TestCase::with_file("test.py", r"print(x) # [unresolved-reference]")?; - assert_cmd_snapshot!(case.command().arg("--error-on-warning"), @r###" + assert_cmd_snapshot!(case.command().arg("--error-on-warning"), @r" success: false exit_code: 1 ----- stdout ----- @@ -568,9 +584,10 @@ fn exit_code_no_errors_but_error_on_warning_is_true() -> anyhow::Result<()> { | - Name `x` used when not defined | + Found 1 diagnostic ----- stderr ----- - "###); + "); Ok(()) } @@ -588,7 +605,7 @@ fn exit_code_no_errors_but_error_on_warning_is_enabled_in_configuration() -> any ), ])?; - assert_cmd_snapshot!(case.command(), @r###" + assert_cmd_snapshot!(case.command(), @r" success: false exit_code: 1 ----- stdout ----- @@ -599,9 +616,10 @@ fn exit_code_no_errors_but_error_on_warning_is_enabled_in_configuration() -> any | - Name `x` used when not defined | + Found 1 diagnostic ----- stderr ----- - "###); + "); Ok(()) } @@ -616,7 +634,7 @@ fn exit_code_both_warnings_and_errors() -> anyhow::Result<()> { "#, )?; - assert_cmd_snapshot!(case.command(), @r###" + assert_cmd_snapshot!(case.command(), @r" success: false exit_code: 1 ----- stdout ----- @@ -636,9 +654,10 @@ fn exit_code_both_warnings_and_errors() -> anyhow::Result<()> { | ^ Cannot subscript object of type `Literal[4]` with no `__getitem__` method | + Found 2 diagnostics ----- stderr ----- - "###); + "); Ok(()) } @@ -653,7 +672,7 @@ fn exit_code_both_warnings_and_errors_and_error_on_warning_is_true() -> anyhow:: "###, )?; - assert_cmd_snapshot!(case.command().arg("--error-on-warning"), @r###" + assert_cmd_snapshot!(case.command().arg("--error-on-warning"), @r" success: false exit_code: 1 ----- stdout ----- @@ -673,9 +692,10 @@ fn exit_code_both_warnings_and_errors_and_error_on_warning_is_true() -> anyhow:: | ^ 
Cannot subscript object of type `Literal[4]` with no `__getitem__` method | + Found 2 diagnostics ----- stderr ----- - "###); + "); Ok(()) } @@ -690,7 +710,7 @@ fn exit_code_exit_zero_is_true() -> anyhow::Result<()> { "#, )?; - assert_cmd_snapshot!(case.command().arg("--exit-zero"), @r###" + assert_cmd_snapshot!(case.command().arg("--exit-zero"), @r" success: true exit_code: 0 ----- stdout ----- @@ -710,9 +730,10 @@ fn exit_code_exit_zero_is_true() -> anyhow::Result<()> { | ^ Cannot subscript object of type `Literal[4]` with no `__getitem__` method | + Found 2 diagnostics ----- stderr ----- - "###); + "); Ok(()) } @@ -749,7 +770,7 @@ fn user_configuration() -> anyhow::Result<()> { assert_cmd_snapshot!( case.command().current_dir(case.root().join("project")).env(config_env_var, config_directory.as_os_str()), - @r###" + @r" success: true exit_code: 0 ----- stdout ----- @@ -771,9 +792,10 @@ fn user_configuration() -> anyhow::Result<()> { | - Name `x` used when possibly not defined | + Found 2 diagnostics ----- stderr ----- - "### + " ); // The user-level configuration promotes `possibly-unresolved-reference` to an error. 
@@ -790,7 +812,7 @@ fn user_configuration() -> anyhow::Result<()> { assert_cmd_snapshot!( case.command().current_dir(case.root().join("project")).env(config_env_var, config_directory.as_os_str()), - @r###" + @r" success: false exit_code: 1 ----- stdout ----- @@ -812,9 +834,134 @@ fn user_configuration() -> anyhow::Result<()> { | ^ Name `x` used when possibly not defined | + Found 2 diagnostics ----- stderr ----- - "### + " + ); + + Ok(()) +} + +#[test] +fn check_specific_paths() -> anyhow::Result<()> { + let case = TestCase::with_files([ + ( + "project/main.py", + r#" + y = 4 / 0 # error: division-by-zero + "#, + ), + ( + "project/tests/test_main.py", + r#" + import does_not_exist # error: unresolved-import + "#, + ), + ( + "project/other.py", + r#" + from main2 import z # error: unresolved-import + + print(z) + "#, + ), + ])?; + + assert_cmd_snapshot!( + case.command(), + @r" + success: false + exit_code: 1 + ----- stdout ----- + error: lint:unresolved-import + --> /project/tests/test_main.py:2:8 + | + 2 | import does_not_exist # error: unresolved-import + | ^^^^^^^^^^^^^^ Cannot resolve import `does_not_exist` + | + + error: lint:division-by-zero + --> /project/main.py:2:5 + | + 2 | y = 4 / 0 # error: division-by-zero + | ^^^^^ Cannot divide object of type `Literal[4]` by zero + | + + error: lint:unresolved-import + --> /project/other.py:2:6 + | + 2 | from main2 import z # error: unresolved-import + | ^^^^^ Cannot resolve import `main2` + 3 | + 4 | print(z) + | + + Found 3 diagnostics + + ----- stderr ----- + " + ); + + // Now check only the `tests` and `other.py` files. + // We should no longer see any diagnostics related to `main.py`. 
+ assert_cmd_snapshot!( + case.command().arg("project/tests").arg("project/other.py"), + @r" + success: false + exit_code: 1 + ----- stdout ----- + error: lint:unresolved-import + --> /project/tests/test_main.py:2:8 + | + 2 | import does_not_exist # error: unresolved-import + | ^^^^^^^^^^^^^^ Cannot resolve import `does_not_exist` + | + + error: lint:unresolved-import + --> /project/other.py:2:6 + | + 2 | from main2 import z # error: unresolved-import + | ^^^^^ Cannot resolve import `main2` + 3 | + 4 | print(z) + | + + Found 2 diagnostics + + ----- stderr ----- + " + ); + + Ok(()) +} + +#[test] +fn check_non_existing_path() -> anyhow::Result<()> { + let case = TestCase::with_files([])?; + + let mut settings = insta::Settings::clone_current(); + settings.add_filter( + ®ex::escape("The system cannot find the path specified. (os error 3)"), + "No such file or directory (os error 2)", + ); + let _s = settings.bind_to_scope(); + + assert_cmd_snapshot!( + case.command().arg("project/main.py").arg("project/tests"), + @r" + success: false + exit_code: 1 + ----- stdout ----- + error: io: `/project/main.py`: No such file or directory (os error 2) + + error: io: `/project/tests`: No such file or directory (os error 2) + + Found 2 diagnostics + + ----- stderr ----- + WARN No python files found under the given path(s) + " ); Ok(()) diff --git a/crates/red_knot/tests/file_watching.rs b/crates/red_knot/tests/file_watching.rs index a0a77f6719..5c6a78f21c 100644 --- a/crates/red_knot/tests/file_watching.rs +++ b/crates/red_knot/tests/file_watching.rs @@ -1,5 +1,6 @@ #![allow(clippy::disallowed_names)] +use std::collections::HashSet; use std::io::Write; use std::time::{Duration, Instant}; @@ -193,11 +194,29 @@ impl TestCase { Ok(()) } - fn collect_project_files(&self) -> Vec { - let files = self.db().project().files(self.db()); - let mut collected: Vec<_> = files.into_iter().collect(); - collected.sort_unstable_by_key(|file| file.path(self.db()).as_system_path().unwrap()); - 
collected + #[track_caller] + fn assert_indexed_project_files(&self, expected: impl IntoIterator) { + let mut expected: HashSet<_> = expected.into_iter().collect(); + + let actual = self.db().project().files(self.db()); + for file in &actual { + assert!( + expected.remove(&file), + "Indexed project files contains '{}' which was not expected.", + file.path(self.db()) + ); + } + + if !expected.is_empty() { + let paths: Vec<_> = expected + .iter() + .map(|file| file.path(self.db()).as_str()) + .collect(); + panic!( + "Indexed project files are missing the following files: {:?}", + paths.join(", ") + ); + } } fn system_file(&self, path: impl AsRef) -> Result { @@ -222,13 +241,15 @@ where } } -trait SetupFiles { - fn setup(self, context: &SetupContext) -> anyhow::Result<()>; +trait Setup { + fn setup(self, context: &mut SetupContext) -> anyhow::Result<()>; } struct SetupContext<'a> { system: &'a OsSystem, root_path: &'a SystemPath, + options: Option, + included_paths: Option>, } impl<'a> SetupContext<'a> { @@ -251,55 +272,77 @@ impl<'a> SetupContext<'a> { fn join_root_path(&self, relative: impl AsRef) -> SystemPathBuf { self.root_path().join(relative) } + + fn write_project_file( + &self, + relative_path: impl AsRef, + content: &str, + ) -> anyhow::Result<()> { + let relative_path = relative_path.as_ref(); + let absolute_path = self.join_project_path(relative_path); + Self::write_file_impl(absolute_path, content) + } + + fn write_file( + &self, + relative_path: impl AsRef, + content: &str, + ) -> anyhow::Result<()> { + let relative_path = relative_path.as_ref(); + let absolute_path = self.join_root_path(relative_path); + Self::write_file_impl(absolute_path, content) + } + + fn write_file_impl(path: impl AsRef, content: &str) -> anyhow::Result<()> { + let path = path.as_ref(); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent) + .with_context(|| format!("Failed to create parent directory for file `{path}`"))?; + } + + let mut file = 
std::fs::File::create(path.as_std_path()) + .with_context(|| format!("Failed to open file `{path}`"))?; + file.write_all(content.as_bytes()) + .with_context(|| format!("Failed to write to file `{path}`"))?; + file.sync_data()?; + + Ok(()) + } + + fn set_options(&mut self, options: Options) { + self.options = Some(options); + } + + fn set_included_paths(&mut self, paths: Vec) { + self.included_paths = Some(paths); + } } -impl SetupFiles for [(P, &'static str); N] +impl Setup for [(P, &'static str); N] where P: AsRef, { - fn setup(self, context: &SetupContext) -> anyhow::Result<()> { + fn setup(self, context: &mut SetupContext) -> anyhow::Result<()> { for (relative_path, content) in self { - let relative_path = relative_path.as_ref(); - let absolute_path = context.join_project_path(relative_path); - if let Some(parent) = absolute_path.parent() { - std::fs::create_dir_all(parent).with_context(|| { - format!("Failed to create parent directory for file `{relative_path}`") - })?; - } - - let mut file = std::fs::File::create(absolute_path.as_std_path()) - .with_context(|| format!("Failed to open file `{relative_path}`"))?; - file.write_all(content.as_bytes()) - .with_context(|| format!("Failed to write to file `{relative_path}`"))?; - file.sync_data()?; + context.write_project_file(relative_path, content)?; } Ok(()) } } -impl SetupFiles for F +impl Setup for F where - F: FnOnce(&SetupContext) -> anyhow::Result<()>, + F: FnOnce(&mut SetupContext) -> anyhow::Result<()>, { - fn setup(self, context: &SetupContext) -> anyhow::Result<()> { + fn setup(self, context: &mut SetupContext) -> anyhow::Result<()> { self(context) } } fn setup(setup_files: F) -> anyhow::Result where - F: SetupFiles, -{ - setup_with_options(setup_files, |_context| None) -} - -fn setup_with_options( - setup_files: F, - create_options: impl FnOnce(&SetupContext) -> Option, -) -> anyhow::Result -where - F: SetupFiles, + F: Setup, { let temp_dir = tempfile::tempdir()?; @@ -325,16 +368,18 @@ where 
.with_context(|| format!("Failed to create project directory `{project_path}`"))?; let system = OsSystem::new(&project_path); - let setup_context = SetupContext { + let mut setup_context = SetupContext { system: &system, root_path: &root_path, + options: None, + included_paths: None, }; setup_files - .setup(&setup_context) + .setup(&mut setup_context) .context("Failed to setup test files")?; - if let Some(options) = create_options(&setup_context) { + if let Some(options) = setup_context.options { std::fs::write( project_path.join("pyproject.toml").as_std_path(), toml::to_string(&PyProject { @@ -348,6 +393,8 @@ where .context("Failed to write configuration")?; } + let included_paths = setup_context.included_paths; + let mut project = ProjectMetadata::discover(&project_path, &system)?; project.apply_configuration_files(&system)?; @@ -363,7 +410,11 @@ where .with_context(|| format!("Failed to create search path `{path}`"))?; } - let db = ProjectDatabase::new(project, system)?; + let mut db = ProjectDatabase::new(project, system)?; + + if let Some(included_paths) = included_paths { + db.project().set_included_paths(&mut db, included_paths); + } let (sender, receiver) = crossbeam::channel::unbounded(); let watcher = directory_watcher(move |events| sender.send(events).unwrap()) @@ -425,7 +476,7 @@ fn new_file() -> anyhow::Result<()> { let foo_path = case.project_path("foo.py"); assert_eq!(case.system_file(&foo_path), Err(FileError::NotFound)); - assert_eq!(&case.collect_project_files(), &[bar_file]); + case.assert_indexed_project_files([bar_file]); std::fs::write(foo_path.as_std_path(), "print('Hello')")?; @@ -435,7 +486,7 @@ fn new_file() -> anyhow::Result<()> { let foo = case.system_file(&foo_path).expect("foo.py to exist."); - assert_eq!(&case.collect_project_files(), &[bar_file, foo]); + case.assert_indexed_project_files([bar_file, foo]); Ok(()) } @@ -448,7 +499,7 @@ fn new_ignored_file() -> anyhow::Result<()> { let foo_path = case.project_path("foo.py"); 
assert_eq!(case.system_file(&foo_path), Err(FileError::NotFound)); - assert_eq!(&case.collect_project_files(), &[bar_file]); + case.assert_indexed_project_files([bar_file]); std::fs::write(foo_path.as_std_path(), "print('Hello')")?; @@ -457,15 +508,16 @@ fn new_ignored_file() -> anyhow::Result<()> { case.apply_changes(changes); assert!(case.system_file(&foo_path).is_ok()); - assert_eq!(&case.collect_project_files(), &[bar_file]); + case.assert_indexed_project_files([bar_file]); Ok(()) } #[test] fn new_non_project_file() -> anyhow::Result<()> { - let mut case = setup_with_options([("bar.py", "")], |context| { - Some(Options { + let mut case = setup(|context: &mut SetupContext| { + context.write_project_file("bar.py", "")?; + context.set_options(Options { environment: Some(EnvironmentOptions { extra_paths: Some(vec![RelativePathBuf::cli( context.join_root_path("site_packages"), @@ -473,13 +525,15 @@ fn new_non_project_file() -> anyhow::Result<()> { ..EnvironmentOptions::default() }), ..Options::default() - }) + }); + + Ok(()) })?; let bar_path = case.project_path("bar.py"); let bar_file = case.system_file(&bar_path).unwrap(); - assert_eq!(&case.collect_project_files(), &[bar_file]); + case.assert_indexed_project_files([bar_file]); // Add a file to site packages let black_path = case.root_path().join("site_packages/black.py"); @@ -492,7 +546,94 @@ fn new_non_project_file() -> anyhow::Result<()> { assert!(case.system_file(&black_path).is_ok()); // The file should not have been added to the project files - assert_eq!(&case.collect_project_files(), &[bar_file]); + case.assert_indexed_project_files([bar_file]); + + Ok(()) +} + +#[test] +fn new_files_with_explicit_included_paths() -> anyhow::Result<()> { + let mut case = setup(|context: &mut SetupContext| { + context.write_project_file("src/main.py", "")?; + context.write_project_file("src/sub/__init__.py", "")?; + context.write_project_file("src/test.py", "")?; + context.set_included_paths(vec![ + 
context.join_project_path("src/main.py"), + context.join_project_path("src/sub"), + ]); + Ok(()) + })?; + + let main_path = case.project_path("src/main.py"); + let main_file = case.system_file(&main_path).unwrap(); + + let sub_init_path = case.project_path("src/sub/__init__.py"); + let sub_init = case.system_file(&sub_init_path).unwrap(); + + case.assert_indexed_project_files([main_file, sub_init]); + + // Write a new file to `sub` which is an included path + let sub_a_path = case.project_path("src/sub/a.py"); + std::fs::write(sub_a_path.as_std_path(), "print('Hello')")?; + + // and write a second file in the root directory -- this should not be included + let test2_path = case.project_path("src/test2.py"); + std::fs::write(test2_path.as_std_path(), "print('Hello')")?; + + let changes = case.stop_watch(event_for_file("test2.py")); + + case.apply_changes(changes); + + let sub_a_file = case.system_file(&sub_a_path).expect("sub/a.py to exist"); + + case.assert_indexed_project_files([main_file, sub_init, sub_a_file]); + + Ok(()) +} + +#[test] +fn new_file_in_included_out_of_project_directory() -> anyhow::Result<()> { + let mut case = setup(|context: &mut SetupContext| { + context.write_project_file("src/main.py", "")?; + context.write_project_file("script.py", "")?; + context.write_file("outside_project/a.py", "")?; + + context.set_included_paths(vec![ + context.join_root_path("outside_project"), + context.join_project_path("src"), + ]); + Ok(()) + })?; + + let main_path = case.project_path("src/main.py"); + let main_file = case.system_file(&main_path).unwrap(); + + let outside_a_path = case.root_path().join("outside_project/a.py"); + let outside_a = case.system_file(&outside_a_path).unwrap(); + + case.assert_indexed_project_files([outside_a, main_file]); + + // Write a new file to `src` which should be watched + let src_a = case.project_path("src/a.py"); + std::fs::write(src_a.as_std_path(), "print('Hello')")?; + + // and write a second file to `outside_project` which 
should be watched too + let outside_b_path = case.root_path().join("outside_project/b.py"); + std::fs::write(outside_b_path.as_std_path(), "print('Hello')")?; + + // and a third file in the project's root that should not be included + let script2_path = case.project_path("script2.py"); + std::fs::write(script2_path.as_std_path(), "print('Hello')")?; + + let changes = case.stop_watch(event_for_file("script2.py")); + + case.apply_changes(changes); + + let src_a_file = case.system_file(&src_a).unwrap(); + let outside_b_file = case.system_file(&outside_b_path).unwrap(); + + // The file should not have been added to the project files + case.assert_indexed_project_files([main_file, outside_a, outside_b_file, src_a_file]); Ok(()) } @@ -505,7 +646,7 @@ fn changed_file() -> anyhow::Result<()> { let foo = case.system_file(&foo_path)?; assert_eq!(source_text(case.db(), foo).as_str(), foo_source); - assert_eq!(&case.collect_project_files(), &[foo]); + case.assert_indexed_project_files([foo]); update_file(&foo_path, "print('Version 2')")?; @@ -516,7 +657,7 @@ fn changed_file() -> anyhow::Result<()> { case.apply_changes(changes); assert_eq!(source_text(case.db(), foo).as_str(), "print('Version 2')"); - assert_eq!(&case.collect_project_files(), &[foo]); + case.assert_indexed_project_files([foo]); Ok(()) } @@ -530,7 +671,7 @@ fn deleted_file() -> anyhow::Result<()> { let foo = case.system_file(&foo_path)?; assert!(foo.exists(case.db())); - assert_eq!(&case.collect_project_files(), &[foo]); + case.assert_indexed_project_files([foo]); std::fs::remove_file(foo_path.as_std_path())?; @@ -539,7 +680,7 @@ fn deleted_file() -> anyhow::Result<()> { case.apply_changes(changes); assert!(!foo.exists(case.db())); - assert_eq!(&case.collect_project_files(), &[] as &[File]); + case.assert_indexed_project_files([]); Ok(()) } @@ -559,7 +700,7 @@ fn move_file_to_trash() -> anyhow::Result<()> { let foo = case.system_file(&foo_path)?; assert!(foo.exists(case.db())); - 
assert_eq!(&case.collect_project_files(), &[foo]); + case.assert_indexed_project_files([foo]); std::fs::rename( foo_path.as_std_path(), @@ -571,7 +712,7 @@ fn move_file_to_trash() -> anyhow::Result<()> { case.apply_changes(changes); assert!(!foo.exists(case.db())); - assert_eq!(&case.collect_project_files(), &[] as &[File]); + case.assert_indexed_project_files([]); Ok(()) } @@ -589,7 +730,7 @@ fn move_file_to_project() -> anyhow::Result<()> { let foo_in_project = case.project_path("foo.py"); assert!(case.system_file(&foo_path).is_ok()); - assert_eq!(&case.collect_project_files(), &[bar]); + case.assert_indexed_project_files([bar]); std::fs::rename(foo_path.as_std_path(), foo_in_project.as_std_path())?; @@ -600,7 +741,7 @@ fn move_file_to_project() -> anyhow::Result<()> { let foo_in_project = case.system_file(&foo_in_project)?; assert!(foo_in_project.exists(case.db())); - assert_eq!(&case.collect_project_files(), &[bar, foo_in_project]); + case.assert_indexed_project_files([bar, foo_in_project]); Ok(()) } @@ -614,7 +755,7 @@ fn rename_file() -> anyhow::Result<()> { let foo = case.system_file(&foo_path)?; - assert_eq!(case.collect_project_files(), [foo]); + case.assert_indexed_project_files([foo]); std::fs::rename(foo_path.as_std_path(), bar_path.as_std_path())?; @@ -627,7 +768,7 @@ fn rename_file() -> anyhow::Result<()> { let bar = case.system_file(&bar_path)?; assert!(bar.exists(case.db())); - assert_eq!(case.collect_project_files(), [bar]); + case.assert_indexed_project_files([bar]); Ok(()) } @@ -653,7 +794,7 @@ fn directory_moved_to_project() -> anyhow::Result<()> { ); assert_eq!(sub_a_module, None); - assert_eq!(case.collect_project_files(), &[bar]); + case.assert_indexed_project_files([bar]); let sub_new_path = case.project_path("sub"); std::fs::rename(sub_original_path.as_std_path(), sub_new_path.as_std_path()) @@ -677,7 +818,7 @@ fn directory_moved_to_project() -> anyhow::Result<()> { ) .is_some()); - assert_eq!(case.collect_project_files(), &[bar, init_file, 
a_file]); + case.assert_indexed_project_files([bar, init_file, a_file]); Ok(()) } @@ -705,7 +846,7 @@ fn directory_moved_to_trash() -> anyhow::Result<()> { .system_file(sub_path.join("a.py")) .expect("a.py to exist"); - assert_eq!(case.collect_project_files(), &[bar, init_file, a_file]); + case.assert_indexed_project_files([bar, init_file, a_file]); std::fs::create_dir(case.root_path().join(".trash").as_std_path())?; let trashed_sub = case.root_path().join(".trash/sub"); @@ -726,7 +867,7 @@ fn directory_moved_to_trash() -> anyhow::Result<()> { assert!(!init_file.exists(case.db())); assert!(!a_file.exists(case.db())); - assert_eq!(case.collect_project_files(), &[bar]); + case.assert_indexed_project_files([bar]); Ok(()) } @@ -760,7 +901,7 @@ fn directory_renamed() -> anyhow::Result<()> { .system_file(sub_path.join("a.py")) .expect("a.py to exist"); - assert_eq!(case.collect_project_files(), &[bar, sub_init, sub_a]); + case.assert_indexed_project_files([bar, sub_init, sub_a]); let foo_baz = case.project_path("foo/baz"); @@ -802,10 +943,7 @@ fn directory_renamed() -> anyhow::Result<()> { assert!(foo_baz_init.exists(case.db())); assert!(foo_baz_a.exists(case.db())); - assert_eq!( - case.collect_project_files(), - &[bar, foo_baz_init, foo_baz_a] - ); + case.assert_indexed_project_files([bar, foo_baz_init, foo_baz_a]); Ok(()) } @@ -834,7 +972,7 @@ fn directory_deleted() -> anyhow::Result<()> { let a_file = case .system_file(sub_path.join("a.py")) .expect("a.py to exist"); - assert_eq!(case.collect_project_files(), &[bar, init_file, a_file]); + case.assert_indexed_project_files([bar, init_file, a_file]); std::fs::remove_dir_all(sub_path.as_std_path()) .with_context(|| "Failed to remove the sub directory")?; @@ -852,15 +990,17 @@ fn directory_deleted() -> anyhow::Result<()> { assert!(!init_file.exists(case.db())); assert!(!a_file.exists(case.db())); - assert_eq!(case.collect_project_files(), &[bar]); + case.assert_indexed_project_files([bar]); Ok(()) } #[test] fn 
search_path() -> anyhow::Result<()> { - let mut case = setup_with_options([("bar.py", "import sub.a")], |context| { - Some(Options { + let mut case = setup(|context: &mut SetupContext| { + context.write_project_file("bar.py", "import sub.a")?; + + context.set_options(Options { environment: Some(EnvironmentOptions { extra_paths: Some(vec![RelativePathBuf::cli( context.join_root_path("site_packages"), @@ -868,7 +1008,8 @@ fn search_path() -> anyhow::Result<()> { ..EnvironmentOptions::default() }), ..Options::default() - }) + }); + Ok(()) })?; let site_packages = case.root_path().join("site_packages"); @@ -885,10 +1026,7 @@ fn search_path() -> anyhow::Result<()> { case.apply_changes(changes); assert!(resolve_module(case.db().upcast(), &ModuleName::new_static("a").unwrap()).is_some()); - assert_eq!( - case.collect_project_files(), - &[case.system_file(case.project_path("bar.py")).unwrap()] - ); + case.assert_indexed_project_files([case.system_file(case.project_path("bar.py")).unwrap()]); Ok(()) } @@ -925,8 +1063,9 @@ fn add_search_path() -> anyhow::Result<()> { #[test] fn remove_search_path() -> anyhow::Result<()> { - let mut case = setup_with_options([("bar.py", "import sub.a")], |context| { - Some(Options { + let mut case = setup(|context: &mut SetupContext| { + context.write_project_file("bar.py", "import sub.a")?; + context.set_options(Options { environment: Some(EnvironmentOptions { extra_paths: Some(vec![RelativePathBuf::cli( context.join_root_path("site_packages"), @@ -934,7 +1073,9 @@ fn remove_search_path() -> anyhow::Result<()> { ..EnvironmentOptions::default() }), ..Options::default() - }) + }); + + Ok(()) })?; // Remove site packages from the search path settings. 
@@ -957,30 +1098,30 @@ fn remove_search_path() -> anyhow::Result<()> { #[test] fn change_python_version_and_platform() -> anyhow::Result<()> { - let mut case = setup_with_options( + let mut case = setup(|context: &mut SetupContext| { // `sys.last_exc` is a Python 3.12 only feature // `os.getegid()` is Unix only - [( + context.write_project_file( "bar.py", r#" import sys import os print(sys.last_exc, os.getegid()) "#, - )], - |_context| { - Some(Options { - environment: Some(EnvironmentOptions { - python_version: Some(RangedValue::cli(PythonVersion::PY311)), - python_platform: Some(RangedValue::cli(PythonPlatform::Identifier( - "win32".to_string(), - ))), - ..EnvironmentOptions::default() - }), - ..Options::default() - }) - }, - )?; + )?; + context.set_options(Options { + environment: Some(EnvironmentOptions { + python_version: Some(RangedValue::cli(PythonVersion::PY311)), + python_platform: Some(RangedValue::cli(PythonPlatform::Identifier( + "win32".to_string(), + ))), + ..EnvironmentOptions::default() + }), + ..Options::default() + }); + + Ok(()) + })?; let diagnostics = case.db.check().context("Failed to check project.")?; @@ -1015,38 +1156,35 @@ print(sys.last_exc, os.getegid()) #[test] fn changed_versions_file() -> anyhow::Result<()> { - let mut case = setup_with_options( - |context: &SetupContext| { - std::fs::write( - context.join_project_path("bar.py").as_std_path(), - "import sub.a", - )?; - std::fs::create_dir_all(context.join_root_path("typeshed/stdlib").as_std_path())?; - std::fs::write( - context - .join_root_path("typeshed/stdlib/VERSIONS") - .as_std_path(), - "", - )?; - std::fs::write( - context - .join_root_path("typeshed/stdlib/os.pyi") - .as_std_path(), - "# not important", - )?; + let mut case = setup(|context: &mut SetupContext| { + std::fs::write( + context.join_project_path("bar.py").as_std_path(), + "import sub.a", + )?; + std::fs::create_dir_all(context.join_root_path("typeshed/stdlib").as_std_path())?; + std::fs::write( + context + 
.join_root_path("typeshed/stdlib/VERSIONS") + .as_std_path(), + "", + )?; + std::fs::write( + context + .join_root_path("typeshed/stdlib/os.pyi") + .as_std_path(), + "# not important", + )?; - Ok(()) - }, - |context| { - Some(Options { - environment: Some(EnvironmentOptions { - typeshed: Some(RelativePathBuf::cli(context.join_root_path("typeshed"))), - ..EnvironmentOptions::default() - }), - ..Options::default() - }) - }, - )?; + context.set_options(Options { + environment: Some(EnvironmentOptions { + typeshed: Some(RelativePathBuf::cli(context.join_root_path("typeshed"))), + ..EnvironmentOptions::default() + }), + ..Options::default() + }); + + Ok(()) + })?; // Unset the custom typeshed directory. assert_eq!( @@ -1091,7 +1229,7 @@ fn changed_versions_file() -> anyhow::Result<()> { /// we're seeing is that Windows only emits a single event, similar to Linux. #[test] fn hard_links_in_project() -> anyhow::Result<()> { - let mut case = setup(|context: &SetupContext| { + let mut case = setup(|context: &mut SetupContext| { let foo_path = context.join_project_path("foo.py"); std::fs::write(foo_path.as_std_path(), "print('Version 1')")?; @@ -1110,7 +1248,7 @@ fn hard_links_in_project() -> anyhow::Result<()> { assert_eq!(source_text(case.db(), foo).as_str(), "print('Version 1')"); assert_eq!(source_text(case.db(), bar).as_str(), "print('Version 1')"); - assert_eq!(case.collect_project_files(), &[bar, foo]); + case.assert_indexed_project_files([bar, foo]); // Write to the hard link target. update_file(foo_path, "print('Version 2')").context("Failed to update foo.py")?; @@ -1163,7 +1301,7 @@ fn hard_links_in_project() -> anyhow::Result<()> { ignore = "windows doesn't support observing changes to hard linked files." 
)] fn hard_links_to_target_outside_project() -> anyhow::Result<()> { - let mut case = setup(|context: &SetupContext| { + let mut case = setup(|context: &mut SetupContext| { let foo_path = context.join_root_path("foo.py"); std::fs::write(foo_path.as_std_path(), "print('Version 1')")?; @@ -1271,7 +1409,7 @@ mod unix { ignore = "FSEvents doesn't emit change events for symlinked directories outside of the watched paths." )] fn symlink_target_outside_watched_paths() -> anyhow::Result<()> { - let mut case = setup(|context: &SetupContext| { + let mut case = setup(|context: &mut SetupContext| { // Set up the symlink target. let link_target = context.join_root_path("bar"); std::fs::create_dir_all(link_target.as_std_path()) @@ -1352,7 +1490,7 @@ mod unix { /// ``` #[test] fn symlink_inside_project() -> anyhow::Result<()> { - let mut case = setup(|context: &SetupContext| { + let mut case = setup(|context: &mut SetupContext| { // Set up the symlink target. let link_target = context.join_project_path("patched/bar"); std::fs::create_dir_all(link_target.as_std_path()) @@ -1390,7 +1528,7 @@ mod unix { ); assert_eq!(baz.file().path(case.db()).as_system_path(), Some(&*bar_baz)); - assert_eq!(case.collect_project_files(), &[patched_bar_baz_file]); + case.assert_indexed_project_files([patched_bar_baz_file]); // Write to the symlink target. update_file(&patched_bar_baz, "def baz(): print('Version 2')") @@ -1427,7 +1565,7 @@ mod unix { bar_baz_text = bar_baz_text.as_str() ); - assert_eq!(case.collect_project_files(), &[patched_bar_baz_file]); + case.assert_indexed_project_files([patched_bar_baz_file]); Ok(()) } @@ -1445,43 +1583,39 @@ mod unix { /// ``` #[test] fn symlinked_module_search_path() -> anyhow::Result<()> { - let mut case = setup_with_options( - |context: &SetupContext| { - // Set up the symlink target. 
- let site_packages = context.join_root_path("site-packages"); - let bar = site_packages.join("bar"); - std::fs::create_dir_all(bar.as_std_path()) - .context("Failed to create bar directory")?; - let baz_original = bar.join("baz.py"); - std::fs::write(baz_original.as_std_path(), "def baz(): ...") - .context("Failed to write baz.py")?; + let mut case = setup(|context: &mut SetupContext| { + // Set up the symlink target. + let site_packages = context.join_root_path("site-packages"); + let bar = site_packages.join("bar"); + std::fs::create_dir_all(bar.as_std_path()).context("Failed to create bar directory")?; + let baz_original = bar.join("baz.py"); + std::fs::write(baz_original.as_std_path(), "def baz(): ...") + .context("Failed to write baz.py")?; - // Symlink the site packages in the venv to the global site packages - let venv_site_packages = - context.join_project_path(".venv/lib/python3.12/site-packages"); - std::fs::create_dir_all(venv_site_packages.parent().unwrap()) - .context("Failed to create .venv directory")?; - std::os::unix::fs::symlink( - site_packages.as_std_path(), - venv_site_packages.as_std_path(), - ) - .context("Failed to create symlink to site-packages")?; + // Symlink the site packages in the venv to the global site packages + let venv_site_packages = + context.join_project_path(".venv/lib/python3.12/site-packages"); + std::fs::create_dir_all(venv_site_packages.parent().unwrap()) + .context("Failed to create .venv directory")?; + std::os::unix::fs::symlink( + site_packages.as_std_path(), + venv_site_packages.as_std_path(), + ) + .context("Failed to create symlink to site-packages")?; - Ok(()) - }, - |_context| { - Some(Options { - environment: Some(EnvironmentOptions { - extra_paths: Some(vec![RelativePathBuf::cli( - ".venv/lib/python3.12/site-packages", - )]), - python_version: Some(RangedValue::cli(PythonVersion::PY312)), - ..EnvironmentOptions::default() - }), - ..Options::default() - }) - }, - )?; + context.set_options(Options { + 
environment: Some(EnvironmentOptions { + extra_paths: Some(vec![RelativePathBuf::cli( + ".venv/lib/python3.12/site-packages", + )]), + python_version: Some(RangedValue::cli(PythonVersion::PY312)), + ..EnvironmentOptions::default() + }), + ..Options::default() + }); + + Ok(()) + })?; let baz = resolve_module( case.db().upcast(), @@ -1508,7 +1642,7 @@ mod unix { Some(&*baz_original) ); - assert_eq!(case.collect_project_files(), &[]); + case.assert_indexed_project_files([]); // Write to the symlink target. update_file(&baz_original, "def baz(): print('Version 2')") @@ -1535,7 +1669,7 @@ mod unix { "def baz(): print('Version 2')" ); - assert_eq!(case.collect_project_files(), &[]); + case.assert_indexed_project_files([]); Ok(()) } @@ -1543,7 +1677,7 @@ mod unix { #[test] fn nested_projects_delete_root() -> anyhow::Result<()> { - let mut case = setup(|context: &SetupContext| { + let mut case = setup(|context: &mut SetupContext| { std::fs::write( context.join_project_path("pyproject.toml").as_std_path(), r#" @@ -1585,7 +1719,7 @@ fn nested_projects_delete_root() -> anyhow::Result<()> { fn changes_to_user_configuration() -> anyhow::Result<()> { let mut _config_dir_override: Option = None; - let mut case = setup(|context: &SetupContext| { + let mut case = setup(|context: &mut SetupContext| { std::fs::write( context.join_project_path("pyproject.toml").as_std_path(), r#" diff --git a/crates/red_knot_project/src/db/changes.rs b/crates/red_knot_project/src/db/changes.rs index a910afc516..66b6552da1 100644 --- a/crates/red_knot_project/src/db/changes.rs +++ b/crates/red_knot_project/src/db/changes.rs @@ -2,20 +2,20 @@ use crate::db::{Db, ProjectDatabase}; use crate::metadata::options::Options; use crate::watch::{ChangeEvent, CreatedKind, DeletedKind}; use crate::{Project, ProjectMetadata}; +use std::collections::BTreeSet; +use crate::walk::ProjectFilesWalker; use red_knot_python_semantic::Program; -use ruff_db::files::{system_path_to_file, File, Files}; -use 
ruff_db::system::walk_directory::WalkState; +use ruff_db::files::{File, Files}; use ruff_db::system::SystemPath; use ruff_db::Db as _; -use ruff_python_ast::PySourceType; use rustc_hash::FxHashSet; impl ProjectDatabase { #[tracing::instrument(level = "debug", skip(self, changes, cli_options))] pub fn apply_changes(&mut self, changes: Vec, cli_options: Option<&Options>) { let mut project = self.project(); - let project_path = project.root(self).to_path_buf(); + let project_root = project.root(self).to_path_buf(); let program = Program::get(self); let custom_stdlib_versions_path = program .custom_stdlib_search_path(self) @@ -30,7 +30,7 @@ impl ProjectDatabase { // Deduplicate the `sync` calls. Many file watchers emit multiple events for the same path. let mut synced_files = FxHashSet::default(); - let mut synced_recursively = FxHashSet::default(); + let mut sync_recursively = BTreeSet::default(); let mut sync_path = |db: &mut ProjectDatabase, path: &SystemPath| { if synced_files.insert(path.to_path_buf()) { @@ -38,13 +38,9 @@ impl ProjectDatabase { } }; - let mut sync_recursively = |db: &mut ProjectDatabase, path: &SystemPath| { - if synced_recursively.insert(path.to_path_buf()) { - Files::sync_recursively(db, path); - } - }; - for change in changes { + tracing::trace!("Handle change: {:?}", change); + if let Some(path) = change.system_path() { if matches!( path.file_name(), @@ -70,16 +66,27 @@ impl ProjectDatabase { match kind { CreatedKind::File => sync_path(self, &path), CreatedKind::Directory | CreatedKind::Any => { - sync_recursively(self, &path); + sync_recursively.insert(path.clone()); } } - if self.system().is_file(&path) { - // Add the parent directory because `walkdir` always visits explicitly passed files - // even if they match an exclude filter. 
- added_paths.insert(path.parent().unwrap().to_path_buf()); - } else { - added_paths.insert(path); + // Unlike other files, it's not only important to update the status of existing + // and known `File`s (`sync_recursively`), it's also important to discover new files + // that were added in the project's root (or any of the paths included for checking). + // + // This is important because `Project::check` iterates over all included files. + // The code below walks the `added_paths` and adds all files that + // should be included in the project. We can skip this check for + // paths that aren't part of the project or shouldn't be included + // when checking the project. + if project.is_path_included(self, &path) { + if self.system().is_file(&path) { + // Add the parent directory because `walkdir` always visits explicitly passed files + // even if they match an exclude filter. + added_paths.insert(path.parent().unwrap().to_path_buf()); + } else { + added_paths.insert(path); + } } } @@ -103,7 +110,7 @@ impl ProjectDatabase { project.remove_file(self, file); } } else { - sync_recursively(self, &path); + sync_recursively.insert(path.clone()); if custom_stdlib_versions_path .as_ref() @@ -112,11 +119,19 @@ impl ProjectDatabase { custom_stdlib_change = true; } - // Perform a full-reload in case the deleted directory contained the pyproject.toml. - // We may want to make this more clever in the future, to e.g. iterate over the - // indexed files and remove the once that start with the same path, unless - // the deleted path is the project configuration. - project_changed = true; + if project.is_path_included(self, &path) || path == project_root { + // TODO: Shouldn't it be enough to simply traverse the project files and remove all + // that start with the given path? + tracing::debug!( + "Reload project because of a path that could have been a directory." + ); + + // Perform a full-reload in case the deleted directory contained the pyproject.toml. 
+ // We may want to make this more clever in the future, to e.g. iterate over the + // indexed files and remove the once that start with the same path, unless + // the deleted path is the project configuration. + project_changed = true; + } } } @@ -133,13 +148,29 @@ impl ProjectDatabase { ChangeEvent::Rescan => { project_changed = true; Files::sync_all(self); + sync_recursively.clear(); break; } } } + let sync_recursively = sync_recursively.into_iter(); + let mut last = None; + + for path in sync_recursively { + // Avoid re-syncing paths that are sub-paths of each other. + if let Some(last) = &last { + if path.starts_with(last) { + continue; + } + } + + Files::sync_recursively(self, &path); + last = Some(path); + } + if project_changed { - match ProjectMetadata::discover(&project_path, self.system()) { + match ProjectMetadata::discover(&project_root, self.system()) { Ok(mut metadata) => { if let Some(cli_options) = cli_options { metadata.apply_cli_options(cli_options.clone()); @@ -186,51 +217,24 @@ impl ProjectDatabase { } } - let mut added_paths = added_paths.into_iter(); + let diagnostics = if let Some(walker) = ProjectFilesWalker::incremental(self, added_paths) { + // Use directory walking to discover newly added files. + let (files, diagnostics) = walker.collect_vec(self); - // Use directory walking to discover newly added files. 
- if let Some(path) = added_paths.next() { - let mut walker = self.system().walk_directory(&path); - - for extra_path in added_paths { - walker = walker.add(&extra_path); + for file in files { + project.add_file(self, file); } - let added_paths = std::sync::Mutex::new(Vec::default()); + diagnostics + } else { + Vec::new() + }; - walker.run(|| { - Box::new(|entry| { - let Ok(entry) = entry else { - return WalkState::Continue; - }; - - if !entry.file_type().is_file() { - return WalkState::Continue; - } - - if entry.path().starts_with(&project_path) - && entry - .path() - .extension() - .and_then(PySourceType::try_from_extension) - .is_some() - { - let mut paths = added_paths.lock().unwrap(); - - paths.push(entry.into_path()); - } - - WalkState::Continue - }) - }); - - for path in added_paths.into_inner().unwrap() { - let file = system_path_to_file(self, &path); - - if let Ok(file) = file { - project.add_file(self, file); - } - } - } + // Note: We simply replace all IO related diagnostics here. This isn't ideal, because + // it removes IO errors that may still be relevant. However, tracking IO errors correctly + // across revisions doesn't feel essential, considering that they're rare. However, we could + // implement a `BTreeMap` or similar and only prune the diagnostics from paths that we've + // re-scanned (or that were removed etc). + project.replace_index_diagnostics(self, diagnostics); } } diff --git a/crates/red_knot_project/src/files.rs b/crates/red_knot_project/src/files.rs index 46707cf14f..6935577ba2 100644 --- a/crates/red_knot_project/src/files.rs +++ b/crates/red_knot_project/src/files.rs @@ -8,10 +8,7 @@ use salsa::Setter; use ruff_db::files::File; use crate::db::Db; -use crate::Project; - -/// Cheap cloneable hash set of files. -type FileSet = Arc>; +use crate::{IOErrorDiagnostic, Project}; /// The indexed files of a project. 
/// @@ -35,9 +32,9 @@ impl IndexedFiles { } } - fn indexed(files: FileSet) -> Self { + fn indexed(inner: Arc) -> Self { Self { - state: std::sync::Mutex::new(State::Indexed(files)), + state: std::sync::Mutex::new(State::Indexed(inner)), } } @@ -46,8 +43,8 @@ impl IndexedFiles { match &*state { State::Lazy => Index::Lazy(LazyFiles { files: state }), - State::Indexed(files) => Index::Indexed(Indexed { - files: Arc::clone(files), + State::Indexed(inner) => Index::Indexed(Indexed { + inner: Arc::clone(inner), _lifetime: PhantomData, }), } @@ -94,7 +91,7 @@ impl IndexedFiles { Some(IndexedMut { db: Some(db), project, - files: indexed, + indexed, did_change: false, }) } @@ -112,7 +109,7 @@ enum State { Lazy, /// The files are indexed. Stores the known files of a package. - Indexed(FileSet), + Indexed(Arc), } pub(super) enum Index<'db> { @@ -129,32 +126,48 @@ pub(super) struct LazyFiles<'db> { impl<'db> LazyFiles<'db> { /// Sets the indexed files of a package to `files`. - pub(super) fn set(mut self, files: FxHashSet) -> Indexed<'db> { + pub(super) fn set( + mut self, + files: FxHashSet, + diagnostics: Vec, + ) -> Indexed<'db> { let files = Indexed { - files: Arc::new(files), + inner: Arc::new(IndexedInner { files, diagnostics }), _lifetime: PhantomData, }; - *self.files = State::Indexed(Arc::clone(&files.files)); + *self.files = State::Indexed(Arc::clone(&files.inner)); files } } -/// The indexed files of a package. +/// The indexed files of the project. /// /// Note: This type is intentionally non-cloneable. Making it cloneable requires /// revisiting the locking behavior in [`IndexedFiles::indexed_mut`]. -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug)] pub struct Indexed<'db> { - files: FileSet, + inner: Arc, // Preserve the lifetime of `PackageFiles`. 
_lifetime: PhantomData<&'db ()>, } +#[derive(Debug)] +struct IndexedInner { + files: FxHashSet, + diagnostics: Vec, +} + +impl Indexed<'_> { + pub(super) fn diagnostics(&self) -> &[IOErrorDiagnostic] { + &self.inner.diagnostics + } +} + impl Deref for Indexed<'_> { type Target = FxHashSet; fn deref(&self) -> &Self::Target { - &self.files + &self.inner.files } } @@ -165,7 +178,7 @@ impl<'a> IntoIterator for &'a Indexed<'_> { type IntoIter = IndexedIter<'a>; fn into_iter(self) -> Self::IntoIter { - self.files.iter().copied() + self.inner.files.iter().copied() } } @@ -176,13 +189,13 @@ impl<'a> IntoIterator for &'a Indexed<'_> { pub(super) struct IndexedMut<'db> { db: Option<&'db mut dyn Db>, project: Project, - files: FileSet, + indexed: Arc, did_change: bool, } impl IndexedMut<'_> { pub(super) fn insert(&mut self, file: File) -> bool { - if self.files_mut().insert(file) { + if self.inner_mut().files.insert(file) { self.did_change = true; true } else { @@ -191,7 +204,7 @@ impl IndexedMut<'_> { } pub(super) fn remove(&mut self, file: File) -> bool { - if self.files_mut().remove(&file) { + if self.inner_mut().files.remove(&file) { self.did_change = true; true } else { @@ -199,8 +212,13 @@ impl IndexedMut<'_> { } } - fn files_mut(&mut self) -> &mut FxHashSet { - Arc::get_mut(&mut self.files).expect("All references to `FilesSet` to have been dropped") + pub(super) fn set_diagnostics(&mut self, diagnostics: Vec) { + self.inner_mut().diagnostics = diagnostics; + } + + fn inner_mut(&mut self) -> &mut IndexedInner { + Arc::get_mut(&mut self.indexed) + .expect("All references to `FilesSet` should have been dropped") } fn set_impl(&mut self) { @@ -208,16 +226,16 @@ impl IndexedMut<'_> { return; }; - let files = Arc::clone(&self.files); + let indexed = Arc::clone(&self.indexed); if self.did_change { // If there are changes, set the new file_set to trigger a salsa revision change. 
self.project .set_file_set(db) - .to(IndexedFiles::indexed(files)); + .to(IndexedFiles::indexed(indexed)); } else { // The `indexed_mut` replaced the `state` with Lazy. Restore it back to the indexed state. - *self.project.file_set(db).state.lock().unwrap() = State::Indexed(files); + *self.project.file_set(db).state.lock().unwrap() = State::Indexed(indexed); } } } @@ -252,7 +270,7 @@ mod tests { let file = system_path_to_file(&db, "test.py").unwrap(); let files = match project.file_set(&db).get() { - Index::Lazy(lazy) => lazy.set(FxHashSet::from_iter([file])), + Index::Lazy(lazy) => lazy.set(FxHashSet::from_iter([file]), Vec::new()), Index::Indexed(files) => files, }; diff --git a/crates/red_knot_project/src/lib.rs b/crates/red_knot_project/src/lib.rs index 2126f36a84..ab0e286727 100644 --- a/crates/red_knot_project/src/lib.rs +++ b/crates/red_knot_project/src/lib.rs @@ -1,6 +1,7 @@ #![allow(clippy::ref_option)] use crate::metadata::options::OptionDiagnostic; +use crate::walk::{ProjectFilesFilter, ProjectFilesWalker}; pub use db::{Db, ProjectDatabase}; use files::{Index, Indexed, IndexedFiles}; use metadata::settings::Settings; @@ -9,23 +10,23 @@ use red_knot_python_semantic::lint::{LintRegistry, LintRegistryBuilder, RuleSele use red_knot_python_semantic::register_lints; use red_knot_python_semantic::types::check_types; use ruff_db::diagnostic::{Diagnostic, DiagnosticId, ParseDiagnostic, Severity, Span}; -use ruff_db::files::{system_path_to_file, File}; +use ruff_db::files::File; use ruff_db::parsed::parsed_module; use ruff_db::source::{source_text, SourceTextError}; -use ruff_db::system::walk_directory::WalkState; -use ruff_db::system::{FileType, SystemPath}; -use ruff_python_ast::PySourceType; -use rustc_hash::{FxBuildHasher, FxHashSet}; +use ruff_db::system::{SystemPath, SystemPathBuf}; +use rustc_hash::FxHashSet; use salsa::Durability; use salsa::Setter; use std::borrow::Cow; use std::sync::Arc; +use thiserror::Error; pub mod combine; mod db; mod files; pub mod 
metadata; +mod walk; pub mod watch; pub static DEFAULT_LINT_REGISTRY: std::sync::LazyLock = @@ -71,6 +72,30 @@ pub struct Project { #[return_ref] pub settings: Settings, + /// The paths that should be included when checking this project. + /// + /// The default (when this list is empty) is to include all files in the project root + /// (that satisfy the configured include and exclude patterns). + /// However, it's sometimes desired to only check a subset of the project, e.g. to see + /// the diagnostics for a single file or a folder. + /// + /// This list gets initialized by the paths passed to `knot check ` + /// + /// ## How is this different from `open_files`? + /// + /// The `included_paths` is closely related to `open_files`. The only difference is that + /// `open_files` is already a resolved set of files whereas `included_paths` is only a list of paths + /// that are resolved to files by indexing them. The other difference is that + /// new files added to any directory in `included_paths` will be indexed and added to the project + /// whereas `open_files` needs to be updated manually (e.g. by the IDE). + /// + /// In short, `open_files` is cheaper in contexts where the set of files is known, like + /// in an IDE when the user only wants to check the open tabs. This could be modeled + /// with `included_paths` too but it would require an explicit walk dir step that's simply unnecessary. + #[default] + #[return_ref] + included_paths_list: Vec, + /// Diagnostics that were generated when resolving the project settings. #[return_ref] settings_diagnostics: Vec, @@ -106,6 +131,16 @@ impl Project { self.settings(db).to_rules() } + /// Returns `true` if `path` is both part of the project and included (see `included_paths_list`). + /// + /// Unlike [Self::files], this method does not respect `.gitignore` files. It only checks + /// the project's include and exclude settings as well as the paths that were passed to `knot check `. 
+ /// This means, that this method is an over-approximation of `Self::files` and may return `true` for paths + /// that won't be included when checking the project because they're ignored in a `.gitignore` file. + pub fn is_path_included(self, db: &dyn Db, path: &SystemPath) -> bool { + ProjectFilesFilter::from_project(db, self).is_included(path) + } + pub fn reload(self, db: &mut dyn Db, metadata: ProjectMetadata) { tracing::debug!("Reloading project"); assert_eq!(self.root(db), metadata.root()); @@ -140,6 +175,13 @@ impl Project { diagnostic })); + let files = ProjectFiles::new(db, self); + + diagnostics.extend(files.diagnostics().iter().cloned().map(|diagnostic| { + let diagnostic: Box = Box::new(diagnostic); + diagnostic + })); + let result = Arc::new(std::sync::Mutex::new(diagnostics)); let inner_result = Arc::clone(&result); @@ -147,7 +189,6 @@ impl Project { let project_span = project_span.clone(); rayon::scope(move |scope| { - let files = ProjectFiles::new(&db, self); for file in &files { let result = inner_result.clone(); let db = db.clone(); @@ -207,6 +248,30 @@ impl Project { removed } + pub fn set_included_paths(self, db: &mut dyn Db, paths: Vec) { + tracing::debug!("Setting included paths: {paths}", paths = paths.len()); + + self.set_included_paths_list(db).to(paths); + self.reload_files(db); + } + + /// Returns the paths that should be checked. + /// + /// The default is to check the entire project in which case this method returns + /// the project root. However, users can specify to only check specific sub-folders or + /// even files of a project by using `knot check `. In that case, this method + /// returns the provided absolute paths. + /// + /// Note: The CLI doesn't prohibit users from specifying paths outside the project root. + /// This can be useful to check arbitrary files, but it isn't something we recommend. + /// We should try to support this use case but it's okay if there are some limitations around it. 
+ fn included_paths_or_root(self, db: &dyn Db) -> &[SystemPathBuf] { + match &**self.included_paths_list(db) { + [] => std::slice::from_ref(&self.metadata(db).root), + paths => paths, + } + } + /// Returns the open files in the project or `None` if the entire project should be checked. pub fn open_files(self, db: &dyn Db) -> Option<&FxHashSet> { self.open_fileset(db).as_deref() @@ -289,6 +354,17 @@ impl Project { index.insert(file); } + /// Replaces the diagnostics from indexing the project files with `diagnostics`. + /// + /// This is a no-op if the project files haven't been indexed yet. + pub fn replace_index_diagnostics(self, db: &mut dyn Db, diagnostics: Vec) { + let Some(mut index) = IndexedFiles::indexed_mut(db, self) else { + return; + }; + + index.set_diagnostics(diagnostics); + } + /// Returns the files belonging to this project. pub fn files(self, db: &dyn Db) -> Indexed<'_> { let files = self.file_set(db); @@ -296,12 +372,14 @@ impl Project { let indexed = match files.get() { Index::Lazy(vacant) => { let _entered = - tracing::debug_span!("Project::index_files", package = %self.name(db)) + tracing::debug_span!("Project::index_files", project = %self.name(db)) .entered(); - let files = discover_project_files(db, self); - tracing::info!("Found {} files in project `{}`", files.len(), self.name(db)); - vacant.set(files) + let walker = ProjectFilesWalker::new(db); + let (files, diagnostics) = walker.collect_set(db); + + tracing::info!("Indexed {} file(s)", files.len()); + vacant.set(files, diagnostics) } Index::Indexed(indexed) => indexed, }; @@ -327,8 +405,8 @@ fn check_file_impl(db: &dyn Db, file: File) -> Vec> { if let Some(read_error) = source.read_error() { diagnostics.push(Box::new(IOErrorDiagnostic { - file, - error: read_error.clone(), + file: Some(file), + error: read_error.clone().into(), })); return diagnostics; } @@ -355,53 +433,6 @@ fn check_file_impl(db: &dyn Db, file: File) -> Vec> { diagnostics } -fn discover_project_files(db: &dyn Db, 
project: Project) -> FxHashSet { - let paths = std::sync::Mutex::new(Vec::new()); - - db.system().walk_directory(project.root(db)).run(|| { - Box::new(|entry| { - match entry { - Ok(entry) => { - // Skip over any non python files to avoid creating too many entries in `Files`. - match entry.file_type() { - FileType::File => { - if entry - .path() - .extension() - .and_then(PySourceType::try_from_extension) - .is_some() - { - let mut paths = paths.lock().unwrap(); - paths.push(entry.into_path()); - } - } - FileType::Directory | FileType::Symlink => {} - } - } - Err(error) => { - // TODO Handle error - tracing::error!("Failed to walk path: {error}"); - } - } - - WalkState::Continue - }) - }); - - let paths = paths.into_inner().unwrap(); - let mut files = FxHashSet::with_capacity_and_hasher(paths.len(), FxBuildHasher); - - for path in paths { - // If this returns `None`, then the file was deleted between the `walk_directory` call and now. - // We can ignore this. - if let Ok(file) = system_path_to_file(db.upcast(), &path) { - files.insert(file); - } - } - - files -} - #[derive(Debug)] enum ProjectFiles<'a> { OpenFiles(&'a FxHashSet), @@ -416,6 +447,13 @@ impl<'a> ProjectFiles<'a> { ProjectFiles::Indexed(project.files(db)) } } + + fn diagnostics(&self) -> &[IOErrorDiagnostic] { + match self { + ProjectFiles::OpenFiles(_) => &[], + ProjectFiles::Indexed(indexed) => indexed.diagnostics(), + } + } } impl<'a> IntoIterator for &'a ProjectFiles<'a> { @@ -448,10 +486,10 @@ impl Iterator for ProjectFilesIter<'_> { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct IOErrorDiagnostic { - file: File, - error: SourceTextError, + file: Option, + error: IOErrorKind, } impl Diagnostic for IOErrorDiagnostic { @@ -464,7 +502,7 @@ impl Diagnostic for IOErrorDiagnostic { } fn span(&self) -> Option { - Some(Span::from(self.file)) + self.file.map(Span::from) } fn severity(&self) -> Severity { @@ -472,6 +510,15 @@ impl Diagnostic for IOErrorDiagnostic { } } +#[derive(Error, Debug, 
Clone)] +enum IOErrorKind { + #[error(transparent)] + Walk(#[from] walk::WalkError), + + #[error(transparent)] + SourceText(#[from] SourceTextError), +} + #[cfg(test)] mod tests { use crate::db::tests::TestDb; diff --git a/crates/red_knot_project/src/walk.rs b/crates/red_knot_project/src/walk.rs new file mode 100644 index 0000000000..531c823a28 --- /dev/null +++ b/crates/red_knot_project/src/walk.rs @@ -0,0 +1,256 @@ +use crate::{Db, IOErrorDiagnostic, IOErrorKind, Project}; +use ruff_db::files::{system_path_to_file, File}; +use ruff_db::system::walk_directory::{ErrorKind, WalkDirectoryBuilder, WalkState}; +use ruff_db::system::{FileType, SystemPath, SystemPathBuf}; +use ruff_python_ast::PySourceType; +use rustc_hash::{FxBuildHasher, FxHashSet}; +use std::path::PathBuf; +use thiserror::Error; + +/// Filter that decides which files are included in the project. +/// +/// In the future, this will hold a reference to the `include` and `exclude` pattern. +/// +/// This struct mainly exists because `dyn Db` isn't `Send` or `Sync`, making it impossible +/// to access fields from within the walker. +#[derive(Default, Debug)] +pub(crate) struct ProjectFilesFilter<'a> { + /// The same as [`Project::included_paths_or_root`]. + included_paths: &'a [SystemPathBuf], + + /// The filter skips checking if the path is in `included_paths` if set to `true`. + /// + /// Skipping this check is useful when the walker only walks over `included_paths`. + skip_included_paths: bool, +} + +impl<'a> ProjectFilesFilter<'a> { + pub(crate) fn from_project(db: &'a dyn Db, project: Project) -> Self { + Self { + included_paths: project.included_paths_or_root(db), + skip_included_paths: false, + } + } + + /// Returns `true` if a file is part of the project and included in the paths to check. 
+ /// + /// A file is included in the checked files if it is a sub path of the project's root + /// (when no CLI path arguments are specified) or if it is a sub path of any path provided on the CLI (`knot check `) AND: + /// + /// * It matches a positive `include` pattern and isn't excluded by a later negative `include` pattern. + /// * It doesn't match a positive `exclude` pattern or is re-included by a later negative `exclude` pattern. + /// + /// ## Note + /// + /// This method may return `true` for files that don't end up being included when walking the + /// project tree because it doesn't consider `.gitignore` and other ignore files when deciding + /// if a file's included. + pub(crate) fn is_included(&self, path: &SystemPath) -> bool { + #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] + enum CheckPathMatch { + /// The path is a partial match of the checked path (it's a sub path) + Partial, + + /// The path matches a check path exactly. + Full, + } + + let m = if self.skip_included_paths { + Some(CheckPathMatch::Partial) + } else { + self.included_paths + .iter() + .filter_map(|included_path| { + if let Ok(relative_path) = path.strip_prefix(included_path) { + // Exact matches are always included + if relative_path.as_str().is_empty() { + Some(CheckPathMatch::Full) + } else { + Some(CheckPathMatch::Partial) + } + } else { + None + } + }) + .max() + }; + + match m { + None => false, + Some(CheckPathMatch::Partial) => { + // TODO: For partial matches, only include the file if it is included by the project's include/exclude settings. 
+ true + } + Some(CheckPathMatch::Full) => true, + } + } +} + +pub(crate) struct ProjectFilesWalker<'a> { + walker: WalkDirectoryBuilder, + + filter: ProjectFilesFilter<'a>, +} + +impl<'a> ProjectFilesWalker<'a> { + pub(crate) fn new(db: &'a dyn Db) -> Self { + let project = db.project(); + + let mut filter = ProjectFilesFilter::from_project(db, project); + // It's unnecessary to filter on included paths because it only iterates over those to start with. + filter.skip_included_paths = true; + + Self::from_paths(db, project.included_paths_or_root(db), filter) + .expect("included_paths_or_root to never return an empty iterator") + } + + /// Creates a walker for indexing the project files incrementally. + /// + /// The main difference to a full project walk is that `paths` may contain paths + /// that aren't part of the included files. + pub(crate) fn incremental

<P>(db: &'a dyn Db, paths: impl IntoIterator<Item = P>) -> Option<Self> + where + P: AsRef<SystemPath>, + { + let project = db.project(); + + let filter = ProjectFilesFilter::from_project(db, project); + + Self::from_paths(db, paths, filter) + } + + fn from_paths

<P>( + db: &'a dyn Db, + paths: impl IntoIterator<Item = P>, + filter: ProjectFilesFilter<'a>, + ) -> Option<Self> + where + P: AsRef<SystemPath>, + { + let mut paths = paths.into_iter(); + + let mut walker = db.system().walk_directory(paths.next()?.as_ref()); + + for path in paths { + walker = walker.add(path); + } + + Some(Self { walker, filter }) + } + + /// Walks the project paths and collects the paths of all files that + /// are included in the project. + pub(crate) fn walk_paths(self) -> (Vec<SystemPathBuf>, Vec<IOErrorDiagnostic>) { + let paths = std::sync::Mutex::new(Vec::new()); + let diagnostics = std::sync::Mutex::new(Vec::new()); + + self.walker.run(|| { + Box::new(|entry| { + match entry { + Ok(entry) => { + if !self.filter.is_included(entry.path()) { + tracing::debug!("Ignoring not-included path: {}", entry.path()); + return WalkState::Skip; + } + + // Skip over any non python files to avoid creating too many entries in `Files`. + match entry.file_type() { + FileType::File => { + if entry + .path() + .extension() + .and_then(PySourceType::try_from_extension) + .is_some() + { + let mut paths = paths.lock().unwrap(); + paths.push(entry.into_path()); + } + } + FileType::Directory | FileType::Symlink => {} + } + } + Err(error) => match error.kind() { + ErrorKind::Loop { ..
} => { + unreachable!("Loops shouldn't be possible without following symlinks.") + } + ErrorKind::Io { path, err } => { + let mut diagnostics = diagnostics.lock().unwrap(); + let error = if let Some(path) = path { + WalkError::IOPathError { + path: path.clone(), + error: err.to_string(), + } + } else { + WalkError::IOError { + error: err.to_string(), + } + }; + + diagnostics.push(IOErrorDiagnostic { + file: None, + error: IOErrorKind::Walk(error), + }); + } + ErrorKind::NonUtf8Path { path } => { + diagnostics.lock().unwrap().push(IOErrorDiagnostic { + file: None, + error: IOErrorKind::Walk(WalkError::NonUtf8Path { + path: path.clone(), + }), + }); + } + }, + } + + WalkState::Continue + }) + }); + + ( + paths.into_inner().unwrap(), + diagnostics.into_inner().unwrap(), + ) + } + + pub(crate) fn collect_vec(self, db: &dyn Db) -> (Vec<File>, Vec<IOErrorDiagnostic>) { + let (paths, diagnostics) = self.walk_paths(); + + ( + paths + .into_iter() + .filter_map(move |path| { + // If this returns `None`, then the file was deleted between the `walk_directory` call and now. + // We can ignore this.
+ system_path_to_file(db.upcast(), &path).ok() + }) + .collect(), + diagnostics, + ) + } + + pub(crate) fn collect_set(self, db: &dyn Db) -> (FxHashSet, Vec) { + let (paths, diagnostics) = self.walk_paths(); + + let mut files = FxHashSet::with_capacity_and_hasher(paths.len(), FxBuildHasher); + + for path in paths { + if let Ok(file) = system_path_to_file(db.upcast(), &path) { + files.insert(file); + } + } + + (files, diagnostics) + } +} + +#[derive(Error, Debug, Clone)] +pub(crate) enum WalkError { + #[error("`{path}`: {error}")] + IOPathError { path: SystemPathBuf, error: String }, + + #[error("Failed to walk project directory: {error}")] + IOError { error: String }, + + #[error("`{path}` is not a valid UTF-8 path")] + NonUtf8Path { path: PathBuf }, +} diff --git a/crates/red_knot_project/src/watch/project_watcher.rs b/crates/red_knot_project/src/watch/project_watcher.rs index d9fb2d9610..8179575919 100644 --- a/crates/red_knot_project/src/watch/project_watcher.rs +++ b/crates/red_knot_project/src/watch/project_watcher.rs @@ -6,7 +6,7 @@ use tracing::info; use red_knot_python_semantic::system_module_search_paths; use ruff_cache::{CacheKey, CacheKeyHasher}; use ruff_db::system::{SystemPath, SystemPathBuf}; -use ruff_db::{Db as _, Upcast}; +use ruff_db::Upcast; use crate::db::{Db, ProjectDatabase}; use crate::watch::Watcher; @@ -42,9 +42,9 @@ impl ProjectWatcher { pub fn update(&mut self, db: &ProjectDatabase) { let search_paths: Vec<_> = system_module_search_paths(db.upcast()).collect(); - let project_path = db.project().root(db).to_path_buf(); + let project_path = db.project().root(db); - let new_cache_key = Self::compute_cache_key(&project_path, &search_paths); + let new_cache_key = Self::compute_cache_key(project_path, &search_paths); if self.cache_key == Some(new_cache_key) { return; @@ -68,41 +68,47 @@ impl ProjectWatcher { self.has_errored_paths = false; - let project_path = db - .system() - .canonicalize_path(&project_path) - .unwrap_or(project_path); - let 
config_paths = db .project() .metadata(db) .extra_configuration_paths() .iter() - .cloned(); + .map(SystemPathBuf::as_path); + + // Watch both the project root and any paths provided by the user on the CLI (removing any redundant nested paths). + // This is necessary to observe changes to files that are outside the project root. + // We always need to watch the project root to observe changes to its configuration. + let included_paths = ruff_db::system::deduplicate_nested_paths( + std::iter::once(project_path).chain( + db.project() + .included_paths_list(db) + .iter() + .map(SystemPathBuf::as_path), + ), + ); // Find the non-overlapping module search paths and filter out paths that are already covered by the project. // Module search paths are already canonicalized. let unique_module_paths = ruff_db::system::deduplicate_nested_paths( search_paths .into_iter() - .filter(|path| !path.starts_with(&project_path)), - ) - .map(SystemPath::to_path_buf); + .filter(|path| !path.starts_with(project_path)), + ); // Now add the new paths, first starting with the project path and then // adding the library search paths, and finally the paths for configurations. - for path in std::iter::once(project_path) + for path in included_paths .chain(unique_module_paths) .chain(config_paths) { // Log a warning. It's not worth aborting if registering a single folder fails because // Ruff otherwise stills works as expected. - if let Err(error) = self.watcher.watch(&path) { + if let Err(error) = self.watcher.watch(path) { // TODO: Log a user-facing warning. tracing::warn!("Failed to setup watcher for path `{path}`: {error}. 
You have to restart Ruff after making changes to files under this path or you might see stale results."); self.has_errored_paths = true; } else { - self.watched_paths.push(path); + self.watched_paths.push(path.to_path_buf()); } } diff --git a/scripts/knot_benchmark/src/benchmark/cases.py b/scripts/knot_benchmark/src/benchmark/cases.py index eb444c5d13..c95350dba5 100644 --- a/scripts/knot_benchmark/src/benchmark/cases.py +++ b/scripts/knot_benchmark/src/benchmark/cases.py @@ -68,12 +68,7 @@ class Knot(Tool): ) def cold_command(self, project: Project, venv: Venv) -> Command: - command = [str(self.path), "check", "-v"] - - assert len(project.include) < 2, "Knot doesn't support multiple source folders" - - if project.include: - command.extend(["--project", project.include[0]]) + command = [str(self.path), "check", "-v", *project.include] command.extend(["--python", str(venv.path)])