#![allow(clippy::ref_option)] use crate::metadata::options::OptionDiagnostic; use crate::walk::{ProjectFilesFilter, ProjectFilesWalker}; pub use db::{Db, ProjectDatabase}; use files::{Index, Indexed, IndexedFiles}; use metadata::settings::Settings; pub use metadata::{ProjectMetadata, ProjectMetadataError}; use ruff_db::diagnostic::{ Annotation, Diagnostic, DiagnosticId, Severity, Span, SubDiagnostic, create_parse_diagnostic, create_unsupported_syntax_diagnostic, }; use ruff_db::files::File; use ruff_db::parsed::parsed_module; use ruff_db::source::{SourceTextError, source_text}; use ruff_db::system::{SystemPath, SystemPathBuf}; use rustc_hash::FxHashSet; use salsa::Durability; use salsa::Setter; use std::backtrace::BacktraceStatus; use std::panic::{AssertUnwindSafe, UnwindSafe}; use std::sync::Arc; use thiserror::Error; use tracing::error; use ty_python_semantic::lint::{LintRegistry, LintRegistryBuilder, RuleSelection}; use ty_python_semantic::types::check_types; use ty_python_semantic::{add_inferred_python_version_hint_to_diagnostic, register_lints}; pub mod combine; mod db; mod files; pub mod metadata; mod walk; pub mod watch; pub static DEFAULT_LINT_REGISTRY: std::sync::LazyLock = std::sync::LazyLock::new(default_lints_registry); pub fn default_lints_registry() -> LintRegistry { let mut builder = LintRegistryBuilder::default(); register_lints(&mut builder); builder.build() } /// The project as a Salsa ingredient. /// /// ## How is a project different from a program? /// There are two (related) motivations: /// /// 1. Program is defined in `ruff_db` and it can't reference the settings types for the linter and formatter /// without introducing a cyclic dependency. The project is defined in a higher level crate /// where it can reference these setting types. /// 2. Running `ruff check` with different target versions results in different programs (settings) but /// it remains the same project. That's why program is a narrowed view of the project only /// holding on to the most fundamental settings required for checking. #[salsa::input] pub struct Project { /// The files that are open in the project. /// /// Setting the open files to a non-`None` value changes `check` to only check the /// open files rather than all files in the project. #[returns(as_deref)] #[default] open_fileset: Option>>, /// The first-party files of this project. #[default] #[returns(ref)] file_set: IndexedFiles, /// The metadata describing the project, including the unresolved options. #[returns(ref)] pub metadata: ProjectMetadata, /// The resolved project settings. #[returns(ref)] pub settings: Settings, /// The paths that should be included when checking this project. /// /// The default (when this list is empty) is to include all files in the project root /// (that satisfy the configured include and exclude patterns). /// However, it's sometimes desired to only check a subset of the project, e.g. to see /// the diagnostics for a single file or a folder. /// /// This list gets initialized by the paths passed to `ty check ` /// /// ## How is this different from `open_files`? /// /// The `included_paths` is closely related to `open_files`. The only difference is that /// `open_files` is already a resolved set of files whereas `included_paths` is only a list of paths /// that are resolved to files by indexing them. The other difference is that /// new files added to any directory in `included_paths` will be indexed and added to the project /// whereas `open_files` needs to be updated manually (e.g. by the IDE). /// /// In short, `open_files` is cheaper in contexts where the set of files is known, like /// in an IDE when the user only wants to check the open tabs. This could be modeled /// with `included_paths` too but it would require an explicit walk dir step that's simply unnecessary. #[default] #[returns(deref)] included_paths_list: Vec, /// Diagnostics that were generated when resolving the project settings. #[returns(deref)] settings_diagnostics: Vec, } /// A progress reporter. pub trait Reporter: Send + Sync { /// Initialize the reporter with the number of files. fn set_files(&mut self, files: usize); /// Report the completion of a given file. fn report_file(&self, file: &File); } /// A no-op implementation of [`Reporter`]. #[derive(Default)] pub struct DummyReporter; impl Reporter for DummyReporter { fn set_files(&mut self, _files: usize) {} fn report_file(&self, _file: &File) {} } #[salsa::tracked] impl Project { pub fn from_metadata(db: &dyn Db, metadata: ProjectMetadata) -> Self { let (settings, settings_diagnostics) = metadata.options().to_settings(db); Project::builder(metadata, settings, settings_diagnostics) .durability(Durability::MEDIUM) .open_fileset_durability(Durability::LOW) .file_set_durability(Durability::LOW) .new(db) } pub fn root(self, db: &dyn Db) -> &SystemPath { self.metadata(db).root() } pub fn name(self, db: &dyn Db) -> &str { self.metadata(db).name() } /// Returns the resolved linter rules for the project. /// /// This is a salsa query to prevent re-computing queries if other, unrelated /// settings change. For example, we don't want that changing the terminal settings /// invalidates any type checking queries. #[salsa::tracked(returns(deref))] pub fn rules(self, db: &dyn Db) -> Arc { self.settings(db).to_rules() } /// Returns `true` if `path` is both part of the project and included (see `included_paths_list`). /// /// Unlike [Self::files], this method does not respect `.gitignore` files. It only checks /// the project's include and exclude settings as well as the paths that were passed to `ty check `. /// This means, that this method is an over-approximation of `Self::files` and may return `true` for paths /// that won't be included when checking the project because they're ignored in a `.gitignore` file. pub fn is_path_included(self, db: &dyn Db, path: &SystemPath) -> bool { ProjectFilesFilter::from_project(db, self).is_included(path) } pub fn reload(self, db: &mut dyn Db, metadata: ProjectMetadata) { tracing::debug!("Reloading project"); assert_eq!(self.root(db), metadata.root()); if &metadata != self.metadata(db) { let (settings, settings_diagnostics) = metadata.options().to_settings(db); if self.settings(db) != &settings { self.set_settings(db).to(settings); } if self.settings_diagnostics(db) != settings_diagnostics { self.set_settings_diagnostics(db).to(settings_diagnostics); } self.set_metadata(db).to(metadata); } self.reload_files(db); } /// Checks all open files in the project and its dependencies. pub(crate) fn check( self, db: &ProjectDatabase, mut reporter: AssertUnwindSafe<&mut dyn Reporter>, ) -> Vec { let project_span = tracing::debug_span!("Project::check"); let _span = project_span.enter(); tracing::debug!("Checking project '{name}'", name = self.name(db)); let mut diagnostics: Vec = Vec::new(); diagnostics.extend( self.settings_diagnostics(db) .iter() .map(OptionDiagnostic::to_diagnostic), ); let files = ProjectFiles::new(db, self); reporter.set_files(files.len()); diagnostics.extend( files .diagnostics() .iter() .map(IOErrorDiagnostic::to_diagnostic), ); let file_diagnostics = std::sync::Mutex::new(vec![]); { let db = db.clone(); let file_diagnostics = &file_diagnostics; let project_span = &project_span; let reporter = &reporter; rayon::scope(move |scope| { for file in &files { let db = db.clone(); scope.spawn(move |_| { let check_file_span = tracing::debug_span!(parent: project_span, "check_file", ?file); let _entered = check_file_span.entered(); let result = check_file_impl(&db, file); file_diagnostics.lock().unwrap().extend(result); reporter.report_file(&file); }); } }); } let mut file_diagnostics = file_diagnostics.into_inner().unwrap(); file_diagnostics.sort_by(|left, right| { left.rendering_sort_key(db) .cmp(&right.rendering_sort_key(db)) }); diagnostics.extend(file_diagnostics); diagnostics } pub(crate) fn check_file(self, db: &dyn Db, file: File) -> Vec { let mut file_diagnostics: Vec<_> = self .settings_diagnostics(db) .iter() .map(OptionDiagnostic::to_diagnostic) .collect(); let check_diagnostics = check_file_impl(db, file); file_diagnostics.extend(check_diagnostics); file_diagnostics } /// Opens a file in the project. /// /// This changes the behavior of `check` to only check the open files rather than all files in the project. pub fn open_file(self, db: &mut dyn Db, file: File) { tracing::debug!("Opening file `{}`", file.path(db)); let mut open_files = self.take_open_files(db); open_files.insert(file); self.set_open_files(db, open_files); } /// Closes a file in the project. pub fn close_file(self, db: &mut dyn Db, file: File) -> bool { tracing::debug!("Closing file `{}`", file.path(db)); let mut open_files = self.take_open_files(db); let removed = open_files.remove(&file); if removed { self.set_open_files(db, open_files); } removed } pub fn set_included_paths(self, db: &mut dyn Db, paths: Vec) { tracing::debug!("Setting included paths: {paths}", paths = paths.len()); self.set_included_paths_list(db).to(paths); self.reload_files(db); } /// Returns the paths that should be checked. /// /// The default is to check the entire project in which case this method returns /// the project root. However, users can specify to only check specific sub-folders or /// even files of a project by using `ty check `. In that case, this method /// returns the provided absolute paths. /// /// Note: The CLI doesn't prohibit users from specifying paths outside the project root. /// This can be useful to check arbitrary files, but it isn't something we recommend. /// We should try to support this use case but it's okay if there are some limitations around it. fn included_paths_or_root(self, db: &dyn Db) -> &[SystemPathBuf] { match self.included_paths_list(db) { [] => std::slice::from_ref(&self.metadata(db).root), paths => paths, } } /// Returns the open files in the project or `None` if the entire project should be checked. pub fn open_files(self, db: &dyn Db) -> Option<&FxHashSet> { self.open_fileset(db) } /// Sets the open files in the project. /// /// This changes the behavior of `check` to only check the open files rather than all files in the project. #[tracing::instrument(level = "debug", skip(self, db))] pub fn set_open_files(self, db: &mut dyn Db, open_files: FxHashSet) { tracing::debug!("Set open project files (count: {})", open_files.len()); self.set_open_fileset(db).to(Some(Arc::new(open_files))); } /// This takes the open files from the project and returns them. /// /// This changes the behavior of `check` to check all files in the project instead of just the open files. fn take_open_files(self, db: &mut dyn Db) -> FxHashSet { tracing::debug!("Take open project files"); // Salsa will cancel any pending queries and remove its own reference to `open_files` // so that the reference counter to `open_files` now drops to 1. let open_files = self.set_open_fileset(db).to(None); if let Some(open_files) = open_files { Arc::try_unwrap(open_files).unwrap() } else { FxHashSet::default() } } /// Returns `true` if the file is open in the project. /// /// A file is considered open when: /// * explicitly set as an open file using [`open_file`](Self::open_file) /// * It has a [`SystemPath`] and belongs to a package's `src` files /// * It has a [`SystemVirtualPath`](ruff_db::system::SystemVirtualPath) pub fn is_file_open(self, db: &dyn Db, file: File) -> bool { let path = file.path(db); // Try to return early to avoid adding a dependency on `open_files` or `file_set` which // both have a durability of `LOW`. if path.is_vendored_path() { return false; } if let Some(open_files) = self.open_files(db) { open_files.contains(&file) } else if file.path(db).is_system_path() { self.files(db).contains(&file) } else { file.path(db).is_system_virtual_path() } } #[tracing::instrument(level = "debug", skip(self, db))] pub fn remove_file(self, db: &mut dyn Db, file: File) { tracing::debug!( "Removing file `{}` from project `{}`", file.path(db), self.name(db) ); let Some(mut index) = IndexedFiles::indexed_mut(db, self) else { return; }; index.remove(file); } pub fn add_file(self, db: &mut dyn Db, file: File) { tracing::debug!( "Adding file `{}` to project `{}`", file.path(db), self.name(db) ); let Some(mut index) = IndexedFiles::indexed_mut(db, self) else { return; }; index.insert(file); } /// Replaces the diagnostics from indexing the project files with `diagnostics`. /// /// This is a no-op if the project files haven't been indexed yet. pub fn replace_index_diagnostics(self, db: &mut dyn Db, diagnostics: Vec) { let Some(mut index) = IndexedFiles::indexed_mut(db, self) else { return; }; index.set_diagnostics(diagnostics); } /// Returns the files belonging to this project. pub fn files(self, db: &dyn Db) -> Indexed<'_> { let files = self.file_set(db); match files.get() { Index::Lazy(vacant) => { let _entered = tracing::debug_span!("Project::index_files", project = %self.name(db)) .entered(); let walker = ProjectFilesWalker::new(db); let (files, diagnostics) = walker.collect_set(db); tracing::info!("Indexed {} file(s)", files.len()); vacant.set(files, diagnostics) } Index::Indexed(indexed) => indexed, } } pub fn reload_files(self, db: &mut dyn Db) { tracing::debug!("Reloading files for project `{}`", self.name(db)); if !self.file_set(db).is_lazy() { // Force a re-index of the files in the next revision. self.set_file_set(db).to(IndexedFiles::lazy()); } } } fn check_file_impl(db: &dyn Db, file: File) -> Vec { let mut diagnostics: Vec = Vec::new(); // Abort checking if there are IO errors. let source = source_text(db.upcast(), file); if let Some(read_error) = source.read_error() { diagnostics.push( IOErrorDiagnostic { file: Some(file), error: read_error.clone().into(), } .to_diagnostic(), ); return diagnostics; } let parsed = parsed_module(db.upcast(), file); diagnostics.extend( parsed .errors() .iter() .map(|error| create_parse_diagnostic(file, error)), ); diagnostics.extend(parsed.unsupported_syntax_errors().iter().map(|error| { let mut error = create_unsupported_syntax_diagnostic(file, error); add_inferred_python_version_hint_to_diagnostic(db.upcast(), &mut error, "parsing syntax"); error })); { let db = AssertUnwindSafe(db); match catch(&**db, file, || check_types(db.upcast(), file)) { Ok(Some(type_check_diagnostics)) => { diagnostics.extend(type_check_diagnostics.into_iter().cloned()); } Ok(None) => {} Err(diagnostic) => diagnostics.push(diagnostic), } } diagnostics.sort_unstable_by_key(|diagnostic| { diagnostic .primary_span() .and_then(|span| span.range()) .unwrap_or_default() .start() }); diagnostics } #[derive(Debug)] enum ProjectFiles<'a> { OpenFiles(&'a FxHashSet), Indexed(files::Indexed<'a>), } impl<'a> ProjectFiles<'a> { fn new(db: &'a dyn Db, project: Project) -> Self { if let Some(open_files) = project.open_files(db) { ProjectFiles::OpenFiles(open_files) } else { ProjectFiles::Indexed(project.files(db)) } } fn diagnostics(&self) -> &[IOErrorDiagnostic] { match self { ProjectFiles::OpenFiles(_) => &[], ProjectFiles::Indexed(indexed) => indexed.diagnostics(), } } fn len(&self) -> usize { match self { ProjectFiles::OpenFiles(open_files) => open_files.len(), ProjectFiles::Indexed(indexed) => indexed.len(), } } } impl<'a> IntoIterator for &'a ProjectFiles<'a> { type Item = File; type IntoIter = ProjectFilesIter<'a>; fn into_iter(self) -> Self::IntoIter { match self { ProjectFiles::OpenFiles(files) => ProjectFilesIter::OpenFiles(files.iter()), ProjectFiles::Indexed(indexed) => ProjectFilesIter::Indexed { files: indexed.into_iter(), }, } } } enum ProjectFilesIter<'db> { OpenFiles(std::collections::hash_set::Iter<'db, File>), Indexed { files: files::IndexedIter<'db> }, } impl Iterator for ProjectFilesIter<'_> { type Item = File; fn next(&mut self) -> Option { match self { ProjectFilesIter::OpenFiles(files) => files.next().copied(), ProjectFilesIter::Indexed { files } => files.next(), } } } #[derive(Debug, Clone)] pub struct IOErrorDiagnostic { file: Option, error: IOErrorKind, } impl IOErrorDiagnostic { fn to_diagnostic(&self) -> Diagnostic { let mut diag = Diagnostic::new(DiagnosticId::Io, Severity::Error, &self.error); if let Some(file) = self.file { diag.annotate(Annotation::primary(Span::from(file))); } diag } } #[derive(Error, Debug, Clone)] enum IOErrorKind { #[error(transparent)] Walk(#[from] walk::WalkError), #[error(transparent)] SourceText(#[from] SourceTextError), } fn catch(db: &dyn Db, file: File, f: F) -> Result, Diagnostic> where F: FnOnce() -> R + UnwindSafe, { match ruff_db::panic::catch_unwind(|| { // Ignore salsa errors salsa::Cancelled::catch(f).ok() }) { Ok(result) => Ok(result), Err(error) => { use std::fmt::Write; let mut message = String::new(); message.push_str("Panicked"); if let Some(location) = error.location { let _ = write!(&mut message, " at {location}"); } let _ = write!( &mut message, " when checking `{file}`", file = file.path(db) ); if let Some(payload) = error.payload.as_str() { let _ = write!(&mut message, ": `{payload}`"); } let mut diagnostic = Diagnostic::new(DiagnosticId::Panic, Severity::Fatal, message); diagnostic.sub(SubDiagnostic::new( Severity::Info, "This indicates a bug in ty.", )); let report_message = "If you could open an issue at https://github.com/astral-sh/ty/issues/new?title=%5Bpanic%5D, we'd be very appreciative!"; diagnostic.sub(SubDiagnostic::new(Severity::Info, report_message)); diagnostic.sub(SubDiagnostic::new( Severity::Info, format!( "Platform: {os} {arch}", os = std::env::consts::OS, arch = std::env::consts::ARCH ), )); diagnostic.sub(SubDiagnostic::new( Severity::Info, format!( "Args: {args:?}", args = std::env::args().collect::>() ), )); if let Some(backtrace) = error.backtrace { match backtrace.status() { BacktraceStatus::Disabled => { diagnostic.sub(SubDiagnostic::new( Severity::Info, "run with `RUST_BACKTRACE=1` environment variable to show the full backtrace information", )); } BacktraceStatus::Captured => { diagnostic.sub(SubDiagnostic::new( Severity::Info, format!("Backtrace:\n{backtrace}"), )); } _ => {} } } if let Some(backtrace) = error.salsa_backtrace { salsa::attach(db, || { diagnostic.sub(SubDiagnostic::new(Severity::Info, backtrace.to_string())); }); } Err(diagnostic) } } } #[cfg(test)] mod tests { use crate::db::tests::TestDb; use crate::{ProjectMetadata, check_file_impl}; use ruff_db::files::system_path_to_file; use ruff_db::source::source_text; use ruff_db::system::{DbWithTestSystem, DbWithWritableSystem as _, SystemPath, SystemPathBuf}; use ruff_db::testing::assert_function_query_was_not_run; use ruff_python_ast::name::Name; use ty_python_semantic::types::check_types; use ty_python_semantic::{ Program, ProgramSettings, PythonPlatform, PythonVersionWithSource, SearchPathSettings, }; #[test] fn check_file_skips_type_checking_when_file_cant_be_read() -> ruff_db::system::Result<()> { let project = ProjectMetadata::new(Name::new_static("test"), SystemPathBuf::from("/")); let mut db = TestDb::new(project); let path = SystemPath::new("test.py"); Program::from_settings( &db, ProgramSettings { python_version: PythonVersionWithSource::default(), python_platform: PythonPlatform::default(), search_paths: SearchPathSettings::new(vec![SystemPathBuf::from(".")]), }, ) .expect("Failed to configure program settings"); db.write_file(path, "x = 10")?; let file = system_path_to_file(&db, path).unwrap(); // Now the file gets deleted before we had a chance to read its source text. db.memory_file_system().remove_file(path)?; file.sync(&mut db); assert_eq!(source_text(&db, file).as_str(), ""); assert_eq!( check_file_impl(&db, file) .into_iter() .map(|diagnostic| diagnostic.primary_message().to_string()) .collect::>(), vec!["Failed to read file: No such file or directory".to_string()] ); let events = db.take_salsa_events(); assert_function_query_was_not_run(&db, check_types, file, &events); // The user now creates a new file with an empty text. The source text // content returned by `source_text` remains unchanged, but the diagnostics should get updated. db.write_file(path, "").unwrap(); assert_eq!(source_text(&db, file).as_str(), ""); assert_eq!( check_file_impl(&db, file) .into_iter() .map(|diagnostic| diagnostic.primary_message().to_string()) .collect::>(), vec![] as Vec ); Ok(()) } }