// ruff/crates/ty_project/src/lib.rs
#![allow(clippy::ref_option)]
use crate::metadata::options::OptionDiagnostic;
use crate::walk::{ProjectFilesFilter, ProjectFilesWalker};
pub use db::{Db, ProjectDatabase};
use files::{Index, Indexed, IndexedFiles};
use metadata::settings::Settings;
pub use metadata::{ProjectMetadata, ProjectMetadataError};
use ruff_db::diagnostic::{
Annotation, Diagnostic, DiagnosticId, Severity, Span, SubDiagnostic, create_parse_diagnostic,
create_unsupported_syntax_diagnostic,
};
use ruff_db::files::File;
use ruff_db::parsed::parsed_module;
use ruff_db::source::{SourceTextError, source_text};
use ruff_db::system::{SystemPath, SystemPathBuf};
use rustc_hash::FxHashSet;
use salsa::Durability;
use salsa::Setter;
use std::backtrace::BacktraceStatus;
use std::panic::{AssertUnwindSafe, UnwindSafe};
use std::sync::Arc;
use thiserror::Error;
use tracing::error;
use ty_python_semantic::lint::{LintRegistry, LintRegistryBuilder, RuleSelection};
use ty_python_semantic::types::check_types;
use ty_python_semantic::{add_inferred_python_version_hint_to_diagnostic, register_lints};
pub mod combine;
mod db;
mod files;
pub mod metadata;
mod walk;
pub mod watch;
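/// The default lint registry, containing all lints registered by [`register_lints`].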
pub static DEFAULT_LINT_REGISTRY: std::sync::LazyLock<LintRegistry> =
std::sync::LazyLock::new(default_lints_registry);
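/// Builds the default [`LintRegistry`] by registering ty's lint rules.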
pub fn default_lints_registry() -> LintRegistry {
let mut builder = LintRegistryBuilder::default();
register_lints(&mut builder);
builder.build()
}
/// The project as a Salsa ingredient.
///
/// ## How is a project different from a program?
/// There are two (related) motivations:
///
/// 1. The program is defined in `ruff_db` and can't reference the settings types for the linter and formatter
/// without introducing a cyclic dependency. The project is defined in a higher-level crate
/// where it can reference these settings types.
/// 2. Running `ruff check` with different target versions results in different programs (settings),
/// but it remains the same project. That's why the program is a narrowed view of the project,
/// holding on to only the most fundamental settings required for checking.
#[salsa::input]
pub struct Project {
/// The files that are open in the project.
///
/// Setting the open files to a non-`None` value changes `check` to only check the
/// open files rather than all files in the project.
#[returns(as_deref)]
#[default]
open_fileset: Option<Arc<FxHashSet<File>>>,
/// The first-party files of this project.
#[default]
#[returns(ref)]
file_set: IndexedFiles,
/// The metadata describing the project, including the unresolved options.
#[returns(ref)]
pub metadata: ProjectMetadata,
/// The resolved project settings.
#[returns(ref)]
pub settings: Settings,
/// The paths that should be included when checking this project.
///
/// The default (when this list is empty) is to include all files in the project root
/// (that satisfy the configured include and exclude patterns).
/// However, it's sometimes desired to only check a subset of the project, e.g. to see
/// the diagnostics for a single file or a folder.
///
/// This list gets initialized with the paths passed to `ty check <paths>`.
///
/// ## How is this different from `open_files`?
///
/// `included_paths` is closely related to `open_files`. One difference is that
/// `open_files` is already a resolved set of files, whereas `included_paths` is only a list of paths
/// that are resolved to files by indexing them. The other difference is that
/// new files added to any directory in `included_paths` will be indexed and added to the project,
/// whereas `open_files` needs to be updated manually (e.g. by the IDE).
///
/// In short, `open_files` is cheaper in contexts where the set of files is known, like
/// in an IDE when the user only wants to check the open tabs. This could be modeled
/// with `included_paths` too, but it would require an explicit directory-walk step that's simply unnecessary.
#[default]
#[returns(deref)]
included_paths_list: Vec<SystemPathBuf>,
/// Diagnostics that were generated when resolving the project settings.
#[returns(deref)]
settings_diagnostics: Vec<OptionDiagnostic>,
}
/// A progress reporter.
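///
/// A minimal sketch of a custom reporter that counts completed files
/// (illustrative only; `CountingReporter` is not part of this crate):
///
/// ```ignore
/// use std::sync::atomic::{AtomicUsize, Ordering};
///
/// #[derive(Default)]
/// struct CountingReporter {
///     total: usize,
///     completed: AtomicUsize,
/// }
///
/// impl Reporter for CountingReporter {
///     fn set_files(&mut self, files: usize) {
///         self.total = files;
///     }
///
///     fn report_file(&self, _file: &File) {
///         self.completed.fetch_add(1, Ordering::Relaxed);
///     }
/// }
/// ```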
pub trait Reporter: Send + Sync {
/// Initialize the reporter with the number of files.
fn set_files(&mut self, files: usize);
/// Report the completion of a given file.
fn report_file(&self, file: &File);
}
/// A no-op implementation of [`Reporter`].
#[derive(Default)]
pub struct DummyReporter;
impl Reporter for DummyReporter {
fn set_files(&mut self, _files: usize) {}
fn report_file(&self, _file: &File) {}
}
#[salsa::tracked]
impl Project {
pub fn from_metadata(db: &dyn Db, metadata: ProjectMetadata) -> Self {
let (settings, settings_diagnostics) = metadata.options().to_settings(db);
Project::builder(metadata, settings, settings_diagnostics)
.durability(Durability::MEDIUM)
.open_fileset_durability(Durability::LOW)
.file_set_durability(Durability::LOW)
.new(db)
}
pub fn root(self, db: &dyn Db) -> &SystemPath {
self.metadata(db).root()
}
pub fn name(self, db: &dyn Db) -> &str {
self.metadata(db).name()
}
/// Returns the resolved linter rules for the project.
///
/// This is a salsa query to prevent re-computing queries when other, unrelated
/// settings change. For example, we don't want a change to the terminal settings
/// to invalidate any type-checking queries.
#[salsa::tracked(returns(deref))]
pub fn rules(self, db: &dyn Db) -> Arc<RuleSelection> {
self.settings(db).to_rules()
}
/// Returns `true` if `path` is both part of the project and included (see `included_paths_list`).
///
/// Unlike [Self::files], this method does not respect `.gitignore` files. It only checks
/// the project's include and exclude settings as well as the paths that were passed to `ty check <paths>`.
/// This means that this method is an over-approximation of `Self::files` and may return `true` for paths
/// that won't be included when checking the project because they're ignored in a `.gitignore` file.
pub fn is_path_included(self, db: &dyn Db, path: &SystemPath) -> bool {
ProjectFilesFilter::from_project(db, self).is_included(path)
}
pub fn reload(self, db: &mut dyn Db, metadata: ProjectMetadata) {
tracing::debug!("Reloading project");
assert_eq!(self.root(db), metadata.root());
if &metadata != self.metadata(db) {
let (settings, settings_diagnostics) = metadata.options().to_settings(db);
if self.settings(db) != &settings {
self.set_settings(db).to(settings);
}
if self.settings_diagnostics(db) != settings_diagnostics {
self.set_settings_diagnostics(db).to(settings_diagnostics);
}
self.set_metadata(db).to(metadata);
}
self.reload_files(db);
}
/// Checks all open files in the project and its dependencies.
pub(crate) fn check(
self,
db: &ProjectDatabase,
mut reporter: AssertUnwindSafe<&mut dyn Reporter>,
) -> Vec<Diagnostic> {
let project_span = tracing::debug_span!("Project::check");
let _span = project_span.enter();
tracing::debug!("Checking project '{name}'", name = self.name(db));
let mut diagnostics: Vec<Diagnostic> = Vec::new();
diagnostics.extend(
self.settings_diagnostics(db)
.iter()
.map(OptionDiagnostic::to_diagnostic),
);
let files = ProjectFiles::new(db, self);
reporter.set_files(files.len());
diagnostics.extend(
files
.diagnostics()
.iter()
.map(IOErrorDiagnostic::to_diagnostic),
);
let file_diagnostics = std::sync::Mutex::new(vec![]);
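// Check the files in parallel. The database handle is cloned so it can be moved
// into the rayon scope, and cloned again per task so each spawned check owns its own handle.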
{
let db = db.clone();
let file_diagnostics = &file_diagnostics;
let project_span = &project_span;
let reporter = &reporter;
rayon::scope(move |scope| {
for file in &files {
let db = db.clone();
scope.spawn(move |_| {
let check_file_span =
tracing::debug_span!(parent: project_span, "check_file", ?file);
let _entered = check_file_span.entered();
let result = check_file_impl(&db, file);
file_diagnostics.lock().unwrap().extend(result);
reporter.report_file(&file);
});
}
});
}
let mut file_diagnostics = file_diagnostics.into_inner().unwrap();
file_diagnostics.sort_by(|left, right| {
left.rendering_sort_key(db)
.cmp(&right.rendering_sort_key(db))
});
diagnostics.extend(file_diagnostics);
diagnostics
}
pub(crate) fn check_file(self, db: &dyn Db, file: File) -> Vec<Diagnostic> {
let mut file_diagnostics: Vec<_> = self
.settings_diagnostics(db)
.iter()
.map(OptionDiagnostic::to_diagnostic)
.collect();
let check_diagnostics = check_file_impl(db, file);
file_diagnostics.extend(check_diagnostics);
file_diagnostics
}
/// Opens a file in the project.
///
/// This changes the behavior of `check` to only check the open files rather than all files in the project.
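///
/// A hypothetical usage sketch (assumes a `project`, a mutable `db`, and a `file` are in scope):
///
/// ```ignore
/// project.open_file(&mut db, file);
/// assert!(project.is_file_open(&db, file));
/// ```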
pub fn open_file(self, db: &mut dyn Db, file: File) {
tracing::debug!("Opening file `{}`", file.path(db));
let mut open_files = self.take_open_files(db);
open_files.insert(file);
self.set_open_files(db, open_files);
}
/// Closes a file in the project.
pub fn close_file(self, db: &mut dyn Db, file: File) -> bool {
tracing::debug!("Closing file `{}`", file.path(db));
let mut open_files = self.take_open_files(db);
let removed = open_files.remove(&file);
if removed {
self.set_open_files(db, open_files);
}
removed
}
pub fn set_included_paths(self, db: &mut dyn Db, paths: Vec<SystemPathBuf>) {
tracing::debug!("Setting included paths: {paths}", paths = paths.len());
self.set_included_paths_list(db).to(paths);
self.reload_files(db);
}
/// Returns the paths that should be checked.
///
/// The default is to check the entire project, in which case this method returns
/// the project root. However, users can specify to only check specific sub-folders or
/// even files of a project by using `ty check <paths>`. In that case, this method
/// returns the provided absolute paths.
///
/// Note: The CLI doesn't prohibit users from specifying paths outside the project root.
/// This can be useful to check arbitrary files, but it isn't something we recommend.
/// We should try to support this use case but it's okay if there are some limitations around it.
fn included_paths_or_root(self, db: &dyn Db) -> &[SystemPathBuf] {
match self.included_paths_list(db) {
[] => std::slice::from_ref(&self.metadata(db).root),
paths => paths,
}
}
/// Returns the open files in the project or `None` if the entire project should be checked.
pub fn open_files(self, db: &dyn Db) -> Option<&FxHashSet<File>> {
self.open_fileset(db)
}
/// Sets the open files in the project.
///
/// This changes the behavior of `check` to only check the open files rather than all files in the project.
#[tracing::instrument(level = "debug", skip(self, db))]
pub fn set_open_files(self, db: &mut dyn Db, open_files: FxHashSet<File>) {
tracing::debug!("Set open project files (count: {})", open_files.len());
self.set_open_fileset(db).to(Some(Arc::new(open_files)));
}
/// This takes the open files from the project and returns them.
///
/// This changes the behavior of `check` to check all files in the project instead of just the open files.
fn take_open_files(self, db: &mut dyn Db) -> FxHashSet<File> {
tracing::debug!("Take open project files");
// Salsa will cancel any pending queries and remove its own reference to `open_files`
// so that the reference counter to `open_files` now drops to 1.
let open_files = self.set_open_fileset(db).to(None);
if let Some(open_files) = open_files {
Arc::try_unwrap(open_files).unwrap()
} else {
FxHashSet::default()
}
}
/// Returns `true` if the file is open in the project.
///
/// A file is considered open when:
/// * it was explicitly set as an open file using [`open_file`](Self::open_file),
/// * it has a [`SystemPath`] and belongs to a package's `src` files, or
/// * it has a [`SystemVirtualPath`](ruff_db::system::SystemVirtualPath).
pub fn is_file_open(self, db: &dyn Db, file: File) -> bool {
let path = file.path(db);
// Try to return early to avoid adding a dependency on `open_files` or `file_set` which
// both have a durability of `LOW`.
if path.is_vendored_path() {
return false;
}
if let Some(open_files) = self.open_files(db) {
open_files.contains(&file)
} else if file.path(db).is_system_path() {
self.files(db).contains(&file)
} else {
file.path(db).is_system_virtual_path()
}
}
#[tracing::instrument(level = "debug", skip(self, db))]
pub fn remove_file(self, db: &mut dyn Db, file: File) {
tracing::debug!(
"Removing file `{}` from project `{}`",
file.path(db),
self.name(db)
);
let Some(mut index) = IndexedFiles::indexed_mut(db, self) else {
return;
};
index.remove(file);
}
pub fn add_file(self, db: &mut dyn Db, file: File) {
tracing::debug!(
"Adding file `{}` to project `{}`",
file.path(db),
self.name(db)
);
let Some(mut index) = IndexedFiles::indexed_mut(db, self) else {
return;
};
index.insert(file);
}
/// Replaces the diagnostics from indexing the project files with `diagnostics`.
///
/// This is a no-op if the project files haven't been indexed yet.
pub fn replace_index_diagnostics(self, db: &mut dyn Db, diagnostics: Vec<IOErrorDiagnostic>) {
let Some(mut index) = IndexedFiles::indexed_mut(db, self) else {
return;
};
index.set_diagnostics(diagnostics);
}
/// Returns the files belonging to this project.
pub fn files(self, db: &dyn Db) -> Indexed<'_> {
let files = self.file_set(db);
match files.get() {
Index::Lazy(vacant) => {
let _entered =
tracing::debug_span!("Project::index_files", project = %self.name(db))
.entered();
let walker = ProjectFilesWalker::new(db);
let (files, diagnostics) = walker.collect_set(db);
tracing::info!("Indexed {} file(s)", files.len());
vacant.set(files, diagnostics)
}
Index::Indexed(indexed) => indexed,
}
}
pub fn reload_files(self, db: &mut dyn Db) {
tracing::debug!("Reloading files for project `{}`", self.name(db));
if !self.file_set(db).is_lazy() {
// Force a re-index of the files in the next revision.
self.set_file_set(db).to(IndexedFiles::lazy());
}
}
}
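/// Checks a single file and returns its diagnostics: IO errors, parse errors,
/// unsupported-syntax errors, and type-checking diagnostics (including panics
/// converted into diagnostics), sorted by their start offset in the file.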
fn check_file_impl(db: &dyn Db, file: File) -> Vec<Diagnostic> {
let mut diagnostics: Vec<Diagnostic> = Vec::new();
// Abort checking if there are IO errors.
let source = source_text(db.upcast(), file);
if let Some(read_error) = source.read_error() {
diagnostics.push(
IOErrorDiagnostic {
file: Some(file),
error: read_error.clone().into(),
}
.to_diagnostic(),
);
return diagnostics;
}
let parsed = parsed_module(db.upcast(), file);
diagnostics.extend(
parsed
.errors()
.iter()
.map(|error| create_parse_diagnostic(file, error)),
);
diagnostics.extend(parsed.unsupported_syntax_errors().iter().map(|error| {
let mut error = create_unsupported_syntax_diagnostic(file, error);
add_inferred_python_version_hint_to_diagnostic(db.upcast(), &mut error, "parsing syntax");
error
}));
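// Run type checking behind a panic boundary so that a bug in ty surfaces as a
// diagnostic on this file instead of aborting the entire check.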
{
let db = AssertUnwindSafe(db);
match catch(&**db, file, || check_types(db.upcast(), file)) {
Ok(Some(type_check_diagnostics)) => {
diagnostics.extend(type_check_diagnostics.into_iter().cloned());
}
Ok(None) => {}
Err(diagnostic) => diagnostics.push(diagnostic),
}
}
diagnostics.sort_unstable_by_key(|diagnostic| {
diagnostic
.primary_span()
.and_then(|span| span.range())
.unwrap_or_default()
.start()
});
diagnostics
}
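/// The files to check: either the explicitly opened files or all indexed files of the project.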
#[derive(Debug)]
enum ProjectFiles<'a> {
OpenFiles(&'a FxHashSet<File>),
Indexed(files::Indexed<'a>),
}
impl<'a> ProjectFiles<'a> {
fn new(db: &'a dyn Db, project: Project) -> Self {
if let Some(open_files) = project.open_files(db) {
ProjectFiles::OpenFiles(open_files)
} else {
ProjectFiles::Indexed(project.files(db))
}
}
fn diagnostics(&self) -> &[IOErrorDiagnostic] {
match self {
ProjectFiles::OpenFiles(_) => &[],
ProjectFiles::Indexed(indexed) => indexed.diagnostics(),
}
}
fn len(&self) -> usize {
match self {
ProjectFiles::OpenFiles(open_files) => open_files.len(),
ProjectFiles::Indexed(indexed) => indexed.len(),
}
}
}
impl<'a> IntoIterator for &'a ProjectFiles<'a> {
type Item = File;
type IntoIter = ProjectFilesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
match self {
ProjectFiles::OpenFiles(files) => ProjectFilesIter::OpenFiles(files.iter()),
ProjectFiles::Indexed(indexed) => ProjectFilesIter::Indexed {
files: indexed.into_iter(),
},
}
}
}
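/// Iterator over the files yielded by [`ProjectFiles`].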
enum ProjectFilesIter<'db> {
OpenFiles(std::collections::hash_set::Iter<'db, File>),
Indexed { files: files::IndexedIter<'db> },
}
impl Iterator for ProjectFilesIter<'_> {
type Item = File;
fn next(&mut self) -> Option<Self::Item> {
match self {
ProjectFilesIter::OpenFiles(files) => files.next().copied(),
ProjectFilesIter::Indexed { files } => files.next(),
}
}
}
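/// A diagnostic for an IO error that occurred while walking the project or reading a file's source text.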
#[derive(Debug, Clone)]
pub struct IOErrorDiagnostic {
file: Option<File>,
error: IOErrorKind,
}
impl IOErrorDiagnostic {
fn to_diagnostic(&self) -> Diagnostic {
let mut diag = Diagnostic::new(DiagnosticId::Io, Severity::Error, &self.error);
if let Some(file) = self.file {
diag.annotate(Annotation::primary(Span::from(file)));
}
diag
}
}
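/// The kind of IO error: either a project-walk error or a source-text read error.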
#[derive(Error, Debug, Clone)]
enum IOErrorKind {
#[error(transparent)]
Walk(#[from] walk::WalkError),
#[error(transparent)]
SourceText(#[from] SourceTextError),
}
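/// Runs `f`, converting a panic into a `Fatal` [`Diagnostic`] that asks the user to file a bug report.
///
/// Salsa cancellations are swallowed and reported as `Ok(None)`.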
fn catch<F, R>(db: &dyn Db, file: File, f: F) -> Result<Option<R>, Diagnostic>
where
F: FnOnce() -> R + UnwindSafe,
{
match ruff_db::panic::catch_unwind(|| {
// Swallow salsa cancellations instead of reporting them as panics
salsa::Cancelled::catch(f).ok()
}) {
Ok(result) => Ok(result),
Err(error) => {
use std::fmt::Write;
let mut message = String::new();
message.push_str("Panicked");
if let Some(location) = error.location {
let _ = write!(&mut message, " at {location}");
}
let _ = write!(
&mut message,
" when checking `{file}`",
file = file.path(db)
);
if let Some(payload) = error.payload.as_str() {
let _ = write!(&mut message, ": `{payload}`");
}
let mut diagnostic = Diagnostic::new(DiagnosticId::Panic, Severity::Fatal, message);
diagnostic.sub(SubDiagnostic::new(
Severity::Info,
"This indicates a bug in ty.",
));
let report_message = "If you could open an issue at https://github.com/astral-sh/ty/issues/new?title=%5Bpanic%5D, we'd be very appreciative!";
diagnostic.sub(SubDiagnostic::new(Severity::Info, report_message));
diagnostic.sub(SubDiagnostic::new(
Severity::Info,
format!(
"Platform: {os} {arch}",
os = std::env::consts::OS,
arch = std::env::consts::ARCH
),
));
diagnostic.sub(SubDiagnostic::new(
Severity::Info,
format!(
"Args: {args:?}",
args = std::env::args().collect::<Vec<_>>()
),
));
if let Some(backtrace) = error.backtrace {
match backtrace.status() {
BacktraceStatus::Disabled => {
diagnostic.sub(SubDiagnostic::new(
Severity::Info,
"run with `RUST_BACKTRACE=1` environment variable to show the full backtrace information",
));
}
BacktraceStatus::Captured => {
diagnostic.sub(SubDiagnostic::new(
Severity::Info,
format!("Backtrace:\n{backtrace}"),
));
}
_ => {}
}
}
if let Some(backtrace) = error.salsa_backtrace {
salsa::attach(db, || {
diagnostic.sub(SubDiagnostic::new(Severity::Info, backtrace.to_string()));
});
}
Err(diagnostic)
}
}
}
#[cfg(test)]
mod tests {
use crate::db::tests::TestDb;
use crate::{ProjectMetadata, check_file_impl};
use ruff_db::files::system_path_to_file;
use ruff_db::source::source_text;
use ruff_db::system::{DbWithTestSystem, DbWithWritableSystem as _, SystemPath, SystemPathBuf};
use ruff_db::testing::assert_function_query_was_not_run;
use ruff_python_ast::name::Name;
use ty_python_semantic::types::check_types;
use ty_python_semantic::{
Program, ProgramSettings, PythonPlatform, PythonVersionWithSource, SearchPathSettings,
};
#[test]
fn check_file_skips_type_checking_when_file_cant_be_read() -> ruff_db::system::Result<()> {
let project = ProjectMetadata::new(Name::new_static("test"), SystemPathBuf::from("/"));
let mut db = TestDb::new(project);
let path = SystemPath::new("test.py");
Program::from_settings(
&db,
ProgramSettings {
python_version: PythonVersionWithSource::default(),
python_platform: PythonPlatform::default(),
search_paths: SearchPathSettings::new(vec![SystemPathBuf::from(".")]),
},
)
.expect("Failed to configure program settings");
db.write_file(path, "x = 10")?;
let file = system_path_to_file(&db, path).unwrap();
// Now the file gets deleted before we have a chance to read its source text.
db.memory_file_system().remove_file(path)?;
file.sync(&mut db);
assert_eq!(source_text(&db, file).as_str(), "");
assert_eq!(
check_file_impl(&db, file)
.into_iter()
.map(|diagnostic| diagnostic.primary_message().to_string())
.collect::<Vec<_>>(),
vec!["Failed to read file: No such file or directory".to_string()]
);
let events = db.take_salsa_events();
assert_function_query_was_not_run(&db, check_types, file, &events);
// The user now creates a new file with empty contents. The source text
// returned by `source_text` remains unchanged, but the diagnostics should get updated.
db.write_file(path, "").unwrap();
assert_eq!(source_text(&db, file).as_str(), "");
assert_eq!(
check_file_impl(&db, file)
.into_iter()
.map(|diagnostic| diagnostic.primary_message().to_string())
.collect::<Vec<_>>(),
vec![] as Vec<String>
);
Ok(())
}
}