Rename Red Knot (#17820)

Micha Reiser 2025-05-03 19:49:15 +02:00 committed by GitHub
parent e6a798b962
commit b51c4f82ea
1564 changed files with 1598 additions and 1578 deletions


@@ -0,0 +1,191 @@
use std::{collections::HashMap, hash::BuildHasher};
use ruff_db::system::SystemPathBuf;
use ruff_python_ast::PythonVersion;
use ty_python_semantic::{PythonPath, PythonPlatform};
/// Combine two values, preferring the values in `self`.
///
/// The logic should follow that of Cargo's `config.toml`:
///
/// > If a key is specified in multiple config files, the values will get merged together.
/// > Numbers, strings, and booleans will use the value in the deeper config directory taking
/// > precedence over ancestor directories, where the home directory is the lowest priority.
/// > Arrays will be joined together with higher precedence items being placed later in the
/// > merged array.
///
/// ## uv Compatibility
///
/// The merging behavior differs from uv in that values with higher precedence in arrays
/// are placed later in the merged array. This is because we want to support overriding
/// earlier values and values from other configurations, including unsetting them.
/// For example, patterns that come last in file inclusion and exclusion lists
/// override earlier patterns, matching the `gitignore` behavior.
/// Generally speaking, it feels more intuitive for later values to override earlier ones
/// than the other way around: `ty --exclude png --exclude "!important.png"`.
///
/// The main downside of this approach is that the ordering can be surprising in cases
/// where an option has "first match" semantics rather than "last match wins".
/// One such example is `extra-paths`, where the semantics are given by Python:
/// the module on the first matching search path wins.
///
/// ```toml
/// [environment]
/// extra-paths = ["b", "c"]
/// ```
///
/// ```bash
/// ty --extra-paths a
/// ```
///
/// A user might expect this configuration to result in `["a", "b", "c"]`,
/// because the CLI has higher precedence. However, the current implementation results in a
/// resolved extra search path of `["b", "c", "a"]`, which means `a` will be tried last.
///
/// There's an argument here that the user should be able to specify the order of the paths,
/// because only then is the user in full control of where to insert the path when specifying `extra-paths`
/// in multiple sources.
///
/// ## Macro
/// You can automatically derive `Combine` for structs with named fields by using `derive(ruff_macros::Combine)`.
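///
/// ## Example
///
/// A minimal sketch of the merge semantics (illustrative values; the module path
/// is assumed):
///
/// ```rust
/// use ty_project::combine::Combine;
///
/// // Scalars: the value in `self` (e.g. the CLI) takes precedence.
/// assert_eq!(Some(1).combine(Some(2)), Some(1));
///
/// // Arrays: both sides are kept, with the higher-precedence values placed last.
/// let cli: Option<Vec<&str>> = Some(vec!["a"]);
/// let config: Option<Vec<&str>> = Some(vec!["b", "c"]);
/// assert_eq!(cli.combine(config), Some(vec!["b", "c", "a"]));
/// ```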
pub trait Combine {
#[must_use]
fn combine(mut self, other: Self) -> Self
where
Self: Sized,
{
self.combine_with(other);
self
}
fn combine_with(&mut self, other: Self);
}
impl<T> Combine for Option<T>
where
T: Combine,
{
fn combine(self, other: Self) -> Self
where
Self: Sized,
{
match (self, other) {
(Some(a), Some(b)) => Some(a.combine(b)),
(None, Some(b)) => Some(b),
(a, _) => a,
}
}
fn combine_with(&mut self, other: Self) {
match (self, other) {
(Some(a), Some(b)) => {
a.combine_with(b);
}
(a @ None, Some(b)) => {
*a = Some(b);
}
_ => {}
}
}
}
impl<T> Combine for Vec<T> {
fn combine_with(&mut self, mut other: Self) {
// `self` takes precedence over `other` but values with higher precedence must be placed after.
// Swap the vectors so that `other` is the one that gets extended, so that the values of `self` come after.
std::mem::swap(self, &mut other);
self.extend(other);
}
}
impl<K, V, S> Combine for HashMap<K, V, S>
where
K: Eq + std::hash::Hash,
S: BuildHasher,
{
fn combine_with(&mut self, mut other: Self) {
// `self` takes precedence over `other` but `extend` overrides existing values.
// Swap the hash maps so that `self` is the one that gets extended.
std::mem::swap(self, &mut other);
self.extend(other);
}
}
/// Implements [`Combine`] for a value that always returns `self` when combined with another value.
macro_rules! impl_noop_combine {
($name:ident) => {
impl Combine for $name {
#[inline(always)]
fn combine_with(&mut self, _other: Self) {}
#[inline(always)]
fn combine(self, _other: Self) -> Self {
self
}
}
};
}
impl_noop_combine!(SystemPathBuf);
impl_noop_combine!(PythonPlatform);
impl_noop_combine!(PythonPath);
impl_noop_combine!(PythonVersion);
// std types
impl_noop_combine!(bool);
impl_noop_combine!(usize);
impl_noop_combine!(u8);
impl_noop_combine!(u16);
impl_noop_combine!(u32);
impl_noop_combine!(u64);
impl_noop_combine!(u128);
impl_noop_combine!(isize);
impl_noop_combine!(i8);
impl_noop_combine!(i16);
impl_noop_combine!(i32);
impl_noop_combine!(i64);
impl_noop_combine!(i128);
impl_noop_combine!(String);
#[cfg(test)]
mod tests {
use crate::combine::Combine;
use std::collections::HashMap;
#[test]
fn combine_option() {
assert_eq!(Some(1).combine(Some(2)), Some(1));
assert_eq!(None.combine(Some(2)), Some(2));
assert_eq!(Some(1).combine(None), Some(1));
}
#[test]
fn combine_vec() {
assert_eq!(None.combine(Some(vec![1, 2, 3])), Some(vec![1, 2, 3]));
assert_eq!(Some(vec![1, 2, 3]).combine(None), Some(vec![1, 2, 3]));
assert_eq!(
Some(vec![1, 2, 3]).combine(Some(vec![4, 5, 6])),
Some(vec![4, 5, 6, 1, 2, 3])
);
}
#[test]
fn combine_map() {
let a: HashMap<u32, _> = HashMap::from_iter([(1, "a"), (2, "a"), (3, "a")]);
let b: HashMap<u32, _> = HashMap::from_iter([(0, "b"), (2, "b"), (5, "b")]);
assert_eq!(None.combine(Some(b.clone())), Some(b.clone()));
assert_eq!(Some(a.clone()).combine(None), Some(a.clone()));
assert_eq!(
Some(a).combine(Some(b)),
Some(HashMap::from_iter([
(0, "b"),
// The value from `a` takes precedence
(1, "a"),
(2, "a"),
(3, "a"),
(5, "b")
]))
);
}
}

crates/ty_project/src/db.rs

@@ -0,0 +1,341 @@
use std::panic::RefUnwindSafe;
use std::sync::Arc;
use crate::DEFAULT_LINT_REGISTRY;
use crate::{Project, ProjectMetadata};
use ruff_db::diagnostic::Diagnostic;
use ruff_db::files::{File, Files};
use ruff_db::system::System;
use ruff_db::vendored::VendoredFileSystem;
use ruff_db::{Db as SourceDb, Upcast};
use salsa::plumbing::ZalsaDatabase;
use salsa::{Cancelled, Event};
use ty_ide::Db as IdeDb;
use ty_python_semantic::lint::{LintRegistry, RuleSelection};
use ty_python_semantic::{Db as SemanticDb, Program};
mod changes;
#[salsa::db]
pub trait Db: SemanticDb + Upcast<dyn SemanticDb> {
fn project(&self) -> Project;
}
#[salsa::db]
#[derive(Clone)]
pub struct ProjectDatabase {
project: Option<Project>,
storage: salsa::Storage<ProjectDatabase>,
files: Files,
system: Arc<dyn System + Send + Sync + RefUnwindSafe>,
}
impl ProjectDatabase {
pub fn new<S>(project_metadata: ProjectMetadata, system: S) -> anyhow::Result<Self>
where
S: System + 'static + Send + Sync + RefUnwindSafe,
{
let mut db = Self {
project: None,
storage: salsa::Storage::default(),
files: Files::default(),
system: Arc::new(system),
};
// TODO: Use the `program_settings` to compute the key for the database's persistent
// cache and load the cache if it exists.
// we may want to have a dedicated method for this?
// Initialize the `Program` singleton
let program_settings = project_metadata.to_program_settings(db.system());
Program::from_settings(&db, program_settings)?;
db.project = Some(Project::from_metadata(&db, project_metadata));
Ok(db)
}
/// Checks all open files in the project and its dependencies.
pub fn check(&self) -> Result<Vec<Diagnostic>, Cancelled> {
self.with_db(|db| db.project().check(db))
}
#[tracing::instrument(level = "debug", skip(self))]
pub fn check_file(&self, file: File) -> Result<Vec<Diagnostic>, Cancelled> {
self.with_db(|db| self.project().check_file(db, file))
}
/// Returns a mutable reference to the system.
///
/// WARNING: Triggers a new revision, canceling any pending queries on other database handles. This can lead to deadlocks.
pub fn system_mut(&mut self) -> &mut dyn System {
// TODO: Use a more official method to cancel other queries.
// https://salsa.zulipchat.com/#narrow/stream/333573-salsa-3.2E0/topic/Expose.20an.20API.20to.20cancel.20other.20queries
let _ = self.zalsa_mut();
Arc::get_mut(&mut self.system).unwrap()
}
pub(crate) fn with_db<F, T>(&self, f: F) -> Result<T, Cancelled>
where
F: FnOnce(&ProjectDatabase) -> T + std::panic::UnwindSafe,
{
Cancelled::catch(|| f(self))
}
}
impl Upcast<dyn SemanticDb> for ProjectDatabase {
fn upcast(&self) -> &(dyn SemanticDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn SemanticDb + 'static) {
self
}
}
impl Upcast<dyn SourceDb> for ProjectDatabase {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
self
}
}
impl Upcast<dyn IdeDb> for ProjectDatabase {
fn upcast(&self) -> &(dyn IdeDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn IdeDb + 'static) {
self
}
}
#[salsa::db]
impl IdeDb for ProjectDatabase {}
#[salsa::db]
impl SemanticDb for ProjectDatabase {
fn is_file_open(&self, file: File) -> bool {
let Some(project) = &self.project else {
return false;
};
project.is_file_open(self, file)
}
fn rule_selection(&self) -> Arc<RuleSelection> {
self.project().rules(self)
}
fn lint_registry(&self) -> &LintRegistry {
&DEFAULT_LINT_REGISTRY
}
}
#[salsa::db]
impl SourceDb for ProjectDatabase {
fn vendored(&self) -> &VendoredFileSystem {
ty_vendored::file_system()
}
fn system(&self) -> &dyn System {
&*self.system
}
fn files(&self) -> &Files {
&self.files
}
fn python_version(&self) -> ruff_python_ast::PythonVersion {
Program::get(self).python_version(self)
}
}
#[salsa::db]
impl salsa::Database for ProjectDatabase {
fn salsa_event(&self, event: &dyn Fn() -> Event) {
if !tracing::enabled!(tracing::Level::TRACE) {
return;
}
let event = event();
if matches!(event.kind, salsa::EventKind::WillCheckCancellation) {
return;
}
tracing::trace!("Salsa event: {event:?}");
}
}
#[salsa::db]
impl Db for ProjectDatabase {
fn project(&self) -> Project {
self.project.unwrap()
}
}
#[cfg(feature = "format")]
mod format {
use crate::ProjectDatabase;
use ruff_db::files::File;
use ruff_db::Upcast;
use ruff_python_formatter::{Db as FormatDb, PyFormatOptions};
#[salsa::db]
impl FormatDb for ProjectDatabase {
fn format_options(&self, file: File) -> PyFormatOptions {
let source_ty = file.source_type(self);
PyFormatOptions::from_source_type(source_ty)
}
}
impl Upcast<dyn FormatDb> for ProjectDatabase {
fn upcast(&self) -> &(dyn FormatDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn FormatDb + 'static) {
self
}
}
}
#[cfg(test)]
pub(crate) mod tests {
use std::sync::Arc;
use salsa::Event;
use ruff_db::files::Files;
use ruff_db::system::{DbWithTestSystem, System, TestSystem};
use ruff_db::vendored::VendoredFileSystem;
use ruff_db::{Db as SourceDb, Upcast};
use ty_python_semantic::lint::{LintRegistry, RuleSelection};
use ty_python_semantic::{Db as SemanticDb, Program};
use crate::db::Db;
use crate::DEFAULT_LINT_REGISTRY;
use crate::{Project, ProjectMetadata};
#[salsa::db]
#[derive(Clone)]
pub(crate) struct TestDb {
storage: salsa::Storage<Self>,
events: Arc<std::sync::Mutex<Vec<Event>>>,
files: Files,
system: TestSystem,
vendored: VendoredFileSystem,
project: Option<Project>,
}
impl TestDb {
pub(crate) fn new(project: ProjectMetadata) -> Self {
let mut db = Self {
storage: salsa::Storage::default(),
system: TestSystem::default(),
vendored: ty_vendored::file_system().clone(),
files: Files::default(),
events: Arc::default(),
project: None,
};
let project = Project::from_metadata(&db, project);
db.project = Some(project);
db
}
}
impl TestDb {
/// Takes the salsa events.
///
/// ## Panics
/// If there are any pending salsa snapshots.
pub(crate) fn take_salsa_events(&mut self) -> Vec<salsa::Event> {
let inner = Arc::get_mut(&mut self.events).expect("no pending salsa snapshots");
let events = inner.get_mut().unwrap();
std::mem::take(&mut *events)
}
}
impl DbWithTestSystem for TestDb {
fn test_system(&self) -> &TestSystem {
&self.system
}
fn test_system_mut(&mut self) -> &mut TestSystem {
&mut self.system
}
}
#[salsa::db]
impl SourceDb for TestDb {
fn vendored(&self) -> &VendoredFileSystem {
&self.vendored
}
fn system(&self) -> &dyn System {
&self.system
}
fn files(&self) -> &Files {
&self.files
}
fn python_version(&self) -> ruff_python_ast::PythonVersion {
Program::get(self).python_version(self)
}
}
impl Upcast<dyn SemanticDb> for TestDb {
fn upcast(&self) -> &(dyn SemanticDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn SemanticDb + 'static) {
self
}
}
impl Upcast<dyn SourceDb> for TestDb {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
self
}
}
#[salsa::db]
impl ty_python_semantic::Db for TestDb {
fn is_file_open(&self, file: ruff_db::files::File) -> bool {
!file.path(self).is_vendored_path()
}
fn rule_selection(&self) -> Arc<RuleSelection> {
self.project().rules(self)
}
fn lint_registry(&self) -> &LintRegistry {
&DEFAULT_LINT_REGISTRY
}
}
#[salsa::db]
impl Db for TestDb {
fn project(&self) -> Project {
self.project.unwrap()
}
}
#[salsa::db]
impl salsa::Database for TestDb {
fn salsa_event(&self, event: &dyn Fn() -> Event) {
let mut events = self.events.lock().unwrap();
events.push(event());
}
}
}


@@ -0,0 +1,240 @@
use crate::db::{Db, ProjectDatabase};
use crate::metadata::options::Options;
use crate::watch::{ChangeEvent, CreatedKind, DeletedKind};
use crate::{Project, ProjectMetadata};
use std::collections::BTreeSet;
use crate::walk::ProjectFilesWalker;
use ruff_db::files::{File, Files};
use ruff_db::system::SystemPath;
use ruff_db::Db as _;
use rustc_hash::FxHashSet;
use ty_python_semantic::Program;
impl ProjectDatabase {
#[tracing::instrument(level = "debug", skip(self, changes, cli_options))]
pub fn apply_changes(&mut self, changes: Vec<ChangeEvent>, cli_options: Option<&Options>) {
let mut project = self.project();
let project_root = project.root(self).to_path_buf();
let program = Program::get(self);
let custom_stdlib_versions_path = program
.custom_stdlib_search_path(self)
.map(|path| path.join("VERSIONS"));
// Are there structural changes to the project?
let mut project_changed = false;
// Changes to a custom stdlib path's VERSIONS
let mut custom_stdlib_change = false;
// Paths that were added
let mut added_paths = FxHashSet::default();
// Deduplicate the `sync` calls. Many file watchers emit multiple events for the same path.
let mut synced_files = FxHashSet::default();
let mut sync_recursively = BTreeSet::default();
let mut sync_path = |db: &mut ProjectDatabase, path: &SystemPath| {
if synced_files.insert(path.to_path_buf()) {
File::sync_path(db, path);
}
};
for change in changes {
tracing::trace!("Handle change: {:?}", change);
if let Some(path) = change.system_path() {
if matches!(
path.file_name(),
Some(".gitignore" | ".ignore" | "ty.toml" | "pyproject.toml")
) {
// Changes to ignore files or settings can change the project structure or add/remove files.
project_changed = true;
continue;
}
if Some(path) == custom_stdlib_versions_path.as_deref() {
custom_stdlib_change = true;
}
}
match change {
ChangeEvent::Changed { path, kind: _ } | ChangeEvent::Opened(path) => {
sync_path(self, &path);
}
ChangeEvent::Created { kind, path } => {
match kind {
CreatedKind::File => sync_path(self, &path),
CreatedKind::Directory | CreatedKind::Any => {
sync_recursively.insert(path.clone());
}
}
// Unlike other files, it's not only important to update the status of existing
// and known `File`s (`sync_recursively`), it's also important to discover new files
// that were added in the project's root (or any of the paths included for checking).
//
// This is important because `Project::check` iterates over all included files.
// The code below walks the `added_paths` and adds all files that
// should be included in the project. We can skip this check for
// paths that aren't part of the project or shouldn't be included
// when checking the project.
if project.is_path_included(self, &path) {
if self.system().is_file(&path) {
// Add the parent directory because `walkdir` always visits explicitly passed files
// even if they match an exclude filter.
added_paths.insert(path.parent().unwrap().to_path_buf());
} else {
added_paths.insert(path);
}
}
}
ChangeEvent::Deleted { kind, path } => {
let is_file = match kind {
DeletedKind::File => true,
DeletedKind::Directory => {
// File watchers emit an event for every deleted file. No need to scan the entire directory.
continue;
}
DeletedKind::Any => self
.files
.try_system(self, &path)
.is_some_and(|file| file.exists(self)),
};
if is_file {
sync_path(self, &path);
if let Some(file) = self.files().try_system(self, &path) {
project.remove_file(self, file);
}
} else {
sync_recursively.insert(path.clone());
if custom_stdlib_versions_path
.as_ref()
.is_some_and(|versions_path| versions_path.starts_with(&path))
{
custom_stdlib_change = true;
}
if project.is_path_included(self, &path) || path == project_root {
// TODO: Shouldn't it be enough to simply traverse the project files and remove all
// that start with the given path?
tracing::debug!(
"Reload project because of a path that could have been a directory."
);
// Perform a full reload in case the deleted directory contained the pyproject.toml.
// We may want to make this more clever in the future, e.g. iterate over the
// indexed files and remove the ones that start with the same path, unless
// the deleted path is the project configuration.
project_changed = true;
}
}
}
ChangeEvent::CreatedVirtual(path) | ChangeEvent::ChangedVirtual(path) => {
File::sync_virtual_path(self, &path);
}
ChangeEvent::DeletedVirtual(path) => {
if let Some(virtual_file) = self.files().try_virtual_file(&path) {
virtual_file.close(self);
}
}
ChangeEvent::Rescan => {
project_changed = true;
Files::sync_all(self);
sync_recursively.clear();
break;
}
}
}
let sync_recursively = sync_recursively.into_iter();
let mut last = None;
for path in sync_recursively {
// Avoid re-syncing paths that are sub-paths of each other.
if let Some(last) = &last {
if path.starts_with(last) {
continue;
}
}
Files::sync_recursively(self, &path);
last = Some(path);
}
if project_changed {
match ProjectMetadata::discover(&project_root, self.system()) {
Ok(mut metadata) => {
if let Some(cli_options) = cli_options {
metadata.apply_cli_options(cli_options.clone());
}
if let Err(error) = metadata.apply_configuration_files(self.system()) {
tracing::error!(
"Failed to apply configuration files, continuing without applying them: {error}"
);
}
let program_settings = metadata.to_program_settings(self.system());
let program = Program::get(self);
if let Err(error) = program.update_from_settings(self, program_settings) {
tracing::error!("Failed to update the program settings, keeping the old program settings: {error}");
}
if metadata.root() == project.root(self) {
tracing::debug!("Reloading project after structural change");
project.reload(self, metadata);
} else {
tracing::debug!("Replace project after structural change");
project = Project::from_metadata(self, metadata);
self.project = Some(project);
}
}
Err(error) => {
tracing::error!(
"Failed to load project, keeping old project configuration: {error}"
);
}
}
return;
} else if custom_stdlib_change {
let search_paths = project
.metadata(self)
.to_program_settings(self.system())
.search_paths;
if let Err(error) = program.update_search_paths(self, &search_paths) {
tracing::error!("Failed to set the new search paths: {error}");
}
}
let diagnostics = if let Some(walker) = ProjectFilesWalker::incremental(self, added_paths) {
// Use directory walking to discover newly added files.
let (files, diagnostics) = walker.collect_vec(self);
for file in files {
project.add_file(self, file);
}
diagnostics
} else {
Vec::new()
};
// Note: We simply replace all IO-related diagnostics here. This isn't ideal, because
// it removes IO errors that may still be relevant. However, tracking IO errors correctly
// across revisions doesn't feel essential, considering that they're rare. Still, we could
// implement a `BTreeMap` or similar and only prune the diagnostics from paths that we've
// re-scanned (or that were removed, etc.).
project.replace_index_diagnostics(self, diagnostics);
}
}


@@ -0,0 +1,296 @@
use std::marker::PhantomData;
use std::ops::Deref;
use std::sync::Arc;
use rustc_hash::FxHashSet;
use salsa::Setter;
use ruff_db::files::File;
use crate::db::Db;
use crate::{IOErrorDiagnostic, Project};
/// The indexed files of a project.
///
/// The indexing happens lazily, but the files are then cached for subsequent reads.
///
/// ## Implementation
/// The implementation uses internal mutability to transition between the lazy and indexed state
/// without triggering a new salsa revision. This is safe because the initial indexing happens on first access,
/// so no query can depend on the contents of the indexed files before that. All subsequent mutations to
/// the indexed files must go through `IndexedMut`, which uses the Salsa setter `package.set_file_set` to
/// ensure that Salsa always knows when the set of indexed files has changed.
#[derive(Debug)]
pub struct IndexedFiles {
state: std::sync::Mutex<State>,
}
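// A minimal usage sketch (hypothetical `db`, `project`, `files`, and `diagnostics`
// values); the first access computes the index in place, later accesses reuse it:
//
// match project.file_set(db).get() {
//     // First access: walk the project, then store and return the indexed set.
//     Index::Lazy(lazy) => lazy.set(files, diagnostics),
//     // Subsequent accesses: reuse the cached set.
//     Index::Indexed(indexed) => indexed,
// };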
impl IndexedFiles {
pub fn lazy() -> Self {
Self {
state: std::sync::Mutex::new(State::Lazy),
}
}
fn indexed(inner: Arc<IndexedInner>) -> Self {
Self {
state: std::sync::Mutex::new(State::Indexed(inner)),
}
}
pub(super) fn get(&self) -> Index {
let state = self.state.lock().unwrap();
match &*state {
State::Lazy => Index::Lazy(LazyFiles { files: state }),
State::Indexed(inner) => Index::Indexed(Indexed {
inner: Arc::clone(inner),
_lifetime: PhantomData,
}),
}
}
pub(super) fn is_lazy(&self) -> bool {
matches!(*self.state.lock().unwrap(), State::Lazy)
}
/// Returns a mutable view on the index that allows cheap in-place mutations.
///
/// The changes are automatically written back to the database once the view is dropped.
pub(super) fn indexed_mut(db: &mut dyn Db, project: Project) -> Option<IndexedMut> {
// Calling `zalsa_mut` cancels all pending salsa queries. This ensures that there are no pending
// reads to the file set.
// TODO: Use a non-internal API instead https://salsa.zulipchat.com/#narrow/stream/333573-salsa-3.2E0/topic/Expose.20an.20API.20to.20cancel.20other.20queries
let _ = db.as_dyn_database_mut().zalsa_mut();
// Replace the state with lazy. The `IndexedMut` guard restores the state
// to `State::Indexed` or sets a new `PackageFiles` when it gets dropped to ensure the state
// is restored to how it has been before replacing the value.
//
// It isn't necessary to hold on to the lock after this point:
// * The above call to `zalsa_mut` guarantees that there's exactly **one** DB reference.
// * `Indexed` has a `'db` lifetime, and this method requires a `&mut db`.
// This means that there can't be any pending reference to `Indexed` because Rust
// doesn't allow borrowing `db` as mutable (to call this method) and immutable (`Indexed<'db>`) at the same time.
// There can't be any other `Indexed<'db>` references created by clones of this DB because
// all clones must have been dropped at this point and the `Indexed`
// can't outlive the database (constrained by the `db` lifetime).
let state = {
let files = project.file_set(db);
let mut locked = files.state.lock().unwrap();
std::mem::replace(&mut *locked, State::Lazy)
};
let indexed = match state {
// If it's already lazy, just return. We also don't need to restore anything because the
// replace above was a no-op.
State::Lazy => return None,
State::Indexed(indexed) => indexed,
};
Some(IndexedMut {
db: Some(db),
project,
indexed,
did_change: false,
})
}
}
impl Default for IndexedFiles {
fn default() -> Self {
Self::lazy()
}
}
#[derive(Debug)]
enum State {
/// The files of a package haven't been indexed yet.
Lazy,
/// The files are indexed. Stores the known files of a package.
Indexed(Arc<IndexedInner>),
}
pub(super) enum Index<'db> {
/// The index has not yet been computed. Allows inserting the files.
Lazy(LazyFiles<'db>),
Indexed(Indexed<'db>),
}
/// Package files that have not been indexed yet.
pub(super) struct LazyFiles<'db> {
files: std::sync::MutexGuard<'db, State>,
}
impl<'db> LazyFiles<'db> {
/// Sets the indexed files of a package to `files`.
pub(super) fn set(
mut self,
files: FxHashSet<File>,
diagnostics: Vec<IOErrorDiagnostic>,
) -> Indexed<'db> {
let files = Indexed {
inner: Arc::new(IndexedInner { files, diagnostics }),
_lifetime: PhantomData,
};
*self.files = State::Indexed(Arc::clone(&files.inner));
files
}
}
/// The indexed files of the project.
///
/// Note: This type is intentionally non-cloneable. Making it cloneable requires
/// revisiting the locking behavior in [`IndexedFiles::indexed_mut`].
#[derive(Debug)]
pub struct Indexed<'db> {
inner: Arc<IndexedInner>,
// Preserve the lifetime of `PackageFiles`.
_lifetime: PhantomData<&'db ()>,
}
#[derive(Debug)]
struct IndexedInner {
files: FxHashSet<File>,
diagnostics: Vec<IOErrorDiagnostic>,
}
impl Indexed<'_> {
pub(super) fn diagnostics(&self) -> &[IOErrorDiagnostic] {
&self.inner.diagnostics
}
}
impl Deref for Indexed<'_> {
type Target = FxHashSet<File>;
fn deref(&self) -> &Self::Target {
&self.inner.files
}
}
pub(super) type IndexedIter<'a> = std::iter::Copied<std::collections::hash_set::Iter<'a, File>>;
impl<'a> IntoIterator for &'a Indexed<'_> {
type Item = File;
type IntoIter = IndexedIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.inner.files.iter().copied()
}
}
/// A mutable view of a project's indexed files.
///
/// Allows in-place mutation of the files without deep cloning the hash set.
/// The changes are written back when the mutable view is dropped or by calling [`Self::set`] manually.
pub(super) struct IndexedMut<'db> {
db: Option<&'db mut dyn Db>,
project: Project,
indexed: Arc<IndexedInner>,
did_change: bool,
}
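// A minimal usage sketch (hypothetical `db`, `project`, and `file` values):
// mutate the set in place and let `Drop` write the changes back.
//
// if let Some(mut index) = IndexedFiles::indexed_mut(db, project) {
//     index.insert(file); // marks the view as changed
// } // dropped here: `set_file_set` runs and triggers a new salsa revision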
impl IndexedMut<'_> {
pub(super) fn insert(&mut self, file: File) -> bool {
if self.inner_mut().files.insert(file) {
self.did_change = true;
true
} else {
false
}
}
pub(super) fn remove(&mut self, file: File) -> bool {
if self.inner_mut().files.remove(&file) {
self.did_change = true;
true
} else {
false
}
}
pub(super) fn set_diagnostics(&mut self, diagnostics: Vec<IOErrorDiagnostic>) {
self.inner_mut().diagnostics = diagnostics;
}
fn inner_mut(&mut self) -> &mut IndexedInner {
Arc::get_mut(&mut self.indexed)
.expect("All references to `FilesSet` should have been dropped")
}
fn set_impl(&mut self) {
let Some(db) = self.db.take() else {
return;
};
let indexed = Arc::clone(&self.indexed);
if self.did_change {
// If there are changes, set the new file_set to trigger a salsa revision change.
self.project
.set_file_set(db)
.to(IndexedFiles::indexed(indexed));
} else {
// The `indexed_mut` replaced the `state` with Lazy. Restore it back to the indexed state.
*self.project.file_set(db).state.lock().unwrap() = State::Indexed(indexed);
}
}
}
impl Drop for IndexedMut<'_> {
fn drop(&mut self) {
self.set_impl();
}
}
#[cfg(test)]
mod tests {
use rustc_hash::FxHashSet;
use crate::db::tests::TestDb;
use crate::db::Db;
use crate::files::Index;
use crate::ProjectMetadata;
use ruff_db::files::system_path_to_file;
use ruff_db::system::{DbWithWritableSystem as _, SystemPathBuf};
use ruff_python_ast::name::Name;
#[test]
fn re_entrance() -> anyhow::Result<()> {
let metadata = ProjectMetadata::new(Name::new_static("test"), SystemPathBuf::from("/test"));
let mut db = TestDb::new(metadata);
db.write_file("test.py", "")?;
let project = db.project();
let file = system_path_to_file(&db, "test.py").unwrap();
let files = match project.file_set(&db).get() {
Index::Lazy(lazy) => lazy.set(FxHashSet::from_iter([file]), Vec::new()),
Index::Indexed(files) => files,
};
// Calling `files` a second time should not deadlock.
// This can e.g. happen when `check_file` iterates over all files and
// `is_file_open` queries the open files.
let files_2 = project.file_set(&db).get();
match files_2 {
Index::Lazy(_) => {
panic!("Expected indexed files, got lazy files");
}
Index::Indexed(files_2) => {
assert_eq!(
files_2.iter().collect::<Vec<_>>(),
files.iter().collect::<Vec<_>>()
);
}
}
Ok(())
}
}


@@ -0,0 +1,721 @@
#![allow(clippy::ref_option)]
use crate::metadata::options::OptionDiagnostic;
use crate::walk::{ProjectFilesFilter, ProjectFilesWalker};
pub use db::{Db, ProjectDatabase};
use files::{Index, Indexed, IndexedFiles};
use metadata::settings::Settings;
pub use metadata::{ProjectDiscoveryError, ProjectMetadata};
use ruff_db::diagnostic::{
create_parse_diagnostic, create_unsupported_syntax_diagnostic, Annotation, Diagnostic,
DiagnosticId, Severity, Span, SubDiagnostic,
};
use ruff_db::files::File;
use ruff_db::parsed::parsed_module;
use ruff_db::source::{source_text, SourceTextError};
use ruff_db::system::{SystemPath, SystemPathBuf};
use rustc_hash::FxHashSet;
use salsa::Durability;
use salsa::Setter;
use std::backtrace::BacktraceStatus;
use std::panic::{AssertUnwindSafe, UnwindSafe};
use std::sync::Arc;
use thiserror::Error;
use tracing::error;
use ty_python_semantic::lint::{LintRegistry, LintRegistryBuilder, RuleSelection};
use ty_python_semantic::register_lints;
use ty_python_semantic::types::check_types;
pub mod combine;
mod db;
mod files;
pub mod metadata;
mod walk;
pub mod watch;
pub static DEFAULT_LINT_REGISTRY: std::sync::LazyLock<LintRegistry> =
std::sync::LazyLock::new(default_lints_registry);
pub fn default_lints_registry() -> LintRegistry {
let mut builder = LintRegistryBuilder::default();
register_lints(&mut builder);
builder.build()
}
/// The project as a Salsa ingredient.
///
/// ## How is a project different from a program?
/// There are two (related) motivations:
///
/// 1. `Program` is defined in `ruff_db`, and it can't reference the settings types for the linter and formatter
/// without introducing a cyclic dependency. The project is defined in a higher-level crate
/// where it can reference these settings types.
/// 2. Running `ruff check` with different target versions results in different programs (settings), but
/// it remains the same project. That's why the program is a narrowed view of the project,
/// holding on to only the most fundamental settings required for checking.
#[salsa::input]
pub struct Project {
/// The files that are open in the project.
///
/// Setting the open files to a non-`None` value changes `check` to only check the
/// open files rather than all files in the project.
#[return_ref]
#[default]
open_fileset: Option<Arc<FxHashSet<File>>>,
/// The first-party files of this project.
#[default]
#[return_ref]
file_set: IndexedFiles,
/// The metadata describing the project, including the unresolved options.
#[return_ref]
pub metadata: ProjectMetadata,
/// The resolved project settings.
#[return_ref]
pub settings: Settings,
/// The paths that should be included when checking this project.
///
/// The default (when this list is empty) is to include all files in the project root
/// (that satisfy the configured include and exclude patterns).
/// However, it's sometimes desired to only check a subset of the project, e.g. to see
/// the diagnostics for a single file or a folder.
///
/// This list gets initialized by the paths passed to `ty check <paths>`
///
/// ## How is this different from `open_files`?
///
/// The `included_paths` is closely related to `open_files`. The only difference is that
/// `open_files` is already a resolved set of files whereas `included_paths` is only a list of paths
/// that are resolved to files by indexing them. The other difference is that
/// new files added to any directory in `included_paths` will be indexed and added to the project
/// whereas `open_files` needs to be updated manually (e.g. by the IDE).
///
/// In short, `open_files` is cheaper in contexts where the set of files is known, like
/// in an IDE when the user only wants to check the open tabs. This could be modeled
/// with `included_paths` too but it would require an explicit walk dir step that's simply unnecessary.
#[default]
#[return_ref]
included_paths_list: Vec<SystemPathBuf>,
/// Diagnostics that were generated when resolving the project settings.
#[return_ref]
settings_diagnostics: Vec<OptionDiagnostic>,
}
#[salsa::tracked]
impl Project {
pub fn from_metadata(db: &dyn Db, metadata: ProjectMetadata) -> Self {
let (settings, settings_diagnostics) = metadata.options().to_settings(db);
Project::builder(metadata, settings, settings_diagnostics)
.durability(Durability::MEDIUM)
.open_fileset_durability(Durability::LOW)
.file_set_durability(Durability::LOW)
.new(db)
}
pub fn root(self, db: &dyn Db) -> &SystemPath {
self.metadata(db).root()
}
pub fn name(self, db: &dyn Db) -> &str {
self.metadata(db).name()
}
/// Returns the resolved linter rules for the project.
///
/// This is a salsa query to prevent re-computing queries if other, unrelated
/// settings change. For example, we don't want a change to the terminal settings
/// to invalidate any type-checking queries.
#[salsa::tracked]
pub fn rules(self, db: &dyn Db) -> Arc<RuleSelection> {
self.settings(db).to_rules()
}
/// Returns `true` if `path` is both part of the project and included (see `included_paths_list`).
///
/// Unlike [`Self::files`], this method does not respect `.gitignore` files. It only checks
/// the project's include and exclude settings as well as the paths that were passed to `ty check <paths>`.
/// This means that this method is an over-approximation of `Self::files` and may return `true` for paths
/// that won't be included when checking the project because they're ignored in a `.gitignore` file.
pub fn is_path_included(self, db: &dyn Db, path: &SystemPath) -> bool {
ProjectFilesFilter::from_project(db, self).is_included(path)
}
pub fn reload(self, db: &mut dyn Db, metadata: ProjectMetadata) {
tracing::debug!("Reloading project");
assert_eq!(self.root(db), metadata.root());
if &metadata != self.metadata(db) {
let (settings, settings_diagnostics) = metadata.options().to_settings(db);
if self.settings(db) != &settings {
self.set_settings(db).to(settings);
}
if self.settings_diagnostics(db) != &settings_diagnostics {
self.set_settings_diagnostics(db).to(settings_diagnostics);
}
self.set_metadata(db).to(metadata);
}
self.reload_files(db);
}
/// Checks all open files in the project and its dependencies.
pub(crate) fn check(self, db: &ProjectDatabase) -> Vec<Diagnostic> {
let project_span = tracing::debug_span!("Project::check");
let _span = project_span.enter();
tracing::debug!("Checking project '{name}'", name = self.name(db));
let mut diagnostics: Vec<Diagnostic> = Vec::new();
diagnostics.extend(
self.settings_diagnostics(db)
.iter()
.map(OptionDiagnostic::to_diagnostic),
);
let files = ProjectFiles::new(db, self);
diagnostics.extend(
files
.diagnostics()
.iter()
.map(IOErrorDiagnostic::to_diagnostic),
);
let file_diagnostics = Arc::new(std::sync::Mutex::new(vec![]));
{
let file_diagnostics = Arc::clone(&file_diagnostics);
let db = db.clone();
let project_span = project_span.clone();
rayon::scope(move |scope| {
for file in &files {
let result = Arc::clone(&file_diagnostics);
let db = db.clone();
let project_span = project_span.clone();
scope.spawn(move |_| {
let check_file_span =
tracing::debug_span!(parent: &project_span, "check_file", ?file);
let _entered = check_file_span.entered();
let file_diagnostics = check_file_impl(&db, file);
result.lock().unwrap().extend(file_diagnostics);
});
}
});
}
let mut file_diagnostics = Arc::into_inner(file_diagnostics)
.unwrap()
.into_inner()
.unwrap();
// We sort diagnostics in a way that keeps them in source order
// and grouped by file. After that, we fall back to severity
// (with fatal messages sorting before info messages) and then
// finally the diagnostic ID.
file_diagnostics.sort_by(|d1, d2| {
if let (Some(span1), Some(span2)) = (d1.primary_span(), d2.primary_span()) {
let order = span1
.file()
.path(db)
.as_str()
.cmp(span2.file().path(db).as_str());
if order.is_ne() {
return order;
}
if let (Some(range1), Some(range2)) = (span1.range(), span2.range()) {
let order = range1.start().cmp(&range2.start());
if order.is_ne() {
return order;
}
}
}
// Reverse so that, e.g., Fatal sorts before Info.
let order = d1.severity().cmp(&d2.severity()).reverse();
if order.is_ne() {
return order;
}
d1.id().cmp(&d2.id())
});
diagnostics.extend(file_diagnostics);
diagnostics
}
pub(crate) fn check_file(self, db: &dyn Db, file: File) -> Vec<Diagnostic> {
let mut file_diagnostics: Vec<_> = self
.settings_diagnostics(db)
.iter()
.map(OptionDiagnostic::to_diagnostic)
.collect();
let check_diagnostics = check_file_impl(db, file);
file_diagnostics.extend(check_diagnostics);
file_diagnostics
}
/// Opens a file in the project.
///
/// This changes the behavior of `check` to only check the open files rather than all files in the project.
pub fn open_file(self, db: &mut dyn Db, file: File) {
tracing::debug!("Opening file `{}`", file.path(db));
let mut open_files = self.take_open_files(db);
open_files.insert(file);
self.set_open_files(db, open_files);
}
/// Closes a file in the project.
pub fn close_file(self, db: &mut dyn Db, file: File) -> bool {
tracing::debug!("Closing file `{}`", file.path(db));
let mut open_files = self.take_open_files(db);
let removed = open_files.remove(&file);
if removed {
self.set_open_files(db, open_files);
}
removed
}
pub fn set_included_paths(self, db: &mut dyn Db, paths: Vec<SystemPathBuf>) {
tracing::debug!("Setting included paths: {paths}", paths = paths.len());
self.set_included_paths_list(db).to(paths);
self.reload_files(db);
}
/// Returns the paths that should be checked.
///
/// The default is to check the entire project in which case this method returns
/// the project root. However, users can specify to only check specific sub-folders or
/// even files of a project by using `ty check <paths>`. In that case, this method
/// returns the provided absolute paths.
///
/// Note: The CLI doesn't prohibit users from specifying paths outside the project root.
/// This can be useful to check arbitrary files, but it isn't something we recommend.
/// We should try to support this use case but it's okay if there are some limitations around it.
fn included_paths_or_root(self, db: &dyn Db) -> &[SystemPathBuf] {
match &**self.included_paths_list(db) {
[] => std::slice::from_ref(&self.metadata(db).root),
paths => paths,
}
}
/// Returns the open files in the project or `None` if the entire project should be checked.
pub fn open_files(self, db: &dyn Db) -> Option<&FxHashSet<File>> {
self.open_fileset(db).as_deref()
}
/// Sets the open files in the project.
///
/// This changes the behavior of `check` to only check the open files rather than all files in the project.
#[tracing::instrument(level = "debug", skip(self, db))]
pub fn set_open_files(self, db: &mut dyn Db, open_files: FxHashSet<File>) {
tracing::debug!("Set open project files (count: {})", open_files.len());
self.set_open_fileset(db).to(Some(Arc::new(open_files)));
}
/// This takes the open files from the project and returns them.
///
/// This changes the behavior of `check` to check all files in the project instead of just the open files.
fn take_open_files(self, db: &mut dyn Db) -> FxHashSet<File> {
tracing::debug!("Take open project files");
// Salsa will cancel any pending queries and remove its own reference to `open_files`
// so that the reference counter to `open_files` now drops to 1.
let open_files = self.set_open_fileset(db).to(None);
if let Some(open_files) = open_files {
Arc::try_unwrap(open_files).unwrap()
} else {
FxHashSet::default()
}
}
/// Returns `true` if the file is open in the project.
///
/// A file is considered open when:
/// * it is explicitly set as an open file using [`open_file`](Self::open_file)
/// * it has a [`SystemPath`] and belongs to a package's `src` files
/// * it has a [`SystemVirtualPath`](ruff_db::system::SystemVirtualPath)
pub fn is_file_open(self, db: &dyn Db, file: File) -> bool {
let path = file.path(db);
// Try to return early to avoid adding a dependency on `open_files` or `file_set` which
// both have a durability of `LOW`.
if path.is_vendored_path() {
return false;
}
if let Some(open_files) = self.open_files(db) {
open_files.contains(&file)
} else if file.path(db).is_system_path() {
self.files(db).contains(&file)
} else {
file.path(db).is_system_virtual_path()
}
}
#[tracing::instrument(level = "debug", skip(self, db))]
pub fn remove_file(self, db: &mut dyn Db, file: File) {
tracing::debug!(
"Removing file `{}` from project `{}`",
file.path(db),
self.name(db)
);
let Some(mut index) = IndexedFiles::indexed_mut(db, self) else {
return;
};
index.remove(file);
}
pub fn add_file(self, db: &mut dyn Db, file: File) {
tracing::debug!(
"Adding file `{}` to project `{}`",
file.path(db),
self.name(db)
);
let Some(mut index) = IndexedFiles::indexed_mut(db, self) else {
return;
};
index.insert(file);
}
/// Replaces the diagnostics from indexing the project files with `diagnostics`.
///
/// This is a no-op if the project files haven't been indexed yet.
pub fn replace_index_diagnostics(self, db: &mut dyn Db, diagnostics: Vec<IOErrorDiagnostic>) {
let Some(mut index) = IndexedFiles::indexed_mut(db, self) else {
return;
};
index.set_diagnostics(diagnostics);
}
/// Returns the files belonging to this project.
pub fn files(self, db: &dyn Db) -> Indexed<'_> {
let files = self.file_set(db);
let indexed = match files.get() {
Index::Lazy(vacant) => {
let _entered =
tracing::debug_span!("Project::index_files", project = %self.name(db))
.entered();
let walker = ProjectFilesWalker::new(db);
let (files, diagnostics) = walker.collect_set(db);
tracing::info!("Indexed {} file(s)", files.len());
vacant.set(files, diagnostics)
}
Index::Indexed(indexed) => indexed,
};
indexed
}
pub fn reload_files(self, db: &mut dyn Db) {
tracing::debug!("Reloading files for project `{}`", self.name(db));
if !self.file_set(db).is_lazy() {
// Force a re-index of the files in the next revision.
self.set_file_set(db).to(IndexedFiles::lazy());
}
}
}
fn check_file_impl(db: &dyn Db, file: File) -> Vec<Diagnostic> {
let mut diagnostics: Vec<Diagnostic> = Vec::new();
// Abort checking if there are IO errors.
let source = source_text(db.upcast(), file);
if let Some(read_error) = source.read_error() {
diagnostics.push(
IOErrorDiagnostic {
file: Some(file),
error: read_error.clone().into(),
}
.to_diagnostic(),
);
return diagnostics;
}
let parsed = parsed_module(db.upcast(), file);
diagnostics.extend(
parsed
.errors()
.iter()
.map(|error| create_parse_diagnostic(file, error)),
);
diagnostics.extend(
parsed
.unsupported_syntax_errors()
.iter()
.map(|error| create_unsupported_syntax_diagnostic(file, error)),
);
{
let db = AssertUnwindSafe(db);
match catch(&**db, file, || check_types(db.upcast(), file)) {
Ok(Some(type_check_diagnostics)) => {
diagnostics.extend(type_check_diagnostics.into_iter().cloned());
}
Ok(None) => {}
Err(diagnostic) => diagnostics.push(diagnostic),
}
}
diagnostics.sort_unstable_by_key(|diagnostic| {
diagnostic
.primary_span()
.and_then(|span| span.range())
.unwrap_or_default()
.start()
});
diagnostics
}
#[derive(Debug)]
enum ProjectFiles<'a> {
OpenFiles(&'a FxHashSet<File>),
Indexed(files::Indexed<'a>),
}
impl<'a> ProjectFiles<'a> {
fn new(db: &'a dyn Db, project: Project) -> Self {
if let Some(open_files) = project.open_files(db) {
ProjectFiles::OpenFiles(open_files)
} else {
ProjectFiles::Indexed(project.files(db))
}
}
fn diagnostics(&self) -> &[IOErrorDiagnostic] {
match self {
ProjectFiles::OpenFiles(_) => &[],
ProjectFiles::Indexed(indexed) => indexed.diagnostics(),
}
}
}
impl<'a> IntoIterator for &'a ProjectFiles<'a> {
type Item = File;
type IntoIter = ProjectFilesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
match self {
ProjectFiles::OpenFiles(files) => ProjectFilesIter::OpenFiles(files.iter()),
ProjectFiles::Indexed(indexed) => ProjectFilesIter::Indexed {
files: indexed.into_iter(),
},
}
}
}
enum ProjectFilesIter<'db> {
OpenFiles(std::collections::hash_set::Iter<'db, File>),
Indexed { files: files::IndexedIter<'db> },
}
impl Iterator for ProjectFilesIter<'_> {
type Item = File;
fn next(&mut self) -> Option<Self::Item> {
match self {
ProjectFilesIter::OpenFiles(files) => files.next().copied(),
ProjectFilesIter::Indexed { files } => files.next(),
}
}
}
#[derive(Debug, Clone)]
pub struct IOErrorDiagnostic {
file: Option<File>,
error: IOErrorKind,
}
impl IOErrorDiagnostic {
fn to_diagnostic(&self) -> Diagnostic {
let mut diag = Diagnostic::new(DiagnosticId::Io, Severity::Error, &self.error);
if let Some(file) = self.file {
diag.annotate(Annotation::primary(Span::from(file)));
}
diag
}
}
#[derive(Error, Debug, Clone)]
enum IOErrorKind {
#[error(transparent)]
Walk(#[from] walk::WalkError),
#[error(transparent)]
SourceText(#[from] SourceTextError),
}
fn catch<F, R>(db: &dyn Db, file: File, f: F) -> Result<Option<R>, Diagnostic>
where
F: FnOnce() -> R + UnwindSafe,
{
match ruff_db::panic::catch_unwind(|| {
// Ignore salsa errors
salsa::Cancelled::catch(f).ok()
}) {
Ok(result) => Ok(result),
Err(error) => {
use std::fmt::Write;
let mut message = String::new();
message.push_str("Panicked");
if let Some(location) = error.location {
let _ = write!(&mut message, " at {location}");
}
let _ = write!(
&mut message,
" when checking `{file}`",
file = file.path(db)
);
if let Some(payload) = error.payload.as_str() {
let _ = write!(&mut message, ": `{payload}`");
}
let mut diagnostic = Diagnostic::new(DiagnosticId::Panic, Severity::Fatal, message);
diagnostic.sub(SubDiagnostic::new(
Severity::Info,
"This indicates a bug in ty.",
));
let report_message = "If you could open an issue at https://github.com/astral-sh/ty/issues/new?title=%5Bpanic%5D we'd be very appreciative!";
diagnostic.sub(SubDiagnostic::new(Severity::Info, report_message));
diagnostic.sub(SubDiagnostic::new(
Severity::Info,
format!(
"Platform: {os} {arch}",
os = std::env::consts::OS,
arch = std::env::consts::ARCH
),
));
diagnostic.sub(SubDiagnostic::new(
Severity::Info,
format!(
"Args: {args:?}",
args = std::env::args().collect::<Vec<_>>()
),
));
if let Some(backtrace) = error.backtrace {
match backtrace.status() {
BacktraceStatus::Disabled => {
diagnostic.sub(SubDiagnostic::new(
Severity::Info,
"run with `RUST_BACKTRACE=1` environment variable to show the full backtrace information",
));
}
BacktraceStatus::Captured => {
diagnostic.sub(SubDiagnostic::new(
Severity::Info,
format!("Backtrace:\n{backtrace}"),
));
}
_ => {}
}
}
if let Some(backtrace) = error.salsa_backtrace {
salsa::attach(db, || {
diagnostic.sub(SubDiagnostic::new(Severity::Info, backtrace.to_string()));
});
}
Err(diagnostic)
}
}
}
#[cfg(test)]
mod tests {
use crate::db::tests::TestDb;
use crate::{check_file_impl, ProjectMetadata};
use ruff_db::files::system_path_to_file;
use ruff_db::source::source_text;
use ruff_db::system::{DbWithTestSystem, DbWithWritableSystem as _, SystemPath, SystemPathBuf};
use ruff_db::testing::assert_function_query_was_not_run;
use ruff_python_ast::name::Name;
use ruff_python_ast::PythonVersion;
use ty_python_semantic::types::check_types;
use ty_python_semantic::{Program, ProgramSettings, PythonPlatform, SearchPathSettings};
#[test]
fn check_file_skips_type_checking_when_file_cant_be_read() -> ruff_db::system::Result<()> {
let project = ProjectMetadata::new(Name::new_static("test"), SystemPathBuf::from("/"));
let mut db = TestDb::new(project);
let path = SystemPath::new("test.py");
Program::from_settings(
&db,
ProgramSettings {
python_version: PythonVersion::default(),
python_platform: PythonPlatform::default(),
search_paths: SearchPathSettings::new(vec![SystemPathBuf::from(".")]),
},
)
.expect("Failed to configure program settings");
db.write_file(path, "x = 10")?;
let file = system_path_to_file(&db, path).unwrap();
// Now the file gets deleted before we had a chance to read its source text.
db.memory_file_system().remove_file(path)?;
file.sync(&mut db);
assert_eq!(source_text(&db, file).as_str(), "");
assert_eq!(
check_file_impl(&db, file)
.into_iter()
.map(|diagnostic| diagnostic.primary_message().to_string())
.collect::<Vec<_>>(),
vec!["Failed to read file: No such file or directory".to_string()]
);
let events = db.take_salsa_events();
assert_function_query_was_not_run(&db, check_types, file, &events);
// The user now creates a new file with an empty text. The source text
// content returned by `source_text` remains unchanged, but the diagnostics should get updated.
db.write_file(path, "").unwrap();
assert_eq!(source_text(&db, file).as_str(), "");
assert_eq!(
check_file_impl(&db, file)
.into_iter()
.map(|diagnostic| diagnostic.primary_message().to_string())
.collect::<Vec<_>>(),
vec![] as Vec<String>
);
Ok(())
}
}


@@ -0,0 +1,942 @@
use configuration_file::{ConfigurationFile, ConfigurationFileError};
use ruff_db::system::{System, SystemPath, SystemPathBuf};
use ruff_python_ast::name::Name;
use std::sync::Arc;
use thiserror::Error;
use ty_python_semantic::ProgramSettings;
use crate::combine::Combine;
use crate::metadata::pyproject::{Project, PyProject, PyProjectError, ResolveRequiresPythonError};
use crate::metadata::value::ValueSource;
use options::Options;
use options::TyTomlError;
mod configuration_file;
pub mod options;
pub mod pyproject;
pub mod settings;
pub mod value;
#[derive(Debug, PartialEq, Eq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct ProjectMetadata {
pub(super) name: Name,
pub(super) root: SystemPathBuf,
/// The raw options
pub(super) options: Options,
/// Paths of configurations other than the project's configuration that were combined into [`Self::options`].
///
/// This field stores the paths of the configuration files, mainly for
/// knowing which files to watch for changes.
///
/// The path ordering doesn't imply precedence.
#[cfg_attr(test, serde(skip_serializing_if = "Vec::is_empty"))]
pub(super) extra_configuration_paths: Vec<SystemPathBuf>,
}
impl ProjectMetadata {
/// Creates a project with the given name and root that uses the default options.
pub fn new(name: Name, root: SystemPathBuf) -> Self {
Self {
name,
root,
extra_configuration_paths: Vec::default(),
options: Options::default(),
}
}
/// Loads a project from a `pyproject.toml` file.
pub(crate) fn from_pyproject(
pyproject: PyProject,
root: SystemPathBuf,
) -> Result<Self, ResolveRequiresPythonError> {
Self::from_options(
pyproject.tool.and_then(|tool| tool.ty).unwrap_or_default(),
root,
pyproject.project.as_ref(),
)
}
/// Loads a project from a set of options and an optional `[project]` table from a `pyproject.toml`.
pub fn from_options(
mut options: Options,
root: SystemPathBuf,
project: Option<&Project>,
) -> Result<Self, ResolveRequiresPythonError> {
let name = project
.and_then(|project| project.name.as_deref())
.map(|name| Name::new(&**name))
.unwrap_or_else(|| Name::new(root.file_name().unwrap_or("root")));
// If the `options` don't specify a python version but the `project.requires-python` field is set,
// use that as a lower bound instead.
if let Some(project) = project {
if options
.environment
.as_ref()
.is_none_or(|env| env.python_version.is_none())
{
if let Some(requires_python) = project.resolve_requires_python_lower_bound()? {
let mut environment = options.environment.unwrap_or_default();
environment.python_version = Some(requires_python);
options.environment = Some(environment);
}
}
}
Ok(Self {
name,
root,
options,
extra_configuration_paths: Vec::new(),
})
}
/// Discovers the closest project at `path` and returns its metadata.
///
/// The algorithm traverses upwards in the `path`'s ancestor chain and uses the following precedence
/// to resolve the project's root (see the example below):
///
/// 1. The closest `pyproject.toml` with a `tool.ty` section or `ty.toml`.
/// 1. The closest `pyproject.toml`.
/// 1. Fallback to use `path` as the root and use the default settings.
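///
/// For example, given a hypothetical layout where `/app/pyproject.toml` has a
/// `[tool.ty]` section and `/app/packages/a/pyproject.toml` does not,
/// discovering from `/app/packages/a` resolves the project root to `/app`:
/// the nested `pyproject.toml` has no `tool.ty` section, so the closest
/// configuration with one wins.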
pub fn discover(
path: &SystemPath,
system: &dyn System,
) -> Result<ProjectMetadata, ProjectDiscoveryError> {
tracing::debug!("Searching for a project in '{path}'");
if !system.is_directory(path) {
return Err(ProjectDiscoveryError::NotADirectory(path.to_path_buf()));
}
let mut closest_project: Option<ProjectMetadata> = None;
for project_root in path.ancestors() {
let pyproject_path = project_root.join("pyproject.toml");
let pyproject = if let Ok(pyproject_str) = system.read_to_string(&pyproject_path) {
match PyProject::from_toml_str(
&pyproject_str,
ValueSource::File(Arc::new(pyproject_path.clone())),
) {
Ok(pyproject) => Some(pyproject),
Err(error) => {
return Err(ProjectDiscoveryError::InvalidPyProject {
path: pyproject_path,
source: Box::new(error),
})
}
}
} else {
None
};
// A `ty.toml` takes precedence over a `pyproject.toml`.
let ty_toml_path = project_root.join("ty.toml");
if let Ok(ty_str) = system.read_to_string(&ty_toml_path) {
let options = match Options::from_toml_str(
&ty_str,
ValueSource::File(Arc::new(ty_toml_path.clone())),
) {
Ok(options) => options,
Err(error) => {
return Err(ProjectDiscoveryError::InvalidTyToml {
path: ty_toml_path,
source: Box::new(error),
})
}
};
if pyproject
.as_ref()
.is_some_and(|project| project.ty().is_some())
{
// TODO: Consider using a diagnostic here
tracing::warn!("Ignoring the `tool.ty` section in `{pyproject_path}` because `{ty_toml_path}` takes precedence.");
}
tracing::debug!("Found project at '{}'", project_root);
let metadata = ProjectMetadata::from_options(
options,
project_root.to_path_buf(),
pyproject
.as_ref()
.and_then(|pyproject| pyproject.project.as_ref()),
)
.map_err(|err| {
ProjectDiscoveryError::InvalidRequiresPythonConstraint {
source: err,
path: pyproject_path,
}
})?;
return Ok(metadata);
}
if let Some(pyproject) = pyproject {
let has_ty_section = pyproject.ty().is_some();
let metadata =
ProjectMetadata::from_pyproject(pyproject, project_root.to_path_buf())
.map_err(
|err| ProjectDiscoveryError::InvalidRequiresPythonConstraint {
source: err,
path: pyproject_path,
},
)?;
if has_ty_section {
tracing::debug!("Found project at '{}'", project_root);
return Ok(metadata);
}
// Not a project itself, keep looking for an enclosing project.
if closest_project.is_none() {
closest_project = Some(metadata);
}
}
}
// No project found, but maybe a pyproject.toml was found.
let metadata = if let Some(closest_project) = closest_project {
tracing::debug!(
"Project without `tool.ty` section: '{}'",
closest_project.root()
);
closest_project
} else {
tracing::debug!("The ancestor directories contain no `pyproject.toml`. Falling back to a virtual project.");
// Create a project with a default configuration
Self::new(
path.file_name().unwrap_or("root").into(),
path.to_path_buf(),
)
};
Ok(metadata)
}
pub fn root(&self) -> &SystemPath {
&self.root
}
pub fn name(&self) -> &str {
&self.name
}
pub fn options(&self) -> &Options {
&self.options
}
pub fn extra_configuration_paths(&self) -> &[SystemPathBuf] {
&self.extra_configuration_paths
}
pub fn to_program_settings(&self, system: &dyn System) -> ProgramSettings {
self.options.to_program_settings(self.root(), system)
}
/// Combine the project options with the CLI options where the CLI options take precedence.
pub fn apply_cli_options(&mut self, options: Options) {
self.options = options.combine(std::mem::take(&mut self.options));
}
/// Applies the options from the configuration files to the project's options.
///
/// This includes:
///
/// * The user-level configuration
pub fn apply_configuration_files(
&mut self,
system: &dyn System,
) -> Result<(), ConfigurationFileError> {
if let Some(user) = ConfigurationFile::user(system)? {
tracing::debug!(
"Applying user-level configuration loaded from `{path}`.",
path = user.path()
);
self.apply_configuration_file(user);
}
Ok(())
}
/// Applies a lower-precedence configuration file to the project's options.
fn apply_configuration_file(&mut self, options: ConfigurationFile) {
self.extra_configuration_paths
.push(options.path().to_owned());
self.options.combine_with(options.into_options());
}
}
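// A minimal precedence sketch (hypothetical values): if the user-level configuration
// sets `python-version = "3.10"`, the project's `pyproject.toml` sets `"3.11"`, and
// the CLI passes `"3.12"`, the resolved options use `"3.12"`. `apply_cli_options`
// combines the CLI options over the project options, and `apply_configuration_files`
// then combines that result over the user-level configuration, so the CLI wins over
// the project, which wins over the user-level configuration.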
#[derive(Debug, Error)]
pub enum ProjectDiscoveryError {
#[error("project path '{0}' is not a directory")]
NotADirectory(SystemPathBuf),
#[error("{path} is not a valid `pyproject.toml`: {source}")]
InvalidPyProject {
source: Box<PyProjectError>,
path: SystemPathBuf,
},
#[error("{path} is not a valid `ty.toml`: {source}")]
InvalidTyToml {
source: Box<TyTomlError>,
path: SystemPathBuf,
},
#[error("Invalid `requires-python` version specifier (`{path}`): {source}")]
InvalidRequiresPythonConstraint {
source: ResolveRequiresPythonError,
path: SystemPathBuf,
},
}
#[cfg(test)]
mod tests {
//! Integration tests for project discovery
use anyhow::{anyhow, Context};
use insta::assert_ron_snapshot;
use ruff_db::system::{SystemPathBuf, TestSystem};
use ruff_python_ast::PythonVersion;
use crate::{ProjectDiscoveryError, ProjectMetadata};
#[test]
fn project_without_pyproject() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_files_all([(root.join("foo.py"), ""), (root.join("bar.py"), "")])
.context("Failed to write files")?;
let project =
ProjectMetadata::discover(&root, &system).context("Failed to discover project")?;
assert_eq!(project.root(), &*root);
with_escaped_paths(|| {
assert_ron_snapshot!(&project, @r#"
ProjectMetadata(
name: Name("app"),
root: "/app",
options: Options(),
)
"#);
});
Ok(())
}
#[test]
fn project_with_pyproject() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_files_all([
(
root.join("pyproject.toml"),
r#"
[project]
name = "backend"
"#,
),
(root.join("db/__init__.py"), ""),
])
.context("Failed to write files")?;
let project =
ProjectMetadata::discover(&root, &system).context("Failed to discover project")?;
assert_eq!(project.root(), &*root);
with_escaped_paths(|| {
assert_ron_snapshot!(&project, @r#"
ProjectMetadata(
name: Name("backend"),
root: "/app",
options: Options(),
)
"#);
});
// Discovering the same package from a subdirectory should give the same result
let from_src = ProjectMetadata::discover(&root.join("db"), &system)
.context("Failed to discover project from src sub-directory")?;
assert_eq!(from_src, project);
Ok(())
}
#[test]
fn project_with_invalid_pyproject() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_files_all([
(
root.join("pyproject.toml"),
r#"
[project]
name = "backend"
[tool.ty
"#,
),
(root.join("db/__init__.py"), ""),
])
.context("Failed to write files")?;
let Err(error) = ProjectMetadata::discover(&root, &system) else {
return Err(anyhow!("Expected project discovery to fail because of invalid syntax in the pyproject.toml"));
};
assert_error_eq(
&error,
r#"/app/pyproject.toml is not a valid `pyproject.toml`: TOML parse error at line 5, column 29
|
5 | [tool.ty
| ^
invalid table header
expected `.`, `]`
"#,
);
Ok(())
}
#[test]
fn nested_projects_in_sub_project() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_files_all([
(
root.join("pyproject.toml"),
r#"
[project]
name = "project-root"
[tool.ty.src]
root = "src"
"#,
),
(
root.join("packages/a/pyproject.toml"),
r#"
[project]
name = "nested-project"
[tool.ty.src]
root = "src"
"#,
),
])
.context("Failed to write files")?;
let sub_project = ProjectMetadata::discover(&root.join("packages/a"), &system)?;
with_escaped_paths(|| {
assert_ron_snapshot!(sub_project, @r#"
ProjectMetadata(
name: Name("nested-project"),
root: "/app/packages/a",
options: Options(
src: Some(SrcOptions(
root: Some("src"),
)),
),
)
"#);
});
Ok(())
}
#[test]
fn nested_projects_in_root_project() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_files_all([
(
root.join("pyproject.toml"),
r#"
[project]
name = "project-root"
[tool.ty.src]
root = "src"
"#,
),
(
root.join("packages/a/pyproject.toml"),
r#"
[project]
name = "nested-project"
[tool.ty.src]
root = "src"
"#,
),
])
.context("Failed to write files")?;
let root = ProjectMetadata::discover(&root, &system)?;
with_escaped_paths(|| {
assert_ron_snapshot!(root, @r#"
ProjectMetadata(
name: Name("project-root"),
root: "/app",
options: Options(
src: Some(SrcOptions(
root: Some("src"),
)),
),
)
"#);
});
Ok(())
}
#[test]
fn nested_projects_without_ty_sections() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_files_all([
(
root.join("pyproject.toml"),
r#"
[project]
name = "project-root"
"#,
),
(
root.join("packages/a/pyproject.toml"),
r#"
[project]
name = "nested-project"
"#,
),
])
.context("Failed to write files")?;
let sub_project = ProjectMetadata::discover(&root.join("packages/a"), &system)?;
with_escaped_paths(|| {
assert_ron_snapshot!(sub_project, @r#"
ProjectMetadata(
name: Name("nested-project"),
root: "/app/packages/a",
options: Options(),
)
"#);
});
Ok(())
}
#[test]
fn nested_projects_with_outer_ty_section() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_files_all([
(
root.join("pyproject.toml"),
r#"
[project]
name = "project-root"
[tool.ty.environment]
python-version = "3.10"
"#,
),
(
root.join("packages/a/pyproject.toml"),
r#"
[project]
name = "nested-project"
"#,
),
])
.context("Failed to write files")?;
let root = ProjectMetadata::discover(&root.join("packages/a"), &system)?;
with_escaped_paths(|| {
assert_ron_snapshot!(root, @r#"
ProjectMetadata(
name: Name("project-root"),
root: "/app",
options: Options(
environment: Some(EnvironmentOptions(
r#python-version: Some("3.10"),
)),
),
)
"#);
});
Ok(())
}
/// A `ty.toml` takes precedence over any `pyproject.toml`.
///
/// However, the `pyproject.toml` is still loaded to get the project name and, in the future,
/// the requires-python constraint.
#[test]
fn project_with_ty_and_pyproject_toml() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_files_all([
(
root.join("pyproject.toml"),
r#"
[project]
name = "super-app"
requires-python = ">=3.12"
[tool.ty.src]
root = "this_option_is_ignored"
"#,
),
(
root.join("ty.toml"),
r#"
[src]
root = "src"
"#,
),
])
.context("Failed to write files")?;
let root = ProjectMetadata::discover(&root, &system)?;
with_escaped_paths(|| {
assert_ron_snapshot!(root, @r#"
ProjectMetadata(
name: Name("super-app"),
root: "/app",
options: Options(
environment: Some(EnvironmentOptions(
r#python-version: Some("3.12"),
)),
src: Some(SrcOptions(
root: Some("src"),
)),
),
)
"#);
});
Ok(())
}
#[test]
fn requires_python_major_minor() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_file_all(
root.join("pyproject.toml"),
r#"
[project]
requires-python = ">=3.12"
"#,
)
.context("Failed to write file")?;
let root = ProjectMetadata::discover(&root, &system)?;
assert_eq!(
root.options
.environment
.unwrap_or_default()
.python_version
.as_deref(),
Some(&PythonVersion::PY312)
);
Ok(())
}
#[test]
fn requires_python_major_only() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_file_all(
root.join("pyproject.toml"),
r#"
[project]
requires-python = ">=3"
"#,
)
.context("Failed to write file")?;
let root = ProjectMetadata::discover(&root, &system)?;
assert_eq!(
root.options
.environment
.unwrap_or_default()
.python_version
.as_deref(),
Some(&PythonVersion::from((3, 0)))
);
Ok(())
}
/// A `requires-python` constraint with major, minor and patch can be simplified
/// to major and minor (e.g. 3.12.1 -> 3.12).
#[test]
fn requires_python_major_minor_patch() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_file_all(
root.join("pyproject.toml"),
r#"
[project]
requires-python = ">=3.12.8"
"#,
)
.context("Failed to write file")?;
let root = ProjectMetadata::discover(&root, &system)?;
assert_eq!(
root.options
.environment
.unwrap_or_default()
.python_version
.as_deref(),
Some(&PythonVersion::PY312)
);
Ok(())
}
#[test]
fn requires_python_beta_version() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_file_all(
root.join("pyproject.toml"),
r#"
[project]
requires-python = ">= 3.13.0b0"
"#,
)
.context("Failed to write file")?;
let root = ProjectMetadata::discover(&root, &system)?;
assert_eq!(
root.options
.environment
.unwrap_or_default()
.python_version
.as_deref(),
Some(&PythonVersion::PY313)
);
Ok(())
}
#[test]
fn requires_python_greater_than_major_minor() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_file_all(
root.join("pyproject.toml"),
r#"
[project]
# This is somewhat nonsensical because 3.12.1 > 3.12 is true.
# That's why simplifying the constraint to >= 3.12 is correct
requires-python = ">3.12"
"#,
)
.context("Failed to write file")?;
let root = ProjectMetadata::discover(&root, &system)?;
assert_eq!(
root.options
.environment
.unwrap_or_default()
.python_version
.as_deref(),
Some(&PythonVersion::PY312)
);
Ok(())
}
/// `python-version` takes precedence if both `requires-python` and `python-version` are configured.
#[test]
fn requires_python_and_python_version() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_file_all(
root.join("pyproject.toml"),
r#"
[project]
requires-python = ">=3.12"
[tool.ty.environment]
python-version = "3.10"
"#,
)
.context("Failed to write file")?;
let root = ProjectMetadata::discover(&root, &system)?;
assert_eq!(
root.options
.environment
.unwrap_or_default()
.python_version
.as_deref(),
Some(&PythonVersion::PY310)
);
Ok(())
}
#[test]
fn requires_python_less_than() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_file_all(
root.join("pyproject.toml"),
r#"
[project]
requires-python = "<3.12"
"#,
)
.context("Failed to write file")?;
let Err(error) = ProjectMetadata::discover(&root, &system) else {
return Err(anyhow!("Expected project discovery to fail because the `requires-python` doesn't specify a lower bound (it only specifies an upper bound)."));
};
assert_error_eq(&error, "Invalid `requires-python` version specifier (`/app/pyproject.toml`): value `<3.12` does not contain a lower bound. Add a lower bound to indicate the minimum compatible Python version (e.g., `>=3.13`) or specify a version in `environment.python-version`.");
Ok(())
}
#[test]
fn requires_python_no_specifiers() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_file_all(
root.join("pyproject.toml"),
r#"
[project]
requires-python = ""
"#,
)
.context("Failed to write file")?;
let Err(error) = ProjectMetadata::discover(&root, &system) else {
return Err(anyhow!("Expected project discovery to fail because the `requires-python` specifiers are empty and don't define a lower bound."));
};
assert_error_eq(&error, "Invalid `requires-python` version specifier (`/app/pyproject.toml`): value `` does not contain a lower bound. Add a lower bound to indicate the minimum compatible Python version (e.g., `>=3.13`) or specify a version in `environment.python-version`.");
Ok(())
}
#[test]
fn requires_python_too_large_major_version() -> anyhow::Result<()> {
let system = TestSystem::default();
let root = SystemPathBuf::from("/app");
system
.memory_file_system()
.write_file_all(
root.join("pyproject.toml"),
r#"
[project]
requires-python = ">=999.0"
"#,
)
.context("Failed to write file")?;
let Err(error) = ProjectMetadata::discover(&root, &system) else {
return Err(anyhow!("Expected project discovery to fail because of the requires-python major version that is larger than 255."));
};
assert_error_eq(&error, "Invalid `requires-python` version specifier (`/app/pyproject.toml`): The major version `999` is larger than the maximum supported value 255");
Ok(())
}
#[track_caller]
fn assert_error_eq(error: &ProjectDiscoveryError, message: &str) {
assert_eq!(error.to_string().replace('\\', "/"), message);
}
fn with_escaped_paths<R>(f: impl FnOnce() -> R) -> R {
let mut settings = insta::Settings::clone_current();
settings.add_dynamic_redaction(".root", |content, _path| {
content.as_str().unwrap().replace('\\', "/")
});
settings.bind(f)
}
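    /// CLI options take precedence over `pyproject.toml` options when combined.
    ///
    /// A minimal sketch on top of the suite above; it assumes the `Options`,
    /// `SrcOptions`, and `RelativePathBuf` types are importable as below and
    /// relies on `RangedValue` equality ignoring a value's source.
    #[test]
    fn cli_options_take_precedence() -> anyhow::Result<()> {
        use crate::metadata::options::{Options, SrcOptions};
        use crate::metadata::value::RelativePathBuf;
        let system = TestSystem::default();
        let root = SystemPathBuf::from("/app");
        system
            .memory_file_system()
            .write_file_all(
                root.join("pyproject.toml"),
                r#"
            [project]
            name = "app"

            [tool.ty.src]
            root = "src"
            "#,
            )
            .context("Failed to write file")?;
        let mut project =
            ProjectMetadata::discover(&root, &system).context("Failed to discover project")?;
        // Simulate a conflicting `src.root` provided on the CLI.
        project.apply_cli_options(Options {
            src: Some(SrcOptions {
                root: Some(RelativePathBuf::cli("cli-src")),
            }),
            ..Options::default()
        });
        assert_eq!(
            project
                .options()
                .src
                .as_ref()
                .and_then(|src| src.root.as_ref()),
            Some(&RelativePathBuf::cli("cli-src"))
        );
        Ok(())
    }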
}

View file

@ -0,0 +1,69 @@
use std::sync::Arc;
use ruff_db::system::{System, SystemPath, SystemPathBuf};
use thiserror::Error;
use crate::metadata::value::ValueSource;
use super::options::{Options, TyTomlError};
/// A `ty.toml` configuration file with the options it contains.
pub(crate) struct ConfigurationFile {
path: SystemPathBuf,
options: Options,
}
impl ConfigurationFile {
/// Loads the user-level configuration file if it exists.
///
/// Returns `None` if the file does not exist or if the concept of user-level configurations
/// doesn't exist on `system`.
pub(crate) fn user(system: &dyn System) -> Result<Option<Self>, ConfigurationFileError> {
let Some(configuration_directory) = system.user_config_directory() else {
return Ok(None);
};
let ty_toml_path = configuration_directory.join("ty").join("ty.toml");
tracing::debug!(
"Searching for a user-level configuration at `{path}`",
path = &ty_toml_path
);
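        // A missing or unreadable file is treated as "no user-level configuration".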
let Ok(ty_toml_str) = system.read_to_string(&ty_toml_path) else {
return Ok(None);
};
match Options::from_toml_str(
&ty_toml_str,
ValueSource::File(Arc::new(ty_toml_path.clone())),
) {
Ok(options) => Ok(Some(Self {
path: ty_toml_path,
options,
})),
Err(error) => Err(ConfigurationFileError::InvalidTyToml {
source: Box::new(error),
path: ty_toml_path,
}),
}
}
/// Returns the path to the configuration file.
pub(crate) fn path(&self) -> &SystemPath {
&self.path
}
pub(crate) fn into_options(self) -> Options {
self.options
}
}
#[derive(Debug, Error)]
pub enum ConfigurationFileError {
#[error("{path} is not a valid `ty.toml`: {source}")]
InvalidTyToml {
source: Box<TyTomlError>,
path: SystemPathBuf,
},
}

View file

@ -0,0 +1,420 @@
use crate::metadata::value::{RangedValue, RelativePathBuf, ValueSource, ValueSourceGuard};
use crate::Db;
use ruff_db::diagnostic::{Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, Severity, Span};
use ruff_db::files::system_path_to_file;
use ruff_db::system::{System, SystemPath};
use ruff_macros::Combine;
use ruff_python_ast::PythonVersion;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use std::fmt::Debug;
use thiserror::Error;
use ty_python_semantic::lint::{GetLintError, Level, LintSource, RuleSelection};
use ty_python_semantic::{ProgramSettings, PythonPath, PythonPlatform, SearchPathSettings};
use super::settings::{Settings, TerminalSettings};
/// The options for the project.
#[derive(Debug, Default, Clone, PartialEq, Eq, Combine, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct Options {
/// Configures the type checking environment.
#[serde(skip_serializing_if = "Option::is_none")]
pub environment: Option<EnvironmentOptions>,
#[serde(skip_serializing_if = "Option::is_none")]
pub src: Option<SrcOptions>,
/// Configures the enabled lints and their severity.
#[serde(skip_serializing_if = "Option::is_none")]
pub rules: Option<Rules>,
#[serde(skip_serializing_if = "Option::is_none")]
pub terminal: Option<TerminalOptions>,
#[serde(skip_serializing_if = "Option::is_none")]
pub respect_ignore_files: Option<bool>,
}
impl Options {
pub(crate) fn from_toml_str(content: &str, source: ValueSource) -> Result<Self, TyTomlError> {
let _guard = ValueSourceGuard::new(source, true);
let options = toml::from_str(content)?;
Ok(options)
}
pub fn deserialize_with<'de, D>(source: ValueSource, deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let _guard = ValueSourceGuard::new(source, false);
Self::deserialize(deserializer)
}
pub(crate) fn to_program_settings(
&self,
project_root: &SystemPath,
system: &dyn System,
) -> ProgramSettings {
let python_version = self
.environment
.as_ref()
.and_then(|env| env.python_version.as_deref().copied())
.unwrap_or_default();
let python_platform = self
.environment
.as_ref()
.and_then(|env| env.python_platform.as_deref().cloned())
.unwrap_or_else(|| {
let default = PythonPlatform::default();
tracing::info!("Defaulting to python-platform `{default}`");
default
});
ProgramSettings {
python_version,
python_platform,
search_paths: self.to_search_path_settings(project_root, system),
}
}
fn to_search_path_settings(
&self,
project_root: &SystemPath,
system: &dyn System,
) -> SearchPathSettings {
let src_roots = if let Some(src_root) = self.src.as_ref().and_then(|src| src.root.as_ref())
{
vec![src_root.absolute(project_root, system)]
} else {
let src = project_root.join("src");
// Default to `src` and the project root if `src` exists and the root hasn't been specified.
if system.is_directory(&src) {
vec![project_root.to_path_buf(), src]
} else {
vec![project_root.to_path_buf()]
}
};
let (extra_paths, python, typeshed) = self
.environment
.as_ref()
.map(|env| {
(
env.extra_paths.clone(),
env.python.clone(),
env.typeshed.clone(),
)
})
.unwrap_or_default();
SearchPathSettings {
extra_paths: extra_paths
.unwrap_or_default()
.into_iter()
.map(|path| path.absolute(project_root, system))
.collect(),
src_roots,
custom_typeshed: typeshed.map(|path| path.absolute(project_root, system)),
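            // Resolution order for the Python environment: the explicit `python`
            // option, then the `VIRTUAL_ENV` environment variable, and finally
            // auto-discovery relative to the project root.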
python_path: python
.map(|python_path| {
PythonPath::from_cli_flag(python_path.absolute(project_root, system))
})
.or_else(|| {
std::env::var("VIRTUAL_ENV")
.ok()
.map(PythonPath::from_virtual_env_var)
})
.unwrap_or_else(|| PythonPath::Discover(project_root.to_path_buf())),
}
}
#[must_use]
pub(crate) fn to_settings(&self, db: &dyn Db) -> (Settings, Vec<OptionDiagnostic>) {
let (rules, diagnostics) = self.to_rule_selection(db);
let mut settings = Settings::new(rules, self.respect_ignore_files);
if let Some(terminal) = self.terminal.as_ref() {
settings.set_terminal(TerminalSettings {
output_format: terminal
.output_format
.as_deref()
.copied()
.unwrap_or_default(),
error_on_warning: terminal.error_on_warning.unwrap_or_default(),
});
}
(settings, diagnostics)
}
#[must_use]
fn to_rule_selection(&self, db: &dyn Db) -> (RuleSelection, Vec<OptionDiagnostic>) {
let registry = db.lint_registry();
let mut diagnostics = Vec::new();
// Initialize the selection with the defaults
let mut selection = RuleSelection::from_registry(registry);
let rules = self
.rules
.as_ref()
.into_iter()
.flat_map(|rules| rules.inner.iter());
for (rule_name, level) in rules {
let source = rule_name.source();
match registry.get(rule_name) {
Ok(lint) => {
let lint_source = match source {
ValueSource::File(_) => LintSource::File,
ValueSource::Cli => LintSource::Cli,
};
if let Ok(severity) = Severity::try_from(**level) {
selection.enable(lint, severity, lint_source);
} else {
selection.disable(lint);
}
}
Err(error) => {
// `system_path_to_file` can return `Err` if the file was deleted since the configuration
// was read. This should be rare and it should be okay to default to not showing a configuration
// file in that case.
let file = source
.file()
.and_then(|path| system_path_to_file(db.upcast(), path).ok());
// TODO: Add a note if the value was configured on the CLI
let diagnostic = match error {
GetLintError::Unknown(_) => OptionDiagnostic::new(
DiagnosticId::UnknownRule,
format!("Unknown lint rule `{rule_name}`"),
Severity::Warning,
),
GetLintError::PrefixedWithCategory { suggestion, .. } => {
OptionDiagnostic::new(
DiagnosticId::UnknownRule,
format!(
"Unknown lint rule `{rule_name}`. Did you mean `{suggestion}`?"
),
Severity::Warning,
)
}
GetLintError::Removed(_) => OptionDiagnostic::new(
DiagnosticId::UnknownRule,
format!("Unknown lint rule `{rule_name}`"),
Severity::Warning,
),
};
let span = file.map(Span::from).map(|span| {
if let Some(range) = rule_name.range() {
span.with_range(range)
} else {
span
}
});
diagnostics.push(diagnostic.with_span(span));
}
}
}
(selection, diagnostics)
}
}
#[derive(Debug, Default, Clone, Eq, PartialEq, Combine, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct EnvironmentOptions {
/// Specifies the version of Python that will be used to analyze the source code.
/// The version should be specified as a string in the format `M.m` where `M` is the major version
/// and `m` is the minor (e.g. "3.0" or "3.6").
/// If a version is provided, ty will generate errors if the source code makes use of language features
/// that are not supported in that version.
/// It will also tailor its use of type stub files, which conditionalizes type definitions based on the version.
#[serde(skip_serializing_if = "Option::is_none")]
pub python_version: Option<RangedValue<PythonVersion>>,
/// Specifies the target platform that will be used to analyze the source code.
/// If specified, ty will tailor its use of type stub files,
/// which conditionalize type definitions based on the platform.
///
/// If no platform is specified, ty will use the current platform:
/// - `win32` for Windows
/// - `darwin` for macOS
/// - `android` for Android
/// - `ios` for iOS
/// - `linux` for everything else
#[serde(skip_serializing_if = "Option::is_none")]
pub python_platform: Option<RangedValue<PythonPlatform>>,
    /// List of user-provided paths that should take first priority in module resolution.
    /// Examples in other type checkers are mypy's `MYPYPATH` environment variable
    /// or pyright's `stubPath` configuration setting.
#[serde(skip_serializing_if = "Option::is_none")]
pub extra_paths: Option<Vec<RelativePathBuf>>,
/// Optional path to a "typeshed" directory on disk for us to use for standard-library types.
    /// If this is not provided, we will fall back to our vendored typeshed stubs for the stdlib,
    /// bundled as a zip file in the binary.
#[serde(skip_serializing_if = "Option::is_none")]
pub typeshed: Option<RelativePathBuf>,
/// Path to the Python installation from which ty resolves type information and third-party dependencies.
///
/// ty will search in the path's `site-packages` directories for type information and
/// third-party imports.
///
/// This option is commonly used to specify the path to a virtual environment.
#[serde(skip_serializing_if = "Option::is_none")]
pub python: Option<RelativePathBuf>,
}
#[derive(Debug, Default, Clone, Eq, PartialEq, Combine, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct SrcOptions {
/// The root of the project, used for finding first-party modules.
#[serde(skip_serializing_if = "Option::is_none")]
pub root: Option<RelativePathBuf>,
}
#[derive(Debug, Default, Clone, Eq, PartialEq, Combine, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", transparent)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct Rules {
#[cfg_attr(feature = "schemars", schemars(with = "schema::Rules"))]
inner: FxHashMap<RangedValue<String>, RangedValue<Level>>,
}
impl FromIterator<(RangedValue<String>, RangedValue<Level>)> for Rules {
fn from_iter<T: IntoIterator<Item = (RangedValue<String>, RangedValue<Level>)>>(
iter: T,
) -> Self {
Self {
inner: iter.into_iter().collect(),
}
}
}
#[derive(Debug, Default, Clone, Eq, PartialEq, Combine, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct TerminalOptions {
/// The format to use for printing diagnostic messages.
///
/// Defaults to `full`.
#[serde(skip_serializing_if = "Option::is_none")]
pub output_format: Option<RangedValue<DiagnosticFormat>>,
/// Use exit code 1 if there are any warning-level diagnostics.
///
/// Defaults to `false`.
pub error_on_warning: Option<bool>,
}
#[cfg(feature = "schemars")]
mod schema {
use crate::DEFAULT_LINT_REGISTRY;
use schemars::gen::SchemaGenerator;
use schemars::schema::{
InstanceType, Metadata, ObjectValidation, Schema, SchemaObject, SubschemaValidation,
};
use schemars::JsonSchema;
use ty_python_semantic::lint::Level;
pub(super) struct Rules;
impl JsonSchema for Rules {
fn schema_name() -> String {
"Rules".to_string()
}
fn json_schema(gen: &mut SchemaGenerator) -> Schema {
let registry = &*DEFAULT_LINT_REGISTRY;
let level_schema = gen.subschema_for::<Level>();
let properties: schemars::Map<String, Schema> = registry
.lints()
.iter()
.map(|lint| {
(
lint.name().to_string(),
Schema::Object(SchemaObject {
metadata: Some(Box::new(Metadata {
title: Some(lint.summary().to_string()),
description: Some(lint.documentation()),
deprecated: lint.status.is_deprecated(),
default: Some(lint.default_level.to_string().into()),
..Metadata::default()
})),
subschemas: Some(Box::new(SubschemaValidation {
one_of: Some(vec![level_schema.clone()]),
..Default::default()
})),
..Default::default()
}),
)
})
.collect();
Schema::Object(SchemaObject {
instance_type: Some(InstanceType::Object.into()),
object: Some(Box::new(ObjectValidation {
properties,
// Allow unknown rules: ty will warn about them.
// It gives a better experience when using an older ty version because
// the schema will not deny rules that have been removed in newer versions.
additional_properties: Some(Box::new(level_schema)),
..ObjectValidation::default()
})),
..Default::default()
})
}
}
}
#[derive(Error, Debug)]
pub enum TyTomlError {
#[error(transparent)]
TomlSyntax(#[from] toml::de::Error),
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct OptionDiagnostic {
id: DiagnosticId,
message: String,
severity: Severity,
span: Option<Span>,
}
impl OptionDiagnostic {
pub fn new(id: DiagnosticId, message: String, severity: Severity) -> Self {
Self {
id,
message,
severity,
span: None,
}
}
#[must_use]
fn with_span(self, span: Option<Span>) -> Self {
OptionDiagnostic { span, ..self }
}
pub(crate) fn to_diagnostic(&self) -> Diagnostic {
if let Some(ref span) = self.span {
let mut diag = Diagnostic::new(self.id, self.severity, "");
diag.annotate(Annotation::primary(span.clone()).message(&self.message));
diag
} else {
Diagnostic::new(self.id, self.severity, &self.message)
}
}
}

View file

@ -0,0 +1,267 @@
use crate::metadata::options::Options;
use crate::metadata::value::{RangedValue, ValueSource, ValueSourceGuard};
use pep440_rs::{release_specifiers_to_ranges, Version, VersionSpecifiers};
use ruff_python_ast::PythonVersion;
use serde::{Deserialize, Deserializer, Serialize};
use std::collections::Bound;
use std::ops::Deref;
use thiserror::Error;
/// A `pyproject.toml` as specified in PEP 517.
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct PyProject {
/// PEP 621-compliant project metadata.
pub project: Option<Project>,
/// Tool-specific metadata.
pub tool: Option<Tool>,
}
impl PyProject {
pub(crate) fn ty(&self) -> Option<&Options> {
self.tool.as_ref().and_then(|tool| tool.ty.as_ref())
}
}
#[derive(Error, Debug)]
pub enum PyProjectError {
#[error(transparent)]
TomlSyntax(#[from] toml::de::Error),
}
impl PyProject {
pub(crate) fn from_toml_str(
content: &str,
source: ValueSource,
) -> Result<Self, PyProjectError> {
let _guard = ValueSourceGuard::new(source, true);
toml::from_str(content).map_err(PyProjectError::TomlSyntax)
}
}
/// PEP 621 project metadata (`project`).
///
/// See <https://packaging.python.org/en/latest/specifications/pyproject-toml>.
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "kebab-case")]
pub struct Project {
/// The name of the project
///
    /// Note: Intentionally optional to be more permissive during deserialization.
/// `PackageMetadata::from_pyproject` reports missing names.
pub name: Option<RangedValue<PackageName>>,
/// The version of the project
pub version: Option<RangedValue<Version>>,
/// The Python versions this project is compatible with.
pub requires_python: Option<RangedValue<VersionSpecifiers>>,
}
impl Project {
pub(super) fn resolve_requires_python_lower_bound(
&self,
) -> Result<Option<RangedValue<PythonVersion>>, ResolveRequiresPythonError> {
let Some(requires_python) = self.requires_python.as_ref() else {
return Ok(None);
};
tracing::debug!("Resolving requires-python constraint: `{requires_python}`");
let ranges = release_specifiers_to_ranges((**requires_python).clone());
let Some((lower, _)) = ranges.bounding_range() else {
return Ok(None);
};
let version = match lower {
// Ex) `>=3.10.1` -> `>=3.10`
Bound::Included(version) => version,
// Ex) `>3.10.1` -> `>=3.10` or `>3.10` -> `>=3.10`
// The second example looks obscure at first but it is required because
// `3.10.1 > 3.10` is true but we only have two digits here. So including 3.10 is the
// right move. Overall, using `>` without a patch release is most likely bogus.
Bound::Excluded(version) => version,
// Ex) `<3.10` or ``
Bound::Unbounded => {
return Err(ResolveRequiresPythonError::NoLowerBound(
requires_python.to_string(),
))
}
};
// Take the major and minor version
let mut versions = version.release().iter().take(2);
let Some(major) = versions.next().copied() else {
return Ok(None);
};
let minor = versions.next().copied().unwrap_or_default();
tracing::debug!("Resolved requires-python constraint to: {major}.{minor}");
let major =
u8::try_from(major).map_err(|_| ResolveRequiresPythonError::TooLargeMajor(major))?;
let minor =
            u8::try_from(minor).map_err(|_| ResolveRequiresPythonError::TooLargeMinor(minor))?;
Ok(Some(
requires_python
.clone()
.map_value(|_| PythonVersion::from((major, minor))),
))
}
}
#[derive(Debug, Error)]
pub enum ResolveRequiresPythonError {
#[error("The major version `{0}` is larger than the maximum supported value 255")]
TooLargeMajor(u64),
#[error("The minor version `{0}` is larger than the maximum supported value 255")]
TooLargeMinor(u64),
#[error("value `{0}` does not contain a lower bound. Add a lower bound to indicate the minimum compatible Python version (e.g., `>=3.13`) or specify a version in `environment.python-version`.")]
NoLowerBound(String),
}
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub struct Tool {
pub ty: Option<Options>,
}
/// The normalized name of a package.
///
/// Converts the name to lowercase and collapses runs of `-`, `_`, and `.` down to a single `-`.
/// For example, `---`, `.`, and `__` are all converted to a single `-`.
///
/// See: <https://packaging.python.org/en/latest/specifications/name-normalization/>
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
pub struct PackageName(String);
impl PackageName {
/// Create a validated, normalized package name.
pub(crate) fn new(name: String) -> Result<Self, InvalidPackageNameError> {
if name.is_empty() {
return Err(InvalidPackageNameError::Empty);
}
if name.starts_with(['-', '_', '.']) {
return Err(InvalidPackageNameError::NonAlphanumericStart(
name.chars().next().unwrap(),
));
}
if name.ends_with(['-', '_', '.']) {
return Err(InvalidPackageNameError::NonAlphanumericEnd(
name.chars().last().unwrap(),
));
}
let Some(start) = name.find(|c: char| {
!c.is_ascii() || c.is_ascii_uppercase() || matches!(c, '-' | '_' | '.')
}) else {
return Ok(Self(name));
};
let (already_normalized, maybe_normalized) = name.split_at(start);
let mut normalized = String::with_capacity(name.len());
normalized.push_str(already_normalized);
let mut last = None;
for c in maybe_normalized.chars() {
if !c.is_ascii() {
return Err(InvalidPackageNameError::InvalidCharacter(c));
}
if c.is_ascii_uppercase() {
normalized.push(c.to_ascii_lowercase());
} else if matches!(c, '-' | '_' | '.') {
if matches!(last, Some('-' | '_' | '.')) {
// Only keep a single instance of `-`, `_` and `.`
} else {
normalized.push('-');
}
} else {
normalized.push(c);
}
last = Some(c);
}
Ok(Self(normalized))
}
/// Returns the underlying package name.
pub(crate) fn as_str(&self) -> &str {
&self.0
}
}
impl From<PackageName> for String {
fn from(value: PackageName) -> Self {
value.0
}
}
impl<'de> Deserialize<'de> for PackageName {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Self::new(s).map_err(serde::de::Error::custom)
}
}
impl std::fmt::Display for PackageName {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl Deref for PackageName {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
#[derive(Error, Debug)]
pub(crate) enum InvalidPackageNameError {
#[error("name must start with letter or number but it starts with '{0}'")]
NonAlphanumericStart(char),
#[error("name must end with letter or number but it ends with '{0}'")]
NonAlphanumericEnd(char),
#[error("valid name consists only of ASCII letters and numbers, period, underscore and hyphen but name contains '{0}'"
)]
InvalidCharacter(char),
#[error("name must not be empty")]
Empty,
}
#[cfg(test)]
mod tests {
use super::PackageName;
#[test]
fn normalize() {
let inputs = [
"friendly-bard",
"Friendly-Bard",
"FRIENDLY-BARD",
"friendly.bard",
"friendly_bard",
"friendly--bard",
"friendly-.bard",
"FrIeNdLy-._.-bArD",
];
for input in inputs {
assert_eq!(
PackageName::new(input.to_string()).unwrap(),
PackageName::new("friendly-bard".to_string()).unwrap(),
);
}
}
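    /// A small sketch (in addition to the normalization test above) of the error
    /// paths of `PackageName::new`; it only relies on `new` rejecting names per
    /// the validation rules implemented above.
    #[test]
    fn rejects_invalid_names() {
        // Empty names are rejected.
        assert!(PackageName::new(String::new()).is_err());
        // Names must start and end with a letter or number.
        assert!(PackageName::new("-friendly-bard".to_string()).is_err());
        assert!(PackageName::new("friendly-bard.".to_string()).is_err());
        // Non-ASCII characters are rejected.
        assert!(PackageName::new("friendly-bärd".to_string()).is_err());
    }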
}

View file

@ -0,0 +1,62 @@
use std::sync::Arc;
use ruff_db::diagnostic::DiagnosticFormat;
use ty_python_semantic::lint::RuleSelection;
/// The resolved [`super::Options`] for the project.
///
/// Unlike [`super::Options`], the struct has default values filled in and
/// uses representations that are optimized for reads (instead of preserving the source representation).
/// It's also not required that this structure precisely resembles the TOML schema, although
/// it's encouraged to use a similar structure.
///
/// It's worth considering adding a salsa query for specific settings to
/// limit the blast radius when only some settings change. For example,
/// changing the terminal settings shouldn't invalidate any core type-checking queries.
///
/// Settings that are part of [`ty_python_semantic::ProgramSettings`] are not included here.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Settings {
rules: Arc<RuleSelection>,
terminal: TerminalSettings,
respect_ignore_files: bool,
}
impl Settings {
pub fn new(rules: RuleSelection, respect_ignore_files: Option<bool>) -> Self {
Self {
rules: Arc::new(rules),
terminal: TerminalSettings::default(),
respect_ignore_files: respect_ignore_files.unwrap_or(true),
}
}
pub fn rules(&self) -> &RuleSelection {
&self.rules
}
pub fn respect_ignore_files(&self) -> bool {
self.respect_ignore_files
}
pub fn to_rules(&self) -> Arc<RuleSelection> {
self.rules.clone()
}
pub fn terminal(&self) -> &TerminalSettings {
&self.terminal
}
pub fn set_terminal(&mut self, terminal: TerminalSettings) {
self.terminal = terminal;
}
}
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct TerminalSettings {
pub output_format: DiagnosticFormat,
pub error_on_warning: bool,
}

View file

@ -0,0 +1,346 @@
use crate::combine::Combine;
use crate::Db;
use ruff_db::system::{System, SystemPath, SystemPathBuf};
use ruff_macros::Combine;
use ruff_text_size::{TextRange, TextSize};
use serde::{Deserialize, Deserializer};
use std::cell::RefCell;
use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use toml::Spanned;
#[derive(Clone, Debug)]
pub enum ValueSource {
/// Value loaded from a project's configuration file.
///
/// Ideally, we'd use [`ruff_db::files::File`] but we can't because the database hasn't been
/// created when loading the configuration.
File(Arc<SystemPathBuf>),
    /// The value comes from a CLI argument. It's left open whether it was specified
    /// using a short argument, a long argument (`--extra-paths`), or `--config key=value`.
Cli,
}
impl ValueSource {
pub fn file(&self) -> Option<&SystemPath> {
match self {
ValueSource::File(path) => Some(&**path),
ValueSource::Cli => None,
}
}
}
thread_local! {
/// Serde doesn't provide any easy means to pass a value to a [`Deserialize`] implementation,
/// but we want to associate each deserialized [`RelativePath`] with the source from
/// which it originated. We use a thread local variable to work around this limitation.
///
/// Use the [`ValueSourceGuard`] to initialize the thread local before calling into any
/// deserialization code. It ensures that the thread local variable gets cleaned up
/// once deserialization is done (once the guard gets dropped).
static VALUE_SOURCE: RefCell<Option<(ValueSource, bool)>> = const { RefCell::new(None) };
}
/// Guard to safely change the [`VALUE_SOURCE`] for the current thread.
#[must_use]
pub(super) struct ValueSourceGuard {
prev_value: Option<(ValueSource, bool)>,
}
impl ValueSourceGuard {
pub(super) fn new(source: ValueSource, is_toml: bool) -> Self {
let prev = VALUE_SOURCE.replace(Some((source, is_toml)));
Self { prev_value: prev }
}
}
impl Drop for ValueSourceGuard {
fn drop(&mut self) {
VALUE_SOURCE.set(self.prev_value.take());
}
}
/// A value that "remembers" where it comes from (its source) and its range in that source.
///
/// ## Equality, Hash, and Ordering
/// The equality, hash, and ordering are solely based on the value. They disregard the value's range
/// or source.
///
/// This ensures that two resolved configurations are identical even if the position of a value has changed
/// or if the values were loaded from different sources.
#[derive(Clone, serde::Serialize)]
#[serde(transparent)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct RangedValue<T> {
value: T,
#[serde(skip)]
source: ValueSource,
/// The byte range of `value` in `source`.
///
/// Can be `None` because not all sources support a range.
/// For example, arguments provided on the CLI won't have a range attached.
#[serde(skip)]
range: Option<TextRange>,
}
impl<T> RangedValue<T> {
pub fn new(value: T, source: ValueSource) -> Self {
Self::with_range(value, source, TextRange::default())
}
pub fn cli(value: T) -> Self {
Self::with_range(value, ValueSource::Cli, TextRange::default())
}
pub fn with_range(value: T, source: ValueSource, range: TextRange) -> Self {
Self {
value,
range: Some(range),
source,
}
}
pub fn range(&self) -> Option<TextRange> {
self.range
}
pub fn source(&self) -> &ValueSource {
&self.source
}
#[must_use]
pub fn with_source(mut self, source: ValueSource) -> Self {
self.source = source;
self
}
#[must_use]
pub fn map_value<R>(self, f: impl FnOnce(T) -> R) -> RangedValue<R> {
RangedValue {
value: f(self.value),
source: self.source,
range: self.range,
}
}
pub fn into_inner(self) -> T {
self.value
}
}
impl<T> Combine for RangedValue<T> {
fn combine(self, _other: Self) -> Self
where
Self: Sized,
{
self
}
fn combine_with(&mut self, _other: Self) {}
}
impl<T> IntoIterator for RangedValue<T>
where
T: IntoIterator,
{
type Item = T::Item;
type IntoIter = T::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.value.into_iter()
}
}
// The type already has an `iter` method thanks to `Deref`.
#[allow(clippy::into_iter_without_iter)]
impl<'a, T> IntoIterator for &'a RangedValue<T>
where
&'a T: IntoIterator,
{
type Item = <&'a T as IntoIterator>::Item;
type IntoIter = <&'a T as IntoIterator>::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.value.into_iter()
}
}
// The type already has an `iter_mut` method thanks to `DerefMut`.
#[allow(clippy::into_iter_without_iter)]
impl<'a, T> IntoIterator for &'a mut RangedValue<T>
where
&'a mut T: IntoIterator,
{
type Item = <&'a mut T as IntoIterator>::Item;
type IntoIter = <&'a mut T as IntoIterator>::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.value.into_iter()
}
}
impl<T> fmt::Debug for RangedValue<T>
where
T: fmt::Debug,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.value.fmt(f)
}
}
impl<T> fmt::Display for RangedValue<T>
where
T: fmt::Display,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.value.fmt(f)
}
}
impl<T> Deref for RangedValue<T> {
type Target = T;
fn deref(&self) -> &Self::Target {
&self.value
}
}
impl<T> DerefMut for RangedValue<T> {
fn deref_mut(&mut self) -> &mut T {
&mut self.value
}
}
impl<T, U: ?Sized> AsRef<U> for RangedValue<T>
where
T: AsRef<U>,
{
fn as_ref(&self) -> &U {
self.value.as_ref()
}
}
impl<T: PartialEq> PartialEq for RangedValue<T> {
fn eq(&self, other: &Self) -> bool {
self.value.eq(&other.value)
}
}
impl<T: PartialEq<T>> PartialEq<T> for RangedValue<T> {
fn eq(&self, other: &T) -> bool {
self.value.eq(other)
}
}
impl<T: Eq> Eq for RangedValue<T> {}
impl<T: Hash> Hash for RangedValue<T> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.value.hash(state);
}
}
impl<T: PartialOrd> PartialOrd for RangedValue<T> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.value.partial_cmp(&other.value)
}
}
impl<T: PartialOrd<T>> PartialOrd<T> for RangedValue<T> {
fn partial_cmp(&self, other: &T) -> Option<Ordering> {
self.value.partial_cmp(other)
}
}
impl<T: Ord> Ord for RangedValue<T> {
fn cmp(&self, other: &Self) -> Ordering {
self.value.cmp(&other.value)
}
}
impl<'de, T> Deserialize<'de> for RangedValue<T>
where
T: Deserialize<'de>,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
VALUE_SOURCE.with_borrow(|source| {
let (source, has_span) = source.clone().unwrap();
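            // Only TOML-based sources can report spans (see `ValueSourceGuard::new`);
            // for other sources, deserialize the plain value without a range.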
if has_span {
let spanned: Spanned<T> = Spanned::deserialize(deserializer)?;
let span = spanned.span();
let range = TextRange::new(
TextSize::try_from(span.start)
.expect("Configuration file to be smaller than 4GB"),
TextSize::try_from(span.end)
.expect("Configuration file to be smaller than 4GB"),
);
Ok(Self::with_range(spanned.into_inner(), source, range))
} else {
Ok(Self::new(T::deserialize(deserializer)?, source))
}
})
}
}
/// A possibly relative path in a configuration file.
///
/// Relative paths in configuration files or from CLI options
/// require different anchoring:
///
/// * CLI: The path is relative to the current working directory
/// * Configuration file: The path is relative to the project's root.
#[derive(
Debug,
Clone,
serde::Serialize,
serde::Deserialize,
PartialEq,
Eq,
PartialOrd,
Ord,
Hash,
Combine,
)]
#[serde(transparent)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct RelativePathBuf(RangedValue<SystemPathBuf>);
impl RelativePathBuf {
pub fn new(path: impl AsRef<SystemPath>, source: ValueSource) -> Self {
Self(RangedValue::new(path.as_ref().to_path_buf(), source))
}
pub fn cli(path: impl AsRef<SystemPath>) -> Self {
Self::new(path, ValueSource::Cli)
}
/// Returns the relative path as specified by the user.
pub fn path(&self) -> &SystemPath {
&self.0
}
/// Returns the owned relative path.
pub fn into_path_buf(self) -> SystemPathBuf {
self.0.into_inner()
}
/// Resolves the absolute path for `self` based on its origin.
pub fn absolute_with_db(&self, db: &dyn Db) -> SystemPathBuf {
self.absolute(db.project().root(db), db.system())
}
/// Resolves the absolute path for `self` based on its origin.
pub fn absolute(&self, project_root: &SystemPath, system: &dyn System) -> SystemPathBuf {
let relative_to = match &self.0.source {
ValueSource::File(_) => project_root,
ValueSource::Cli => system.current_directory(),
};
SystemPath::absolute(&self.0, relative_to)
}
}
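// An illustrative example of the anchoring rules above (paths are hypothetical):
// with a project root of `/project` and a working directory of `/cwd`, a
// configuration-file value of `stubs` resolves to `/project/stubs`, while the
// same value passed on the CLI resolves to `/cwd/stubs`.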

View file

@ -0,0 +1,260 @@
use crate::{Db, IOErrorDiagnostic, IOErrorKind, Project};
use ruff_db::files::{system_path_to_file, File};
use ruff_db::system::walk_directory::{ErrorKind, WalkDirectoryBuilder, WalkState};
use ruff_db::system::{FileType, SystemPath, SystemPathBuf};
use ruff_python_ast::PySourceType;
use rustc_hash::{FxBuildHasher, FxHashSet};
use std::path::PathBuf;
use thiserror::Error;
/// Filter that decides which files are included in the project.
///
/// In the future, this will hold a reference to the `include` and `exclude` pattern.
///
/// This struct mainly exists because `dyn Db` isn't `Send` or `Sync`, making it impossible
/// to access fields from within the walker.
#[derive(Default, Debug)]
pub(crate) struct ProjectFilesFilter<'a> {
/// The same as [`Project::included_paths_or_root`].
included_paths: &'a [SystemPathBuf],
    /// When `true`, the filter skips checking whether the path is in `included_paths`.
///
/// Skipping this check is useful when the walker only walks over `included_paths`.
skip_included_paths: bool,
}
impl<'a> ProjectFilesFilter<'a> {
pub(crate) fn from_project(db: &'a dyn Db, project: Project) -> Self {
Self {
included_paths: project.included_paths_or_root(db),
skip_included_paths: false,
}
}
/// Returns `true` if a file is part of the project and included in the paths to check.
///
/// A file is included in the checked files if it is a sub path of the project's root
/// (when no CLI path arguments are specified) or if it is a sub path of any path provided on the CLI (`ty check <paths>`) AND:
///
/// * It matches a positive `include` pattern and isn't excluded by a later negative `include` pattern.
/// * It doesn't match a positive `exclude` pattern or is re-included by a later negative `exclude` pattern.
///
/// ## Note
///
/// This method may return `true` for files that don't end up being included when walking the
/// project tree because it doesn't consider `.gitignore` and other ignore files when deciding
/// if a file's included.
pub(crate) fn is_included(&self, path: &SystemPath) -> bool {
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum CheckPathMatch {
/// The path is a partial match of the checked path (it's a sub path)
Partial,
/// The path matches a check path exactly.
Full,
}
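        // `Full` is declared after `Partial`, so the derived `Ord` ranks it higher
        // and `.max()` below prefers an exact match over a sub-path match.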
let m = if self.skip_included_paths {
Some(CheckPathMatch::Partial)
} else {
self.included_paths
.iter()
.filter_map(|included_path| {
if let Ok(relative_path) = path.strip_prefix(included_path) {
// Exact matches are always included
if relative_path.as_str().is_empty() {
Some(CheckPathMatch::Full)
} else {
Some(CheckPathMatch::Partial)
}
} else {
None
}
})
.max()
};
match m {
None => false,
Some(CheckPathMatch::Partial) => {
// TODO: For partial matches, only include the file if it is included by the project's include/exclude settings.
true
}
Some(CheckPathMatch::Full) => true,
}
}
}
pub(crate) struct ProjectFilesWalker<'a> {
walker: WalkDirectoryBuilder,
filter: ProjectFilesFilter<'a>,
}
impl<'a> ProjectFilesWalker<'a> {
pub(crate) fn new(db: &'a dyn Db) -> Self {
let project = db.project();
let mut filter = ProjectFilesFilter::from_project(db, project);
        // It's unnecessary to filter on included paths because the walker only iterates over those paths to begin with.
filter.skip_included_paths = true;
Self::from_paths(db, project.included_paths_or_root(db), filter)
.expect("included_paths_or_root to never return an empty iterator")
}
/// Creates a walker for indexing the project files incrementally.
///
    /// The main difference from a full project walk is that `paths` may contain paths
    /// that aren't part of the included files.
pub(crate) fn incremental<P>(db: &'a dyn Db, paths: impl IntoIterator<Item = P>) -> Option<Self>
where
P: AsRef<SystemPath>,
{
let project = db.project();
let filter = ProjectFilesFilter::from_project(db, project);
Self::from_paths(db, paths, filter)
}
fn from_paths<P>(
db: &'a dyn Db,
paths: impl IntoIterator<Item = P>,
filter: ProjectFilesFilter<'a>,
) -> Option<Self>
where
P: AsRef<SystemPath>,
{
let mut paths = paths.into_iter();
let mut walker = db
.system()
.walk_directory(paths.next()?.as_ref())
.standard_filters(db.project().settings(db).respect_ignore_files())
.ignore_hidden(false);
for path in paths {
walker = walker.add(path);
}
Some(Self { walker, filter })
}
/// Walks the project paths and collects the paths of all files that
/// are included in the project.
pub(crate) fn walk_paths(self) -> (Vec<SystemPathBuf>, Vec<IOErrorDiagnostic>) {
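        // The directory walker may visit entries from multiple threads, so the
        // results are collected behind mutexes.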
let paths = std::sync::Mutex::new(Vec::new());
let diagnostics = std::sync::Mutex::new(Vec::new());
self.walker.run(|| {
Box::new(|entry| {
match entry {
Ok(entry) => {
if !self.filter.is_included(entry.path()) {
tracing::debug!("Ignoring not-included path: {}", entry.path());
return WalkState::Skip;
}
                        // Skip over any non-Python files to avoid creating too many entries in `Files`.
match entry.file_type() {
FileType::File => {
if entry
.path()
.extension()
.and_then(PySourceType::try_from_extension)
.is_some()
{
let mut paths = paths.lock().unwrap();
paths.push(entry.into_path());
}
}
FileType::Directory | FileType::Symlink => {}
}
}
Err(error) => match error.kind() {
ErrorKind::Loop { .. } => {
unreachable!("Loops shouldn't be possible without following symlinks.")
}
ErrorKind::Io { path, err } => {
let mut diagnostics = diagnostics.lock().unwrap();
let error = if let Some(path) = path {
WalkError::IOPathError {
path: path.clone(),
error: err.to_string(),
}
} else {
WalkError::IOError {
error: err.to_string(),
}
};
diagnostics.push(IOErrorDiagnostic {
file: None,
error: IOErrorKind::Walk(error),
});
}
ErrorKind::NonUtf8Path { path } => {
diagnostics.lock().unwrap().push(IOErrorDiagnostic {
file: None,
error: IOErrorKind::Walk(WalkError::NonUtf8Path {
path: path.clone(),
}),
});
}
},
}
WalkState::Continue
})
});
(
paths.into_inner().unwrap(),
diagnostics.into_inner().unwrap(),
)
}
pub(crate) fn collect_vec(self, db: &dyn Db) -> (Vec<File>, Vec<IOErrorDiagnostic>) {
let (paths, diagnostics) = self.walk_paths();
(
paths
.into_iter()
.filter_map(move |path| {
// If this returns `None`, then the file was deleted between the `walk_directory` call and now.
// We can ignore this.
system_path_to_file(db.upcast(), &path).ok()
})
.collect(),
diagnostics,
)
}
pub(crate) fn collect_set(self, db: &dyn Db) -> (FxHashSet<File>, Vec<IOErrorDiagnostic>) {
let (paths, diagnostics) = self.walk_paths();
let mut files = FxHashSet::with_capacity_and_hasher(paths.len(), FxBuildHasher);
for path in paths {
if let Ok(file) = system_path_to_file(db.upcast(), &path) {
files.insert(file);
}
}
(files, diagnostics)
}
}
#[derive(Error, Debug, Clone)]
pub(crate) enum WalkError {
#[error("`{path}`: {error}")]
IOPathError { path: SystemPathBuf, error: String },
#[error("Failed to walk project directory: {error}")]
IOError { error: String },
#[error("`{path}` is not a valid UTF-8 path")]
NonUtf8Path { path: PathBuf },
}

View file

@ -0,0 +1,133 @@
pub use project_watcher::ProjectWatcher;
use ruff_db::system::{SystemPath, SystemPathBuf, SystemVirtualPathBuf};
pub use watcher::{directory_watcher, EventHandler, Watcher};
mod project_watcher;
mod watcher;
/// Classification of a file system change event.
///
/// ## Renaming a path
/// Renaming a path creates a [`ChangeEvent::Deleted`] event for the old path and/or a [`ChangeEvent::Created`] for the new location.
/// Whether both events are created or just one of them depends on where the path was moved from and to:
///
/// * Inside the watched directory: Both events are created.
/// * From a watched directory to a non-watched directory: Only a [`ChangeEvent::Deleted`] event is created.
/// * From a non-watched directory to a watched directory: Only a [`ChangeEvent::Created`] event is created.
///
/// ## Renaming a directory
/// It's up to the file watcher implementation to aggregate a directory rename into a single
/// rename event instead of emitting an event for each file or subdirectory in that path.
#[derive(Debug, PartialEq, Eq)]
pub enum ChangeEvent {
/// The file corresponding to the given path was opened in an editor.
Opened(SystemPathBuf),
/// A new path was created
Created {
path: SystemPathBuf,
kind: CreatedKind,
},
/// The content or metadata of a path was changed.
Changed {
path: SystemPathBuf,
kind: ChangedKind,
},
/// A path was deleted.
Deleted {
path: SystemPathBuf,
kind: DeletedKind,
},
/// A new virtual path was created.
CreatedVirtual(SystemVirtualPathBuf),
/// The content of a virtual path was changed.
ChangedVirtual(SystemVirtualPathBuf),
/// A virtual path was deleted.
DeletedVirtual(SystemVirtualPathBuf),
    /// The file watcher failed to observe some changes and is now out of sync with the file system.
///
/// This can happen if many files are changed at once. The consumer should rescan all files to catch up
/// with the file system.
Rescan,
}
impl ChangeEvent {
/// Creates a new [`Changed`] event for the file content at the given path.
///
/// [`Changed`]: ChangeEvent::Changed
pub fn file_content_changed(path: SystemPathBuf) -> ChangeEvent {
ChangeEvent::Changed {
path,
kind: ChangedKind::FileContent,
}
}
pub fn file_name(&self) -> Option<&str> {
self.system_path().and_then(|path| path.file_name())
}
pub fn system_path(&self) -> Option<&SystemPath> {
match self {
ChangeEvent::Opened(path)
| ChangeEvent::Created { path, .. }
| ChangeEvent::Changed { path, .. }
| ChangeEvent::Deleted { path, .. } => Some(path),
_ => None,
}
}
pub const fn is_rescan(&self) -> bool {
matches!(self, ChangeEvent::Rescan)
}
pub const fn is_created(&self) -> bool {
matches!(self, ChangeEvent::Created { .. })
}
pub const fn is_changed(&self) -> bool {
matches!(self, ChangeEvent::Changed { .. })
}
pub const fn is_deleted(&self) -> bool {
matches!(self, ChangeEvent::Deleted { .. })
}
}
/// Classification of an event that creates a new path.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum CreatedKind {
/// A file was created.
File,
/// A directory was created.
Directory,
/// A file, directory, or any other kind of path was created.
Any,
}
/// Classification of an event related to a content or metadata change.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ChangedKind {
/// The content of a file was changed.
FileContent,
/// The metadata of a file was changed.
FileMetadata,
/// Either the content or metadata of a path was changed.
Any,
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum DeletedKind {
File,
Directory,
Any,
}
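// A minimal consumer sketch (hypothetical; not used by this module): route an
// event to either targeted invalidation or a full rescan.
//
// fn handle(event: &ChangeEvent) {
//     if event.is_rescan() {
//         // Re-index every project file.
//     } else if let Some(path) = event.system_path() {
//         // Invalidate caches for `path`.
//         let _ = path;
//     }
// }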

View file

@ -0,0 +1,166 @@
use std::fmt::{Formatter, Write};
use std::hash::Hasher;
use tracing::info;
use ruff_cache::{CacheKey, CacheKeyHasher};
use ruff_db::system::{SystemPath, SystemPathBuf};
use ruff_db::Upcast;
use ty_python_semantic::system_module_search_paths;
use crate::db::{Db, ProjectDatabase};
use crate::watch::Watcher;
/// Wrapper around a [`Watcher`] that watches the relevant paths of a project.
pub struct ProjectWatcher {
watcher: Watcher,
/// The paths that need to be watched. This includes paths for which setting up file watching failed.
watched_paths: Vec<SystemPathBuf>,
/// True if registering a watcher for any path failed.
has_errored_paths: bool,
/// Cache key over the paths that need watching. It allows short-circuiting if the paths haven't changed.
cache_key: Option<u64>,
}
impl ProjectWatcher {
/// Create a new project watcher.
pub fn new(watcher: Watcher, db: &ProjectDatabase) -> Self {
let mut watcher = Self {
watcher,
watched_paths: Vec::new(),
cache_key: None,
has_errored_paths: false,
};
watcher.update(db);
watcher
}
pub fn update(&mut self, db: &ProjectDatabase) {
let search_paths: Vec<_> = system_module_search_paths(db.upcast()).collect();
let project_path = db.project().root(db);
let new_cache_key = Self::compute_cache_key(project_path, &search_paths);
if self.cache_key == Some(new_cache_key) {
return;
}
        // Unregister all watched paths first because ordering is important on Linux: it only
        // emits an event for the last added watcher if a subtree is covered by multiple watchers.
// A path can be covered by multiple watchers if a subdirectory symlinks to a path that's covered by another watch path:
// ```text
// - bar
// - baz.py
// - project
// - bar -> /bar
// - foo.py
// ```
for path in self.watched_paths.drain(..) {
if let Err(error) = self.watcher.unwatch(&path) {
info!("Failed to remove the file watcher for path `{path}`: {error}");
}
}
self.has_errored_paths = false;
let config_paths = db
.project()
.metadata(db)
.extra_configuration_paths()
.iter()
.map(SystemPathBuf::as_path);
// Watch both the project root and any paths provided by the user on the CLI (removing any redundant nested paths).
// This is necessary to observe changes to files that are outside the project root.
// We always need to watch the project root to observe changes to its configuration.
let included_paths = ruff_db::system::deduplicate_nested_paths(
std::iter::once(project_path).chain(
db.project()
.included_paths_list(db)
.iter()
.map(SystemPathBuf::as_path),
),
);
// Find the non-overlapping module search paths and filter out paths that are already covered by the project.
// Module search paths are already canonicalized.
let unique_module_paths = ruff_db::system::deduplicate_nested_paths(
search_paths
.into_iter()
.filter(|path| !path.starts_with(project_path)),
);
// Now add the new paths, first starting with the project path and then
// adding the library search paths, and finally the paths for configurations.
for path in included_paths
.chain(unique_module_paths)
.chain(config_paths)
{
// Log a warning. It's not worth aborting if registering a single folder fails because
            // Ruff otherwise still works as expected.
if let Err(error) = self.watcher.watch(path) {
// TODO: Log a user-facing warning.
tracing::warn!("Failed to setup watcher for path `{path}`: {error}. You have to restart Ruff after making changes to files under this path or you might see stale results.");
self.has_errored_paths = true;
} else {
self.watched_paths.push(path.to_path_buf());
}
}
info!(
"Set up file watchers for {}",
DisplayWatchedPaths {
paths: &self.watched_paths
}
);
self.cache_key = Some(new_cache_key);
}
fn compute_cache_key(project_root: &SystemPath, search_paths: &[&SystemPath]) -> u64 {
let mut cache_key_hasher = CacheKeyHasher::new();
search_paths.cache_key(&mut cache_key_hasher);
project_root.cache_key(&mut cache_key_hasher);
cache_key_hasher.finish()
}
/// Returns `true` if setting up watching for any path failed.
pub fn has_errored_paths(&self) -> bool {
self.has_errored_paths
}
pub fn flush(&self) {
self.watcher.flush();
}
pub fn stop(self) {
self.watcher.stop();
}
}
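// A minimal usage sketch (hypothetical wiring, assuming a `ProjectDatabase` is
// already set up; error handling elided):
//
//     let watcher = directory_watcher(move |events| sender.send(events).unwrap())?;
//     let mut project_watcher = ProjectWatcher::new(watcher, &db);
//     // ...after the project configuration or search paths may have changed:
//     project_watcher.update(&db);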
struct DisplayWatchedPaths<'a> {
paths: &'a [SystemPathBuf],
}
impl std::fmt::Display for DisplayWatchedPaths<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_char('[')?;
let mut iter = self.paths.iter();
if let Some(first) = iter.next() {
write!(f, "\"{first}\"")?;
for path in iter {
write!(f, ", \"{path}\"")?;
}
}
f.write_char(']')
}
}
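// Renders like `["/project", "/data"]` (hypothetical paths); this is what the
// "Set up file watchers for ..." log message above interpolates.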

View file

@ -0,0 +1,423 @@
use notify::event::{CreateKind, MetadataKind, ModifyKind, RemoveKind, RenameMode};
use notify::{recommended_watcher, EventKind, RecommendedWatcher, RecursiveMode, Watcher as _};
use ruff_db::system::{SystemPath, SystemPathBuf};
use crate::watch::{ChangeEvent, ChangedKind, CreatedKind, DeletedKind};
/// Creates a new watcher observing file system changes.
///
/// The watcher debounces events but guarantees that all changes are eventually delivered, even if the file system keeps changing.
pub fn directory_watcher<H>(handler: H) -> notify::Result<Watcher>
where
H: EventHandler,
{
let (sender, receiver) = crossbeam::channel::bounded(20);
let debouncer = std::thread::Builder::new()
.name("watcher::debouncer".to_string())
.spawn(move || {
// Wait for the next set of changes
for message in &receiver {
let event = match message {
DebouncerMessage::Event(event) => event,
DebouncerMessage::Flush => {
continue;
}
};
let mut debouncer = Debouncer::default();
debouncer.add_result(event);
// Debounce any new incoming changes:
// * Take any new incoming change events and merge them with the previous change events
// * If there are no new incoming change events after 10 ms, flush the changes and wait for the next notify event.
// * Flush no later than after 3s.
// Anchor the start time before the loop so the 3s flush cap below is measured
// across the whole debounce session rather than resetting on every iteration.
let start = std::time::Instant::now();
loop {
crossbeam::select! {
recv(receiver) -> message => {
match message {
Ok(DebouncerMessage::Event(event)) => {
debouncer.add_result(event);
// Ensure that we flush the changes eventually.
if start.elapsed() > std::time::Duration::from_secs(3) {
break;
}
}
Ok(DebouncerMessage::Flush) => {
break;
}
Err(_) => {
// There are no more senders. That means `stop` was called.
// Drop all events and exit immediately.
return;
}
}
},
default(std::time::Duration::from_millis(10)) => {
break;
}
}
}
// No more file changes after 10 ms, send the changes and schedule a new analysis
let events = debouncer.into_events();
if !events.is_empty() {
handler.handle(events);
}
}
})
.unwrap();
let debouncer_sender = sender.clone();
let watcher =
recommended_watcher(move |event| sender.send(DebouncerMessage::Event(event)).unwrap())?;
Ok(Watcher {
inner: Some(WatcherInner {
watcher,
debouncer_sender,
debouncer_thread: debouncer,
}),
})
}
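// A hypothetical caller sketch: any `Fn(Vec<ChangeEvent>) + Send + 'static` closure
// works as the handler via the blanket `EventHandler` impl at the bottom of this file.
//
//     let mut watcher = directory_watcher(|changes: Vec<ChangeEvent>| {
//         tracing::trace!("received {} change events", changes.len());
//     })?;
//     watcher.watch(SystemPath::new("/path/to/project"))?;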
#[derive(Debug)]
enum DebouncerMessage {
/// A new file system event.
Event(notify::Result<notify::Event>),
Flush,
}
pub struct Watcher {
inner: Option<WatcherInner>,
}
struct WatcherInner {
watcher: RecommendedWatcher,
debouncer_sender: crossbeam::channel::Sender<DebouncerMessage>,
debouncer_thread: std::thread::JoinHandle<()>,
}
impl Watcher {
/// Sets up file watching for `path`.
pub fn watch(&mut self, path: &SystemPath) -> notify::Result<()> {
tracing::debug!("Watching path: `{path}`");
self.inner_mut()
.watcher
.watch(path.as_std_path(), RecursiveMode::Recursive)
}
/// Stops file watching for `path`.
pub fn unwatch(&mut self, path: &SystemPath) -> notify::Result<()> {
tracing::debug!("Unwatching path: `{path}`");
self.inner_mut().watcher.unwatch(path.as_std_path())
}
/// Stops the file watcher.
///
/// Pending events will be discarded.
///
/// The call blocks until the watcher has stopped.
pub fn stop(mut self) {
tracing::debug!("Stop file watcher");
self.set_stop();
}
/// Flushes any pending events.
pub fn flush(&self) {
self.inner()
.debouncer_sender
.send(DebouncerMessage::Flush)
.unwrap();
}
fn set_stop(&mut self) {
if let Some(inner) = self.inner.take() {
// Drop the watcher to ensure there will be no more events
// and to drop the sender used by the notify callback.
drop(inner.watcher);
// Drop "our" sender to ensure the sender count goes down to 0.
// The debouncer thread will end as soon as the sender count is 0.
drop(inner.debouncer_sender);
// Wait for the debouncer to finish, propagate any panics
inner.debouncer_thread.join().unwrap();
}
}
fn inner(&self) -> &WatcherInner {
self.inner.as_ref().expect("Watcher to be running")
}
fn inner_mut(&mut self) -> &mut WatcherInner {
self.inner.as_mut().expect("Watcher to be running")
}
}
impl Drop for Watcher {
fn drop(&mut self) {
self.set_stop();
}
}
#[derive(Default)]
struct Debouncer {
events: Vec<ChangeEvent>,
rescan_event: Option<ChangeEvent>,
}
impl Debouncer {
fn add_result(&mut self, result: notify::Result<notify::Event>) {
tracing::trace!("Handling file watcher event: {result:?}");
match result {
Ok(event) => self.add_event(event),
Err(error) => self.add_error(error),
}
}
#[allow(clippy::unused_self, clippy::needless_pass_by_value)]
fn add_error(&mut self, error: notify::Error) {
// Micha: I skimmed through some of notify's source code and it seems the most common errors
// are IO errors. All other errors should really only happen when adding or removing watched folders.
// It's not clear what an upstream handler should do in the case of an IO error (other than logging it),
// and that's what we do for now as well.
tracing::warn!("File watcher error: {error:?}");
}
fn add_event(&mut self, event: notify::Event) {
if self.rescan_event.is_some() {
// We're already in a rescan state, ignore all other events
return;
}
// If the file watcher is out of sync or we observed too many changes, trigger a full rescan
if event.need_rescan() || self.events.len() > 10000 {
self.events = Vec::new();
self.rescan_event = Some(ChangeEvent::Rescan);
return;
}
let kind = event.kind;
// There are cases where paths can be empty.
// https://github.com/astral-sh/ruff/issues/14222
let Some(path) = event.paths.into_iter().next() else {
tracing::debug!("Ignoring change event with kind '{kind:?}' without a path",);
return;
};
let path = match SystemPathBuf::from_path_buf(path) {
Ok(path) => path,
Err(path) => {
tracing::debug!(
"Ignore change to non-UTF8 path `{path}`: {kind:?}",
path = path.display()
);
// Ignore non-UTF8 paths because they aren't handled by the rest of the system.
return;
}
};
let event = match kind {
EventKind::Create(create) => {
let kind = match create {
CreateKind::File => CreatedKind::File,
CreateKind::Folder => CreatedKind::Directory,
CreateKind::Any | CreateKind::Other => {
CreatedKind::from(FileType::from_path(&path))
}
};
ChangeEvent::Created { path, kind }
}
EventKind::Modify(modify) => match modify {
ModifyKind::Metadata(metadata) => {
if FileType::from_path(&path) != FileType::File {
// Only interested in file metadata events.
return;
}
match metadata {
MetadataKind::Any | MetadataKind::Permissions | MetadataKind::Other => {
ChangeEvent::Changed {
path,
kind: ChangedKind::FileMetadata,
}
}
MetadataKind::AccessTime
| MetadataKind::WriteTime
| MetadataKind::Ownership
| MetadataKind::Extended => {
// We're not interested in these metadata changes
return;
}
}
}
ModifyKind::Data(_) => ChangeEvent::Changed {
kind: ChangedKind::FileContent,
path,
},
ModifyKind::Name(rename) => match rename {
RenameMode::From => {
// TODO: notify_debouncer_full pairs the `RenameMode::From` and `RenameMode::To` events.
// Pairing the from and to events would have the added advantage that we'd know the
// type of the path that was renamed, allowing `apply_changes` to avoid traversing the
// entire package.
// https://github.com/notify-rs/notify/blob/128bf6230c03d39dbb7f301ff7b20e594e34c3a2/notify-debouncer-full/src/lib.rs#L293-L297
ChangeEvent::Deleted {
kind: DeletedKind::Any,
path,
}
}
RenameMode::To => ChangeEvent::Created {
kind: CreatedKind::from(FileType::from_path(&path)),
path,
},
RenameMode::Both => {
// `Both` is only emitted when moving a path from within a watched directory
// to another watched directory. The event is not emitted if the `to` or `from` path
// lies outside the watched directories. However, the `To` and `From` events are always emitted.
// That's why we ignore `Both` and instead rely on `To` and `From`.
return;
}
RenameMode::Other => {
// Skip over any other rename events
return;
}
RenameMode::Any => {
// Guess the action based on the current file system state
if path.as_std_path().exists() {
let file_type = FileType::from_path(&path);
ChangeEvent::Created {
kind: file_type.into(),
path,
}
} else {
ChangeEvent::Deleted {
kind: DeletedKind::Any,
path,
}
}
}
},
ModifyKind::Other => {
// Skip other modification events that are not content or metadata related
return;
}
ModifyKind::Any => {
if !path.as_std_path().is_file() {
return;
}
ChangeEvent::Changed {
path,
kind: ChangedKind::Any,
}
}
},
EventKind::Access(_) => {
// We're not interested in any access events
return;
}
EventKind::Remove(kind) => {
let kind = match kind {
RemoveKind::File => DeletedKind::File,
RemoveKind::Folder => DeletedKind::Directory,
RemoveKind::Any | RemoveKind::Other => DeletedKind::Any,
};
ChangeEvent::Deleted { path, kind }
}
EventKind::Other => {
// Skip over meta events
return;
}
EventKind::Any => {
tracing::debug!("Skipping any FS event for `{path}`");
return;
}
};
self.events.push(event);
}
fn into_events(self) -> Vec<ChangeEvent> {
if let Some(rescan_event) = self.rescan_event {
vec![rescan_event]
} else {
self.events
}
}
}
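// Behavior sketch (illustrative, relying only on the code above): events without a
// path are dropped, and once a rescan is recorded all buffered events collapse into
// a single `ChangeEvent::Rescan`.
//
//     let mut debouncer = Debouncer::default();
//     // `notify::Event::new` creates an event with an empty path list, so it's ignored:
//     debouncer.add_result(Ok(notify::Event::new(EventKind::Any)));
//     assert!(debouncer.into_events().is_empty());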
pub trait EventHandler: Send + 'static {
fn handle(&self, changes: Vec<ChangeEvent>);
}
impl<F> EventHandler for F
where
F: Fn(Vec<ChangeEvent>) + Send + 'static,
{
fn handle(&self, changes: Vec<ChangeEvent>) {
self(changes);
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum FileType {
/// The event is related to a file.
File,
/// The event is related to a directory.
Directory,
/// It's unknown whether the event relates to a file, a directory, or some other file type.
Any,
}
impl FileType {
fn from_path(path: &SystemPath) -> FileType {
match path.as_std_path().metadata() {
Ok(metadata) if metadata.is_file() => FileType::File,
Ok(metadata) if metadata.is_dir() => FileType::Directory,
Ok(_) | Err(_) => FileType::Any,
}
}
}
impl From<FileType> for CreatedKind {
fn from(value: FileType) -> Self {
match value {
FileType::File => Self::File,
FileType::Directory => Self::Directory,
FileType::Any => Self::Any,
}
}
}