diff --git a/Cargo.lock b/Cargo.lock index 12c5fb78df..2ca81e0a40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3933,6 +3933,7 @@ name = "ty_project" version = "0.0.0" dependencies = [ "anyhow", + "camino", "crossbeam", "glob", "globset", diff --git a/crates/ruff_db/src/system/path.rs b/crates/ruff_db/src/system/path.rs index 7bd15890fa..1f22f991f1 100644 --- a/crates/ruff_db/src/system/path.rs +++ b/crates/ruff_db/src/system/path.rs @@ -369,6 +369,30 @@ impl SystemPath { Some(SystemPath::new(Utf8Path::from_path(path)?)) } + /// Returns `true` if the `SystemPath` is absolute, i.e., if it is independent of + /// the current directory. + /// + /// * On Unix, a path is absolute if it starts with the root, so + /// `is_absolute` and [`has_root`] are equivalent. + /// + /// * On Windows, a path is absolute if it has a prefix and starts with the + /// root: `c:\windows` is absolute, while `c:temp` and `\temp` are not. + /// + /// # Examples + /// + /// ``` + /// use ruff_db::system::SystemPath; + /// + /// assert!(!SystemPath::new("foo.txt").is_absolute()); + /// ``` + /// + /// [`has_root`]: Utf8Path::has_root + #[inline] + #[must_use] + pub fn is_absolute(&self) -> bool { + self.0.is_absolute() + } + /// Makes a path absolute and normalizes it without accessing the file system. /// /// Adapted from [cargo](https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61) diff --git a/crates/ty_project/Cargo.toml b/crates/ty_project/Cargo.toml index 2560b05d49..0694218d4e 100644 --- a/crates/ty_project/Cargo.toml +++ b/crates/ty_project/Cargo.toml @@ -24,6 +24,7 @@ ty_python_semantic = { workspace = true, features = ["serde"] } ty_vendored = { workspace = true } anyhow = { workspace = true } +camino = { workspace = true } crossbeam = { workspace = true } ignore = { workspace = true } glob = { workspace = true } diff --git a/crates/ty_project/src/metadata/value.rs b/crates/ty_project/src/metadata/value.rs index 9754618956..a5c6f4c985 100644 --- a/crates/ty_project/src/metadata/value.rs +++ b/crates/ty_project/src/metadata/value.rs @@ -384,7 +384,14 @@ impl RelativePathPattern { }; if let Some(after) = self.0.strip_prefix('!') { - format!("!{}", SystemPath::absolute(after, relative_to)) + // Patterns starting with `**` don't need to be anchored. + if after.starts_with("**") { + self.0.to_string() + } else { + format!("!{}", SystemPath::absolute(after, relative_to)) + } + } else if self.0.starts_with("**") { + self.0.to_string() } else { SystemPath::absolute(&self.0, relative_to).into_string() } diff --git a/crates/ty_project/src/walk.rs b/crates/ty_project/src/walk.rs index c6b92654fb..65dec3cfe4 100644 --- a/crates/ty_project/src/walk.rs +++ b/crates/ty_project/src/walk.rs @@ -3,11 +3,10 @@ use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder}; use regex_automata::util::pool::Pool; use ruff_db::files::{File, system_path_to_file}; use ruff_db::system::walk_directory::{ErrorKind, WalkDirectoryBuilder, WalkState}; -use ruff_db::system::{FileType, SystemPath, SystemPathBuf, deduplicate_nested_paths}; +use ruff_db::system::{FileType, SystemPath, SystemPathBuf}; use ruff_python_ast::PySourceType; -use rustc_hash::{FxBuildHasher, FxHashSet}; +use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet}; use std::borrow::Cow; -use std::collections::BTreeSet; use std::path::PathBuf; use std::sync::Arc; use thiserror::Error; @@ -95,7 +94,7 @@ impl<'a> ProjectFilesFilter<'a> { // TODO: Do we need to use `matched_path_or_any_parents` when not walking? - let matched = self.files_patterns.matches(path, is_directory); + let matched = self.files_patterns.matches_path(path, is_directory); tracing::debug!("path `{path} matches {matched:?}"); // TODO: For partial matches, only include the file if it is included by the project's include/exclude settings. match matched { @@ -296,7 +295,7 @@ pub struct FilePatterns { set: GlobSet, patterns: Box<[FilePattern]>, matches: Option>>>, - static_prefixes: Option>, + static_prefixes: IncludedPrefixes, num_positive: usize, } @@ -306,12 +305,21 @@ impl FilePatterns { set: GlobSet::empty(), patterns: Box::default(), matches: None, - static_prefixes: Some(BTreeSet::new()), + static_prefixes: IncludedPrefixes::new(), num_positive: 0, } } - pub(crate) fn matches(&self, path: &SystemPath, is_directory: bool) -> PatternMatch { + pub(crate) fn matches(&self, path: impl AsRef) -> PatternMatch { + self.matches_path(path.as_ref(), false) + } + + pub(crate) fn matches_directory(&self, path: impl AsRef) -> PatternMatch { + self.matches_path(path.as_ref(), true) + } + + pub(crate) fn matches_path(&self, path: &SystemPath, is_directory: bool) -> PatternMatch { + debug_assert!(path.is_absolute(), "Path {path} isn't absolute"); if self.patterns.is_empty() { return PatternMatch::None; } @@ -336,16 +344,10 @@ impl FilePatterns { if self.num_positive > 0 { if is_directory { - if let Some(static_prefixes) = self.static_prefixes.as_ref() { - // Skip directories for which we know that no glob has a shared prefix with. - // E.g. if `files = ["src"], skip `tests` - if static_prefixes - .range(..=path.to_path_buf()) - .next() - .is_none() - { - return PatternMatch::Exclude(ExcludeReason::NoIncludePattern); - } + // Skip directories for which we know that no glob has a shared prefix with. + // E.g. if `files = ["src"], skip `tests` + if dbg!(self.static_prefixes.is_statically_excluded(path)) { + return PatternMatch::Exclude(ExcludeReason::NoIncludePattern); } } else { // If this is a file and there's at least one include pattern but the file doesn't match it, @@ -356,6 +358,23 @@ impl FilePatterns { PatternMatch::None } + + pub(crate) fn match_once(&self, path: &SystemPath) -> PatternMatch { + for parent in path.ancestors().skip(1) { + match self.matches_directory(parent) { + PatternMatch::Include | PatternMatch::None => { + continue; + } + PatternMatch::Exclude(exclude_reason) => { + return PatternMatch::Exclude(exclude_reason); + } + } + } + + // At this point it is known that no parent path is excluded. + // TODO: This could be adirectory too + self.matches(path) + } } impl PartialEq for FilePatterns { @@ -374,11 +393,11 @@ impl std::fmt::Debug for FilePatterns { } } -#[derive(Clone, Debug)] +#[derive(Debug)] pub(crate) struct FilePatternsBuilder { set: GlobSetBuilder, patterns: Vec, - static_prefixes: Option>, + static_prefixes: IncludedPrefixes, num_positive: usize, } @@ -387,7 +406,7 @@ impl FilePatternsBuilder { Self { set: GlobSetBuilder::new(), patterns: Vec::new(), - static_prefixes: Some(Vec::new()), + static_prefixes: IncludedPrefixes::new(), num_positive: 0, } } @@ -406,6 +425,11 @@ impl FilePatternsBuilder { glob = after; } + debug_assert!( + SystemPath::new(glob).is_absolute(), + "The glob {input} isn't anchored" + ); + // A pattern ending with a `/` should only match directories. E.g. `src/` only matches directories // whereas `src` matches both files and directories. // We need to remove the `/` to ensure that a path missing the trailing `/` matches. @@ -424,7 +448,7 @@ impl FilePatternsBuilder { // If the last component contains no wildcards or extension, consider it an implicit glob // This turns `src` into `src/**/*` // TODO: Should we also enable this behavior for `is_only_directory` patterns? - if is_implicit_glob(glob) && !pattern.negated { + if !glob.ends_with("**") && !pattern.negated { let parsed = GlobBuilder::new(&format!("{glob}/**")) .literal_separator(true) .backslash_escape(true) @@ -457,25 +481,28 @@ impl FilePatternsBuilder { if !pattern.negated { self.num_positive += 1; + let mut parent = self.static_prefixes.root_mut(); + // Do a best effort at extracting a static prefix from a positive include match. // This allows short-circuting traversal of folders that are known to not overlap with any positive // match. However, we have to be careful. Any path starting with a `**` requires visiting all folders. - if let Some(static_prefixes) = self.static_prefixes.as_mut() { - let mut static_prefix = SystemPathBuf::new(); - for component in SystemPath::new(glob).components() { - if glob::Pattern::escape(component.as_str()) == component.as_str() { - static_prefix.push(component); - } else { - break; - } + for component in SystemPath::new(glob).components() { + if glob::Pattern::escape(component.as_str()) != component.as_str() { + *parent = IncludedPrefix::Dynamic; + break; } - if static_prefix.as_str().is_empty() { - // If we see a `**/` pattern, then we have to visit all directories. - self.static_prefixes.take(); - } else { - static_prefixes.push(static_prefix); - } + let static_parent = match parent { + IncludedPrefix::Dynamic => { + break; + } + IncludedPrefix::Static(static_prefix) => static_prefix, + }; + + parent = static_parent + .0 + .entry(component.to_string()) + .or_insert_with(|| IncludedPrefix::Static(StaticPrefix::default())); } } @@ -486,15 +513,11 @@ impl FilePatternsBuilder { } pub(crate) fn build(self) -> Result { - let static_prefixes = self - .static_prefixes - .map(|prefixes| deduplicate_nested_paths(prefixes).collect::>()); - Ok(FilePatterns { set: self.set.build()?, patterns: self.patterns.into(), matches: Some(Arc::new(Pool::new(|| vec![]))), - static_prefixes, + static_prefixes: self.static_prefixes, num_positive: self.num_positive, }) } @@ -517,7 +540,7 @@ pub(crate) enum ExcludeReason { /// The path is excluded because it matches a negative pattern. Match, - /// It's a file path that doesn't match any include pattern. + /// It's a path that doesn't match any include pattern. NoIncludePattern, } @@ -542,10 +565,58 @@ fn is_implicit_glob(pattern: &str) -> bool { .is_some_and(|last| !last.as_str().contains(['.', '*', '?'])) } +#[derive(Clone, Debug)] +struct IncludedPrefixes { + root: IncludedPrefix, +} + +impl IncludedPrefixes { + fn new() -> Self { + Self { + root: IncludedPrefix::Static(StaticPrefix(FxHashMap::default())), + } + } + + fn root_mut(&mut self) -> &mut IncludedPrefix { + &mut self.root + } + + fn is_statically_excluded(&self, path: &SystemPath) -> bool { + let mut current = &self.root; + + for component in path.components() { + match current { + IncludedPrefix::Dynamic => return false, + IncludedPrefix::Static(static_prefix) => { + match static_prefix.0.get(component.as_str()) { + Some(parent) => { + current = parent; + } + None => { + return true; + } + } + } + } + } + + false + } +} + +#[derive(Clone, Debug)] +enum IncludedPrefix { + /// The path contains at least one dynamic child pattern. E.g. if `a/*/b` and `a/c`, then `a` is dynamic because of the first pattern. + Dynamic, + /// All it's children are fixed. + Static(StaticPrefix), +} + +#[derive(Clone, Debug, Default)] +struct StaticPrefix(FxHashMap); + #[cfg(test)] mod tests { - use ruff_db::system::SystemPath; - use crate::walk::{ExcludeReason, FilePatterns, FilePatternsBuilder, PatternMatch}; fn create_patterns(patterns: impl IntoIterator) -> FilePatterns { @@ -560,71 +631,106 @@ mod tests { builder.build().unwrap() } + /// The pattern set matching `**` always returns `Include` #[test] fn all() { - let patterns = create_patterns(["**"]); + let patterns = create_patterns(["/**"]); - assert_eq!( - patterns.matches(SystemPath::new("/src"), true), - PatternMatch::Include - ); - assert_eq!( - patterns.matches(SystemPath::new("/src/"), true), - PatternMatch::Include - ); + assert_eq!(patterns.matches_directory("/src"), PatternMatch::Include); + assert_eq!(patterns.matches_directory("/src/"), PatternMatch::Include); + assert_eq!(patterns.matches_directory("/"), PatternMatch::Include); + assert_eq!(patterns.matches("/test.py"), PatternMatch::Include); + } + + /// The empty pattern set always returns `None`. + #[test] + fn empty() { + let patterns = create_patterns([]); + + assert_eq!(patterns.matches("/a.foo"), PatternMatch::None); + assert_eq!(patterns.matches("/a"), PatternMatch::None); + assert_eq!(patterns.matches("/"), PatternMatch::None); + } + + #[test] + fn simple() { + let patterns = create_patterns(["/*.foo", "!/*.bar"]); + assert_eq!(patterns.matches("/a.foo"), PatternMatch::Include); + assert_eq!(patterns.matches_directory("/a.foo"), PatternMatch::Include); assert_eq!( - patterns.matches(SystemPath::new("/"), true), - PatternMatch::Include + patterns.matches("/a.rs"), + PatternMatch::Exclude(ExcludeReason::NoIncludePattern) + ); + assert_eq!(patterns.matches_directory("/a.rs"), PatternMatch::None); + assert_eq!( + patterns.matches("/a.bar"), + PatternMatch::Exclude(ExcludeReason::Match) ); assert_eq!( - patterns.matches(SystemPath::new("/test.py"), true), - PatternMatch::Include + patterns.matches_directory("/a.bar"), + PatternMatch::Exclude(ExcludeReason::Match) ); } + #[test] + fn only_excludes() { + let patterns = create_patterns(["!/*.bar"]); + assert_eq!(patterns.matches("/a.rs"), PatternMatch::None); + assert_eq!(patterns.matches_directory("/a.rs"), PatternMatch::None); + assert_eq!( + patterns.matches("/a.bar"), + PatternMatch::Exclude(ExcludeReason::Match) + ); + assert_eq!( + patterns.matches_directory("/a.bar"), + PatternMatch::Exclude(ExcludeReason::Match) + ); + } + + #[test] + fn precedence() { + let patterns = create_patterns(["/*.foo", "!/*.bar.foo"]); + assert_eq!(patterns.matches("/a.foo"), PatternMatch::Include); + assert_eq!( + patterns.matches("/a.baz"), + PatternMatch::Exclude(ExcludeReason::NoIncludePattern) + ); + assert_eq!( + patterns.matches("/a.bar.foo"), + PatternMatch::Exclude(ExcludeReason::Match) + ); + } + + /// `directory/` matches the directory `directory` and its contents. It doesn't match files. #[test] fn implicit_directory_pattern() { - // Patterns ending with a slash only match directories with the given name, but not files. - // It includes all files in said directory let patterns = create_patterns(["/src/"]); - assert_eq!( - patterns.matches(SystemPath::new("/src"), true), - PatternMatch::Include - ); - assert_eq!( - patterns.matches(SystemPath::new("/src/"), true), - PatternMatch::Include - ); + assert_eq!(patterns.matches_directory("/src"), PatternMatch::Include); + assert_eq!(patterns.matches_directory("/src/"), PatternMatch::Include); // Don't include files, because the pattern ends with `/` assert_eq!( - patterns.matches(SystemPath::new("/src"), false), + patterns.matches("/src"), PatternMatch::Exclude(ExcludeReason::NoIncludePattern) ); // But include the content of src - assert_eq!( - patterns.matches(SystemPath::new("/src/test.py"), false), - PatternMatch::Include - ); + assert_eq!(patterns.matches("/src/test.py"), PatternMatch::Include); // Deep nesting assert_eq!( - patterns.matches(SystemPath::new("/src/glob/builder.py"), false), + patterns.matches("/src/glob/builder.py"), PatternMatch::Include ); // Or a file with the same name - assert_eq!( - patterns.matches(SystemPath::new("/src/src"), false), - PatternMatch::Include - ); + assert_eq!(patterns.matches("/src/src"), PatternMatch::Include); // Or a directory with the same name assert_eq!( - patterns.matches(SystemPath::new("/src/src"), true), + patterns.matches_directory("/src/src"), PatternMatch::Include ); } @@ -635,66 +741,130 @@ mod tests { // It includes all files in said directory let patterns = create_patterns(["/src"]); - assert_eq!( - patterns.matches(SystemPath::new("/src"), true), - PatternMatch::Include - ); - assert_eq!( - patterns.matches(SystemPath::new("/src/"), true), - PatternMatch::Include - ); + assert_eq!(patterns.matches_directory("/src"), PatternMatch::Include); + assert_eq!(patterns.matches("/src/"), PatternMatch::Include); // Also include files - assert_eq!( - patterns.matches(SystemPath::new("/src"), false), - PatternMatch::Include - ); + assert_eq!(patterns.matches("/src"), PatternMatch::Include); - assert_eq!( - patterns.matches(SystemPath::new("/src/test.py"), false), - PatternMatch::Include - ); + assert_eq!(patterns.matches("/src/test.py"), PatternMatch::Include); // Deep nesting assert_eq!( - patterns.matches(SystemPath::new("/src/glob/builder.py"), false), + patterns.matches("/src/glob/builder.py"), PatternMatch::Include ); // Or a file with the same name - assert_eq!( - patterns.matches(SystemPath::new("/src/src"), false), - PatternMatch::Include - ); + assert_eq!(patterns.matches("/src/src"), PatternMatch::Include); // Or a directory with the same name assert_eq!( - patterns.matches(SystemPath::new("/src/src"), true), + patterns.matches_directory("/src/src"), PatternMatch::Include ); } + /// Patterns where the last part has an extension match files or directories (without their content). #[test] fn pattern_with_extension() { - // Patterns with an extension only match files or directories with the exact name. - let patterns = create_patterns(["test.py"]); + let patterns = create_patterns(["/test.py"]); assert_eq!( - patterns.matches(SystemPath::new("test.py"), true), + patterns.matches_directory("/test.py"), PatternMatch::Include ); assert_eq!( - patterns.matches(SystemPath::new("test.py"), false), + patterns.matches_directory("/test.py"), PatternMatch::Include ); assert_eq!( - patterns.matches(SystemPath::new("test.py/abcd"), false), + patterns.matches("/test.py/abcd"), PatternMatch::Exclude(ExcludeReason::NoIncludePattern) ); assert_eq!( - patterns.matches(SystemPath::new("test.py/abcd"), true), + patterns.matches_directory("/test.py/abcd"), + PatternMatch::Exclude(ExcludeReason::NoIncludePattern) + ); + } + + /// Tests that `matches` returns `Exclude` if: + /// + /// * There's at least one include + /// * The parent component of `paths` are known to not overlap with any include pattern + /// + /// This allows to avoid traversing directories for which it is known that no file will match + /// any include pattern. For example, we want to avoid traversing `tests` if the pattern is `["src"]`. + #[test] + fn directory_pruning() { + let patterns = create_patterns(["/a/b/test-*/d", "/a/b/c/e", "/b/c"]); + + // Paths that can be statically pruned because they match no known prefix + assert_eq!( + patterns.matches_directory("/a/x"), + PatternMatch::Exclude(ExcludeReason::NoIncludePattern), + ); + + assert_eq!( + patterns.matches_directory("/x"), + PatternMatch::Exclude(ExcludeReason::NoIncludePattern), + ); + + // Paths that are known to be included + assert_eq!(patterns.matches_directory("/b/c"), PatternMatch::Include); + assert_eq!( + patterns.matches_directory("/a/b/test-x/d"), + PatternMatch::Include + ); + assert_eq!( + patterns.matches_directory("/a/b/c/e"), + PatternMatch::Include + ); + + // Path's that can't be pruned because they could match the `test-*` wildcard pattern + assert_eq!(patterns.matches_directory("/a/b/b/d"), PatternMatch::None); + + // Path's that can't be pruned because they match a known prefix (in this case `/b/c`) but they don't + // match a pattern themselves + assert_eq!(patterns.matches_directory("/b"), PatternMatch::None) + } + + #[test] + fn prefix_wildcard_include() { + let patterns = create_patterns(["/**/test/**", "/a/b/c/e", "/b/c"]); + + assert_eq!( + patterns.matches_directory("/src/test/"), + PatternMatch::Include + ); + assert_eq!( + patterns.matches_directory("/a/b/c/e"), + PatternMatch::Include + ); + assert_eq!(patterns.matches_directory("/b/c"), PatternMatch::Include); + + // We can't skip over the following directories because of the `**` wildcard + assert_eq!( + patterns.matches_directory("/not_included/a/b"), + PatternMatch::None + ); + } + + #[test] + fn nested_prefix_wildcard_include() { + let patterns = create_patterns(["/src/**/test", "/a/b", "/src/abcd/main.py"]); + + assert_eq!(patterns.matches_directory("/a/b"), PatternMatch::Include); + assert_eq!( + patterns.matches_directory("/src/test"), + PatternMatch::Include + ); + + // We can't skip over the following directories because of the `**` wildcard + assert_eq!( + patterns.matches_directory("/src/not_included/a/b"), PatternMatch::None ); }