Prototype of file inclusion and exclusion

This commit is contained in:
Micha Reiser 2025-06-04 17:05:00 +02:00
parent 9e8a7e9353
commit ff6f0b6ab8
No known key found for this signature in database
14 changed files with 645 additions and 41 deletions

3
Cargo.lock generated
View file

@ -3935,10 +3935,13 @@ dependencies = [
"anyhow",
"crossbeam",
"glob",
"globset",
"ignore",
"insta",
"notify",
"pep440_rs",
"rayon",
"regex-automata 0.4.9",
"ruff_cache",
"ruff_db",
"ruff_macros",

View file

@ -126,6 +126,7 @@ quote = { version = "1.0.23" }
rand = { version = "0.9.0" }
rayon = { version = "1.10.0" }
regex = { version = "1.10.2" }
regex-automata = { version = "0.4.9" }
rustc-hash = { version = "2.0.0" }
rustc-stable-hash = { version = "0.1.2" }
# When updating salsa, make sure to also update the revision in `fuzz/Cargo.toml`
@ -165,7 +166,7 @@ tracing-subscriber = { version = "0.3.18", default-features = false, features =
"env-filter",
"fmt",
"ansi",
"smallvec"
"smallvec",
] }
tryfn = { version = "0.2.1" }
typed-arena = { version = "2.0.2" }
@ -175,11 +176,7 @@ unicode-width = { version = "0.2.0" }
unicode_names2 = { version = "1.2.2" }
unicode-normalization = { version = "0.1.23" }
url = { version = "2.5.0" }
uuid = { version = "1.6.1", features = [
"v4",
"fast-rng",
"macro-diagnostics",
] }
uuid = { version = "1.6.1", features = ["v4", "fast-rng", "macro-diagnostics"] }
walkdir = { version = "2.3.2" }
wasm-bindgen = { version = "0.2.92" }
wasm-bindgen-test = { version = "0.3.42" }
@ -214,8 +211,8 @@ must_use_candidate = "allow"
similar_names = "allow"
single_match_else = "allow"
too_many_lines = "allow"
needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block.
unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often.
needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block.
unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often.
# Without the hashes we run into a `rustfmt` bug in some snapshot tests, see #13250
needless_raw_string_hashes = "allow"
# Disallowed restriction lints

View file

@ -652,6 +652,8 @@ pub enum DiagnosticId {
/// Some I/O operation failed
Io,
NoFiles,
/// Some code contains a syntax error
InvalidSyntax,
@ -699,6 +701,7 @@ impl DiagnosticId {
DiagnosticId::Lint(name) => name.as_str(),
DiagnosticId::RevealedType => "revealed-type",
DiagnosticId::UnknownRule => "unknown-rule",
DiagnosticId::NoFiles => "no-files",
}
}

View file

@ -534,6 +534,10 @@ impl SystemPathBuf {
self.0
}
/// Consumes the path buffer and returns it as an owned `String`
/// by delegating to the wrapped path's `into_string`.
pub fn into_string(self) -> String {
self.0.into_string()
}
/// Consumes the path buffer and converts it into a standard library `PathBuf`
/// by delegating to the wrapped path's `into_std_path_buf`.
pub fn into_std_path_buf(self) -> PathBuf {
self.0.into_std_path_buf()
}
@ -822,7 +826,7 @@ impl ruff_cache::CacheKey for SystemVirtualPathBuf {
///
/// # Examples
/// ```rust
/// use ruff_db::system::{SystemPath, deduplicate_nested_paths};///
/// use ruff_db::system::{SystemPath, deduplicate_nested_paths};
///
/// let paths = vec![SystemPath::new("/a/b/c"), SystemPath::new("/a/b"), SystemPath::new("/a/beta"), SystemPath::new("/a/b/c")];
/// assert_eq!(deduplicate_nested_paths(paths).collect::<Vec<_>>(), &[SystemPath::new("/a/b"), SystemPath::new("/a/beta")]);

View file

@ -143,6 +143,23 @@ typeshed = "/path/to/custom/typeshed"
## `src`
#### `files`
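A list of gitignore-style patterns that select the files and directories to check.
Patterns are anchored: relative patterns are resolved against the project root
(or the current working directory when passed on the command line).
A pattern prefixed with `!` excludes the matching paths, a pattern ending in `/` only matches directories,
and a plain name such as `src` also includes everything below it.
If multiple patterns match a path, the last one wins.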
**Default value**: `null`
**Type**: `list[pattern]`
**Example usage** (`pyproject.toml`):
```toml
[tool.ty.src]
files = ["./app", "!app/build"]
```
---
#### `respect-ignore-files`
Whether to automatically exclude files that are ignored by `.ignore`,

View file

@ -19,7 +19,7 @@ use colored::Colorize;
use crossbeam::channel as crossbeam_channel;
use rayon::ThreadPoolBuilder;
use ruff_db::Upcast;
use ruff_db::diagnostic::{Diagnostic, DisplayDiagnosticConfig, Severity};
use ruff_db::diagnostic::{Diagnostic, DiagnosticId, DisplayDiagnosticConfig, Severity};
use ruff_db::max_parallelism;
use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf};
use salsa::plumbing::ZalsaDatabase;
@ -276,7 +276,7 @@ impl MainLoop {
}
MainLoopMessage::CheckCompleted {
result,
mut result,
revision: check_revision,
} => {
let terminal_settings = db.project().settings(db).terminal();
@ -286,7 +286,11 @@ impl MainLoop {
if check_revision == revision {
if db.project().files(db).is_empty() {
tracing::warn!("No python files found under the given path(s)");
result.push(Diagnostic::new(
DiagnosticId::NoFiles,
Severity::Warning,
"No python files found under the given path(s)",
));
}
let mut stdout = stdout().lock();

View file

@ -25,7 +25,9 @@ ty_vendored = { workspace = true }
anyhow = { workspace = true }
crossbeam = { workspace = true }
ignore = { workspace = true }
glob = { workspace = true }
globset = { workspace = true }
notify = { workspace = true }
pep440_rs = { workspace = true, features = ["version-ranges"] }
rayon = { workspace = true }
@ -35,6 +37,7 @@ schemars = { workspace = true, optional = true }
serde = { workspace = true }
thiserror = { workspace = true }
toml = { workspace = true }
regex-automata = { workspace = true }
tracing = { workspace = true }
[dev-dependencies]

View file

@ -7,7 +7,7 @@ use std::collections::BTreeSet;
use crate::walk::ProjectFilesWalker;
use ruff_db::Db as _;
use ruff_db::files::{File, Files};
use ruff_db::system::SystemPath;
use ruff_db::system::{FileType, SystemPath};
use rustc_hash::FxHashSet;
use ty_python_semantic::Program;
@ -113,8 +113,16 @@ impl ProjectDatabase {
// should be included in the project. We can skip this check for
// paths that aren't part of the project or shouldn't be included
// when checking the project.
if project.is_path_included(self, &path) {
if self.system().is_file(&path) {
let metadata = self
.system()
.path_metadata(&path)
.map(|metadata| metadata.file_type());
if project.is_path_included(
self,
&path,
matches!(metadata, Ok(FileType::Directory)),
) {
if matches!(metadata, Ok(FileType::File)) {
// Add the parent directory because `walkdir` always visits explicitly passed files
// even if they match an exclude filter.
added_paths.insert(path.parent().unwrap().to_path_buf());
@ -153,7 +161,7 @@ impl ProjectDatabase {
result.custom_stdlib_changed = true;
}
if project.is_path_included(self, &path) || path == project_root {
if project.is_path_included(self, &path, true) || path == project_root {
// TODO: Shouldn't it be enough to simply traverse the project files and remove all
// that start with the given path?
tracing::debug!(

View file

@ -127,7 +127,7 @@ impl Reporter for DummyReporter {
#[salsa::tracked]
impl Project {
pub fn from_metadata(db: &dyn Db, metadata: ProjectMetadata) -> Self {
let (settings, settings_diagnostics) = metadata.options().to_settings(db);
let (settings, settings_diagnostics) = metadata.options().to_settings(db, metadata.root());
Project::builder(metadata, settings, settings_diagnostics)
.durability(Durability::MEDIUM)
@ -160,8 +160,8 @@ impl Project {
/// the project's include and exclude settings as well as the paths that were passed to `ty check <paths>`.
/// This means, that this method is an over-approximation of `Self::files` and may return `true` for paths
/// that won't be included when checking the project because they're ignored in a `.gitignore` file.
pub fn is_path_included(self, db: &dyn Db, path: &SystemPath) -> bool {
ProjectFilesFilter::from_project(db, self).is_included(path)
pub fn is_path_included(self, db: &dyn Db, path: &SystemPath, is_directory: bool) -> bool {
ProjectFilesFilter::from_project(db, self).is_included(path, is_directory)
}
pub fn reload(self, db: &mut dyn Db, metadata: ProjectMetadata) {
@ -169,7 +169,8 @@ impl Project {
assert_eq!(self.root(db), metadata.root());
if &metadata != self.metadata(db) {
let (settings, settings_diagnostics) = metadata.options().to_settings(db);
let (settings, settings_diagnostics) =
metadata.options().to_settings(db, metadata.root());
if self.settings(db) != &settings {
self.set_settings(db).to(settings);

View file

@ -1,5 +1,9 @@
use crate::Db;
use crate::metadata::value::{RangedValue, RelativePathBuf, ValueSource, ValueSourceGuard};
use crate::metadata::settings::SrcSettings;
use crate::metadata::value::{
RangedValue, RelativePathBuf, RelativePathPattern, ValueSource, ValueSourceGuard,
};
use crate::walk::FilePatternsBuilder;
use ruff_db::diagnostic::{Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, Severity, Span};
use ruff_db::files::system_path_to_file;
use ruff_db::system::{System, SystemPath, SystemPathBuf};
@ -199,10 +203,20 @@ impl Options {
}
#[must_use]
pub(crate) fn to_settings(&self, db: &dyn Db) -> (Settings, Vec<OptionDiagnostic>) {
pub(crate) fn to_settings(
&self,
db: &dyn Db,
project_root: &SystemPath,
) -> (Settings, Vec<OptionDiagnostic>) {
let (rules, diagnostics) = self.to_rule_selection(db);
let mut settings = Settings::new(rules, self.src.as_ref());
let mut settings = Settings::new(rules);
if let Some(src) = self.src.as_ref() {
tracing::debug!("found src options: {src:?}");
// TODO: Error handling
settings.set_src(src.to_settings(db.system(), project_root).unwrap());
}
if let Some(terminal) = self.terminal.as_ref() {
settings.set_terminal(TerminalSettings {
@ -408,6 +422,17 @@ pub struct SrcOptions {
)]
pub root: Option<RelativePathBuf>,
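/// A list of gitignore-style patterns that select the files and directories to check.
///
/// Patterns are anchored: relative patterns are resolved against the project root
/// (or the current working directory when passed on the command line).
/// A pattern prefixed with `!` excludes the matching paths, a pattern ending in `/` only
/// matches directories, and a plain name such as `src` also includes everything below it.
/// If multiple patterns match a path, the last one wins.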
#[serde(skip_serializing_if = "Option::is_none")]
#[option(
default = r#"null"#,
value_type = "list[pattern]",
example = r#"
files = ["./app", "!app/build"]
"#
)]
pub files: Option<Vec<RelativePathPattern>>,
/// Whether to automatically exclude files that are ignored by `.ignore`,
/// `.gitignore`, `.git/info/exclude`, and global `gitignore` files.
/// Enabled by default.
@ -422,6 +447,31 @@ pub struct SrcOptions {
pub respect_ignore_files: Option<bool>,
}
impl SrcOptions {
fn to_settings(
&self,
system: &dyn System,
project_root: &SystemPath,
// diagnostics: &mut Vec<OptionDiagnostic>,
) -> Result<SrcSettings, ()> {
// TODO: Error handling, default exclusions
let mut files = FilePatternsBuilder::new();
for pattern in self.files.iter().flatten() {
files.add(&pattern.absolute(project_root, system)).unwrap();
}
let src = SrcSettings {
respect_ignore_files: self.respect_ignore_files.unwrap_or(true),
files: files.build().unwrap(),
};
tracing::debug!("Resolved src settings: {src:?}");
Ok(src)
}
}
#[derive(Debug, Default, Clone, Eq, PartialEq, Combine, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", transparent)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]

View file

@ -1,9 +1,9 @@
use std::sync::Arc;
use crate::metadata::options::SrcOptions;
use ruff_db::diagnostic::DiagnosticFormat;
use std::sync::Arc;
use ty_python_semantic::lint::RuleSelection;
use crate::walk::FilePatterns;
/// The resolved [`super::Options`] for the project.
///
/// Unlike [`super::Options`], the struct has default values filled in and
@ -23,19 +23,15 @@ pub struct Settings {
terminal: TerminalSettings,
respect_ignore_files: bool,
src: SrcSettings,
}
impl Settings {
pub fn new(rules: RuleSelection, src_options: Option<&SrcOptions>) -> Self {
let respect_ignore_files = src_options
.and_then(|src| src.respect_ignore_files)
.unwrap_or(true);
pub fn new(rules: RuleSelection) -> Self {
Self {
rules: Arc::new(rules),
terminal: TerminalSettings::default(),
respect_ignore_files,
src: SrcSettings::default(),
}
}
@ -43,8 +39,12 @@ impl Settings {
&self.rules
}
pub fn respect_ignore_files(&self) -> bool {
self.respect_ignore_files
pub fn src(&self) -> &SrcSettings {
&self.src
}
/// Replaces the resolved `src` settings with `src`.
pub fn set_src(&mut self, src: SrcSettings) {
self.src = src;
}
pub fn to_rules(&self) -> Arc<RuleSelection> {
@ -65,3 +65,20 @@ pub struct TerminalSettings {
pub output_format: DiagnosticFormat,
pub error_on_warning: bool,
}
/// The resolved `src` settings of a project, with defaults filled in.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SrcSettings {
/// Whether files ignored by `.ignore`, `.gitignore` and similar ignore files are excluded.
/// The project walker passes this flag to its `standard_filters` setting.
pub respect_ignore_files: bool,
/// The compiled include and exclude patterns from the `files` option.
pub files: FilePatterns,
}
impl Default for SrcSettings {
    /// Returns the settings used when the project has no `src` options:
    /// ignore files are respected and no file patterns are configured.
    fn default() -> Self {
        // TODO: This should include all files by default
        let files = FilePatterns::empty();

        Self {
            files,
            respect_ignore_files: true,
        }
    }
}

View file

@ -344,3 +344,49 @@ impl RelativePathBuf {
SystemPath::absolute(&self.0, relative_to)
}
}
/// A relative path pattern that allows for negative patterns (git ignore style).
///
/// A leading `!` marks the pattern as negated. The wrapped [`RangedValue`] records where the
/// pattern came from, which decides what the pattern is resolved against
/// (see [`RelativePathPattern::absolute`]).
#[derive(
Debug,
Clone,
serde::Serialize,
serde::Deserialize,
PartialEq,
Eq,
PartialOrd,
Ord,
Hash,
Combine,
)]
#[serde(transparent)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct RelativePathPattern(RangedValue<String>);
impl RelativePathPattern {
    /// Creates a pattern from its text and the source it originates from.
    pub fn new(pattern: String, source: ValueSource) -> Self {
        let value = RangedValue::new(pattern, source);
        Self(value)
    }

    /// Creates a pattern that was specified on the command line.
    pub fn cli(pattern: String) -> Self {
        Self::new(pattern, ValueSource::Cli)
    }

    /// Returns the pattern exactly as written, including a leading `!` if present.
    pub fn pattern(&self) -> &str {
        &self.0
    }

    /// Resolves the relative pattern to an absolute pattern.
    ///
    /// Patterns read from a configuration file are resolved against `project_root`,
    /// patterns from the CLI against the current directory of `system`.
    /// A leading `!` is preserved in front of the resolved path.
    ///
    /// NOTE(review): presumably `SystemPath::absolute` normalizes the path; confirm that a
    /// trailing `/` (the directory-only marker) survives the normalization.
    pub fn absolute(&self, project_root: &SystemPath, system: &dyn System) -> String {
        let base = match &self.0.source {
            ValueSource::Cli => system.current_directory(),
            ValueSource::File(_) => project_root,
        };

        match self.0.strip_prefix('!') {
            Some(negated) => {
                let resolved = SystemPath::absolute(negated, base);
                format!("!{resolved}")
            }
            None => SystemPath::absolute(&self.0, base).into_string(),
        }
    }
}

View file

@ -1,10 +1,15 @@
use crate::{Db, IOErrorDiagnostic, IOErrorKind, Project};
use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
use regex_automata::util::pool::Pool;
use ruff_db::files::{File, system_path_to_file};
use ruff_db::system::walk_directory::{ErrorKind, WalkDirectoryBuilder, WalkState};
use ruff_db::system::{FileType, SystemPath, SystemPathBuf};
use ruff_db::system::{FileType, SystemPath, SystemPathBuf, deduplicate_nested_paths};
use ruff_python_ast::PySourceType;
use rustc_hash::{FxBuildHasher, FxHashSet};
use std::borrow::Cow;
use std::collections::BTreeSet;
use std::path::PathBuf;
use std::sync::Arc;
use thiserror::Error;
/// Filter that decides which files are included in the project.
@ -13,11 +18,15 @@ use thiserror::Error;
///
/// This struct mainly exists because `dyn Db` isn't `Send` or `Sync`, making it impossible
/// to access fields from within the walker.
#[derive(Default, Debug)]
#[derive(Debug)]
pub(crate) struct ProjectFilesFilter<'a> {
/// The same as [`Project::included_paths_or_root`].
included_paths: &'a [SystemPathBuf],
files_patterns: &'a FilePatterns,
project_root: &'a SystemPath,
/// The filter skips checking if the path is in `included_paths` if set to `true`.
///
/// Skipping this check is useful when the walker only walks over `included_paths`.
@ -28,6 +37,8 @@ impl<'a> ProjectFilesFilter<'a> {
pub(crate) fn from_project(db: &'a dyn Db, project: Project) -> Self {
Self {
included_paths: project.included_paths_or_root(db),
project_root: project.root(db),
files_patterns: &project.settings(db).src().files,
skip_included_paths: false,
}
}
@ -45,7 +56,7 @@ impl<'a> ProjectFilesFilter<'a> {
/// This method may return `true` for files that don't end up being included when walking the
/// project tree because it doesn't consider `.gitignore` and other ignore files when deciding
/// if a file's included.
pub(crate) fn is_included(&self, path: &SystemPath) -> bool {
pub(crate) fn is_included(&self, path: &SystemPath, is_directory: bool) -> bool {
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum CheckPathMatch {
/// The path is a partial match of the checked path (it's a sub path)
@ -78,8 +89,26 @@ impl<'a> ProjectFilesFilter<'a> {
match m {
None => false,
Some(CheckPathMatch::Partial) => {
if path == self.project_root {
return true;
}
// TODO: Do we need to use `matched_path_or_any_parents` when not walking?
let matched = self.files_patterns.matches(path, is_directory);
tracing::debug!("path `{path} matches {matched:?}");
// TODO: For partial matches, only include the file if it is included by the project's include/exclude settings.
true
match matched {
// We need to traverse directories that don't match because `a` doesn't match the pattern `a/b/c/d.py`
// but we need to traverse the directory to successfully match `a/b/c/d.py`.
// This is very unfortunate because it means ty traverses all directories when e.g. using `files = ["src"]`.
// TODO(micha): 04.06.2025: It would be nice if we could avoid traversing directories
// that are known to never match because they don't share a common prefix with any of the globs.
// But we'd need to be careful in the presence of `**/test` patterns because they can match any path.
PatternMatch::None => true,
PatternMatch::Exclude(_) => false,
PatternMatch::Include => true,
}
}
Some(CheckPathMatch::Full) => true,
}
@ -132,7 +161,7 @@ impl<'a> ProjectFilesWalker<'a> {
let mut walker = db
.system()
.walk_directory(paths.next()?.as_ref())
.standard_filters(db.project().settings(db).respect_ignore_files())
.standard_filters(db.project().settings(db).src().respect_ignore_files)
.ignore_hidden(false);
for path in paths {
@ -152,7 +181,10 @@ impl<'a> ProjectFilesWalker<'a> {
Box::new(|entry| {
match entry {
Ok(entry) => {
if !self.filter.is_included(entry.path()) {
if !self
.filter
.is_included(entry.path(), entry.file_type().is_directory())
{
tracing::debug!("Ignoring not-included path: {}", entry.path());
return WalkState::Skip;
}
@ -258,3 +290,412 @@ pub(crate) enum WalkError {
#[error("`{path}` is not a valid UTF-8 path")]
NonUtf8Path { path: PathBuf },
}
/// An ordered collection of include and exclude glob patterns.
///
/// When several patterns match a path, the pattern that was added last decides
/// (see [`FilePatterns::matches`]). Equality only compares `patterns`.
#[derive(Clone)]
pub struct FilePatterns {
/// The compiled globs. The glob at index `i` belongs to `patterns[i]`.
set: GlobSet,
/// Per-glob metadata (negation, directory-only), parallel to `set`.
patterns: Box<[FilePattern]>,
/// Pool of reusable buffers that receive the indices of the matching globs.
/// `None` only for the empty pattern set.
matches: Option<Arc<Pool<Vec<usize>>>>,
/// The literal leading paths of the positive patterns, used to skip directories.
/// `None` if a positive pattern has no literal prefix (for example `**/test`).
static_prefixes: Option<BTreeSet<SystemPathBuf>>,
/// The number of non-negated patterns added by the user.
num_positive: usize,
}
impl FilePatterns {
    /// Creates a pattern set that contains no patterns.
    ///
    /// [`Self::matches`] returns [`PatternMatch::None`] for every path.
    pub(crate) fn empty() -> Self {
        Self {
            set: GlobSet::empty(),
            patterns: Box::default(),
            matches: None,
            static_prefixes: Some(BTreeSet::new()),
            num_positive: 0,
        }
    }

    /// Tests `path` against the patterns.
    ///
    /// The matching pattern that was added last decides the outcome: a negated pattern yields
    /// [`PatternMatch::Exclude`], a positive pattern [`PatternMatch::Include`]. Directory-only
    /// patterns (`src/`) are ignored unless `is_directory` is `true`.
    ///
    /// If no pattern matches but at least one positive pattern exists:
    /// * files are excluded because they match no include pattern;
    /// * directories are excluded only if it is known that nothing below them can match,
    ///   otherwise [`PatternMatch::None`] is returned so that the caller keeps traversing.
    pub(crate) fn matches(&self, path: &SystemPath, is_directory: bool) -> PatternMatch {
        if self.patterns.is_empty() {
            return PatternMatch::None;
        }

        let candidate = Candidate::new(path);
        let mut matches = self
            .matches
            .as_ref()
            .expect("a non-empty pattern set is always built with a match buffer pool")
            .get();
        self.set.matches_candidate_into(&candidate, &mut *matches);

        // Walk the matches back to front: later patterns take precedence over earlier ones.
        for &i in matches.iter().rev() {
            let pattern = &self.patterns[i];

            if pattern.is_only_directory && !is_directory {
                continue;
            }

            return if pattern.negated {
                PatternMatch::Exclude(ExcludeReason::Match)
            } else {
                PatternMatch::Include
            };
        }

        // With only exclude patterns, everything that isn't excluded is included.
        if self.num_positive == 0 {
            return PatternMatch::None;
        }

        // If this is a file and there's at least one include pattern but the file doesn't match it,
        // then the file is excluded.
        if !is_directory {
            return PatternMatch::Exclude(ExcludeReason::NoIncludePattern);
        }

        // Skip directories that can't contain a match. E.g. if `files = ["src"]`, skip `tests`.
        // A directory is relevant if it lies inside a static prefix (`src/app` for `src`)
        // or if it is an ancestor of one (`src` for `src/app/*.py`), because it must be
        // traversed to reach the prefix. Comparing the paths by their sort order is not
        // sufficient: `/src` sorts before `/src/app` but still has to be visited.
        if let Some(static_prefixes) = self.static_prefixes.as_ref() {
            let may_contain_match = static_prefixes
                .iter()
                .any(|prefix| path.starts_with(prefix) || prefix.starts_with(path));

            if !may_contain_match {
                return PatternMatch::Exclude(ExcludeReason::NoIncludePattern);
            }
        }

        PatternMatch::None
    }
}
impl PartialEq for FilePatterns {
    /// Two pattern sets are equal if they were built from the same pattern metadata.
    /// The compiled glob set and the other derived fields are not compared.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (&self.patterns, &other.patterns);
        lhs == rhs
    }
}

impl Eq for FilePatterns {}
impl std::fmt::Debug for FilePatterns {
    /// Formats only the `patterns` field; the remaining fields are omitted from the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut debug = f.debug_struct("FilePatterns");
        debug.field("patterns", &self.patterns);
        debug.finish()
    }
}
/// Builder that collects glob patterns and compiles them into [`FilePatterns`].
#[derive(Clone, Debug)]
pub(crate) struct FilePatternsBuilder {
/// The globs added so far. Kept parallel to `patterns`.
set: GlobSetBuilder,
/// The metadata of each added glob, in insertion order.
patterns: Vec<FilePattern>,
/// The literal leading paths of the positive patterns seen so far.
/// Reset to `None` once a positive pattern without a literal prefix is added.
static_prefixes: Option<Vec<SystemPathBuf>>,
/// The number of non-negated patterns passed to `add`.
num_positive: usize,
}
impl FilePatternsBuilder {
    /// Creates a builder without any patterns.
    pub(crate) fn new() -> Self {
        Self {
            set: GlobSetBuilder::new(),
            patterns: Vec::new(),
            static_prefixes: Some(Vec::new()),
            num_positive: 0,
        }
    }

    /// Adds a pattern to the set.
    ///
    /// * A leading `!` negates the pattern (matching paths are excluded).
    /// * A trailing `/` restricts the pattern to directories.
    /// * A positive pattern whose last component contains no `.`, `*` or `?` is an implicit
    ///   glob: `src` additionally registers `src/**`.
    ///
    /// Patterns are anchored; unlike gitignore no `**` prefix is inserted.
    ///
    /// # Errors
    ///
    /// Returns an error if the pattern isn't a valid glob.
    pub(crate) fn add(&mut self, input: &str) -> Result<&mut Self, globset::Error> {
        let mut pattern = FilePattern {
            negated: false,
            is_only_directory: false,
            original: input.to_string(),
        };

        let mut glob = input;

        if let Some(after) = glob.strip_prefix('!') {
            pattern.negated = true;
            glob = after;
        }

        // A pattern ending with a `/` should only match directories. E.g. `src/` only matches directories
        // whereas `src` matches both files and directories.
        // We need to remove the `/` to ensure that a path missing the trailing `/` matches.
        if let Some(before) = glob.strip_suffix('/') {
            pattern.is_only_directory = true;
            glob = before;

            // If the slash was escaped, then remove the escape.
            // See: https://github.com/BurntSushi/ripgrep/issues/2236
            //
            // Only the backslashes directly in front of the slash matter: an odd number means
            // the last one escaped the slash. Backslashes elsewhere in the glob must not be
            // counted, or unrelated characters get cut off the end of the pattern.
            let trailing_backslashes = glob.chars().rev().take_while(|c| *c == '\\').count();
            if trailing_backslashes % 2 == 1 {
                // `\` is a single byte, so slicing off one byte stays on a char boundary.
                glob = &glob[..glob.len() - 1];
            }
        }

        // If the last component contains no wildcards or extension, consider it an implicit glob
        // This turns `src` into `src/**/*`
        // TODO: Should we also enable this behavior for `is_only_directory` patterns?
        if is_implicit_glob(glob) && !pattern.negated {
            let parsed = GlobBuilder::new(&format!("{glob}/**"))
                .literal_separator(true)
                .backslash_escape(true)
                // TODO: Map the error to the pattern the user provided.
                .build()?;

            self.set.add(parsed);
            self.patterns.push(FilePattern {
                is_only_directory: false,
                ..pattern.clone()
            });
        }

        let mut actual = Cow::Borrowed(glob);

        // If the glob ends with `/**`, then we should only match everything
        // inside a directory, but not the directory itself. Standard globs
        // will match the directory. So we add `/*` to force the issue.
        if actual.ends_with("/**") {
            actual = Cow::Owned(format!("{actual}/*"));
        }

        // Unlike gitignore, anchor paths (don't insert a `**` prefix).
        let parsed = GlobBuilder::new(&actual)
            .literal_separator(true)
            .backslash_escape(true)
            // TODO: Map the error to the pattern the user provided.
            .build()?;

        if !pattern.negated {
            self.num_positive += 1;

            // Do a best effort at extracting a static prefix from a positive include match.
            // This allows short-circuiting traversal of folders that are known to not overlap with any positive
            // match. However, we have to be careful. Any path starting with a `**` requires visiting all folders.
            if let Some(static_prefixes) = self.static_prefixes.as_mut() {
                let mut static_prefix = SystemPathBuf::new();

                for component in SystemPath::new(glob).components() {
                    // Stop at the first component that isn't purely literal. Besides wildcards
                    // and character classes this includes alternations (`{a,b}`) and
                    // backslash escapes, which don't match their own text verbatim.
                    let is_literal = !component
                        .as_str()
                        .contains(['*', '?', '[', ']', '{', '}', '\\']);

                    if !is_literal {
                        break;
                    }

                    static_prefix.push(component);
                }

                if static_prefix.as_str().is_empty() {
                    // If we see a `**/` pattern, then we have to visit all directories.
                    self.static_prefixes = None;
                } else {
                    static_prefixes.push(static_prefix);
                }
            }
        }

        self.set.add(parsed);
        self.patterns.push(pattern);

        Ok(self)
    }

    /// Compiles the added patterns into [`FilePatterns`].
    ///
    /// Static prefixes that are nested inside another prefix are dropped.
    ///
    /// # Errors
    ///
    /// Returns an error if the glob set fails to build.
    pub(crate) fn build(self) -> Result<FilePatterns, globset::Error> {
        let static_prefixes = self
            .static_prefixes
            .map(|prefixes| deduplicate_nested_paths(prefixes).collect::<BTreeSet<_>>());

        Ok(FilePatterns {
            set: self.set.build()?,
            patterns: self.patterns.into(),
            matches: Some(Arc::new(Pool::new(|| vec![]))),
            static_prefixes,
            num_positive: self.num_positive,
        })
    }
}
/// The outcome of testing a path with [`FilePatterns::matches`].
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum PatternMatch {
/// The highest precedence pattern is an include pattern.
Include,
/// The highest precedence pattern is a negated pattern (the file should not be included).
Exclude(ExcludeReason),
/// No pattern matched the path.
None,
}
/// Why [`FilePatterns::matches`] excluded a path.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum ExcludeReason {
/// The path is excluded because it matches a negative pattern.
Match,
/// The path matches no pattern although at least one include pattern exists.
/// Files are always excluded in that case; directories only when the
/// static-prefix check rules them out.
NoIncludePattern,
}
/// Metadata about a single glob registered in a [`FilePatterns`] set.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct FilePattern {
/// The pattern as specified by the user.
original: String,
/// Whether the glob should only match directories (`src/` matches only directories).
is_only_directory: bool,
/// Whether this pattern was negated.
negated: bool,
}
/// Returns `true` if the last path component of `pattern` contains neither a `.`
/// nor one of the wildcards `*` and `?`.
///
/// Returns `false` for a pattern without any components.
fn is_implicit_glob(pattern: &str) -> bool {
    match SystemPath::new(pattern).components().last() {
        Some(last) => !last.as_str().contains(['.', '*', '?']),
        None => false,
    }
}
#[cfg(test)]
mod tests {
    use ruff_db::system::SystemPath;

    use crate::walk::{ExcludeReason, FilePatterns, FilePatternsBuilder, PatternMatch};

    /// Builds a [`FilePatterns`] set from `patterns`, panicking on an invalid pattern.
    fn create_patterns(patterns: impl IntoIterator<Item = &'static str>) -> FilePatterns {
        let mut builder = FilePatternsBuilder::new();

        for pattern in patterns {
            builder.add(pattern).unwrap_or_else(|err| {
                panic!("Invalid pattern `{pattern}`: {err}");
            });
        }

        builder.build().unwrap()
    }

    /// Asserts that every `(path, is_directory, expected)` case yields `expected`.
    #[track_caller]
    fn assert_matches(patterns: &FilePatterns, cases: &[(&str, bool, PatternMatch)]) {
        for &(path, is_directory, expected) in cases {
            assert_eq!(
                patterns.matches(SystemPath::new(path), is_directory),
                expected,
                "path: `{path}`, is_directory: {is_directory}"
            );
        }
    }

    #[test]
    fn all() {
        let patterns = create_patterns(["**"]);

        assert_matches(
            &patterns,
            &[
                ("/src", true, PatternMatch::Include),
                ("/src/", true, PatternMatch::Include),
                ("/", true, PatternMatch::Include),
                ("/test.py", true, PatternMatch::Include),
            ],
        );
    }

    #[test]
    fn implicit_directory_pattern() {
        // Patterns ending with a slash only match directories with the given name, but not files.
        // It includes all files in said directory
        let patterns = create_patterns(["/src/"]);

        assert_matches(
            &patterns,
            &[
                ("/src", true, PatternMatch::Include),
                ("/src/", true, PatternMatch::Include),
                // Don't include files, because the pattern ends with `/`
                (
                    "/src",
                    false,
                    PatternMatch::Exclude(ExcludeReason::NoIncludePattern),
                ),
                // But include the content of src
                ("/src/test.py", false, PatternMatch::Include),
                // Deep nesting
                ("/src/glob/builder.py", false, PatternMatch::Include),
                // Or a file with the same name
                ("/src/src", false, PatternMatch::Include),
                // Or a directory with the same name
                ("/src/src", true, PatternMatch::Include),
            ],
        );
    }

    #[test]
    fn implicit_pattern() {
        // Patterns ending without a slash include both files and directories.
        // It includes all files in said directory
        let patterns = create_patterns(["/src"]);

        assert_matches(
            &patterns,
            &[
                ("/src", true, PatternMatch::Include),
                ("/src/", true, PatternMatch::Include),
                // Also include files
                ("/src", false, PatternMatch::Include),
                ("/src/test.py", false, PatternMatch::Include),
                // Deep nesting
                ("/src/glob/builder.py", false, PatternMatch::Include),
                // Or a file with the same name
                ("/src/src", false, PatternMatch::Include),
                // Or a directory with the same name
                ("/src/src", true, PatternMatch::Include),
            ],
        );
    }

    #[test]
    fn pattern_with_extension() {
        // Patterns with an extension only match files or directories with the exact name.
        let patterns = create_patterns(["test.py"]);

        assert_matches(
            &patterns,
            &[
                ("test.py", true, PatternMatch::Include),
                ("test.py", false, PatternMatch::Include),
                (
                    "test.py/abcd",
                    false,
                    PatternMatch::Exclude(ExcludeReason::NoIncludePattern),
                ),
                ("test.py/abcd", true, PatternMatch::None),
            ],
        );
    }
}

10
ty.schema.json generated
View file

@ -851,6 +851,16 @@
"SrcOptions": {
"type": "object",
"properties": {
"files": {
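"description": "A list of gitignore-style patterns that select the files and directories to check.\n\nPatterns are anchored: relative patterns are resolved against the project root (or the current working directory when passed on the command line). A pattern prefixed with `!` excludes the matching paths, a pattern ending in `/` only matches directories, and a plain name such as `src` also includes everything below it. If multiple patterns match a path, the last one wins.",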
"type": [
"array",
"null"
],
"items": {
"type": "string"
}
},
"respect-ignore-files": {
"description": "Whether to automatically exclude files that are ignored by `.ignore`, `.gitignore`, `.git/info/exclude`, and global `gitignore` files. Enabled by default.",
"type": [