Deduplicate input paths (#20105)

<!--
Thank you for contributing to Ruff/ty! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title? (Please prefix
with `[ty]` for ty pull
  requests.)
- Does this pull request include references to any relevant issues?
-->

## Summary

<!-- What's the purpose of the change? What does it do, and why? -->

Fixes #20035, fixes #19395

This is for deduplicating input paths to avoid processing the same file
multiple times.

This is my first contribution, so I'm sorry if I miss something. Please
tell me if this is needed for this feature.

## Test Plan

<!-- How was it tested? -->

I just added a test `find_python_files_deduplicated` in
eee1020e32/crates/ruff_workspace/src/resolver.rs (L1017)
. This pull request adds changes to `WalkPythonFilesState::finish`,
which is used in `python_files_in_path`, so they affect some commands
such as `analyze`, `format`, `check` and so on. I will add snapshot
tests for them if necessary.

I’ve already confirmed that the same thing happens with ruff check as
well.

```
$ echo "x   = 1" > example/foo.py
$ uvx ruff check example example/foo.py
I002 [*] Missing required import: `from __future__ import annotations`
--> /path/to/example/foo.py:1:1
help: Insert required import: `from __future__ import annotations`

I002 [*] Missing required import: `from __future__ import annotations`
--> /path/to/example/foo.py:1:1
help: Insert required import: `from __future__ import annotations`

Found 2 errors.
[*] 2 fixable with the `--fix` option.
```
This commit is contained in:
Takayuki Maeda 2025-09-20 03:40:23 +09:00 committed by GitHub
parent 3bf4dae452
commit bd5b3e4f6e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 107 additions and 14 deletions

View file

@ -523,10 +523,42 @@ impl<'config> WalkPythonFilesState<'config> {
let (files, error) = self.merged.into_inner().unwrap();
error?;
Ok((files, self.resolver.into_inner().unwrap()))
let deduplicated_files = deduplicate_files(files);
Ok((deduplicated_files, self.resolver.into_inner().unwrap()))
}
}
/// Deduplicate files by path, prioritizing `Root` files over `Nested` files.
///
/// When the same path appears both as a directly specified input (`Root`)
/// and via directory traversal (`Nested`), keep the `Root` entry and drop
/// the `Nested` entry.
///
/// Dropping the root entry means that the explicitly passed path may be
/// unintentionally ignored, since it is treated as nested and can be excluded
/// despite being requested.
///
/// Concretely, with `lint.exclude = ["foo.py"]` and `ruff check . foo.py`,
/// we must keep `Root(foo.py)` and drop `Nested(foo.py)` so `foo.py` is
/// linted as the user requested.
fn deduplicate_files(mut files: ResolvedFiles) -> ResolvedFiles {
// Sort by path; for identical paths, prefer Root over Nested; place errors after files
files.sort_by(|a, b| match (a, b) {
(Ok(a_file), Ok(b_file)) => a_file.cmp(b_file),
(Ok(_), Err(_)) => Ordering::Less,
(Err(_), Ok(_)) => Ordering::Greater,
(Err(_), Err(_)) => Ordering::Equal,
});
files.dedup_by(|a, b| match (a, b) {
(Ok(a_file), Ok(b_file)) => a_file.path() == b_file.path(),
_ => false,
});
files
}
struct PythonFilesVisitorBuilder<'s, 'config> {
state: &'s WalkPythonFilesState<'config>,
transformer: &'s (dyn ConfigurationTransformer + Sync),
@ -682,7 +714,7 @@ impl Drop for PythonFilesVisitor<'_, '_> {
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
pub enum ResolvedFile {
/// File explicitly passed to the CLI
Root(PathBuf),
@ -715,18 +747,6 @@ impl ResolvedFile {
}
}
impl PartialOrd for ResolvedFile {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for ResolvedFile {
fn cmp(&self, other: &Self) -> Ordering {
self.path().cmp(other.path())
}
}
/// Return `true` if the Python file at [`Path`] is _not_ excluded.
pub fn python_file_at_path(
path: &Path,