Deduplicate input paths (#20105)

<!--
Thank you for contributing to Ruff/ty! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title? (Please prefix
with `[ty]` for ty pull
  requests.)
- Does this pull request include references to any relevant issues?
-->

## Summary

<!-- What's the purpose of the change? What does it do, and why? -->

Fixes #20035, fixes #19395

This is for deduplicating input paths to avoid processing the same file
multiple times.

This is my first contribution, so I'm sorry if I miss something. Please
tell me if this is needed for this feature.

## Test Plan

<!-- How was it tested? -->

I just added a test `find_python_files_deduplicated` in
eee1020e32/crates/ruff_workspace/src/resolver.rs (L1017)
. This pull request adds changes to `WalkPythonFilesState::finish`,
which is used in `python_files_in_path`, so they affect some commands
such as `analyze`, `format`, `check` and so on. I will add snapshot
tests for them if necessary.

I’ve already confirmed that the same thing happens with ruff check as
well.

```
$ echo "x   = 1" > example/foo.py
$ uvx ruff check example example/foo.py
I002 [*] Missing required import: `from __future__ import annotations`
--> /path/to/example/foo.py:1:1
help: Insert required import: `from __future__ import annotations`

I002 [*] Missing required import: `from __future__ import annotations`
--> /path/to/example/foo.py:1:1
help: Insert required import: `from __future__ import annotations`

Found 2 errors.
[*] 2 fixable with the `--fix` option.
```
This commit is contained in:
Takayuki Maeda 2025-09-20 03:40:23 +09:00 committed by GitHub
parent 3bf4dae452
commit bd5b3e4f6e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 107 additions and 14 deletions

View file

@ -500,6 +500,35 @@ OTHER = "OTHER"
Ok(()) Ok(())
} }
/// Regression test for <https://github.com/astral-sh/ruff/issues/20035>
#[test]
fn deduplicate_directory_and_explicit_file() -> Result<()> {
let tempdir = TempDir::new()?;
let root = tempdir.path();
let main = root.join("main.py");
fs::write(&main, "x = 1\n")?;
assert_cmd_snapshot!(
Command::new(get_cargo_bin(BIN_NAME))
.current_dir(root)
.args(["format", "--no-cache", "--check"])
.arg(".")
.arg("main.py"),
@r"
success: false
exit_code: 1
----- stdout -----
Would reformat: main.py
1 file would be reformatted
----- stderr -----
"
);
Ok(())
}
#[test] #[test]
fn syntax_error() -> Result<()> { fn syntax_error() -> Result<()> {
let tempdir = TempDir::new()?; let tempdir = TempDir::new()?;

View file

@ -271,6 +271,50 @@ OTHER = "OTHER"
Ok(()) Ok(())
} }
/// Regression test for <https://github.com/astral-sh/ruff/issues/20035>
#[test]
fn deduplicate_directory_and_explicit_file() -> Result<()> {
let tempdir = TempDir::new()?;
let root = tempdir.path();
let ruff_toml = tempdir.path().join("ruff.toml");
fs::write(
&ruff_toml,
r#"
[lint]
exclude = ["main.py"]
"#,
)?;
let main = root.join("main.py");
fs::write(&main, "import os\n")?;
insta::with_settings!({
filters => vec![(tempdir_filter(&tempdir).as_str(), "[TMP]/")]
}, {
assert_cmd_snapshot!(
Command::new(get_cargo_bin(BIN_NAME))
.current_dir(root)
.args(STDIN_BASE_OPTIONS)
.args(["--config", &ruff_toml.file_name().unwrap().to_string_lossy()])
.arg(".")
// Explicitly pass main.py, should be linted regardless of it being excluded by lint.exclude
.arg("main.py"),
@r"
success: false
exit_code: 1
----- stdout -----
main.py:1:8: F401 [*] `os` imported but unused
Found 1 error.
[*] 1 fixable with the `--fix` option.
----- stderr -----
"
);
});
Ok(())
}
#[test] #[test]
fn exclude_stdin() -> Result<()> { fn exclude_stdin() -> Result<()> {
let tempdir = TempDir::new()?; let tempdir = TempDir::new()?;

View file

@ -523,10 +523,42 @@ impl<'config> WalkPythonFilesState<'config> {
let (files, error) = self.merged.into_inner().unwrap(); let (files, error) = self.merged.into_inner().unwrap();
error?; error?;
Ok((files, self.resolver.into_inner().unwrap())) let deduplicated_files = deduplicate_files(files);
Ok((deduplicated_files, self.resolver.into_inner().unwrap()))
} }
} }
/// Deduplicate files by path, prioritizing `Root` files over `Nested` files.
///
/// When the same path appears both as a directly specified input (`Root`)
/// and via directory traversal (`Nested`), keep the `Root` entry and drop
/// the `Nested` entry.
///
/// Dropping the root entry means that the explicitly passed path may be
/// unintentionally ignored, since it is treated as nested and can be excluded
/// despite being requested.
///
/// Concretely, with `lint.exclude = ["foo.py"]` and `ruff check . foo.py`,
/// we must keep `Root(foo.py)` and drop `Nested(foo.py)` so `foo.py` is
/// linted as the user requested.
fn deduplicate_files(mut files: ResolvedFiles) -> ResolvedFiles {
// Sort by path; for identical paths, prefer Root over Nested; place errors after files
files.sort_by(|a, b| match (a, b) {
(Ok(a_file), Ok(b_file)) => a_file.cmp(b_file),
(Ok(_), Err(_)) => Ordering::Less,
(Err(_), Ok(_)) => Ordering::Greater,
(Err(_), Err(_)) => Ordering::Equal,
});
files.dedup_by(|a, b| match (a, b) {
(Ok(a_file), Ok(b_file)) => a_file.path() == b_file.path(),
_ => false,
});
files
}
struct PythonFilesVisitorBuilder<'s, 'config> { struct PythonFilesVisitorBuilder<'s, 'config> {
state: &'s WalkPythonFilesState<'config>, state: &'s WalkPythonFilesState<'config>,
transformer: &'s (dyn ConfigurationTransformer + Sync), transformer: &'s (dyn ConfigurationTransformer + Sync),
@ -682,7 +714,7 @@ impl Drop for PythonFilesVisitor<'_, '_> {
} }
} }
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
pub enum ResolvedFile { pub enum ResolvedFile {
/// File explicitly passed to the CLI /// File explicitly passed to the CLI
Root(PathBuf), Root(PathBuf),
@ -715,18 +747,6 @@ impl ResolvedFile {
} }
} }
impl PartialOrd for ResolvedFile {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for ResolvedFile {
fn cmp(&self, other: &Self) -> Ordering {
self.path().cmp(other.path())
}
}
/// Return `true` if the Python file at [`Path`] is _not_ excluded. /// Return `true` if the Python file at [`Path`] is _not_ excluded.
pub fn python_file_at_path( pub fn python_file_at_path(
path: &Path, path: &Path,