Avoid sorting all paths in the format command (#8181)

## Summary

Related to https://github.com/astral-sh/ruff/issues/8135.

If we're not printing a `--diff`, or a summary of `--check` changes, we
can avoid sorting the list of results. Further, when sorting, we only
need to sort a small subset of the entries, in the common case (i.e., in
general, it's much more likely that a file is formatted than not).

## Test Plan

Local benchmarks suggest a 5-10% speedup on the cached behavior:

```
❯ hyperfine --warmup 3 "./target/release/ruff format ../airflow" "./target/release/sort format ../airflow"
Benchmark 1: ./target/release/ruff format ../airflow
  Time (mean ± σ):      70.3 ms ±   5.2 ms    [User: 52.1 ms, System: 59.0 ms]
  Range (min … max):    68.3 ms … 101.7 ms    42 runs

  Warning: Statistical outliers were detected. Consider re-running this benchmark on a quiet PC without any interferences from other programs. It might help to use the '--warmup' or '--prepare' options.

Benchmark 2: ./target/release/sort format ../airflow
  Time (mean ± σ):      66.0 ms ±   1.4 ms    [User: 48.3 ms, System: 58.4 ms]
  Range (min … max):    64.7 ms …  71.8 ms    44 runs

  Warning: Statistical outliers were detected. Consider re-running this benchmark on a quiet PC without any interferences from other programs. It might help to use the '--warmup' or '--prepare' options.

Summary
  './target/release/sort format ../airflow' ran
    1.07 ± 0.08 times faster than './target/release/ruff format ../airflow'
```
This commit is contained in:
Charlie Marsh 2023-10-24 13:54:06 -07:00 committed by GitHub
parent 2d0769e324
commit 0236e0751c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -107,7 +107,7 @@ pub(crate) fn format(
};
let start = Instant::now();
let (mut results, mut errors): (Vec<_>, Vec<_>) = paths
let (results, mut errors): (Vec<_>, Vec<_>) = paths
.par_iter()
.filter_map(|entry| {
match entry {
@ -168,27 +168,6 @@ pub(crate) fn format(
});
let duration = start.elapsed();
// Make output deterministic, at least as long as we have a path
results.sort_unstable_by(|x, y| x.path.cmp(&y.path));
errors.sort_by(|x, y| {
fn get_key(error: &FormatCommandError) -> Option<&PathBuf> {
match &error {
FormatCommandError::Ignore(ignore) => {
if let ignore::Error::WithPath { path, .. } = ignore {
Some(path)
} else {
None
}
}
FormatCommandError::Panic(path, _)
| FormatCommandError::Read(path, _)
| FormatCommandError::Format(path, _)
| FormatCommandError::Write(path, _) => path.as_ref(),
}
}
get_key(x).cmp(&get_key(y))
});
debug!(
"Formatted {} files in {:.2?}",
results.len() + errors.len(),
@ -198,15 +177,21 @@ pub(crate) fn format(
caches.persist()?;
// Report on any errors.
errors.sort_unstable_by(|a, b| a.path().cmp(&b.path()));
for error in &errors {
error!("{error}");
}
results.sort_unstable_by(|a, b| a.path.cmp(&b.path));
let results = FormatResults::new(results.as_slice(), mode);
if mode.is_diff() {
results.write_diff(&mut stdout().lock())?;
match mode {
FormatMode::Write => {}
FormatMode::Check => {
results.write_changed(&mut stdout().lock())?;
}
FormatMode::Diff => {
results.write_diff(&mut stdout().lock())?;
}
}
// Report on the formatting changes.
@ -470,27 +455,55 @@ impl<'a> FormatResults<'a> {
})
}
/// Write a diff of the formatting changes to the given writer.
fn write_diff(&self, f: &mut impl Write) -> io::Result<()> {
for result in self.results {
if let FormatResult::Diff {
unformatted,
formatted,
} = &result.result
{
let text_diff =
TextDiff::from_lines(unformatted.source_code(), formatted.source_code());
let mut unified_diff = text_diff.unified_diff();
unified_diff.header(
&fs::relativize_path(&result.path),
&fs::relativize_path(&result.path),
);
unified_diff.to_writer(&mut *f)?;
}
for (path, unformatted, formatted) in self
.results
.iter()
.filter_map(|result| {
if let FormatResult::Diff {
unformatted,
formatted,
} = &result.result
{
Some((result.path.as_path(), unformatted, formatted))
} else {
None
}
})
.sorted_unstable_by_key(|(path, _, _)| *path)
{
let text_diff =
TextDiff::from_lines(unformatted.source_code(), formatted.source_code());
let mut unified_diff = text_diff.unified_diff();
unified_diff.header(&fs::relativize_path(path), &fs::relativize_path(path));
unified_diff.to_writer(&mut *f)?;
}
Ok(())
}
/// Write a list of the files that would be changed to the given writer.
fn write_changed(&self, f: &mut impl Write) -> io::Result<()> {
for path in self
.results
.iter()
.filter_map(|result| {
if result.result.is_formatted() {
Some(result.path.as_path())
} else {
None
}
})
.sorted_unstable()
{
writeln!(f, "Would reformat: {}", fs::relativize_path(path).bold())?;
}
Ok(())
}
/// Write a summary of the formatting results to the given writer.
fn write_summary(&self, f: &mut impl Write) -> io::Result<()> {
// Compute the number of changed and unchanged files.
let mut changed = 0u32;
@ -498,14 +511,6 @@ impl<'a> FormatResults<'a> {
for result in self.results {
match &result.result {
FormatResult::Formatted => {
// If we're running in check mode, report on any files that would be formatted.
if self.mode.is_check() {
writeln!(
f,
"Would reformat: {}",
fs::relativize_path(&result.path).bold()
)?;
}
changed += 1;
}
FormatResult::Unchanged => unchanged += 1,
@ -564,6 +569,24 @@ pub(crate) enum FormatCommandError {
Write(Option<PathBuf>, SourceError),
}
impl FormatCommandError {
fn path(&self) -> Option<&Path> {
match self {
Self::Ignore(err) => {
if let ignore::Error::WithPath { path, .. } = err {
Some(path.as_path())
} else {
None
}
}
Self::Panic(path, _)
| Self::Read(path, _)
| Self::Format(path, _)
| Self::Write(path, _) => path.as_deref(),
}
}
}
impl Display for FormatCommandError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {