mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-03 18:28:24 +00:00
Use rayon to parallelize the stability check
<!-- Thank you for contributing to Ruff! To help us out with reviewing, please consider the following: - Does this pull request include a summary of the change? (See below.) - Does this pull request include a descriptive title? - Does this pull request include references to any relevant issues? --> ## Summary This PR uses rayon to parallelize the stability check by scheduling each project as its own task. <!-- What's the purpose of the change? What does it do, and why? --> ## Test Plan I ran the ecosystem check. It now makes use of all cores (except at the end, there are some large projects). ## Performance The check now completes in minutes where it took about 30 minutes before. <!-- How was it tested? -->
This commit is contained in:
parent
9c2a75284b
commit
dc65007fe9
3 changed files with 252 additions and 107 deletions
|
@ -2,23 +2,27 @@
|
|||
//! known as formatter stability or formatter idempotency, and that the formatter prints
|
||||
//! syntactically valid code. As our test cases cover only a limited amount of code, this allows
|
||||
//! checking entire repositories.
|
||||
#![allow(clippy::print_stdout)]
|
||||
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::io::stdout;
|
||||
use std::io::Write;
|
||||
use std::panic::catch_unwind;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::ExitCode;
|
||||
use std::sync::mpsc::channel;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{fmt, fs, iter};
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use clap::Parser;
|
||||
use log::debug;
|
||||
use similar::{ChangeTag, TextDiff};
|
||||
|
||||
use ruff::resolver::python_files_in_path;
|
||||
use ruff::settings::types::{FilePattern, FilePatternSet};
|
||||
use ruff_cli::args::CheckArgs;
|
||||
use ruff_cli::resolve::resolve;
|
||||
use ruff_python_formatter::{format_module, PyFormatOptions};
|
||||
use similar::{ChangeTag, TextDiff};
|
||||
use std::io::Write;
|
||||
use std::panic::catch_unwind;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::ExitCode;
|
||||
use std::time::Instant;
|
||||
use std::{fs, io, iter};
|
||||
|
||||
/// Control the verbosity of the output
|
||||
#[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum, Default)]
|
||||
|
@ -58,24 +62,16 @@ struct WrapperArgs {
|
|||
|
||||
pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
|
||||
let all_success = if args.multi_project {
|
||||
let mut all_success = true;
|
||||
for base_dir in &args.files {
|
||||
for dir in base_dir.read_dir()? {
|
||||
let dir = dir?;
|
||||
println!("Starting {}", dir.path().display());
|
||||
let success = check_repo(&Args {
|
||||
files: vec![dir.path().clone()],
|
||||
..*args
|
||||
});
|
||||
println!("Finished {}: {:?}", dir.path().display(), success);
|
||||
if !matches!(success, Ok(true)) {
|
||||
all_success = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
all_success
|
||||
check_multi_project(args)
|
||||
} else {
|
||||
check_repo(args)?
|
||||
let result = check_repo(args)?;
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
{
|
||||
print!("{}", result.display(args.format));
|
||||
}
|
||||
|
||||
result.is_success()
|
||||
};
|
||||
if all_success {
|
||||
Ok(ExitCode::SUCCESS)
|
||||
|
@ -84,8 +80,92 @@ pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
|
|||
}
|
||||
}
|
||||
|
||||
enum Message {
|
||||
Start {
|
||||
path: PathBuf,
|
||||
},
|
||||
Failed {
|
||||
path: PathBuf,
|
||||
error: anyhow::Error,
|
||||
},
|
||||
Finished {
|
||||
path: PathBuf,
|
||||
result: CheckRepoResult,
|
||||
},
|
||||
}
|
||||
|
||||
fn check_multi_project(args: &Args) -> bool {
|
||||
let mut all_success = true;
|
||||
let mut total_errors = 0;
|
||||
let mut total_files = 0;
|
||||
let start = Instant::now();
|
||||
|
||||
rayon::scope(|scope| {
|
||||
let (sender, receiver) = channel();
|
||||
|
||||
for base_dir in &args.files {
|
||||
for dir in base_dir.read_dir().unwrap() {
|
||||
let path = dir.unwrap().path().clone();
|
||||
|
||||
let sender = sender.clone();
|
||||
|
||||
scope.spawn(move |_| {
|
||||
sender.send(Message::Start { path: path.clone() }).unwrap();
|
||||
|
||||
match check_repo(&Args {
|
||||
files: vec![path.clone()],
|
||||
..*args
|
||||
}) {
|
||||
Ok(result) => sender.send(Message::Finished { result, path }),
|
||||
Err(error) => sender.send(Message::Failed { error, path }),
|
||||
}
|
||||
.unwrap();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
scope.spawn(|_| {
|
||||
let mut stdout = stdout().lock();
|
||||
|
||||
for message in receiver {
|
||||
match message {
|
||||
Message::Start { path } => {
|
||||
writeln!(stdout, "Starting {}", path.display()).unwrap();
|
||||
}
|
||||
Message::Finished { path, result } => {
|
||||
total_errors += result.diagnostics.len();
|
||||
total_files += result.file_count;
|
||||
writeln!(
|
||||
stdout,
|
||||
"Finished {}\n{}\n",
|
||||
path.display(),
|
||||
result.display(args.format)
|
||||
)
|
||||
.unwrap();
|
||||
all_success = all_success && result.is_success();
|
||||
}
|
||||
Message::Failed { path, error } => {
|
||||
writeln!(stdout, "Failed {}: {}", path.display(), error).unwrap();
|
||||
all_success = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
let duration = start.elapsed();
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
{
|
||||
println!("{total_errors} stability errors in {total_files} files");
|
||||
println!("Finished in {}s", duration.as_secs_f32());
|
||||
}
|
||||
|
||||
all_success
|
||||
}
|
||||
|
||||
/// Returns whether the check was successful
|
||||
pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
|
||||
fn check_repo(args: &Args) -> anyhow::Result<CheckRepoResult> {
|
||||
let start = Instant::now();
|
||||
|
||||
// Find files to check (or in this case, format twice). Adapted from ruff_cli
|
||||
|
@ -113,7 +193,7 @@ pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
|
|||
}
|
||||
|
||||
let mut formatted_counter = 0;
|
||||
let errors = paths
|
||||
let errors: Vec<_> = paths
|
||||
.into_iter()
|
||||
.map(|dir_entry| {
|
||||
// Doesn't make sense to recover here in this test script
|
||||
|
@ -130,8 +210,14 @@ pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
|
|||
let result = match catch_unwind(|| check_file(&file)) {
|
||||
Ok(result) => result,
|
||||
Err(panic) => {
|
||||
if let Ok(message) = panic.downcast::<String>() {
|
||||
Err(FormatterStabilityError::Panic { message: *message })
|
||||
if let Some(message) = panic.downcast_ref::<String>() {
|
||||
Err(FormatterStabilityError::Panic {
|
||||
message: message.clone(),
|
||||
})
|
||||
} else if let Some(&message) = panic.downcast_ref::<&str>() {
|
||||
Err(FormatterStabilityError::Panic {
|
||||
message: message.to_string(),
|
||||
})
|
||||
} else {
|
||||
Err(FormatterStabilityError::Panic {
|
||||
// This should not happen, but it can
|
||||
|
@ -144,94 +230,27 @@ pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
|
|||
})
|
||||
// We only care about the errors
|
||||
.filter_map(|(result, file)| match result {
|
||||
Err(err) => Some((err, file)),
|
||||
Err(error) => Some(Diagnostic { file, error }),
|
||||
Ok(()) => None,
|
||||
});
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut any_errors = false;
|
||||
|
||||
// Don't collect the iterator so we already see errors while it's still processing
|
||||
for (error, file) in errors {
|
||||
any_errors = true;
|
||||
match error {
|
||||
FormatterStabilityError::Unstable {
|
||||
formatted,
|
||||
reformatted,
|
||||
} => {
|
||||
println!("Unstable formatting {}", file.display());
|
||||
match args.format {
|
||||
Format::Minimal => {}
|
||||
Format::Default => {
|
||||
diff_show_only_changes(
|
||||
io::stdout().lock().by_ref(),
|
||||
&formatted,
|
||||
&reformatted,
|
||||
)?;
|
||||
}
|
||||
Format::Full => {
|
||||
let diff = TextDiff::from_lines(&formatted, &reformatted)
|
||||
.unified_diff()
|
||||
.header("Formatted once", "Formatted twice")
|
||||
.to_string();
|
||||
println!(
|
||||
r#"Reformatting the formatted code a second time resulted in formatting changes.
|
||||
---
|
||||
{diff}---
|
||||
|
||||
Formatted once:
|
||||
---
|
||||
{formatted}---
|
||||
|
||||
Formatted twice:
|
||||
---
|
||||
{reformatted}---"#,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
FormatterStabilityError::InvalidSyntax { err, formatted } => {
|
||||
println!(
|
||||
"Formatter generated invalid syntax {}: {}",
|
||||
file.display(),
|
||||
err
|
||||
);
|
||||
if args.format == Format::Full {
|
||||
println!("---\n{formatted}\n---\n");
|
||||
}
|
||||
}
|
||||
FormatterStabilityError::Panic { message } => {
|
||||
println!("Panic {}: {}", file.display(), message);
|
||||
}
|
||||
FormatterStabilityError::Other(err) => {
|
||||
println!("Uncategorized error {}: {}", file.display(), err);
|
||||
}
|
||||
}
|
||||
|
||||
if args.exit_first_error {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
let duration = start.elapsed();
|
||||
println!(
|
||||
"Formatting {} files twice took {:.2}s",
|
||||
formatted_counter,
|
||||
duration.as_secs_f32()
|
||||
);
|
||||
|
||||
if any_errors {
|
||||
Ok(false)
|
||||
} else {
|
||||
Ok(true)
|
||||
}
|
||||
Ok(CheckRepoResult {
|
||||
duration,
|
||||
file_count: formatted_counter,
|
||||
diagnostics: errors,
|
||||
})
|
||||
}
|
||||
|
||||
/// A compact diff that only shows a header and changes, but nothing unchanged. This makes viewing
|
||||
/// multiple errors easier.
|
||||
fn diff_show_only_changes(
|
||||
writer: &mut impl Write,
|
||||
writer: &mut Formatter,
|
||||
formatted: &str,
|
||||
reformatted: &str,
|
||||
) -> io::Result<()> {
|
||||
) -> fmt::Result {
|
||||
for changes in TextDiff::from_lines(formatted, reformatted)
|
||||
.unified_diff()
|
||||
.iter_hunks()
|
||||
|
@ -245,12 +264,136 @@ fn diff_show_only_changes(
|
|||
writeln!(writer, "{}", changes.header())?;
|
||||
}
|
||||
write!(writer, "{}", change.tag())?;
|
||||
writer.write_all(change.value().as_bytes())?;
|
||||
writer.write_str(change.value())?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct CheckRepoResult {
|
||||
duration: Duration,
|
||||
file_count: usize,
|
||||
diagnostics: Vec<Diagnostic>,
|
||||
}
|
||||
|
||||
impl CheckRepoResult {
|
||||
fn display(&self, format: Format) -> DisplayCheckRepoResult {
|
||||
DisplayCheckRepoResult {
|
||||
result: self,
|
||||
format,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_success(&self) -> bool {
|
||||
self.diagnostics.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
struct DisplayCheckRepoResult<'a> {
|
||||
result: &'a CheckRepoResult,
|
||||
format: Format,
|
||||
}
|
||||
|
||||
impl Display for DisplayCheckRepoResult<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
let CheckRepoResult {
|
||||
duration,
|
||||
file_count,
|
||||
diagnostics,
|
||||
} = self.result;
|
||||
|
||||
for diagnostic in diagnostics {
|
||||
write!(f, "{}", diagnostic.display(self.format))?;
|
||||
}
|
||||
|
||||
writeln!(
|
||||
f,
|
||||
"Formatting {} files twice took {:.2}s",
|
||||
file_count,
|
||||
duration.as_secs_f32()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Diagnostic {
|
||||
file: PathBuf,
|
||||
error: FormatterStabilityError,
|
||||
}
|
||||
|
||||
impl Diagnostic {
|
||||
fn display(&self, format: Format) -> DisplayDiagnostic {
|
||||
DisplayDiagnostic {
|
||||
diagnostic: self,
|
||||
format,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct DisplayDiagnostic<'a> {
|
||||
format: Format,
|
||||
diagnostic: &'a Diagnostic,
|
||||
}
|
||||
|
||||
impl Display for DisplayDiagnostic<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
let Diagnostic { file, error } = &self.diagnostic;
|
||||
|
||||
match error {
|
||||
FormatterStabilityError::Unstable {
|
||||
formatted,
|
||||
reformatted,
|
||||
} => {
|
||||
writeln!(f, "Unstable formatting {}", file.display())?;
|
||||
match self.format {
|
||||
Format::Minimal => {}
|
||||
Format::Default => {
|
||||
diff_show_only_changes(f, formatted, reformatted)?;
|
||||
}
|
||||
Format::Full => {
|
||||
let diff = TextDiff::from_lines(formatted.as_str(), reformatted.as_str())
|
||||
.unified_diff()
|
||||
.header("Formatted once", "Formatted twice")
|
||||
.to_string();
|
||||
writeln!(
|
||||
f,
|
||||
r#"Reformatting the formatted code a second time resulted in formatting changes.
|
||||
---
|
||||
{diff}---
|
||||
|
||||
Formatted once:
|
||||
---
|
||||
{formatted}---
|
||||
|
||||
Formatted twice:
|
||||
---
|
||||
{reformatted}---\n"#,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
FormatterStabilityError::InvalidSyntax { err, formatted } => {
|
||||
writeln!(
|
||||
f,
|
||||
"Formatter generated invalid syntax {}: {}",
|
||||
file.display(),
|
||||
err
|
||||
)?;
|
||||
if self.format == Format::Full {
|
||||
writeln!(f, "---\n{formatted}\n---\n")?;
|
||||
}
|
||||
}
|
||||
FormatterStabilityError::Panic { message } => {
|
||||
writeln!(f, "Panic {}: {}", file.display(), message)?;
|
||||
}
|
||||
FormatterStabilityError::Other(err) => {
|
||||
writeln!(f, "Uncategorized error {}: {}", file.display(), err)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum FormatterStabilityError {
|
||||
/// First and second pass of the formatter are different
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue