mirror of
https://github.com/astral-sh/ruff.git
synced 2025-10-01 14:21:53 +00:00
Use rayon to parallelize the stability check
<!-- Thank you for contributing to Ruff! To help us out with reviewing, please consider the following: - Does this pull request include a summary of the change? (See below.) - Does this pull request include a descriptive title? - Does this pull request include references to any relevant issues? --> ## Summary This PR uses rayon to parallelize the stability check by scheduling each project as its own task. <!-- What's the purpose of the change? What does it do, and why? --> ## Test Plan I ran the ecosystem check. It now makes use of all cores (except at the end, there are some large projects). ## Performance The check now completes in minutes where it took about 30 minutes before. <!-- How was it tested? -->
This commit is contained in:
parent
9c2a75284b
commit
dc65007fe9
3 changed files with 252 additions and 107 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -1983,6 +1983,7 @@ dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
|
"rayon",
|
||||||
"regex",
|
"regex",
|
||||||
"ruff",
|
"ruff",
|
||||||
"ruff_cli",
|
"ruff_cli",
|
||||||
|
|
|
@ -26,6 +26,7 @@ log = { workspace = true }
|
||||||
once_cell = { workspace = true }
|
once_cell = { workspace = true }
|
||||||
pretty_assertions = { version = "1.3.0" }
|
pretty_assertions = { version = "1.3.0" }
|
||||||
regex = { workspace = true }
|
regex = { workspace = true }
|
||||||
|
rayon = "1.7.0"
|
||||||
rustpython-format = { workspace = true }
|
rustpython-format = { workspace = true }
|
||||||
rustpython-parser = { workspace = true }
|
rustpython-parser = { workspace = true }
|
||||||
schemars = { workspace = true }
|
schemars = { workspace = true }
|
||||||
|
|
|
@ -2,23 +2,27 @@
|
||||||
//! known as formatter stability or formatter idempotency, and that the formatter prints
|
//! known as formatter stability or formatter idempotency, and that the formatter prints
|
||||||
//! syntactically valid code. As our test cases cover only a limited amount of code, this allows
|
//! syntactically valid code. As our test cases cover only a limited amount of code, this allows
|
||||||
//! checking entire repositories.
|
//! checking entire repositories.
|
||||||
#![allow(clippy::print_stdout)]
|
|
||||||
|
use std::fmt::{Display, Formatter};
|
||||||
|
use std::io::stdout;
|
||||||
|
use std::io::Write;
|
||||||
|
use std::panic::catch_unwind;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::process::ExitCode;
|
||||||
|
use std::sync::mpsc::channel;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
use std::{fmt, fs, iter};
|
||||||
|
|
||||||
use anyhow::{bail, Context};
|
use anyhow::{bail, Context};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
use similar::{ChangeTag, TextDiff};
|
||||||
|
|
||||||
use ruff::resolver::python_files_in_path;
|
use ruff::resolver::python_files_in_path;
|
||||||
use ruff::settings::types::{FilePattern, FilePatternSet};
|
use ruff::settings::types::{FilePattern, FilePatternSet};
|
||||||
use ruff_cli::args::CheckArgs;
|
use ruff_cli::args::CheckArgs;
|
||||||
use ruff_cli::resolve::resolve;
|
use ruff_cli::resolve::resolve;
|
||||||
use ruff_python_formatter::{format_module, PyFormatOptions};
|
use ruff_python_formatter::{format_module, PyFormatOptions};
|
||||||
use similar::{ChangeTag, TextDiff};
|
|
||||||
use std::io::Write;
|
|
||||||
use std::panic::catch_unwind;
|
|
||||||
use std::path::{Path, PathBuf};
|
|
||||||
use std::process::ExitCode;
|
|
||||||
use std::time::Instant;
|
|
||||||
use std::{fs, io, iter};
|
|
||||||
|
|
||||||
/// Control the verbosity of the output
|
/// Control the verbosity of the output
|
||||||
#[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum, Default)]
|
#[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum, Default)]
|
||||||
|
@ -58,24 +62,16 @@ struct WrapperArgs {
|
||||||
|
|
||||||
pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
|
pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
|
||||||
let all_success = if args.multi_project {
|
let all_success = if args.multi_project {
|
||||||
let mut all_success = true;
|
check_multi_project(args)
|
||||||
for base_dir in &args.files {
|
|
||||||
for dir in base_dir.read_dir()? {
|
|
||||||
let dir = dir?;
|
|
||||||
println!("Starting {}", dir.path().display());
|
|
||||||
let success = check_repo(&Args {
|
|
||||||
files: vec![dir.path().clone()],
|
|
||||||
..*args
|
|
||||||
});
|
|
||||||
println!("Finished {}: {:?}", dir.path().display(), success);
|
|
||||||
if !matches!(success, Ok(true)) {
|
|
||||||
all_success = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
all_success
|
|
||||||
} else {
|
} else {
|
||||||
check_repo(args)?
|
let result = check_repo(args)?;
|
||||||
|
|
||||||
|
#[allow(clippy::print_stdout)]
|
||||||
|
{
|
||||||
|
print!("{}", result.display(args.format));
|
||||||
|
}
|
||||||
|
|
||||||
|
result.is_success()
|
||||||
};
|
};
|
||||||
if all_success {
|
if all_success {
|
||||||
Ok(ExitCode::SUCCESS)
|
Ok(ExitCode::SUCCESS)
|
||||||
|
@ -84,8 +80,92 @@ pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum Message {
|
||||||
|
Start {
|
||||||
|
path: PathBuf,
|
||||||
|
},
|
||||||
|
Failed {
|
||||||
|
path: PathBuf,
|
||||||
|
error: anyhow::Error,
|
||||||
|
},
|
||||||
|
Finished {
|
||||||
|
path: PathBuf,
|
||||||
|
result: CheckRepoResult,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_multi_project(args: &Args) -> bool {
|
||||||
|
let mut all_success = true;
|
||||||
|
let mut total_errors = 0;
|
||||||
|
let mut total_files = 0;
|
||||||
|
let start = Instant::now();
|
||||||
|
|
||||||
|
rayon::scope(|scope| {
|
||||||
|
let (sender, receiver) = channel();
|
||||||
|
|
||||||
|
for base_dir in &args.files {
|
||||||
|
for dir in base_dir.read_dir().unwrap() {
|
||||||
|
let path = dir.unwrap().path().clone();
|
||||||
|
|
||||||
|
let sender = sender.clone();
|
||||||
|
|
||||||
|
scope.spawn(move |_| {
|
||||||
|
sender.send(Message::Start { path: path.clone() }).unwrap();
|
||||||
|
|
||||||
|
match check_repo(&Args {
|
||||||
|
files: vec![path.clone()],
|
||||||
|
..*args
|
||||||
|
}) {
|
||||||
|
Ok(result) => sender.send(Message::Finished { result, path }),
|
||||||
|
Err(error) => sender.send(Message::Failed { error, path }),
|
||||||
|
}
|
||||||
|
.unwrap();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
scope.spawn(|_| {
|
||||||
|
let mut stdout = stdout().lock();
|
||||||
|
|
||||||
|
for message in receiver {
|
||||||
|
match message {
|
||||||
|
Message::Start { path } => {
|
||||||
|
writeln!(stdout, "Starting {}", path.display()).unwrap();
|
||||||
|
}
|
||||||
|
Message::Finished { path, result } => {
|
||||||
|
total_errors += result.diagnostics.len();
|
||||||
|
total_files += result.file_count;
|
||||||
|
writeln!(
|
||||||
|
stdout,
|
||||||
|
"Finished {}\n{}\n",
|
||||||
|
path.display(),
|
||||||
|
result.display(args.format)
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
all_success = all_success && result.is_success();
|
||||||
|
}
|
||||||
|
Message::Failed { path, error } => {
|
||||||
|
writeln!(stdout, "Failed {}: {}", path.display(), error).unwrap();
|
||||||
|
all_success = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
let duration = start.elapsed();
|
||||||
|
|
||||||
|
#[allow(clippy::print_stdout)]
|
||||||
|
{
|
||||||
|
println!("{total_errors} stability errors in {total_files} files");
|
||||||
|
println!("Finished in {}s", duration.as_secs_f32());
|
||||||
|
}
|
||||||
|
|
||||||
|
all_success
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns whether the check was successful
|
/// Returns whether the check was successful
|
||||||
pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
|
fn check_repo(args: &Args) -> anyhow::Result<CheckRepoResult> {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
// Find files to check (or in this case, format twice). Adapted from ruff_cli
|
// Find files to check (or in this case, format twice). Adapted from ruff_cli
|
||||||
|
@ -113,7 +193,7 @@ pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut formatted_counter = 0;
|
let mut formatted_counter = 0;
|
||||||
let errors = paths
|
let errors: Vec<_> = paths
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|dir_entry| {
|
.map(|dir_entry| {
|
||||||
// Doesn't make sense to recover here in this test script
|
// Doesn't make sense to recover here in this test script
|
||||||
|
@ -130,8 +210,14 @@ pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
|
||||||
let result = match catch_unwind(|| check_file(&file)) {
|
let result = match catch_unwind(|| check_file(&file)) {
|
||||||
Ok(result) => result,
|
Ok(result) => result,
|
||||||
Err(panic) => {
|
Err(panic) => {
|
||||||
if let Ok(message) = panic.downcast::<String>() {
|
if let Some(message) = panic.downcast_ref::<String>() {
|
||||||
Err(FormatterStabilityError::Panic { message: *message })
|
Err(FormatterStabilityError::Panic {
|
||||||
|
message: message.clone(),
|
||||||
|
})
|
||||||
|
} else if let Some(&message) = panic.downcast_ref::<&str>() {
|
||||||
|
Err(FormatterStabilityError::Panic {
|
||||||
|
message: message.to_string(),
|
||||||
|
})
|
||||||
} else {
|
} else {
|
||||||
Err(FormatterStabilityError::Panic {
|
Err(FormatterStabilityError::Panic {
|
||||||
// This should not happen, but it can
|
// This should not happen, but it can
|
||||||
|
@ -144,94 +230,27 @@ pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
|
||||||
})
|
})
|
||||||
// We only care about the errors
|
// We only care about the errors
|
||||||
.filter_map(|(result, file)| match result {
|
.filter_map(|(result, file)| match result {
|
||||||
Err(err) => Some((err, file)),
|
Err(error) => Some(Diagnostic { file, error }),
|
||||||
Ok(()) => None,
|
Ok(()) => None,
|
||||||
});
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
let mut any_errors = false;
|
|
||||||
|
|
||||||
// Don't collect the iterator so we already see errors while it's still processing
|
|
||||||
for (error, file) in errors {
|
|
||||||
any_errors = true;
|
|
||||||
match error {
|
|
||||||
FormatterStabilityError::Unstable {
|
|
||||||
formatted,
|
|
||||||
reformatted,
|
|
||||||
} => {
|
|
||||||
println!("Unstable formatting {}", file.display());
|
|
||||||
match args.format {
|
|
||||||
Format::Minimal => {}
|
|
||||||
Format::Default => {
|
|
||||||
diff_show_only_changes(
|
|
||||||
io::stdout().lock().by_ref(),
|
|
||||||
&formatted,
|
|
||||||
&reformatted,
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
Format::Full => {
|
|
||||||
let diff = TextDiff::from_lines(&formatted, &reformatted)
|
|
||||||
.unified_diff()
|
|
||||||
.header("Formatted once", "Formatted twice")
|
|
||||||
.to_string();
|
|
||||||
println!(
|
|
||||||
r#"Reformatting the formatted code a second time resulted in formatting changes.
|
|
||||||
---
|
|
||||||
{diff}---
|
|
||||||
|
|
||||||
Formatted once:
|
|
||||||
---
|
|
||||||
{formatted}---
|
|
||||||
|
|
||||||
Formatted twice:
|
|
||||||
---
|
|
||||||
{reformatted}---"#,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FormatterStabilityError::InvalidSyntax { err, formatted } => {
|
|
||||||
println!(
|
|
||||||
"Formatter generated invalid syntax {}: {}",
|
|
||||||
file.display(),
|
|
||||||
err
|
|
||||||
);
|
|
||||||
if args.format == Format::Full {
|
|
||||||
println!("---\n{formatted}\n---\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FormatterStabilityError::Panic { message } => {
|
|
||||||
println!("Panic {}: {}", file.display(), message);
|
|
||||||
}
|
|
||||||
FormatterStabilityError::Other(err) => {
|
|
||||||
println!("Uncategorized error {}: {}", file.display(), err);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if args.exit_first_error {
|
|
||||||
return Ok(false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let duration = start.elapsed();
|
let duration = start.elapsed();
|
||||||
println!(
|
|
||||||
"Formatting {} files twice took {:.2}s",
|
|
||||||
formatted_counter,
|
|
||||||
duration.as_secs_f32()
|
|
||||||
);
|
|
||||||
|
|
||||||
if any_errors {
|
Ok(CheckRepoResult {
|
||||||
Ok(false)
|
duration,
|
||||||
} else {
|
file_count: formatted_counter,
|
||||||
Ok(true)
|
diagnostics: errors,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A compact diff that only shows a header and changes, but nothing unchanged. This makes viewing
|
/// A compact diff that only shows a header and changes, but nothing unchanged. This makes viewing
|
||||||
/// multiple errors easier.
|
/// multiple errors easier.
|
||||||
fn diff_show_only_changes(
|
fn diff_show_only_changes(
|
||||||
writer: &mut impl Write,
|
writer: &mut Formatter,
|
||||||
formatted: &str,
|
formatted: &str,
|
||||||
reformatted: &str,
|
reformatted: &str,
|
||||||
) -> io::Result<()> {
|
) -> fmt::Result {
|
||||||
for changes in TextDiff::from_lines(formatted, reformatted)
|
for changes in TextDiff::from_lines(formatted, reformatted)
|
||||||
.unified_diff()
|
.unified_diff()
|
||||||
.iter_hunks()
|
.iter_hunks()
|
||||||
|
@ -245,12 +264,136 @@ fn diff_show_only_changes(
|
||||||
writeln!(writer, "{}", changes.header())?;
|
writeln!(writer, "{}", changes.header())?;
|
||||||
}
|
}
|
||||||
write!(writer, "{}", change.tag())?;
|
write!(writer, "{}", change.tag())?;
|
||||||
writer.write_all(change.value().as_bytes())?;
|
writer.write_str(change.value())?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct CheckRepoResult {
|
||||||
|
duration: Duration,
|
||||||
|
file_count: usize,
|
||||||
|
diagnostics: Vec<Diagnostic>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CheckRepoResult {
|
||||||
|
fn display(&self, format: Format) -> DisplayCheckRepoResult {
|
||||||
|
DisplayCheckRepoResult {
|
||||||
|
result: self,
|
||||||
|
format,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_success(&self) -> bool {
|
||||||
|
self.diagnostics.is_empty()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DisplayCheckRepoResult<'a> {
|
||||||
|
result: &'a CheckRepoResult,
|
||||||
|
format: Format,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for DisplayCheckRepoResult<'_> {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let CheckRepoResult {
|
||||||
|
duration,
|
||||||
|
file_count,
|
||||||
|
diagnostics,
|
||||||
|
} = self.result;
|
||||||
|
|
||||||
|
for diagnostic in diagnostics {
|
||||||
|
write!(f, "{}", diagnostic.display(self.format))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
f,
|
||||||
|
"Formatting {} files twice took {:.2}s",
|
||||||
|
file_count,
|
||||||
|
duration.as_secs_f32()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct Diagnostic {
|
||||||
|
file: PathBuf,
|
||||||
|
error: FormatterStabilityError,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Diagnostic {
|
||||||
|
fn display(&self, format: Format) -> DisplayDiagnostic {
|
||||||
|
DisplayDiagnostic {
|
||||||
|
diagnostic: self,
|
||||||
|
format,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DisplayDiagnostic<'a> {
|
||||||
|
format: Format,
|
||||||
|
diagnostic: &'a Diagnostic,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for DisplayDiagnostic<'_> {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let Diagnostic { file, error } = &self.diagnostic;
|
||||||
|
|
||||||
|
match error {
|
||||||
|
FormatterStabilityError::Unstable {
|
||||||
|
formatted,
|
||||||
|
reformatted,
|
||||||
|
} => {
|
||||||
|
writeln!(f, "Unstable formatting {}", file.display())?;
|
||||||
|
match self.format {
|
||||||
|
Format::Minimal => {}
|
||||||
|
Format::Default => {
|
||||||
|
diff_show_only_changes(f, formatted, reformatted)?;
|
||||||
|
}
|
||||||
|
Format::Full => {
|
||||||
|
let diff = TextDiff::from_lines(formatted.as_str(), reformatted.as_str())
|
||||||
|
.unified_diff()
|
||||||
|
.header("Formatted once", "Formatted twice")
|
||||||
|
.to_string();
|
||||||
|
writeln!(
|
||||||
|
f,
|
||||||
|
r#"Reformatting the formatted code a second time resulted in formatting changes.
|
||||||
|
---
|
||||||
|
{diff}---
|
||||||
|
|
||||||
|
Formatted once:
|
||||||
|
---
|
||||||
|
{formatted}---
|
||||||
|
|
||||||
|
Formatted twice:
|
||||||
|
---
|
||||||
|
{reformatted}---\n"#,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FormatterStabilityError::InvalidSyntax { err, formatted } => {
|
||||||
|
writeln!(
|
||||||
|
f,
|
||||||
|
"Formatter generated invalid syntax {}: {}",
|
||||||
|
file.display(),
|
||||||
|
err
|
||||||
|
)?;
|
||||||
|
if self.format == Format::Full {
|
||||||
|
writeln!(f, "---\n{formatted}\n---\n")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FormatterStabilityError::Panic { message } => {
|
||||||
|
writeln!(f, "Panic {}: {}", file.display(), message)?;
|
||||||
|
}
|
||||||
|
FormatterStabilityError::Other(err) => {
|
||||||
|
writeln!(f, "Uncategorized error {}: {}", file.display(), err)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum FormatterStabilityError {
|
enum FormatterStabilityError {
|
||||||
/// First and second pass of the formatter are different
|
/// First and second pass of the formatter are different
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue