Use more threads when discovering python files (#12258)

This commit is contained in:
Micha Reiser 2024-07-10 09:29:17 +02:00 committed by GitHub
parent 0bb2fc6eec
commit 4cc7bc9d32
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -9,7 +9,7 @@ use std::sync::RwLock;
use anyhow::Result;
use anyhow::{anyhow, bail};
use globset::{Candidate, GlobSet};
use ignore::{WalkBuilder, WalkState};
use ignore::{DirEntry, Error, ParallelVisitor, WalkBuilder, WalkState};
use itertools::Itertools;
use log::debug;
use matchit::{InsertError, Match, Router};
@ -378,21 +378,91 @@ pub fn python_files_in_path<'a>(
}
builder.standard_filters(resolver.respect_gitignore());
builder.hidden(false);
builder.threads(
std::thread::available_parallelism()
.map_or(1, std::num::NonZeroUsize::get)
.min(12),
);
let walker = builder.build_parallel();
// Run the `WalkParallel` to collect all Python files.
let is_hierarchical = resolver.is_hierarchical();
let error: std::sync::Mutex<Result<()>> = std::sync::Mutex::new(Ok(()));
let resolver: RwLock<Resolver> = RwLock::new(resolver);
let files: std::sync::Mutex<Vec<Result<ResolvedFile, ignore::Error>>> =
std::sync::Mutex::new(vec![]);
walker.run(|| {
Box::new(|result| {
let state = WalkPythonFilesState::new(resolver);
let mut visitor = PythonFilesVisitorBuilder::new(transformer, &state);
walker.visit(&mut visitor);
state.finish()
}
/// Per-entry outcomes of a walk: each element is either a successfully
/// resolved file or the `ignore::Error` reported for that entry.
type ResolvedFiles = Vec<Result<ResolvedFile, ignore::Error>>;
/// State shared by reference between all `PythonFilesVisitor`s of one walk.
struct WalkPythonFilesState<'config> {
/// Value of `Resolver::is_hierarchical()` captured in `new`, so visitors can
/// check it without taking the `resolver` lock.
is_hierarchical: bool,
/// Merged results: the files each visitor hands over when it is dropped,
/// paired with the first error that a visitor recorded (or `Ok(())`).
merged: std::sync::Mutex<(ResolvedFiles, Result<()>)>,
/// The resolver; visitors take `read()` to resolve settings for a path and
/// `write()` to add newly discovered scoped settings.
resolver: RwLock<Resolver<'config>>,
}
impl<'config> WalkPythonFilesState<'config> {
    /// Builds the shared walk state around `resolver`, starting with no
    /// collected files and no recorded error.
    fn new(resolver: Resolver<'config>) -> Self {
        // Query the resolver before it is moved behind the lock.
        let is_hierarchical = resolver.is_hierarchical();
        Self {
            is_hierarchical,
            merged: std::sync::Mutex::new((Vec::new(), Ok(()))),
            resolver: RwLock::new(resolver),
        }
    }

    /// Consumes the state and yields the merged files together with the
    /// resolver, or the recorded error if any visitor stored one.
    ///
    /// # Panics
    ///
    /// Panics if the `merged` lock is poisoned, or — when no error was
    /// recorded — if the `resolver` lock is poisoned.
    fn finish(self) -> Result<(Vec<Result<ResolvedFile, ignore::Error>>, Resolver<'config>)> {
        let Self {
            merged, resolver, ..
        } = self;
        let (files, outcome) = merged.into_inner().unwrap();
        // The resolver is only unwrapped on the success path.
        outcome.map(|()| (files, resolver.into_inner().unwrap()))
    }
}
/// Factory handed to the parallel walker; `build` produces one
/// `PythonFilesVisitor` per call, each borrowing the same shared state.
struct PythonFilesVisitorBuilder<'s, 'config> {
/// Shared walk state that every built visitor reports into.
state: &'s WalkPythonFilesState<'config>,
/// Transformer forwarded to each visitor for resolving scoped settings.
transformer: &'s dyn ConfigurationTransformer,
}
impl<'s, 'config> PythonFilesVisitorBuilder<'s, 'config> {
fn new(
transformer: &'s dyn ConfigurationTransformer,
state: &'s WalkPythonFilesState<'config>,
) -> Self {
Self { state, transformer }
}
}
/// A single visitor of the parallel walk. It buffers its results locally and
/// merges them into the shared state when it is dropped.
struct PythonFilesVisitor<'s, 'config> {
/// Files (or per-entry walk errors) collected by this visitor so far.
local_files: Vec<Result<ResolvedFile, ignore::Error>>,
/// Error recorded by this visitor when it asks the walk to quit; `Ok(())` otherwise.
local_error: Result<()>,
/// Shared walk state (resolver, hierarchy flag, merged results).
global: &'s WalkPythonFilesState<'config>,
/// Transformer passed to `resolve_scoped_settings` when a `pyproject.toml` is found.
transformer: &'s dyn ConfigurationTransformer,
}
impl<'config, 's> ignore::ParallelVisitorBuilder<'s> for PythonFilesVisitorBuilder<'s, 'config>
where
    'config: 's,
{
    /// Produces a fresh visitor with empty local buffers that shares this
    /// builder's state and transformer.
    fn build(&mut self) -> Box<dyn ignore::ParallelVisitor + 's> {
        let visitor = PythonFilesVisitor {
            global: self.state,
            transformer: self.transformer,
            local_files: Vec::new(),
            local_error: Ok(()),
        };
        Box::new(visitor)
    }
}
impl ParallelVisitor for PythonFilesVisitor<'_, '_> {
fn visit(&mut self, result: std::result::Result<DirEntry, Error>) -> WalkState {
// Respect our own exclusion behavior.
if let Ok(entry) = &result {
if entry.depth() > 0 {
let path = entry.path();
let resolver = resolver.read().unwrap();
let resolver = self.global.resolver.read().unwrap();
let settings = resolver.resolve(path);
if let Some(file_name) = path.file_name() {
let file_path = Candidate::new(path);
@ -421,7 +491,7 @@ pub fn python_files_in_path<'a>(
// Search for the `pyproject.toml` file in this directory, before we visit any
// of its contents.
if is_hierarchical {
if self.global.is_hierarchical {
if let Ok(entry) = &result {
if entry
.file_type()
@ -431,19 +501,19 @@ pub fn python_files_in_path<'a>(
Ok(Some(pyproject)) => match resolve_scoped_settings(
&pyproject,
Relativity::Parent,
transformer,
self.transformer,
) {
Ok((root, settings)) => {
resolver.write().unwrap().add(root, settings);
self.global.resolver.write().unwrap().add(root, settings);
}
Err(err) => {
*error.lock().unwrap() = Err(err);
self.local_error = Err(err);
return WalkState::Quit;
}
},
Ok(None) => {}
Err(err) => {
*error.lock().unwrap() = Err(err);
self.local_error = Err(err);
return WalkState::Quit;
}
}
@ -462,7 +532,7 @@ pub fn python_files_in_path<'a>(
} else {
// Otherwise, check if the file is included.
let path = entry.path();
let resolver = resolver.read().unwrap();
let resolver = self.global.resolver.read().unwrap();
let settings = resolver.resolve(path);
if settings.file_resolver.include.is_match(path) {
debug!("Included path via `include`: {:?}", path);
@ -476,21 +546,34 @@ pub fn python_files_in_path<'a>(
};
if let Some(resolved) = resolved {
files.lock().unwrap().push(Ok(resolved));
self.local_files.push(Ok(resolved));
}
}
Err(err) => {
files.lock().unwrap().push(Err(err));
self.local_files.push(Err(err));
}
}
WalkState::Continue
})
});
}
}
error.into_inner().unwrap()?;
/// Hands this visitor's locally buffered results over to the shared state
/// when the visitor is dropped.
impl Drop for PythonFilesVisitor<'_, '_> {
fn drop(&mut self) {
// One lock acquisition covers the whole merge; panics if the lock is poisoned.
let mut merged = self.global.merged.lock().unwrap();
let (ref mut files, ref mut error) = &mut *merged;
// NOTE(review): the next line refers to `resolver`, which is not bound in
// this scope, and is not terminated as a statement — it looks like a
// leftover from the previous function tail; confirm and remove.
Ok((files.into_inner().unwrap(), resolver.into_inner().unwrap()))
// If nothing has been merged yet, move the local vector in wholesale;
// otherwise append to what is already there.
if files.is_empty() {
*files = std::mem::take(&mut self.local_files);
} else {
files.append(&mut self.local_files);
}
// Move the local error out, leaving `Ok(())` behind in the visitor.
let local_error = std::mem::replace(&mut self.local_error, Ok(()));
// Keep the first recorded error: only overwrite while the shared slot is still `Ok`.
if error.is_ok() {
*error = local_error;
}
}
}
#[derive(Clone, Debug, PartialEq, Eq)]