Use more threads when discovering python files (#12258)

This commit is contained in:
Micha Reiser 2024-07-10 09:29:17 +02:00 committed by GitHub
parent 0bb2fc6eec
commit 4cc7bc9d32
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -9,7 +9,7 @@ use std::sync::RwLock;
use anyhow::Result; use anyhow::Result;
use anyhow::{anyhow, bail}; use anyhow::{anyhow, bail};
use globset::{Candidate, GlobSet}; use globset::{Candidate, GlobSet};
use ignore::{WalkBuilder, WalkState}; use ignore::{DirEntry, Error, ParallelVisitor, WalkBuilder, WalkState};
use itertools::Itertools; use itertools::Itertools;
use log::debug; use log::debug;
use matchit::{InsertError, Match, Router}; use matchit::{InsertError, Match, Router};
@ -378,119 +378,202 @@ pub fn python_files_in_path<'a>(
} }
builder.standard_filters(resolver.respect_gitignore()); builder.standard_filters(resolver.respect_gitignore());
builder.hidden(false); builder.hidden(false);
builder.threads(
std::thread::available_parallelism()
.map_or(1, std::num::NonZeroUsize::get)
.min(12),
);
let walker = builder.build_parallel(); let walker = builder.build_parallel();
// Run the `WalkParallel` to collect all Python files. // Run the `WalkParallel` to collect all Python files.
let is_hierarchical = resolver.is_hierarchical();
let error: std::sync::Mutex<Result<()>> = std::sync::Mutex::new(Ok(())); let state = WalkPythonFilesState::new(resolver);
let resolver: RwLock<Resolver> = RwLock::new(resolver); let mut visitor = PythonFilesVisitorBuilder::new(transformer, &state);
let files: std::sync::Mutex<Vec<Result<ResolvedFile, ignore::Error>>> = walker.visit(&mut visitor);
std::sync::Mutex::new(vec![]);
walker.run(|| { state.finish()
Box::new(|result| { }
// Respect our own exclusion behavior.
if let Ok(entry) = &result { type ResolvedFiles = Vec<Result<ResolvedFile, ignore::Error>>;
if entry.depth() > 0 {
let path = entry.path(); struct WalkPythonFilesState<'config> {
let resolver = resolver.read().unwrap(); is_hierarchical: bool,
let settings = resolver.resolve(path); merged: std::sync::Mutex<(ResolvedFiles, Result<()>)>,
if let Some(file_name) = path.file_name() { resolver: RwLock<Resolver<'config>>,
let file_path = Candidate::new(path); }
let file_basename = Candidate::new(file_name);
if match_candidate_exclusion( impl<'config> WalkPythonFilesState<'config> {
&file_path, fn new(resolver: Resolver<'config>) -> Self {
&file_basename, Self {
&settings.file_resolver.exclude, is_hierarchical: resolver.is_hierarchical(),
) { merged: std::sync::Mutex::new((Vec::new(), Ok(()))),
debug!("Ignored path via `exclude`: {:?}", path); resolver: RwLock::new(resolver),
return WalkState::Skip; }
} else if match_candidate_exclusion( }
&file_path,
&file_basename, fn finish(self) -> Result<(Vec<Result<ResolvedFile, ignore::Error>>, Resolver<'config>)> {
&settings.file_resolver.extend_exclude, let (files, error) = self.merged.into_inner().unwrap();
) { error?;
debug!("Ignored path via `extend-exclude`: {:?}", path);
return WalkState::Skip; Ok((files, self.resolver.into_inner().unwrap()))
} }
} else { }
debug!("Ignored path due to error in parsing: {:?}", path);
/// Factory for [`PythonFilesVisitor`]s used by the parallel directory walk.
///
/// Holds only borrowed data, so every visitor it builds refers to the same
/// walk state and the same configuration transformer.
struct PythonFilesVisitorBuilder<'s, 'config> {
/// Walk-wide state (resolver, merged results) handed to every visitor built.
state: &'s WalkPythonFilesState<'config>,
/// Configuration transformer handed to every visitor built; the visitor passes
/// it to `resolve_scoped_settings` when it finds a settings file.
transformer: &'s dyn ConfigurationTransformer,
}
impl<'s, 'config> PythonFilesVisitorBuilder<'s, 'config> {
fn new(
transformer: &'s dyn ConfigurationTransformer,
state: &'s WalkPythonFilesState<'config>,
) -> Self {
Self { state, transformer }
}
}
/// Visitor used by the parallel directory walk to collect Python files.
///
/// Results are accumulated in the visitor's own `local_*` fields while it
/// visits entries, and are moved into `global.merged` by the `Drop`
/// implementation, so the shared mutex is locked once per visitor rather than
/// once per visited entry.
struct PythonFilesVisitor<'s, 'config> {
/// Files (or walk errors) recorded by this visitor and not yet merged.
local_files: Vec<Result<ResolvedFile, ignore::Error>>,
/// Set to `Err` when loading or resolving a settings file fails during the walk;
/// on drop it is copied into the shared state only if no error is stored there yet.
local_error: Result<()>,
/// Walk-wide state: the resolver (behind a `RwLock`) and the merged results.
global: &'s WalkPythonFilesState<'config>,
/// Passed to `resolve_scoped_settings` when a settings file is discovered.
transformer: &'s dyn ConfigurationTransformer,
}
/// Lets the `ignore` parallel walker obtain visitors from this builder.
///
/// Each call to `build` yields a fresh [`PythonFilesVisitor`] with empty local
/// results that borrows the builder's shared state and transformer. (Per the
/// `ignore` crate's `ParallelVisitorBuilder` contract this is intended to be
/// one visitor per walker thread.)
impl<'s, 'config> ignore::ParallelVisitorBuilder<'s> for PythonFilesVisitorBuilder<'s, 'config>
where
    'config: 's,
{
    fn build(&mut self) -> Box<dyn ignore::ParallelVisitor + 's> {
        // Start with no collected files and no error; both are merged into the
        // shared state when the visitor is dropped.
        let visitor = PythonFilesVisitor {
            global: self.state,
            transformer: self.transformer,
            local_error: Ok(()),
            local_files: Vec::new(),
        };
        Box::new(visitor)
    }
}
impl ParallelVisitor for PythonFilesVisitor<'_, '_> {
fn visit(&mut self, result: std::result::Result<DirEntry, Error>) -> WalkState {
// Respect our own exclusion behavior.
if let Ok(entry) = &result {
if entry.depth() > 0 {
let path = entry.path();
let resolver = self.global.resolver.read().unwrap();
let settings = resolver.resolve(path);
if let Some(file_name) = path.file_name() {
let file_path = Candidate::new(path);
let file_basename = Candidate::new(file_name);
if match_candidate_exclusion(
&file_path,
&file_basename,
&settings.file_resolver.exclude,
) {
debug!("Ignored path via `exclude`: {:?}", path);
return WalkState::Skip;
} else if match_candidate_exclusion(
&file_path,
&file_basename,
&settings.file_resolver.extend_exclude,
) {
debug!("Ignored path via `extend-exclude`: {:?}", path);
return WalkState::Skip; return WalkState::Skip;
} }
} else {
debug!("Ignored path due to error in parsing: {:?}", path);
return WalkState::Skip;
} }
} }
}
// Search for the `pyproject.toml` file in this directory, before we visit any // Search for the `pyproject.toml` file in this directory, before we visit any
// of its contents. // of its contents.
if is_hierarchical { if self.global.is_hierarchical {
if let Ok(entry) = &result { if let Ok(entry) = &result {
if entry if entry
.file_type() .file_type()
.is_some_and(|file_type| file_type.is_dir()) .is_some_and(|file_type| file_type.is_dir())
{ {
match settings_toml(entry.path()) { match settings_toml(entry.path()) {
Ok(Some(pyproject)) => match resolve_scoped_settings( Ok(Some(pyproject)) => match resolve_scoped_settings(
&pyproject, &pyproject,
Relativity::Parent, Relativity::Parent,
transformer, self.transformer,
) { ) {
Ok((root, settings)) => { Ok((root, settings)) => {
resolver.write().unwrap().add(root, settings); self.global.resolver.write().unwrap().add(root, settings);
} }
Err(err) => {
*error.lock().unwrap() = Err(err);
return WalkState::Quit;
}
},
Ok(None) => {}
Err(err) => { Err(err) => {
*error.lock().unwrap() = Err(err); self.local_error = Err(err);
return WalkState::Quit; return WalkState::Quit;
} }
},
Ok(None) => {}
Err(err) => {
self.local_error = Err(err);
return WalkState::Quit;
} }
} }
} }
} }
}
match result { match result {
Ok(entry) => { Ok(entry) => {
// Ignore directories // Ignore directories
let resolved = if entry.file_type().map_or(true, |ft| ft.is_dir()) { let resolved = if entry.file_type().map_or(true, |ft| ft.is_dir()) {
None None
} else if entry.depth() == 0 { } else if entry.depth() == 0 {
// Accept all files that are passed-in directly. // Accept all files that are passed-in directly.
Some(ResolvedFile::Root(entry.into_path())) Some(ResolvedFile::Root(entry.into_path()))
} else {
// Otherwise, check if the file is included.
let path = entry.path();
let resolver = self.global.resolver.read().unwrap();
let settings = resolver.resolve(path);
if settings.file_resolver.include.is_match(path) {
debug!("Included path via `include`: {:?}", path);
Some(ResolvedFile::Nested(entry.into_path()))
} else if settings.file_resolver.extend_include.is_match(path) {
debug!("Included path via `extend-include`: {:?}", path);
Some(ResolvedFile::Nested(entry.into_path()))
} else { } else {
// Otherwise, check if the file is included. None
let path = entry.path();
let resolver = resolver.read().unwrap();
let settings = resolver.resolve(path);
if settings.file_resolver.include.is_match(path) {
debug!("Included path via `include`: {:?}", path);
Some(ResolvedFile::Nested(entry.into_path()))
} else if settings.file_resolver.extend_include.is_match(path) {
debug!("Included path via `extend-include`: {:?}", path);
Some(ResolvedFile::Nested(entry.into_path()))
} else {
None
}
};
if let Some(resolved) = resolved {
files.lock().unwrap().push(Ok(resolved));
} }
} };
Err(err) => {
files.lock().unwrap().push(Err(err)); if let Some(resolved) = resolved {
self.local_files.push(Ok(resolved));
} }
} }
Err(err) => {
self.local_files.push(Err(err));
}
}
WalkState::Continue WalkState::Continue
}) }
}); }
error.into_inner().unwrap()?; impl Drop for PythonFilesVisitor<'_, '_> {
fn drop(&mut self) {
let mut merged = self.global.merged.lock().unwrap();
let (ref mut files, ref mut error) = &mut *merged;
Ok((files.into_inner().unwrap(), resolver.into_inner().unwrap())) if files.is_empty() {
*files = std::mem::take(&mut self.local_files);
} else {
files.append(&mut self.local_files);
}
let local_error = std::mem::replace(&mut self.local_error, Ok(()));
if error.is_ok() {
*error = local_error;
}
}
} }
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]