Open cache files in parallel (#5120)

## Summary

Open cache files in parallel (again), brings the performance back to be roughly equal to the old implementation.

## Test Plan

Existing tests should keep working.
This commit is contained in:
Thomas de Zeeuw 2023-06-20 17:43:09 +02:00 committed by GitHub
parent 062b6e5c2b
commit 17f1ecd56e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 353 additions and 125 deletions

View file

@ -1,4 +1,4 @@
use std::collections::{hash_map, HashMap};
use std::collections::HashMap;
use std::fmt::Write;
use std::io;
use std::path::{Path, PathBuf};
@ -22,7 +22,7 @@ use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::source_code::SourceFileBuilder;
use crate::args::Overrides;
use crate::cache::{self, PackageCache};
use crate::cache::{self, Cache};
use crate::diagnostics::Diagnostics;
use crate::panic::catch_unwind;
@ -77,37 +77,21 @@ pub(crate) fn run(
pyproject_config,
);
// Create a cache per package, if enabled.
let package_caches = if cache.into() {
let mut caches = HashMap::new();
// TODO(thomas): try to merge this with the detection of package roots
// above or with the parallel iteration below.
for entry in &paths {
let Ok(entry) = entry else { continue };
let path = entry.path();
let package = path
.parent()
.and_then(|parent| package_roots.get(parent))
.and_then(|package| *package);
// For paths not in a package, e.g. scripts, we use the path as
// the package root.
let package_root = package.unwrap_or(path);
let settings = resolver.resolve_all(path, pyproject_config);
if let hash_map::Entry::Vacant(entry) = caches.entry(package_root) {
let cache = PackageCache::open(
// Load the caches.
let caches = bool::from(cache).then(|| {
package_roots
.par_iter()
.map(|(package_root, _)| {
let settings = resolver.resolve_all(package_root, pyproject_config);
let cache = Cache::open(
&settings.cli.cache_dir,
package_root.to_owned(),
package_root.to_path_buf(),
&settings.lib,
)?;
entry.insert(cache);
}
}
Some(caches)
} else {
None
};
);
(&**package_root, cache)
})
.collect::<HashMap<&Path, Cache>>()
});
let start = Instant::now();
let mut diagnostics: Diagnostics = paths
@ -121,17 +105,13 @@ pub(crate) fn run(
.and_then(|parent| package_roots.get(parent))
.and_then(|package| *package);
let package_cache = package_caches.as_ref().map(|package_caches| {
let package_root = package.unwrap_or(path);
let package_cache = package_caches
.get(package_root)
.expect("failed to get package cache");
package_cache
});
let settings = resolver.resolve_all(path, pyproject_config);
let package_root = package.unwrap_or_else(|| path.parent().unwrap_or(path));
let cache = caches
.as_ref()
.map(|caches| caches.get(&package_root).unwrap());
lint_path(path, package, settings, package_cache, noqa, autofix).map_err(|e| {
lint_path(path, package, settings, cache, noqa, autofix).map_err(|e| {
(Some(path.to_owned()), {
let mut error = e.to_string();
for cause in e.chain() {
@ -188,11 +168,11 @@ pub(crate) fn run(
diagnostics.messages.sort();
// Store the package caches.
if let Some(package_caches) = package_caches {
for package_cache in package_caches.values() {
package_cache.store()?;
}
// Store the caches.
if let Some(caches) = caches {
caches
.into_par_iter()
.try_for_each(|(_, cache)| cache.store())?;
}
let duration = start.elapsed();
@ -207,12 +187,12 @@ fn lint_path(
path: &Path,
package: Option<&Path>,
settings: &AllSettings,
package_cache: Option<&PackageCache>,
cache: Option<&Cache>,
noqa: flags::Noqa,
autofix: flags::FixMode,
) -> Result<Diagnostics> {
let result = catch_unwind(|| {
crate::diagnostics::lint_path(path, package, settings, package_cache, noqa, autofix)
crate::diagnostics::lint_path(path, package, settings, cache, noqa, autofix)
});
match result {