mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-26 11:59:10 +00:00
Only use a single cache file per Python package (#5117)
## Summary This changes the caching design from one cache file per source file, to one cache file per package. This greatly reduces the amount of cache files that are opened and written, while maintaining roughly the same (combined) size as bincode is very compact. Below are some very much not scientific performance tests. It uses projects/sources to check: * small.py: single, 31 bytes Python file with 2 errors. * test.py: single, 43k Python file with 8 errors. * fastapi: FastAPI repo, 1134 files checked, 0 errors. Source | Before # files | After # files | Before size | After size -------|-------|-------|-------|------- small.py | 1 | 1 | 20 K | 20 K test.py | 1 | 1 | 60 K | 60 K fastapi | 1134 | 518 | 4.5 M | 2.3 M One question that might come up is why fastapi still has 518 cache files and not 1? That is because this is using the existing package resolution, which sees examples, docs, etc. as separate from the "main" source code (in the fastapi directory in the repo). In the future it might be worth considering switching to a one-cache-file-per-repo strategy. This new design is not perfect and does have a number of known issues. First, like the old design it doesn't remove the cache for a source file that has been (re)moved until `ruff clean` is called. Second, this currently uses a large mutex around the mutation of the package cache (e.g. inserting a result). This could be (or become) a bottleneck. It's future work to test and improve this (if needed). Third, currently the package caches are opened and stored in a sequential loop; this could be done in parallel. This is also future work. ## Test Plan Run `ruff check` (with caching enabled) twice on any Python source code and it should produce the same results.
This commit is contained in:
parent
b8d378b0a3
commit
e3c12764f8
7 changed files with 285 additions and 249 deletions
|
@ -1,3 +1,5 @@
|
|||
use std::collections::{hash_map, HashMap};
|
||||
use std::fmt::Write;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Instant;
|
||||
|
@ -20,7 +22,7 @@ use ruff_python_ast::imports::ImportMap;
|
|||
use ruff_python_ast::source_code::SourceFileBuilder;
|
||||
|
||||
use crate::args::Overrides;
|
||||
use crate::cache;
|
||||
use crate::cache::{self, PackageCache};
|
||||
use crate::diagnostics::Diagnostics;
|
||||
use crate::panic::catch_unwind;
|
||||
|
||||
|
@ -75,6 +77,38 @@ pub(crate) fn run(
|
|||
pyproject_config,
|
||||
);
|
||||
|
||||
// Create a cache per package, if enabled.
|
||||
let package_caches = if cache.into() {
|
||||
let mut caches = HashMap::new();
|
||||
// TODO(thomas): try to merge this with the detection of package roots
|
||||
// above or with the parallel iteration below.
|
||||
for entry in &paths {
|
||||
let Ok(entry) = entry else { continue };
|
||||
let path = entry.path();
|
||||
let package = path
|
||||
.parent()
|
||||
.and_then(|parent| package_roots.get(parent))
|
||||
.and_then(|package| *package);
|
||||
// For paths not in a package, e.g. scripts, we use the path as
|
||||
// the package root.
|
||||
let package_root = package.unwrap_or(path);
|
||||
|
||||
let settings = resolver.resolve_all(path, pyproject_config);
|
||||
|
||||
if let hash_map::Entry::Vacant(entry) = caches.entry(package_root) {
|
||||
let cache = PackageCache::open(
|
||||
&settings.cli.cache_dir,
|
||||
package_root.to_owned(),
|
||||
&settings.lib,
|
||||
)?;
|
||||
entry.insert(cache);
|
||||
}
|
||||
}
|
||||
Some(caches)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let start = Instant::now();
|
||||
let mut diagnostics: Diagnostics = paths
|
||||
.par_iter()
|
||||
|
@ -86,13 +120,22 @@ pub(crate) fn run(
|
|||
.parent()
|
||||
.and_then(|parent| package_roots.get(parent))
|
||||
.and_then(|package| *package);
|
||||
|
||||
let package_cache = package_caches.as_ref().map(|package_caches| {
|
||||
let package_root = package.unwrap_or(path);
|
||||
let package_cache = package_caches
|
||||
.get(package_root)
|
||||
.expect("failed to get package cache");
|
||||
package_cache
|
||||
});
|
||||
|
||||
let settings = resolver.resolve_all(path, pyproject_config);
|
||||
|
||||
lint_path(path, package, settings, cache, noqa, autofix).map_err(|e| {
|
||||
lint_path(path, package, settings, package_cache, noqa, autofix).map_err(|e| {
|
||||
(Some(path.to_owned()), {
|
||||
let mut error = e.to_string();
|
||||
for cause in e.chain() {
|
||||
error += &format!("\n Caused by: {cause}");
|
||||
write!(&mut error, "\n Caused by: {cause}").unwrap();
|
||||
}
|
||||
error
|
||||
})
|
||||
|
@ -145,6 +188,13 @@ pub(crate) fn run(
|
|||
|
||||
diagnostics.messages.sort();
|
||||
|
||||
// Store the package caches.
|
||||
if let Some(package_caches) = package_caches {
|
||||
for package_cache in package_caches.values() {
|
||||
package_cache.store()?;
|
||||
}
|
||||
}
|
||||
|
||||
let duration = start.elapsed();
|
||||
debug!("Checked {:?} files in: {:?}", paths.len(), duration);
|
||||
|
||||
|
@ -157,12 +207,12 @@ fn lint_path(
|
|||
path: &Path,
|
||||
package: Option<&Path>,
|
||||
settings: &AllSettings,
|
||||
cache: flags::Cache,
|
||||
package_cache: Option<&PackageCache>,
|
||||
noqa: flags::Noqa,
|
||||
autofix: flags::FixMode,
|
||||
) -> Result<Diagnostics> {
|
||||
let result = catch_unwind(|| {
|
||||
crate::diagnostics::lint_path(path, package, settings, cache, noqa, autofix)
|
||||
crate::diagnostics::lint_path(path, package, settings, package_cache, noqa, autofix)
|
||||
});
|
||||
|
||||
match result {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue