mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-28 21:05:08 +00:00
Open cache files in parallel (#5120)
## Summary

Open cache files in parallel (again), which brings performance back to roughly that of the old implementation.

## Test Plan

Existing tests should keep working.
This commit is contained in:
parent
062b6e5c2b
commit
17f1ecd56e
8 changed files with 353 additions and 125 deletions
|
@ -6,11 +6,12 @@ use std::path::{Path, PathBuf};
|
|||
use std::sync::Mutex;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use ruff::message::Message;
|
||||
use ruff::settings::Settings;
|
||||
use ruff::warn_user;
|
||||
use ruff_cache::{CacheKey, CacheKeyHasher};
|
||||
use ruff_diagnostics::{DiagnosticKind, Fix};
|
||||
use ruff_python_ast::imports::ImportMap;
|
||||
|
@ -19,33 +20,45 @@ use ruff_text_size::{TextRange, TextSize};
|
|||
|
||||
use crate::diagnostics::Diagnostics;
|
||||
|
||||
/// On disk representation of a cache of a package.
|
||||
#[derive(Deserialize, Debug, Serialize)]
|
||||
pub(crate) struct PackageCache {
|
||||
/// [`Path`] that is relative to the package root in [`PackageCache`].
|
||||
pub(crate) type RelativePath = Path;
|
||||
/// [`PathBuf`] that is relative to the package root in [`PackageCache`].
|
||||
pub(crate) type RelativePathBuf = PathBuf;
|
||||
|
||||
/// Cache.
|
||||
///
|
||||
/// `Cache` holds everything required to display the diagnostics for a single
|
||||
/// package. The on-disk representation is represented in [`PackageCache`] (and
|
||||
/// related) types.
|
||||
///
|
||||
/// This type manages the cache file, reading it from disk and writing it back
|
||||
/// to disk (if required).
|
||||
pub(crate) struct Cache {
|
||||
/// Location of the cache.
|
||||
///
|
||||
/// Not stored on disk, just used as a storage location.
|
||||
#[serde(skip)]
|
||||
path: PathBuf,
|
||||
/// Path to the root of the package.
|
||||
/// Package cache read from disk.
|
||||
package: PackageCache,
|
||||
/// Changes made compared to the (current) `package`.
|
||||
///
|
||||
/// Usually this is a directory, but it can also be a single file in case of
|
||||
/// single file "packages", e.g. scripts.
|
||||
package_root: PathBuf,
|
||||
/// Mapping of source file path to its cached data.
|
||||
// TODO: look into concurrent hashmap or similar instead of a mutex.
|
||||
files: Mutex<HashMap<RelativePathBuf, FileCache>>,
|
||||
/// Files that are linted, but are not in `package.files` or are in
|
||||
/// `package.files` but are outdated. This gets merged with `package.files`
|
||||
/// when the cache is written back to disk in [`Cache::store`].
|
||||
new_files: Mutex<HashMap<RelativePathBuf, FileCache>>,
|
||||
}
|
||||
|
||||
impl PackageCache {
|
||||
/// Open or create a new package cache.
|
||||
impl Cache {
|
||||
/// Open or create a new cache.
|
||||
///
|
||||
/// `package_root` must be canonicalized.
|
||||
pub(crate) fn open(
|
||||
cache_dir: &Path,
|
||||
package_root: PathBuf,
|
||||
settings: &Settings,
|
||||
) -> Result<PackageCache> {
|
||||
/// `cache_dir` is considered the root directory of the cache, which can be
|
||||
/// local to the project, global or otherwise set by the user.
|
||||
///
|
||||
/// `package_root` is the path to the root of the package that is contained
|
||||
/// within this cache and must be canonicalized (to avoid considering `./`
|
||||
/// and `../project` being different).
|
||||
///
|
||||
/// Finally `settings` is used to ensure we don't open a cache for different
|
||||
/// settings.
|
||||
pub(crate) fn open(cache_dir: &Path, package_root: PathBuf, settings: &Settings) -> Cache {
|
||||
debug_assert!(package_root.is_absolute(), "package root not canonicalized");
|
||||
|
||||
let mut buf = itoa::Buffer::new();
|
||||
|
@ -56,40 +69,66 @@ impl PackageCache {
|
|||
Ok(file) => file,
|
||||
Err(err) if err.kind() == io::ErrorKind::NotFound => {
|
||||
// No cache exists yet, return an empty cache.
|
||||
return Ok(PackageCache {
|
||||
path,
|
||||
package_root,
|
||||
files: Mutex::new(HashMap::new()),
|
||||
});
|
||||
return Cache::empty(path, package_root);
|
||||
}
|
||||
Err(err) => {
|
||||
return Err(err)
|
||||
.with_context(|| format!("Failed to open cache file '{}'", path.display()))?
|
||||
warn_user!("Failed to open cache file '{}': {err}", path.display());
|
||||
return Cache::empty(path, package_root);
|
||||
}
|
||||
};
|
||||
|
||||
let mut cache: PackageCache = bincode::deserialize_from(BufReader::new(file))
|
||||
.with_context(|| format!("Failed parse cache file '{}'", path.display()))?;
|
||||
let mut package: PackageCache = match bincode::deserialize_from(BufReader::new(file)) {
|
||||
Ok(package) => package,
|
||||
Err(err) => {
|
||||
warn_user!("Failed parse cache file '{}': {err}", path.display());
|
||||
return Cache::empty(path, package_root);
|
||||
}
|
||||
};
|
||||
|
||||
// Sanity check.
|
||||
if cache.package_root != package_root {
|
||||
return Err(anyhow!(
|
||||
if package.package_root != package_root {
|
||||
warn_user!(
|
||||
"Different package root in cache: expected '{}', got '{}'",
|
||||
package_root.display(),
|
||||
cache.package_root.display(),
|
||||
));
|
||||
package.package_root.display(),
|
||||
);
|
||||
package.files.clear();
|
||||
}
|
||||
Cache {
|
||||
path,
|
||||
package,
|
||||
new_files: Mutex::new(HashMap::new()),
|
||||
}
|
||||
|
||||
cache.path = path;
|
||||
Ok(cache)
|
||||
}
|
||||
|
||||
/// Store the cache to disk.
|
||||
pub(crate) fn store(&self) -> Result<()> {
|
||||
/// Create an empty `Cache`.
|
||||
fn empty(path: PathBuf, package_root: PathBuf) -> Cache {
|
||||
Cache {
|
||||
path,
|
||||
package: PackageCache {
|
||||
package_root,
|
||||
files: HashMap::new(),
|
||||
},
|
||||
new_files: Mutex::new(HashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Store the cache to disk, if it has been changed.
|
||||
pub(crate) fn store(mut self) -> Result<()> {
|
||||
let new_files = self.new_files.into_inner().unwrap();
|
||||
if new_files.is_empty() {
|
||||
// No changes made, no need to write the same cache file back to
|
||||
// disk.
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Add/overwrite the changes made.
|
||||
self.package.files.extend(new_files);
|
||||
|
||||
let file = File::create(&self.path)
|
||||
.with_context(|| format!("Failed to create cache file '{}'", self.path.display()))?;
|
||||
let writer = BufWriter::new(file);
|
||||
bincode::serialize_into(writer, &self).with_context(|| {
|
||||
bincode::serialize_into(writer, &self.package).with_context(|| {
|
||||
format!(
|
||||
"Failed to serialise cache to file '{}'",
|
||||
self.path.display()
|
||||
|
@ -101,7 +140,7 @@ impl PackageCache {
|
|||
///
|
||||
/// Returns `None` if `path` is not within the package.
|
||||
pub(crate) fn relative_path<'a>(&self, path: &'a Path) -> Option<&'a RelativePath> {
|
||||
path.strip_prefix(&self.package_root).ok()
|
||||
path.strip_prefix(&self.package.package_root).ok()
|
||||
}
|
||||
|
||||
/// Get the cached results for a single file at relative `path`. This uses
|
||||
|
@ -114,33 +153,34 @@ impl PackageCache {
|
|||
&self,
|
||||
path: &RelativePath,
|
||||
file_last_modified: SystemTime,
|
||||
) -> Option<FileCache> {
|
||||
let files = self.files.lock().unwrap();
|
||||
let file = files.get(path)?;
|
||||
) -> Option<&FileCache> {
|
||||
let file = self.package.files.get(path)?;
|
||||
|
||||
// Make sure the file hasn't changed since the cached run.
|
||||
if file.last_modified != file_last_modified {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(file.clone())
|
||||
Some(file)
|
||||
}
|
||||
|
||||
/// Add or update a file cache at `path` relative to the package root.
|
||||
pub(crate) fn update(&self, path: RelativePathBuf, file: FileCache) {
|
||||
self.files.lock().unwrap().insert(path, file);
|
||||
}
|
||||
|
||||
/// Remove a file cache at `path` relative to the package root.
|
||||
pub(crate) fn remove(&self, path: &RelativePath) {
|
||||
self.files.lock().unwrap().remove(path);
|
||||
self.new_files.lock().unwrap().insert(path, file);
|
||||
}
|
||||
}
|
||||
|
||||
/// [`Path`] that is relative to the package root in [`PackageCache`].
|
||||
pub(crate) type RelativePath = Path;
|
||||
/// [`PathBuf`] that is relative to the package root in [`PackageCache`].
|
||||
pub(crate) type RelativePathBuf = PathBuf;
|
||||
/// On disk representation of a cache of a package.
|
||||
#[derive(Deserialize, Debug, Serialize)]
|
||||
struct PackageCache {
|
||||
/// Path to the root of the package.
|
||||
///
|
||||
/// Usually this is a directory, but it can also be a single file in case of
|
||||
/// single file "packages", e.g. scripts.
|
||||
package_root: PathBuf,
|
||||
/// Mapping of source file path to its cached data.
|
||||
files: HashMap<RelativePathBuf, FileCache>,
|
||||
}
|
||||
|
||||
/// On disk representation of the cache per source file.
|
||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
||||
|
@ -198,23 +238,23 @@ impl FileCache {
|
|||
}
|
||||
|
||||
/// Convert the file cache into `Diagnostics`, using `path` as file name.
|
||||
pub(crate) fn into_diagnostics(self, path: &Path) -> Diagnostics {
|
||||
pub(crate) fn as_diagnostics(&self, path: &Path) -> Diagnostics {
|
||||
let messages = if self.messages.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
let file = SourceFileBuilder::new(path.to_string_lossy(), self.source).finish();
|
||||
let file = SourceFileBuilder::new(path.to_string_lossy(), &*self.source).finish();
|
||||
self.messages
|
||||
.into_iter()
|
||||
.iter()
|
||||
.map(|msg| Message {
|
||||
kind: msg.kind,
|
||||
kind: msg.kind.clone(),
|
||||
range: msg.range,
|
||||
fix: msg.fix,
|
||||
fix: msg.fix.clone(),
|
||||
file: file.clone(),
|
||||
noqa_offset: msg.noqa_offset,
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
Diagnostics::new(messages, self.imports)
|
||||
Diagnostics::new(messages, self.imports.clone())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -257,3 +297,204 @@ pub(crate) fn init(path: &Path) -> Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::env::temp_dir;
|
||||
use std::fs;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
|
||||
use ruff::settings::{flags, AllSettings};
|
||||
use ruff_cache::CACHE_DIR_NAME;
|
||||
|
||||
use crate::cache::{self, Cache};
|
||||
use crate::diagnostics::{lint_path, Diagnostics};
|
||||
|
||||
#[test]
|
||||
fn same_results() {
|
||||
let mut cache_dir = temp_dir();
|
||||
cache_dir.push("ruff_tests/cache_same_results");
|
||||
let _ = fs::remove_dir_all(&cache_dir);
|
||||
cache::init(&cache_dir).unwrap();
|
||||
|
||||
let settings = AllSettings::default();
|
||||
|
||||
let package_root = fs::canonicalize("../ruff/resources/test/fixtures").unwrap();
|
||||
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
|
||||
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
|
||||
|
||||
let mut paths = Vec::new();
|
||||
let mut parse_errors = Vec::new();
|
||||
let mut expected_diagnostics = Diagnostics::default();
|
||||
for entry in fs::read_dir(&package_root).unwrap() {
|
||||
let entry = entry.unwrap();
|
||||
if !entry.file_type().unwrap().is_dir() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let dir_path = entry.path();
|
||||
if dir_path.ends_with(CACHE_DIR_NAME) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for entry in fs::read_dir(dir_path).unwrap() {
|
||||
let entry = entry.unwrap();
|
||||
if !entry.file_type().unwrap().is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let path = entry.path();
|
||||
if path.ends_with("pyproject.toml") || path.ends_with("R.ipynb") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let diagnostics = lint_path(
|
||||
&path,
|
||||
Some(&package_root),
|
||||
&settings,
|
||||
Some(&cache),
|
||||
flags::Noqa::Enabled,
|
||||
flags::FixMode::Generate,
|
||||
)
|
||||
.unwrap();
|
||||
if diagnostics
|
||||
.messages
|
||||
.iter()
|
||||
.any(|m| m.kind.name == "SyntaxError")
|
||||
{
|
||||
parse_errors.push(path.clone());
|
||||
}
|
||||
paths.push(path);
|
||||
expected_diagnostics += diagnostics;
|
||||
}
|
||||
}
|
||||
assert_ne!(paths, &[] as &[std::path::PathBuf], "no files checked");
|
||||
|
||||
cache.store().unwrap();
|
||||
|
||||
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
|
||||
assert_ne!(cache.package.files.len(), 0);
|
||||
|
||||
parse_errors.sort();
|
||||
|
||||
for path in &paths {
|
||||
if parse_errors.binary_search(path).is_ok() {
|
||||
continue; // We don't cache parsing errors.
|
||||
}
|
||||
|
||||
let relative_path = cache.relative_path(path).unwrap();
|
||||
|
||||
assert!(
|
||||
cache.package.files.contains_key(relative_path),
|
||||
"missing file from cache: '{}'",
|
||||
relative_path.display()
|
||||
);
|
||||
}
|
||||
|
||||
let mut got_diagnostics = Diagnostics::default();
|
||||
for path in paths {
|
||||
got_diagnostics += lint_path(
|
||||
&path,
|
||||
Some(&package_root),
|
||||
&settings,
|
||||
Some(&cache),
|
||||
flags::Noqa::Enabled,
|
||||
flags::FixMode::Generate,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Not stored in the cache.
|
||||
expected_diagnostics.source_kind.clear();
|
||||
got_diagnostics.source_kind.clear();
|
||||
assert!(expected_diagnostics == got_diagnostics);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalidation() {
|
||||
// NOTE: keep in sync with actual file.
|
||||
const SOURCE: &[u8] = b"# NOTE: sync with cache::invalidation test\na = 1\n\n__all__ = list([\"a\", \"b\"])\n";
|
||||
|
||||
let mut cache_dir = temp_dir();
|
||||
cache_dir.push("ruff_tests/cache_invalidation");
|
||||
let _ = fs::remove_dir_all(&cache_dir);
|
||||
cache::init(&cache_dir).unwrap();
|
||||
|
||||
let settings = AllSettings::default();
|
||||
let package_root = fs::canonicalize("resources/test/fixtures/cache_mutable").unwrap();
|
||||
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
|
||||
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
|
||||
|
||||
let path = package_root.join("source.py");
|
||||
let mut expected_diagnostics = lint_path(
|
||||
&path,
|
||||
Some(&package_root),
|
||||
&settings,
|
||||
Some(&cache),
|
||||
flags::Noqa::Enabled,
|
||||
flags::FixMode::Generate,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(cache.new_files.lock().unwrap().len(), 1);
|
||||
|
||||
cache.store().unwrap();
|
||||
|
||||
let tests = [
|
||||
// File change.
|
||||
(|path| {
|
||||
let mut file = fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(path)?;
|
||||
file.write_all(SOURCE)?;
|
||||
file.sync_data()?;
|
||||
Ok(|_| Ok(()))
|
||||
}) as fn(&Path) -> io::Result<fn(&Path) -> io::Result<()>>,
|
||||
// Regression for issue #3086.
|
||||
#[cfg(unix)]
|
||||
|path| {
|
||||
flip_execute_permission_bit(path)?;
|
||||
Ok(flip_execute_permission_bit)
|
||||
},
|
||||
];
|
||||
|
||||
#[cfg(unix)]
|
||||
#[allow(clippy::items_after_statements)]
|
||||
fn flip_execute_permission_bit(path: &Path) -> io::Result<()> {
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let file = fs::OpenOptions::new().write(true).open(path)?;
|
||||
let perms = file.metadata()?.permissions();
|
||||
file.set_permissions(PermissionsExt::from_mode(perms.mode() ^ 0o111))
|
||||
}
|
||||
|
||||
for change_file in tests {
|
||||
let cleanup = change_file(&path).unwrap();
|
||||
|
||||
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
|
||||
|
||||
let mut got_diagnostics = lint_path(
|
||||
&path,
|
||||
Some(&package_root),
|
||||
&settings,
|
||||
Some(&cache),
|
||||
flags::Noqa::Enabled,
|
||||
flags::FixMode::Generate,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
cleanup(&path).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
cache.new_files.lock().unwrap().len(),
|
||||
1,
|
||||
"cache must not be used"
|
||||
);
|
||||
|
||||
// Not stored in the cache.
|
||||
expected_diagnostics.source_kind.clear();
|
||||
got_diagnostics.source_kind.clear();
|
||||
assert!(expected_diagnostics == got_diagnostics);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue