Add caching to formatter (#8089)

This commit is contained in:
Micha Reiser 2023-10-23 17:43:08 +09:00 committed by GitHub
parent c0710a1dd4
commit 6fc35dd075
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 627 additions and 260 deletions

View file

@ -365,6 +365,14 @@ pub struct FormatCommand {
/// Path to the `pyproject.toml` or `ruff.toml` file to use for configuration.
#[arg(long, conflicts_with = "isolated")]
pub config: Option<PathBuf>,
/// Disable cache reads.
#[arg(short, long, help_heading = "Miscellaneous")]
pub no_cache: bool,
/// Path to the cache directory.
#[arg(long, env = "RUFF_CACHE_DIR", help_heading = "Miscellaneous")]
pub cache_dir: Option<PathBuf>,
/// Respect file exclusions via `.gitignore` and other standard ignore files.
/// Use `--no-respect-gitignore` to disable.
#[arg(
@ -535,6 +543,7 @@ impl FormatCommand {
config: self.config,
files: self.files,
isolated: self.isolated,
no_cache: self.no_cache,
stdin_filename: self.stdin_filename,
},
CliOverrides {
@ -547,6 +556,8 @@ impl FormatCommand {
preview: resolve_bool_arg(self.preview, self.no_preview).map(PreviewMode::from),
force_exclude: resolve_bool_arg(self.force_exclude, self.no_force_exclude),
target_version: self.target_version,
cache_dir: self.cache_dir,
// Unsupported on the formatter CLI, but required on `Overrides`.
..CliOverrides::default()
},
@ -590,6 +601,7 @@ pub struct CheckArguments {
#[allow(clippy::struct_excessive_bools)]
pub struct FormatArguments {
pub check: bool,
pub no_cache: bool,
pub diff: bool,
pub config: Option<PathBuf>,
pub files: Vec<PathBuf>,

View file

@ -1,4 +1,5 @@
use std::collections::HashMap;
use std::fmt::Debug;
use std::fs::{self, File};
use std::hash::Hasher;
use std::io::{self, BufReader, BufWriter, Write};
@ -8,6 +9,11 @@ use std::sync::Mutex;
use std::time::{Duration, SystemTime};
use anyhow::{Context, Result};
use filetime::FileTime;
use itertools::Itertools;
use log::{debug, error};
use rayon::iter::ParallelIterator;
use rayon::iter::{IntoParallelIterator, ParallelBridge};
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
@ -15,22 +21,50 @@ use ruff_cache::{CacheKey, CacheKeyHasher};
use ruff_diagnostics::{DiagnosticKind, Fix};
use ruff_linter::message::Message;
use ruff_linter::warn_user;
use ruff_macros::CacheKey;
use ruff_notebook::NotebookIndex;
use ruff_python_ast::imports::ImportMap;
use ruff_source_file::SourceFileBuilder;
use ruff_text_size::{TextRange, TextSize};
use ruff_workspace::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy, Resolver};
use ruff_workspace::Settings;
use crate::cache;
use crate::diagnostics::Diagnostics;
/// Maximum duration for which we keep a file in cache that hasn't been seen.
const MAX_LAST_SEEN: Duration = Duration::from_secs(30 * 24 * 60 * 60); // 30 days.
/// [`Path`] that is relative to the package root in [`PackageCache`].
pub(crate) type RelativePath = Path;
/// [`PathBuf`] that is relative to the package root in [`PackageCache`].
pub(crate) type RelativePathBuf = PathBuf;
#[derive(CacheKey)]
pub(crate) struct FileCacheKey {
/// Timestamp when the file was last modified before the (cached) check.
file_last_modified: FileTime,
/// Permissions of the file before the (cached) check.
file_permissions_mode: u32,
}
impl FileCacheKey {
pub(crate) fn from_path(path: &Path) -> io::Result<FileCacheKey> {
// Construct a cache key for the file
let metadata = path.metadata()?;
#[cfg(unix)]
let permissions = {
use std::os::unix::fs::PermissionsExt;
metadata.permissions().mode()
};
#[cfg(windows)]
let permissions: u32 = metadata.permissions().readonly().into();
Ok(FileCacheKey {
file_last_modified: FileTime::from_last_modification_time(&metadata),
file_permissions_mode: permissions,
})
}
}
/// Cache.
///
/// `Cache` holds everything required to display the diagnostics for a single
@ -50,7 +84,7 @@ pub(crate) struct Cache {
/// Files that are linted, but are not in `package.files` or are in
/// `package.files` but are outdated. This gets merged with `package.files`
/// when the cache is written back to disk in [`Cache::store`].
new_files: Mutex<HashMap<RelativePathBuf, FileCache>>,
changes: Mutex<Vec<Change>>,
/// The "current" timestamp used as cache for the updates of
/// [`FileCache::last_seen`]
last_seen_cache: u64,
@ -65,7 +99,7 @@ impl Cache {
///
/// Finally `settings` is used to ensure we don't open a cache for different
/// settings. It also defines the directory where to store the cache.
pub(crate) fn open(package_root: PathBuf, settings: &Settings) -> Cache {
pub(crate) fn open(package_root: PathBuf, settings: &Settings) -> Self {
debug_assert!(package_root.is_absolute(), "package root not canonicalized");
let mut buf = itoa::Buffer::new();
@ -105,7 +139,7 @@ impl Cache {
}
/// Create an empty `Cache`.
fn empty(path: PathBuf, package_root: PathBuf) -> Cache {
fn empty(path: PathBuf, package_root: PathBuf) -> Self {
let package = PackageCache {
package_root,
files: HashMap::new(),
@ -114,37 +148,25 @@ impl Cache {
}
#[allow(clippy::cast_possible_truncation)]
fn new(path: PathBuf, package: PackageCache) -> Cache {
fn new(path: PathBuf, package: PackageCache) -> Self {
Cache {
path,
package,
new_files: Mutex::new(HashMap::new()),
changes: Mutex::new(Vec::new()),
// SAFETY: this will be truncated to the year ~2554 (so don't use
// this code after that!).
last_seen_cache: SystemTime::UNIX_EPOCH.elapsed().unwrap().as_millis() as u64,
}
}
/// Store the cache to disk, if it has been changed.
#[allow(clippy::cast_possible_truncation)]
pub(crate) fn store(mut self) -> Result<()> {
let new_files = self.new_files.into_inner().unwrap();
if new_files.is_empty() {
/// Applies the pending changes and persists the cache to disk, if it has been changed.
pub(crate) fn persist(mut self) -> Result<()> {
if !self.save() {
// No changes made, no need to write the same cache file back to
// disk.
return Ok(());
}
// Remove cached files that we haven't seen in a while.
let now = self.last_seen_cache;
self.package.files.retain(|_, file| {
// SAFETY: this will be truncated to the year ~2554.
(now - *file.last_seen.get_mut()) <= MAX_LAST_SEEN.as_millis() as u64
});
// Apply any changes made and keep track of when we last saw files.
self.package.files.extend(new_files);
let file = File::create(&self.path)
.with_context(|| format!("Failed to create cache file '{}'", self.path.display()))?;
let writer = BufWriter::new(file);
@ -156,6 +178,53 @@ impl Cache {
})
}
/// Applies the pending changes without storing the cache to disk.
#[allow(clippy::cast_possible_truncation)]
pub(crate) fn save(&mut self) -> bool {
/// Maximum duration for which we keep a file in cache that hasn't been seen.
const MAX_LAST_SEEN: Duration = Duration::from_secs(30 * 24 * 60 * 60); // 30 days.
let changes = std::mem::take(self.changes.get_mut().unwrap());
if changes.is_empty() {
return false;
}
// Remove cached files that we haven't seen in a while.
let now = self.last_seen_cache;
self.package.files.retain(|_, file| {
// SAFETY: this will be truncated to the year ~2554.
(now - *file.last_seen.get_mut()) <= MAX_LAST_SEEN.as_millis() as u64
});
// Apply any changes made and keep track of when we last saw files.
for change in changes {
let entry = self
.package
.files
.entry(change.path)
.and_modify(|existing| {
if existing.key != change.new_key {
// Reset the data if the key change.
existing.data = FileCacheData::default();
}
existing.key = change.new_key;
existing
.last_seen
.store(self.last_seen_cache, Ordering::Relaxed);
})
.or_insert_with(|| FileCache {
key: change.new_key,
last_seen: AtomicU64::new(self.last_seen_cache),
data: FileCacheData::default(),
});
change.new_data.apply(&mut entry.data);
}
true
}
/// Returns the relative path based on `path` and the package root.
///
/// Returns `None` if `path` is not within the package.
@ -169,7 +238,7 @@ impl Cache {
///
/// This returns `None` if `key` differs from the cached key or if the
/// cache doesn't contain results for the file.
pub(crate) fn get<T: CacheKey>(&self, path: &RelativePath, key: &T) -> Option<&FileCache> {
pub(crate) fn get(&self, path: &RelativePath, key: &FileCacheKey) -> Option<&FileCache> {
let file = self.package.files.get(path)?;
let mut hasher = CacheKeyHasher::new();
@ -185,50 +254,34 @@ impl Cache {
Some(file)
}
/// Add or update a file cache at `path` relative to the package root.
pub(crate) fn update<T: CacheKey>(
&self,
path: RelativePathBuf,
key: T,
messages: &[Message],
imports: &ImportMap,
notebook_index: Option<&NotebookIndex>,
) {
let source = if let Some(msg) = messages.first() {
msg.file.source_text().to_owned()
} else {
String::new() // No messages, no need to keep the source!
};
let messages = messages
.iter()
.map(|msg| {
// Make sure that all message use the same source file.
assert!(
msg.file == messages.first().unwrap().file,
"message uses a different source file"
);
CacheMessage {
kind: msg.kind.clone(),
range: msg.range,
fix: msg.fix.clone(),
noqa_offset: msg.noqa_offset,
pub(crate) fn is_formatted(&self, path: &RelativePath, key: &FileCacheKey) -> bool {
self.get(path, key)
.is_some_and(|entry| entry.data.formatted)
}
})
.collect();
/// Add or update a file cache at `path` relative to the package root.
fn update(&self, path: RelativePathBuf, key: &FileCacheKey, data: ChangeData) {
let mut hasher = CacheKeyHasher::new();
key.cache_key(&mut hasher);
let file = FileCache {
key: hasher.finish(),
last_seen: AtomicU64::new(self.last_seen_cache),
imports: imports.clone(),
messages,
source,
notebook_index: notebook_index.cloned(),
};
self.new_files.lock().unwrap().insert(path, file);
self.changes.lock().unwrap().push(Change {
path,
new_key: hasher.finish(),
new_data: data,
});
}
pub(crate) fn update_lint(
&self,
path: RelativePathBuf,
key: &FileCacheKey,
data: LintCacheData,
) {
self.update(path, key, ChangeData::Lint(data));
}
pub(crate) fn set_formatted(&self, path: RelativePathBuf, key: &FileCacheKey) {
self.update(path, key, ChangeData::Formatted);
}
}
@ -254,28 +307,19 @@ pub(crate) struct FileCache {
/// Represented as the number of milliseconds since Unix epoch. This will
/// break in 1970 + ~584 years (~2554).
last_seen: AtomicU64,
/// Imports made.
imports: ImportMap,
/// Diagnostic messages.
messages: Vec<CacheMessage>,
/// Source code of the file.
///
/// # Notes
///
/// This will be empty if `messages` is empty.
source: String,
/// Notebook index if this file is a Jupyter Notebook.
notebook_index: Option<NotebookIndex>,
data: FileCacheData,
}
impl FileCache {
/// Convert the file cache into `Diagnostics`, using `path` as file name.
pub(crate) fn as_diagnostics(&self, path: &Path) -> Diagnostics {
let messages = if self.messages.is_empty() {
pub(crate) fn to_diagnostics(&self, path: &Path) -> Option<Diagnostics> {
self.data.lint.as_ref().map(|lint| {
let messages = if lint.messages.is_empty() {
Vec::new()
} else {
let file = SourceFileBuilder::new(path.to_string_lossy(), &*self.source).finish();
self.messages
let file = SourceFileBuilder::new(path.to_string_lossy(), &*lint.source).finish();
lint.messages
.iter()
.map(|msg| Message {
kind: msg.kind.clone(),
@ -286,23 +330,20 @@ impl FileCache {
})
.collect()
};
let notebook_indexes = if let Some(notebook_index) = self.notebook_index.as_ref() {
let notebook_indexes = if let Some(notebook_index) = lint.notebook_index.as_ref() {
FxHashMap::from_iter([(path.to_string_lossy().to_string(), notebook_index.clone())])
} else {
FxHashMap::default()
};
Diagnostics::new(messages, self.imports.clone(), notebook_indexes)
Diagnostics::new(messages, lint.imports.clone(), notebook_indexes)
})
}
}
/// On disk representation of a diagnostic message.
#[derive(Deserialize, Debug, Serialize)]
struct CacheMessage {
kind: DiagnosticKind,
/// Range into the message's [`FileCache::source`].
range: TextRange,
fix: Option<Fix>,
noqa_offset: TextSize,
#[derive(Debug, Default, Deserialize, Serialize)]
struct FileCacheData {
lint: Option<LintCacheData>,
formatted: bool,
}
/// Returns a hash key based on the `package_root`, `settings` and the crate
@ -335,32 +376,209 @@ pub(crate) fn init(path: &Path) -> Result<()> {
Ok(())
}
#[derive(Deserialize, Debug, Serialize, PartialEq)]
pub(crate) struct LintCacheData {
/// Imports made.
pub(super) imports: ImportMap,
/// Diagnostic messages.
pub(super) messages: Vec<CacheMessage>,
/// Source code of the file.
///
/// # Notes
///
/// This will be empty if `messages` is empty.
pub(super) source: String,
/// Notebook index if this file is a Jupyter Notebook.
pub(super) notebook_index: Option<NotebookIndex>,
}
impl LintCacheData {
pub(crate) fn from_messages(
messages: &[Message],
imports: ImportMap,
notebook_index: Option<NotebookIndex>,
) -> Self {
let source = if let Some(msg) = messages.first() {
msg.file.source_text().to_owned()
} else {
String::new() // No messages, no need to keep the source!
};
let messages = messages
.iter()
.map(|msg| {
// Make sure that all message use the same source file.
assert_eq!(
msg.file,
messages.first().unwrap().file,
"message uses a different source file"
);
CacheMessage {
kind: msg.kind.clone(),
range: msg.range,
fix: msg.fix.clone(),
noqa_offset: msg.noqa_offset,
}
})
.collect();
Self {
imports,
messages,
source,
notebook_index,
}
}
}
/// On disk representation of a diagnostic message.
#[derive(Deserialize, Debug, Serialize, PartialEq)]
pub(super) struct CacheMessage {
kind: DiagnosticKind,
/// Range into the message's [`FileCache::source`].
range: TextRange,
fix: Option<Fix>,
noqa_offset: TextSize,
}
pub(crate) trait PackageCaches {
fn get(&self, package_root: &Path) -> Option<&Cache>;
fn persist(self) -> anyhow::Result<()>;
}
impl<T> PackageCaches for Option<T>
where
T: PackageCaches,
{
fn get(&self, package_root: &Path) -> Option<&Cache> {
match self {
None => None,
Some(caches) => caches.get(package_root),
}
}
fn persist(self) -> Result<()> {
match self {
None => Ok(()),
Some(caches) => caches.persist(),
}
}
}
pub(crate) struct PackageCacheMap<'a>(FxHashMap<&'a Path, Cache>);
impl<'a> PackageCacheMap<'a> {
pub(crate) fn init(
pyproject_config: &PyprojectConfig,
package_roots: &FxHashMap<&'a Path, Option<&'a Path>>,
resolver: &Resolver,
) -> Self {
fn init_cache(path: &Path) {
if let Err(e) = cache::init(path) {
error!("Failed to initialize cache at {}: {e:?}", path.display());
}
}
match pyproject_config.strategy {
PyprojectDiscoveryStrategy::Fixed => {
init_cache(&pyproject_config.settings.cache_dir);
}
PyprojectDiscoveryStrategy::Hierarchical => {
for settings in
std::iter::once(&pyproject_config.settings).chain(resolver.settings())
{
init_cache(&settings.cache_dir);
}
}
}
Self(
package_roots
.iter()
.map(|(package, package_root)| package_root.unwrap_or(package))
.unique()
.par_bridge()
.map(|cache_root| {
let settings = resolver.resolve(cache_root, pyproject_config);
let cache = Cache::open(cache_root.to_path_buf(), settings);
(cache_root, cache)
})
.collect(),
)
}
}
impl PackageCaches for PackageCacheMap<'_> {
fn get(&self, package_root: &Path) -> Option<&Cache> {
let cache = self.0.get(package_root);
if cache.is_none() {
debug!("No cache found for {}", package_root.display());
}
cache
}
fn persist(self) -> Result<()> {
self.0
.into_par_iter()
.try_for_each(|(_, cache)| cache.persist())
}
}
#[derive(Debug)]
struct Change {
path: PathBuf,
new_key: u64,
new_data: ChangeData,
}
#[derive(Debug)]
enum ChangeData {
Lint(LintCacheData),
Formatted,
}
impl ChangeData {
fn apply(self, data: &mut FileCacheData) {
match self {
ChangeData::Lint(new_lint) => {
data.lint = Some(new_lint);
}
ChangeData::Formatted => {
data.formatted = true;
}
}
}
}
#[cfg(test)]
mod tests {
use filetime::{set_file_mtime, FileTime};
use ruff_linter::settings::types::UnsafeFixes;
use std::env::temp_dir;
use std::fs;
use std::io;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicU64;
use std::time::SystemTime;
use anyhow::Result;
use filetime::{set_file_mtime, FileTime};
use itertools::Itertools;
use test_case::test_case;
use ruff_cache::CACHE_DIR_NAME;
use ruff_linter::settings::flags;
use crate::cache::RelativePathBuf;
use crate::cache::{self, Cache, FileCache};
use crate::diagnostics::{lint_path, Diagnostics};
use std::sync::atomic::AtomicU64;
use anyhow::Result;
use ruff_python_ast::imports::ImportMap;
use ruff_linter::settings::types::UnsafeFixes;
use ruff_linter::source_kind::SourceKind;
use ruff_python_ast::PySourceType;
use ruff_workspace::Settings;
use test_case::test_case;
use crate::cache::{self, FileCache, FileCacheData, FileCacheKey};
use crate::cache::{Cache, RelativePathBuf};
use crate::commands::format::{format_path, FormatCommandError, FormatMode, FormatResult};
use crate::diagnostics::{lint_path, Diagnostics};
#[test_case("../ruff_linter/resources/test/fixtures", "ruff_tests/cache_same_results_ruff_linter"; "ruff_linter_fixtures")]
#[test_case("../ruff_notebook/resources/test/fixtures", "ruff_tests/cache_same_results_ruff_notebook"; "ruff_notebook_fixtures")]
@ -377,7 +595,7 @@ mod tests {
let package_root = fs::canonicalize(package_root).unwrap();
let cache = Cache::open(package_root.clone(), &settings);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
assert_eq!(cache.changes.lock().unwrap().len(), 0);
let mut paths = Vec::new();
let mut parse_errors = Vec::new();
@ -427,7 +645,7 @@ mod tests {
}
assert_ne!(paths, &[] as &[std::path::PathBuf], "no files checked");
cache.store().unwrap();
cache.persist().unwrap();
let cache = Cache::open(package_root.clone(), &settings);
assert_ne!(cache.package.files.len(), 0);
@ -472,21 +690,21 @@ mod tests {
let test_cache = TestCache::new("cache_adds_file_on_lint");
let cache = test_cache.open();
test_cache.write_source_file("source.py", source);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
assert_eq!(cache.changes.lock().unwrap().len(), 0);
cache.store().unwrap();
cache.persist().unwrap();
let cache = test_cache.open();
test_cache
.lint_file_with_cache("source.py", &cache)
.expect("Failed to lint test file");
assert_eq!(
cache.new_files.lock().unwrap().len(),
cache.changes.lock().unwrap().len(),
1,
"A single new file should be added to the cache"
);
cache.store().unwrap();
cache.persist().unwrap();
}
#[test]
@ -497,9 +715,9 @@ mod tests {
let cache = test_cache.open();
test_cache.write_source_file("source_1.py", source);
test_cache.write_source_file("source_2.py", source);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
assert_eq!(cache.changes.lock().unwrap().len(), 0);
cache.store().unwrap();
cache.persist().unwrap();
let cache = test_cache.open();
test_cache
@ -509,12 +727,39 @@ mod tests {
.lint_file_with_cache("source_2.py", &cache)
.expect("Failed to lint test file");
assert_eq!(
cache.new_files.lock().unwrap().len(),
cache.changes.lock().unwrap().len(),
2,
"Both files should be added to the cache"
);
cache.persist().unwrap();
}
#[test]
fn cache_adds_files_on_format() {
let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let test_cache = TestCache::new("cache_adds_files_on_format");
let cache = test_cache.open();
test_cache.write_source_file("source_1.py", source);
test_cache.write_source_file("source_2.py", source);
assert_eq!(cache.changes.lock().unwrap().len(), 0);
cache.persist().unwrap();
let cache = test_cache.open();
test_cache
.format_file_with_cache("source_1.py", &cache)
.expect("Failed to format test file");
test_cache
.format_file_with_cache("source_2.py", &cache)
.expect("Failed to format test file");
assert_eq!(
cache.changes.lock().unwrap().len(),
2,
"Both files should be added to the cache"
);
cache.store().unwrap();
cache.persist().unwrap();
}
#[test]
@ -524,13 +769,13 @@ mod tests {
let test_cache = TestCache::new("cache_invalidated_on_file_modified_time");
let cache = test_cache.open();
let source_path = test_cache.write_source_file("source.py", source);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
assert_eq!(cache.changes.lock().unwrap().len(), 0);
let expected_diagnostics = test_cache
.lint_file_with_cache("source.py", &cache)
.expect("Failed to lint test file");
cache.store().unwrap();
cache.persist().unwrap();
let cache = test_cache.open();
// Update the modified time of the file to a time in the future
@ -545,7 +790,7 @@ mod tests {
.expect("Failed to lint test file");
assert_eq!(
cache.new_files.lock().unwrap().len(),
cache.changes.lock().unwrap().len(),
1,
"Cache should not be used, the file should be treated as new and added to the cache"
);
@ -583,13 +828,13 @@ mod tests {
let test_cache = TestCache::new("cache_invalidated_on_permission_change");
let cache = test_cache.open();
let path = test_cache.write_source_file("source.py", source);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
assert_eq!(cache.changes.lock().unwrap().len(), 0);
let expected_diagnostics = test_cache
.lint_file_with_cache("source.py", &cache)
.unwrap();
cache.store().unwrap();
cache.persist().unwrap();
let cache = test_cache.open();
// Flip the permissions on the file
@ -603,7 +848,7 @@ mod tests {
.unwrap();
assert_eq!(
cache.new_files.lock().unwrap().len(),
cache.changes.lock().unwrap().len(),
1,
"Cache should not be used, the file should be treated as new and added to the cache"
);
@ -615,8 +860,8 @@ mod tests {
}
#[test]
fn cache_removes_stale_files_on_store() {
let test_cache = TestCache::new("cache_removes_stale_files_on_store");
fn cache_removes_stale_files_on_persist() {
let test_cache = TestCache::new("cache_removes_stale_files_on_persist");
let mut cache = test_cache.open();
// Add a file to the cache that hasn't been linted or seen since the '70s!
@ -626,10 +871,7 @@ mod tests {
FileCache {
key: 123,
last_seen: AtomicU64::new(123),
imports: ImportMap::new(),
messages: Vec::new(),
source: String::new(),
notebook_index: None,
data: FileCacheData::default(),
},
);
@ -637,34 +879,125 @@ mod tests {
let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
test_cache.write_source_file("new.py", source);
let new_path_key = RelativePathBuf::from("new.py");
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
assert_eq!(cache.changes.lock().unwrap().len(), 0);
test_cache
.lint_file_with_cache("new.py", &cache)
.expect("Failed to lint test file");
// Storing the cache should remove the old (`old.py`) file.
cache.store().unwrap();
cache.persist().unwrap();
// So we when we open the cache again it shouldn't contain `old.py`.
let cache = test_cache.open();
assert!(
cache.package.files.keys().collect_vec() == vec![&new_path_key],
assert_eq!(
cache.package.files.keys().collect_vec(),
vec![&new_path_key],
"Only the new file should be present"
);
}
#[test]
fn format_updates_cache_entry() {
let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let test_cache = TestCache::new("format_updates_cache_entry");
let cache = test_cache.open();
test_cache.write_source_file("source.py", source);
assert_eq!(cache.changes.lock().unwrap().len(), 0);
cache.persist().unwrap();
let cache = test_cache.open();
// Cache the lint results
test_cache
.lint_file_with_cache("source.py", &cache)
.expect("Failed to lint test file");
cache.persist().unwrap();
let mut cache = test_cache.open();
// Now lint the file
test_cache
.format_file_with_cache("source.py", &cache)
.expect("Failed to format test file");
cache.save();
assert_eq!(cache.package.files.len(), 1);
let Some(file_cache) = cache.get(
Path::new("source.py"),
&FileCacheKey::from_path(&test_cache.package_root.join("source.py")).unwrap(),
) else {
panic!("Cache entry for `source.py` is missing.");
};
assert!(file_cache.data.lint.is_some());
assert!(file_cache.data.formatted);
}
#[test]
fn file_changes_invalidate_file_cache() {
let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let test_cache = TestCache::new("file_changes_invalidate_file_cache");
let cache = test_cache.open();
let source_path = test_cache.write_source_file("source.py", source);
assert_eq!(cache.changes.lock().unwrap().len(), 0);
cache.persist().unwrap();
let cache = test_cache.open();
// Cache the format and lint results
test_cache
.lint_file_with_cache("source.py", &cache)
.expect("Failed to lint test file");
test_cache
.format_file_with_cache("source.py", &cache)
.expect("Failed to format test file");
cache.persist().unwrap();
let mut cache = test_cache.open();
assert_eq!(cache.package.files.len(), 1);
set_file_mtime(
&source_path,
FileTime::from_system_time(SystemTime::now() + std::time::Duration::from_secs(1)),
)
.unwrap();
test_cache
.format_file_with_cache("source.py", &cache)
.expect("Failed to format test file");
cache.save();
assert_eq!(cache.package.files.len(), 1);
let Some(file_cache) = cache.get(
Path::new("source.py"),
&FileCacheKey::from_path(&source_path).unwrap(),
) else {
panic!("Cache entry for `source.py` is missing.");
};
assert_eq!(file_cache.data.lint, None);
assert!(file_cache.data.formatted);
}
struct TestCache {
package_root: PathBuf,
settings: Settings,
}
impl TestCache {
fn new(name: &str) -> Self {
fn new(test_case: &str) -> Self {
// Build a new cache directory and clear it
let mut test_dir = temp_dir();
test_dir.push("ruff_tests/cache");
test_dir.push(name);
test_dir.push(test_case);
let _ = fs::remove_dir_all(&test_dir);
@ -718,6 +1051,22 @@ mod tests {
UnsafeFixes::Enabled,
)
}
fn format_file_with_cache(
&self,
path: &str,
cache: &Cache,
) -> Result<FormatResult, FormatCommandError> {
let file_path = self.package_root.join(path);
format_path(
&file_path,
&self.settings.formatter,
&SourceKind::Python(std::fs::read_to_string(&file_path).unwrap()),
PySourceType::Python,
FormatMode::Write,
Some(cache),
)
}
}
impl Drop for TestCache {

View file

@ -1,4 +1,3 @@
use std::collections::HashMap;
use std::fmt::Write;
use std::io;
use std::path::{Path, PathBuf};
@ -7,28 +6,26 @@ use std::time::Instant;
use anyhow::Result;
use colored::Colorize;
use ignore::Error;
use itertools::Itertools;
use log::{debug, error, warn};
#[cfg(not(target_family = "wasm"))]
use rayon::prelude::*;
use ruff_linter::settings::types::UnsafeFixes;
use rustc_hash::FxHashMap;
use ruff_diagnostics::Diagnostic;
use ruff_linter::message::Message;
use ruff_linter::registry::Rule;
use ruff_linter::settings::types::UnsafeFixes;
use ruff_linter::settings::{flags, LinterSettings};
use ruff_linter::{fs, warn_user_once, IOError};
use ruff_python_ast::imports::ImportMap;
use ruff_source_file::SourceFileBuilder;
use ruff_text_size::{TextRange, TextSize};
use ruff_workspace::resolver::{
match_exclusion, python_files_in_path, PyprojectConfig, PyprojectDiscoveryStrategy,
ResolvedFile,
match_exclusion, python_files_in_path, PyprojectConfig, ResolvedFile,
};
use crate::args::CliOverrides;
use crate::cache::{self, Cache};
use crate::cache::{Cache, PackageCacheMap, PackageCaches};
use crate::diagnostics::Diagnostics;
use crate::panic::catch_unwind;
@ -52,28 +49,6 @@ pub(crate) fn check(
return Ok(Diagnostics::default());
}
// Initialize the cache.
if cache.into() {
fn init_cache(path: &Path) {
if let Err(e) = cache::init(path) {
error!("Failed to initialize cache at {}: {e:?}", path.display());
}
}
match pyproject_config.strategy {
PyprojectDiscoveryStrategy::Fixed => {
init_cache(&pyproject_config.settings.cache_dir);
}
PyprojectDiscoveryStrategy::Hierarchical => {
for settings in
std::iter::once(&pyproject_config.settings).chain(resolver.settings())
{
init_cache(&settings.cache_dir);
}
}
}
};
// Discover the package root for each Python file.
let package_roots = resolver.package_roots(
&paths
@ -85,19 +60,15 @@ pub(crate) fn check(
);
// Load the caches.
let caches = bool::from(cache).then(|| {
package_roots
.iter()
.map(|(package, package_root)| package_root.unwrap_or(package))
.unique()
.par_bridge()
.map(|cache_root| {
let settings = resolver.resolve(cache_root, pyproject_config);
let cache = Cache::open(cache_root.to_path_buf(), settings);
(cache_root, cache)
})
.collect::<HashMap<&Path, Cache>>()
});
let caches = if bool::from(cache) {
Some(PackageCacheMap::init(
pyproject_config,
&package_roots,
&resolver,
))
} else {
None
};
let start = Instant::now();
let diagnostics_per_file = paths.par_iter().filter_map(|resolved_file| {
@ -122,14 +93,7 @@ pub(crate) fn check(
}
let cache_root = package.unwrap_or_else(|| path.parent().unwrap_or(path));
let cache = caches.as_ref().and_then(|caches| {
if let Some(cache) = caches.get(&cache_root) {
Some(cache)
} else {
debug!("No cache found for {}", cache_root.display());
None
}
});
let cache = caches.get(cache_root);
lint_path(
path,
@ -210,11 +174,7 @@ pub(crate) fn check(
all_diagnostics.messages.sort();
// Store the caches.
if let Some(caches) = caches {
caches
.into_par_iter()
.try_for_each(|(_, cache)| cache.store())?;
}
caches.persist()?;
let duration = start.elapsed();
debug!("Checked {:?} files in: {:?}", checked_files, duration);
@ -266,13 +226,12 @@ mod test {
use std::os::unix::fs::OpenOptionsExt;
use anyhow::Result;
use ruff_linter::settings::types::UnsafeFixes;
use rustc_hash::FxHashMap;
use tempfile::TempDir;
use ruff_linter::message::{Emitter, EmitterContext, TextEmitter};
use ruff_linter::registry::Rule;
use ruff_linter::settings::types::UnsafeFixes;
use ruff_linter::settings::{flags, LinterSettings};
use ruff_workspace::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy};
use ruff_workspace::Settings;

View file

@ -10,7 +10,7 @@ use colored::Colorize;
use itertools::Itertools;
use log::error;
use rayon::iter::Either::{Left, Right};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use similar::TextDiff;
use thiserror::Error;
use tracing::debug;
@ -23,10 +23,11 @@ use ruff_linter::warn_user_once;
use ruff_python_ast::{PySourceType, SourceType};
use ruff_python_formatter::{format_module_source, FormatModuleError};
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_workspace::resolver::{match_exclusion, python_files_in_path};
use ruff_workspace::resolver::{match_exclusion, python_files_in_path, ResolvedFile};
use ruff_workspace::FormatterSettings;
use crate::args::{CliOverrides, FormatArguments};
use crate::cache::{Cache, FileCacheKey, PackageCacheMap, PackageCaches};
use crate::panic::{catch_unwind, PanicError};
use crate::resolve::resolve;
use crate::ExitStatus;
@ -73,9 +74,34 @@ pub(crate) fn format(
return Ok(ExitStatus::Success);
}
// Discover the package root for each Python file.
let package_roots = resolver.package_roots(
&paths
.iter()
.flatten()
.map(ResolvedFile::path)
.collect::<Vec<_>>(),
&pyproject_config,
);
let caches = if cli.no_cache {
None
} else {
// `--no-cache` doesn't respect code changes, and so is often confusing during
// development.
#[cfg(debug_assertions)]
crate::warn_user!("Detected debug build without --no-cache.");
Some(PackageCacheMap::init(
&pyproject_config,
&package_roots,
&resolver,
))
};
let start = Instant::now();
let (mut results, mut errors): (Vec<_>, Vec<_>) = paths
.into_par_iter()
.par_iter()
.filter_map(|entry| {
match entry {
Ok(resolved_file) => {
@ -110,6 +136,13 @@ pub(crate) fn format(
}
};
let package = path
.parent()
.and_then(|parent| package_roots.get(parent).copied())
.flatten();
let cache_root = package.unwrap_or_else(|| path.parent().unwrap_or(path));
let cache = caches.get(cache_root);
Some(
match catch_unwind(|| {
format_path(
@ -118,21 +151,22 @@ pub(crate) fn format(
&unformatted,
source_type,
mode,
cache,
)
}) {
Ok(inner) => inner.map(|result| FormatPathResult {
path: resolved_file.into_path(),
path: resolved_file.path().to_path_buf(),
unformatted,
result,
}),
Err(error) => Err(FormatCommandError::Panic(
Some(resolved_file.into_path()),
Some(resolved_file.path().to_path_buf()),
error,
)),
},
)
}
Err(err) => Some(Err(FormatCommandError::Ignore(err))),
Err(err) => Some(Err(FormatCommandError::Ignore(err.clone()))),
}
})
.partition_map(|result| match result {
@ -168,6 +202,8 @@ pub(crate) fn format(
duration
);
caches.persist()?;
// Report on any errors.
for error in &errors {
error!("{error}");
@ -214,13 +250,26 @@ pub(crate) fn format(
/// Format the file at the given [`Path`].
#[tracing::instrument(level="debug", skip_all, fields(path = %path.display()))]
fn format_path(
pub(crate) fn format_path(
path: &Path,
settings: &FormatterSettings,
unformatted: &SourceKind,
source_type: PySourceType,
mode: FormatMode,
cache: Option<&Cache>,
) -> Result<FormatResult, FormatCommandError> {
if let Some(cache) = cache {
let relative_path = cache
.relative_path(path)
.expect("wrong package cache for file");
if let Ok(cache_key) = FileCacheKey::from_path(path) {
if cache.is_formatted(relative_path, &cache_key) {
return Ok(FormatResult::Unchanged);
}
}
}
// Format the source.
let format_result = match format_source(unformatted, source_type, Some(path), settings)? {
FormattedSource::Formatted(formatted) => match mode {
@ -231,13 +280,35 @@ fn format_path(
formatted
.write(&mut writer)
.map_err(|err| FormatCommandError::Write(Some(path.to_path_buf()), err))?;
if let Some(cache) = cache {
if let Ok(cache_key) = FileCacheKey::from_path(path) {
let relative_path = cache
.relative_path(path)
.expect("wrong package cache for file");
cache.set_formatted(relative_path.to_path_buf(), &cache_key);
}
}
FormatResult::Formatted
}
FormatMode::Check => FormatResult::Formatted,
FormatMode::Diff => FormatResult::Diff(formatted),
},
FormattedSource::Unchanged => FormatResult::Unchanged,
FormattedSource::Unchanged => {
if let Some(cache) = cache {
if let Ok(cache_key) = FileCacheKey::from_path(path) {
let relative_path = cache
.relative_path(path)
.expect("wrong package cache for file");
cache.set_formatted(relative_path.to_path_buf(), &cache_key);
}
}
FormatResult::Unchanged
}
};
Ok(format_result)
}

View file

@ -3,16 +3,14 @@
use std::fs::File;
use std::io;
use std::ops::{Add, AddAssign};
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use anyhow::{Context, Result};
use colored::Colorize;
use filetime::FileTime;
use log::{debug, error, warn};
use rustc_hash::FxHashMap;
use crate::cache::{Cache, FileCacheKey, LintCacheData};
use ruff_diagnostics::Diagnostic;
use ruff_linter::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult};
use ruff_linter::logging::DisplayParseError;
@ -23,7 +21,6 @@ use ruff_linter::settings::types::UnsafeFixes;
use ruff_linter::settings::{flags, LinterSettings};
use ruff_linter::source_kind::{SourceError, SourceKind};
use ruff_linter::{fs, IOError, SyntaxError};
use ruff_macros::CacheKey;
use ruff_notebook::{Notebook, NotebookError, NotebookIndex};
use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::{SourceType, TomlSourceType};
@ -31,33 +28,6 @@ use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder};
use ruff_text_size::{TextRange, TextSize};
use ruff_workspace::Settings;
use crate::cache::Cache;
#[derive(CacheKey)]
pub(crate) struct FileCacheKey {
/// Timestamp when the file was last modified before the (cached) check.
file_last_modified: FileTime,
/// Permissions of the file before the (cached) check.
file_permissions_mode: u32,
}
impl FileCacheKey {
fn from_path(path: &Path) -> io::Result<FileCacheKey> {
// Construct a cache key for the file
let metadata = path.metadata()?;
#[cfg(unix)]
let permissions = metadata.permissions().mode();
#[cfg(windows)]
let permissions: u32 = metadata.permissions().readonly().into();
Ok(FileCacheKey {
file_last_modified: FileTime::from_last_modification_time(&metadata),
file_permissions_mode: permissions,
})
}
}
#[derive(Debug, Default, PartialEq)]
pub(crate) struct Diagnostics {
pub(crate) messages: Vec<Message>,
@ -230,9 +200,11 @@ pub(crate) fn lint_path(
.expect("wrong package cache for file");
let cache_key = FileCacheKey::from_path(path).context("Failed to create cache key")?;
if let Some(cache) = cache.get(relative_path, &cache_key) {
return Ok(cache.as_diagnostics(path));
let cached_diagnostics = cache
.get(relative_path, &cache_key)
.and_then(|entry| entry.to_diagnostics(path));
if let Some(diagnostics) = cached_diagnostics {
return Ok(diagnostics);
}
// Stash the file metadata for later so when we update the cache it reflects the prerun
@ -332,12 +304,14 @@ pub(crate) fn lint_path(
if let Some((cache, relative_path, key)) = caching {
// We don't cache parsing errors.
if parse_error.is_none() {
cache.update(
cache.update_lint(
relative_path.to_owned(),
key,
&key,
LintCacheData::from_messages(
&messages,
&imports,
source_kind.as_ipy_notebook().map(Notebook::index),
imports.clone(),
source_kind.as_ipy_notebook().map(Notebook::index).cloned(),
),
);
}
}

View file

@ -110,7 +110,7 @@ fn mixed_line_endings() -> Result<()> {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.current_dir(tempdir.path())
.args(["format", "--diff", "--isolated"])
.args(["format", "--no-cache", "--diff", "--isolated"])
.arg("."), @r###"
success: true
exit_code: 0
@ -173,7 +173,7 @@ OTHER = "OTHER"
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.current_dir(tempdir.path())
.args(["format", "--check", "--config"])
.args(["format", "--no-cache", "--check", "--config"])
.arg(ruff_toml.file_name().unwrap())
// Explicitly pass test.py, should be formatted regardless of it being excluded by format.exclude
.arg(test_path.file_name().unwrap())
@ -322,7 +322,7 @@ format = "json"
#[test]
fn test_diff() {
let args = ["format", "--isolated", "--diff"];
let args = ["format", "--no-cache", "--isolated", "--diff"];
let fixtures = Path::new("resources").join("test").join("fixtures");
let paths = [
fixtures.join("unformatted.py"),
@ -365,7 +365,7 @@ fn test_diff() {
#[test]
fn test_diff_no_change() {
let args = ["format", "--isolated", "--diff"];
let args = ["format", "--no-cache", "--isolated", "--diff"];
let fixtures = Path::new("resources").join("test").join("fixtures");
let paths = [fixtures.join("unformatted.py")];
insta::with_settings!({filters => vec![

View file

@ -390,15 +390,17 @@ Options:
-h, --help
Print help
Miscellaneous:
-n, --no-cache Disable cache reads
--cache-dir <CACHE_DIR> Path to the cache directory [env: RUFF_CACHE_DIR=]
--isolated Ignore all configuration files
--stdin-filename <STDIN_FILENAME> The name of the file when passing it through stdin
File selection:
--respect-gitignore Respect file exclusions via `.gitignore` and other standard ignore files. Use `--no-respect-gitignore` to disable
--exclude <FILE_PATTERN> List of paths, used to omit files and/or directories from analysis
--force-exclude Enforce exclusions, even for paths passed to Ruff directly on the command-line. Use `--no-force-exclude` to disable
Miscellaneous:
--isolated Ignore all configuration files
--stdin-filename <STDIN_FILENAME> The name of the file when passing it through stdin
Log levels:
-v, --verbose Enable verbose logging
-q, --quiet Print diagnostics, but nothing else