Use atomic write when persisting cache (#9981)

This commit is contained in:
Micha Reiser 2024-02-14 15:09:21 +01:00 committed by GitHub
parent f40e012b4e
commit bb8d2034e2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 23 additions and 7 deletions

View file

@@ -48,6 +48,7 @@ serde = { workspace = true }
 serde_json = { workspace = true }
 shellexpand = { workspace = true }
 strum = { workspace = true, features = [] }
+tempfile = { workspace = true }
 thiserror = { workspace = true }
 toml = { workspace = true }
 tracing = { workspace = true, features = ["log"] }

View file

@@ -1,7 +1,7 @@
 use std::fmt::Debug;
 use std::fs::{self, File};
 use std::hash::Hasher;
-use std::io::{self, BufReader, BufWriter, Write};
+use std::io::{self, BufReader, Write};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::Mutex;
@@ -15,6 +15,7 @@ use rayon::iter::ParallelIterator;
 use rayon::iter::{IntoParallelIterator, ParallelBridge};
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
+use tempfile::NamedTempFile;

 use ruff_cache::{CacheKey, CacheKeyHasher};
 use ruff_diagnostics::{DiagnosticKind, Fix};
@@ -165,15 +166,29 @@ impl Cache {
             return Ok(());
         }

-        let file = File::create(&self.path)
-            .with_context(|| format!("Failed to create cache file '{}'", self.path.display()))?;
-        let writer = BufWriter::new(file);
-        bincode::serialize_into(writer, &self.package).with_context(|| {
+        // Write the cache to a temporary file first and then rename it for an "atomic" write.
+        // Protects against data loss if the process is killed during the write and races between different ruff
+        // processes, resulting in a corrupted cache file. https://github.com/astral-sh/ruff/issues/8147#issuecomment-1943345964
+        let mut temp_file =
+            NamedTempFile::new_in(self.path.parent().expect("Write path must have a parent"))
+                .context("Failed to create temporary file")?;
+
+        // Serialize to in-memory buffer because hyperfine benchmark showed that it's faster than
+        // using a `BufWriter` and our cache files are small enough that streaming isn't necessary.
+        let serialized =
+            bincode::serialize(&self.package).context("Failed to serialize cache data")?;
+
+        temp_file
+            .write_all(&serialized)
+            .context("Failed to write serialized cache to temporary file.")?;
+
+        temp_file.persist(&self.path).with_context(|| {
             format!(
-                "Failed to serialise cache to file '{}'",
+                "Failed to rename temporary cache file to {}",
                 self.path.display()
             )
-        })
+        })?;
+
+        Ok(())
     }

     /// Applies the pending changes without storing the cache to disk.