This commit is contained in:
Trevor Burnham 2025-12-22 16:43:49 -05:00 committed by GitHub
commit 4162856447
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 332 additions and 2 deletions

2
Cargo.lock generated
View file

@ -5711,6 +5711,7 @@ name = "uv-cache"
version = "0.0.8"
dependencies = [
"clap",
"filetime",
"fs-err",
"nanoid",
"rmp-serde",
@ -5719,6 +5720,7 @@ dependencies = [
"serde",
"tempfile",
"thiserror 2.0.17",
"tokio",
"tracing",
"uv-cache-info",
"uv-cache-key",

View file

@ -28,6 +28,7 @@ uv-static = { workspace = true }
clap = { workspace = true, features = ["derive", "env"], optional = true }
fs-err = { workspace = true, features = ["tokio"] }
tokio = { workspace = true }
nanoid = { workspace = true }
rmp-serde = { workspace = true }
rustc-hash = { workspace = true }
@ -37,3 +38,6 @@ tempfile = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
walkdir = { workspace = true }
[dev-dependencies]
filetime = { workspace = true }

View file

@ -5,6 +5,7 @@ use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use std::time::{Duration, SystemTime};
use rustc_hash::FxHashMap;
use tracing::{debug, trace, warn};
@ -480,11 +481,25 @@ impl Cache {
Err(err) => return Err(err.into()),
};
Ok(Self {
let cache = Self {
root: std::path::absolute(root).map_err(Error::Absolute)?,
lock_file,
..self
})
};
// Spawn background autoprune task (fire-and-forget).
if !cache.is_temporary() {
let root = cache.root.clone();
tokio::spawn(async move {
// Brief delay to avoid competing with startup I/O.
tokio::time::sleep(Duration::from_secs(1)).await;
if let Err(err) = autoprune(&root) {
debug!("Autoprune failed: {err}");
}
});
}
Ok(cache)
}
/// Initialize the [`Cache`], assuming that there are no other uv processes running.
@ -841,6 +856,133 @@ impl Cache {
}
}
/// Prune cache entries that are safe to remove without an exclusive lock.
///
/// This function is designed to run in the background while other uv processes are using the
/// cache. It only removes entries that are provably unused:
/// - Archives that have no symlinks pointing to them and are old enough that any in-flight
///   operations would have completed
/// - Outdated cache bucket versions (e.g., `wheels-v4` when current is `wheels-v5`)
///
/// Whenever an archive's age or identity cannot be established (unreadable modification time,
/// path that cannot be canonicalized), the archive is kept: a leftover directory is harmless,
/// while deleting a live archive is not.
///
/// # Errors
///
/// Returns the first I/O error hit while listing directories, reading entry metadata, scanning
/// for archive references, or removing an entry.
fn autoprune(root: &Path) -> io::Result<()> {
    // Grace period: only remove archives older than this to avoid races with in-flight
    // operations.
    const GRACE_PERIOD: Duration = Duration::from_secs(3600);

    // `None` if the clock is so close to the earliest representable time that the subtraction
    // would underflow; in that case no archive can be older than the grace period.
    let cutoff = SystemTime::now().checked_sub(GRACE_PERIOD);

    // Remove outdated top-level bucket directories.
    for entry in fs_err::read_dir(root)? {
        let entry = entry?;
        let name = entry.file_name();

        // Skip marker files.
        if name == "CACHEDIR.TAG" || name == ".gitignore" || name == ".git" || name == ".lock" {
            continue;
        }

        // Skip temp directories (created by tempfile crate, used for in-flight operations).
        if name.to_string_lossy().starts_with(".tmp") {
            continue;
        }

        if !entry.metadata()?.is_dir() {
            continue;
        }

        // If the directory is not a current cache bucket, remove it.
        if CacheBucket::iter().all(|bucket| name != bucket.to_str()) {
            debug!(
                "Autoprune: removing outdated cache bucket: {}",
                entry.path().display()
            );
            rm_rf(entry.path())?;
        }
    }

    // Nothing to prune without an archive bucket; skip the (potentially large) reference scan.
    let archive_dir = root.join(CacheBucket::Archive.to_str());
    if !archive_dir.is_dir() {
        return Ok(());
    }
    let Some(cutoff) = cutoff else {
        return Ok(());
    };

    // Find all archive references. This must happen before the archive directory is listed so
    // that the reference set is never newer than the set of removal candidates.
    let references = find_archive_references_for_autoprune(root)?;

    // Remove old unreferenced archives.
    for entry in fs_err::read_dir(&archive_dir)? {
        let entry = entry?;
        let path = entry.path();

        // Skip if modified recently (might be in-flight), and also if the modification time is
        // unavailable: without it the grace period cannot be enforced.
        match entry.metadata()?.modified() {
            Ok(modified) if modified <= cutoff => {}
            Ok(_) => continue,
            Err(err) => {
                debug!(
                    "Autoprune: skipping archive with unknown modification time: {}: {err}",
                    path.display()
                );
                continue;
            }
        }

        // The reference set holds canonical paths, so an archive that cannot be canonicalized
        // cannot be shown to be unreferenced; keep it.
        let canonical = match fs_err::canonicalize(&path) {
            Ok(canonical) => canonical,
            Err(err) => {
                debug!(
                    "Autoprune: skipping archive that could not be canonicalized: {}: {err}",
                    path.display()
                );
                continue;
            }
        };

        // Skip if referenced.
        if references.contains(&canonical) {
            continue;
        }

        debug!(
            "Autoprune: removing unreferenced archive: {}",
            path.display()
        );
        rm_rf(path)?;
    }

    Ok(())
}
/// Find all paths that reference entries in the archive bucket.
///
/// This is a simplified version of `Cache::find_archive_references` that only returns
/// the set of referenced archive paths (not the reverse mapping).
///
/// Walks the source-distribution and wheel buckets under `root` and returns the canonical path
/// of every archive that a link entry resolves to. Links that cannot be resolved (dangling,
/// unreadable, or unparseable) are ignored.
///
/// # Errors
///
/// Returns an error if walking a bucket directory fails.
fn find_archive_references_for_autoprune(
    root: &Path,
) -> io::Result<rustc_hash::FxHashSet<PathBuf>> {
    let mut references = rustc_hash::FxHashSet::default();

    for bucket in [CacheBucket::SourceDistributions, CacheBucket::Wheels] {
        let bucket_path = root.join(bucket.to_str());
        if !bucket_path.is_dir() {
            continue;
        }

        // Prune subtrees and files that can never be archive links, so the walk stays cheap.
        let walker = walkdir::WalkDir::new(&bucket_path)
            .into_iter()
            .filter_entry(|entry| {
                !(entry.file_name() == "src"
                    || entry.file_name() == ".lock"
                    || entry.file_name() == ".gitignore"
                    || entry.path().extension().is_some_and(|ext| {
                        ext.eq_ignore_ascii_case("lock")
                            || ext.eq_ignore_ascii_case("whl")
                            || ext.eq_ignore_ascii_case("http")
                            || ext.eq_ignore_ascii_case("rev")
                            || ext.eq_ignore_ascii_case("msgpack")
                    }))
            });

        for entry in walker {
            let entry = entry?;

            // On Windows, archive references are files containing structured data; everywhere
            // else they are symlinks.
            let is_link = if cfg!(windows) {
                entry.file_type().is_file()
            } else {
                entry.file_type().is_symlink()
            };
            if !is_link {
                continue;
            }

            if let Some(target) = resolve_archive_link_for_autoprune(root, entry.path()) {
                references.insert(target);
            }
        }
    }

    Ok(references)
}

/// Resolve a link file to the canonical path of the archive it names.
///
/// Canonicalizing the link file itself would yield a path inside the wheel or sdist bucket,
/// which can never match an archive path, so the link's contents must be parsed instead.
///
/// NOTE(review): assumes `Link::id` names a directory directly under the archive bucket;
/// confirm against the code that writes links on Windows. A link whose version differs from
/// the current archive bucket is still treated as a reference, which errs on the side of
/// keeping archives.
#[cfg(windows)]
fn resolve_archive_link_for_autoprune(root: &Path, link_path: &Path) -> Option<PathBuf> {
    let contents = fs_err::read_to_string(link_path).ok()?;
    let link = Link::from_str(&contents).ok()?;
    root.join(CacheBucket::Archive.to_str())
        .join(&link.id)
        .canonicalize()
        .ok()
}

/// Resolve a symlink to the canonical path of the archive it points to.
///
/// Returns `None` for dangling links.
#[cfg(not(windows))]
fn resolve_archive_link_for_autoprune(_root: &Path, link_path: &Path) -> Option<PathBuf> {
    link_path.canonicalize().ok()
}
/// An archive (unzipped wheel) that exists in the local cache.
#[derive(Debug, Clone)]
#[allow(unused)]
@ -1443,3 +1585,185 @@ mod tests {
assert!(Link::from_str("archive-v0/").is_err());
}
}
#[cfg(test)]
mod autoprune_tests {
    use super::*;
    use std::time::{Duration, SystemTime};
    use tempfile::TempDir;

    /// Age applied to "old" archives: twice the one-hour grace period used by `autoprune`.
    const STALE_AGE: Duration = Duration::from_secs(7200);

    /// Path of the directory for `bucket` under `root`.
    ///
    /// Derived from `CacheBucket::to_str` rather than hard-coded, so the fixtures keep matching
    /// the current bucket versions when those are bumped.
    fn bucket_dir(root: &Path, bucket: CacheBucket) -> PathBuf {
        root.join(bucket.to_str())
    }

    /// Create a cache root containing the current archive, wheel, and sdist buckets plus the
    /// marker files that `autoprune` must leave alone.
    fn create_test_cache() -> TempDir {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // Create current bucket versions
        fs_err::create_dir_all(bucket_dir(root, CacheBucket::Archive)).unwrap();
        fs_err::create_dir_all(bucket_dir(root, CacheBucket::Wheels)).unwrap();
        fs_err::create_dir_all(bucket_dir(root, CacheBucket::SourceDistributions)).unwrap();

        // Create marker files
        fs_err::write(root.join(".gitignore"), "*").unwrap();
        fs_err::write(root.join("CACHEDIR.TAG"), "").unwrap();

        temp
    }

    /// Create a non-empty archive directory called `name` in the archive bucket.
    fn create_archive(root: &Path, name: &str) -> PathBuf {
        let archive_path = bucket_dir(root, CacheBucket::Archive).join(name);
        fs_err::create_dir_all(&archive_path).unwrap();
        fs_err::write(archive_path.join("somefile"), "data").unwrap();
        archive_path
    }

    /// Push the mtime of `path` back beyond the grace period.
    ///
    /// Must be called after the archive's contents are written, since creating a file inside
    /// the directory updates the directory's mtime.
    fn age_beyond_grace_period(path: &Path) {
        let old_time = SystemTime::now() - STALE_AGE;
        filetime::set_file_mtime(path, filetime::FileTime::from_system_time(old_time)).unwrap();
    }

    #[test]
    fn test_autoprune_removes_outdated_buckets() {
        let temp = create_test_cache();
        let root = temp.path();

        // Create outdated bucket versions
        fs_err::create_dir_all(root.join("wheels-v4")).unwrap();
        fs_err::create_dir_all(root.join("simple-v10")).unwrap();

        assert!(root.join("wheels-v4").exists());
        assert!(root.join("simple-v10").exists());

        autoprune(root).unwrap();

        // Outdated buckets should be removed
        assert!(!root.join("wheels-v4").exists());
        assert!(!root.join("simple-v10").exists());

        // Current buckets should remain
        assert!(bucket_dir(root, CacheBucket::Wheels).exists());
        assert!(bucket_dir(root, CacheBucket::Archive).exists());
    }

    #[test]
    fn test_autoprune_preserves_current_buckets() {
        let temp = create_test_cache();
        let root = temp.path();

        autoprune(root).unwrap();

        // All current buckets should remain
        assert!(bucket_dir(root, CacheBucket::Archive).exists());
        assert!(bucket_dir(root, CacheBucket::Wheels).exists());
        assert!(bucket_dir(root, CacheBucket::SourceDistributions).exists());
        assert!(root.join(".gitignore").exists());
        assert!(root.join("CACHEDIR.TAG").exists());
    }

    #[test]
    fn test_autoprune_preserves_temp_directories() {
        let temp = create_test_cache();
        let root = temp.path();

        // Create temp directories like tempfile crate does
        let temp_dir = root.join(".tmpABC123");
        fs_err::create_dir_all(&temp_dir).unwrap();
        fs_err::write(temp_dir.join("in_progress.whl"), "data").unwrap();

        autoprune(root).unwrap();

        // Temp directories should be preserved (they're used for in-flight operations)
        assert!(temp_dir.exists());
    }

    #[test]
    fn test_autoprune_removes_old_unreferenced_archives() {
        let temp = create_test_cache();
        let root = temp.path();

        // Create an unreferenced archive, aged beyond the grace period
        let archive_path = create_archive(root, "test-archive-123");
        age_beyond_grace_period(&archive_path);

        assert!(archive_path.exists());

        autoprune(root).unwrap();

        // Unreferenced old archive should be removed
        assert!(!archive_path.exists());
    }

    #[test]
    fn test_autoprune_preserves_recent_unreferenced_archives() {
        let temp = create_test_cache();
        let root = temp.path();

        // Create an unreferenced archive with recent mtime (within grace period)
        let archive_path = create_archive(root, "recent-archive");

        autoprune(root).unwrap();

        // Recent archive should be preserved (grace period protection)
        assert!(archive_path.exists());
    }

    #[test]
    #[cfg(unix)]
    fn test_autoprune_preserves_referenced_archives() {
        let temp = create_test_cache();
        let root = temp.path();

        // Create an archive, aged beyond the grace period
        let archive_path = create_archive(root, "referenced-archive");
        age_beyond_grace_period(&archive_path);

        // Create a symlink referencing the archive
        let wheel_dir = bucket_dir(root, CacheBucket::Wheels)
            .join("pypi")
            .join("somepackage");
        fs_err::create_dir_all(&wheel_dir).unwrap();
        fs_err::os::unix::fs::symlink(&archive_path, wheel_dir.join("link")).unwrap();

        autoprune(root).unwrap();

        // Referenced archive should be preserved even though it's old
        assert!(archive_path.exists());
    }

    #[test]
    fn test_find_archive_references_empty() {
        let temp = create_test_cache();
        let root = temp.path();

        let refs = find_archive_references_for_autoprune(root).unwrap();
        assert!(refs.is_empty());
    }

    #[test]
    #[cfg(unix)]
    fn test_find_archive_references_with_symlinks() {
        let temp = create_test_cache();
        let root = temp.path();

        // Create an archive
        let archive_path = bucket_dir(root, CacheBucket::Archive).join("my-archive");
        fs_err::create_dir_all(&archive_path).unwrap();

        // Create symlinks in wheels bucket
        let wheel_dir = bucket_dir(root, CacheBucket::Wheels)
            .join("pypi")
            .join("pkg");
        fs_err::create_dir_all(&wheel_dir).unwrap();
        fs_err::os::unix::fs::symlink(&archive_path, wheel_dir.join("link1")).unwrap();
        fs_err::os::unix::fs::symlink(&archive_path, wheel_dir.join("link2")).unwrap();

        let refs = find_archive_references_for_autoprune(root).unwrap();

        // Should find the canonical path of the archive, once, despite two links to it
        let canonical = fs_err::canonicalize(&archive_path).unwrap();
        assert!(refs.contains(&canonical));
        assert_eq!(refs.len(), 1);
    }
}