Add a garbage collection mechanism to the CLI (#1217)

## Summary

Detects unused cache entries, which can come in a few forms:

1. Directories that are outdated according to our versioning scheme.
2. Old source distribution builds (i.e., we have a more recent version).
3. Old wheels (stored in `archive-v0`, but not symlinked-to from
anywhere in the cache).

Closes https://github.com/astral-sh/puffin/issues/1059.
This commit is contained in:
Charlie Marsh 2024-03-21 14:07:48 -04:00 committed by GitHub
parent 7ee90dc71f
commit 0f96386032
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 348 additions and 28 deletions

View file

@ -24,7 +24,9 @@ clap = { workspace = true, features = ["derive", "env"], optional = true }
directories = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
nanoid = { workspace = true }
rustc-hash = { workspace = true }
serde = { workspace = true, features = ["derive"] }
tempfile = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }
walkdir = { workspace = true }

View file

@ -6,10 +6,12 @@ use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use distribution_types::InstalledDist;
use fs_err as fs;
use rustc_hash::FxHashSet;
use tempfile::{tempdir, TempDir};
use tracing::debug;
use distribution_types::InstalledDist;
use uv_fs::directories;
use uv_normalize::PackageName;
@ -283,17 +285,72 @@ impl Cache {
/// Returns the number of entries removed from the cache.
pub fn remove(&self, name: &PackageName) -> Result<Removal, io::Error> {
let mut summary = Removal::default();
for bucket in [
CacheBucket::Wheels,
CacheBucket::BuiltWheels,
CacheBucket::Git,
CacheBucket::Interpreter,
CacheBucket::Simple,
] {
for bucket in CacheBucket::iter() {
summary += bucket.remove(self, name)?;
}
Ok(summary)
}
/// Run the garbage collector on the cache, removing any dangling entries.
///
/// Pruning happens in two passes:
///
/// 1. Every top-level entry of the cache root is removed unless it is one of the marker
///    entries (`CACHEDIR.TAG`, `.gitignore`, `.git`) or a directory whose name matches a
///    bucket yielded by [`CacheBucket::iter`].
/// 2. Every entry of the archive bucket is removed unless its canonical path is the target
///    of a symlink found while walking the cache buckets.
///
/// Returns the accumulated [`Removal`] for everything that was deleted.
///
/// # Errors
///
/// Returns an error if the cache root cannot be read, or if walking, canonicalizing, or
/// removing an entry fails. A symlink whose target no longer exists is skipped, and a
/// missing archive bucket simply means there is nothing to collect in the second pass;
/// neither aborts the prune.
pub fn prune(&self) -> Result<Removal, io::Error> {
    let mut summary = Removal::default();

    // First, remove any top-level directories that are unused. These typically represent
    // outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
    for entry in fs::read_dir(&self.root)? {
        let entry = entry?;
        let file_name = entry.file_name();

        // Marker entries are never removed.
        if file_name == "CACHEDIR.TAG" || file_name == ".gitignore" || file_name == ".git" {
            continue;
        }

        // Only directories named after a current bucket survive; stray files and
        // directories with any other name are dangling.
        let is_bucket = entry.metadata()?.is_dir()
            && CacheBucket::iter().any(|bucket| file_name == bucket.to_str());
        if is_bucket {
            continue;
        }

        let path = entry.path();
        debug!("Removing dangling cache entry: {}", path.display());
        summary += rm_rf(path)?;
    }

    // Second, remove any unused archives (by searching for archives that are not symlinked).
    // TODO(charlie): Remove any unused source distributions. This requires introspecting the
    // cache contents, e.g., reading and deserializing the manifests.
    let mut references = FxHashSet::default();
    for bucket in CacheBucket::iter() {
        let bucket = self.bucket(bucket);
        if !bucket.is_dir() {
            continue;
        }
        for entry in walkdir::WalkDir::new(bucket) {
            let entry = entry?;
            if !entry.file_type().is_symlink() {
                continue;
            }
            match entry.path().canonicalize() {
                Ok(target) => {
                    references.insert(target);
                }
                // A symlink whose target is gone references nothing; it must not abort
                // the whole collection.
                Err(err) if err.kind() == io::ErrorKind::NotFound => {}
                Err(err) => return Err(err),
            }
        }
    }

    // The archive bucket may not exist at all; in that case there is nothing left to prune.
    let archives = match fs::read_dir(self.bucket(CacheBucket::Archive)) {
        Ok(archives) => archives,
        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(summary),
        Err(err) => return Err(err),
    };
    for entry in archives {
        let entry = entry?;
        // Compare canonical paths, since the reference set holds canonicalized targets.
        let path = entry.path().canonicalize()?;
        if !references.contains(&path) {
            debug!("Removing dangling cache entry: {}", path.display());
            summary += rm_rf(path)?;
        }
    }

    Ok(summary)
}
}
/// The different kinds of data in the cache are stored in different buckets, which in our case
@ -633,6 +690,21 @@ impl CacheBucket {
}
Ok(summary)
}
/// Enumerate every cache bucket, by value, in a fixed order.
///
/// The order is: `Wheels`, `BuiltWheels`, `FlatIndex`, `Git`, `Interpreter`, `Simple`,
/// `Archive`.
pub fn iter() -> impl Iterator<Item = CacheBucket> {
    // Single table of all buckets; callers receive copies of its elements.
    static ALL_BUCKETS: [CacheBucket; 7] = [
        CacheBucket::Wheels,
        CacheBucket::BuiltWheels,
        CacheBucket::FlatIndex,
        CacheBucket::Git,
        CacheBucket::Interpreter,
        CacheBucket::Simple,
        CacheBucket::Archive,
    ];
    ALL_BUCKETS.iter().copied()
}
}
impl Display for CacheBucket {