mirror of
https://github.com/astral-sh/uv.git
synced 2025-11-02 04:48:18 +00:00
Add a garbage collection mechanism to the CLI (#1217)
## Summary

Detects unused cache entries, which can come in a few forms:

1. Directories that are outdated via our versioning scheme.
2. Old source distribution builds (i.e., we have a more recent version).
3. Old wheels (stored in `archive-v0`, but not symlinked to from anywhere in the cache).

Closes https://github.com/astral-sh/puffin/issues/1059.
This commit is contained in:
parent
7ee90dc71f
commit
0f96386032
8 changed files with 348 additions and 28 deletions
|
|
@ -24,7 +24,9 @@ clap = { workspace = true, features = ["derive", "env"], optional = true }
|
|||
directories = { workspace = true }
|
||||
fs-err = { workspace = true, features = ["tokio"] }
|
||||
nanoid = { workspace = true }
|
||||
rustc-hash = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
tempfile = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
url = { workspace = true }
|
||||
walkdir = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -6,10 +6,12 @@ use std::ops::Deref;
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use distribution_types::InstalledDist;
|
||||
use fs_err as fs;
|
||||
use rustc_hash::FxHashSet;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
use tracing::debug;
|
||||
|
||||
use distribution_types::InstalledDist;
|
||||
use uv_fs::directories;
|
||||
use uv_normalize::PackageName;
|
||||
|
||||
|
|
@ -283,17 +285,72 @@ impl Cache {
|
|||
/// Returns the number of entries removed from the cache.
|
||||
pub fn remove(&self, name: &PackageName) -> Result<Removal, io::Error> {
|
||||
let mut summary = Removal::default();
|
||||
for bucket in [
|
||||
CacheBucket::Wheels,
|
||||
CacheBucket::BuiltWheels,
|
||||
CacheBucket::Git,
|
||||
CacheBucket::Interpreter,
|
||||
CacheBucket::Simple,
|
||||
] {
|
||||
for bucket in CacheBucket::iter() {
|
||||
summary += bucket.remove(self, name)?;
|
||||
}
|
||||
Ok(summary)
|
||||
}
|
||||
|
||||
/// Run the garbage collector on the cache, removing any dangling entries.
|
||||
pub fn prune(&self) -> Result<Removal, io::Error> {
|
||||
let mut summary = Removal::default();
|
||||
|
||||
// First, remove any top-level directories that are unused. These typically represent
|
||||
// outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
|
||||
for entry in fs::read_dir(&self.root)? {
|
||||
let entry = entry?;
|
||||
let metadata = entry.metadata()?;
|
||||
|
||||
if entry.file_name() == "CACHEDIR.TAG"
|
||||
|| entry.file_name() == ".gitignore"
|
||||
|| entry.file_name() == ".git"
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if metadata.is_dir() {
|
||||
// If the directory is not a cache bucket, remove it.
|
||||
if CacheBucket::iter().all(|bucket| entry.file_name() != bucket.to_str()) {
|
||||
let path = entry.path();
|
||||
debug!("Removing dangling cache entry: {}", path.display());
|
||||
summary += rm_rf(path)?;
|
||||
}
|
||||
} else {
|
||||
// If the file is not a marker file, remove it.
|
||||
let path = entry.path();
|
||||
debug!("Removing dangling cache entry: {}", path.display());
|
||||
summary += rm_rf(path)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Second, remove any unused archives (by searching for archives that are not symlinked).
|
||||
// TODO(charlie): Remove any unused source distributions. This requires introspecting the
|
||||
// cache contents, e.g., reading and deserializing the manifests.
|
||||
let mut references = FxHashSet::default();
|
||||
|
||||
for bucket in CacheBucket::iter() {
|
||||
let bucket = self.bucket(bucket);
|
||||
if bucket.is_dir() {
|
||||
for entry in walkdir::WalkDir::new(bucket) {
|
||||
let entry = entry?;
|
||||
if entry.file_type().is_symlink() {
|
||||
references.insert(entry.path().canonicalize()?);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for entry in fs::read_dir(self.bucket(CacheBucket::Archive))? {
|
||||
let entry = entry?;
|
||||
let path = entry.path().canonicalize()?;
|
||||
if !references.contains(&path) {
|
||||
debug!("Removing dangling cache entry: {}", path.display());
|
||||
summary += rm_rf(path)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(summary)
|
||||
}
|
||||
}
|
||||
|
||||
/// The different kinds of data in the cache are stored in different buckets, which in our case
|
||||
|
|
@ -633,6 +690,21 @@ impl CacheBucket {
|
|||
}
|
||||
Ok(summary)
|
||||
}
|
||||
|
||||
/// Return an iterator over all cache buckets.
|
||||
pub fn iter() -> impl Iterator<Item = CacheBucket> {
|
||||
[
|
||||
CacheBucket::Wheels,
|
||||
CacheBucket::BuiltWheels,
|
||||
CacheBucket::FlatIndex,
|
||||
CacheBucket::Git,
|
||||
CacheBucket::Interpreter,
|
||||
CacheBucket::Simple,
|
||||
CacheBucket::Archive,
|
||||
]
|
||||
.iter()
|
||||
.copied()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for CacheBucket {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue