mirror of
https://github.com/astral-sh/uv.git
synced 2025-11-02 12:59:45 +00:00
Add a garbage collection mechanism to the CLI (#1217)
## Summary

Detects unused cache entries, which can come in a few forms:

1. Directories that are outdated via our versioning scheme.
2. Old source distribution builds (i.e., we have a more recent version).
3. Old wheels (stored in `archive-v0`, but not symlinked to from anywhere in the cache).

Closes https://github.com/astral-sh/puffin/issues/1059.
This commit is contained in:
parent
7ee90dc71f
commit
0f96386032
8 changed files with 348 additions and 28 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -4467,8 +4467,10 @@ dependencies = [
|
|||
"distribution-types",
|
||||
"fs-err",
|
||||
"nanoid",
|
||||
"rustc-hash",
|
||||
"serde",
|
||||
"tempfile",
|
||||
"tracing",
|
||||
"url",
|
||||
"uv-fs",
|
||||
"uv-normalize",
|
||||
|
|
|
|||
|
|
@ -24,7 +24,9 @@ clap = { workspace = true, features = ["derive", "env"], optional = true }
|
|||
directories = { workspace = true }
|
||||
fs-err = { workspace = true, features = ["tokio"] }
|
||||
nanoid = { workspace = true }
|
||||
rustc-hash = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
tempfile = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
url = { workspace = true }
|
||||
walkdir = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -6,10 +6,12 @@ use std::ops::Deref;
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use distribution_types::InstalledDist;
|
||||
use fs_err as fs;
|
||||
use rustc_hash::FxHashSet;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
use tracing::debug;
|
||||
|
||||
use distribution_types::InstalledDist;
|
||||
use uv_fs::directories;
|
||||
use uv_normalize::PackageName;
|
||||
|
||||
|
|
@ -283,17 +285,72 @@ impl Cache {
|
|||
/// Returns the number of entries removed from the cache.
|
||||
pub fn remove(&self, name: &PackageName) -> Result<Removal, io::Error> {
|
||||
let mut summary = Removal::default();
|
||||
for bucket in [
|
||||
CacheBucket::Wheels,
|
||||
CacheBucket::BuiltWheels,
|
||||
CacheBucket::Git,
|
||||
CacheBucket::Interpreter,
|
||||
CacheBucket::Simple,
|
||||
] {
|
||||
for bucket in CacheBucket::iter() {
|
||||
summary += bucket.remove(self, name)?;
|
||||
}
|
||||
Ok(summary)
|
||||
}
|
||||
|
||||
/// Run the garbage collector on the cache, removing any dangling entries.
|
||||
pub fn prune(&self) -> Result<Removal, io::Error> {
|
||||
let mut summary = Removal::default();
|
||||
|
||||
// First, remove any top-level directories that are unused. These typically represent
|
||||
// outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
|
||||
for entry in fs::read_dir(&self.root)? {
|
||||
let entry = entry?;
|
||||
let metadata = entry.metadata()?;
|
||||
|
||||
if entry.file_name() == "CACHEDIR.TAG"
|
||||
|| entry.file_name() == ".gitignore"
|
||||
|| entry.file_name() == ".git"
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if metadata.is_dir() {
|
||||
// If the directory is not a cache bucket, remove it.
|
||||
if CacheBucket::iter().all(|bucket| entry.file_name() != bucket.to_str()) {
|
||||
let path = entry.path();
|
||||
debug!("Removing dangling cache entry: {}", path.display());
|
||||
summary += rm_rf(path)?;
|
||||
}
|
||||
} else {
|
||||
// If the file is not a marker file, remove it.
|
||||
let path = entry.path();
|
||||
debug!("Removing dangling cache entry: {}", path.display());
|
||||
summary += rm_rf(path)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Second, remove any unused archives (by searching for archives that are not symlinked).
|
||||
// TODO(charlie): Remove any unused source distributions. This requires introspecting the
|
||||
// cache contents, e.g., reading and deserializing the manifests.
|
||||
let mut references = FxHashSet::default();
|
||||
|
||||
for bucket in CacheBucket::iter() {
|
||||
let bucket = self.bucket(bucket);
|
||||
if bucket.is_dir() {
|
||||
for entry in walkdir::WalkDir::new(bucket) {
|
||||
let entry = entry?;
|
||||
if entry.file_type().is_symlink() {
|
||||
references.insert(entry.path().canonicalize()?);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for entry in fs::read_dir(self.bucket(CacheBucket::Archive))? {
|
||||
let entry = entry?;
|
||||
let path = entry.path().canonicalize()?;
|
||||
if !references.contains(&path) {
|
||||
debug!("Removing dangling cache entry: {}", path.display());
|
||||
summary += rm_rf(path)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(summary)
|
||||
}
|
||||
}
|
||||
|
||||
/// The different kinds of data in the cache are stored in different buckets, which in our case
|
||||
|
|
@ -633,6 +690,21 @@ impl CacheBucket {
|
|||
}
|
||||
Ok(summary)
|
||||
}
|
||||
|
||||
/// Return an iterator over all cache buckets.
|
||||
pub fn iter() -> impl Iterator<Item = CacheBucket> {
|
||||
[
|
||||
CacheBucket::Wheels,
|
||||
CacheBucket::BuiltWheels,
|
||||
CacheBucket::FlatIndex,
|
||||
CacheBucket::Git,
|
||||
CacheBucket::Interpreter,
|
||||
CacheBucket::Simple,
|
||||
CacheBucket::Archive,
|
||||
]
|
||||
.iter()
|
||||
.copied()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for CacheBucket {
|
||||
|
|
|
|||
|
|
@ -7,10 +7,10 @@ use uv_cache::Cache;
|
|||
use uv_fs::Simplified;
|
||||
use uv_normalize::PackageName;
|
||||
|
||||
use crate::commands::ExitStatus;
|
||||
use crate::commands::{human_readable_bytes, ExitStatus};
|
||||
use crate::printer::Printer;
|
||||
|
||||
/// Clear the cache.
|
||||
/// Clear the cache, removing all entries or those linked to specific packages.
|
||||
pub(crate) fn cache_clean(
|
||||
packages: &[PackageName],
|
||||
cache: &Cache,
|
||||
|
|
@ -123,19 +123,3 @@ pub(crate) fn cache_clean(
|
|||
|
||||
Ok(ExitStatus::Success)
|
||||
}
|
||||
|
||||
/// Formats a number of bytes into a human-readable size with a binary (IEC) prefix.
|
||||
///
/// The quantity is scaled by a power of 1024 and paired with the matching unit, from `B` up
/// to `EiB`. Zero bytes yields `(0.0, "B")`.
///
|
||||
/// Returns a tuple of `(quantity, units)`.
|
||||
// The casts below are intentional: the value is only used for approximate display, so the
// precision lost by `u64 -> f32` and the truncation of `f32 -> usize` are acceptable.
#[allow(
|
||||
clippy::cast_possible_truncation,
|
||||
clippy::cast_possible_wrap,
|
||||
clippy::cast_precision_loss,
|
||||
clippy::cast_sign_loss
|
||||
)]
|
||||
fn human_readable_bytes(bytes: u64) -> (f32, &'static str) {
|
||||
static UNITS: [&str; 7] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"];
|
||||
let bytes = bytes as f32;
|
||||
// Each unit spans 10 bits, so the unit index is `log2(bytes) / 10`, truncated and capped at
// the largest unit. For `bytes == 0`, `log2` is negative infinity and the saturating
// float-to-integer cast maps it to index 0.
let i = ((bytes.log2() / 10.0) as usize).min(UNITS.len() - 1);
|
||||
(bytes / 1024_f32.powi(i as i32), UNITS[i])
|
||||
}
|
||||
|
|
|
|||
66
crates/uv/src/commands/cache_prune.rs
Normal file
66
crates/uv/src/commands/cache_prune.rs
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
use std::fmt::Write;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use owo_colors::OwoColorize;
|
||||
|
||||
use uv_cache::Cache;
|
||||
use uv_fs::Simplified;
|
||||
|
||||
use crate::commands::{human_readable_bytes, ExitStatus};
|
||||
use crate::printer::Printer;
|
||||
|
||||
/// Prune all unreachable objects from the cache.
|
||||
pub(crate) fn cache_prune(cache: &Cache, printer: Printer) -> Result<ExitStatus> {
|
||||
if !cache.root().exists() {
|
||||
writeln!(
|
||||
printer.stderr(),
|
||||
"No cache found at: {}",
|
||||
cache.root().user_display().cyan()
|
||||
)?;
|
||||
return Ok(ExitStatus::Success);
|
||||
}
|
||||
|
||||
writeln!(
|
||||
printer.stderr(),
|
||||
"Pruning cache at: {}",
|
||||
cache.root().user_display().cyan()
|
||||
)?;
|
||||
|
||||
let summary = cache
|
||||
.prune()
|
||||
.with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?;
|
||||
|
||||
// Write a summary of the number of files and directories removed.
|
||||
match (summary.num_files, summary.num_dirs) {
|
||||
(0, 0) => {
|
||||
write!(printer.stderr(), "No unused entries found")?;
|
||||
}
|
||||
(0, 1) => {
|
||||
write!(printer.stderr(), "Removed 1 directory")?;
|
||||
}
|
||||
(0, num_dirs_removed) => {
|
||||
write!(printer.stderr(), "Removed {num_dirs_removed} directories")?;
|
||||
}
|
||||
(1, _) => {
|
||||
write!(printer.stderr(), "Removed 1 file")?;
|
||||
}
|
||||
(num_files_removed, _) => {
|
||||
write!(printer.stderr(), "Removed {num_files_removed} files")?;
|
||||
}
|
||||
}
|
||||
|
||||
// If any, write a summary of the total byte count removed.
|
||||
if summary.total_bytes > 0 {
|
||||
let bytes = if summary.total_bytes < 1024 {
|
||||
format!("{}B", summary.total_bytes)
|
||||
} else {
|
||||
let (bytes, unit) = human_readable_bytes(summary.total_bytes);
|
||||
format!("{bytes:.1}{unit}")
|
||||
};
|
||||
write!(printer.stderr(), " ({})", bytes.green())?;
|
||||
}
|
||||
|
||||
writeln!(printer.stderr())?;
|
||||
|
||||
Ok(ExitStatus::Success)
|
||||
}
|
||||
|
|
@ -6,6 +6,7 @@ use owo_colors::OwoColorize;
|
|||
|
||||
pub(crate) use cache_clean::cache_clean;
|
||||
pub(crate) use cache_dir::cache_dir;
|
||||
pub(crate) use cache_prune::cache_prune;
|
||||
use distribution_types::InstalledMetadata;
|
||||
pub(crate) use pip_check::pip_check;
|
||||
pub(crate) use pip_compile::{extra_name_with_clap_error, pip_compile};
|
||||
|
|
@ -28,6 +29,7 @@ use crate::printer::Printer;
|
|||
|
||||
mod cache_clean;
|
||||
mod cache_dir;
|
||||
mod cache_prune;
|
||||
mod pip_check;
|
||||
mod pip_compile;
|
||||
mod pip_freeze;
|
||||
|
|
@ -155,3 +157,19 @@ pub(super) async fn compile_bytecode(
|
|||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Formats a number of bytes into a human-readable size with a binary (IEC) prefix.
|
||||
///
/// The quantity is scaled by a power of 1024 and paired with the matching unit, from `B` up
/// to `EiB`. Zero bytes yields `(0.0, "B")`.
///
|
||||
/// Returns a tuple of `(quantity, units)`.
|
||||
// The casts below are intentional: the value is only used for approximate display, so the
// precision lost by `u64 -> f32` and the truncation of `f32 -> usize` are acceptable.
#[allow(
|
||||
clippy::cast_possible_truncation,
|
||||
clippy::cast_possible_wrap,
|
||||
clippy::cast_precision_loss,
|
||||
clippy::cast_sign_loss
|
||||
)]
|
||||
pub(super) fn human_readable_bytes(bytes: u64) -> (f32, &'static str) {
|
||||
static UNITS: [&str; 7] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"];
|
||||
let bytes = bytes as f32;
|
||||
// Each unit spans 10 bits, so the unit index is `log2(bytes) / 10`, truncated and capped at
// the largest unit. For `bytes == 0`, `log2` is negative infinity and the saturating
// float-to-integer cast maps it to index 0.
let i = ((bytes.log2() / 10.0) as usize).min(UNITS.len() - 1);
|
||||
(bytes / 1024_f32.powi(i as i32), UNITS[i])
|
||||
}
|
||||
|
|
|
|||
|
|
@ -137,7 +137,7 @@ enum Commands {
|
|||
/// Manage the `uv` executable.
|
||||
#[clap(name = "self")]
|
||||
Self_(SelfNamespace),
|
||||
/// Remove all items from the cache.
|
||||
/// Clear the cache, removing all entries or those linked to specific packages.
|
||||
#[clap(hide = true)]
|
||||
Clean(CleanArgs),
|
||||
/// Display uv's version
|
||||
|
|
@ -170,8 +170,10 @@ struct CacheNamespace {
|
|||
|
||||
#[derive(Subcommand)]
|
||||
enum CacheCommand {
|
||||
/// Remove all items from the cache.
|
||||
/// Clear the cache, removing all entries or those linked to specific packages.
|
||||
Clean(CleanArgs),
|
||||
/// Prune all unreachable objects from the cache.
|
||||
Prune,
|
||||
/// Show the cache directory.
|
||||
Dir,
|
||||
}
|
||||
|
|
@ -1759,6 +1761,9 @@ async fn run() -> Result<ExitStatus> {
|
|||
command: CacheCommand::Clean(args),
|
||||
})
|
||||
| Commands::Clean(args) => commands::cache_clean(&args.package, &cache, printer),
|
||||
Commands::Cache(CacheNamespace {
|
||||
command: CacheCommand::Prune,
|
||||
}) => commands::cache_prune(&cache, printer),
|
||||
Commands::Cache(CacheNamespace {
|
||||
command: CacheCommand::Dir,
|
||||
}) => {
|
||||
|
|
|
|||
171
crates/uv/tests/cache_prune.rs
Normal file
171
crates/uv/tests/cache_prune.rs
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
#![cfg(all(feature = "python", feature = "pypi"))]
|
||||
|
||||
use std::process::Command;
|
||||
|
||||
use anyhow::Result;
|
||||
use assert_cmd::prelude::*;
|
||||
use assert_fs::prelude::*;
|
||||
|
||||
use common::uv_snapshot;
|
||||
|
||||
use crate::common::{get_bin, TestContext, INSTA_FILTERS};
|
||||
|
||||
mod common;
|
||||
|
||||
/// Create a `cache prune` command with options shared across scenarios.
|
||||
fn prune_command(context: &TestContext) -> Command {
|
||||
let mut command = Command::new(get_bin());
|
||||
command
|
||||
.arg("cache")
|
||||
.arg("prune")
|
||||
.arg("--cache-dir")
|
||||
.arg(context.cache_dir.path())
|
||||
.env("VIRTUAL_ENV", context.venv.as_os_str())
|
||||
.current_dir(&context.temp_dir);
|
||||
|
||||
if cfg!(all(windows, debug_assertions)) {
|
||||
// TODO(konstin): Reduce stack usage in debug mode enough that the tests pass with the
|
||||
// default windows stack of 1MB
|
||||
command.env("UV_STACK_SIZE", (8 * 1024 * 1024).to_string());
|
||||
}
|
||||
|
||||
command
|
||||
}
|
||||
|
||||
/// Create a `pip sync` command with options shared across scenarios.
|
||||
fn sync_command(context: &TestContext) -> Command {
|
||||
let mut command = Command::new(get_bin());
|
||||
command
|
||||
.arg("pip")
|
||||
.arg("sync")
|
||||
.arg("--cache-dir")
|
||||
.arg(context.cache_dir.path())
|
||||
.env("VIRTUAL_ENV", context.venv.as_os_str())
|
||||
.current_dir(&context.temp_dir);
|
||||
|
||||
if cfg!(all(windows, debug_assertions)) {
|
||||
// TODO(konstin): Reduce stack usage in debug mode enough that the tests pass with the
|
||||
// default windows stack of 1MB
|
||||
command.env("UV_STACK_SIZE", (8 * 1024 * 1024).to_string());
|
||||
}
|
||||
|
||||
command
|
||||
}
|
||||
|
||||
/// `cache prune` should be a no-op if there's nothing out-of-date in the cache.
#[test]
fn prune_no_op() -> Result<()> {
    let context = TestContext::new("3.12");

    // Populate the cache by installing a single requirement.
    let requirements = context.temp_dir.child("requirements.txt");
    requirements.write_str("anyio")?;
    sync_command(&context)
        .arg("requirements.txt")
        .assert()
        .success();

    // Normalize the machine-specific cache path before applying the shared filters.
    let mut filters = vec![(r"Pruning cache at: .*", "Pruning cache at: [CACHE_DIR]")];
    filters.extend(INSTA_FILTERS.to_vec());

    uv_snapshot!(filters, prune_command(&context).arg("--verbose"), @r###"
    success: true
    exit_code: 0
    ----- stdout -----

    ----- stderr -----
    Pruning cache at: [CACHE_DIR]
    No unused entries found
    "###);

    Ok(())
}
|
||||
|
||||
/// `cache prune` should remove any stale top-level directories from the cache.
#[test]
fn prune_stale_directory() -> Result<()> {
    let context = TestContext::new("3.12");

    let requirements_txt = context.temp_dir.child("requirements.txt");
    requirements_txt.write_str("anyio")?;

    // Install a requirement, to populate the cache.
    sync_command(&context)
        .arg("requirements.txt")
        .assert()
        .success();

    // Add a stale directory to the cache.
    let simple = context.cache_dir.child("simple-v4");
    simple.create_dir_all()?;

    // Normalize the machine-specific cache path. The debug line keeps its own wording so the
    // snapshot shows which message was actually logged.
    let filters = [
        (r"Pruning cache at: .*", "Pruning cache at: [CACHE_DIR]"),
        (
            r"Removing dangling cache entry: .*[\\/]simple-v4",
            "Removing dangling cache entry: [CACHE_DIR]/simple-v4",
        ),
    ]
    .into_iter()
    .chain(INSTA_FILTERS.to_vec())
    .collect::<Vec<_>>();

    uv_snapshot!(filters, prune_command(&context).arg("--verbose"), @r###"
    success: true
    exit_code: 0
    ----- stdout -----

    ----- stderr -----
    Pruning cache at: [CACHE_DIR]
    DEBUG Removing dangling cache entry: [CACHE_DIR]/simple-v4
    Removed 1 directory
    "###);

    Ok(())
}
|
||||
|
||||
/// `cache prune` should remove an archive entry once nothing in the cache links to it.
#[test]
fn prune_stale_symlink() -> Result<()> {
    let context = TestContext::new("3.12");

    let requirements_txt = context.temp_dir.child("requirements.txt");
    requirements_txt.write_str("anyio")?;

    // Install a requirement, to populate the cache.
    sync_command(&context)
        .arg("requirements.txt")
        .assert()
        .success();

    // Remove the wheels directory, causing the symlink to become stale.
    let wheels = context.cache_dir.child("wheels-v0");
    fs_err::remove_dir_all(wheels)?;

    // Normalize the machine-specific cache path and the archive entry name. The debug line
    // keeps its own wording so the snapshot shows which message was actually logged.
    let filters = [
        (r"Pruning cache at: .*", "Pruning cache at: [CACHE_DIR]"),
        (
            r"Removing dangling cache entry: .*[\\/]archive-v0[\\/].*",
            "Removing dangling cache entry: [CACHE_DIR]/archive-v0/anyio",
        ),
    ]
    .into_iter()
    .chain(INSTA_FILTERS.to_vec())
    .collect::<Vec<_>>();

    uv_snapshot!(filters, prune_command(&context).arg("--verbose"), @r###"
    success: true
    exit_code: 0
    ----- stdout -----

    ----- stderr -----
    Pruning cache at: [CACHE_DIR]
    DEBUG Removing dangling cache entry: [CACHE_DIR]/archive-v0/anyio
    Removed 44 files ([SIZE])
    "###);

    Ok(())
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue