mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 19:08:04 +00:00
Prune unused source distributions from the cache (#7112)
## Summary

This has bothered me for a while, and fixing it should be fairly impactful for users. The implementation is unusual: the distribution-building crate depends on the cache crate, so the prune operation can't live in the cache crate, because it needs access to the internals of the distribution-building crate.

Closes https://github.com/astral-sh/uv/issues/7096.
This commit is contained in:
parent
1422e18674
commit
93fe3e83be
10 changed files with 213 additions and 10 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -4812,6 +4812,7 @@ dependencies = [
|
|||
"uv-types",
|
||||
"uv-warnings",
|
||||
"uv-workspace",
|
||||
"walkdir",
|
||||
"zip",
|
||||
]
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ use uv_normalize::PackageName;
|
|||
pub use crate::by_timestamp::CachedByTimestamp;
|
||||
#[cfg(feature = "clap")]
|
||||
pub use crate::cli::CacheArgs;
|
||||
use crate::removal::{rm_rf, Removal};
|
||||
pub use crate::removal::{rm_rf, Removal};
|
||||
pub use crate::timestamp::Timestamp;
|
||||
pub use crate::wheel::WheelCache;
|
||||
use crate::wheel::WheelCacheKind;
|
||||
|
@ -458,9 +458,7 @@ impl Cache {
|
|||
}
|
||||
}
|
||||
|
||||
// Third, remove any unused archives (by searching for archives that are not symlinked).
|
||||
// TODO(charlie): Remove any unused source distributions. This requires introspecting the
|
||||
// cache contents, e.g., reading and deserializing the manifests.
|
||||
// Fourth, remove any unused archives (by searching for archives that are not symlinked).
|
||||
let mut references = FxHashSet::default();
|
||||
|
||||
for bucket in CacheBucket::iter() {
|
||||
|
|
|
@ -7,7 +7,7 @@ use std::path::Path;
|
|||
|
||||
/// Remove a file or directory and all its contents, returning a [`Removal`] with
|
||||
/// the number of files and directories removed, along with a total byte count.
|
||||
pub(crate) fn rm_rf(path: impl AsRef<Path>) -> io::Result<Removal> {
|
||||
pub fn rm_rf(path: impl AsRef<Path>) -> io::Result<Removal> {
|
||||
let mut removal = Removal::default();
|
||||
removal.rm_rf(path.as_ref())?;
|
||||
Ok(removal)
|
||||
|
|
|
@ -46,6 +46,7 @@ tokio = { workspace = true }
|
|||
tokio-util = { workspace = true, features = ["compat"] }
|
||||
tracing = { workspace = true }
|
||||
url = { workspace = true }
|
||||
walkdir = { workspace = true }
|
||||
zip = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
@ -44,6 +44,8 @@ pub enum Error {
|
|||
CacheDecode(#[from] rmp_serde::decode::Error),
|
||||
#[error("Failed to serialize cache entry")]
|
||||
CacheEncode(#[from] rmp_serde::encode::Error),
|
||||
#[error("Failed to walk the distribution cache")]
|
||||
CacheWalk(#[source] walkdir::Error),
|
||||
|
||||
// Build error
|
||||
#[error(transparent)]
|
||||
|
|
|
@ -4,6 +4,7 @@ pub use error::Error;
|
|||
pub use index::{BuiltWheelIndex, RegistryWheelIndex};
|
||||
pub use metadata::{ArchiveMetadata, LoweredRequirement, Metadata, RequiresDist};
|
||||
pub use reporter::Reporter;
|
||||
pub use source::prune;
|
||||
|
||||
mod archive;
|
||||
mod distribution_database;
|
||||
|
|
|
@ -22,7 +22,8 @@ use install_wheel_rs::metadata::read_archive_metadata;
|
|||
use platform_tags::Tags;
|
||||
use pypi_types::{HashDigest, Metadata12, Metadata23, RequiresTxt};
|
||||
use uv_cache::{
|
||||
ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Timestamp, WheelCache,
|
||||
ArchiveTimestamp, Cache, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Removal,
|
||||
Timestamp, WheelCache,
|
||||
};
|
||||
use uv_client::{
|
||||
CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
|
||||
|
@ -1610,6 +1611,78 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Prune any unused source distributions from the cache.
|
||||
pub fn prune(cache: &Cache) -> Result<Removal, Error> {
|
||||
let mut removal = Removal::default();
|
||||
|
||||
let bucket = cache.bucket(CacheBucket::SourceDistributions);
|
||||
if bucket.is_dir() {
|
||||
for entry in walkdir::WalkDir::new(bucket) {
|
||||
let entry = entry.map_err(Error::CacheWalk)?;
|
||||
|
||||
// If we find a `revision.http` file, read the pointer, and remove any extraneous
|
||||
// directories.
|
||||
if entry.file_name() == "revision.http" {
|
||||
let pointer = HttpRevisionPointer::read_from(entry.path())?;
|
||||
if let Some(pointer) = pointer {
|
||||
// Remove all sibling directories that are not referenced by the pointer.
|
||||
for sibling in entry
|
||||
.path()
|
||||
.parent()
|
||||
.unwrap()
|
||||
.read_dir()
|
||||
.map_err(Error::CacheRead)?
|
||||
{
|
||||
let sibling = sibling.map_err(Error::CacheRead)?;
|
||||
if sibling.file_type().map_err(Error::CacheRead)?.is_dir() {
|
||||
let sibling_name = sibling.file_name();
|
||||
if sibling_name != pointer.revision.id().as_str() {
|
||||
debug!(
|
||||
"Removing dangling source revision: {}",
|
||||
sibling.path().display()
|
||||
);
|
||||
removal +=
|
||||
uv_cache::rm_rf(sibling.path()).map_err(Error::CacheWrite)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we find a `revision.rev` file, read the pointer, and remove any extraneous
|
||||
// directories.
|
||||
if entry.file_name() == "revision.rev" {
|
||||
let pointer = LocalRevisionPointer::read_from(entry.path())?;
|
||||
if let Some(pointer) = pointer {
|
||||
// Remove all sibling directories that are not referenced by the pointer.
|
||||
for sibling in entry
|
||||
.path()
|
||||
.parent()
|
||||
.unwrap()
|
||||
.read_dir()
|
||||
.map_err(Error::CacheRead)?
|
||||
{
|
||||
let sibling = sibling.map_err(Error::CacheRead)?;
|
||||
if sibling.file_type().map_err(Error::CacheRead)?.is_dir() {
|
||||
let sibling_name = sibling.file_name();
|
||||
if sibling_name != pointer.revision.id().as_str() {
|
||||
debug!(
|
||||
"Removing dangling source revision: {}",
|
||||
sibling.path().display()
|
||||
);
|
||||
removal +=
|
||||
uv_cache::rm_rf(sibling.path()).map_err(Error::CacheWrite)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(removal)
|
||||
}
|
||||
|
||||
/// Validate that the source distribution matches the built metadata.
|
||||
fn validate(source: &BuildableSource<'_>, metadata: &Metadata23) -> Result<(), Error> {
|
||||
if let Some(name) = source.name() {
|
||||
|
|
|
@ -63,6 +63,16 @@ impl RevisionId {
|
|||
fn new() -> Self {
|
||||
Self(nanoid::nanoid!())
|
||||
}
|
||||
|
||||
pub(crate) fn as_str(&self) -> &str {
|
||||
self.0.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<str> for RevisionId {
|
||||
fn as_ref(&self) -> &str {
|
||||
self.0.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Path> for RevisionId {
|
||||
|
|
|
@ -3,7 +3,7 @@ use std::fmt::Write;
|
|||
use anyhow::{Context, Result};
|
||||
use owo_colors::OwoColorize;
|
||||
|
||||
use uv_cache::Cache;
|
||||
use uv_cache::{Cache, Removal};
|
||||
use uv_fs::Simplified;
|
||||
|
||||
use crate::commands::{human_readable_bytes, ExitStatus};
|
||||
|
@ -26,7 +26,14 @@ pub(crate) fn cache_prune(ci: bool, cache: &Cache, printer: Printer) -> Result<E
|
|||
cache.root().user_display().cyan()
|
||||
)?;
|
||||
|
||||
let summary = cache
|
||||
let mut summary = Removal::default();
|
||||
|
||||
// Prune the source distribution cache, which is tightly coupled to the builder crate.
|
||||
summary += uv_distribution::prune(cache)
|
||||
.with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?;
|
||||
|
||||
// Prune the remaining cache buckets.
|
||||
summary += cache
|
||||
.prune(ci)
|
||||
.with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?;
|
||||
|
||||
|
|
|
@ -106,7 +106,7 @@ fn prune_cached_env() {
|
|||
.chain([
|
||||
// The cache entry does not have a stable key, so we filter it out
|
||||
(
|
||||
r"\[CACHE_DIR\](\\|\/)(.+)(\\|\/).*",
|
||||
r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*",
|
||||
"[CACHE_DIR]/$2/[ENTRY]",
|
||||
),
|
||||
])
|
||||
|
@ -151,7 +151,7 @@ fn prune_stale_symlink() -> Result<()> {
|
|||
.chain([
|
||||
// The cache entry does not have a stable key, so we filter it out
|
||||
(
|
||||
r"\[CACHE_DIR\](\\|\/)(.+)(\\|\/).*",
|
||||
r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*",
|
||||
"[CACHE_DIR]/$2/[ENTRY]",
|
||||
),
|
||||
])
|
||||
|
@ -252,3 +252,113 @@ fn prune_unzipped() -> Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// `cache prune` should remove any stale source distribution revisions.
|
||||
#[test]
|
||||
fn prune_stale_revision() -> Result<()> {
|
||||
let context = TestContext::new("3.12");
|
||||
|
||||
let pyproject_toml = context.temp_dir.child("pyproject.toml");
|
||||
pyproject_toml.write_str(
|
||||
r#"
|
||||
[project]
|
||||
name = "project"
|
||||
version = "0.1.0"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = []
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=42"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
"#,
|
||||
)?;
|
||||
|
||||
context.temp_dir.child("src").child("__init__.py").touch()?;
|
||||
context.temp_dir.child("README").touch()?;
|
||||
|
||||
// Install the same package twice, with `--reinstall`.
|
||||
uv_snapshot!(context.filters(), context
|
||||
.pip_install()
|
||||
.arg(".")
|
||||
.arg("--reinstall"), @r###"
|
||||
success: true
|
||||
exit_code: 0
|
||||
----- stdout -----
|
||||
|
||||
----- stderr -----
|
||||
Resolved 1 package in [TIME]
|
||||
Prepared 1 package in [TIME]
|
||||
Installed 1 package in [TIME]
|
||||
+ project==0.1.0 (from file://[TEMP_DIR]/)
|
||||
"###);
|
||||
|
||||
uv_snapshot!(context.filters(), context
|
||||
.pip_install()
|
||||
.arg(".")
|
||||
.arg("--reinstall"), @r###"
|
||||
success: true
|
||||
exit_code: 0
|
||||
----- stdout -----
|
||||
|
||||
----- stderr -----
|
||||
Resolved 1 package in [TIME]
|
||||
Prepared 1 package in [TIME]
|
||||
Uninstalled 1 package in [TIME]
|
||||
Installed 1 package in [TIME]
|
||||
~ project==0.1.0 (from file://[TEMP_DIR]/)
|
||||
"###);
|
||||
|
||||
let filters: Vec<_> = context
|
||||
.filters()
|
||||
.into_iter()
|
||||
.chain([
|
||||
// The cache entry does not have a stable key, so we filter it out
|
||||
(
|
||||
r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*",
|
||||
"[CACHE_DIR]/$2/[ENTRY]",
|
||||
),
|
||||
])
|
||||
.collect();
|
||||
|
||||
// Pruning should remove the unused revision.
|
||||
uv_snapshot!(&filters, context.prune().arg("--verbose"), @r###"
|
||||
success: true
|
||||
exit_code: 0
|
||||
----- stdout -----
|
||||
|
||||
----- stderr -----
|
||||
DEBUG uv [VERSION] ([COMMIT] DATE)
|
||||
Pruning cache at: [CACHE_DIR]/
|
||||
DEBUG Removing dangling source revision: [CACHE_DIR]/built-wheels-v3/[ENTRY]
|
||||
DEBUG Removing dangling cache entry: [CACHE_DIR]/archive-v0/[ENTRY]
|
||||
Removed 8 files ([SIZE])
|
||||
"###);
|
||||
|
||||
// Uninstall and reinstall the package. We should use the cached version.
|
||||
uv_snapshot!(context.filters(), context
|
||||
.pip_uninstall()
|
||||
.arg("."), @r###"
|
||||
success: true
|
||||
exit_code: 0
|
||||
----- stdout -----
|
||||
|
||||
----- stderr -----
|
||||
Uninstalled 1 package in [TIME]
|
||||
- project==0.1.0 (from file://[TEMP_DIR]/)
|
||||
"###);
|
||||
|
||||
uv_snapshot!(context.filters(), context
|
||||
.pip_install()
|
||||
.arg("."), @r###"
|
||||
success: true
|
||||
exit_code: 0
|
||||
----- stdout -----
|
||||
|
||||
----- stderr -----
|
||||
Resolved 1 package in [TIME]
|
||||
Installed 1 package in [TIME]
|
||||
+ project==0.1.0 (from file://[TEMP_DIR]/)
|
||||
"###);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue