diff --git a/crates/uv-cache/src/archive.rs b/crates/uv-cache/src/archive.rs new file mode 100644 index 000000000..31243f25c --- /dev/null +++ b/crates/uv-cache/src/archive.rs @@ -0,0 +1,24 @@ +use std::path::Path; + +/// A unique identifier for an archive (unzipped wheel) in the cache. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct ArchiveId(String); + +impl Default for ArchiveId { + fn default() -> Self { + Self::new() + } +} + +impl ArchiveId { + /// Generate a new unique identifier for an archive. + pub fn new() -> Self { + Self(nanoid::nanoid!()) + } +} + +impl AsRef for ArchiveId { + fn as_ref(&self) -> &Path { + self.0.as_ref() + } +} diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs index e232d5992..5c07c3902 100644 --- a/crates/uv-cache/src/lib.rs +++ b/crates/uv-cache/src/lib.rs @@ -23,7 +23,9 @@ use crate::removal::{rm_rf, Removal}; pub use crate::timestamp::Timestamp; pub use crate::wheel::WheelCache; use crate::wheel::WheelCacheKind; +pub use archive::ArchiveId; +mod archive; mod by_timestamp; #[cfg(feature = "clap")] mod cli; @@ -173,6 +175,11 @@ impl Cache { CacheEntry::new(self.bucket(cache_bucket).join(dir), file) } + /// Return the path to an archive in the cache. + pub fn archive(&self, id: &ArchiveId) -> PathBuf { + self.bucket(CacheBucket::Archive).join(id) + } + /// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy. pub fn must_revalidate(&self, package: &PackageName) -> bool { match &self.refresh { @@ -214,18 +221,18 @@ impl Cache { } } - /// Persist a temporary directory to the artifact store. + /// Persist a temporary directory to the artifact store, returning its unique ID. pub async fn persist( &self, temp_dir: impl AsRef, path: impl AsRef, - ) -> io::Result { + ) -> io::Result { // Create a unique ID for the artifact. // TODO(charlie): Support content-addressed persistence via SHAs. - let id = nanoid::nanoid!(); + let id = ArchiveId::new(); // Move the temporary directory into the directory store. - let archive_entry = self.entry(CacheBucket::Archive, "", id); + let archive_entry = self.entry(CacheBucket::Archive, "", &id); fs_err::create_dir_all(archive_entry.dir())?; uv_fs::rename_with_retry(temp_dir.as_ref(), archive_entry.path()).await?; @@ -233,7 +240,7 @@ impl Cache { fs_err::create_dir_all(path.as_ref().parent().expect("Cache entry to have parent"))?; uv_fs::replace_symlink(archive_entry.path(), path.as_ref())?; - Ok(archive_entry.into_path_buf()) + Ok(id) } /// Initialize a directory for use as a cache. diff --git a/crates/uv-distribution/src/archive.rs b/crates/uv-distribution/src/archive.rs index a53de619d..3d02d9e85 100644 --- a/crates/uv-distribution/src/archive.rs +++ b/crates/uv-distribution/src/archive.rs @@ -1,31 +1,20 @@ -use std::path::PathBuf; - use distribution_types::Hashed; use pypi_types::HashDigest; +use uv_cache::ArchiveId; /// An archive (unzipped wheel) that exists in the local cache. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct Archive { - /// The path to the archive entry in the wheel's archive bucket. - pub path: PathBuf, + /// The unique ID of the entry in the wheel's archive bucket. + pub id: ArchiveId, /// The computed hashes of the archive. pub hashes: Vec, } impl Archive { - /// Create a new [`Archive`] with the given path and hashes. - pub(crate) fn new(path: PathBuf, hashes: Vec) -> Self { - Self { path, hashes } - } - - /// Return the path to the archive entry in the wheel's archive bucket. - pub fn path(&self) -> &PathBuf { - &self.path - } - - /// Return the computed hashes of the archive. - pub fn hashes(&self) -> &[HashDigest] { - &self.hashes + /// Create a new [`Archive`] with the given ID and hashes. + pub(crate) fn new(id: ArchiveId, hashes: Vec) -> Self { + Self { id, hashes } } } diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs index 04c941a5d..67bc80cb5 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -1,5 +1,5 @@ use std::io; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::sync::Arc; use futures::{FutureExt, TryStreamExt}; @@ -16,7 +16,7 @@ use distribution_types::{ }; use platform_tags::Tags; use pypi_types::{HashDigest, Metadata23}; -use uv_cache::{ArchiveTimestamp, CacheBucket, CacheEntry, Timestamp, WheelCache}; +use uv_cache::{ArchiveId, ArchiveTimestamp, CacheBucket, CacheEntry, Timestamp, WheelCache}; use uv_client::{ CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient, }; @@ -136,11 +136,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Unzip into the editable wheel directory. let path = editable_wheel_dir.join(&disk_filename); let target = editable_wheel_dir.join(cache_key::digest(&editable.path)); - let archive = self.unzip_wheel(&path, &target).await?; + let id = self.unzip_wheel(&path, &target).await?; let wheel = LocalWheel { dist, filename, - archive, + archive: self.build_context.cache().archive(&id), hashes: vec![], }; @@ -200,7 +200,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> { Ok(archive) => Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive: archive.path, + archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: wheel.filename.clone(), }), @@ -216,7 +216,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> .await?; Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive: archive.path, + archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: wheel.filename.clone(), }) @@ -246,7 +246,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> { Ok(archive) => Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive: archive.path, + archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: wheel.filename.clone(), }), @@ -268,7 +268,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> .await?; Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive: archive.path, + archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: wheel.filename.clone(), }) @@ -326,11 +326,13 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> } // Otherwise, unzip the wheel. + let id = self + .unzip_wheel(&built_wheel.path, &built_wheel.target) + .await?; + Ok(LocalWheel { dist: Dist::Source(dist.clone()), - archive: self - .unzip_wheel(&built_wheel.path, &built_wheel.target) - .await?, + archive: self.build_context.cache().archive(&id), hashes: built_wheel.hashes, filename: built_wheel.filename, }) @@ -442,14 +444,15 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> } // Persist the temporary directory to the directory store. - let path = self + let id = self .build_context .cache() .persist(temp_dir.into_path(), wheel_entry.path()) .await .map_err(Error::CacheRead)?; + Ok(Archive::new( - path, + id, hashers.into_iter().map(HashDigest::from).collect(), )) } @@ -557,14 +560,14 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> }; // Persist the temporary directory to the directory store. - let path = self + let id = self .build_context .cache() .persist(temp_dir.into_path(), wheel_entry.path()) .await .map_err(Error::CacheRead)?; - Ok(Archive::new(path, hashes)) + Ok(Archive::new(id, hashes)) } .instrument(info_span!("wheel", wheel = %dist)) }; @@ -632,7 +635,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> if let Some(archive) = archive { Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive: archive.path, + archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: filename.clone(), }) @@ -649,7 +652,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive: archive.path, + archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: filename.clone(), }) @@ -672,18 +675,18 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Exhaust the reader to compute the hash. hasher.finish().await.map_err(Error::HashExhaustion)?; + let hashes = hashers.into_iter().map(HashDigest::from).collect(); + // Persist the temporary directory to the directory store. - let archive = self + let id = self .build_context .cache() .persist(temp_dir.into_path(), wheel_entry.path()) .await .map_err(Error::CacheWrite)?; - let hashes = hashers.into_iter().map(HashDigest::from).collect(); - // Create an archive. - let archive = Archive::new(archive, hashes); + let archive = Archive::new(id, hashes); // Write the archive pointer to the cache. let pointer = LocalArchivePointer { @@ -694,7 +697,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive: archive.path, + archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: filename.clone(), }) @@ -702,7 +705,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> } /// Unzip a wheel into the cache, returning the path to the unzipped directory. - async fn unzip_wheel(&self, path: &Path, target: &Path) -> Result { + async fn unzip_wheel(&self, path: &Path, target: &Path) -> Result { let temp_dir = tokio::task::spawn_blocking({ let path = path.to_owned(); let root = self.build_context.cache().root().to_path_buf(); @@ -716,14 +719,14 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> .await??; // Persist the temporary directory to the directory store. - let archive = self + let id = self .build_context .cache() .persist(temp_dir.into_path(), target) .await .map_err(Error::CacheWrite)?; - Ok(archive) + Ok(id) } /// Returns a GET [`reqwest::Request`] for the given URL. diff --git a/crates/uv-distribution/src/index/cached_wheel.rs b/crates/uv-distribution/src/index/cached_wheel.rs index 7423d30cb..eea658c8f 100644 --- a/crates/uv-distribution/src/index/cached_wheel.rs +++ b/crates/uv-distribution/src/index/cached_wheel.rs @@ -4,9 +4,9 @@ use distribution_filename::WheelFilename; use distribution_types::{CachedDirectUrlDist, CachedRegistryDist, Hashed}; use pep508_rs::VerbatimUrl; use pypi_types::HashDigest; -use uv_cache::CacheEntry; +use uv_cache::{Cache, CacheBucket, CacheEntry}; -use crate::{HttpArchivePointer, LocalArchivePointer}; +use crate::{Archive, HttpArchivePointer, LocalArchivePointer}; #[derive(Debug, Clone)] pub struct CachedWheel { @@ -54,18 +54,17 @@ impl CachedWheel { } /// Read a cached wheel from a `.http` pointer (e.g., `anyio-4.0.0-py3-none-any.http`). - pub fn from_http_pointer(path: &Path) -> Option { + pub fn from_http_pointer(path: &Path, cache: &Cache) -> Option { // Determine the wheel filename. let filename = path.file_name()?.to_str()?; let filename = WheelFilename::from_stem(filename).ok()?; // Read the pointer. let pointer = HttpArchivePointer::read_from(path).ok()??; - let archive = pointer.into_archive(); + let Archive { id, hashes } = pointer.into_archive(); // Convert to a cached wheel. - let entry = CacheEntry::from_path(archive.path); - let hashes = archive.hashes; + let entry = cache.entry(CacheBucket::Archive, "", id); Some(Self { filename, entry, @@ -74,18 +73,17 @@ impl CachedWheel { } /// Read a cached wheel from a `.rev` pointer (e.g., `anyio-4.0.0-py3-none-any.rev`). - pub fn from_local_pointer(path: &Path) -> Option { + pub fn from_local_pointer(path: &Path, cache: &Cache) -> Option { // Determine the wheel filename. let filename = path.file_name()?.to_str()?; let filename = WheelFilename::from_stem(filename).ok()?; // Read the pointer. let pointer = LocalArchivePointer::read_from(path).ok()??; - let archive = pointer.into_archive(); + let Archive { id, hashes } = pointer.into_archive(); // Convert to a cached wheel. - let entry = CacheEntry::from_path(archive.path); - let hashes = archive.hashes; + let entry = cache.entry(CacheBucket::Archive, "", id); Some(Self { filename, entry, diff --git a/crates/uv-distribution/src/index/registry_wheel_index.rs b/crates/uv-distribution/src/index/registry_wheel_index.rs index 10043920d..4cf9c0594 100644 --- a/crates/uv-distribution/src/index/registry_wheel_index.rs +++ b/crates/uv-distribution/src/index/registry_wheel_index.rs @@ -116,7 +116,9 @@ impl<'a> RegistryWheelIndex<'a> { .extension() .is_some_and(|ext| ext.eq_ignore_ascii_case("http")) { - if let Some(wheel) = CachedWheel::from_http_pointer(&wheel_dir.join(&file)) { + if let Some(wheel) = + CachedWheel::from_http_pointer(&wheel_dir.join(&file), cache) + { // Enforce hash-checking based on the built distribution. if wheel.satisfies(hasher.get(package)) { Self::add_wheel(wheel, tags, &mut versions); @@ -128,7 +130,9 @@ impl<'a> RegistryWheelIndex<'a> { .extension() .is_some_and(|ext| ext.eq_ignore_ascii_case("rev")) { - if let Some(wheel) = CachedWheel::from_local_pointer(&wheel_dir.join(&file)) { + if let Some(wheel) = + CachedWheel::from_local_pointer(&wheel_dir.join(&file), cache) + { // Enforce hash-checking based on the built distribution. if wheel.satisfies(hasher.get(package)) { Self::add_wheel(wheel, tags, &mut versions); diff --git a/crates/uv-distribution/src/source/revision.rs b/crates/uv-distribution/src/source/revision.rs index aadc2945a..64cbc127b 100644 --- a/crates/uv-distribution/src/source/revision.rs +++ b/crates/uv-distribution/src/source/revision.rs @@ -1,5 +1,6 @@ use distribution_types::Hashed; use serde::{Deserialize, Serialize}; +use std::path::Path; use pypi_types::HashDigest; @@ -11,7 +12,7 @@ use pypi_types::HashDigest; /// the distribution, despite the reported version number remaining the same. #[derive(Debug, Clone, Serialize, Deserialize)] pub(crate) struct Revision { - id: String, + id: RevisionId, hashes: Vec, } @@ -19,13 +20,13 @@ impl Revision { /// Initialize a new [`Revision`] with a random UUID. pub(crate) fn new() -> Self { Self { - id: nanoid::nanoid!(), + id: RevisionId::new(), hashes: vec![], } } /// Return the unique ID of the manifest. - pub(crate) fn id(&self) -> &str { + pub(crate) fn id(&self) -> &RevisionId { &self.id } @@ -52,3 +53,20 @@ impl Hashed for Revision { &self.hashes } } + +/// A unique identifier for a revision of a source distribution. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub(crate) struct RevisionId(String); + +impl RevisionId { + /// Generate a new unique identifier for an archive. + fn new() -> Self { + Self(nanoid::nanoid!()) + } +} + +impl AsRef for RevisionId { + fn as_ref(&self) -> &Path { + self.0.as_ref() + } +} diff --git a/crates/uv-installer/src/plan.rs b/crates/uv-installer/src/plan.rs index d0b191bae..9895ac3be 100644 --- a/crates/uv-installer/src/plan.rs +++ b/crates/uv-installer/src/plan.rs @@ -264,7 +264,7 @@ impl<'a> Planner<'a> { wheel.filename, wheel.url, archive.hashes, - archive.path, + cache.archive(&archive.id), ); debug!("URL wheel requirement already cached: {cached_dist}"); @@ -306,7 +306,7 @@ impl<'a> Planner<'a> { wheel.filename, wheel.url, archive.hashes, - archive.path, + cache.archive(&archive.id), ); debug!(