Store IDs rather than paths in the cache (#2985)

## Summary

Similar to `Revision`, we now store IDs in the `Archive` entries rather
than absolute paths. This makes the cache robust to moves, etc.

Closes https://github.com/astral-sh/uv/issues/2908.
This commit is contained in:
Charlie Marsh 2024-04-10 21:07:51 -04:00 committed by GitHub
parent c294c7098f
commit 32f129c245
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 107 additions and 64 deletions

View file

@ -0,0 +1,24 @@
use std::path::Path;
/// A unique identifier for an archive (unzipped wheel) in the cache.
///
/// Cache pointers store this opaque ID rather than an absolute path, so
/// entries stay valid if the cache directory is relocated.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ArchiveId(String);
impl Default for ArchiveId {
fn default() -> Self {
Self::new()
}
}
impl ArchiveId {
/// Generate a new unique identifier for an archive.
pub fn new() -> Self {
Self(nanoid::nanoid!())
}
}
/// Allow an [`ArchiveId`] to be used directly in path operations
/// (e.g., `PathBuf::join`), since IDs name directories in the cache.
impl AsRef<Path> for ArchiveId {
    fn as_ref(&self) -> &Path {
        Path::new(&self.0)
    }
}

View file

@ -23,7 +23,9 @@ use crate::removal::{rm_rf, Removal};
pub use crate::timestamp::Timestamp;
pub use crate::wheel::WheelCache;
use crate::wheel::WheelCacheKind;
pub use archive::ArchiveId;
mod archive;
mod by_timestamp;
#[cfg(feature = "clap")]
mod cli;
@ -173,6 +175,11 @@ impl Cache {
CacheEntry::new(self.bucket(cache_bucket).join(dir), file)
}
/// Return the path to an archive in the cache.
///
/// Resolves an [`ArchiveId`] to its absolute location by joining the ID
/// onto the archive bucket's root directory.
pub fn archive(&self, id: &ArchiveId) -> PathBuf {
self.bucket(CacheBucket::Archive).join(id)
}
/// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy.
pub fn must_revalidate(&self, package: &PackageName) -> bool {
match &self.refresh {
@ -214,18 +221,18 @@ impl Cache {
}
}
/// Persist a temporary directory to the artifact store.
/// Persist a temporary directory to the artifact store, returning its unique ID.
pub async fn persist(
&self,
temp_dir: impl AsRef<Path>,
path: impl AsRef<Path>,
) -> io::Result<PathBuf> {
) -> io::Result<ArchiveId> {
// Create a unique ID for the artifact.
// TODO(charlie): Support content-addressed persistence via SHAs.
let id = nanoid::nanoid!();
let id = ArchiveId::new();
// Move the temporary directory into the directory store.
let archive_entry = self.entry(CacheBucket::Archive, "", id);
let archive_entry = self.entry(CacheBucket::Archive, "", &id);
fs_err::create_dir_all(archive_entry.dir())?;
uv_fs::rename_with_retry(temp_dir.as_ref(), archive_entry.path()).await?;
@ -233,7 +240,7 @@ impl Cache {
fs_err::create_dir_all(path.as_ref().parent().expect("Cache entry to have parent"))?;
uv_fs::replace_symlink(archive_entry.path(), path.as_ref())?;
Ok(archive_entry.into_path_buf())
Ok(id)
}
/// Initialize a directory for use as a cache.

View file

@ -1,31 +1,20 @@
use std::path::PathBuf;
use distribution_types::Hashed;
use pypi_types::HashDigest;
use uv_cache::ArchiveId;
/// An archive (unzipped wheel) that exists in the local cache.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Archive {
/// The path to the archive entry in the wheel's archive bucket.
pub path: PathBuf,
/// The unique ID of the entry in the wheel's archive bucket.
pub id: ArchiveId,
/// The computed hashes of the archive.
pub hashes: Vec<HashDigest>,
}
impl Archive {
/// Create a new [`Archive`] with the given path and hashes.
pub(crate) fn new(path: PathBuf, hashes: Vec<HashDigest>) -> Self {
Self { path, hashes }
}
/// Return the path to the archive entry in the wheel's archive bucket.
pub fn path(&self) -> &PathBuf {
&self.path
}
/// Return the computed hashes of the archive.
pub fn hashes(&self) -> &[HashDigest] {
&self.hashes
/// Create a new [`Archive`] with the given ID and hashes.
pub(crate) fn new(id: ArchiveId, hashes: Vec<HashDigest>) -> Self {
Self { id, hashes }
}
}

View file

@ -1,5 +1,5 @@
use std::io;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::sync::Arc;
use futures::{FutureExt, TryStreamExt};
@ -16,7 +16,7 @@ use distribution_types::{
};
use platform_tags::Tags;
use pypi_types::{HashDigest, Metadata23};
use uv_cache::{ArchiveTimestamp, CacheBucket, CacheEntry, Timestamp, WheelCache};
use uv_cache::{ArchiveId, ArchiveTimestamp, CacheBucket, CacheEntry, Timestamp, WheelCache};
use uv_client::{
CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
};
@ -136,11 +136,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
// Unzip into the editable wheel directory.
let path = editable_wheel_dir.join(&disk_filename);
let target = editable_wheel_dir.join(cache_key::digest(&editable.path));
let archive = self.unzip_wheel(&path, &target).await?;
let id = self.unzip_wheel(&path, &target).await?;
let wheel = LocalWheel {
dist,
filename,
archive,
archive: self.build_context.cache().archive(&id),
hashes: vec![],
};
@ -200,7 +200,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
{
Ok(archive) => Ok(LocalWheel {
dist: Dist::Built(dist.clone()),
archive: archive.path,
archive: self.build_context.cache().archive(&archive.id),
hashes: archive.hashes,
filename: wheel.filename.clone(),
}),
@ -216,7 +216,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
.await?;
Ok(LocalWheel {
dist: Dist::Built(dist.clone()),
archive: archive.path,
archive: self.build_context.cache().archive(&archive.id),
hashes: archive.hashes,
filename: wheel.filename.clone(),
})
@ -246,7 +246,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
{
Ok(archive) => Ok(LocalWheel {
dist: Dist::Built(dist.clone()),
archive: archive.path,
archive: self.build_context.cache().archive(&archive.id),
hashes: archive.hashes,
filename: wheel.filename.clone(),
}),
@ -268,7 +268,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
.await?;
Ok(LocalWheel {
dist: Dist::Built(dist.clone()),
archive: archive.path,
archive: self.build_context.cache().archive(&archive.id),
hashes: archive.hashes,
filename: wheel.filename.clone(),
})
@ -326,11 +326,13 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
}
// Otherwise, unzip the wheel.
let id = self
.unzip_wheel(&built_wheel.path, &built_wheel.target)
.await?;
Ok(LocalWheel {
dist: Dist::Source(dist.clone()),
archive: self
.unzip_wheel(&built_wheel.path, &built_wheel.target)
.await?,
archive: self.build_context.cache().archive(&id),
hashes: built_wheel.hashes,
filename: built_wheel.filename,
})
@ -442,14 +444,15 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
}
// Persist the temporary directory to the directory store.
let path = self
let id = self
.build_context
.cache()
.persist(temp_dir.into_path(), wheel_entry.path())
.await
.map_err(Error::CacheRead)?;
Ok(Archive::new(
path,
id,
hashers.into_iter().map(HashDigest::from).collect(),
))
}
@ -557,14 +560,14 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
};
// Persist the temporary directory to the directory store.
let path = self
let id = self
.build_context
.cache()
.persist(temp_dir.into_path(), wheel_entry.path())
.await
.map_err(Error::CacheRead)?;
Ok(Archive::new(path, hashes))
Ok(Archive::new(id, hashes))
}
.instrument(info_span!("wheel", wheel = %dist))
};
@ -632,7 +635,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
if let Some(archive) = archive {
Ok(LocalWheel {
dist: Dist::Built(dist.clone()),
archive: archive.path,
archive: self.build_context.cache().archive(&archive.id),
hashes: archive.hashes,
filename: filename.clone(),
})
@ -649,7 +652,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
Ok(LocalWheel {
dist: Dist::Built(dist.clone()),
archive: archive.path,
archive: self.build_context.cache().archive(&archive.id),
hashes: archive.hashes,
filename: filename.clone(),
})
@ -672,18 +675,18 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
// Exhaust the reader to compute the hash.
hasher.finish().await.map_err(Error::HashExhaustion)?;
let hashes = hashers.into_iter().map(HashDigest::from).collect();
// Persist the temporary directory to the directory store.
let archive = self
let id = self
.build_context
.cache()
.persist(temp_dir.into_path(), wheel_entry.path())
.await
.map_err(Error::CacheWrite)?;
let hashes = hashers.into_iter().map(HashDigest::from).collect();
// Create an archive.
let archive = Archive::new(archive, hashes);
let archive = Archive::new(id, hashes);
// Write the archive pointer to the cache.
let pointer = LocalArchivePointer {
@ -694,7 +697,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
Ok(LocalWheel {
dist: Dist::Built(dist.clone()),
archive: archive.path,
archive: self.build_context.cache().archive(&archive.id),
hashes: archive.hashes,
filename: filename.clone(),
})
@ -702,7 +705,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
}
/// Unzip a wheel into the cache, returning the path to the unzipped directory.
async fn unzip_wheel(&self, path: &Path, target: &Path) -> Result<PathBuf, Error> {
async fn unzip_wheel(&self, path: &Path, target: &Path) -> Result<ArchiveId, Error> {
let temp_dir = tokio::task::spawn_blocking({
let path = path.to_owned();
let root = self.build_context.cache().root().to_path_buf();
@ -716,14 +719,14 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
.await??;
// Persist the temporary directory to the directory store.
let archive = self
let id = self
.build_context
.cache()
.persist(temp_dir.into_path(), target)
.await
.map_err(Error::CacheWrite)?;
Ok(archive)
Ok(id)
}
/// Returns a GET [`reqwest::Request`] for the given URL.

View file

@ -4,9 +4,9 @@ use distribution_filename::WheelFilename;
use distribution_types::{CachedDirectUrlDist, CachedRegistryDist, Hashed};
use pep508_rs::VerbatimUrl;
use pypi_types::HashDigest;
use uv_cache::CacheEntry;
use uv_cache::{Cache, CacheBucket, CacheEntry};
use crate::{HttpArchivePointer, LocalArchivePointer};
use crate::{Archive, HttpArchivePointer, LocalArchivePointer};
#[derive(Debug, Clone)]
pub struct CachedWheel {
@ -54,18 +54,17 @@ impl CachedWheel {
}
/// Read a cached wheel from a `.http` pointer (e.g., `anyio-4.0.0-py3-none-any.http`).
pub fn from_http_pointer(path: &Path) -> Option<Self> {
pub fn from_http_pointer(path: &Path, cache: &Cache) -> Option<Self> {
// Determine the wheel filename.
let filename = path.file_name()?.to_str()?;
let filename = WheelFilename::from_stem(filename).ok()?;
// Read the pointer.
let pointer = HttpArchivePointer::read_from(path).ok()??;
let archive = pointer.into_archive();
let Archive { id, hashes } = pointer.into_archive();
// Convert to a cached wheel.
let entry = CacheEntry::from_path(archive.path);
let hashes = archive.hashes;
let entry = cache.entry(CacheBucket::Archive, "", id);
Some(Self {
filename,
entry,
@ -74,18 +73,17 @@ impl CachedWheel {
}
/// Read a cached wheel from a `.rev` pointer (e.g., `anyio-4.0.0-py3-none-any.rev`).
pub fn from_local_pointer(path: &Path) -> Option<Self> {
pub fn from_local_pointer(path: &Path, cache: &Cache) -> Option<Self> {
// Determine the wheel filename.
let filename = path.file_name()?.to_str()?;
let filename = WheelFilename::from_stem(filename).ok()?;
// Read the pointer.
let pointer = LocalArchivePointer::read_from(path).ok()??;
let archive = pointer.into_archive();
let Archive { id, hashes } = pointer.into_archive();
// Convert to a cached wheel.
let entry = CacheEntry::from_path(archive.path);
let hashes = archive.hashes;
let entry = cache.entry(CacheBucket::Archive, "", id);
Some(Self {
filename,
entry,

View file

@ -116,7 +116,9 @@ impl<'a> RegistryWheelIndex<'a> {
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("http"))
{
if let Some(wheel) = CachedWheel::from_http_pointer(&wheel_dir.join(&file)) {
if let Some(wheel) =
CachedWheel::from_http_pointer(&wheel_dir.join(&file), cache)
{
// Enforce hash-checking based on the built distribution.
if wheel.satisfies(hasher.get(package)) {
Self::add_wheel(wheel, tags, &mut versions);
@ -128,7 +130,9 @@ impl<'a> RegistryWheelIndex<'a> {
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("rev"))
{
if let Some(wheel) = CachedWheel::from_local_pointer(&wheel_dir.join(&file)) {
if let Some(wheel) =
CachedWheel::from_local_pointer(&wheel_dir.join(&file), cache)
{
// Enforce hash-checking based on the built distribution.
if wheel.satisfies(hasher.get(package)) {
Self::add_wheel(wheel, tags, &mut versions);

View file

@ -1,5 +1,6 @@
use distribution_types::Hashed;
use serde::{Deserialize, Serialize};
use std::path::Path;
use pypi_types::HashDigest;
@ -11,7 +12,7 @@ use pypi_types::HashDigest;
/// the distribution, despite the reported version number remaining the same.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct Revision {
id: String,
id: RevisionId,
hashes: Vec<HashDigest>,
}
@ -19,13 +20,13 @@ impl Revision {
/// Initialize a new [`Revision`] with a random UUID.
pub(crate) fn new() -> Self {
Self {
id: nanoid::nanoid!(),
id: RevisionId::new(),
hashes: vec![],
}
}
/// Return the unique ID of the manifest.
pub(crate) fn id(&self) -> &str {
pub(crate) fn id(&self) -> &RevisionId {
&self.id
}
@ -52,3 +53,20 @@ impl Hashed for Revision {
&self.hashes
}
}
/// A unique identifier for a revision of a source distribution.
///
/// Stored as an opaque string (rather than a path) in the cache, keeping
/// revision pointers stable across moves of the cache directory.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub(crate) struct RevisionId(String);
impl RevisionId {
/// Generate a new unique identifier for a revision.
fn new() -> Self {
Self(nanoid::nanoid!())
}
}
/// Allow a [`RevisionId`] to be used directly in path operations
/// (e.g., `PathBuf::join`) when locating revision entries in the cache.
impl AsRef<Path> for RevisionId {
    fn as_ref(&self) -> &Path {
        Path::new(&self.0)
    }
}

View file

@ -264,7 +264,7 @@ impl<'a> Planner<'a> {
wheel.filename,
wheel.url,
archive.hashes,
archive.path,
cache.archive(&archive.id),
);
debug!("URL wheel requirement already cached: {cached_dist}");
@ -306,7 +306,7 @@ impl<'a> Planner<'a> {
wheel.filename,
wheel.url,
archive.hashes,
archive.path,
cache.archive(&archive.id),
);
debug!(