Create dedicated abstractions for .rev and .http pointers (#2977)

## Summary

This PR formalizes some of the concepts we use in the cache for
"pointers to things".

In the wheel cache, we have files like
`annotated_types-0.6.0-py3-none-any.http`. This represents an unzipped
wheel, cached alongside an HTTP caching policy. We now have a struct for
this to encapsulate the logic: `HttpArchivePointer`.

Similarly, we have files like `annotated_types-0.6.0-py3-none-any.rev`.
This represents an unzipped local wheel, alongside a timestamp. We
now have a struct for this to encapsulate the logic:
`LocalArchivePointer`.

We have similar structs for source distributions too.
This commit is contained in:
Charlie Marsh 2024-04-10 17:30:27 -04:00 committed by GitHub
parent 006379c50c
commit 5583b90c30
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 249 additions and 169 deletions

View file

@ -71,6 +71,12 @@ impl CacheEntry {
} }
} }
impl AsRef<Path> for CacheEntry {
fn as_ref(&self) -> &Path {
&self.0
}
}
/// A subdirectory within the cache. /// A subdirectory within the cache.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct CacheShard(PathBuf); pub struct CacheShard(PathBuf);

View file

@ -16,8 +16,10 @@ use distribution_types::{
}; };
use platform_tags::Tags; use platform_tags::Tags;
use pypi_types::{HashDigest, Metadata23}; use pypi_types::{HashDigest, Metadata23};
use uv_cache::{ArchiveTimestamp, CacheBucket, CacheEntry, CachedByTimestamp, WheelCache}; use uv_cache::{ArchiveTimestamp, CacheBucket, CacheEntry, Timestamp, WheelCache};
use uv_client::{CacheControl, CachedClientError, Connectivity, RegistryClient}; use uv_client::{
CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
};
use uv_configuration::{NoBinary, NoBuild}; use uv_configuration::{NoBinary, NoBuild};
use uv_extract::hash::Hasher; use uv_extract::hash::Hasher;
use uv_fs::write_atomic; use uv_fs::write_atomic;
@ -178,7 +180,6 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
WheelCache::Index(&wheel.index).wheel_dir(wheel.name().as_ref()), WheelCache::Index(&wheel.index).wheel_dir(wheel.name().as_ref()),
wheel.filename.stem(), wheel.filename.stem(),
); );
return self return self
.load_wheel(path, &wheel.filename, cache_entry, dist, hashes) .load_wheel(path, &wheel.filename, cache_entry, dist, hashes)
.await; .await;
@ -618,11 +619,17 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
let modified = ArchiveTimestamp::from_file(path).map_err(Error::CacheRead)?; let modified = ArchiveTimestamp::from_file(path).map_err(Error::CacheRead)?;
// Attempt to read the archive pointer from the cache. // Attempt to read the archive pointer from the cache.
let archive_entry = wheel_entry.with_file(format!("{}.rev", filename.stem())); let pointer_entry = wheel_entry.with_file(format!("{}.rev", filename.stem()));
let archive = read_timestamped_archive(&archive_entry, modified)?; let pointer = LocalArchivePointer::read_from(&pointer_entry)?;
// Extract the archive from the pointer.
let archive = pointer
.filter(|pointer| pointer.is_up_to_date(modified))
.map(LocalArchivePointer::into_archive)
.filter(|archive| archive.has_digests(hashes));
// If the file is already unzipped, and the cache is up-to-date, return it. // If the file is already unzipped, and the cache is up-to-date, return it.
if let Some(archive) = archive.filter(|archive| archive.has_digests(hashes)) { if let Some(archive) = archive {
Ok(LocalWheel { Ok(LocalWheel {
dist: Dist::Built(dist.clone()), dist: Dist::Built(dist.clone()),
archive: archive.path, archive: archive.path,
@ -632,7 +639,13 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
} else if hashes.is_none() { } else if hashes.is_none() {
// Otherwise, unzip the wheel. // Otherwise, unzip the wheel.
let archive = Archive::new(self.unzip_wheel(path, wheel_entry.path()).await?, vec![]); let archive = Archive::new(self.unzip_wheel(path, wheel_entry.path()).await?, vec![]);
write_timestamped_archive(&archive_entry, archive.clone(), modified).await?;
// Write the archive pointer to the cache.
let pointer = LocalArchivePointer {
timestamp: modified.timestamp(),
archive: archive.clone(),
};
pointer.write_to(&pointer_entry).await?;
Ok(LocalWheel { Ok(LocalWheel {
dist: Dist::Built(dist.clone()), dist: Dist::Built(dist.clone()),
@ -669,9 +682,15 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
let hashes = hashers.into_iter().map(HashDigest::from).collect(); let hashes = hashers.into_iter().map(HashDigest::from).collect();
// Write the archive pointer to the cache. // Create an archive.
let archive = Archive::new(archive, hashes); let archive = Archive::new(archive, hashes);
write_timestamped_archive(&archive_entry, archive.clone(), modified).await?;
// Write the archive pointer to the cache.
let pointer = LocalArchivePointer {
timestamp: modified.timestamp(),
archive: archive.clone(),
};
pointer.write_to(&pointer_entry).await?;
Ok(LocalWheel { Ok(LocalWheel {
dist: Dist::Built(dist.clone()), dist: Dist::Built(dist.clone()),
@ -728,37 +747,67 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
} }
} }
/// Write a timestamped archive path to the cache. /// A pointer to an archive in the cache, fetched from an HTTP archive.
async fn write_timestamped_archive( ///
cache_entry: &CacheEntry, /// Encoded with `MsgPack`, and represented on disk by a `.http` file.
data: Archive, #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
modified: ArchiveTimestamp, pub struct HttpArchivePointer {
) -> Result<(), Error> { archive: Archive,
write_atomic(
cache_entry.path(),
rmp_serde::to_vec(&CachedByTimestamp {
timestamp: modified.timestamp(),
data,
})?,
)
.await
.map_err(Error::CacheWrite)
} }
/// Read an existing timestamped archive path, if it exists and is up-to-date. impl HttpArchivePointer {
pub fn read_timestamped_archive( /// Read an [`HttpArchivePointer`] from the cache.
cache_entry: &CacheEntry, pub fn read_from(path: impl AsRef<Path>) -> Result<Option<Self>, Error> {
modified: ArchiveTimestamp, match fs_err::File::open(path.as_ref()) {
) -> Result<Option<Archive>, Error> { Ok(file) => {
match fs_err::read(cache_entry.path()) { let data = DataWithCachePolicy::from_reader(file)?.data;
Ok(cached) => { let archive = rmp_serde::from_slice::<Archive>(&data)?;
let cached = rmp_serde::from_slice::<CachedByTimestamp<Archive>>(&cached)?; Ok(Some(Self { archive }))
if cached.timestamp == modified.timestamp() {
return Ok(Some(cached.data));
} }
Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(Error::CacheRead(err)),
} }
Err(err) if err.kind() == io::ErrorKind::NotFound => {}
Err(err) => return Err(Error::CacheRead(err)),
} }
Ok(None)
/// Return the [`Archive`] from the pointer.
pub fn into_archive(self) -> Archive {
self.archive
}
}
/// A pointer to an archive in the cache, fetched from a local path.
///
/// Encoded with `MsgPack`, and represented on disk by a `.rev` file.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LocalArchivePointer {
timestamp: Timestamp,
archive: Archive,
}
impl LocalArchivePointer {
/// Read an [`LocalArchivePointer`] from the cache.
pub fn read_from(path: impl AsRef<Path>) -> Result<Option<Self>, Error> {
match fs_err::read(path) {
Ok(cached) => Ok(Some(rmp_serde::from_slice::<LocalArchivePointer>(&cached)?)),
Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(Error::CacheRead(err)),
}
}
/// Write an [`LocalArchivePointer`] to the cache.
pub async fn write_to(&self, entry: &CacheEntry) -> Result<(), Error> {
write_atomic(entry.path(), rmp_serde::to_vec(&self)?)
.await
.map_err(Error::CacheWrite)
}
/// Returns `true` if the archive is up-to-date with the given modified timestamp.
pub fn is_up_to_date(&self, modified: ArchiveTimestamp) -> bool {
self.timestamp == modified.timestamp()
}
/// Return the [`Archive`] from the pointer.
pub fn into_archive(self) -> Archive {
self.archive
}
} }

View file

@ -7,7 +7,7 @@ use uv_fs::symlinks;
use uv_types::HashStrategy; use uv_types::HashStrategy;
use crate::index::cached_wheel::CachedWheel; use crate::index::cached_wheel::CachedWheel;
use crate::source::{read_http_revision, read_timestamped_revision, REVISION}; use crate::source::{HttpRevisionPointer, LocalRevisionPointer, HTTP_REVISION, LOCAL_REVISION};
use crate::Error; use crate::Error;
/// A local index of built distributions for a specific source distribution. /// A local index of built distributions for a specific source distribution.
@ -40,12 +40,13 @@ impl<'a> BuiltWheelIndex<'a> {
); );
// Read the revision from the cache. // Read the revision from the cache.
let revision_entry = cache_shard.entry(REVISION); let Some(pointer) = HttpRevisionPointer::read_from(cache_shard.entry(HTTP_REVISION))?
let Some(revision) = read_http_revision(&revision_entry)? else { else {
return Ok(None); return Ok(None);
}; };
// Enforce hash-checking by omitting any wheels that don't satisfy the required hashes. // Enforce hash-checking by omitting any wheels that don't satisfy the required hashes.
let revision = pointer.into_revision();
if !revision.satisfies(self.hasher.get(&source_dist.name)) { if !revision.satisfies(self.hasher.get(&source_dist.name)) {
return Ok(None); return Ok(None);
} }
@ -60,6 +61,12 @@ impl<'a> BuiltWheelIndex<'a> {
WheelCache::Path(&source_dist.url).root(), WheelCache::Path(&source_dist.url).root(),
); );
// Read the revision from the cache.
let Some(pointer) = LocalRevisionPointer::read_from(cache_shard.entry(LOCAL_REVISION))?
else {
return Ok(None);
};
// Determine the last-modified time of the source distribution. // Determine the last-modified time of the source distribution.
let Some(modified) = let Some(modified) =
ArchiveTimestamp::from_path(&source_dist.path).map_err(Error::CacheRead)? ArchiveTimestamp::from_path(&source_dist.path).map_err(Error::CacheRead)?
@ -67,13 +74,13 @@ impl<'a> BuiltWheelIndex<'a> {
return Err(Error::DirWithoutEntrypoint); return Err(Error::DirWithoutEntrypoint);
}; };
// Read the revision from the cache. // If the distribution is stale, omit it from the index.
let revision_entry = cache_shard.entry(REVISION); if !pointer.is_up_to_date(modified) {
let Some(revision) = read_timestamped_revision(&revision_entry, modified)? else {
return Ok(None); return Ok(None);
}; }
// Enforce hash-checking by omitting any wheels that don't satisfy the required hashes. // Enforce hash-checking by omitting any wheels that don't satisfy the required hashes.
let revision = pointer.into_revision();
if !revision.satisfies(self.hasher.get(&source_dist.name)) { if !revision.satisfies(self.hasher.get(&source_dist.name)) {
return Ok(None); return Ok(None);
} }

View file

@ -4,10 +4,9 @@ use distribution_filename::WheelFilename;
use distribution_types::{CachedDirectUrlDist, CachedRegistryDist, Hashed}; use distribution_types::{CachedDirectUrlDist, CachedRegistryDist, Hashed};
use pep508_rs::VerbatimUrl; use pep508_rs::VerbatimUrl;
use pypi_types::HashDigest; use pypi_types::HashDigest;
use uv_cache::{CacheEntry, CachedByTimestamp}; use uv_cache::CacheEntry;
use uv_client::DataWithCachePolicy;
use crate::archive::Archive; use crate::{HttpArchivePointer, LocalArchivePointer};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct CachedWheel { pub struct CachedWheel {
@ -61,9 +60,8 @@ impl CachedWheel {
let filename = WheelFilename::from_stem(filename).ok()?; let filename = WheelFilename::from_stem(filename).ok()?;
// Read the pointer. // Read the pointer.
let file = fs_err::File::open(path).ok()?; let pointer = HttpArchivePointer::read_from(path).ok()??;
let data = DataWithCachePolicy::from_reader(file).ok()?.data; let archive = pointer.into_archive();
let archive = rmp_serde::from_slice::<Archive>(&data).ok()?;
// Convert to a cached wheel. // Convert to a cached wheel.
let entry = CacheEntry::from_path(archive.path); let entry = CacheEntry::from_path(archive.path);
@ -76,16 +74,14 @@ impl CachedWheel {
} }
/// Read a cached wheel from a `.rev` pointer (e.g., `anyio-4.0.0-py3-none-any.rev`). /// Read a cached wheel from a `.rev` pointer (e.g., `anyio-4.0.0-py3-none-any.rev`).
pub fn from_revision_pointer(path: &Path) -> Option<Self> { pub fn from_local_pointer(path: &Path) -> Option<Self> {
// Determine the wheel filename. // Determine the wheel filename.
let filename = path.file_name()?.to_str()?; let filename = path.file_name()?.to_str()?;
let filename = WheelFilename::from_stem(filename).ok()?; let filename = WheelFilename::from_stem(filename).ok()?;
// Read the pointer. // Read the pointer.
let cached = fs_err::read(path).ok()?; let pointer = LocalArchivePointer::read_from(path).ok()??;
let archive = rmp_serde::from_slice::<CachedByTimestamp<Archive>>(&cached) let archive = pointer.into_archive();
.ok()?
.data;
// Convert to a cached wheel. // Convert to a cached wheel.
let entry = CacheEntry::from_path(archive.path); let entry = CacheEntry::from_path(archive.path);

View file

@ -13,7 +13,7 @@ use uv_normalize::PackageName;
use uv_types::HashStrategy; use uv_types::HashStrategy;
use crate::index::cached_wheel::CachedWheel; use crate::index::cached_wheel::CachedWheel;
use crate::source::{read_http_revision, REVISION}; use crate::source::{HttpRevisionPointer, HTTP_REVISION};
/// A local index of distributions that originate from a registry, like `PyPI`. /// A local index of distributions that originate from a registry, like `PyPI`.
#[derive(Debug)] #[derive(Debug)]
@ -128,8 +128,7 @@ impl<'a> RegistryWheelIndex<'a> {
.extension() .extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("rev")) .is_some_and(|ext| ext.eq_ignore_ascii_case("rev"))
{ {
if let Some(wheel) = CachedWheel::from_revision_pointer(&wheel_dir.join(&file)) if let Some(wheel) = CachedWheel::from_local_pointer(&wheel_dir.join(&file)) {
{
// Enforce hash-checking based on the built distribution. // Enforce hash-checking based on the built distribution.
if wheel.satisfies(hasher.get(package)) { if wheel.satisfies(hasher.get(package)) {
Self::add_wheel(wheel, tags, &mut versions); Self::add_wheel(wheel, tags, &mut versions);
@ -149,9 +148,10 @@ impl<'a> RegistryWheelIndex<'a> {
for shard in directories(&cache_shard) { for shard in directories(&cache_shard) {
// Read the existing metadata from the cache, if it exists. // Read the existing metadata from the cache, if it exists.
let cache_shard = cache_shard.shard(shard); let cache_shard = cache_shard.shard(shard);
let revision_entry = cache_shard.entry(REVISION); let revision_entry = cache_shard.entry(HTTP_REVISION);
if let Ok(Some(revision)) = read_http_revision(&revision_entry) { if let Ok(Some(pointer)) = HttpRevisionPointer::read_from(&revision_entry) {
// Enforce hash-checking based on the source distribution. // Enforce hash-checking based on the source distribution.
let revision = pointer.into_revision();
if revision.satisfies(hasher.get(package)) { if revision.satisfies(hasher.get(package)) {
for wheel_dir in symlinks(cache_shard.join(revision.id())) { for wheel_dir in symlinks(cache_shard.join(revision.id())) {
if let Some(wheel) = CachedWheel::from_built_source(&wheel_dir) { if let Some(wheel) = CachedWheel::from_built_source(&wheel_dir) {

View file

@ -1,5 +1,5 @@
pub use archive::Archive; pub use archive::Archive;
pub use distribution_database::{read_timestamped_archive, DistributionDatabase}; pub use distribution_database::{DistributionDatabase, HttpArchivePointer, LocalArchivePointer};
pub use download::LocalWheel; pub use download::LocalWheel;
pub use error::Error; pub use error::Error;
pub use git::{is_same_reference, to_precise}; pub use git::{is_same_reference, to_precise};

View file

@ -23,7 +23,8 @@ use install_wheel_rs::metadata::read_archive_metadata;
use platform_tags::Tags; use platform_tags::Tags;
use pypi_types::{HashDigest, Metadata23}; use pypi_types::{HashDigest, Metadata23};
use uv_cache::{ use uv_cache::{
ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Freshness, WheelCache, ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Freshness, Timestamp,
WheelCache,
}; };
use uv_client::{ use uv_client::{
CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient, CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
@ -49,8 +50,11 @@ pub struct SourceDistributionBuilder<'a, T: BuildContext> {
reporter: Option<Arc<dyn Reporter>>, reporter: Option<Arc<dyn Reporter>>,
} }
/// The name of the file that contains the revision ID, encoded via `MsgPack`. /// The name of the file that contains the revision ID for a remote distribution, encoded via `MsgPack`.
pub(crate) const REVISION: &str = "revision.msgpack"; pub(crate) const HTTP_REVISION: &str = "revision.http";
/// The name of the file that contains the revision ID for a local distribution, encoded via `MsgPack`.
pub(crate) const LOCAL_REVISION: &str = "revision.rev";
/// The name of the file that contains the cached distribution metadata, encoded via `MsgPack`. /// The name of the file that contains the cached distribution metadata, encoded via `MsgPack`.
pub(crate) const METADATA: &str = "metadata.msgpack"; pub(crate) const METADATA: &str = "metadata.msgpack";
@ -509,7 +513,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
cache_shard: &CacheShard, cache_shard: &CacheShard,
hashes: HashPolicy<'_>, hashes: HashPolicy<'_>,
) -> Result<Revision, Error> { ) -> Result<Revision, Error> {
let cache_entry = cache_shard.entry(REVISION); let cache_entry = cache_shard.entry(HTTP_REVISION);
let cache_control = match self.client.connectivity() { let cache_control = match self.client.connectivity() {
Connectivity::Online => CacheControl::from( Connectivity::Online => CacheControl::from(
self.build_context self.build_context
@ -738,13 +742,16 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
let modified = ArchiveTimestamp::from_file(&resource.path).map_err(Error::CacheRead)?; let modified = ArchiveTimestamp::from_file(&resource.path).map_err(Error::CacheRead)?;
// Read the existing metadata from the cache. // Read the existing metadata from the cache.
let revision_entry = cache_shard.entry(REVISION); let revision_entry = cache_shard.entry(LOCAL_REVISION);
// If the revision already exists, return it. There's no need to check for freshness, since // If the revision already exists, return it. There's no need to check for freshness, since
// we use an exact timestamp. // we use an exact timestamp.
if let Some(revision) = read_timestamped_revision(&revision_entry, modified)? { if let Some(pointer) = LocalRevisionPointer::read_from(&revision_entry)? {
if revision.has_digests(hashes) { if pointer.is_up_to_date(modified) {
return Ok(revision); let revision = pointer.into_revision();
if revision.has_digests(hashes) {
return Ok(revision);
}
} }
} }
@ -929,14 +936,31 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
}; };
// Read the existing metadata from the cache. // Read the existing metadata from the cache.
let revision_entry = cache_shard.entry(REVISION); let entry = cache_shard.entry(LOCAL_REVISION);
let revision_freshness = self let freshness = self
.build_context .build_context
.cache() .cache()
.freshness(&revision_entry, source.name()) .freshness(&entry, source.name())
.map_err(Error::CacheRead)?; .map_err(Error::CacheRead)?;
refresh_timestamped_revision(&revision_entry, revision_freshness, modified).await // If the revision is fresh, return it.
if freshness.is_fresh() {
if let Some(pointer) = LocalRevisionPointer::read_from(&entry)? {
if pointer.timestamp == modified.timestamp() {
return Ok(pointer.into_revision());
}
}
}
// Otherwise, we need to create a new revision.
let revision = Revision::new();
let pointer = LocalRevisionPointer {
timestamp: modified.timestamp(),
revision: revision.clone(),
};
pointer.write_to(&entry).await?;
Ok(revision)
} }
/// Build a source distribution from a Git repository. /// Build a source distribution from a Git repository.
@ -1418,37 +1442,74 @@ fn validate(source: &BuildableSource<'_>, metadata: &Metadata23) -> Result<(), E
Ok(()) Ok(())
} }
/// Read an existing HTTP-cached [`Revision`], if it exists. /// A pointer to a source distribution revision in the cache, fetched from an HTTP archive.
pub(crate) fn read_http_revision(cache_entry: &CacheEntry) -> Result<Option<Revision>, Error> { ///
match fs_err::File::open(cache_entry.path()) { /// Encoded with `MsgPack`, and represented on disk by a `.http` file.
Ok(file) => { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
let data = DataWithCachePolicy::from_reader(file)?.data; pub(crate) struct HttpRevisionPointer {
Ok(Some(rmp_serde::from_slice::<Revision>(&data)?)) revision: Revision,
}
impl HttpRevisionPointer {
/// Read an [`HttpRevisionPointer`] from the cache.
pub(crate) fn read_from(path: impl AsRef<Path>) -> Result<Option<Self>, Error> {
match fs_err::File::open(path.as_ref()) {
Ok(file) => {
let data = DataWithCachePolicy::from_reader(file)?.data;
let revision = rmp_serde::from_slice::<Revision>(&data)?;
Ok(Some(Self { revision }))
}
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(Error::CacheRead(err)),
} }
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), }
Err(err) => Err(Error::CacheRead(err)),
/// Return the [`Revision`] from the pointer.
pub(crate) fn into_revision(self) -> Revision {
self.revision
} }
} }
/// Read an existing timestamped [`Revision`], if it exists and is up-to-date. /// A pointer to a source distribution revision in the cache, fetched from a local path.
/// ///
/// If the cache entry is stale, a new entry will be created. /// Encoded with `MsgPack`, and represented on disk by a `.rev` file.
pub(crate) fn read_timestamped_revision( #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
cache_entry: &CacheEntry, pub(crate) struct LocalRevisionPointer {
modified: ArchiveTimestamp, timestamp: Timestamp,
) -> Result<Option<Revision>, Error> { revision: Revision,
// If the cache entry is up-to-date, return it. }
match fs_err::read(cache_entry.path()) {
Ok(cached) => { impl LocalRevisionPointer {
let cached = rmp_serde::from_slice::<CachedByTimestamp<Revision>>(&cached)?; /// Read an [`LocalRevisionPointer`] from the cache.
if cached.timestamp == modified.timestamp() { pub(crate) fn read_from(path: impl AsRef<Path>) -> Result<Option<Self>, Error> {
return Ok(Some(cached.data)); match fs_err::read(path) {
} Ok(cached) => Ok(Some(rmp_serde::from_slice::<LocalRevisionPointer>(
&cached,
)?)),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(Error::CacheRead(err)),
} }
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
Err(err) => return Err(Error::CacheRead(err)),
} }
Ok(None)
/// Write an [`LocalRevisionPointer`] to the cache.
async fn write_to(&self, entry: &CacheEntry) -> Result<(), Error> {
fs::create_dir_all(&entry.dir())
.await
.map_err(Error::CacheWrite)?;
write_atomic(entry.path(), rmp_serde::to_vec(&self)?)
.await
.map_err(Error::CacheWrite)
}
/// Returns `true` if the revision is up-to-date with the given modified timestamp.
pub(crate) fn is_up_to_date(&self, modified: ArchiveTimestamp) -> bool {
self.timestamp == modified.timestamp()
}
/// Return the [`Revision`] from the pointer.
pub(crate) fn into_revision(self) -> Revision {
self.revision
}
} }
/// Read the [`Metadata23`] from a source distribution's `PKG-INFO` file, if it uses Metadata 2.2 /// Read the [`Metadata23`] from a source distribution's `PKG-INFO` file, if it uses Metadata 2.2
@ -1503,38 +1564,6 @@ async fn read_pyproject_toml(
Ok(metadata) Ok(metadata)
} }
/// Read an existing timestamped [`Manifest`], if it exists and is up-to-date.
///
/// If the cache entry is stale, a new entry will be created.
async fn refresh_timestamped_revision(
cache_entry: &CacheEntry,
freshness: Freshness,
modified: ArchiveTimestamp,
) -> Result<Revision, Error> {
// If we know the exact modification time, we don't need to force a revalidate.
if matches!(modified, ArchiveTimestamp::Exact(_)) || freshness.is_fresh() {
if let Some(revision) = read_timestamped_revision(cache_entry, modified)? {
return Ok(revision);
}
}
// Otherwise, create a new revision.
let revision = Revision::new();
fs::create_dir_all(&cache_entry.dir())
.await
.map_err(Error::CacheWrite)?;
write_atomic(
cache_entry.path(),
rmp_serde::to_vec(&CachedByTimestamp {
timestamp: modified.timestamp(),
data: revision.clone(),
})?,
)
.await
.map_err(Error::CacheWrite)?;
Ok(revision)
}
/// Read an existing cached [`Metadata23`], if it exists. /// Read an existing cached [`Metadata23`], if it exists.
async fn read_cached_metadata(cache_entry: &CacheEntry) -> Result<Option<Metadata23>, Error> { async fn read_cached_metadata(cache_entry: &CacheEntry) -> Result<Option<Metadata23>, Error> {
match fs::read(&cache_entry.path()).await { match fs::read(&cache_entry.path()).await {

View file

@ -1,6 +1,5 @@
use std::collections::hash_map::Entry; use std::collections::hash_map::Entry;
use std::hash::BuildHasherDefault; use std::hash::BuildHasherDefault;
use std::io;
use anyhow::{bail, Result}; use anyhow::{bail, Result};
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
@ -14,9 +13,11 @@ use distribution_types::{
use pep508_rs::{Requirement, VersionOrUrl}; use pep508_rs::{Requirement, VersionOrUrl};
use platform_tags::Tags; use platform_tags::Tags;
use uv_cache::{ArchiveTarget, ArchiveTimestamp, Cache, CacheBucket, WheelCache}; use uv_cache::{ArchiveTarget, ArchiveTimestamp, Cache, CacheBucket, WheelCache};
use uv_client::DataWithCachePolicy;
use uv_configuration::{NoBinary, Reinstall}; use uv_configuration::{NoBinary, Reinstall};
use uv_distribution::{read_timestamped_archive, Archive, BuiltWheelIndex, RegistryWheelIndex}; use uv_distribution::{
BuiltWheelIndex, HttpArchivePointer, LocalArchivePointer, RegistryWheelIndex,
};
use uv_fs::Simplified; use uv_fs::Simplified;
use uv_interpreter::PythonEnvironment; use uv_interpreter::PythonEnvironment;
use uv_types::HashStrategy; use uv_types::HashStrategy;
@ -256,31 +257,20 @@ impl<'a> Planner<'a> {
.entry(format!("{}.http", wheel.filename.stem())); .entry(format!("{}.http", wheel.filename.stem()));
// Read the HTTP pointer. // Read the HTTP pointer.
match fs_err::File::open(cache_entry.path()) { if let Some(pointer) = HttpArchivePointer::read_from(&cache_entry)? {
Ok(file) => { let archive = pointer.into_archive();
let data = DataWithCachePolicy::from_reader(file)?.data; if archive.satisfies(hasher.get(&requirement.name)) {
let archive = rmp_serde::from_slice::<Archive>(&data)?; let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename,
wheel.url,
archive.hashes,
archive.path,
);
// Enforce hash checking. debug!("URL wheel requirement already cached: {cached_dist}");
if archive.satisfies(hasher.get(&requirement.name)) { cached.push(CachedDist::Url(cached_dist));
let cached_dist = CachedDirectUrlDist::from_url( continue;
wheel.filename,
wheel.url,
archive.hashes,
archive.path,
);
debug!(
"URL wheel requirement already cached: {cached_dist}"
);
cached.push(CachedDist::Url(cached_dist));
continue;
}
} }
Err(err) if err.kind() == io::ErrorKind::NotFound => {
// The cache entry doesn't exist, so it's not fresh.
}
Err(err) => return Err(err.into()),
} }
} }
Dist::Built(BuiltDist::Path(wheel)) => { Dist::Built(BuiltDist::Path(wheel)) => {
@ -307,21 +297,24 @@ impl<'a> Planner<'a> {
) )
.entry(format!("{}.rev", wheel.filename.stem())); .entry(format!("{}.rev", wheel.filename.stem()));
if let Some(archive) = read_timestamped_archive( if let Some(pointer) = LocalArchivePointer::read_from(&cache_entry)? {
&cache_entry, let timestamp = ArchiveTimestamp::from_file(&wheel.path)?;
ArchiveTimestamp::from_file(&wheel.path)?, if pointer.is_up_to_date(timestamp) {
)? { let archive = pointer.into_archive();
if archive.satisfies(hasher.get(&requirement.name)) { if archive.satisfies(hasher.get(&requirement.name)) {
let cached_dist = CachedDirectUrlDist::from_url( let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename, wheel.filename,
wheel.url, wheel.url,
archive.hashes, archive.hashes,
archive.path, archive.path,
); );
debug!("Path wheel requirement already cached: {cached_dist}"); debug!(
cached.push(CachedDist::Url(cached_dist)); "Path wheel requirement already cached: {cached_dist}"
continue; );
cached.push(CachedDist::Url(cached_dist));
continue;
}
} }
} }
} }