Remove refresh checks from the install plan (#1119)

## Summary

Rather than checking cache freshness in the install plan, it's a lot
simple to have the install plan _never_ return cached data when the
refresh policy is in place, and then rely on the distribution database
to check for freshness. The original implementation didn't support this,
since the distribution database was rebuilding things too often. Now, it
rarely rebuilds (it's much better about this), so it seems conceptually
much simpler to split up the responsibilities like this.
This commit is contained in:
Charlie Marsh 2024-01-25 19:48:16 -08:00 committed by GitHub
parent 50057cd5f2
commit f593b65447
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 57 additions and 93 deletions

View file

@ -160,6 +160,15 @@ impl Cache {
CacheEntry::new(self.bucket(cache_bucket).join(dir), file) CacheEntry::new(self.bucket(cache_bucket).join(dir), file)
} }
/// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy.
pub fn must_revalidate(&self, package: &PackageName) -> bool {
match &self.refresh {
Refresh::None => false,
Refresh::All(_) => true,
Refresh::Packages(packages, _) => packages.contains(package),
}
}
/// Returns `true` if a cache entry is up-to-date given the [`Refresh`] policy. /// Returns `true` if a cache entry is up-to-date given the [`Refresh`] policy.
pub fn freshness( pub fn freshness(
&self, &self,

View file

@ -1,8 +1,7 @@
use distribution_types::{git_reference, DirectUrlSourceDist, GitSourceDist, Name, PathSourceDist}; use distribution_types::{git_reference, DirectUrlSourceDist, GitSourceDist, Name, PathSourceDist};
use platform_tags::Tags; use platform_tags::Tags;
use puffin_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheShard, Freshness, WheelCache}; use puffin_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheShard, WheelCache};
use puffin_fs::symlinks; use puffin_fs::symlinks;
use puffin_normalize::PackageName;
use crate::index::cached_wheel::CachedWheel; use crate::index::cached_wheel::CachedWheel;
use crate::source::{read_http_manifest, read_timestamp_manifest, MANIFEST}; use crate::source::{read_http_manifest, read_timestamp_manifest, MANIFEST};
@ -34,12 +33,7 @@ impl BuiltWheelIndex {
return Ok(None); return Ok(None);
}; };
Ok(Self::find( Ok(Self::find(&cache_shard.shard(manifest.id()), tags))
&cache_shard.shard(manifest.id()),
source_dist.name(),
cache,
tags,
))
} }
/// Return the most compatible [`CachedWheel`] for a given source distribution at a local path. /// Return the most compatible [`CachedWheel`] for a given source distribution at a local path.
@ -66,12 +60,7 @@ impl BuiltWheelIndex {
return Ok(None); return Ok(None);
}; };
Ok(Self::find( Ok(Self::find(&cache_shard.shard(manifest.id()), tags))
&cache_shard.shard(manifest.id()),
source_dist.name(),
cache,
tags,
))
} }
/// Return the most compatible [`CachedWheel`] for a given source distribution at a git URL. /// Return the most compatible [`CachedWheel`] for a given source distribution at a git URL.
@ -86,7 +75,7 @@ impl BuiltWheelIndex {
.remote_wheel_dir(source_dist.name().as_ref()), .remote_wheel_dir(source_dist.name().as_ref()),
); );
Self::find(&cache_shard, source_dist.name(), cache, tags) Self::find(&cache_shard, tags)
} }
/// Find the "best" distribution in the index for a given source distribution. /// Find the "best" distribution in the index for a given source distribution.
@ -105,12 +94,7 @@ impl BuiltWheelIndex {
/// ``` /// ```
/// ///
/// The `shard` should be `built-wheels-v0/pypi/django-allauth-0.51.0.tar.gz`. /// The `shard` should be `built-wheels-v0/pypi/django-allauth-0.51.0.tar.gz`.
fn find( fn find(shard: &CacheShard, tags: &Tags) -> Option<CachedWheel> {
shard: &CacheShard,
package: &PackageName,
cache: &Cache,
tags: &Tags,
) -> Option<CachedWheel> {
let mut candidate: Option<CachedWheel> = None; let mut candidate: Option<CachedWheel> = None;
// Unzipped wheels are stored as symlinks into the archive directory. // Unzipped wheels are stored as symlinks into the archive directory.
@ -118,15 +102,6 @@ impl BuiltWheelIndex {
match CachedWheel::from_path(&subdir) { match CachedWheel::from_path(&subdir) {
None => {} None => {}
Some(dist_info) => { Some(dist_info) => {
// If the [`Refresh`] policy is set, ignore entries that were created before
// the cutoff.
if cache
.freshness(&dist_info.entry, Some(package))
.is_ok_and(Freshness::is_stale)
{
continue;
}
// Pick the wheel with the highest priority // Pick the wheel with the highest priority
let compatibility = dist_info.filename.compatibility(tags); let compatibility = dist_info.filename.compatibility(tags);

View file

@ -7,7 +7,7 @@ use rustc_hash::FxHashMap;
use distribution_types::{CachedRegistryDist, FlatIndexLocation, IndexLocations, IndexUrl}; use distribution_types::{CachedRegistryDist, FlatIndexLocation, IndexLocations, IndexUrl};
use pep440_rs::Version; use pep440_rs::Version;
use platform_tags::Tags; use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket, Freshness, WheelCache}; use puffin_cache::{Cache, CacheBucket, WheelCache};
use puffin_fs::{directories, symlinks}; use puffin_fs::{directories, symlinks};
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
@ -94,7 +94,7 @@ impl<'a> RegistryWheelIndex<'a> {
WheelCache::Index(index_url).remote_wheel_dir(package.to_string()), WheelCache::Index(index_url).remote_wheel_dir(package.to_string()),
); );
Self::add_directory(&wheel_dir, package, cache, tags, &mut versions); Self::add_directory(&wheel_dir, tags, &mut versions);
// Index all the built wheels, created by downloading and building source distributions // Index all the built wheels, created by downloading and building source distributions
// from the registry. // from the registry.
@ -109,13 +109,7 @@ impl<'a> RegistryWheelIndex<'a> {
let cache_shard = cache_shard.shard(shard); let cache_shard = cache_shard.shard(shard);
let manifest_entry = cache_shard.entry(MANIFEST); let manifest_entry = cache_shard.entry(MANIFEST);
if let Ok(Some(manifest)) = read_http_manifest(&manifest_entry) { if let Ok(Some(manifest)) = read_http_manifest(&manifest_entry) {
Self::add_directory( Self::add_directory(cache_shard.join(manifest.id()), tags, &mut versions);
cache_shard.join(manifest.id()),
package,
cache,
tags,
&mut versions,
);
}; };
} }
} }
@ -128,8 +122,6 @@ impl<'a> RegistryWheelIndex<'a> {
/// Each subdirectory in the given path is expected to be that of an unzipped wheel. /// Each subdirectory in the given path is expected to be that of an unzipped wheel.
fn add_directory( fn add_directory(
path: impl AsRef<Path>, path: impl AsRef<Path>,
package: &PackageName,
cache: &Cache,
tags: &Tags, tags: &Tags,
versions: &mut BTreeMap<Version, CachedRegistryDist>, versions: &mut BTreeMap<Version, CachedRegistryDist>,
) { ) {
@ -138,13 +130,6 @@ impl<'a> RegistryWheelIndex<'a> {
match CachedWheel::from_path(&wheel_dir) { match CachedWheel::from_path(&wheel_dir) {
None => {} None => {}
Some(dist_info) => { Some(dist_info) => {
if cache
.freshness(&dist_info.entry, Some(package))
.is_ok_and(Freshness::is_stale)
{
continue;
}
let dist_info = dist_info.into_registry_dist(); let dist_info = dist_info.into_registry_dist();
// Pick the wheel with the highest priority // Pick the wheel with the highest priority

View file

@ -12,10 +12,9 @@ use distribution_types::{
}; };
use pep508_rs::{Requirement, VersionOrUrl}; use pep508_rs::{Requirement, VersionOrUrl};
use platform_tags::Tags; use platform_tags::Tags;
use puffin_cache::{ use puffin_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheEntry, Timestamp, WheelCache};
ArchiveTimestamp, Cache, CacheBucket, CacheEntry, Freshness, Timestamp, WheelCache,
};
use puffin_distribution::{BuiltWheelIndex, RegistryWheelIndex}; use puffin_distribution::{BuiltWheelIndex, RegistryWheelIndex};
use puffin_fs::NormalizedDisplay;
use puffin_interpreter::Virtualenv; use puffin_interpreter::Virtualenv;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
use puffin_traits::NoBinary; use puffin_traits::NoBinary;
@ -186,6 +185,12 @@ impl<'a> Planner<'a> {
} }
} }
if cache.must_revalidate(&requirement.name) {
debug!("Must revalidate requirement: {requirement}");
remote.push(requirement.clone());
continue;
}
// Identify any locally-available distributions that satisfy the requirement. // Identify any locally-available distributions that satisfy the requirement.
match requirement.version_or_url.as_ref() { match requirement.version_or_url.as_ref() {
None => { None => {
@ -247,36 +252,29 @@ impl<'a> Planner<'a> {
) )
.entry(wheel.filename.stem()); .entry(wheel.filename.stem());
if cache match cache_entry.path().canonicalize() {
.freshness(&cache_entry, Some(wheel.name())) Ok(archive) => {
.is_ok_and(Freshness::is_fresh) let cached_dist = CachedDirectUrlDist::from_url(
{ wheel.filename,
match cache_entry.path().canonicalize() { wheel.url,
Ok(archive) => { archive,
let cached_dist = CachedDirectUrlDist::from_url( );
wheel.filename,
wheel.url,
archive,
);
debug!( debug!("URL wheel requirement already cached: {cached_dist}");
"URL wheel requirement already cached: {cached_dist}" local.push(CachedDist::Url(cached_dist));
); continue;
local.push(CachedDist::Url(cached_dist));
continue;
}
Err(err) if err.kind() == io::ErrorKind::NotFound => {
// The cache entry doesn't exist, so it's not fresh.
}
Err(err) => return Err(err.into()),
} }
Err(err) if err.kind() == io::ErrorKind::NotFound => {
// The cache entry doesn't exist, so it's not fresh.
}
Err(err) => return Err(err.into()),
} }
} }
Dist::Built(BuiltDist::Path(wheel)) => { Dist::Built(BuiltDist::Path(wheel)) => {
if !wheel.filename.is_compatible(tags) { if !wheel.filename.is_compatible(tags) {
bail!( bail!(
"A path dependency is incompatible with the current platform: {}", "A path dependency is incompatible with the current platform: {}",
wheel.path.display() wheel.path.normalized_display()
); );
} }
@ -297,28 +295,25 @@ impl<'a> Planner<'a> {
) )
.entry(wheel.filename.stem()); .entry(wheel.filename.stem());
if cache if not_modified_cache(&cache_entry, &wheel.path)? {
.freshness(&cache_entry, Some(wheel.name())) match cache_entry.path().canonicalize() {
.is_ok_and(Freshness::is_fresh) Ok(archive) => {
{ let cached_dist = CachedDirectUrlDist::from_url(
if not_modified_cache(&cache_entry, &wheel.path)? { wheel.filename,
match cache_entry.path().canonicalize() { wheel.url,
Ok(archive) => { archive,
let cached_dist = CachedDirectUrlDist::from_url( );
wheel.filename,
wheel.url,
archive,
);
debug!("URL wheel requirement already cached: {cached_dist}"); debug!(
local.push(CachedDist::Url(cached_dist)); "URL wheel requirement already cached: {cached_dist}"
continue; );
} local.push(CachedDist::Url(cached_dist));
Err(err) if err.kind() == io::ErrorKind::NotFound => { continue;
// The cache entry doesn't exist, so it's not fresh.
}
Err(err) => return Err(err.into()),
} }
Err(err) if err.kind() == io::ErrorKind::NotFound => {
// The cache entry doesn't exist, so it's not fresh.
}
Err(err) => return Err(err.into()),
} }
} }
} }