Add --refresh behavior to the cache (#1057)

## Summary

This PR is an alternative approach to #949, which should be much safer.
As in #949, we add a `Refresh` policy to the cache. However, instead of
deleting entries from the cache the first time we read them, we now
check whether the entry is sufficiently new (i.e., created after the
start of the command) whenever the refresh policy applies. If the entry
is stale, we avoid reading it and continue onward, relying on the cache
to overwrite it with "new" data. (This relies on the preceding PRs,
which ensure that the cache is append-only and that entries can be
overwritten atomically.)
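
In practice, the check reduces to a single modification-time comparison
against a cutoff. Here's a minimal standalone sketch of the idea (names
simplified; the real implementation is `Cache::freshness` in the diff
below):

```rust
use std::io;
use std::path::Path;
use std::time::SystemTime;

enum Freshness {
    Fresh,   // entry was written at or after the cutoff
    Stale,   // entry predates the cutoff and must be refreshed
    Missing, // entry doesn't exist at all
}

/// Compare a cache entry's modification time against a cutoff (the time at
/// which the command started).
fn check_freshness(entry: &Path, cutoff: SystemTime) -> io::Result<Freshness> {
    match std::fs::metadata(entry) {
        Ok(metadata) => {
            if metadata.modified()? >= cutoff {
                Ok(Freshness::Fresh)
            } else {
                Ok(Freshness::Stale)
            }
        }
        Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(Freshness::Missing),
        Err(err) => Err(err),
    }
}
```

Anything written by the current command lands at or after the cutoff, which
is what lets us overwrite stale entries in place rather than deleting them
up front.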

Unfortunately, there are just a lot of paths through the cache, and
different data is handled with different policies, so I really had to go
through and consider the "right" behavior for each case. For example,
HTTP requests can use `max-age=0, must-revalidate`. But for the routes
that are based on filesystem modification times, we need to do something
slightly different.
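
For the HTTP-backed paths, a stale entry doesn't bypass the cache outright;
instead, we force revalidation by attaching a `Cache-Control` header to the
outgoing request. A minimal sketch using the `http` crate (the PR applies
this inside `CachedClient::send_cached`):

```rust
use http::{header, HeaderValue, Request};

/// Force the origin server to revalidate a cached response: with
/// `max-age=0, must-revalidate`, the cached entry may only be reused if the
/// server confirms (e.g., via a 304 Not Modified) that it's still current.
fn force_revalidation<B>(req: &mut Request<B>) {
    req.headers_mut().insert(
        header::CACHE_CONTROL,
        HeaderValue::from_static("max-age=0, must-revalidate"),
    );
}
```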

Closes #945.
Charlie Marsh authored on 2024-01-23 18:30:26 -05:00 (committed by GitHub)
parent cf8b452414 · commit 1b3a3f4e80
16 changed files with 657 additions and 184 deletions

@ -99,6 +99,8 @@ impl Deref for CacheShard {
pub struct Cache {
/// The cache directory.
root: PathBuf,
/// The refresh strategy to use when reading from the cache.
refresh: Refresh,
/// A temporary cache directory, if the user requested `--no-cache`.
///
/// Included to ensure that the temporary directory exists for the length of the operation, but
@ -111,6 +113,7 @@ impl Cache {
pub fn from_path(root: impl Into<PathBuf>) -> Result<Self, io::Error> {
Ok(Self {
root: Self::init(root)?,
refresh: Refresh::None,
_temp_dir_drop: None,
})
}
@ -120,10 +123,17 @@ impl Cache {
let temp_dir = tempdir()?;
Ok(Self {
root: Self::init(temp_dir.path())?,
refresh: Refresh::None,
_temp_dir_drop: Some(Arc::new(temp_dir)),
})
}
/// Set the [`Refresh`] policy for the cache.
#[must_use]
pub fn with_refresh(self, refresh: Refresh) -> Self {
Self { refresh, ..self }
}
/// Return the root of the cache.
pub fn root(&self) -> &Path {
&self.root
@ -149,13 +159,42 @@ impl Cache {
CacheEntry::new(self.bucket(cache_bucket).join(dir), file)
}
/// Persist a temporary directory to the artifact store.
pub fn persist(
/// Return the [`Freshness`] of a cache entry, as determined by the [`Refresh`] policy.
pub fn freshness(
&self,
temp_dir: impl AsRef<Path>,
path: impl AsRef<Path>,
) -> Result<(), io::Error> {
entry: &CacheEntry,
package: Option<&PackageName>,
) -> io::Result<Freshness> {
// Grab the cutoff timestamp, if it's relevant.
let timestamp = match &self.refresh {
Refresh::None => return Ok(Freshness::Fresh),
Refresh::All(timestamp) => timestamp,
Refresh::Packages(packages, timestamp) => {
if package.map_or(true, |package| packages.contains(package)) {
timestamp
} else {
return Ok(Freshness::Fresh);
}
}
};
match fs::metadata(entry.path()) {
Ok(metadata) => {
if metadata.modified()? >= *timestamp {
Ok(Freshness::Fresh)
} else {
Ok(Freshness::Stale)
}
}
Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(Freshness::Missing),
Err(err) => Err(err),
}
}
/// Persist a temporary directory to the artifact store.
pub fn persist(&self, temp_dir: impl AsRef<Path>, path: impl AsRef<Path>) -> io::Result<()> {
// Create a unique ID for the artifact.
// TODO(charlie): Support content-addressed persistence via SHAs.
let id = uuid::Uuid::new_v4();
// Move the temporary directory into the directory store.
@ -589,33 +628,103 @@ impl Display for CacheBucket {
}
}
/// Return the modification timestamp for an archive, which could be a file (like a wheel or a zip
/// archive) or a directory containing a Python package.
///
/// If the path is to a directory with no entrypoint (i.e., no `pyproject.toml` or `setup.py`),
/// returns `None`.
pub fn archive_mtime(path: &Path) -> Result<Option<SystemTime>, io::Error> {
let metadata = fs_err::metadata(path)?;
if metadata.is_file() {
// `modified()` is infallible on Windows and Unix (i.e., all platforms we support).
Ok(Some(metadata.modified()?))
} else {
if let Some(metadata) = path
.join("pyproject.toml")
.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
{
Ok(Some(metadata.modified()?))
} else if let Some(metadata) = path
.join("setup.py")
.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
{
Ok(Some(metadata.modified()?))
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArchiveTimestamp {
/// The archive consists of a single file with the given modification time.
Exact(SystemTime),
/// The archive consists of a directory. The modification time is the latest modification time
/// of the `pyproject.toml` or `setup.py` file in the directory.
Approximate(SystemTime),
}
impl ArchiveTimestamp {
/// Return the modification timestamp for an archive, which could be a file (like a wheel or a zip
/// archive) or a directory containing a Python package.
///
/// If the path is to a directory with no entrypoint (i.e., no `pyproject.toml` or `setup.py`),
/// returns `None`.
pub fn from_path(path: impl AsRef<Path>) -> Result<Option<Self>, io::Error> {
let metadata = fs_err::metadata(path.as_ref())?;
if metadata.is_file() {
// `modified()` is infallible on Windows and Unix (i.e., all platforms we support).
Ok(Some(Self::Exact(metadata.modified()?)))
} else {
Ok(None)
if let Some(metadata) = path
.as_ref()
.join("pyproject.toml")
.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
{
Ok(Some(Self::Approximate(metadata.modified()?)))
} else if let Some(metadata) = path
.as_ref()
.join("setup.py")
.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
{
Ok(Some(Self::Approximate(metadata.modified()?)))
} else {
Ok(None)
}
}
}
/// Return the modification timestamp for an archive.
pub fn timestamp(&self) -> SystemTime {
match self {
Self::Exact(timestamp) => *timestamp,
Self::Approximate(timestamp) => *timestamp,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Freshness {
/// The cache entry is fresh according to the [`Refresh`] policy.
Fresh,
/// The cache entry is stale according to the [`Refresh`] policy.
Stale,
/// The cache entry does not exist.
Missing,
}
impl Freshness {
pub const fn is_fresh(self) -> bool {
matches!(self, Self::Fresh)
}
pub const fn is_stale(self) -> bool {
matches!(self, Self::Stale)
}
}
/// A refresh policy for cache entries.
#[derive(Debug, Clone)]
pub enum Refresh {
/// Don't refresh any entries.
None,
/// Refresh entries linked to the given packages, if created before the given timestamp.
Packages(Vec<PackageName>, SystemTime),
/// Refresh all entries created before the given timestamp.
All(SystemTime),
}
impl Refresh {
/// Determine the refresh strategy to use based on the command-line arguments.
pub fn from_args(refresh: bool, refresh_package: Vec<PackageName>) -> Self {
if refresh {
Self::All(SystemTime::now())
} else if !refresh_package.is_empty() {
Self::Packages(refresh_package, SystemTime::now())
} else {
Self::None
}
}
/// Returns `true` if no entries should be refreshed.
pub fn is_none(&self) -> bool {
matches!(self, Self::None)
}
}
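
To make the policy's per-package scoping concrete, here's a small standalone
sketch of how the cutoff timestamp is selected; it mirrors the `match` at the
top of `Cache::freshness` above, with `PackageName` simplified to a plain
`String` for illustration:

```rust
use std::time::SystemTime;

type PackageName = String; // stand-in for `puffin_normalize::PackageName`

enum Refresh {
    None,
    Packages(Vec<PackageName>, SystemTime),
    All(SystemTime),
}

impl Refresh {
    /// Return the cutoff that applies to `package`, if any. `None` means the
    /// entry may be used without a freshness check.
    fn cutoff(&self, package: Option<&PackageName>) -> Option<SystemTime> {
        match self {
            Self::None => None,
            Self::All(timestamp) => Some(*timestamp),
            Self::Packages(packages, timestamp) => {
                // Without package context, err on the side of refreshing.
                if package.map_or(true, |package| packages.contains(package)) {
                    Some(*timestamp)
                } else {
                    None
                }
            }
        }
    }
}

fn main() {
    let policy = Refresh::Packages(vec!["tomli".to_string()], SystemTime::now());
    assert!(policy.cutoff(Some(&"tomli".to_string())).is_some());
    assert!(policy.cutoff(Some(&"flask".to_string())).is_none());
    assert!(Refresh::All(SystemTime::now()).cutoff(None).is_some());
}
```

Since `with_refresh` is a `#[must_use]` builder, a `Cache` carries its policy
with it, and every read path can consult it without extra plumbing.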

@ -9,7 +9,7 @@ use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use tracing::{debug, info_span, instrument, trace, warn, Instrument};
use puffin_cache::CacheEntry;
use puffin_cache::{CacheEntry, Freshness};
use puffin_fs::write_atomic;
use crate::cache_headers::CacheHeaders;
@ -104,6 +104,7 @@ impl CachedClient {
&self,
req: Request,
cache_entry: &CacheEntry,
cache_control: CacheControl,
response_callback: Callback,
) -> Result<Payload, CachedClientError<CallBackError>>
where
@ -136,7 +137,7 @@ impl CachedClient {
None
};
let cached_response = self.send_cached(req, cached).boxed().await?;
let cached_response = self.send_cached(req, cache_control, cached).boxed().await?;
let write_cache = info_span!("write_cache", file = %cache_entry.path().display());
match cached_response {
@ -190,6 +191,7 @@ impl CachedClient {
async fn send_cached<T: Serialize + DeserializeOwned>(
&self,
mut req: Request,
cache_control: CacheControl,
cached: Option<DataWithCachePolicy<T>>,
) -> Result<CachedResponse<T>, crate::Error> {
// The converted types are from the specific `reqwest` types to the more generic `http`
@ -198,6 +200,7 @@ impl CachedClient {
req.try_clone()
.expect("You can't use streaming request bodies with this function"),
)?;
let url = req.url().clone();
let cached_response = if let Some(cached) = cached {
// Avoid sending revalidation requests for immutable responses.
@ -206,6 +209,17 @@ impl CachedClient {
return Ok(CachedResponse::FreshCache(cached.data));
}
// Apply the cache control header, if necessary.
match cache_control {
CacheControl::None => {}
CacheControl::MustRevalidate => {
converted_req.headers_mut().insert(
http::header::CACHE_CONTROL,
http::HeaderValue::from_static("max-age=0, must-revalidate"),
);
}
}
match cached
.cache_policy
.before_request(&converted_req, SystemTime::now())
@ -300,3 +314,21 @@ impl CachedClient {
))
}
}
#[derive(Debug, Clone, Copy)]
pub enum CacheControl {
/// Respect the `cache-control` header from the response.
None,
/// Apply `max-age=0, must-revalidate` to the request.
MustRevalidate,
}
impl From<Freshness> for CacheControl {
fn from(value: Freshness) -> Self {
match value {
Freshness::Fresh => CacheControl::None,
Freshness::Stale => CacheControl::MustRevalidate,
Freshness::Missing => CacheControl::None,
}
}
}
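
One subtlety in the mapping above: `Freshness::Missing` maps to
`CacheControl::None` rather than `MustRevalidate`, since a missing entry
results in an unconditional fetch anyway. A standalone sketch of the mapping
(illustrative; the real types live in `puffin_cache` and `puffin_client`):

```rust
#[derive(Debug, PartialEq)]
enum Freshness {
    Fresh,
    Stale,
    Missing,
}

#[derive(Debug, PartialEq)]
enum CacheControl {
    None,
    MustRevalidate,
}

impl From<Freshness> for CacheControl {
    fn from(value: Freshness) -> Self {
        match value {
            // A missing entry triggers an unconditional fetch, so there's
            // nothing to revalidate; only stale entries force the header.
            Freshness::Fresh | Freshness::Missing => CacheControl::None,
            Freshness::Stale => CacheControl::MustRevalidate,
        }
    }
}

fn main() {
    assert_eq!(CacheControl::from(Freshness::Missing), CacheControl::None);
    assert_eq!(CacheControl::from(Freshness::Stale), CacheControl::MustRevalidate);
}
```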

@ -19,6 +19,7 @@ use puffin_cache::{Cache, CacheBucket};
use puffin_normalize::PackageName;
use pypi_types::Hashes;
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::{Error, RegistryClient};
@ -91,6 +92,8 @@ impl<'a> FlatIndexClient<'a> {
"html",
format!("{}.msgpack", cache_key::digest(&url.to_string())),
);
let cache_control = CacheControl::from(self.cache.freshness(&cache_entry, None)?);
let cached_client = self.client.cached_client();
let flat_index_request = cached_client
@ -124,7 +127,12 @@ impl<'a> FlatIndexClient<'a> {
.instrument(info_span!("parse_flat_index_html", url = % url))
};
let files = cached_client
.get_cached_with_callback(flat_index_request, &cache_entry, parse_simple_response)
.get_cached_with_callback(
flat_index_request,
&cache_entry,
cache_control,
parse_simple_response,
)
.await?;
Ok(files
.into_iter()

@ -1,4 +1,4 @@
pub use cached_client::{CachedClient, CachedClientError, DataWithCachePolicy};
pub use cached_client::{CacheControl, CachedClient, CachedClientError, DataWithCachePolicy};
pub use error::Error;
pub use flat_index::{FlatDistributions, FlatIndex, FlatIndexClient, FlatIndexError};
pub use registry_client::{

@ -24,6 +24,7 @@ use puffin_cache::{Cache, CacheBucket, WheelCache};
use puffin_normalize::PackageName;
use pypi_types::{BaseUrl, Metadata21, SimpleJson};
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::{CachedClient, CachedClientError, Error};
@ -166,6 +167,8 @@ impl RegistryClient {
}),
format!("{package_name}.msgpack"),
);
let cache_control =
CacheControl::from(self.cache.freshness(&cache_entry, Some(package_name))?);
let simple_request = self
.client
@ -211,7 +214,12 @@ impl RegistryClient {
};
let result = self
.client
.get_cached_with_callback(simple_request, &cache_entry, parse_simple_response)
.get_cached_with_callback(
simple_request,
&cache_entry,
cache_control,
parse_simple_response,
)
.await;
Ok(result)
}
@ -286,6 +294,8 @@ impl RegistryClient {
WheelCache::Index(index).remote_wheel_dir(filename.name.as_ref()),
format!("{}.msgpack", filename.stem()),
);
let cache_control =
CacheControl::from(self.cache.freshness(&cache_entry, Some(&filename.name))?);
let response_callback = |response: Response| async {
let bytes = response.bytes().await?;
@ -299,7 +309,7 @@ impl RegistryClient {
let req = self.client.uncached().get(url.clone()).build()?;
Ok(self
.client
.get_cached_with_callback(req, &cache_entry, response_callback)
.get_cached_with_callback(req, &cache_entry, cache_control, response_callback)
.await?)
} else {
// If we lack PEP 658 support, try using HTTP range requests to read only the
@ -322,6 +332,8 @@ impl RegistryClient {
cache_shard.remote_wheel_dir(filename.name.as_ref()),
format!("{}.msgpack", filename.stem()),
);
let cache_control =
CacheControl::from(self.cache.freshness(&cache_entry, Some(&filename.name))?);
// This response callback is special, we actually make a number of subsequent requests to
// fetch the file from the remote zip.
@ -343,7 +355,12 @@ impl RegistryClient {
let req = self.client.uncached().head(url.clone()).build()?;
let result = self
.client
.get_cached_with_callback(req, &cache_entry, read_metadata_range_request)
.get_cached_with_callback(
req,
&cache_entry,
cache_control,
read_metadata_range_request,
)
.await
.map_err(crate::Error::from);

@ -15,7 +15,7 @@ use distribution_types::{
};
use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket, WheelCache};
use puffin_client::{CachedClientError, RegistryClient};
use puffin_client::{CacheControl, CachedClientError, RegistryClient};
use puffin_extract::unzip_no_seek;
use puffin_git::GitSource;
use puffin_traits::{BuildContext, NoBinary};
@ -35,6 +35,8 @@ pub enum DistributionDatabaseError {
#[error(transparent)]
Request(#[from] reqwest::Error),
#[error(transparent)]
Io(#[from] io::Error),
#[error(transparent)]
SourceBuild(#[from] SourceDistError),
#[error("Git operation failed")]
Git(#[source] anyhow::Error),
@ -129,6 +131,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
wheel.filename.stem(),
);
// TODO(charlie): There's no need to unzip if the wheel is unchanged.
return Ok(LocalWheel::Disk(DiskWheel {
dist: dist.clone(),
path: path.clone(),
@ -167,9 +170,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
};
let req = self.client.cached_client().uncached().get(url).build()?;
let cache_control =
CacheControl::from(self.cache.freshness(&http_entry, Some(wheel.name()))?);
self.client
.cached_client()
.get_cached_with_callback(req, &http_entry, download)
.get_cached_with_callback(req, &http_entry, cache_control, download)
.await
.map_err(|err| match err {
CachedClientError::Callback(err) => err,
@ -222,9 +227,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
.uncached()
.get(wheel.url.raw().clone())
.build()?;
let cache_control =
CacheControl::from(self.cache.freshness(&http_entry, Some(wheel.name()))?);
self.client
.cached_client()
.get_cached_with_callback(req, &http_entry, download)
.get_cached_with_callback(req, &http_entry, cache_control, download)
.await
.map_err(|err| match err {
CachedClientError::Callback(err) => err,
@ -249,6 +256,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
wheel.filename.stem(),
);
// TODO(charlie): There's no need to unzip if the wheel is unchanged.
Ok(LocalWheel::Disk(DiskWheel {
dist: dist.clone(),
path: wheel.path.clone(),
@ -262,12 +270,23 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
let _guard = lock.lock().await;
let built_wheel = self.builder.download_and_build(source_dist).boxed().await?;
Ok(LocalWheel::Built(BuiltWheel {
dist: dist.clone(),
path: built_wheel.path,
target: built_wheel.target,
filename: built_wheel.filename,
}))
// If the wheel was unzipped previously, respect it. Source distributions are
// cached under a unique build ID, so unzipped directories are never stale.
if built_wheel.target.exists() {
Ok(LocalWheel::Unzipped(UnzippedWheel {
dist: dist.clone(),
target: built_wheel.target,
filename: built_wheel.filename,
}))
} else {
Ok(LocalWheel::Built(BuiltWheel {
dist: dist.clone(),
path: built_wheel.path,
target: built_wheel.target,
filename: built_wheel.filename,
}))
}
}
}
}

@ -1,7 +1,8 @@
use distribution_types::{git_reference, DirectUrlSourceDist, GitSourceDist, Name, PathSourceDist};
use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket, CacheShard, WheelCache};
use puffin_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheShard, Freshness, WheelCache};
use puffin_fs::symlinks;
use puffin_normalize::PackageName;
use crate::index::cached_wheel::CachedWheel;
use crate::source::{read_http_manifest, read_timestamp_manifest, MANIFEST};
@ -26,13 +27,19 @@ impl BuiltWheelIndex {
WheelCache::Url(source_dist.url.raw()).remote_wheel_dir(source_dist.name().as_ref()),
);
// Read the existing metadata from the cache, if it exists.
// Read the manifest from the cache. There's no need to enforce freshness, since we
// enforce freshness on the entries.
let manifest_entry = cache_shard.entry(MANIFEST);
let Some(manifest) = read_http_manifest(&manifest_entry)? else {
return Ok(None);
};
Ok(Self::find(&cache_shard.shard(manifest.digest()), tags))
Ok(Self::find(
&cache_shard.shard(manifest.digest()),
source_dist.name(),
cache,
tags,
))
}
/// Return the most compatible [`CachedWheel`] for a given source distribution at a local path.
@ -47,17 +54,23 @@ impl BuiltWheelIndex {
);
// Determine the last-modified time of the source distribution.
let Some(modified) = puffin_cache::archive_mtime(&source_dist.path)? else {
let Some(modified) = ArchiveTimestamp::from_path(&source_dist.path)? else {
return Err(SourceDistError::DirWithoutEntrypoint);
};
// Read the existing metadata from the cache, if it's up-to-date.
// Read the manifest from the cache. There's no need to enforce freshness, since we
// enforce freshness on the entries.
let manifest_entry = cache_shard.entry(MANIFEST);
let Some(manifest) = read_timestamp_manifest(&manifest_entry, modified)? else {
return Ok(None);
};
Ok(Self::find(&cache_shard.shard(manifest.digest()), tags))
Ok(Self::find(
&cache_shard.shard(manifest.digest()),
source_dist.name(),
cache,
tags,
))
}
/// Return the most compatible [`CachedWheel`] for a given source distribution at a git URL.
@ -72,7 +85,7 @@ impl BuiltWheelIndex {
.remote_wheel_dir(source_dist.name().as_ref()),
);
Self::find(&cache_shard, tags)
Self::find(&cache_shard, source_dist.name(), cache, tags)
}
/// Find the "best" distribution in the index for a given source distribution.
@ -91,7 +104,12 @@ impl BuiltWheelIndex {
/// ```
///
/// The `shard` should be `built-wheels-v0/pypi/django-allauth-0.51.0.tar.gz`.
pub fn find(shard: &CacheShard, tags: &Tags) -> Option<CachedWheel> {
fn find(
shard: &CacheShard,
package: &PackageName,
cache: &Cache,
tags: &Tags,
) -> Option<CachedWheel> {
let mut candidate: Option<CachedWheel> = None;
// Unzipped wheels are stored as symlinks into the archive directory.
@ -99,6 +117,15 @@ impl BuiltWheelIndex {
match CachedWheel::from_path(&subdir) {
None => {}
Some(dist_info) => {
// If the [`Refresh`] policy is set, ignore entries that were created before
// the cutoff.
if cache
.freshness(&dist_info.entry, Some(package))
.is_ok_and(Freshness::is_stale)
{
continue;
}
// Pick the wheel with the highest priority
let compatibility = dist_info.filename.compatibility(tags);

@ -7,7 +7,7 @@ use rustc_hash::FxHashMap;
use distribution_types::{CachedRegistryDist, FlatIndexLocation, IndexLocations, IndexUrl};
use pep440_rs::Version;
use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket, WheelCache};
use puffin_cache::{Cache, CacheBucket, Freshness, WheelCache};
use puffin_fs::{directories, symlinks};
use puffin_normalize::PackageName;
@ -94,7 +94,7 @@ impl<'a> RegistryWheelIndex<'a> {
WheelCache::Index(index_url).remote_wheel_dir(package.to_string()),
);
Self::add_directory(&*wheel_dir, tags, &mut versions);
Self::add_directory(&wheel_dir, package, cache, tags, &mut versions);
// Index all the built wheels, created by downloading and building source distributions
// from the registry.
@ -109,7 +109,13 @@ impl<'a> RegistryWheelIndex<'a> {
let cache_shard = cache_shard.shard(shard);
let manifest_entry = cache_shard.entry(MANIFEST);
if let Ok(Some(manifest)) = read_http_manifest(&manifest_entry) {
Self::add_directory(cache_shard.join(manifest.digest()), tags, &mut versions);
Self::add_directory(
cache_shard.join(manifest.digest()),
package,
cache,
tags,
&mut versions,
);
};
}
}
@ -122,6 +128,8 @@ impl<'a> RegistryWheelIndex<'a> {
/// Each subdirectory in the given path is expected to be that of an unzipped wheel.
fn add_directory(
path: impl AsRef<Path>,
package: &PackageName,
cache: &Cache,
tags: &Tags,
versions: &mut BTreeMap<Version, CachedRegistryDist>,
) {
@ -130,6 +138,13 @@ impl<'a> RegistryWheelIndex<'a> {
match CachedWheel::from_path(&wheel_dir) {
None => {}
Some(dist_info) => {
if cache
.freshness(&dist_info.entry, Some(package))
.is_ok_and(Freshness::is_stale)
{
continue;
}
let dist_info = dist_info.into_registry_dist();
// Pick the wheel with the highest priority

@ -4,7 +4,7 @@ use std::str::FromStr;
use distribution_filename::WheelFilename;
use platform_tags::Tags;
use puffin_cache::CacheShard;
use puffin_fs::directories;
use puffin_fs::files;
/// The information about the wheel we either just built or got from the cache.
#[derive(Debug, Clone)]
@ -20,8 +20,8 @@ pub struct BuiltWheelMetadata {
impl BuiltWheelMetadata {
/// Find a compatible wheel in the cache based on the given manifest.
pub(crate) fn find_in_cache(tags: &Tags, cache_shard: &CacheShard) -> Option<Self> {
for directory in directories(cache_shard) {
if let Some(metadata) = Self::from_path(directory) {
for directory in files(cache_shard) {
if let Some(metadata) = Self::from_path(directory, cache_shard) {
// Validate that the wheel is compatible with the target platform.
if metadata.filename.is_compatible(tags) {
return Some(metadata);
@ -32,11 +32,11 @@ impl BuiltWheelMetadata {
}
/// Try to parse a distribution from a cached directory name (like `typing-extensions-4.8.0-py3-none-any.whl`).
fn from_path(path: PathBuf) -> Option<Self> {
fn from_path(path: PathBuf, cache_shard: &CacheShard) -> Option<Self> {
let filename = path.file_name()?.to_str()?;
let filename = WheelFilename::from_str(filename).ok()?;
Some(Self {
target: path.join(filename.stem()),
target: cache_shard.join(filename.stem()),
path,
filename,
})

@ -23,8 +23,10 @@ use distribution_types::{
use install_wheel_rs::read_dist_info;
use pep508_rs::VerbatimUrl;
use platform_tags::Tags;
use puffin_cache::{CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, WheelCache};
use puffin_client::{CachedClient, CachedClientError, DataWithCachePolicy};
use puffin_cache::{
ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Freshness, WheelCache,
};
use puffin_client::{CacheControl, CachedClient, CachedClientError, DataWithCachePolicy};
use puffin_fs::{write_atomic, LockedFile};
use puffin_git::{Fetch, GitSource};
use puffin_traits::{BuildContext, BuildKind, SourceBuildTrait};
@ -247,6 +249,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
subdirectory: Option<&'data Path>,
) -> Result<BuiltWheelMetadata, SourceDistError> {
let cache_entry = cache_shard.entry(MANIFEST);
let cache_control = CacheControl::from(
self.build_context
.cache()
.freshness(&cache_entry, Some(source_dist.name()))?,
);
let download = |response| {
async {
@ -267,14 +274,16 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
let req = self.cached_client.uncached().get(url.clone()).build()?;
let manifest = self
.cached_client
.get_cached_with_callback(req, &cache_entry, download)
.get_cached_with_callback(req, &cache_entry, cache_control, download)
.await
.map_err(|err| match err {
CachedClientError::Callback(err) => err,
CachedClientError::Client(err) => SourceDistError::Client(err),
})?;
// From here on, scope all operations to the current build.
// From here on, scope all operations to the current build. Within the manifest shard,
// there's no need to check for freshness, since entries have to be fresher than the
// manifest itself.
let cache_shard = cache_shard.shard(manifest.digest());
// If the cache contains a compatible wheel, return it.
@ -282,8 +291,6 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
return Ok(built_wheel);
}
// At this point, we're seeing cached metadata (as in, we have an up-to-date source
// distribution), but the wheel(s) we built previously are incompatible.
let task = self
.reporter
.as_ref()
@ -310,12 +317,9 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
let metadata_entry = cache_shard.entry(METADATA);
write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?).await?;
let path = cache_shard.join(&disk_filename);
let target = cache_shard.join(wheel_filename.stem());
Ok(BuiltWheelMetadata {
path,
target,
path: cache_shard.join(&disk_filename),
target: cache_shard.join(wheel_filename.stem()),
filename: wheel_filename,
})
}
@ -334,6 +338,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
subdirectory: Option<&'data Path>,
) -> Result<Metadata21, SourceDistError> {
let cache_entry = cache_shard.entry(MANIFEST);
let cache_control = CacheControl::from(
self.build_context
.cache()
.freshness(&cache_entry, Some(source_dist.name()))?,
);
let download = |response| {
async {
@ -354,18 +363,22 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
let req = self.cached_client.uncached().get(url.clone()).build()?;
let manifest = self
.cached_client
.get_cached_with_callback(req, &cache_entry, download)
.get_cached_with_callback(req, &cache_entry, cache_control, download)
.await
.map_err(|err| match err {
CachedClientError::Callback(err) => err,
CachedClientError::Client(err) => SourceDistError::Client(err),
})?;
// From here on, scope all operations to the current build.
// From here on, scope all operations to the current build. Within the manifest shard,
// there's no need to check for freshness, since entries have to be fresher than the
// manifest itself.
let cache_shard = cache_shard.shard(manifest.digest());
// If the cache contains compatible metadata, return it.
if let Some(metadata) = read_cached_metadata(&cache_shard.entry(METADATA)).await? {
let metadata_entry = cache_shard.entry(METADATA);
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
debug!("Using cached metadata for {source_dist}");
return Ok(metadata.clone());
}
@ -386,8 +399,6 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
return Ok(metadata);
}
// At this point, we're seeing cached metadata (as in, we have an up-to-date source
// distribution), but the wheel(s) we built previously are incompatible.
let task = self
.reporter
.as_ref()
@ -429,15 +440,22 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
);
// Determine the last-modified time of the source distribution.
let Some(modified) = puffin_cache::archive_mtime(&path_source_dist.path)? else {
let Some(modified) = ArchiveTimestamp::from_path(&path_source_dist.path)? else {
return Err(SourceDistError::DirWithoutEntrypoint);
};
// Read the existing metadata from the cache, to clear stale wheels.
// Read the existing metadata from the cache.
let manifest_entry = cache_shard.entry(MANIFEST);
let manifest = refresh_timestamp_manifest(&manifest_entry, modified).await?;
let manifest_freshness = self
.build_context
.cache()
.freshness(&manifest_entry, Some(source_dist.name()))?;
let manifest =
refresh_timestamp_manifest(&manifest_entry, manifest_freshness, modified).await?;
// From here on, scope all operations to the current build.
// From here on, scope all operations to the current build. Within the manifest shard,
// there's no need to check for freshness, since entries have to be fresher than the
// manifest itself.
let cache_shard = cache_shard.shard(manifest.digest());
// If the cache contains a compatible wheel, return it.
@ -488,20 +506,36 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
);
// Determine the last-modified time of the source distribution.
let Some(modified) = puffin_cache::archive_mtime(&path_source_dist.path)? else {
let Some(modified) = ArchiveTimestamp::from_path(&path_source_dist.path)? else {
return Err(SourceDistError::DirWithoutEntrypoint);
};
// Read the existing metadata from the cache, to clear stale entries.
let manifest_entry = cache_shard.entry(MANIFEST);
let manifest = refresh_timestamp_manifest(&manifest_entry, modified).await?;
let manifest_freshness = self
.build_context
.cache()
.freshness(&manifest_entry, Some(source_dist.name()))?;
let manifest =
refresh_timestamp_manifest(&manifest_entry, manifest_freshness, modified).await?;
// From here on, scope all operations to the current build.
// From here on, scope all operations to the current build. Within the manifest shard,
// there's no need to check for freshness, since entries have to be fresher than the
// manifest itself.
let cache_shard = cache_shard.shard(manifest.digest());
// If the cache contains compatible metadata, return it.
if let Some(metadata) = read_cached_metadata(&cache_shard.entry(METADATA)).await? {
return Ok(metadata.clone());
let metadata_entry = cache_shard.entry(METADATA);
if self
.build_context
.cache()
.freshness(&metadata_entry, Some(source_dist.name()))
.is_ok_and(Freshness::is_fresh)
{
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
debug!("Using cached metadata for {source_dist}");
return Ok(metadata.clone());
}
}
// If the backend supports `prepare_metadata_for_build_wheel`, use it.
@ -611,8 +645,17 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
);
// If the cache contains compatible metadata, return it.
if let Some(metadata) = read_cached_metadata(&cache_shard.entry(METADATA)).await? {
return Ok(metadata.clone());
let metadata_entry = cache_shard.entry(METADATA);
if self
.build_context
.cache()
.freshness(&metadata_entry, Some(source_dist.name()))
.is_ok_and(Freshness::is_fresh)
{
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
debug!("Using cached metadata for {source_dist}");
return Ok(metadata.clone());
}
}
// If the backend supports `prepare_metadata_for_build_wheel`, use it.
@ -902,13 +945,13 @@ pub(crate) fn read_http_manifest(
/// If the cache entry is stale, a new entry will be created.
pub(crate) fn read_timestamp_manifest(
cache_entry: &CacheEntry,
modified: SystemTime,
modified: ArchiveTimestamp,
) -> Result<Option<Manifest>, SourceDistError> {
// If the cache entry is up-to-date, return it.
match std::fs::read(cache_entry.path()) {
Ok(cached) => {
let cached = rmp_serde::from_slice::<CachedByTimestamp<SystemTime, Manifest>>(&cached)?;
if cached.timestamp == modified {
if cached.timestamp == modified.timestamp() {
return Ok(Some(cached.data));
}
}
@ -923,11 +966,14 @@ pub(crate) fn read_timestamp_manifest(
/// If the cache entry is stale, a new entry will be created.
pub(crate) async fn refresh_timestamp_manifest(
cache_entry: &CacheEntry,
modified: SystemTime,
freshness: Freshness,
modified: ArchiveTimestamp,
) -> Result<Manifest, SourceDistError> {
// If the cache entry is up-to-date, return it.
if let Some(manifest) = read_timestamp_manifest(cache_entry, modified)? {
return Ok(manifest);
// If we know the exact modification time, we don't need to force a revalidate.
if matches!(modified, ArchiveTimestamp::Exact(_)) || freshness.is_fresh() {
if let Some(manifest) = read_timestamp_manifest(cache_entry, modified)? {
return Ok(manifest);
}
}
// Otherwise, create a new manifest.
@ -936,7 +982,7 @@ pub(crate) async fn refresh_timestamp_manifest(
write_atomic(
cache_entry.path(),
rmp_serde::to_vec(&CachedByTimestamp {
timestamp: modified,
timestamp: modified.timestamp(),
data: manifest,
})?,
)

@ -131,6 +131,30 @@ pub fn symlinks(path: impl AsRef<Path>) -> impl Iterator<Item = PathBuf> {
.map(|entry| entry.path())
}
/// Iterate over the files in a directory.
///
/// If the directory does not exist, returns an empty iterator.
pub fn files(path: impl AsRef<Path>) -> impl Iterator<Item = PathBuf> {
path.as_ref()
.read_dir()
.ok()
.into_iter()
.flatten()
.filter_map(|entry| match entry {
Ok(entry) => Some(entry),
Err(err) => {
warn!("Failed to read entry: {}", err);
None
}
})
.filter(|entry| {
entry
.file_type()
.map_or(false, |file_type| file_type.is_file())
})
.map(|entry| entry.path())
}
/// A file lock that is automatically released when dropped.
#[derive(Debug)]
pub struct LockedFile(fs_err::File);

@ -12,7 +12,7 @@ use distribution_types::{
};
use pep508_rs::{Requirement, VersionOrUrl};
use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
use puffin_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheEntry, Freshness, WheelCache};
use puffin_distribution::{BuiltWheelIndex, RegistryWheelIndex};
use puffin_interpreter::Virtualenv;
use puffin_normalize::PackageName;
@ -48,6 +48,11 @@ impl<'a> Planner<'a> {
/// Partition a set of requirements into those that should be linked from the cache, those that
/// need to be downloaded, and those that should be removed.
///
/// The install plan will respect cache [`Freshness`]. Specifically, if refresh is enabled, the
/// plan will only respect cache entries created after the start of the current command (as per
/// the [`Refresh`] policy); stale entries will be ignored. The downstream distribution database
/// may still read those entries from the cache after revalidating them.
#[allow(clippy::too_many_arguments)]
pub fn build(
self,
@ -140,9 +145,6 @@ impl<'a> Planner<'a> {
};
if reinstall {
// If necessary, purge the cached distributions.
debug!("Purging cached distributions for: {requirement}");
cache.purge(&requirement.name)?;
if let Some(distribution) = site_packages.remove(&requirement.name) {
reinstalls.push(distribution);
}
@ -164,7 +166,7 @@ impl<'a> Planner<'a> {
if &distribution.url == url.raw() {
// If the requirement came from a local path, check freshness.
if let Ok(archive) = url.to_file_path() {
if is_fresh_install(distribution, &archive)? {
if not_modified_install(distribution, &archive)? {
debug!("Requirement already satisfied (and up-to-date): {distribution}");
continue;
}
@ -243,16 +245,21 @@ impl<'a> Planner<'a> {
)
.entry(wheel.filename.stem());
if cache_entry.path().exists() {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename,
wheel.url,
cache_entry.into_path_buf(),
);
if cache
.freshness(&cache_entry, Some(wheel.name()))
.is_ok_and(Freshness::is_fresh)
{
if cache_entry.path().exists() {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename,
wheel.url,
cache_entry.into_path_buf(),
);
debug!("URL wheel requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist));
continue;
debug!("URL wheel requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist));
continue;
}
}
}
Dist::Built(BuiltDist::Path(wheel)) => {
@ -280,16 +287,21 @@ impl<'a> Planner<'a> {
)
.entry(wheel.filename.stem());
if is_fresh_cache(&cache_entry, &wheel.path)? {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename,
wheel.url,
cache_entry.into_path_buf(),
);
if cache
.freshness(&cache_entry, Some(wheel.name()))
.is_ok_and(Freshness::is_fresh)
{
if not_modified_cache(&cache_entry, &wheel.path)? {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename,
wheel.url,
cache_entry.into_path_buf(),
);
debug!("Path wheel requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist));
continue;
debug!("Path wheel requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist));
continue;
}
}
}
Dist::Source(SourceDist::DirectUrl(sdist)) => {
@ -357,32 +369,21 @@ impl<'a> Planner<'a> {
}
}
/// Returns `true` if the cache entry linked to the file at the given [`Path`] is fresh.
/// Returns `true` if the cache entry linked to the file at the given [`Path`] is not-modified.
///
/// A cache entry is considered fresh if it exists and is newer than the file at the given path.
/// If the cache entry is stale, it will be removed from the cache.
fn is_fresh_cache(cache_entry: &CacheEntry, artifact: &Path) -> Result<bool, io::Error> {
/// A cache entry is not modified if it exists and is newer than the file at the given path.
fn not_modified_cache(cache_entry: &CacheEntry, artifact: &Path) -> Result<bool, io::Error> {
match fs_err::metadata(cache_entry.path()).and_then(|metadata| metadata.modified()) {
Ok(cache_mtime) => {
// Determine the modification time of the wheel.
let Some(artifact_mtime) = puffin_cache::archive_mtime(artifact)? else {
// The artifact doesn't exist, so it's not fresh.
return Ok(false);
};
if cache_mtime >= artifact_mtime {
Ok(true)
if let Some(artifact_mtime) = ArchiveTimestamp::from_path(artifact)? {
Ok(cache_mtime >= artifact_mtime.timestamp())
} else {
debug!(
"Removing stale built wheels for: {}",
cache_entry.path().display()
);
if let Err(err) = fs_err::remove_dir_all(cache_entry.dir()) {
warn!("Failed to remove stale built wheel cache directory: {err}");
}
// The artifact doesn't exist, so it's not fresh.
Ok(false)
}
}
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
Err(err) if err.kind() == io::ErrorKind::NotFound => {
// The cache entry doesn't exist, so it's not fresh.
Ok(false)
}
@ -390,20 +391,20 @@ fn is_fresh_cache(cache_entry: &CacheEntry, artifact: &Path) -> Result<bool, io:
}
}
/// Returns `true` if the installed distribution linked to the file at the given [`Path`] is fresh,
/// based on the modification time of the installed distribution.
fn is_fresh_install(dist: &InstalledDirectUrlDist, artifact: &Path) -> Result<bool, io::Error> {
/// Returns `true` if the installed distribution linked to the file at the given [`Path`] is
/// not-modified based on the modification time of the installed distribution.
fn not_modified_install(dist: &InstalledDirectUrlDist, artifact: &Path) -> Result<bool, io::Error> {
// Determine the modification time of the installed distribution.
let dist_metadata = fs_err::metadata(&dist.path)?;
let dist_mtime = dist_metadata.modified()?;
// Determine the modification time of the wheel.
let Some(artifact_mtime) = puffin_cache::archive_mtime(artifact)? else {
if let Some(artifact_mtime) = ArchiveTimestamp::from_path(artifact)? {
Ok(dist_mtime >= artifact_mtime.timestamp())
} else {
// The artifact doesn't exist, so it's not fresh.
return Ok(false);
};
Ok(dist_mtime >= artifact_mtime)
Ok(false)
}
}
#[derive(Debug, Default)]
@ -425,7 +426,7 @@ pub struct Plan {
pub extraneous: Vec<InstalledDist>,
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum Reinstall {
/// Don't reinstall any packages; respect the existing installation.
None,

@ -11,7 +11,7 @@ use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use platform_host::Platform;
use platform_tags::{Tags, TagsError};
use puffin_cache::{Cache, CacheBucket, CachedByTimestamp};
use puffin_cache::{Cache, CacheBucket, CachedByTimestamp, Freshness};
use puffin_fs::write_atomic_sync;
use crate::python_platform::PythonPlatform;
@ -272,6 +272,7 @@ impl InterpreterQueryResult {
/// time as a cache key.
pub(crate) fn query_cached(executable: &Path, cache: &Cache) -> Result<Self, Error> {
let executable_bytes = executable.as_os_str().as_encoded_bytes();
let cache_entry = cache.entry(
CacheBucket::Interpreter,
"",
@ -281,25 +282,30 @@ impl InterpreterQueryResult {
let modified = Timestamp::from_path(fs_err::canonicalize(executable)?.as_ref())?;
// Read from the cache.
if let Ok(data) = fs::read(cache_entry.path()) {
match rmp_serde::from_slice::<CachedByTimestamp<Timestamp, Self>>(&data) {
Ok(cached) => {
if cached.timestamp == modified {
debug!("Using cached markers for: {}", executable.display());
return Ok(cached.data);
}
if cache
.freshness(&cache_entry, None)
.is_ok_and(Freshness::is_fresh)
{
if let Ok(data) = fs::read(cache_entry.path()) {
match rmp_serde::from_slice::<CachedByTimestamp<Timestamp, Self>>(&data) {
Ok(cached) => {
if cached.timestamp == modified {
debug!("Using cached markers for: {}", executable.display());
return Ok(cached.data);
}
debug!(
"Ignoring stale cached markers for: {}",
executable.display()
);
}
Err(err) => {
warn!(
"Broken cache entry at {}, removing: {err}",
cache_entry.path().display()
);
let _ = fs_err::remove_file(cache_entry.path());
debug!(
"Ignoring stale cached markers for: {}",
executable.display()
);
}
Err(err) => {
warn!(
"Broken cache entry at {}, removing: {err}",
cache_entry.path().display()
);
let _ = fs_err::remove_file(cache_entry.path());
}
}
}
}

@ -10,7 +10,7 @@ use clap::{Args, Parser, Subcommand};
use owo_colors::OwoColorize;
use distribution_types::{FlatIndexLocation, IndexLocations, IndexUrl};
use puffin_cache::{Cache, CacheArgs};
use puffin_cache::{Cache, CacheArgs, Refresh};
use puffin_installer::{NoBinary, Reinstall};
use puffin_interpreter::PythonVersion;
use puffin_normalize::{ExtraName, PackageName};
@ -200,6 +200,14 @@ struct PipCompileArgs {
#[clap(short, long)]
output_file: Option<PathBuf>,
/// Refresh all cached data.
#[clap(long)]
refresh: bool,
/// Refresh cached data for a specific package.
#[clap(long)]
refresh_package: Vec<PackageName>,
/// The URL of the Python Package Index.
#[clap(long, short, default_value = IndexUrl::Pypi.as_str(), env = "PUFFIN_INDEX_URL")]
index_url: IndexUrl,
@ -264,7 +272,7 @@ struct PipCompileArgs {
/// Timestamps are given either as RFC 3339 timestamps such as `2006-12-02T02:07:43Z` or as
/// UTC dates in the same format such as `2006-12-02`. Dates are interpreted as including this
/// day, i.e. until midnight UTC that day.
#[arg(long, value_parser = date_or_datetime, hide = true)]
#[arg(long, value_parser = date_or_datetime)]
exclude_newer: Option<DateTime<Utc>>,
}
@ -275,16 +283,22 @@ struct PipSyncArgs {
#[clap(required(true))]
src_file: Vec<PathBuf>,
/// Reinstall all packages, overwriting any entries in the cache and replacing any existing
/// packages in the environment.
/// Reinstall all packages, regardless of whether they're already installed.
#[clap(long, alias = "force-reinstall")]
reinstall: bool,
/// Reinstall a specific package, overwriting any entries in the cache and replacing any
/// existing versions in the environment.
/// Reinstall a specific package, regardless of whether it's already installed.
#[clap(long)]
reinstall_package: Vec<PackageName>,
/// Refresh all cached data.
#[clap(long)]
refresh: bool,
/// Refresh cached data for a specific package.
#[clap(long)]
refresh_package: Vec<PackageName>,
/// The method to use when installing packages from the global cache.
#[clap(long, value_enum, default_value_t = install_wheel_rs::linker::LinkMode::default())]
link_mode: install_wheel_rs::linker::LinkMode,
@ -390,16 +404,22 @@ struct PipInstallArgs {
#[clap(long, conflicts_with = "extra")]
all_extras: bool,
/// Reinstall all packages, overwriting any entries in the cache and replacing any existing
/// packages in the environment.
/// Reinstall all packages, regardless of whether they're already installed.
#[clap(long, alias = "force-reinstall")]
reinstall: bool,
/// Reinstall a specific package, overwriting any entries in the cache and replacing any
/// existing versions in the environment.
/// Reinstall a specific package, regardless of whether it's already installed.
#[clap(long)]
reinstall_package: Vec<PackageName>,
/// Refresh all cached data.
#[clap(long)]
refresh: bool,
/// Refresh cached data for a specific package.
#[clap(long)]
refresh_package: Vec<PackageName>,
/// The method to use when installing packages from the global cache.
#[clap(long, value_enum, default_value_t = install_wheel_rs::linker::LinkMode::default())]
link_mode: install_wheel_rs::linker::LinkMode,
@ -619,6 +639,7 @@ async fn inner() -> Result<ExitStatus> {
Commands::Pip(PipArgs {
command: PipCommand::Compile(args),
}) => {
let cache = cache.with_refresh(Refresh::from_args(args.refresh, args.refresh_package));
let requirements = args
.src_file
.into_iter()
@ -675,6 +696,7 @@ async fn inner() -> Result<ExitStatus> {
Commands::Pip(PipArgs {
command: PipCommand::Sync(args),
}) => {
let cache = cache.with_refresh(Refresh::from_args(args.refresh, args.refresh_package));
let index_urls = IndexLocations::from_args(
args.index_url,
args.extra_index_url,
@ -709,6 +731,7 @@ async fn inner() -> Result<ExitStatus> {
Commands::Pip(PipArgs {
command: PipCommand::Install(args),
}) => {
let cache = cache.with_refresh(Refresh::from_args(args.refresh, args.refresh_package));
let requirements = args
.package
.into_iter()

@ -728,7 +728,7 @@ fn reinstall_build_system() -> Result<()> {
----- stderr -----
Resolved 8 packages in [TIME]
Downloaded 8 packages in [TIME]
Downloaded 7 packages in [TIME]
Installed 8 packages in [TIME]
+ blinker==1.7.0
+ click==8.1.7
@ -930,7 +930,6 @@ fn reinstall_no_binary() -> Result<()> {
----- stderr -----
Resolved 7 packages in [TIME]
Downloaded 1 package in [TIME]
Installed 1 package in [TIME]
- flask==3.0.0
+ flask==3.0.0

@ -2366,8 +2366,6 @@ fn reinstall() -> Result<()> {
----- stdout -----
----- stderr -----
Resolved 2 packages in [TIME]
Downloaded 2 packages in [TIME]
Uninstalled 2 packages in [TIME]
Installed 2 packages in [TIME]
- markupsafe==2.1.3
@ -2442,8 +2440,6 @@ fn reinstall_package() -> Result<()> {
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
- tomli==2.0.1
@ -2515,8 +2511,6 @@ fn reinstall_git() -> Result<()> {
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
- werkzeug==2.0.0 (from git+https://github.com/pallets/werkzeug.git@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74)
@ -2529,6 +2523,159 @@ fn reinstall_git() -> Result<()> {
Ok(())
}
/// Verify that we can force refresh of cached data.
#[test]
fn refresh() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = create_venv_py312(&temp_dir, &cache_dir);
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("MarkupSafe==2.1.3\ntomli==2.0.1")?;
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip")
.arg("sync")
.arg("requirements.txt")
.arg("--strict")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 2 packages in [TIME]
Downloaded 2 packages in [TIME]
Installed 2 packages in [TIME]
+ markupsafe==2.1.3
+ tomli==2.0.1
"###);
});
check_command(&venv, "import markupsafe", &temp_dir);
check_command(&venv, "import tomli", &temp_dir);
// Re-run the installation into a new virtual environment with `--refresh`. Ensure that we
// resolve and download the latest versions of the packages.
let parent = assert_fs::TempDir::new()?;
let venv = create_venv_py312(&parent, &cache_dir);
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip")
.arg("sync")
.arg("requirements.txt")
.arg("--refresh")
.arg("--strict")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 2 packages in [TIME]
Downloaded 2 packages in [TIME]
Installed 2 packages in [TIME]
+ markupsafe==2.1.3
+ tomli==2.0.1
"###);
});
check_command(&venv, "import markupsafe", &temp_dir);
check_command(&venv, "import tomli", &temp_dir);
Ok(())
}
/// Verify that we can force a refresh of cached data for specific packages.
#[test]
fn refresh_package() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = create_venv_py312(&temp_dir, &cache_dir);
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("MarkupSafe==2.1.3\ntomli==2.0.1")?;
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip")
.arg("sync")
.arg("requirements.txt")
.arg("--strict")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 2 packages in [TIME]
Downloaded 2 packages in [TIME]
Installed 2 packages in [TIME]
+ markupsafe==2.1.3
+ tomli==2.0.1
"###);
});
check_command(&venv, "import markupsafe", &temp_dir);
check_command(&venv, "import tomli", &temp_dir);
// Re-run the installation into a new virtual environment with `--refresh-package` scoped to
// `tomli`. Ensure that we only refresh the cached data for that package.
let parent = assert_fs::TempDir::new()?;
let venv = create_venv_py312(&parent, &cache_dir);
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip")
.arg("sync")
.arg("requirements.txt")
.arg("--refresh-package")
.arg("tomli")
.arg("--strict")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Installed 2 packages in [TIME]
+ markupsafe==2.1.3
+ tomli==2.0.1
"###);
});
check_command(&venv, "import markupsafe", &temp_dir);
check_command(&venv, "import tomli", &temp_dir);
Ok(())
}
#[test]
#[cfg(feature = "maturin")]
fn sync_editable() -> Result<()> {