Mark path-based cache entries as stale during install plan (#957)

## Summary

This is a small correctness improvement that ensures that we avoid using
stale cache entries for local dependencies in the install plan. We
already have some logic like this in the source distribution builder,
but it didn't apply in the install plan, and so we'd end up using stale
wheels.

Specifically, now, if you create a new local wheel, and run `pip sync`,
we'll mark the cache entries as stale and make sure we unzip it and
install it. (If the wheel is _already_ installed, we won't reinstall it
though, which will be a separate change. This is just about reading from
the cache, not the environment.)
This commit is contained in:
Charlie Marsh 2024-01-18 14:13:29 -05:00 committed by GitHub
parent f852d986f3
commit f17bad0a75
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 198 additions and 83 deletions

View file

@ -131,47 +131,6 @@ impl CachedDirectUrlDist {
}
}
#[derive(Debug, Clone)]
pub struct CachedWheel {
/// The filename of the wheel.
pub filename: WheelFilename,
/// The path to the wheel.
pub path: PathBuf,
}
impl CachedWheel {
/// Try to parse a distribution from a cached directory name (like `typing-extensions-4.8.0-py3-none-any`).
pub fn from_path(path: &Path) -> Option<Self> {
let filename = path.file_name()?.to_str()?;
let filename = WheelFilename::from_stem(filename).ok()?;
if path.is_file() {
return None;
}
let path = path.to_path_buf();
Some(Self { filename, path })
}
/// Convert a [`CachedWheel`] into a [`CachedRegistryDist`].
pub fn into_registry_dist(self) -> CachedRegistryDist {
CachedRegistryDist {
filename: self.filename,
path: self.path,
}
}
/// Convert a [`CachedWheel`] into a [`CachedDirectUrlDist`].
pub fn into_url_dist(self, url: VerbatimUrl) -> CachedDirectUrlDist {
CachedDirectUrlDist {
filename: self.filename,
url,
path: self.path,
editable: false,
}
}
}
impl Name for CachedRegistryDist {
fn name(&self) -> &PackageName {
&self.filename.name

View file

@ -4,6 +4,7 @@ use std::io::Write;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::SystemTime;
use fs_err as fs;
use tempfile::{tempdir, TempDir};
@ -32,6 +33,11 @@ impl CacheEntry {
Self(dir.into().join(file))
}
/// Create a new [`CacheEntry`] from a path.
pub fn from_path(path: impl Into<PathBuf>) -> Self {
Self(path.into())
}
/// Convert the [`CacheEntry`] into a [`PathBuf`].
#[inline]
pub fn into_path_buf(self) -> PathBuf {
@ -540,3 +546,34 @@ impl Display for CacheBucket {
f.write_str(self.to_str())
}
}
/// Return the modification timestamp for an archive, which could be a file (like a wheel or a zip
/// archive) or a directory containing a Python package.
///
/// If the path is to a directory with no entrypoint (i.e., no `pyproject.toml` or `setup.py`),
/// returns `None`.
pub fn archive_mtime(path: &Path) -> Result<Option<SystemTime>, io::Error> {
let metadata = fs_err::metadata(path)?;
if metadata.is_file() {
// `modified()` is infallible on Windows and Unix (i.e., all platforms we support).
Ok(Some(metadata.modified()?))
} else {
if let Some(metadata) = path
.join("pyproject.toml")
.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
{
Ok(Some(metadata.modified()?))
} else if let Some(metadata) = path
.join("setup.py")
.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
{
Ok(Some(metadata.modified()?))
} else {
Ok(None)
}
}
}

View file

@ -7,7 +7,6 @@ use std::process::Command;
use anyhow::{Context, Result};
use assert_cmd::prelude::*;
use assert_fs::prelude::*;
use assert_fs::TempDir;
use indoc::indoc;
use insta_cmd::_macro_support::insta;
use insta_cmd::{assert_cmd_snapshot, get_cargo_bin};
@ -1088,7 +1087,69 @@ fn install_local_wheel() -> Result<()> {
.collect();
insta::with_settings!({
filters => filters
filters => filters.clone()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip")
.arg("sync")
.arg("requirements.txt")
.arg("--strict")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Installed 1 package in [TIME]
+ tomli==2.0.1 (from file://[TEMP_DIR]/tomli-2.0.1-py3-none-any.whl)
"###);
});
check_command(&venv, "import tomli", &temp_dir);
// Create a new virtual environment.
let venv = create_venv_py312(&temp_dir, &cache_dir);
// Reinstall. The wheel should come from the cache, so there shouldn't be a "download".
insta::with_settings!({
filters => filters.clone()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip")
.arg("sync")
.arg("requirements.txt")
.arg("--strict")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Installed 1 package in [TIME]
+ tomli==2.0.1 (from file://[TEMP_DIR]/tomli-2.0.1-py3-none-any.whl)
"###);
});
check_command(&venv, "import tomli", &temp_dir);
// Create a new virtual environment.
let venv = create_venv_py312(&temp_dir, &cache_dir);
// "Modify" the wheel.
let archive_file = std::fs::File::open(&archive)?;
archive_file.set_modified(std::time::SystemTime::now())?;
// Reinstall. The wheel should be "downloaded" again.
insta::with_settings!({
filters => filters.clone()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip")
@ -2740,8 +2801,8 @@ fn sync_legacy_sdist_setuptools() -> Result<()> {
/// Sync using `--find-links` with a local directory.
#[test]
fn find_links() -> Result<()> {
let temp_dir = TempDir::new()?;
let cache_dir = TempDir::new()?;
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = create_venv_py312(&temp_dir, &cache_dir);
let requirements_txt = temp_dir.child("requirements.txt");

View file

@ -1,8 +1,9 @@
use distribution_types::CachedWheel;
use platform_tags::Tags;
use puffin_cache::CacheShard;
use puffin_fs::directories;
use crate::index::cached_wheel::CachedWheel;
/// A local index of built distributions for a specific source distribution.
pub struct BuiltWheelIndex;
@ -38,10 +39,6 @@ impl BuiltWheelIndex {
continue;
}
// TODO(charlie): Consider taking into account the freshness checks that we
// encode when building source distributions (e.g., timestamps). For now, we
// assume that distributions are immutable when installing (i.e., in this
// index).
if let Some(existing) = candidate.as_ref() {
// Override if the wheel is newer, or "more" compatible.
if dist_info.filename.version > existing.filename.version

View file

@ -0,0 +1,45 @@
use std::path::Path;
use distribution_filename::WheelFilename;
use distribution_types::{CachedDirectUrlDist, CachedRegistryDist};
use pep508_rs::VerbatimUrl;
use puffin_cache::CacheEntry;
#[derive(Debug, Clone)]
pub struct CachedWheel {
/// The filename of the wheel.
pub filename: WheelFilename,
/// The [`CacheEntry`] for the wheel.
pub entry: CacheEntry,
}
impl CachedWheel {
/// Try to parse a distribution from a cached directory name (like `typing-extensions-4.8.0-py3-none-any`).
pub fn from_path(path: &Path) -> Option<Self> {
let filename = path.file_name()?.to_str()?;
let filename = WheelFilename::from_stem(filename).ok()?;
if path.is_file() {
return None;
}
let entry = CacheEntry::from_path(path);
Some(Self { filename, entry })
}
/// Convert a [`CachedWheel`] into a [`CachedRegistryDist`].
pub fn into_registry_dist(self) -> CachedRegistryDist {
CachedRegistryDist {
filename: self.filename,
path: self.entry.into_path_buf(),
}
}
/// Convert a [`CachedWheel`] into a [`CachedDirectUrlDist`].
pub fn into_url_dist(self, url: VerbatimUrl) -> CachedDirectUrlDist {
CachedDirectUrlDist {
filename: self.filename,
url,
path: self.entry.into_path_buf(),
editable: false,
}
}
}

View file

@ -2,4 +2,5 @@ pub use built_wheel_index::BuiltWheelIndex;
pub use registry_wheel_index::RegistryWheelIndex;
mod built_wheel_index;
mod cached_wheel;
mod registry_wheel_index;

View file

@ -4,9 +4,8 @@ use std::path::Path;
use rustc_hash::FxHashMap;
use distribution_types::{
CachedRegistryDist, CachedWheel, FlatIndexLocation, IndexLocations, IndexUrl,
};
use crate::index::cached_wheel::CachedWheel;
use distribution_types::{CachedRegistryDist, FlatIndexLocation, IndexLocations, IndexUrl};
use pep440_rs::Version;
use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket, WheelCache};

View file

@ -446,30 +446,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
);
// Determine the last-modified time of the source distribution.
let file_metadata = fs_err::metadata(&path_source_dist.path)?;
let modified = if file_metadata.is_file() {
// `modified()` is infallible on windows and unix (i.e., all platforms we support).
file_metadata.modified()?
} else {
if let Some(metadata) = path_source_dist
.path
.join("pyproject.toml")
.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
{
metadata.modified()?
} else if let Some(metadata) = path_source_dist
.path
.join("setup.py")
.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
{
metadata.modified()?
} else {
return Err(SourceDistError::DirWithoutEntrypoint);
}
let Some(modified) = puffin_cache::archive_mtime(&path_source_dist.path)? else {
return Err(SourceDistError::DirWithoutEntrypoint);
};
// Read the existing metadata from the cache.

View file

@ -1,4 +1,6 @@
use std::hash::BuildHasherDefault;
use std::io;
use std::path::Path;
use anyhow::{bail, Result};
use rustc_hash::FxHashSet;
@ -10,7 +12,7 @@ use distribution_types::{
};
use pep508_rs::{Requirement, VersionOrUrl};
use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket, WheelCache};
use puffin_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
use puffin_distribution::{BuiltWheelIndex, RegistryWheelIndex};
use puffin_interpreter::Virtualenv;
use puffin_normalize::PackageName;
@ -149,6 +151,7 @@ impl<'a> Planner<'a> {
// If the requirement comes from a direct URL, check by URL.
Some(VersionOrUrl::Url(url)) => {
if let InstalledDist::Url(distribution) = &distribution {
// TODO(charlie): Check freshness for path dependencies.
if &distribution.url == url.raw() {
debug!("Requirement already satisfied: {distribution}");
continue;
@ -241,7 +244,7 @@ impl<'a> Planner<'a> {
wheel.filename.stem(),
);
if cache_entry.path().exists() {
if is_fresh(&cache_entry, &wheel.path)? {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename,
wheel.url,
@ -278,10 +281,12 @@ impl<'a> Planner<'a> {
);
if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) {
let cached_dist = wheel.into_url_dist(url.clone());
debug!("Path source requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist));
continue;
if is_fresh(&wheel.entry, &sdist.path)? {
let cached_dist = wheel.into_url_dist(url.clone());
debug!("Path source requirement already cached: {cached_dist}");
local.push(CachedDist::Url(cached_dist));
continue;
}
}
}
Dist::Source(SourceDist::Git(sdist)) => {
@ -337,6 +342,39 @@ impl<'a> Planner<'a> {
}
}
/// Returns `true` if the cache entry linked to the file at the given [`Path`] is fresh.
///
/// A cache entry is considered fresh if it exists and is newer than the file at the given path.
/// If the cache entry is stale, it will be removed from the cache.
fn is_fresh(cache_entry: &CacheEntry, artifact: &Path) -> Result<bool, io::Error> {
match fs_err::metadata(cache_entry.path()).and_then(|metadata| metadata.modified()) {
Ok(cache_mtime) => {
// Determine the modification time of the wheel.
let Some(artifact_mtime) = puffin_cache::archive_mtime(artifact)? else {
// The artifact doesn't exist, so it's not fresh.
return Ok(false);
};
if cache_mtime >= artifact_mtime {
Ok(true)
} else {
debug!(
"Removing stale built wheels for: {}",
cache_entry.path().display()
);
if let Err(err) = fs_err::remove_dir_all(cache_entry.dir()) {
warn!("Failed to remove stale built wheel cache directory: {err}");
}
Ok(false)
}
}
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
// The cache entry doesn't exist, so it's not fresh.
Ok(false)
}
Err(err) => Err(err),
}
}
#[derive(Debug, Default)]
pub struct Plan {
/// The distributions that are not already installed in the current environment, but are