diff --git a/crates/distribution-types/src/cached.rs b/crates/distribution-types/src/cached.rs index b1c08475f..eddbd1114 100644 --- a/crates/distribution-types/src/cached.rs +++ b/crates/distribution-types/src/cached.rs @@ -131,47 +131,6 @@ impl CachedDirectUrlDist { } } -#[derive(Debug, Clone)] -pub struct CachedWheel { - /// The filename of the wheel. - pub filename: WheelFilename, - /// The path to the wheel. - pub path: PathBuf, -} - -impl CachedWheel { - /// Try to parse a distribution from a cached directory name (like `typing-extensions-4.8.0-py3-none-any`). - pub fn from_path(path: &Path) -> Option { - let filename = path.file_name()?.to_str()?; - let filename = WheelFilename::from_stem(filename).ok()?; - if path.is_file() { - return None; - } - - let path = path.to_path_buf(); - - Some(Self { filename, path }) - } - - /// Convert a [`CachedWheel`] into a [`CachedRegistryDist`]. - pub fn into_registry_dist(self) -> CachedRegistryDist { - CachedRegistryDist { - filename: self.filename, - path: self.path, - } - } - - /// Convert a [`CachedWheel`] into a [`CachedDirectUrlDist`]. - pub fn into_url_dist(self, url: VerbatimUrl) -> CachedDirectUrlDist { - CachedDirectUrlDist { - filename: self.filename, - url, - path: self.path, - editable: false, - } - } -} - impl Name for CachedRegistryDist { fn name(&self) -> &PackageName { &self.filename.name diff --git a/crates/puffin-cache/src/lib.rs b/crates/puffin-cache/src/lib.rs index 7477b1ead..4c053fc5e 100644 --- a/crates/puffin-cache/src/lib.rs +++ b/crates/puffin-cache/src/lib.rs @@ -4,6 +4,7 @@ use std::io::Write; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::sync::Arc; +use std::time::SystemTime; use fs_err as fs; use tempfile::{tempdir, TempDir}; @@ -32,6 +33,11 @@ impl CacheEntry { Self(dir.into().join(file)) } + /// Create a new [`CacheEntry`] from a path. + pub fn from_path(path: impl Into) -> Self { + Self(path.into()) + } + /// Convert the [`CacheEntry`] into a [`PathBuf`]. #[inline] pub fn into_path_buf(self) -> PathBuf { @@ -540,3 +546,34 @@ impl Display for CacheBucket { f.write_str(self.to_str()) } } + +/// Return the modification timestamp for an archive, which could be a file (like a wheel or a zip +/// archive) or a directory containing a Python package. +/// +/// If the path is to a directory with no entrypoint (i.e., no `pyproject.toml` or `setup.py`), +/// returns `None`. +pub fn archive_mtime(path: &Path) -> Result, io::Error> { + let metadata = fs_err::metadata(path)?; + if metadata.is_file() { + // `modified()` is infallible on Windows and Unix (i.e., all platforms we support). + Ok(Some(metadata.modified()?)) + } else { + if let Some(metadata) = path + .join("pyproject.toml") + .metadata() + .ok() + .filter(std::fs::Metadata::is_file) + { + Ok(Some(metadata.modified()?)) + } else if let Some(metadata) = path + .join("setup.py") + .metadata() + .ok() + .filter(std::fs::Metadata::is_file) + { + Ok(Some(metadata.modified()?)) + } else { + Ok(None) + } + } +} diff --git a/crates/puffin-cli/tests/pip_sync.rs b/crates/puffin-cli/tests/pip_sync.rs index ef05c7607..ae5c0433f 100644 --- a/crates/puffin-cli/tests/pip_sync.rs +++ b/crates/puffin-cli/tests/pip_sync.rs @@ -7,7 +7,6 @@ use std::process::Command; use anyhow::{Context, Result}; use assert_cmd::prelude::*; use assert_fs::prelude::*; -use assert_fs::TempDir; use indoc::indoc; use insta_cmd::_macro_support::insta; use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; @@ -1088,7 +1087,69 @@ fn install_local_wheel() -> Result<()> { .collect(); insta::with_settings!({ - filters => filters + filters => filters.clone() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip") + .arg("sync") + .arg("requirements.txt") + .arg("--strict") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + tomli==2.0.1 (from file://[TEMP_DIR]/tomli-2.0.1-py3-none-any.whl) + "###); + }); + + check_command(&venv, "import tomli", &temp_dir); + + // Create a new virtual environment. + let venv = create_venv_py312(&temp_dir, &cache_dir); + + // Reinstall. The wheel should come from the cache, so there shouldn't be a "download". + insta::with_settings!({ + filters => filters.clone() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip") + .arg("sync") + .arg("requirements.txt") + .arg("--strict") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Installed 1 package in [TIME] + + tomli==2.0.1 (from file://[TEMP_DIR]/tomli-2.0.1-py3-none-any.whl) + "###); + }); + + check_command(&venv, "import tomli", &temp_dir); + + // Create a new virtual environment. + let venv = create_venv_py312(&temp_dir, &cache_dir); + + // "Modify" the wheel. + let archive_file = std::fs::File::open(&archive)?; + archive_file.set_modified(std::time::SystemTime::now())?; + + // Reinstall. The wheel should be "downloaded" again. + insta::with_settings!({ + filters => filters.clone() }, { assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) .arg("pip") @@ -2740,8 +2801,8 @@ fn sync_legacy_sdist_setuptools() -> Result<()> { /// Sync using `--find-links` with a local directory. #[test] fn find_links() -> Result<()> { - let temp_dir = TempDir::new()?; - let cache_dir = TempDir::new()?; + let temp_dir = assert_fs::TempDir::new()?; + let cache_dir = assert_fs::TempDir::new()?; let venv = create_venv_py312(&temp_dir, &cache_dir); let requirements_txt = temp_dir.child("requirements.txt"); diff --git a/crates/puffin-distribution/src/index/built_wheel_index.rs b/crates/puffin-distribution/src/index/built_wheel_index.rs index 6680e4796..a5591d02d 100644 --- a/crates/puffin-distribution/src/index/built_wheel_index.rs +++ b/crates/puffin-distribution/src/index/built_wheel_index.rs @@ -1,8 +1,9 @@ -use distribution_types::CachedWheel; use platform_tags::Tags; use puffin_cache::CacheShard; use puffin_fs::directories; +use crate::index::cached_wheel::CachedWheel; + /// A local index of built distributions for a specific source distribution. pub struct BuiltWheelIndex; @@ -38,10 +39,6 @@ impl BuiltWheelIndex { continue; } - // TODO(charlie): Consider taking into account the freshness checks that we - // encode when building source distributions (e.g., timestamps). For now, we - // assume that distributions are immutable when installing (i.e., in this - // index). if let Some(existing) = candidate.as_ref() { // Override if the wheel is newer, or "more" compatible. if dist_info.filename.version > existing.filename.version diff --git a/crates/puffin-distribution/src/index/cached_wheel.rs b/crates/puffin-distribution/src/index/cached_wheel.rs new file mode 100644 index 000000000..33f9902e4 --- /dev/null +++ b/crates/puffin-distribution/src/index/cached_wheel.rs @@ -0,0 +1,45 @@ +use std::path::Path; + +use distribution_filename::WheelFilename; +use distribution_types::{CachedDirectUrlDist, CachedRegistryDist}; +use pep508_rs::VerbatimUrl; +use puffin_cache::CacheEntry; + +#[derive(Debug, Clone)] +pub struct CachedWheel { + /// The filename of the wheel. + pub filename: WheelFilename, + /// The [`CacheEntry`] for the wheel. + pub entry: CacheEntry, +} + +impl CachedWheel { + /// Try to parse a distribution from a cached directory name (like `typing-extensions-4.8.0-py3-none-any`). + pub fn from_path(path: &Path) -> Option { + let filename = path.file_name()?.to_str()?; + let filename = WheelFilename::from_stem(filename).ok()?; + if path.is_file() { + return None; + } + let entry = CacheEntry::from_path(path); + Some(Self { filename, entry }) + } + + /// Convert a [`CachedWheel`] into a [`CachedRegistryDist`]. + pub fn into_registry_dist(self) -> CachedRegistryDist { + CachedRegistryDist { + filename: self.filename, + path: self.entry.into_path_buf(), + } + } + + /// Convert a [`CachedWheel`] into a [`CachedDirectUrlDist`]. + pub fn into_url_dist(self, url: VerbatimUrl) -> CachedDirectUrlDist { + CachedDirectUrlDist { + filename: self.filename, + url, + path: self.entry.into_path_buf(), + editable: false, + } + } +} diff --git a/crates/puffin-distribution/src/index/mod.rs b/crates/puffin-distribution/src/index/mod.rs index aebeef891..c0b79595f 100644 --- a/crates/puffin-distribution/src/index/mod.rs +++ b/crates/puffin-distribution/src/index/mod.rs @@ -2,4 +2,5 @@ pub use built_wheel_index::BuiltWheelIndex; pub use registry_wheel_index::RegistryWheelIndex; mod built_wheel_index; +mod cached_wheel; mod registry_wheel_index; diff --git a/crates/puffin-distribution/src/index/registry_wheel_index.rs b/crates/puffin-distribution/src/index/registry_wheel_index.rs index 06d318f7a..72783582b 100644 --- a/crates/puffin-distribution/src/index/registry_wheel_index.rs +++ b/crates/puffin-distribution/src/index/registry_wheel_index.rs @@ -4,9 +4,8 @@ use std::path::Path; use rustc_hash::FxHashMap; -use distribution_types::{ - CachedRegistryDist, CachedWheel, FlatIndexLocation, IndexLocations, IndexUrl, -}; +use crate::index::cached_wheel::CachedWheel; +use distribution_types::{CachedRegistryDist, FlatIndexLocation, IndexLocations, IndexUrl}; use pep440_rs::Version; use platform_tags::Tags; use puffin_cache::{Cache, CacheBucket, WheelCache}; diff --git a/crates/puffin-distribution/src/source/mod.rs b/crates/puffin-distribution/src/source/mod.rs index 733c18d65..4ccdb67e2 100644 --- a/crates/puffin-distribution/src/source/mod.rs +++ b/crates/puffin-distribution/src/source/mod.rs @@ -446,30 +446,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { ); // Determine the last-modified time of the source distribution. - let file_metadata = fs_err::metadata(&path_source_dist.path)?; - let modified = if file_metadata.is_file() { - // `modified()` is infallible on windows and unix (i.e., all platforms we support). - file_metadata.modified()? - } else { - if let Some(metadata) = path_source_dist - .path - .join("pyproject.toml") - .metadata() - .ok() - .filter(std::fs::Metadata::is_file) - { - metadata.modified()? - } else if let Some(metadata) = path_source_dist - .path - .join("setup.py") - .metadata() - .ok() - .filter(std::fs::Metadata::is_file) - { - metadata.modified()? - } else { - return Err(SourceDistError::DirWithoutEntrypoint); - } + let Some(modified) = puffin_cache::archive_mtime(&path_source_dist.path)? else { + return Err(SourceDistError::DirWithoutEntrypoint); }; // Read the existing metadata from the cache. diff --git a/crates/puffin-installer/src/plan.rs b/crates/puffin-installer/src/plan.rs index 2b5efad45..8979e4528 100644 --- a/crates/puffin-installer/src/plan.rs +++ b/crates/puffin-installer/src/plan.rs @@ -1,4 +1,6 @@ use std::hash::BuildHasherDefault; +use std::io; +use std::path::Path; use anyhow::{bail, Result}; use rustc_hash::FxHashSet; @@ -10,7 +12,7 @@ use distribution_types::{ }; use pep508_rs::{Requirement, VersionOrUrl}; use platform_tags::Tags; -use puffin_cache::{Cache, CacheBucket, WheelCache}; +use puffin_cache::{Cache, CacheBucket, CacheEntry, WheelCache}; use puffin_distribution::{BuiltWheelIndex, RegistryWheelIndex}; use puffin_interpreter::Virtualenv; use puffin_normalize::PackageName; @@ -149,6 +151,7 @@ impl<'a> Planner<'a> { // If the requirement comes from a direct URL, check by URL. Some(VersionOrUrl::Url(url)) => { if let InstalledDist::Url(distribution) = &distribution { + // TODO(charlie): Check freshness for path dependencies. if &distribution.url == url.raw() { debug!("Requirement already satisfied: {distribution}"); continue; @@ -241,7 +244,7 @@ impl<'a> Planner<'a> { wheel.filename.stem(), ); - if cache_entry.path().exists() { + if is_fresh(&cache_entry, &wheel.path)? { let cached_dist = CachedDirectUrlDist::from_url( wheel.filename, wheel.url, @@ -278,10 +281,12 @@ impl<'a> Planner<'a> { ); if let Some(wheel) = BuiltWheelIndex::find(&cache_shard, tags) { - let cached_dist = wheel.into_url_dist(url.clone()); - debug!("Path source requirement already cached: {cached_dist}"); - local.push(CachedDist::Url(cached_dist)); - continue; + if is_fresh(&wheel.entry, &sdist.path)? { + let cached_dist = wheel.into_url_dist(url.clone()); + debug!("Path source requirement already cached: {cached_dist}"); + local.push(CachedDist::Url(cached_dist)); + continue; + } } } Dist::Source(SourceDist::Git(sdist)) => { @@ -337,6 +342,39 @@ impl<'a> Planner<'a> { } } +/// Returns `true` if the cache entry linked to the file at the given [`Path`] is fresh. +/// +/// A cache entry is considered fresh if it exists and is newer than the file at the given path. +/// If the cache entry is stale, it will be removed from the cache. +fn is_fresh(cache_entry: &CacheEntry, artifact: &Path) -> Result { + match fs_err::metadata(cache_entry.path()).and_then(|metadata| metadata.modified()) { + Ok(cache_mtime) => { + // Determine the modification time of the wheel. + let Some(artifact_mtime) = puffin_cache::archive_mtime(artifact)? else { + // The artifact doesn't exist, so it's not fresh. + return Ok(false); + }; + if cache_mtime >= artifact_mtime { + Ok(true) + } else { + debug!( + "Removing stale built wheels for: {}", + cache_entry.path().display() + ); + if let Err(err) = fs_err::remove_dir_all(cache_entry.dir()) { + warn!("Failed to remove stale built wheel cache directory: {err}"); + } + Ok(false) + } + } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + // The cache entry doesn't exist, so it's not fresh. + Ok(false) + } + Err(err) => Err(err), + } +} + #[derive(Debug, Default)] pub struct Plan { /// The distributions that are not already installed in the current environment, but are