From 738e8341e25eb63db6f8a13127944593a4c15866 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 24 Jan 2024 11:21:31 -0800 Subject: [PATCH] Use a consistent `Timestamp` struct (#1081) ## Summary This PR uses `ctime` consistently on Unix as a more conservative approach to change detection. It also ensures that our timestamp abstraction is entirely internal, so we can change the representation and logic easily across the codebase in the future. --- crates/puffin-cache/src/by_timestamp.rs | 4 +- crates/puffin-cache/src/lib.rs | 26 +++++----- crates/puffin-cache/src/timestamp.rs | 46 ++++++++++++++++ .../src/distribution_database.rs | 10 ++-- crates/puffin-distribution/src/source/mod.rs | 3 +- crates/puffin-installer/src/plan.rs | 18 ++++--- crates/puffin-interpreter/src/interpreter.rs | 52 ++----------------- 7 files changed, 83 insertions(+), 76 deletions(-) create mode 100644 crates/puffin-cache/src/timestamp.rs diff --git a/crates/puffin-cache/src/by_timestamp.rs b/crates/puffin-cache/src/by_timestamp.rs index 5ab2368b4..2a02d70a5 100644 --- a/crates/puffin-cache/src/by_timestamp.rs +++ b/crates/puffin-cache/src/by_timestamp.rs @@ -1,7 +1,9 @@ use serde::{Deserialize, Serialize}; +use crate::timestamp::Timestamp; + #[derive(Deserialize, Serialize)] -pub struct CachedByTimestamp { +pub struct CachedByTimestamp { pub timestamp: Timestamp, pub data: Data, } diff --git a/crates/puffin-cache/src/lib.rs b/crates/puffin-cache/src/lib.rs index a867d8d9a..bae42f14c 100644 --- a/crates/puffin-cache/src/lib.rs +++ b/crates/puffin-cache/src/lib.rs @@ -4,7 +4,6 @@ use std::io::Write; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::time::SystemTime; use fs_err as fs; use tempfile::{tempdir, TempDir}; @@ -16,11 +15,13 @@ use puffin_normalize::PackageName; pub use crate::by_timestamp::CachedByTimestamp; #[cfg(feature = "clap")] pub use crate::cli::CacheArgs; +pub use crate::timestamp::Timestamp; pub use crate::wheel::WheelCache; use crate::wheel::WheelCacheKind; mod by_timestamp; mod cli; +mod timestamp; mod wheel; /// A [`CacheEntry`] which may or may not exist yet. @@ -180,7 +181,7 @@ impl Cache { match fs::metadata(entry.path()) { Ok(metadata) => { - if metadata.modified()? >= *timestamp { + if Timestamp::from_metadata(&metadata) >= *timestamp { Ok(Freshness::Fresh) } else { Ok(Freshness::Stale) @@ -631,10 +632,10 @@ impl Display for CacheBucket { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ArchiveTimestamp { /// The archive consists of a single file with the given modification time. - Exact(SystemTime), + Exact(Timestamp), /// The archive consists of a directory. The modification time is the latest modification time /// of the `pyproject.toml` or `setup.py` file in the directory. - Approximate(SystemTime), + Approximate(Timestamp), } impl ArchiveTimestamp { @@ -646,8 +647,7 @@ impl ArchiveTimestamp { pub fn from_path(path: impl AsRef) -> Result, io::Error> { let metadata = fs_err::metadata(path.as_ref())?; if metadata.is_file() { - // `modified()` is infallible on Windows and Unix (i.e., all platforms we support). - Ok(Some(Self::Exact(metadata.modified()?))) + Ok(Some(Self::Exact(Timestamp::from_metadata(&metadata)))) } else { if let Some(metadata) = path .as_ref() @@ -656,7 +656,7 @@ impl ArchiveTimestamp { .ok() .filter(std::fs::Metadata::is_file) { - Ok(Some(Self::Approximate(metadata.modified()?))) + Ok(Some(Self::Approximate(Timestamp::from_metadata(&metadata)))) } else if let Some(metadata) = path .as_ref() .join("setup.py") @@ -664,7 +664,7 @@ impl ArchiveTimestamp { .ok() .filter(std::fs::Metadata::is_file) { - Ok(Some(Self::Approximate(metadata.modified()?))) + Ok(Some(Self::Approximate(Timestamp::from_metadata(&metadata)))) } else { Ok(None) } @@ -672,7 +672,7 @@ impl ArchiveTimestamp { } /// Return the modification timestamp for an archive. - pub fn timestamp(&self) -> SystemTime { + pub fn timestamp(&self) -> Timestamp { match self { Self::Exact(timestamp) => *timestamp, Self::Approximate(timestamp) => *timestamp, @@ -706,18 +706,18 @@ pub enum Refresh { /// Don't refresh any entries. None, /// Refresh entries linked to the given packages, if created before the given timestamp. - Packages(Vec, SystemTime), + Packages(Vec, Timestamp), /// Refresh all entries created before the given timestamp. - All(SystemTime), + All(Timestamp), } impl Refresh { /// Determine the refresh strategy to use based on the command-line arguments. pub fn from_args(refresh: bool, refresh_package: Vec) -> Self { if refresh { - Self::All(SystemTime::now()) + Self::All(Timestamp::now()) } else if !refresh_package.is_empty() { - Self::Packages(refresh_package, SystemTime::now()) + Self::Packages(refresh_package, Timestamp::now()) } else { Self::None } diff --git a/crates/puffin-cache/src/timestamp.rs b/crates/puffin-cache/src/timestamp.rs new file mode 100644 index 000000000..e5ffc708c --- /dev/null +++ b/crates/puffin-cache/src/timestamp.rs @@ -0,0 +1,46 @@ +use serde::{Deserialize, Serialize}; +use std::path::Path; + +/// A timestamp used to measure changes to a file. +/// +/// On Unix, this uses `ctime` as a conservative approach. `ctime` should detect all +/// modifications, including some that we don't care about, like hardlink modifications. +/// On other platforms, it uses `mtime`. +/// +/// See: +/// See: +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)] +pub struct Timestamp(std::time::SystemTime); + +impl Timestamp { + /// Return the [`Timestamp`] for the given path. + pub fn from_path(path: impl AsRef) -> std::io::Result { + let metadata = path.as_ref().metadata()?; + Ok(Self::from_metadata(&metadata)) + } + + /// Return the [`Timestamp`] for the given metadata. + pub fn from_metadata(metadata: &std::fs::Metadata) -> Self { + #[cfg(unix)] + { + use std::os::unix::fs::MetadataExt; + + let ctime = u64::try_from(metadata.ctime()).expect("ctime to be representable as u64"); + let ctime_nsec = u32::try_from(metadata.ctime_nsec()) + .expect("ctime_nsec to be representable as u32"); + let duration = std::time::Duration::new(ctime, ctime_nsec); + Self(std::time::UNIX_EPOCH + duration) + } + + #[cfg(not(unix))] + { + let modified = metadata.modified().expect("modified time to be available"); + Self(modified) + } + } + + /// Return the current [`Timestamp`]. + pub fn now() -> Self { + Self(std::time::SystemTime::now()) + } +} diff --git a/crates/puffin-distribution/src/distribution_database.rs b/crates/puffin-distribution/src/distribution_database.rs index 15a389dcc..9aebca702 100644 --- a/crates/puffin-distribution/src/distribution_database.rs +++ b/crates/puffin-distribution/src/distribution_database.rs @@ -14,7 +14,7 @@ use distribution_types::{ BuiltDist, DirectGitUrl, Dist, FileLocation, LocalEditable, Name, SourceDist, }; use platform_tags::Tags; -use puffin_cache::{Cache, CacheBucket, WheelCache}; +use puffin_cache::{Cache, CacheBucket, Timestamp, WheelCache}; use puffin_client::{CacheControl, CachedClientError, RegistryClient}; use puffin_extract::unzip_no_seek; use puffin_fs::metadata_if_exists; @@ -138,7 +138,9 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> metadata_if_exists(cache_entry.path())?, metadata_if_exists(path)?, ) { - if cache_metadata.modified()? > path_metadata.modified()? { + let cache_modified = Timestamp::from_metadata(&cache_metadata); + let path_modified = Timestamp::from_metadata(&path_metadata); + if cache_modified >= path_modified { return Ok(LocalWheel::Unzipped(UnzippedWheel { dist: dist.clone(), target: cache_entry.into_path_buf(), @@ -278,7 +280,9 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> metadata_if_exists(cache_entry.path())?, metadata_if_exists(&wheel.path)?, ) { - if cache_metadata.modified()? > path_metadata.modified()? { + let cache_modified = Timestamp::from_metadata(&cache_metadata); + let path_modified = Timestamp::from_metadata(&path_metadata); + if cache_modified >= path_modified { return Ok(LocalWheel::Unzipped(UnzippedWheel { dist: dist.clone(), target: cache_entry.into_path_buf(), diff --git a/crates/puffin-distribution/src/source/mod.rs b/crates/puffin-distribution/src/source/mod.rs index 0a02f6643..ae48c8d85 100644 --- a/crates/puffin-distribution/src/source/mod.rs +++ b/crates/puffin-distribution/src/source/mod.rs @@ -3,7 +3,6 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; -use std::time::SystemTime; use anyhow::Result; use fs_err::tokio as fs; @@ -950,7 +949,7 @@ pub(crate) fn read_timestamp_manifest( // If the cache entry is up-to-date, return it. match std::fs::read(cache_entry.path()) { Ok(cached) => { - let cached = rmp_serde::from_slice::>(&cached)?; + let cached = rmp_serde::from_slice::>(&cached)?; if cached.timestamp == modified.timestamp() { return Ok(Some(cached.data)); } diff --git a/crates/puffin-installer/src/plan.rs b/crates/puffin-installer/src/plan.rs index ab15fa025..66c1bcd34 100644 --- a/crates/puffin-installer/src/plan.rs +++ b/crates/puffin-installer/src/plan.rs @@ -12,7 +12,9 @@ use distribution_types::{ }; use pep508_rs::{Requirement, VersionOrUrl}; use platform_tags::Tags; -use puffin_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheEntry, Freshness, WheelCache}; +use puffin_cache::{ + ArchiveTimestamp, Cache, CacheBucket, CacheEntry, Freshness, Timestamp, WheelCache, +}; use puffin_distribution::{BuiltWheelIndex, RegistryWheelIndex}; use puffin_interpreter::Virtualenv; use puffin_normalize::PackageName; @@ -373,11 +375,11 @@ impl<'a> Planner<'a> { /// /// A cache entry is not modified if it exists and is newer than the file at the given path. fn not_modified_cache(cache_entry: &CacheEntry, artifact: &Path) -> Result { - match fs_err::metadata(cache_entry.path()).and_then(|metadata| metadata.modified()) { - Ok(cache_mtime) => { + match fs_err::metadata(cache_entry.path()).map(|metadata| Timestamp::from_metadata(&metadata)) { + Ok(cache_timestamp) => { // Determine the modification time of the wheel. - if let Some(artifact_mtime) = ArchiveTimestamp::from_path(artifact)? { - Ok(cache_mtime >= artifact_mtime.timestamp()) + if let Some(artifact_timestamp) = ArchiveTimestamp::from_path(artifact)? { + Ok(cache_timestamp >= artifact_timestamp.timestamp()) } else { // The artifact doesn't exist, so it's not fresh. Ok(false) @@ -396,11 +398,11 @@ fn not_modified_cache(cache_entry: &CacheEntry, artifact: &Path) -> Result Result { // Determine the modification time of the installed distribution. let dist_metadata = fs_err::metadata(&dist.path)?; - let dist_mtime = dist_metadata.modified()?; + let dist_timestamp = Timestamp::from_metadata(&dist_metadata); // Determine the modification time of the wheel. - if let Some(artifact_mtime) = ArchiveTimestamp::from_path(artifact)? { - Ok(dist_mtime >= artifact_mtime.timestamp()) + if let Some(artifact_timestamp) = ArchiveTimestamp::from_path(artifact)? { + Ok(dist_timestamp >= artifact_timestamp.timestamp()) } else { // The artifact doesn't exist, so it's not fresh. Ok(false) diff --git a/crates/puffin-interpreter/src/interpreter.rs b/crates/puffin-interpreter/src/interpreter.rs index 9869594d6..9ecc661d1 100644 --- a/crates/puffin-interpreter/src/interpreter.rs +++ b/crates/puffin-interpreter/src/interpreter.rs @@ -11,7 +11,7 @@ use pep440_rs::Version; use pep508_rs::MarkerEnvironment; use platform_host::Platform; use platform_tags::{Tags, TagsError}; -use puffin_cache::{Cache, CacheBucket, CachedByTimestamp, Freshness}; +use puffin_cache::{Cache, CacheBucket, CachedByTimestamp, Freshness, Timestamp}; use puffin_fs::write_atomic_sync; use crate::python_platform::PythonPlatform; @@ -355,7 +355,7 @@ impl InterpreterQueryResult { format!("{}.msgpack", digest(&executable_bytes)), ); - let modified = Timestamp::from_path(fs_err::canonicalize(executable)?.as_ref())?; + let modified = Timestamp::from_path(fs_err::canonicalize(executable)?)?; // Read from the cache. if cache @@ -363,7 +363,7 @@ impl InterpreterQueryResult { .is_ok_and(Freshness::is_fresh) { if let Ok(data) = fs::read(cache_entry.path()) { - match rmp_serde::from_slice::>(&data) { + match rmp_serde::from_slice::>(&data) { Ok(cached) => { if cached.timestamp == modified { debug!("Using cached markers for: {}", executable.display()); @@ -407,52 +407,6 @@ impl InterpreterQueryResult { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize)] -enum Timestamp { - // On Unix, use `ctime` and `ctime_nsec`. - Unix(i64, i64), - // On Windows, use `last_write_time`. - Windows(u64), - // On other platforms, use Rust's modified time. - Generic(std::time::SystemTime), -} - -impl Timestamp { - /// Return the [`Timestamp`] for the given path. - /// - /// On Unix, this uses `ctime` as a conservative approach. `ctime` should detect all - /// modifications, including some that we don't care about, like hardlink modifications. - /// On other platforms, it uses `mtime`. - fn from_path(path: &Path) -> Result { - #[cfg(unix)] - { - use std::os::unix::fs::MetadataExt; - - let metadata = path.metadata()?; - let ctime = metadata.ctime(); - let ctime_nsec = metadata.ctime_nsec(); - - Ok(Self::Unix(ctime, ctime_nsec)) - } - - #[cfg(windows)] - { - use std::os::windows::fs::MetadataExt; - - let metadata = path.metadata()?; - let modified = metadata.last_write_time(); - Ok(Self::Windows(modified)) - } - - #[cfg(not(any(unix, windows)))] - { - let metadata = path.metadata()?; - let modified = metadata.modified()?; - Ok(Self::Generic(modified)) - } - } -} - #[cfg(unix)] #[cfg(test)] mod tests {