mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 19:08:04 +00:00
Improve path source dist caching (#578)
Path distribution cache reading errors are no longer fatal. We now invalidate path file source dists if their modification timestamp changed, and invalidate path dir source dists if `pyproject.toml` (or, failing that, `setup.py`) changed. These seem like good choices, since changing `pyproject.toml` should trigger a rebuild, and the user can `touch` the file as part of their workflow. `CachedByTimestamp` is now a shared util. It doesn't have methods, as I don't think that's worth it yet for only two users. Closes #478 TODO(konstin): Write a test. That is probably twice as much work as the fix itself, so I made this PR without one for now.
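The shape of the scheme, as a minimal sketch (illustrative, not the commit's code; `load_or_invalidate` and its signature are hypothetical):

    use std::time::SystemTime;

    use serde::de::DeserializeOwned;

    use puffin_cache::CachedByTimestamp;

    /// Hypothetical helper: yield the cached data only if it was written for the
    /// same modification timestamp; any read or parse error is just a cache miss.
    fn load_or_invalidate<T: DeserializeOwned>(
        cache_file: &std::path::Path,
        modified: SystemTime,
    ) -> Option<T> {
        let bytes = std::fs::read(cache_file).ok()?;
        let cached: CachedByTimestamp<T> = serde_json::from_slice(&bytes).ok()?;
        (cached.timestamp == modified).then_some(cached.data)
    }

A timestamp mismatch (or an unreadable cache file) degrades to a rebuild instead of an error, which is the "no longer fatal" behavior described above.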
parent 1bf754556f
commit 3f4d7b7826
6 changed files with 90 additions and 36 deletions
@@ -22,5 +22,7 @@ directories = { workspace = true }
 fs-err = { workspace = true, features = ["tokio"] }
 hex = { workspace = true }
 seahash = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
 tempfile = { workspace = true }
 url = { workspace = true }
crates/puffin-cache/src/by_timestamp.rs (new file, +9)
@@ -0,0 +1,9 @@
+use std::time::SystemTime;
+
+use serde::{Deserialize, Serialize};
+
+#[derive(Deserialize, Serialize)]
+pub struct CachedByTimestamp<T> {
+    pub timestamp: SystemTime,
+    pub data: T,
+}
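Serde ships `Serialize`/`Deserialize` impls for `SystemTime`, so the wrapper round-trips through JSON with no extra code. A small usage sketch (assumed, not from the diff):

    use std::time::SystemTime;

    use puffin_cache::CachedByTimestamp;

    fn roundtrip() -> Result<(), serde_json::Error> {
        let entry = CachedByTimestamp {
            timestamp: SystemTime::now(),
            data: vec!["built-wheel metadata".to_string()],
        };
        let bytes = serde_json::to_vec(&entry)?;
        let back: CachedByTimestamp<Vec<String>> = serde_json::from_slice(&bytes)?;
        // Equality on `SystemTime` is exact: a cache hit requires the very same
        // mtime that was recorded at write time.
        assert_eq!(back.timestamp, entry.timestamp);
        Ok(())
    }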
@@ -7,6 +7,7 @@ use std::sync::Arc;
 use fs_err as fs;
 use tempfile::{tempdir, TempDir};
 
+pub use by_timestamp::CachedByTimestamp;
 pub use canonical_url::{CanonicalUrl, RepositoryUrl};
 #[cfg(feature = "clap")]
 pub use cli::CacheArgs;
@@ -14,6 +15,7 @@ pub use digest::digest;
 pub use stable_hash::{StableHash, StableHasher};
 pub use wheel::WheelCache;
 
+mod by_timestamp;
 mod cache_key;
 mod canonical_url;
 mod cli;
@@ -14,7 +14,7 @@ use tempfile::TempDir;
 use thiserror::Error;
 use tokio::task::JoinError;
 use tokio_util::compat::FuturesAsyncReadCompatExt;
-use tracing::{debug, info_span};
+use tracing::{debug, info_span, warn};
 use url::Url;
 use zip::result::ZipError;
 use zip::ZipArchive;
@@ -24,7 +24,7 @@ use distribution_types::direct_url::{DirectArchiveUrl, DirectGitUrl};
 use distribution_types::{GitSourceDist, Metadata, PathSourceDist, RemoteSource, SourceDist};
 use install_wheel_rs::read_dist_info;
 use platform_tags::Tags;
-use puffin_cache::{digest, CacheBucket, CacheEntry, CanonicalUrl, WheelCache};
+use puffin_cache::{digest, CacheBucket, CacheEntry, CachedByTimestamp, CanonicalUrl, WheelCache};
 use puffin_client::{CachedClient, CachedClientError, DataWithCachePolicy};
 use puffin_fs::write_atomic;
 use puffin_git::{Fetch, GitSource};
@@ -73,6 +73,8 @@ pub enum SourceDistError {
     DistInfo(#[from] install_wheel_rs::Error),
     #[error("Failed to read zip archive from built wheel")]
     Zip(#[from] ZipError),
+    #[error("Source distribution directory contains neither readable pyproject.toml nor setup.py")]
+    DirWithoutEntrypoint,
 
     /// Should not occur; only seen when another task panicked.
     #[error("The task executor is broken, did some other task panic?")]
@@ -350,21 +352,60 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
             METADATA_JSON.to_string(),
         );
 
-        let mut metadatas = if cache_entry.path().is_file() {
-            let cached = fs::read(&cache_entry.path()).await?;
-            let metadatas = serde_json::from_slice::<Metadata21s>(&cached)?;
-            // Do we have previous compatible build of this source dist?
-            if let Some((filename, cached_data)) = metadatas
-                .iter()
-                .find(|(filename, _metadata)| filename.is_compatible(self.tags))
-            {
-                return Ok(BuiltWheelMetadata::from_cached(
-                    filename,
-                    cached_data,
-                    &cache_entry,
-                ));
-            }
-            metadatas
+        let file_metadata = fs_err::metadata(&path_source_dist.path)?;
+        // `modified()` is infallible on windows and unix (i.e., all platforms we support).
+        let modified = if file_metadata.is_file() {
+            file_metadata.modified()?
+        } else {
+            if path_source_dist.path.join("pyproject.toml").is_file() {
+                path_source_dist
+                    .path
+                    .join("pyproject.toml")
+                    .metadata()?
+                    .modified()?
+            } else if path_source_dist.path.join("setup.py").is_file() {
+                path_source_dist
+                    .path
+                    .join("setup.py")
+                    .metadata()?
+                    .modified()?
+            } else {
+                return Err(SourceDistError::DirWithoutEntrypoint);
+            }
+        };
+
+        let mut metadatas = if cache_entry.path().is_file() {
+            let cached = fs::read(&cache_entry.path()).await.ok().and_then(|cached| {
+                serde_json::from_slice::<CachedByTimestamp<Metadata21s>>(&cached).ok()
+            });
+            if let Some(cached) = cached {
+                if cached.timestamp == modified {
+                    // Do we have previous compatible build of this source dist?
+                    if let Some((filename, cached_data)) = cached
+                        .data
+                        .iter()
+                        .find(|(filename, _metadata)| filename.is_compatible(self.tags))
+                    {
+                        return Ok(BuiltWheelMetadata::from_cached(
+                            filename,
+                            cached_data,
+                            &cache_entry,
+                        ));
+                    }
+                    cached.data
+                } else {
+                    debug!(
+                        "Removing stale built wheels for: {}",
+                        cache_entry.path().display()
+                    );
+                    if let Err(err) = fs::remove_dir_all(&cache_entry.dir).await {
+                        warn!("Failed to remove stale built wheel cache directory: {err}");
+                    }
+                    Metadata21s::default()
+                }
+            } else {
+                Metadata21s::default()
+            }
         } else {
             Metadata21s::default()
         };
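Extracted as a standalone function, the invalidation key computed above looks roughly like this (a sketch; the real code inlines it, and the helper name is hypothetical):

    use std::path::Path;
    use std::time::SystemTime;

    /// A path source dist is keyed on the mtime of the file itself, or, for a
    /// directory, on the mtime of `pyproject.toml`, falling back to `setup.py`.
    fn source_dist_modified(path: &Path) -> std::io::Result<Option<SystemTime>> {
        let metadata = std::fs::metadata(path)?;
        if metadata.is_file() {
            return Ok(Some(metadata.modified()?));
        }
        for entrypoint in ["pyproject.toml", "setup.py"] {
            let candidate = path.join(entrypoint);
            if candidate.is_file() {
                return Ok(Some(candidate.metadata()?.modified()?));
            }
        }
        // Neither entrypoint exists; the caller maps this to `DirWithoutEntrypoint`.
        Ok(None)
    }

Keying a directory on `pyproject.toml` (or `setup.py`) rather than hashing the whole tree keeps the check cheap, and lets a user `touch` the file to force a rebuild.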
@@ -399,8 +440,12 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
                 metadata: metadata.clone(),
             },
         );
+        let cached = CachedByTimestamp {
+            timestamp: modified,
+            data: metadatas,
+        };
         fs::create_dir_all(&cache_entry.dir).await?;
-        let data = serde_json::to_vec(&metadatas)?;
+        let data = serde_json::to_vec(&cached)?;
         write_atomic(cache_entry.path(), data).await?;
 
         if let Some(task) = task {
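The write side mirrors the read side: the wrapper, not the bare metadata map, is what gets serialized, so the next read can check the timestamp before trusting the payload. A hedged sketch (the `write_atomic(path, data)` call is as used above; the surrounding function is illustrative):

    use std::path::Path;
    use std::time::SystemTime;

    use puffin_cache::CachedByTimestamp;
    use puffin_fs::write_atomic;

    async fn store<T: serde::Serialize>(
        path: &Path,
        modified: SystemTime,
        metadatas: T,
    ) -> Result<(), Box<dyn std::error::Error>> {
        // Stamp the payload with the mtime it was built against.
        let cached = CachedByTimestamp { timestamp: modified, data: metadatas };
        let data = serde_json::to_vec(&cached)?;
        write_atomic(path, data).await?;
        Ok(())
    }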
@@ -1,6 +1,5 @@
 use std::path::{Path, PathBuf};
 use std::process::Command;
-use std::time::UNIX_EPOCH;
 
 use fs_err as fs;
 use serde::{Deserialize, Serialize};
@@ -9,6 +8,7 @@ use tracing::debug;
 use pep440_rs::Version;
 use pep508_rs::MarkerEnvironment;
 use platform_host::Platform;
+use puffin_cache::CachedByTimestamp;
 use puffin_cache::{digest, Cache, CacheBucket};
 use puffin_fs::write_atomic_sync;
 
@@ -106,12 +106,6 @@ pub(crate) struct InterpreterQueryResult {
     pub(crate) sys_executable: PathBuf,
 }
 
-#[derive(Deserialize, Serialize)]
-pub(crate) struct CachedByTimestamp<T> {
-    pub(crate) timestamp: u128,
-    pub(crate) data: T,
-}
-
 impl InterpreterQueryResult {
     /// Return the resolved [`InterpreterQueryResult`] for the given Python executable.
     pub(crate) fn query(interpreter: &Path) -> Result<Self, Error> {
@@ -158,19 +152,19 @@ impl InterpreterQueryResult {
     /// time as a cache key.
     pub(crate) fn query_cached(executable: &Path, cache: &Cache) -> Result<Self, Error> {
         let executable_bytes = executable.as_os_str().as_encoded_bytes();
-        let cache_dir = cache.bucket(CacheBucket::Interpreter);
-        let cache_path = cache_dir.join(format!("{}.json", digest(&executable_bytes)));
+        let cache_entry = cache.entry(
+            CacheBucket::Interpreter,
+            "",
+            format!("{}.json", digest(&executable_bytes)),
+        );
 
-        let modified = fs_err::metadata(executable)?
-            // Note: This is infallible on windows and unix (i.e., all platforms we support).
-            .modified()?
-            .duration_since(UNIX_EPOCH)
-            .map_err(|err| Error::SystemTime(executable.to_path_buf(), err))?;
+        // `modified()` is infallible on windows and unix (i.e., all platforms we support).
+        let modified = fs_err::metadata(executable)?.modified()?;
 
         // Read from the cache.
-        if let Ok(data) = fs::read(&cache_path) {
+        if let Ok(data) = fs::read(cache_entry.path()) {
             if let Ok(cached) = serde_json::from_slice::<CachedByTimestamp<Self>>(&data) {
-                if cached.timestamp == modified.as_millis() {
+                if cached.timestamp == modified {
                     debug!("Using cached markers for: {}", executable.display());
                     return Ok(cached.data);
                 }
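Worth noting about the timestamp comparison: the old code reduced the mtime to `u128` milliseconds via `duration_since(UNIX_EPOCH)`, which is both lossy and fallible (it needed an `Error::SystemTime` variant for pre-epoch times). Storing `SystemTime` directly makes the comparison exact and infallible. A before/after sketch (assumed, not from the diff):

    use std::time::{SystemTime, UNIX_EPOCH};

    // Before: fallible conversion, and `as_millis` discards sub-millisecond precision.
    fn equal_before(modified: SystemTime, cached_millis: u128) -> bool {
        modified
            .duration_since(UNIX_EPOCH)
            .map(|duration| duration.as_millis() == cached_millis)
            .unwrap_or(false)
    }

    // After: exact structural comparison of `SystemTime` values.
    fn equal_after(modified: SystemTime, cached: SystemTime) -> bool {
        cached == modified
    }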
@@ -189,12 +183,12 @@ impl InterpreterQueryResult {
         // If `executable` is a pyenv shim, a bash script that redirects to the activated
         // python executable at another path, we're not allowed to cache the interpreter info
         if executable == info.sys_executable {
-            fs::create_dir_all(cache_dir)?;
+            fs::create_dir_all(&cache_entry.dir)?;
             // Write to the cache.
             write_atomic_sync(
-                cache_path,
+                cache_entry.path(),
                 serde_json::to_vec(&CachedByTimestamp {
-                    timestamp: modified.as_millis(),
+                    timestamp: modified,
                     data: info.clone(),
                 })?,
             )?;