mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 02:48:17 +00:00
Introduce Cache
, CacheBucket
and CacheEntry
(#507)
This is mostly a mechanical refactor that moves 80% of our code to the
same cache abstraction.
It introduces `Cache`, which abstracts away the path of the cache
and the temp dir drop and is passed throughout the codebase. To get a
specific cache bucket, you need to request your `CacheBucket` from
`Cache`. `CacheBucket` centralizes the names of all cache
buckets, moving them away from the string constants spread throughout
the crates.
Specifically for working with the `CachedClient`, there is a
`CacheEntry`. I'm not sure yet if that is a strict improvement over
`cache_dir: PathBuf, cache_file: String`; I may have to revisit that
later.
The interpreter cache moved into `interpreter-v0`.
We can use the `CacheBucket` page to document the cache structure in
each bucket:

This commit is contained in:
parent
3d47d2b1da
commit
5435d44756
38 changed files with 528 additions and 433 deletions
|
@ -1,7 +1,6 @@
|
|||
use std::borrow::Cow;
|
||||
use std::cmp::Reverse;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
|
@ -18,6 +17,7 @@ use distribution_filename::{WheelFilename, WheelFilenameError};
|
|||
use distribution_types::direct_url::DirectGitUrl;
|
||||
use distribution_types::{BuiltDist, Dist, Metadata, RemoteSource, SourceDist};
|
||||
use platform_tags::Tags;
|
||||
use puffin_cache::{Cache, CacheBucket};
|
||||
use puffin_client::RegistryClient;
|
||||
use puffin_git::GitSource;
|
||||
use puffin_traits::BuildContext;
|
||||
|
@ -31,12 +31,6 @@ use crate::{
|
|||
SourceDistError,
|
||||
};
|
||||
|
||||
// The cache subdirectory in which to store Git repositories.
|
||||
const GIT_CACHE: &str = "git-v0";
|
||||
|
||||
// The cache subdirectory in which to store downloaded wheel archives.
|
||||
const ARCHIVES_CACHE: &str = "archives-v0";
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum DistributionDatabaseError {
|
||||
#[error("Failed to parse '{0}' as url")]
|
||||
|
@ -75,7 +69,7 @@ pub enum DistributionDatabaseError {
|
|||
/// This struct also has the task of acquiring locks around source dist builds in general and git
|
||||
/// operation especially.
|
||||
pub struct DistributionDatabase<'a, Context: BuildContext + Send + Sync> {
|
||||
cache: &'a Path,
|
||||
cache: &'a Cache,
|
||||
reporter: Option<Arc<dyn Reporter>>,
|
||||
locks: Arc<Locks>,
|
||||
client: &'a RegistryClient,
|
||||
|
@ -85,7 +79,7 @@ pub struct DistributionDatabase<'a, Context: BuildContext + Send + Sync> {
|
|||
|
||||
impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> {
|
||||
pub fn new(
|
||||
cache: &'a Path,
|
||||
cache: &'a Cache,
|
||||
tags: &'a Tags,
|
||||
client: &'a RegistryClient,
|
||||
build_context: &'a Context,
|
||||
|
@ -193,7 +187,10 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
|
||||
// Create a directory for the wheel.
|
||||
// TODO(konstin): Change this when the built wheel naming scheme is fixed.
|
||||
let wheel_dir = self.cache.join(ARCHIVES_CACHE).join(wheel.package_id());
|
||||
let wheel_dir = self
|
||||
.cache
|
||||
.bucket(CacheBucket::Archives)
|
||||
.join(wheel.package_id());
|
||||
fs::create_dir_all(&wheel_dir).await?;
|
||||
|
||||
// Download the wheel to a temporary file.
|
||||
|
@ -221,7 +218,10 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
|
||||
// Create a directory for the wheel.
|
||||
// TODO(konstin): Change this when the built wheel naming scheme is fixed.
|
||||
let wheel_dir = self.cache.join(ARCHIVES_CACHE).join(wheel.package_id());
|
||||
let wheel_dir = self
|
||||
.cache
|
||||
.bucket(CacheBucket::Archives)
|
||||
.join(wheel.package_id());
|
||||
fs::create_dir_all(&wheel_dir).await?;
|
||||
|
||||
// Fetch the wheel.
|
||||
|
@ -315,7 +315,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
let SourceDist::Git(source_dist) = dist else {
|
||||
return Ok(None);
|
||||
};
|
||||
let git_dir = self.cache.join(GIT_CACHE);
|
||||
let git_dir = self.build_context.cache().bucket(CacheBucket::Git);
|
||||
|
||||
let DirectGitUrl { url, subdirectory } =
|
||||
DirectGitUrl::try_from(&source_dist.url).map_err(DistributionDatabaseError::Git)?;
|
||||
|
|
|
@ -23,7 +23,8 @@ use distribution_types::direct_url::{DirectArchiveUrl, DirectGitUrl};
|
|||
use distribution_types::{Dist, GitSourceDist, Identifier, RemoteSource, SourceDist};
|
||||
use install_wheel_rs::find_dist_info;
|
||||
use platform_tags::Tags;
|
||||
use puffin_cache::{digest, CanonicalUrl, WheelMetadataCache};
|
||||
use puffin_cache::CacheBucket::BuiltWheels;
|
||||
use puffin_cache::{digest, Cache, CacheBucket, CanonicalUrl, WheelMetadataCache};
|
||||
use puffin_client::{CachedClient, CachedClientError, DataWithCachePolicy};
|
||||
use puffin_git::{Fetch, GitSource};
|
||||
use puffin_normalize::PackageName;
|
||||
|
@ -34,9 +35,6 @@ use crate::download::BuiltWheel;
|
|||
use crate::locks::LockedFile;
|
||||
use crate::{Download, Reporter, SourceDistDownload};
|
||||
|
||||
const BUILT_WHEELS_CACHE: &str = "built-wheels-v0";
|
||||
const GIT_CACHE: &str = "git-v0";
|
||||
|
||||
/// The caller is responsible for adding the source dist information to the error chain
|
||||
#[derive(Debug, Error)]
|
||||
pub enum SourceDistError {
|
||||
|
@ -51,7 +49,7 @@ pub enum SourceDistError {
|
|||
Client(#[from] puffin_client::Error),
|
||||
|
||||
// Cache writing error
|
||||
#[error(transparent)]
|
||||
#[error("Failed to write to source dist cache")]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("Cache (de)serialization failed")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
|
@ -96,12 +94,12 @@ impl BuiltWheelMetadata {
|
|||
fn from_cached(
|
||||
filename: &WheelFilename,
|
||||
cached_data: &DiskFilenameAndMetadata,
|
||||
cache: &Path,
|
||||
cache: &Cache,
|
||||
source_dist: &SourceDist,
|
||||
) -> Self {
|
||||
// TODO(konstin): Change this when the built wheel naming scheme is fixed
|
||||
let wheel_dir = cache
|
||||
.join(BUILT_WHEELS_CACHE)
|
||||
.bucket(CacheBucket::BuiltWheels)
|
||||
.join(source_dist.distribution_id());
|
||||
|
||||
Self {
|
||||
|
@ -183,7 +181,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
let wheel_dir = self
|
||||
.build_context
|
||||
.cache()
|
||||
.join(BUILT_WHEELS_CACHE)
|
||||
.bucket(CacheBucket::BuiltWheels)
|
||||
.join(source_dist.distribution_id());
|
||||
fs::create_dir_all(&wheel_dir).await?;
|
||||
|
||||
|
@ -231,11 +229,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
cache_shard: WheelMetadataCache<'data>,
|
||||
subdirectory: Option<&'data Path>,
|
||||
) -> Result<BuiltWheelMetadata, SourceDistError> {
|
||||
let cache_dir = self
|
||||
.build_context
|
||||
.cache()
|
||||
.join(cache_shard.built_wheel_dir(filename));
|
||||
let cache_file = METADATA_JSON;
|
||||
let cache_entry = self.build_context.cache().entry(
|
||||
CacheBucket::BuiltWheelMetadata,
|
||||
cache_shard.built_wheel_dir(filename),
|
||||
METADATA_JSON.to_string(),
|
||||
);
|
||||
|
||||
let response_callback = |response| async {
|
||||
debug!("Downloading and building source distribution: {source_dist}");
|
||||
|
@ -256,7 +254,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
reporter.on_download_progress(&Download::SourceDist(download.clone()));
|
||||
}
|
||||
|
||||
let (disk_filename, wheel_filename, metadata) = self
|
||||
let (_wheel_filename, disk_filename, wheel_filename, metadata) = self
|
||||
.build_source_dist(
|
||||
&download.dist,
|
||||
temp_dir,
|
||||
|
@ -285,7 +283,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
let req = self.cached_client.uncached().get(url.clone()).build()?;
|
||||
let metadatas = self
|
||||
.cached_client
|
||||
.get_cached_with_callback(req, &cache_dir, cache_file, response_callback)
|
||||
.get_cached_with_callback(req, &cache_entry, response_callback)
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
CachedClientError::Callback(err) => err,
|
||||
|
@ -318,7 +316,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
.await
|
||||
.map_err(puffin_client::Error::RequestMiddlewareError)?;
|
||||
let (temp_dir, sdist_file) = self.download_source_dist_url(response, filename).await?;
|
||||
let (disk_filename, wheel_filename, metadata) = self
|
||||
let (_wheel_dir, disk_filename, wheel_filename, metadata) = self
|
||||
.build_source_dist(source_dist, temp_dir, &sdist_file, subdirectory)
|
||||
.await
|
||||
.map_err(SourceDistError::Build)?;
|
||||
|
@ -337,7 +335,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
// have to build a source dist next.
|
||||
// Just return if the response wasn't cacheable or there was another errors that
|
||||
// `CachedClient` already complained about
|
||||
if let Ok(cached) = fs::read(cache_dir.join(cache_file)).await {
|
||||
if let Ok(cached) = fs::read(cache_entry.path()).await {
|
||||
// If the file exists and it was just read or written by `CachedClient`, we assume it must
|
||||
// be correct.
|
||||
let mut cached = serde_json::from_slice::<DataWithCachePolicy<Metadata21s>>(&cached)?;
|
||||
|
@ -345,7 +343,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
cached
|
||||
.data
|
||||
.insert(wheel_filename.clone(), cached_data.clone());
|
||||
fs::write(cache_file, serde_json::to_vec(&cached)?).await?;
|
||||
fs::write(cache_entry.path(), serde_json::to_vec(&cached)?).await?;
|
||||
};
|
||||
|
||||
Ok(BuiltWheelMetadata::from_cached(
|
||||
|
@ -369,22 +367,14 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
.expect("Exact commit after checkout")
|
||||
.to_string();
|
||||
let cache_shard = WheelMetadataCache::Git(&git_source_dist.url);
|
||||
let cache_dir = self
|
||||
.build_context
|
||||
.cache()
|
||||
.join(cache_shard.built_wheel_dir(&git_sha));
|
||||
let cache_file = cache_dir.join(METADATA_JSON);
|
||||
let cache_entry = self.build_context.cache().entry(
|
||||
CacheBucket::BuiltWheelMetadata,
|
||||
cache_shard.built_wheel_dir(&git_sha),
|
||||
METADATA_JSON.to_string(),
|
||||
);
|
||||
|
||||
// TODO(konstin): Change this when the built wheel naming scheme is fixed.
|
||||
let wheel_dir = self
|
||||
.build_context
|
||||
.cache()
|
||||
.join(BUILT_WHEELS_CACHE)
|
||||
.join(git_source_dist.distribution_id());
|
||||
fs::create_dir_all(&wheel_dir).await?;
|
||||
|
||||
let mut metadatas = if cache_file.is_file() {
|
||||
let cached = fs::read(&cache_file).await?;
|
||||
let mut metadatas = if cache_entry.path().is_file() {
|
||||
let cached = fs::read(&cache_entry.path()).await?;
|
||||
let metadatas = serde_json::from_slice::<Metadata21s>(&cached)?;
|
||||
// Do we have previous compatible build of this source dist?
|
||||
if let Some((filename, cached_data)) = metadatas
|
||||
|
@ -408,7 +398,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
.as_ref()
|
||||
.map(|reporter| reporter.on_build_start(source_dist));
|
||||
|
||||
let (disk_filename, filename, metadata) = self
|
||||
let (wheel_dir, disk_filename, filename, metadata) = self
|
||||
.build_source_dist(source_dist, None, fetch.path(), subdirectory.as_deref())
|
||||
.await
|
||||
.map_err(SourceDistError::Build)?;
|
||||
|
@ -429,8 +419,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
},
|
||||
);
|
||||
let cached = serde_json::to_vec(&metadatas)?;
|
||||
fs::create_dir_all(cache_dir).await?;
|
||||
fs::write(cache_file, cached).await?;
|
||||
fs::create_dir_all(&cache_entry.dir).await?;
|
||||
fs::write(cache_entry.path(), cached).await?;
|
||||
|
||||
if let Some(task) = task {
|
||||
if let Some(reporter) = self.reporter.as_ref() {
|
||||
|
@ -457,7 +447,9 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
let mut reader = tokio::io::BufReader::new(reader.compat());
|
||||
|
||||
// Download the source distribution.
|
||||
let temp_dir = tempfile::tempdir_in(self.build_context.cache())?;
|
||||
let cache_dir = self.build_context.cache().bucket(BuiltWheels);
|
||||
fs::create_dir_all(&cache_dir).await?;
|
||||
let temp_dir = tempfile::tempdir_in(cache_dir)?;
|
||||
let sdist_file = temp_dir.path().join(source_dist_filename);
|
||||
let mut writer = tokio::fs::File::create(&sdist_file).await?;
|
||||
tokio::io::copy(&mut reader, &mut writer).await?;
|
||||
|
@ -469,7 +461,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
url: &Url,
|
||||
) -> Result<(Fetch, Option<PathBuf>), SourceDistError> {
|
||||
debug!("Fetching source distribution from Git: {url}");
|
||||
let git_dir = self.build_context.cache().join(GIT_CACHE);
|
||||
let git_dir = self.build_context.cache().bucket(CacheBucket::Git);
|
||||
|
||||
// Avoid races between different processes, too.
|
||||
let locks_dir = git_dir.join("locks");
|
||||
|
@ -499,7 +491,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
temp_dir: Option<TempDir>,
|
||||
source_dist: &Path,
|
||||
subdirectory: Option<&Path>,
|
||||
) -> anyhow::Result<(String, WheelFilename, Metadata21)> {
|
||||
) -> anyhow::Result<(PathBuf, String, WheelFilename, Metadata21)> {
|
||||
debug!("Building: {dist}");
|
||||
|
||||
if self.build_context.no_build() {
|
||||
|
@ -510,7 +502,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
let wheel_dir = self
|
||||
.build_context
|
||||
.cache()
|
||||
.join(BUILT_WHEELS_CACHE)
|
||||
.bucket(CacheBucket::BuiltWheels)
|
||||
.join(dist.distribution_id());
|
||||
fs::create_dir_all(&wheel_dir).await?;
|
||||
|
||||
|
@ -535,7 +527,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
let metadata = Metadata21::parse(dist_info.as_bytes())?;
|
||||
|
||||
debug!("Finished building: {dist}");
|
||||
Ok((disk_filename, filename, metadata))
|
||||
Ok((wheel_dir, disk_filename, filename, metadata))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue