Split puffin-cache into Puffin-specific and generic utilities (#728)

This crate started off as generic caching utilities, but over time we
added a lot of Puffin-specific functionality (like the cache bucket
abstraction, which knows about Git vs. direct URL vs. indexes and so on).
This PR moves the generic utilities into a new `cache-key` crate.
This commit is contained in:
Charlie Marsh 2023-12-25 09:38:56 -05:00 committed by GitHub
parent 4acf02f6b3
commit 6ff21374dc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 83 additions and 50 deletions

3
.gitignore vendored
View file

@ -9,9 +9,6 @@ target/
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
# Use e.g. `--cache-dir cache-docker` to keep a cache across container invocations
cache-*
# Python tmp files
__pycache__

20
Cargo.lock generated
View file

@ -391,6 +391,15 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc"
[[package]]
name = "cache-key"
version = "0.0.1"
dependencies = [
"hex",
"seahash",
"url",
]
[[package]]
name = "cachedir"
version = "0.3.1"
@ -847,11 +856,11 @@ name = "distribution-types"
version = "0.0.1"
dependencies = [
"anyhow",
"cache-key",
"distribution-filename",
"fs-err",
"pep440_rs 0.3.12",
"pep508_rs",
"puffin-cache",
"puffin-git",
"puffin-normalize",
"pypi-types",
@ -2291,15 +2300,14 @@ dependencies = [
name = "puffin-cache"
version = "0.0.1"
dependencies = [
"cache-key",
"cachedir",
"clap",
"directories",
"fs-err",
"hex",
"puffin-fs",
"puffin-normalize",
"pypi-types",
"seahash",
"serde",
"tempfile",
"tracing",
@ -2375,6 +2383,7 @@ dependencies = [
"anyhow",
"async_http_range_reader",
"async_zip",
"cache-key",
"distribution-filename",
"distribution-types",
"fs-err",
@ -2483,6 +2492,7 @@ version = "0.0.1"
dependencies = [
"anyhow",
"bytesize",
"cache-key",
"distribution-filename",
"distribution-types",
"fs-err",
@ -2541,12 +2551,12 @@ name = "puffin-git"
version = "0.0.1"
dependencies = [
"anyhow",
"cache-key",
"cargo-util",
"git2",
"glob",
"hex",
"once_cell",
"puffin-cache",
"rand",
"reqwest",
"serde",
@ -2593,6 +2603,7 @@ dependencies = [
name = "puffin-interpreter"
version = "0.0.1"
dependencies = [
"cache-key",
"fs-err",
"indoc",
"once_cell",
@ -2624,6 +2635,7 @@ version = "0.0.1"
dependencies = [
"anyhow",
"bitflags 2.4.1",
"cache-key",
"chrono",
"clap",
"colored",

View file

@ -4,6 +4,10 @@
Functionality for benchmarking Puffin.
## [cache-key](./cache-key)
Generic functionality for caching paths, URLs, and other resources across platforms.
## [distribution-filename](./distribution-filename)
Parse built distribution (wheel) and source distribution (sdist) filenames to extract structured

View file

@ -0,0 +1,19 @@
[package]
name = "cache-key"
version = "0.0.1"
description = "Generic functionality for caching paths, URLs, and other resources across platforms."
edition = { workspace = true }
rust-version = { workspace = true }
homepage = { workspace = true }
documentation = { workspace = true }
repository = { workspace = true }
authors = { workspace = true }
license = { workspace = true }
[lints]
workspace = true
[dependencies]
hex = { workspace = true }
seahash = { workspace = true }
url = { workspace = true }

View file

@ -0,0 +1,8 @@
pub use canonical_url::{CanonicalUrl, RepositoryUrl};
pub use digest::digest;
pub use stable_hash::{StableHash, StableHasher};
mod cache_key;
mod canonical_url;
mod digest;
mod stable_hash;

View file

@ -13,10 +13,10 @@ license = { workspace = true }
workspace = true
[dependencies]
cache-key = { path = "../cache-key" }
distribution-filename = { path = "../distribution-filename" }
pep440_rs = { path = "../pep440-rs" }
pep508_rs = { path = "../pep508-rs" }
puffin-cache = { path = "../puffin-cache" }
puffin-git = { path = "../puffin-git" }
puffin-normalize = { path = "../puffin-normalize" }
pypi-types = { path = "../pypi-types" }

View file

@ -639,13 +639,11 @@ impl RemoteSource for Dist {
impl Identifier for Url {
fn distribution_id(&self) -> DistributionId {
DistributionId::new(puffin_cache::digest(&puffin_cache::CanonicalUrl::new(self)))
DistributionId::new(cache_key::digest(&cache_key::CanonicalUrl::new(self)))
}
fn resource_id(&self) -> ResourceId {
ResourceId::new(puffin_cache::digest(&puffin_cache::RepositoryUrl::new(
self,
)))
ResourceId::new(cache_key::digest(&cache_key::RepositoryUrl::new(self)))
}
}
@ -661,11 +659,11 @@ impl Identifier for File {
impl Identifier for Path {
fn distribution_id(&self) -> DistributionId {
DistributionId::new(puffin_cache::digest(&self))
DistributionId::new(cache_key::digest(&self))
}
fn resource_id(&self) -> ResourceId {
ResourceId::new(puffin_cache::digest(&self))
ResourceId::new(cache_key::digest(&self))
}
}

View file

@ -3,7 +3,6 @@ use std::borrow::Cow;
use anyhow::Result;
use pep508_rs::VerbatimUrl;
use puffin_cache::CanonicalUrl;
use puffin_normalize::PackageName;
use crate::error::Error;
@ -38,7 +37,7 @@ pub trait DistributionMetadata: Name {
// `version` is normalized by its `ToString` impl
format!("{}-{}", self.name().as_dist_info_name(), version)
}
VersionOrUrl::Url(url) => puffin_cache::digest(&CanonicalUrl::new(url)),
VersionOrUrl::Url(url) => cache_key::digest(&cache_key::CanonicalUrl::new(url)),
})
}
}

View file

@ -14,6 +14,7 @@ license = { workspace = true }
workspace = true
[dependencies]
cache-key = { path = "../cache-key" }
puffin-fs = { path = "../puffin-fs" }
puffin-normalize = { path = "../puffin-normalize" }
pypi-types = { path = "../pypi-types" }
@ -22,8 +23,6 @@ cachedir = { workspace = true }
clap = { workspace = true, features = ["derive"], optional = true }
directories = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
hex = { workspace = true }
seahash = { workspace = true }
serde = { workspace = true, features = ["derive"] }
tempfile = { workspace = true }
tracing = { workspace = true }

View file

@ -9,23 +9,17 @@ use fs_err as fs;
use tempfile::{tempdir, TempDir};
use tracing::debug;
use crate::wheel::WheelCacheKind;
pub use by_timestamp::CachedByTimestamp;
pub use canonical_url::{CanonicalUrl, RepositoryUrl};
#[cfg(feature = "clap")]
pub use cli::CacheArgs;
pub use digest::digest;
use puffin_fs::{directories, force_remove_all};
use puffin_normalize::PackageName;
pub use stable_hash::{StableHash, StableHasher};
pub use wheel::WheelCache;
pub use crate::by_timestamp::CachedByTimestamp;
#[cfg(feature = "clap")]
pub use crate::cli::CacheArgs;
pub use crate::wheel::WheelCache;
use crate::wheel::WheelCacheKind;
mod by_timestamp;
mod cache_key;
mod canonical_url;
mod cli;
mod digest;
mod stable_hash;
mod wheel;
/// A cache entry which may or may not exist yet.

View file

@ -2,11 +2,11 @@ use std::path::{Path, PathBuf};
use url::Url;
use cache_key::{digest, CanonicalUrl};
use pypi_types::IndexUrl;
#[allow(unused_imports)] // For rustdoc
use crate::CacheBucket;
use crate::{digest, CanonicalUrl};
/// Cache wheels and their metadata, both from remote wheels and built from source distributions.
///

View file

@ -4,6 +4,7 @@ version = "0.0.1"
edition = "2021"
[dependencies]
cache-key = { path = "../cache-key" }
distribution-filename = { path = "../distribution-filename", features = ["serde"] }
distribution-types = { path = "../distribution-types" }
install-wheel-rs = { path = "../install-wheel-rs" }

View file

@ -20,7 +20,7 @@ use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use distribution_types::{BuiltDist, Name};
use install_wheel_rs::find_dist_info;
use pep440_rs::Version;
use puffin_cache::{digest, Cache, CacheBucket, CanonicalUrl, WheelCache};
use puffin_cache::{Cache, CacheBucket, WheelCache};
use puffin_normalize::PackageName;
use pypi_types::{File, IndexUrl, IndexUrls, Metadata21, SimpleJson};
@ -143,7 +143,7 @@ impl RegistryClient {
CacheBucket::Simple,
Path::new(&match index {
IndexUrl::Pypi => "pypi".to_string(),
IndexUrl::Url(url) => digest(&CanonicalUrl::new(url)),
IndexUrl::Url(url) => cache_key::digest(&cache_key::CanonicalUrl::new(url)),
}),
format!("{}.msgpack", package_name.as_ref()),
);

View file

@ -13,6 +13,7 @@ license = { workspace = true }
workspace = true
[dependencies]
cache-key = { path = "../cache-key" }
distribution-filename = { path = "../distribution-filename", features = ["serde"] }
distribution-types = { path = "../distribution-types" }
install-wheel-rs = { path = "../install-wheel-rs" }

View file

@ -15,7 +15,7 @@ use url::Url;
use distribution_filename::{WheelFilename, WheelFilenameError};
use distribution_types::{BuiltDist, DirectGitUrl, Dist, LocalEditable, Name, SourceDist};
use platform_tags::Tags;
use puffin_cache::{digest, Cache, CacheBucket, WheelCache};
use puffin_cache::{Cache, CacheBucket, WheelCache};
use puffin_client::RegistryClient;
use puffin_git::GitSource;
use puffin_traits::BuildContext;
@ -288,7 +288,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
dist,
filename,
path: editable_wheel_dir.join(disk_filename),
target: editable_wheel_dir.join(digest(&editable.path)),
target: editable_wheel_dir.join(cache_key::digest(&editable.path)),
};
Ok((LocalWheel::Built(built_wheel), metadata))
}

View file

@ -26,9 +26,7 @@ use distribution_types::{
};
use install_wheel_rs::read_dist_info;
use platform_tags::Tags;
use puffin_cache::{
digest, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, CanonicalUrl, WheelCache,
};
use puffin_cache::{CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, WheelCache};
use puffin_client::{CachedClient, CachedClientError, DataWithCachePolicy};
use puffin_fs::{write_atomic, LockedFile};
use puffin_git::{Fetch, GitSource};
@ -653,8 +651,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
// Avoid races between different processes, too.
let lock_dir = git_dir.join("locks");
fs::create_dir_all(&lock_dir).await?;
let canonical_url = CanonicalUrl::new(url);
let _lock = LockedFile::acquire(lock_dir.join(digest(&canonical_url)), &canonical_url)?;
let canonical_url = cache_key::CanonicalUrl::new(url);
let _lock = LockedFile::acquire(
lock_dir.join(cache_key::digest(&canonical_url)),
&canonical_url,
)?;
let DirectGitUrl { url, subdirectory } =
DirectGitUrl::try_from(url).map_err(SourceDistError::Git)?;

View file

@ -13,7 +13,7 @@ license = { workspace = true }
workspace = true
[dependencies]
puffin-cache = { path = "../puffin-cache" }
cache-key = { path = "../cache-key" }
anyhow = { workspace = true }
cargo-util = { workspace = true }

View file

@ -8,7 +8,7 @@ use reqwest::Client;
use tracing::debug;
use url::Url;
use puffin_cache::{digest, RepositoryUrl};
use cache_key::{digest, RepositoryUrl};
use crate::git::GitRemote;
use crate::{FetchStrategy, GitSha, GitUrl};

View file

@ -13,6 +13,7 @@ license = { workspace = true }
workspace = true
[dependencies]
cache-key = { path = "../cache-key" }
pep440_rs = { path = "../pep440-rs" }
pep508_rs = { path = "../pep508-rs", features = ["serde"] }
platform-host = { path = "../platform-host" }

View file

@ -6,12 +6,12 @@ use once_cell::sync::OnceCell;
use serde::{Deserialize, Serialize};
use tracing::{debug, warn};
use cache_key::digest;
use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use platform_host::{Platform, PlatformError};
use platform_tags::Tags;
use puffin_cache::CachedByTimestamp;
use puffin_cache::{digest, Cache, CacheBucket};
use puffin_cache::{Cache, CacheBucket, CachedByTimestamp};
use puffin_fs::write_atomic_sync;
use crate::python_platform::PythonPlatform;

View file

@ -13,6 +13,7 @@ license = { workspace = true }
workspace = true
[dependencies]
cache-key = { path = "../cache-key" }
distribution-filename = { path = "../distribution-filename", features = ["serde"] }
distribution-types = { path = "../distribution-types" }
install-wheel-rs = { path = "../install-wheel-rs" }

View file

@ -4,7 +4,6 @@ use pubgrub::type_aliases::DependencyConstraints;
use tracing::warn;
use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl};
use puffin_cache::CanonicalUrl;
use puffin_normalize::{ExtraName, PackageName};
use crate::overrides::Overrides;
@ -235,7 +234,7 @@ fn merge_package(
PubGrubPackage::Package(name, _extra, Some(left)),
PubGrubPackage::Package(.., Some(right)),
) => {
if CanonicalUrl::new(left) == CanonicalUrl::new(right) {
if cache_key::CanonicalUrl::new(left) == cache_key::CanonicalUrl::new(right) {
Ok(None)
} else {
Err(ResolveError::ConflictingUrls(

View file

@ -23,7 +23,6 @@ use distribution_types::{
};
use pep508_rs::{MarkerEnvironment, Requirement};
use platform_tags::Tags;
use puffin_cache::CanonicalUrl;
use puffin_client::RegistryClient;
use puffin_distribution::{DistributionDatabase, DistributionDatabaseError};
use puffin_normalize::PackageName;
@ -868,17 +867,17 @@ pub(crate) struct Index {
}
#[derive(Debug, Default)]
struct AllowedUrls(FxHashSet<CanonicalUrl>);
struct AllowedUrls(FxHashSet<cache_key::CanonicalUrl>);
impl AllowedUrls {
fn contains(&self, url: &Url) -> bool {
self.0.contains(&CanonicalUrl::new(url))
self.0.contains(&cache_key::CanonicalUrl::new(url))
}
}
impl<'a> FromIterator<&'a Url> for AllowedUrls {
fn from_iter<T: IntoIterator<Item = &'a Url>>(iter: T) -> Self {
Self(iter.into_iter().map(CanonicalUrl::new).collect())
Self(iter.into_iter().map(cache_key::CanonicalUrl::new).collect())
}
}