diff --git a/.gitignore b/.gitignore index 79d27d5bf..abcd6f2a8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,9 +9,6 @@ target/ # MSVC Windows builds of rustc generate these, which store debugging information *.pdb -# Use e.g. `--cache-dir cache-docker` to keep a cache across container invocations -cache-* - # Python tmp files __pycache__ diff --git a/Cargo.lock b/Cargo.lock index 7a4d26f1f..6d16bfe49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -391,6 +391,15 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" +[[package]] +name = "cache-key" +version = "0.0.1" +dependencies = [ + "hex", + "seahash", + "url", +] + [[package]] name = "cachedir" version = "0.3.1" @@ -847,11 +856,11 @@ name = "distribution-types" version = "0.0.1" dependencies = [ "anyhow", + "cache-key", "distribution-filename", "fs-err", "pep440_rs 0.3.12", "pep508_rs", - "puffin-cache", "puffin-git", "puffin-normalize", "pypi-types", @@ -2291,15 +2300,14 @@ dependencies = [ name = "puffin-cache" version = "0.0.1" dependencies = [ + "cache-key", "cachedir", "clap", "directories", "fs-err", - "hex", "puffin-fs", "puffin-normalize", "pypi-types", - "seahash", "serde", "tempfile", "tracing", @@ -2375,6 +2383,7 @@ dependencies = [ "anyhow", "async_http_range_reader", "async_zip", + "cache-key", "distribution-filename", "distribution-types", "fs-err", @@ -2483,6 +2492,7 @@ version = "0.0.1" dependencies = [ "anyhow", "bytesize", + "cache-key", "distribution-filename", "distribution-types", "fs-err", @@ -2541,12 +2551,12 @@ name = "puffin-git" version = "0.0.1" dependencies = [ "anyhow", + "cache-key", "cargo-util", "git2", "glob", "hex", "once_cell", - "puffin-cache", "rand", "reqwest", "serde", @@ -2593,6 +2603,7 @@ dependencies = [ name = "puffin-interpreter" version = "0.0.1" dependencies = [ + "cache-key", "fs-err", "indoc", "once_cell", @@ -2624,6 +2635,7 @@ version = "0.0.1" dependencies 
= [ "anyhow", "bitflags 2.4.1", + "cache-key", "chrono", "clap", "colored", diff --git a/crates/README.md b/crates/README.md index 221877eb0..70badea53 100644 --- a/crates/README.md +++ b/crates/README.md @@ -4,6 +4,10 @@ Functionality for benchmarking Puffin. +## [cache-key](./cache-key) + +Generic functionality for caching paths, URLs, and other resources across platforms. + ## [distribution-filename](./distribution-filename) Parse built distribution (wheel) and source distribution (sdist) filenames to extract structured diff --git a/crates/cache-key/Cargo.toml b/crates/cache-key/Cargo.toml new file mode 100644 index 000000000..2461eb3f1 --- /dev/null +++ b/crates/cache-key/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "cache-key" +version = "0.0.1" +description = "Generic functionality for caching paths, URLs, and other resources across platforms." +edition = { workspace = true } +rust-version = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +license = { workspace = true } + +[lints] +workspace = true + +[dependencies] +hex = { workspace = true } +seahash = { workspace = true } +url = { workspace = true } diff --git a/crates/puffin-cache/src/cache_key.rs b/crates/cache-key/src/cache_key.rs similarity index 100% rename from crates/puffin-cache/src/cache_key.rs rename to crates/cache-key/src/cache_key.rs diff --git a/crates/puffin-cache/src/canonical_url.rs b/crates/cache-key/src/canonical_url.rs similarity index 100% rename from crates/puffin-cache/src/canonical_url.rs rename to crates/cache-key/src/canonical_url.rs diff --git a/crates/puffin-cache/src/digest.rs b/crates/cache-key/src/digest.rs similarity index 100% rename from crates/puffin-cache/src/digest.rs rename to crates/cache-key/src/digest.rs diff --git a/crates/cache-key/src/lib.rs b/crates/cache-key/src/lib.rs new file mode 100644 index 000000000..60b774462 --- /dev/null +++ 
b/crates/cache-key/src/lib.rs @@ -0,0 +1,8 @@ +pub use canonical_url::{CanonicalUrl, RepositoryUrl}; +pub use digest::digest; +pub use stable_hash::{StableHash, StableHasher}; + +mod cache_key; +mod canonical_url; +mod digest; +mod stable_hash; diff --git a/crates/puffin-cache/src/stable_hash.rs b/crates/cache-key/src/stable_hash.rs similarity index 100% rename from crates/puffin-cache/src/stable_hash.rs rename to crates/cache-key/src/stable_hash.rs diff --git a/crates/distribution-types/Cargo.toml b/crates/distribution-types/Cargo.toml index 62fcaa6c6..0f1f8f953 100644 --- a/crates/distribution-types/Cargo.toml +++ b/crates/distribution-types/Cargo.toml @@ -13,10 +13,10 @@ license = { workspace = true } workspace = true [dependencies] +cache-key = { path = "../cache-key" } distribution-filename = { path = "../distribution-filename" } pep440_rs = { path = "../pep440-rs" } pep508_rs = { path = "../pep508-rs" } -puffin-cache = { path = "../puffin-cache" } puffin-git = { path = "../puffin-git" } puffin-normalize = { path = "../puffin-normalize" } pypi-types = { path = "../pypi-types" } diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index f25354f74..47efe5cc7 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -639,13 +639,11 @@ impl RemoteSource for Dist { impl Identifier for Url { fn distribution_id(&self) -> DistributionId { - DistributionId::new(puffin_cache::digest(&puffin_cache::CanonicalUrl::new(self))) + DistributionId::new(cache_key::digest(&cache_key::CanonicalUrl::new(self))) } fn resource_id(&self) -> ResourceId { - ResourceId::new(puffin_cache::digest(&puffin_cache::RepositoryUrl::new( - self, - ))) + ResourceId::new(cache_key::digest(&cache_key::RepositoryUrl::new(self))) } } @@ -661,11 +659,11 @@ impl Identifier for File { impl Identifier for Path { fn distribution_id(&self) -> DistributionId { - DistributionId::new(puffin_cache::digest(&self)) + 
DistributionId::new(cache_key::digest(&self)) } fn resource_id(&self) -> ResourceId { - ResourceId::new(puffin_cache::digest(&self)) + ResourceId::new(cache_key::digest(&self)) } } diff --git a/crates/distribution-types/src/traits.rs b/crates/distribution-types/src/traits.rs index 3cd6ea3b5..705b15e8c 100644 --- a/crates/distribution-types/src/traits.rs +++ b/crates/distribution-types/src/traits.rs @@ -3,7 +3,6 @@ use std::borrow::Cow; use anyhow::Result; use pep508_rs::VerbatimUrl; -use puffin_cache::CanonicalUrl; use puffin_normalize::PackageName; use crate::error::Error; @@ -38,7 +37,7 @@ pub trait DistributionMetadata: Name { // `version` is normalized by its `ToString` impl format!("{}-{}", self.name().as_dist_info_name(), version) } - VersionOrUrl::Url(url) => puffin_cache::digest(&CanonicalUrl::new(url)), + VersionOrUrl::Url(url) => cache_key::digest(&cache_key::CanonicalUrl::new(url)), }) } } diff --git a/crates/puffin-cache/Cargo.toml b/crates/puffin-cache/Cargo.toml index a85339bae..0e66eabc0 100644 --- a/crates/puffin-cache/Cargo.toml +++ b/crates/puffin-cache/Cargo.toml @@ -14,6 +14,7 @@ license = { workspace = true } workspace = true [dependencies] +cache-key = { path = "../cache-key" } puffin-fs = { path = "../puffin-fs" } puffin-normalize = { path = "../puffin-normalize" } pypi-types = { path = "../pypi-types" } @@ -22,8 +23,6 @@ cachedir = { workspace = true } clap = { workspace = true, features = ["derive"], optional = true } directories = { workspace = true } fs-err = { workspace = true, features = ["tokio"] } -hex = { workspace = true } -seahash = { workspace = true } serde = { workspace = true, features = ["derive"] } tempfile = { workspace = true } tracing = { workspace = true } diff --git a/crates/puffin-cache/src/lib.rs b/crates/puffin-cache/src/lib.rs index 8171c0cca..3fa3a94a6 100644 --- a/crates/puffin-cache/src/lib.rs +++ b/crates/puffin-cache/src/lib.rs @@ -9,23 +9,17 @@ use fs_err as fs; use tempfile::{tempdir, TempDir}; use 
tracing::debug; -use crate::wheel::WheelCacheKind; -pub use by_timestamp::CachedByTimestamp; -pub use canonical_url::{CanonicalUrl, RepositoryUrl}; -#[cfg(feature = "clap")] -pub use cli::CacheArgs; -pub use digest::digest; use puffin_fs::{directories, force_remove_all}; use puffin_normalize::PackageName; -pub use stable_hash::{StableHash, StableHasher}; -pub use wheel::WheelCache; + +pub use crate::by_timestamp::CachedByTimestamp; +#[cfg(feature = "clap")] +pub use crate::cli::CacheArgs; +pub use crate::wheel::WheelCache; +use crate::wheel::WheelCacheKind; mod by_timestamp; -mod cache_key; -mod canonical_url; mod cli; -mod digest; -mod stable_hash; mod wheel; /// A cache entry which may or may not exist yet. diff --git a/crates/puffin-cache/src/wheel.rs b/crates/puffin-cache/src/wheel.rs index e560ce4b9..3acb34d60 100644 --- a/crates/puffin-cache/src/wheel.rs +++ b/crates/puffin-cache/src/wheel.rs @@ -2,11 +2,11 @@ use std::path::{Path, PathBuf}; use url::Url; +use cache_key::{digest, CanonicalUrl}; use pypi_types::IndexUrl; #[allow(unused_imports)] // For rustdoc use crate::CacheBucket; -use crate::{digest, CanonicalUrl}; /// Cache wheels and their metadata, both from remote wheels and built from source distributions. 
/// diff --git a/crates/puffin-client/Cargo.toml b/crates/puffin-client/Cargo.toml index 53e36b34c..4c84a26d4 100644 --- a/crates/puffin-client/Cargo.toml +++ b/crates/puffin-client/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" edition = "2021" [dependencies] +cache-key = { path = "../cache-key" } distribution-filename = { path = "../distribution-filename", features = ["serde"] } distribution-types = { path = "../distribution-types" } install-wheel-rs = { path = "../install-wheel-rs" } diff --git a/crates/puffin-client/src/registry_client.rs b/crates/puffin-client/src/registry_client.rs index d8c88dee6..2d17f5527 100644 --- a/crates/puffin-client/src/registry_client.rs +++ b/crates/puffin-client/src/registry_client.rs @@ -20,7 +20,7 @@ use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename}; use distribution_types::{BuiltDist, Name}; use install_wheel_rs::find_dist_info; use pep440_rs::Version; -use puffin_cache::{digest, Cache, CacheBucket, CanonicalUrl, WheelCache}; +use puffin_cache::{Cache, CacheBucket, WheelCache}; use puffin_normalize::PackageName; use pypi_types::{File, IndexUrl, IndexUrls, Metadata21, SimpleJson}; @@ -143,7 +143,7 @@ impl RegistryClient { CacheBucket::Simple, Path::new(&match index { IndexUrl::Pypi => "pypi".to_string(), - IndexUrl::Url(url) => digest(&CanonicalUrl::new(url)), + IndexUrl::Url(url) => cache_key::digest(&cache_key::CanonicalUrl::new(url)), }), format!("{}.msgpack", package_name.as_ref()), ); diff --git a/crates/puffin-distribution/Cargo.toml b/crates/puffin-distribution/Cargo.toml index 9f9a2e8b6..7012bfeda 100644 --- a/crates/puffin-distribution/Cargo.toml +++ b/crates/puffin-distribution/Cargo.toml @@ -13,6 +13,7 @@ license = { workspace = true } workspace = true [dependencies] +cache-key = { path = "../cache-key" } distribution-filename = { path = "../distribution-filename", features = ["serde"] } distribution-types = { path = "../distribution-types" } install-wheel-rs = { path = "../install-wheel-rs" } 
diff --git a/crates/puffin-distribution/src/distribution_database.rs b/crates/puffin-distribution/src/distribution_database.rs index ee9bd2c62..9fd3c0105 100644 --- a/crates/puffin-distribution/src/distribution_database.rs +++ b/crates/puffin-distribution/src/distribution_database.rs @@ -15,7 +15,7 @@ use url::Url; use distribution_filename::{WheelFilename, WheelFilenameError}; use distribution_types::{BuiltDist, DirectGitUrl, Dist, LocalEditable, Name, SourceDist}; use platform_tags::Tags; -use puffin_cache::{digest, Cache, CacheBucket, WheelCache}; +use puffin_cache::{Cache, CacheBucket, WheelCache}; use puffin_client::RegistryClient; use puffin_git::GitSource; use puffin_traits::BuildContext; @@ -288,7 +288,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> dist, filename, path: editable_wheel_dir.join(disk_filename), - target: editable_wheel_dir.join(digest(&editable.path)), + target: editable_wheel_dir.join(cache_key::digest(&editable.path)), }; Ok((LocalWheel::Built(built_wheel), metadata)) } diff --git a/crates/puffin-distribution/src/source_dist.rs b/crates/puffin-distribution/src/source_dist.rs index 55d65b9b8..0705d97c2 100644 --- a/crates/puffin-distribution/src/source_dist.rs +++ b/crates/puffin-distribution/src/source_dist.rs @@ -26,9 +26,7 @@ use distribution_types::{ }; use install_wheel_rs::read_dist_info; use platform_tags::Tags; -use puffin_cache::{ - digest, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, CanonicalUrl, WheelCache, -}; +use puffin_cache::{CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, WheelCache}; use puffin_client::{CachedClient, CachedClientError, DataWithCachePolicy}; use puffin_fs::{write_atomic, LockedFile}; use puffin_git::{Fetch, GitSource}; @@ -653,8 +651,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { // Avoid races between different processes, too. 
let lock_dir = git_dir.join("locks"); fs::create_dir_all(&lock_dir).await?; - let canonical_url = CanonicalUrl::new(url); - let _lock = LockedFile::acquire(lock_dir.join(digest(&canonical_url)), &canonical_url)?; + let canonical_url = cache_key::CanonicalUrl::new(url); + let _lock = LockedFile::acquire( + lock_dir.join(cache_key::digest(&canonical_url)), + &canonical_url, + )?; let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(SourceDistError::Git)?; diff --git a/crates/puffin-git/Cargo.toml b/crates/puffin-git/Cargo.toml index 75a175b3b..14d823832 100644 --- a/crates/puffin-git/Cargo.toml +++ b/crates/puffin-git/Cargo.toml @@ -13,7 +13,7 @@ license = { workspace = true } workspace = true [dependencies] -puffin-cache = { path = "../puffin-cache" } +cache-key = { path = "../cache-key" } anyhow = { workspace = true } cargo-util = { workspace = true } diff --git a/crates/puffin-git/src/source.rs b/crates/puffin-git/src/source.rs index 5a1c5150d..d2c4c143a 100644 --- a/crates/puffin-git/src/source.rs +++ b/crates/puffin-git/src/source.rs @@ -8,7 +8,7 @@ use reqwest::Client; use tracing::debug; use url::Url; -use puffin_cache::{digest, RepositoryUrl}; +use cache_key::{digest, RepositoryUrl}; use crate::git::GitRemote; use crate::{FetchStrategy, GitSha, GitUrl}; diff --git a/crates/puffin-interpreter/Cargo.toml b/crates/puffin-interpreter/Cargo.toml index cc6d186c2..8317658bd 100644 --- a/crates/puffin-interpreter/Cargo.toml +++ b/crates/puffin-interpreter/Cargo.toml @@ -13,6 +13,7 @@ license = { workspace = true } workspace = true [dependencies] +cache-key = { path = "../cache-key" } pep440_rs = { path = "../pep440-rs" } pep508_rs = { path = "../pep508-rs", features = ["serde"] } platform-host = { path = "../platform-host" } diff --git a/crates/puffin-interpreter/src/interpreter.rs b/crates/puffin-interpreter/src/interpreter.rs index fe719f839..660b6161e 100644 --- a/crates/puffin-interpreter/src/interpreter.rs +++ 
b/crates/puffin-interpreter/src/interpreter.rs @@ -6,12 +6,12 @@ use once_cell::sync::OnceCell; use serde::{Deserialize, Serialize}; use tracing::{debug, warn}; +use cache_key::digest; use pep440_rs::Version; use pep508_rs::MarkerEnvironment; use platform_host::{Platform, PlatformError}; use platform_tags::Tags; -use puffin_cache::CachedByTimestamp; -use puffin_cache::{digest, Cache, CacheBucket}; +use puffin_cache::{Cache, CacheBucket, CachedByTimestamp}; use puffin_fs::write_atomic_sync; use crate::python_platform::PythonPlatform; diff --git a/crates/puffin-resolver/Cargo.toml b/crates/puffin-resolver/Cargo.toml index 43b0c36c9..a3aa72ea6 100644 --- a/crates/puffin-resolver/Cargo.toml +++ b/crates/puffin-resolver/Cargo.toml @@ -13,6 +13,7 @@ license = { workspace = true } workspace = true [dependencies] +cache-key = { path = "../cache-key" } distribution-filename = { path = "../distribution-filename", features = ["serde"] } distribution-types = { path = "../distribution-types" } install-wheel-rs = { path = "../install-wheel-rs" } diff --git a/crates/puffin-resolver/src/pubgrub/dependencies.rs b/crates/puffin-resolver/src/pubgrub/dependencies.rs index 201142046..4eac6caf6 100644 --- a/crates/puffin-resolver/src/pubgrub/dependencies.rs +++ b/crates/puffin-resolver/src/pubgrub/dependencies.rs @@ -4,7 +4,6 @@ use pubgrub::type_aliases::DependencyConstraints; use tracing::warn; use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl}; -use puffin_cache::CanonicalUrl; use puffin_normalize::{ExtraName, PackageName}; use crate::overrides::Overrides; @@ -235,7 +234,7 @@ fn merge_package( PubGrubPackage::Package(name, _extra, Some(left)), PubGrubPackage::Package(.., Some(right)), ) => { - if CanonicalUrl::new(left) == CanonicalUrl::new(right) { + if cache_key::CanonicalUrl::new(left) == cache_key::CanonicalUrl::new(right) { Ok(None) } else { Err(ResolveError::ConflictingUrls( diff --git a/crates/puffin-resolver/src/resolver.rs b/crates/puffin-resolver/src/resolver.rs 
index 257b2bb64..6face7e78 100644 --- a/crates/puffin-resolver/src/resolver.rs +++ b/crates/puffin-resolver/src/resolver.rs @@ -23,7 +23,6 @@ use distribution_types::{ }; use pep508_rs::{MarkerEnvironment, Requirement}; use platform_tags::Tags; -use puffin_cache::CanonicalUrl; use puffin_client::RegistryClient; use puffin_distribution::{DistributionDatabase, DistributionDatabaseError}; use puffin_normalize::PackageName; @@ -868,17 +867,17 @@ pub(crate) struct Index { } #[derive(Debug, Default)] -struct AllowedUrls(FxHashSet<CanonicalUrl>); +struct AllowedUrls(FxHashSet<cache_key::CanonicalUrl>); impl AllowedUrls { fn contains(&self, url: &Url) -> bool { - self.0.contains(&CanonicalUrl::new(url)) + self.0.contains(&cache_key::CanonicalUrl::new(url)) } } impl<'a> FromIterator<&'a Url> for AllowedUrls { fn from_iter<T: IntoIterator<Item = &'a Url>>(iter: T) -> Self { - Self(iter.into_iter().map(CanonicalUrl::new).collect()) + Self(iter.into_iter().map(cache_key::CanonicalUrl::new).collect()) } }