Introduce Cache, CacheBucket and CacheEntry (#507)

This is mostly a mechanical refactor that moves 80% of our code to the
same cache abstraction.

It introduces `Cache`, which abstracts away the path of the cache
and the temp dir drop and is passed throughout the codebase. To get a
specific cache bucket, you request your `CacheBucket` from
`Cache`. `CacheBucket` centralizes the names of all cache
buckets, moving them away from the string constants spread throughout
the crates.

Specifically for working with the `CachedClient`, there is a
`CacheEntry`. I'm not sure yet if that is a strict improvement over
`cache_dir: PathBuf, cache_file: String`; I may have to revisit that
later.

The interpreter cache moved into `interpreter-v0`.

We can use the `CacheBucket` documentation to describe the cache structure in
each bucket:


![image](b023fdfb-e34d-4c2d-8663-b5f73937a539)
This commit is contained in:
konsti 2023-11-28 18:11:14 +01:00 committed by GitHub
parent 3d47d2b1da
commit 5435d44756
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
38 changed files with 528 additions and 433 deletions

View file

@ -25,6 +25,7 @@ clap = { workspace = true }
fs-err = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tempfile = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }

View file

@ -25,7 +25,8 @@ fn run() -> Result<(), gourgeist::Error> {
let location = cli.path.unwrap_or(Utf8PathBuf::from(".venv"));
let python = parse_python_cli(cli.python)?;
let platform = Platform::current()?;
let info = Interpreter::query(python.as_std_path(), platform, None).unwrap();
let cache = tempfile::tempdir()?;
let info = Interpreter::query(python.as_std_path(), platform, cache.path()).unwrap();
create_bare_venv(&location, &info)?;
Ok(())
}

View file

@ -1,12 +1,13 @@
#![cfg(feature = "clap")]
use fs_err as fs;
use std::io;
use std::path::PathBuf;
use clap::Parser;
use directories::ProjectDirs;
use tempfile::{tempdir, TempDir};
use fs_err as fs;
use crate::Cache;
#[derive(Parser, Debug, Clone)]
pub struct CacheArgs {
@ -19,18 +20,7 @@ pub struct CacheArgs {
cache_dir: Option<PathBuf>,
}
#[derive(Debug)]
pub struct CacheDir {
/// The cache directory.
cache_dir: PathBuf,
/// A temporary cache directory, if the user requested `--no-cache`. Included to ensure that
/// the temporary directory exists for the length of the operation, but is dropped at the end
/// as appropriate.
#[allow(dead_code)]
temp_dir: Option<TempDir>,
}
impl TryFrom<CacheArgs> for CacheDir {
impl TryFrom<CacheArgs> for Cache {
type Error = io::Error;
/// Prefer, in order:
@ -43,36 +33,16 @@ impl TryFrom<CacheArgs> for CacheDir {
fn try_from(value: CacheArgs) -> Result<Self, Self::Error> {
let project_dirs = ProjectDirs::from("", "", "puffin");
if value.no_cache {
let temp_dir = tempdir()?;
let cache_dir = temp_dir.path().to_path_buf();
Ok(Self {
cache_dir,
temp_dir: Some(temp_dir),
})
Ok(Cache::temp()?)
} else if let Some(cache_dir) = value.cache_dir {
fs::create_dir_all(&cache_dir)?;
Ok(Self {
cache_dir: fs::canonicalize(cache_dir)?,
temp_dir: None,
})
Ok(Cache::from_path(fs::canonicalize(cache_dir)?))
} else if let Some(project_dirs) = project_dirs {
Ok(Self {
cache_dir: project_dirs.cache_dir().to_path_buf(),
temp_dir: None,
})
Ok(Cache::from_path(project_dirs.cache_dir().to_path_buf()))
} else {
let cache_dir = ".puffin_cache";
fs::create_dir_all(cache_dir)?;
Ok(Self {
cache_dir: fs::canonicalize(cache_dir)?,
temp_dir: None,
})
Ok(Cache::from_path(fs::canonicalize(cache_dir)?))
}
}
}
impl CacheDir {
pub fn path(&self) -> &PathBuf {
&self.cache_dir
}
}

View file

@ -1,118 +1,211 @@
use std::hash::Hasher;
use std::fmt::{Display, Formatter};
use std::io;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use seahash::SeaHasher;
use tempfile::{tempdir, TempDir};
pub use canonical_url::{CanonicalUrl, RepositoryUrl};
#[cfg(feature = "clap")]
pub use cli::{CacheArgs, CacheDir};
pub use cli::CacheArgs;
pub use digest::digest;
pub use metadata::WheelMetadataCache;
pub use stable_hash::{StableHash, StableHasher};
mod cache_key;
mod canonical_url;
mod cli;
mod digest;
mod metadata;
mod stable_hash;
/// A trait for types that can be hashed in a stable way across versions and platforms.
pub trait StableHash {
fn stable_hash(&self, state: &mut StableHasher);
/// A cache entry which may or may not exist yet.
#[derive(Debug, Clone)]
pub struct CacheEntry {
    /// The directory the entry lives in (typically inside a cache bucket).
    pub dir: PathBuf,
    /// The file name of the entry within `dir`.
    pub file: String,
}
fn stable_hash_slice(data: &[Self], state: &mut StableHasher)
where
Self: Sized,
{
for piece in data {
piece.stable_hash(state);
}
impl CacheEntry {
    /// The full path of the entry: `dir` joined with `file`.
    pub fn path(&self) -> PathBuf {
        // TODO(konstin): Cache this to avoid allocations?
        let mut full = self.dir.clone();
        full.push(&self.file);
        full
    }
}
#[derive(Clone, Default)]
pub struct StableHasher {
inner: SeaHasher,
/// The main cache abstraction.
///
/// Cheap to clone: the temp-dir guard is held behind an `Arc`, so all clones
/// share one guard and the directory lives until the last clone is dropped.
#[derive(Debug, Clone)]
pub struct Cache {
    /// The cache directory.
    root: PathBuf,
    /// A temporary cache directory, if the user requested `--no-cache`.
    ///
    /// Included to ensure that the temporary directory exists for the length of the operation, but
    /// is dropped at the end as appropriate.
    _temp_dir_drop: Option<Arc<TempDir>>,
}
impl StableHasher {
pub fn new() -> Self {
impl Cache {
/// A persistent cache directory at `root`.
pub fn from_path(root: impl Into<PathBuf>) -> Self {
Self {
inner: SeaHasher::new(),
root: root.into(),
_temp_dir_drop: None,
}
}
pub fn finish(self) -> u64 {
self.inner.finish()
/// Create a temporary cache directory.
pub fn temp() -> Result<Self, io::Error> {
let temp_dir = tempdir()?;
Ok(Self {
root: temp_dir.path().to_path_buf(),
_temp_dir_drop: Some(Arc::new(temp_dir)),
})
}
pub fn root(&self) -> &Path {
&self.root
}
/// The folder for a specific cache bucket
pub fn bucket(&self, cache_bucket: CacheBucket) -> PathBuf {
self.root.join(cache_bucket.to_str())
}
pub fn entry(
&self,
cache_bucket: CacheBucket,
dir: impl AsRef<Path>,
file: String,
) -> CacheEntry {
CacheEntry {
dir: self.bucket(cache_bucket).join(dir.as_ref()),
file,
}
}
}
impl Hasher for StableHasher {
#[inline]
fn finish(&self) -> u64 {
self.inner.finish()
}
/// The different kinds of data in the cache are stored in different buckets, which in our case
/// are subfolders.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum CacheBucket {
    /// Downloaded remote wheel archives.
    Archives,
    /// Metadata of a built source distribution.
    ///
    /// Cache structure:
    /// * `<build wheel metadata cache>/pypi/foo-1.0.0.zip/metadata.json`
    /// * `<build wheel metadata cache>/<sha256(index-url)>/foo-1.0.0.zip/metadata.json`
    /// * `<build wheel metadata cache>/url/<sha256(url)>/foo-1.0.0.zip/metadata.json`
    ///
    /// But the url filename does not need to be a valid source dist filename
    /// (<https://github.com/search?q=path%3A**%2Frequirements.txt+master.zip&type=code>),
    /// so it could also be the following and we have to take any string as filename:
    /// * `<build wheel metadata cache>/url/<sha256(url)>/master.zip/metadata.json`
    ///
    /// # Example
    ///
    /// The following requirements:
    /// ```text
    /// # git source dist
    /// pydantic-extra-types @ git+https://github.com/pydantic/pydantic-extra-types.git
    /// # pypi source dist
    /// django_allauth==0.51.0
    /// # url source dist
    /// werkzeug @ https://files.pythonhosted.org/packages/0d/cc/ff1904eb5eb4b455e442834dabf9427331ac0fa02853bf83db817a7dd53d/werkzeug-3.0.1.tar.gz
    /// ```
    ///
    /// ...may be cached as:
    /// ```text
    /// built-wheel-metadata-v0
    /// ├── git
    /// │   └── 5c56bc1c58c34c11
    /// │       └── 843b753e9e8cb74e83cac55598719b39a4d5ef1f
    /// │           └── metadata.json
    /// ├── pypi
    /// │   └── django-allauth-0.51.0.tar.gz
    /// │       └── metadata.json
    /// └── url
    ///     └── 6781bd6440ae72c2
    ///         └── werkzeug-3.0.1.tar.gz
    ///             └── metadata.json
    /// ```
    ///
    /// The inside of a `metadata.json`:
    /// ```json
    /// {
    ///   "data": {
    ///     "django_allauth-0.51.0-py3-none-any.whl": {
    ///       "metadata-version": "2.1",
    ///       "name": "django-allauth",
    ///       "version": "0.51.0",
    ///       ...
    ///     }
    ///   }
    /// }
    /// ```
    BuiltWheelMetadata,
    /// Wheel archives built from source distributions.
    BuiltWheels,
    /// Git repositories.
    Git,
    /// Information about an interpreter at a path.
    Interpreter,
    /// Index responses through the simple metadata API.
    Simple,
    /// Metadata of a remote wheel.
    ///
    /// Cache structure:
    /// * `wheel-metadata-v0/pypi/foo-1.0.0-py3-none-any.json`
    /// * `wheel-metadata-v0/<digest(index-url)>/foo-1.0.0-py3-none-any.json`
    /// * `wheel-metadata-v0/url/<digest(url)>/foo-1.0.0-py3-none-any.json`
    ///
    /// See `puffin_client::RegistryClient::wheel_metadata` for information on how wheel metadata
    /// is fetched.
    ///
    /// # Example
    ///
    /// The following requirements:
    /// ```text
    /// # pypi wheel
    /// pandas
    /// # url wheel
    /// flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
    /// ```
    ///
    /// ...may be cached as:
    /// ```text
    /// wheel-metadata-v0
    /// ├── pypi
    /// │   ...
    /// │   ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.json
    /// │   ...
    /// └── url
    ///     └── 4b8be67c801a7ecb
    ///         └── flask-3.0.0-py3-none-any.json
    /// ```
    WheelMetadata,
    /// Unzipped wheels, ready for installation via reflinking, symlinking, or copying.
    Wheels,
}
#[inline]
fn write(&mut self, bytes: &[u8]) {
self.inner.write(bytes);
}
#[inline]
fn write_u8(&mut self, i: u8) {
self.inner.write_u8(i);
}
#[inline]
fn write_u16(&mut self, i: u16) {
self.inner.write_u16(i);
}
#[inline]
fn write_u32(&mut self, i: u32) {
self.inner.write_u32(i);
}
#[inline]
fn write_u64(&mut self, i: u64) {
self.inner.write_u64(i);
}
#[inline]
fn write_u128(&mut self, i: u128) {
self.inner.write_u128(i);
}
#[inline]
fn write_usize(&mut self, i: usize) {
self.inner.write_usize(i);
}
#[inline]
fn write_i8(&mut self, i: i8) {
self.inner.write_i8(i);
}
#[inline]
fn write_i16(&mut self, i: i16) {
self.inner.write_i16(i);
}
#[inline]
fn write_i32(&mut self, i: i32) {
self.inner.write_i32(i);
}
#[inline]
fn write_i64(&mut self, i: i64) {
self.inner.write_i64(i);
}
#[inline]
fn write_i128(&mut self, i: i128) {
self.inner.write_i128(i);
}
#[inline]
fn write_isize(&mut self, i: isize) {
self.inner.write_isize(i);
impl CacheBucket {
fn to_str(self) -> &'static str {
match self {
CacheBucket::Archives => "archives-v0",
CacheBucket::BuiltWheelMetadata => "built-wheel-metadata-v0",
CacheBucket::BuiltWheels => "built-wheels-v0",
CacheBucket::Git => "git-v0",
CacheBucket::Interpreter => "interpreter-v0",
CacheBucket::Simple => "simple-v0",
CacheBucket::WheelMetadata => "wheel-metadata-v0",
CacheBucket::Wheels => "wheels-v0",
}
}
}
impl Display for CacheBucket {
    /// Renders the bucket as its on-disk folder name.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.to_str())
    }
}

View file

@ -4,18 +4,15 @@ use url::Url;
use pypi_types::IndexUrl;
#[allow(unused_imports)] // For rustdoc
use crate::CacheBucket;
use crate::{digest, CanonicalUrl};
/// Cache for metadata from (remote) wheels files
const WHEEL_METADATA_CACHE: &str = "wheel-metadata-v0";
/// Cache for metadata from wheels build from source dists
const BUILT_WHEEL_METADATA_CACHE: &str = "built-wheel-metadata-v0";
/// Cache wheel metadata, both from remote wheels and built from source distributions.
///
/// See [`WheelMetadataCache::wheel_dir`] for remote wheel metadata caching and
/// [`WheelMetadataCache::built_wheel_dir`] for caching of metadata of built source
/// distributions.
/// See [`WheelMetadataCache::wheel_dir`]/[`CacheBucket::WheelMetadata`] for remote wheel metadata
/// caching and [`WheelMetadataCache::built_wheel_dir`]/[`CacheBucket::BuiltWheelMetadata`] for
/// built source distributions metadata caching.
pub enum WheelMetadataCache<'a> {
/// Either pypi or an alternative index, which we key by index url
Index(&'a IndexUrl),
@ -44,90 +41,13 @@ impl<'a> WheelMetadataCache<'a> {
}
}
/// Metadata of a remote wheel
///
/// Cache structure:
/// * `<wheel metadata cache>/pypi/foo-1.0.0-py3-none-any.json`
/// * `<wheel metadata cache>/<digest(index-url)>/foo-1.0.0-py3-none-any.json`
/// * `<wheel metadata cache>/url/<digest(url)>/foo-1.0.0-py3-none-any.json`
///
/// See `puffin_client::RegistryClient::wheel_metadata` for information on how wheel metadata
/// is fetched.
///
/// # Example
/// ```text
/// # pypi wheel
/// pandas
/// # url wheel
/// flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
/// ```
/// may be cached as
/// ```text
/// wheel-metadata-v0
/// ├── pypi
/// │ ...
/// │ ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.json
/// │ ...
/// └── url
/// └── 4b8be67c801a7ecb
/// └── flask-3.0.0-py3-none-any.json
/// ```
/// Metadata of a remote wheel. See [`CacheBucket::WheelMetadata`]
pub fn wheel_dir(&self) -> PathBuf {
PathBuf::from(WHEEL_METADATA_CACHE).join(self.bucket())
self.bucket()
}
/// Metadata of a built source distribution
///
/// Cache structure:
/// * `<build wheel metadata cache>/pypi/foo-1.0.0.zip/metadata.json`
/// * `<build wheel metadata cache>/<sha256(index-url)>/foo-1.0.0.zip/metadata.json`
/// * `<build wheel metadata cache>/url/<sha256(url)>/foo-1.0.0.zip/metadata.json`
/// But the url filename does not need to be a valid source dist filename
/// (<https://github.com/search?q=path%3A**%2Frequirements.txt+master.zip&type=code>),
/// so it could also be the following and we have to take any string as filename:
/// * `<build wheel metadata cache>/url/<sha256(url)>/master.zip/metadata.json`
///
/// # Example
/// ```text
/// # git source dist
/// pydantic-extra-types @ git+https://github.com/pydantic/pydantic-extra-types.git
/// # pypi source dist
/// django_allauth==0.51.0
/// # url source dist
/// werkzeug @ https://files.pythonhosted.org/packages/0d/cc/ff1904eb5eb4b455e442834dabf9427331ac0fa02853bf83db817a7dd53d/werkzeug-3.0.1.tar.gz
/// ```
/// may be cached as
/// ```text
/// built-wheel-metadata-v0
/// ├── git
/// │ └── 5c56bc1c58c34c11
/// │ └── 843b753e9e8cb74e83cac55598719b39a4d5ef1f
/// │ └── metadata.json
/// ├── pypi
/// │ └── django-allauth-0.51.0.tar.gz
/// │ └── metadata.json
/// └── url
/// └── 6781bd6440ae72c2
/// └── werkzeug-3.0.1.tar.gz
/// └── metadata.json
/// ```
///
/// The inside of a `metadata.json`:
/// ```json
/// {
/// "data": {
/// "django_allauth-0.51.0-py3-none-any.whl": {
/// "metadata-version": "2.1",
/// "name": "django-allauth",
/// "version": "0.51.0",
/// ...
/// }
/// }
/// }
/// ```
/// Metadata of a built source distribution. See [`CacheBucket::BuiltWheelMetadata`]
pub fn built_wheel_dir(&self, filename: &str) -> PathBuf {
PathBuf::from(BUILT_WHEEL_METADATA_CACHE)
.join(self.bucket())
.join(filename)
self.bucket().join(filename)
}
}

View file

@ -0,0 +1,106 @@
use std::hash::Hasher;
use seahash::SeaHasher;
/// A trait for types that can be hashed in a stable way across versions and platforms.
pub trait StableHash {
    /// Feed this value into `state`.
    fn stable_hash(&self, state: &mut StableHasher);

    /// Feed every element of `data` into `state`, in order.
    fn stable_hash_slice(data: &[Self], state: &mut StableHasher)
    where
        Self: Sized,
    {
        for piece in data {
            piece.stable_hash(state);
        }
    }
}
/// A hasher whose output is stable across runs, backed by SeaHash.
#[derive(Clone, Default)]
pub struct StableHasher {
    // The underlying SeaHash hasher state.
    inner: SeaHasher,
}
impl StableHasher {
    /// A fresh hasher with no input written yet.
    pub fn new() -> Self {
        let inner = SeaHasher::new();
        Self { inner }
    }

    /// Consume the hasher, returning the final hash value.
    pub fn finish(self) -> u64 {
        let Self { inner } = self;
        inner.finish()
    }
}
/// Forwards every `Hasher` method to the inner `SeaHasher`.
///
/// Each `write_*` method is delegated explicitly instead of relying on the
/// trait's default implementations — presumably so the output matches
/// `SeaHasher`'s own specialized handling of each integer width (NOTE(review):
/// confirm against seahash's `Hasher` impl).
impl Hasher for StableHasher {
    #[inline]
    fn finish(&self) -> u64 {
        self.inner.finish()
    }

    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        self.inner.write(bytes);
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.inner.write_u8(i);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.inner.write_u16(i);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.inner.write_u32(i);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.inner.write_u64(i);
    }

    #[inline]
    fn write_u128(&mut self, i: u128) {
        self.inner.write_u128(i);
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.inner.write_usize(i);
    }

    #[inline]
    fn write_i8(&mut self, i: i8) {
        self.inner.write_i8(i);
    }

    #[inline]
    fn write_i16(&mut self, i: i16) {
        self.inner.write_i16(i);
    }

    #[inline]
    fn write_i32(&mut self, i: i32) {
        self.inner.write_i32(i);
    }

    #[inline]
    fn write_i64(&mut self, i: i64) {
        self.inner.write_i64(i);
    }

    #[inline]
    fn write_i128(&mut self, i: i128) {
        self.inner.write_i128(i);
    }

    #[inline]
    fn write_isize(&mut self, i: isize) {
        self.inner.write_isize(i);
    }
}

View file

@ -1,38 +1,40 @@
use std::fmt::Write;
use std::path::Path;
use anyhow::{Context, Result};
use fs_err as fs;
use tracing::debug;
use puffin_cache::Cache;
use crate::commands::ExitStatus;
use crate::printer::Printer;
/// Clear the cache.
pub(crate) fn clean(cache: &Path, mut printer: Printer) -> Result<ExitStatus> {
if !cache.exists() {
writeln!(printer, "No cache found at: {}", cache.display())?;
pub(crate) fn clean(cache: &Cache, mut printer: Printer) -> Result<ExitStatus> {
if !cache.root().exists() {
writeln!(printer, "No cache found at: {}", cache.root().display())?;
return Ok(ExitStatus::Success);
}
debug!("Clearing cache at: {}", cache.display());
debug!("Clearing cache at: {}", cache.root().display());
for entry in cache
.root()
.read_dir()
.with_context(|| {
format!(
"Failed to read directory contents while clearing {}",
cache.display()
cache.root().display()
)
})?
.flatten()
{
if entry.file_type()?.is_dir() {
fs::remove_dir_all(entry.path())
.with_context(|| format!("Failed to clear cache at {}", cache.display()))?;
.with_context(|| format!("Failed to clear cache at {}", cache.root().display()))?;
} else {
fs::remove_file(entry.path())
.with_context(|| format!("Failed to clear cache at {}", cache.display()))?;
.with_context(|| format!("Failed to clear cache at {}", cache.root().display()))?;
}
}

View file

@ -1,9 +1,8 @@
use std::path::Path;
use anyhow::Result;
use tracing::debug;
use platform_host::Platform;
use puffin_cache::Cache;
use puffin_installer::SitePackages;
use puffin_interpreter::Virtualenv;
@ -11,10 +10,10 @@ use crate::commands::ExitStatus;
use crate::printer::Printer;
/// Enumerate the installed packages in the current environment.
pub(crate) fn freeze(cache: &Path, _printer: Printer) -> Result<ExitStatus> {
pub(crate) fn freeze(cache: &Cache, _printer: Printer) -> Result<ExitStatus> {
// Detect the current Python interpreter.
let platform = Platform::current()?;
let python = Virtualenv::from_env(platform, Some(cache))?;
let python = Virtualenv::from_env(platform, cache)?;
debug!(
"Using Python interpreter: {}",
python.python_executable().display()

View file

@ -15,6 +15,7 @@ use tracing::debug;
use pep508_rs::Requirement;
use platform_host::Platform;
use platform_tags::Tags;
use puffin_cache::Cache;
use puffin_client::RegistryClientBuilder;
use puffin_dispatch::BuildDispatch;
use puffin_interpreter::Virtualenv;
@ -44,7 +45,7 @@ pub(crate) async fn pip_compile(
no_build: bool,
python_version: Option<PythonVersion>,
exclude_newer: Option<DateTime<Utc>>,
cache: &Path,
cache: Cache,
mut printer: Printer,
) -> Result<ExitStatus> {
miette::set_hook(Box::new(|_| {
@ -112,7 +113,7 @@ pub(crate) async fn pip_compile(
// Detect the current Python interpreter.
let platform = Platform::current()?;
let venv = Virtualenv::from_env(platform, Some(cache))?;
let venv = Virtualenv::from_env(platform, &cache)?;
debug!(
"Using Python {} at {}",
@ -136,7 +137,7 @@ pub(crate) async fn pip_compile(
// Instantiate a client.
let client = {
let mut builder = RegistryClientBuilder::new(cache);
let mut builder = RegistryClientBuilder::new(cache.clone());
if let Some(IndexUrls { index, extra_index }) = index_urls {
if let Some(index) = index {
builder = builder.index(index);
@ -150,7 +151,7 @@ pub(crate) async fn pip_compile(
let build_dispatch = BuildDispatch::new(
client.clone(),
cache.to_path_buf(),
cache.clone(),
interpreter_info,
fs::canonicalize(venv.python_executable())?,
no_build,

View file

@ -1,5 +1,4 @@
use std::fmt::Write;
use std::path::Path;
use anyhow::{Context, Result};
use colored::Colorize;
@ -12,6 +11,7 @@ use install_wheel_rs::linker::LinkMode;
use pep508_rs::Requirement;
use platform_host::Platform;
use platform_tags::Tags;
use puffin_cache::Cache;
use puffin_client::RegistryClientBuilder;
use puffin_dispatch::BuildDispatch;
use puffin_distribution::DistributionDatabase;
@ -31,7 +31,7 @@ pub(crate) async fn pip_sync(
link_mode: LinkMode,
index_urls: Option<IndexUrls>,
no_build: bool,
cache: &Path,
cache: Cache,
mut printer: Printer,
) -> Result<ExitStatus> {
// Read all requirements from the provided sources.
@ -52,7 +52,7 @@ pub(crate) async fn pip_sync(
link_mode,
index_urls,
no_build,
cache,
&cache,
printer,
)
.await
@ -64,14 +64,14 @@ pub(crate) async fn sync_requirements(
link_mode: LinkMode,
index_urls: Option<IndexUrls>,
no_build: bool,
cache: &Path,
cache: &Cache,
mut printer: Printer,
) -> Result<ExitStatus> {
let start = std::time::Instant::now();
// Detect the current Python interpreter.
let platform = Platform::current()?;
let venv = Virtualenv::from_env(platform, Some(cache))?;
let venv = Virtualenv::from_env(platform, cache)?;
debug!(
"Using Python interpreter: {}",
venv.python_executable().display()
@ -107,7 +107,7 @@ pub(crate) async fn sync_requirements(
// Instantiate a client.
let client = {
let mut builder = RegistryClientBuilder::new(cache);
let mut builder = RegistryClientBuilder::new(cache.clone());
if let Some(IndexUrls { index, extra_index }) = index_urls {
if let Some(index) = index {
builder = builder.index(index);
@ -179,7 +179,7 @@ pub(crate) async fn sync_requirements(
let build_dispatch = BuildDispatch::new(
client.clone(),
cache.to_path_buf(),
cache.clone(),
venv.interpreter().clone(),
fs::canonicalize(venv.python_executable())?,
no_build,

View file

@ -1,5 +1,4 @@
use std::fmt::Write;
use std::path::Path;
use anyhow::Result;
use colored::Colorize;
@ -7,6 +6,7 @@ use tracing::debug;
use distribution_types::Metadata;
use platform_host::Platform;
use puffin_cache::Cache;
use puffin_interpreter::Virtualenv;
use crate::commands::{elapsed, ExitStatus};
@ -16,7 +16,7 @@ use crate::requirements::{ExtrasSpecification, RequirementsSource, RequirementsS
/// Uninstall packages from the current environment.
pub(crate) async fn pip_uninstall(
sources: &[RequirementsSource],
cache: &Path,
cache: Cache,
mut printer: Printer,
) -> Result<ExitStatus> {
let start = std::time::Instant::now();
@ -31,7 +31,7 @@ pub(crate) async fn pip_uninstall(
// Detect the current Python interpreter.
let platform = Platform::current()?;
let venv = Virtualenv::from_env(platform, Some(cache))?;
let venv = Virtualenv::from_env(platform, &cache)?;
debug!(
"Using Python interpreter: {}",
venv.python_executable().display()

View file

@ -2,10 +2,10 @@ use std::path::PathBuf;
use anyhow::Result;
use miette::{Diagnostic, IntoDiagnostic};
use puffin_normalize::PackageName;
use thiserror::Error;
use tracing::info;
use puffin_normalize::PackageName;
use puffin_workspace::WorkspaceError;
use crate::commands::ExitStatus;

View file

@ -8,6 +8,7 @@ use miette::{Diagnostic, IntoDiagnostic};
use thiserror::Error;
use platform_host::Platform;
use puffin_cache::{Cache, CacheBucket};
use puffin_interpreter::Interpreter;
use crate::commands::ExitStatus;
@ -18,9 +19,10 @@ use crate::printer::Printer;
pub(crate) fn venv(
path: &Path,
base_python: Option<&Path>,
cache: &Cache,
printer: Printer,
) -> Result<ExitStatus> {
match venv_impl(path, base_python, printer) {
match venv_impl(path, base_python, cache, printer) {
Ok(status) => Ok(status),
Err(err) => {
#[allow(clippy::print_stderr)]
@ -55,6 +57,7 @@ enum VenvError {
fn venv_impl(
path: &Path,
base_python: Option<&Path>,
cache: &Cache,
mut printer: Printer,
) -> miette::Result<ExitStatus> {
// Locate the Python interpreter.
@ -74,8 +77,12 @@ fn venv_impl(
};
let platform = Platform::current().into_diagnostic()?;
let interpreter_info =
Interpreter::query(&base_python, platform, None).map_err(VenvError::InterpreterError)?;
let interpreter_info = Interpreter::query(
&base_python,
platform,
&cache.bucket(CacheBucket::Interpreter),
)
.map_err(VenvError::InterpreterError)?;
writeln!(
printer,

View file

@ -8,7 +8,7 @@ use clap::{Args, Parser, Subcommand};
use colored::Colorize;
use url::Url;
use puffin_cache::{CacheArgs, CacheDir};
use puffin_cache::{Cache, CacheArgs};
use puffin_normalize::{ExtraName, PackageName};
use puffin_resolver::{PreReleaseMode, ResolutionMode};
use requirements::ExtrasSpecification;
@ -249,7 +249,7 @@ async fn inner() -> Result<ExitStatus> {
printer::Printer::Default
};
let cache_dir = CacheDir::try_from(cli.cache_args)?;
let cache = Cache::try_from(cli.cache_args)?;
match cli.command {
Commands::PipCompile(args) => {
@ -286,7 +286,7 @@ async fn inner() -> Result<ExitStatus> {
args.no_build,
args.python_version,
args.exclude_newer,
cache_dir.path(),
cache,
printer,
)
.await
@ -304,7 +304,7 @@ async fn inner() -> Result<ExitStatus> {
args.link_mode.unwrap_or_default(),
index_urls,
args.no_build,
cache_dir.path(),
cache,
printer,
)
.await
@ -316,11 +316,11 @@ async fn inner() -> Result<ExitStatus> {
.map(RequirementsSource::from)
.chain(args.requirement.into_iter().map(RequirementsSource::from))
.collect::<Vec<_>>();
commands::pip_uninstall(&sources, cache_dir.path(), printer).await
commands::pip_uninstall(&sources, cache, printer).await
}
Commands::Clean => commands::clean(cache_dir.path(), printer),
Commands::Freeze => commands::freeze(cache_dir.path(), printer),
Commands::Venv(args) => commands::venv(&args.name, args.python.as_deref(), printer),
Commands::Clean => commands::clean(&cache, printer),
Commands::Freeze => commands::freeze(&cache, printer),
Commands::Venv(args) => commands::venv(&args.name, args.python.as_deref(), &cache, printer),
Commands::Add(args) => commands::add(&args.name, printer),
Commands::Remove(args) => commands::remove(&args.name, printer),
}

View file

@ -1,5 +1,4 @@
use std::future::Future;
use std::path::Path;
use std::time::SystemTime;
use http_cache_semantics::{AfterResponse, BeforeRequest, CachePolicy};
@ -10,6 +9,8 @@ use serde::{Deserialize, Serialize};
use tempfile::NamedTempFile;
use tracing::{debug, trace, warn};
use puffin_cache::CacheEntry;
/// Either a cached client error or a (user specified) error from the callback
pub enum CachedClientError<CallbackError> {
Client(crate::Error),
@ -92,24 +93,22 @@ impl CachedClient {
>(
&self,
req: Request,
cache_dir: &Path,
cache_file: &str,
cache_entry: &CacheEntry,
response_callback: Callback,
) -> Result<Payload, CachedClientError<CallBackError>>
where
Callback: FnOnce(Response) -> CallbackReturn,
CallbackReturn: Future<Output = Result<Payload, CallBackError>>,
{
let cache_path = cache_dir.join(cache_file);
let cached = if let Ok(cached) = fs_err::tokio::read(&cache_path).await {
let cached = if let Ok(cached) = fs_err::tokio::read(cache_entry.path()).await {
match serde_json::from_slice::<DataWithCachePolicy<Payload>>(&cached) {
Ok(data) => Some(data),
Err(err) => {
warn!(
"Broken cache entry at {}, removing: {err}",
cache_path.display()
cache_entry.path().display()
);
let _ = fs_err::tokio::remove_file(&cache_path).await;
let _ = fs_err::tokio::remove_file(&cache_entry.path()).await;
None
}
}
@ -122,14 +121,17 @@ impl CachedClient {
match cached_response {
CachedResponse::FreshCache(data) => Ok(data),
CachedResponse::NotModified(data_with_cache_policy) => {
let temp_file = NamedTempFile::new_in(cache_dir).map_err(crate::Error::from)?;
let temp_file =
NamedTempFile::new_in(&cache_entry.dir).map_err(crate::Error::from)?;
fs_err::tokio::write(
&temp_file,
&serde_json::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?,
)
.await
.map_err(crate::Error::from)?;
temp_file.persist(cache_path).map_err(crate::Error::from)?;
temp_file
.persist(cache_entry.path())
.map_err(crate::Error::from)?;
Ok(data_with_cache_policy.data)
}
CachedResponse::ModifiedOrNew(res, cache_policy) => {
@ -138,17 +140,19 @@ impl CachedClient {
.map_err(|err| CachedClientError::Callback(err))?;
if let Some(cache_policy) = cache_policy {
let data_with_cache_policy = DataWithCachePolicy { data, cache_policy };
fs_err::tokio::create_dir_all(cache_dir)
fs_err::tokio::create_dir_all(&cache_entry.dir)
.await
.map_err(crate::Error::from)?;
let temp_file = NamedTempFile::new_in(cache_dir).map_err(crate::Error::from)?;
fs_err::tokio::write(
&temp_file,
&serde_json::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?,
)
.await
.map_err(crate::Error::from)?;
temp_file.persist(cache_path).map_err(crate::Error::from)?;
let temp_file =
NamedTempFile::new_in(&cache_entry.dir).map_err(crate::Error::from)?;
let data =
serde_json::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?;
fs_err::tokio::write(&temp_file, &data)
.await
.map_err(crate::Error::from)?;
temp_file
.persist(cache_entry.path())
.map_err(crate::Error::from)?;
Ok(data_with_cache_policy.data)
} else {
Ok(data)

View file

@ -14,6 +14,7 @@ pub enum Error {
#[error(transparent)]
UrlParseError(#[from] url::ParseError),
/// Dist-info error
#[error(transparent)]
InstallWheel(#[from] install_wheel_rs::Error),
@ -66,7 +67,7 @@ pub enum Error {
#[error("The wheel {0} is not a valid zip file")]
Zip(WheelFilename, #[source] ZipError),
#[error(transparent)]
#[error("Failed to write to the client cache")]
IO(#[from] io::Error),
/// An [`io::Error`] with a filename attached

View file

@ -1,5 +1,5 @@
use std::fmt::Debug;
use std::path::PathBuf;
use std::path::Path;
use std::str::FromStr;
use async_http_range_reader::{AsyncHttpRangeReader, AsyncHttpRangeReaderError};
@ -17,15 +17,13 @@ use url::Url;
use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, Metadata};
use install_wheel_rs::find_dist_info;
use puffin_cache::{digest, CanonicalUrl, WheelMetadataCache};
use puffin_cache::{digest, Cache, CacheBucket, CanonicalUrl, WheelMetadataCache};
use puffin_normalize::PackageName;
use pypi_types::{File, IndexUrl, Metadata21, SimpleJson};
use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::{CachedClient, CachedClientError, Error};
const SIMPLE_CACHE: &str = "simple-v0";
/// A builder for an [`RegistryClient`].
#[derive(Debug, Clone)]
pub struct RegistryClientBuilder {
@ -34,17 +32,17 @@ pub struct RegistryClientBuilder {
no_index: bool,
proxy: Url,
retries: u32,
cache: PathBuf,
cache: Cache,
}
impl RegistryClientBuilder {
pub fn new(cache: impl Into<PathBuf>) -> Self {
pub fn new(cache: Cache) -> Self {
Self {
index: IndexUrl::Pypi,
extra_index: vec![],
no_index: false,
proxy: Url::parse("https://pypi-metadata.ruff.rs").unwrap(),
cache: cache.into(),
cache,
retries: 0,
}
}
@ -82,8 +80,8 @@ impl RegistryClientBuilder {
}
#[must_use]
pub fn cache<T>(mut self, cache: impl Into<PathBuf>) -> Self {
self.cache = cache.into();
pub fn cache<T>(mut self, cache: Cache) -> Self {
self.cache = cache;
self
}
@ -129,7 +127,7 @@ pub struct RegistryClient {
/// [`reqwest::Client] instead of [`reqwest_middleware::Client`]
pub(crate) client_raw: Client,
/// Used for the remote wheel METADATA cache
pub(crate) cache: PathBuf,
pub(crate) cache: Cache,
}
impl RegistryClient {
@ -160,11 +158,14 @@ impl RegistryClient {
url
);
let cache_dir = self.cache.join(SIMPLE_CACHE).join(match index {
IndexUrl::Pypi => "pypi".to_string(),
IndexUrl::Url(url) => digest(&CanonicalUrl::new(url)),
});
let cache_file = format!("{}.json", package_name.as_ref());
let cache_entry = self.cache.entry(
CacheBucket::Simple,
Path::new(&match index {
IndexUrl::Pypi => "pypi".to_string(),
IndexUrl::Url(url) => digest(&CanonicalUrl::new(url)),
}),
format!("{}.json", package_name.as_ref()),
);
let simple_request = self
.client
@ -180,12 +181,7 @@ impl RegistryClient {
};
let result = self
.client
.get_cached_with_callback(
simple_request,
&cache_dir,
&cache_file,
parse_simple_response,
)
.get_cached_with_callback(simple_request, &cache_entry, parse_simple_response)
.await;
// Fetch from the index.
@ -265,10 +261,11 @@ impl RegistryClient {
{
let url = Url::parse(&format!("{}.metadata", file.url))?;
let cache_dir = self
.cache
.join(WheelMetadataCache::Index(&index).wheel_dir());
let cache_file = format!("{}.json", filename.stem());
let cache_entry = self.cache.entry(
CacheBucket::WheelMetadata,
WheelMetadataCache::Index(&index).wheel_dir(),
format!("{}.json", filename.stem()),
);
let response_callback = |response: Response| async {
Metadata21::parse(response.bytes().await?.as_ref())
@ -277,7 +274,7 @@ impl RegistryClient {
let req = self.client.uncached().get(url.clone()).build()?;
Ok(self
.client
.get_cached_with_callback(req, &cache_dir, &cache_file, response_callback)
.get_cached_with_callback(req, &cache_entry, response_callback)
.await?)
} else {
// If we lack PEP 658 support, try using HTTP range requests to read only the
@ -299,8 +296,11 @@ impl RegistryClient {
return Err(Error::NoIndex(url.to_string()));
}
let cache_dir = self.cache.join(cache_shard.wheel_dir());
let cache_file = format!("{}.json", filename.stem());
let cache_entry = self.cache.entry(
CacheBucket::WheelMetadata,
cache_shard.wheel_dir(),
format!("{}.json", filename.stem()),
);
// This response callback is special, we actually make a number of subsequent requests to
// fetch the file from the remote zip.
@ -317,12 +317,7 @@ impl RegistryClient {
let req = self.client.uncached().head(url.clone()).build()?;
let result = self
.client
.get_cached_with_callback(
req,
&cache_dir,
&cache_file,
read_metadata_from_initial_response,
)
.get_cached_with_callback(req, &cache_entry, read_metadata_from_initial_response)
.await;
match result {

View file

@ -1,17 +1,17 @@
use std::str::FromStr;
use anyhow::Result;
use tempfile::tempdir;
use url::Url;
use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, DirectUrlBuiltDist};
use puffin_cache::Cache;
use puffin_client::RegistryClientBuilder;
#[tokio::test]
async fn remote_metadata_with_and_without_cache() -> Result<()> {
let temp_cache = tempdir()?;
let client = RegistryClientBuilder::new(temp_cache.path().to_path_buf()).build();
let cache = Cache::temp()?;
let client = RegistryClientBuilder::new(cache).build();
// The first run is without cache (the tempdir is empty), the second has the cache from the
// first run.

View file

@ -6,7 +6,7 @@ use clap::Parser;
use fs_err as fs;
use platform_host::Platform;
use puffin_cache::{CacheArgs, CacheDir};
use puffin_cache::{Cache, CacheArgs};
use puffin_client::RegistryClientBuilder;
use puffin_dispatch::BuildDispatch;
use puffin_interpreter::Virtualenv;
@ -38,14 +38,14 @@ pub(crate) async fn build(args: BuildArgs) -> Result<PathBuf> {
env::current_dir()?
};
let cache_dir = CacheDir::try_from(args.cache_args)?;
let cache = Cache::try_from(args.cache_args)?;
let platform = Platform::current()?;
let venv = Virtualenv::from_env(platform, Some(cache_dir.path()))?;
let venv = Virtualenv::from_env(platform, &cache)?;
let build_dispatch = BuildDispatch::new(
RegistryClientBuilder::new(cache_dir.path().clone()).build(),
cache_dir.path().clone(),
RegistryClientBuilder::new(cache.clone()).build(),
cache,
venv.interpreter().clone(),
fs::canonicalize(venv.python_executable())?,
false,

View file

@ -12,7 +12,7 @@ use petgraph::dot::{Config as DotConfig, Dot};
use pep508_rs::Requirement;
use platform_host::Platform;
use platform_tags::Tags;
use puffin_cache::{CacheArgs, CacheDir};
use puffin_cache::{Cache, CacheArgs};
use puffin_client::RegistryClientBuilder;
use puffin_dispatch::BuildDispatch;
use puffin_interpreter::Virtualenv;
@ -42,14 +42,14 @@ pub(crate) struct ResolveCliArgs {
}
pub(crate) async fn resolve_cli(args: ResolveCliArgs) -> Result<()> {
let cache_dir = CacheDir::try_from(args.cache_args)?;
let cache = Cache::try_from(args.cache_args)?;
let platform = Platform::current()?;
let venv = Virtualenv::from_env(platform, Some(cache_dir.path()))?;
let client = RegistryClientBuilder::new(cache_dir.path().clone()).build();
let venv = Virtualenv::from_env(platform, &cache)?;
let client = RegistryClientBuilder::new(cache.clone()).build();
let build_dispatch = BuildDispatch::new(
client.clone(),
cache_dir.path().clone(),
cache.clone(),
venv.interpreter().clone(),
fs::canonicalize(venv.python_executable())?,
args.no_build,

View file

@ -10,7 +10,7 @@ use futures::StreamExt;
use indicatif::ProgressStyle;
use pep508_rs::Requirement;
use platform_host::Platform;
use puffin_cache::{CacheArgs, CacheDir};
use puffin_cache::{Cache, CacheArgs};
use puffin_client::RegistryClientBuilder;
use puffin_dispatch::BuildDispatch;
use puffin_interpreter::Virtualenv;
@ -37,7 +37,7 @@ pub(crate) struct ResolveManyArgs {
}
pub(crate) async fn resolve_many(args: ResolveManyArgs) -> Result<()> {
let cache_dir = CacheDir::try_from(args.cache_args)?;
let cache = Cache::try_from(args.cache_args)?;
let data = fs::read_to_string(&args.list)?;
let lines = data.lines().map(Requirement::from_str);
@ -48,10 +48,10 @@ pub(crate) async fn resolve_many(args: ResolveManyArgs) -> Result<()> {
};
let platform = Platform::current()?;
let venv = Virtualenv::from_env(platform, Some(cache_dir.path()))?;
let venv = Virtualenv::from_env(platform, &cache)?;
let build_dispatch = BuildDispatch::new(
RegistryClientBuilder::new(cache_dir.path().clone()).build(),
cache_dir.path().clone(),
RegistryClientBuilder::new(cache.clone()).build(),
cache.clone(),
venv.interpreter().clone(),
fs::canonicalize(venv.python_executable())?,
args.no_build,

View file

@ -6,7 +6,7 @@ use url::Url;
use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, DirectUrlBuiltDist};
use puffin_cache::{CacheArgs, CacheDir};
use puffin_cache::{Cache, CacheArgs};
use puffin_client::RegistryClientBuilder;
#[derive(Parser)]
@ -17,9 +17,9 @@ pub(crate) struct WheelMetadataArgs {
}
pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> Result<()> {
let cache_dir = CacheDir::try_from(args.cache_args)?;
let cache_dir = Cache::try_from(args.cache_args)?;
let client = RegistryClientBuilder::new(cache_dir.path().clone()).build();
let client = RegistryClientBuilder::new(cache_dir.clone()).build();
let filename = WheelFilename::from_str(
args.url

View file

@ -20,6 +20,7 @@ pep508_rs = { path = "../pep508-rs" }
platform-host = { path = "../platform-host" }
platform-tags = { path = "../platform-tags" }
puffin-build = { path = "../puffin-build" }
puffin-cache = { path = "../puffin-cache" }
puffin-client = { path = "../puffin-client" }
puffin-distribution = { path = "../puffin-distribution" }
puffin-installer = { path = "../puffin-installer" }

View file

@ -15,6 +15,7 @@ use distribution_types::Metadata;
use pep508_rs::Requirement;
use platform_tags::Tags;
use puffin_build::{SourceBuild, SourceBuildContext};
use puffin_cache::Cache;
use puffin_client::RegistryClient;
use puffin_distribution::DistributionDatabase;
use puffin_installer::{InstallPlan, Installer, Unzipper};
@ -26,7 +27,7 @@ use puffin_traits::BuildContext;
/// documentation.
pub struct BuildDispatch {
client: RegistryClient,
cache: PathBuf,
cache: Cache,
interpreter: Interpreter,
base_python: PathBuf,
no_build: bool,
@ -37,7 +38,7 @@ pub struct BuildDispatch {
impl BuildDispatch {
pub fn new(
client: RegistryClient,
cache: PathBuf,
cache: Cache,
interpreter: Interpreter,
base_python: PathBuf,
no_build: bool,
@ -61,8 +62,8 @@ impl BuildDispatch {
}
impl BuildContext for BuildDispatch {
fn cache(&self) -> &Path {
self.cache.as_path()
fn cache(&self) -> &Cache {
&self.cache
}
fn interpreter(&self) -> &Interpreter {
@ -121,7 +122,7 @@ impl BuildContext for BuildDispatch {
local,
remote,
extraneous,
} = InstallPlan::try_from_requirements(requirements, &self.cache, venv, &tags)?;
} = InstallPlan::try_from_requirements(requirements, self.cache(), venv, &tags)?;
// Resolve the dependencies.
let remote = if remote.is_empty() {
@ -167,7 +168,7 @@ impl BuildContext for BuildDispatch {
wheels.iter().map(ToString::to_string).join(", ")
);
Unzipper::default()
.unzip(wheels, &self.cache)
.unzip(wheels, self.cache())
.await
.context("Failed to unpack build dependencies")?
};

View file

@ -1,7 +1,6 @@
use std::borrow::Cow;
use std::cmp::Reverse;
use std::io;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
@ -18,6 +17,7 @@ use distribution_filename::{WheelFilename, WheelFilenameError};
use distribution_types::direct_url::DirectGitUrl;
use distribution_types::{BuiltDist, Dist, Metadata, RemoteSource, SourceDist};
use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket};
use puffin_client::RegistryClient;
use puffin_git::GitSource;
use puffin_traits::BuildContext;
@ -31,12 +31,6 @@ use crate::{
SourceDistError,
};
// The cache subdirectory in which to store Git repositories.
const GIT_CACHE: &str = "git-v0";
// The cache subdirectory in which to store downloaded wheel archives.
const ARCHIVES_CACHE: &str = "archives-v0";
#[derive(Debug, Error)]
pub enum DistributionDatabaseError {
#[error("Failed to parse '{0}' as url")]
@ -75,7 +69,7 @@ pub enum DistributionDatabaseError {
/// This struct also has the task of acquiring locks around source dist builds in general and git
/// operation especially.
pub struct DistributionDatabase<'a, Context: BuildContext + Send + Sync> {
cache: &'a Path,
cache: &'a Cache,
reporter: Option<Arc<dyn Reporter>>,
locks: Arc<Locks>,
client: &'a RegistryClient,
@ -85,7 +79,7 @@ pub struct DistributionDatabase<'a, Context: BuildContext + Send + Sync> {
impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> {
pub fn new(
cache: &'a Path,
cache: &'a Cache,
tags: &'a Tags,
client: &'a RegistryClient,
build_context: &'a Context,
@ -193,7 +187,10 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
// Create a directory for the wheel.
// TODO(konstin): Change this when the built wheel naming scheme is fixed.
let wheel_dir = self.cache.join(ARCHIVES_CACHE).join(wheel.package_id());
let wheel_dir = self
.cache
.bucket(CacheBucket::Archives)
.join(wheel.package_id());
fs::create_dir_all(&wheel_dir).await?;
// Download the wheel to a temporary file.
@ -221,7 +218,10 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
// Create a directory for the wheel.
// TODO(konstin): Change this when the built wheel naming scheme is fixed.
let wheel_dir = self.cache.join(ARCHIVES_CACHE).join(wheel.package_id());
let wheel_dir = self
.cache
.bucket(CacheBucket::Archives)
.join(wheel.package_id());
fs::create_dir_all(&wheel_dir).await?;
// Fetch the wheel.
@ -315,7 +315,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
let SourceDist::Git(source_dist) = dist else {
return Ok(None);
};
let git_dir = self.cache.join(GIT_CACHE);
let git_dir = self.build_context.cache().bucket(CacheBucket::Git);
let DirectGitUrl { url, subdirectory } =
DirectGitUrl::try_from(&source_dist.url).map_err(DistributionDatabaseError::Git)?;

View file

@ -23,7 +23,8 @@ use distribution_types::direct_url::{DirectArchiveUrl, DirectGitUrl};
use distribution_types::{Dist, GitSourceDist, Identifier, RemoteSource, SourceDist};
use install_wheel_rs::find_dist_info;
use platform_tags::Tags;
use puffin_cache::{digest, CanonicalUrl, WheelMetadataCache};
use puffin_cache::CacheBucket::BuiltWheels;
use puffin_cache::{digest, Cache, CacheBucket, CanonicalUrl, WheelMetadataCache};
use puffin_client::{CachedClient, CachedClientError, DataWithCachePolicy};
use puffin_git::{Fetch, GitSource};
use puffin_normalize::PackageName;
@ -34,9 +35,6 @@ use crate::download::BuiltWheel;
use crate::locks::LockedFile;
use crate::{Download, Reporter, SourceDistDownload};
const BUILT_WHEELS_CACHE: &str = "built-wheels-v0";
const GIT_CACHE: &str = "git-v0";
/// The caller is responsible for adding the source dist information to the error chain
#[derive(Debug, Error)]
pub enum SourceDistError {
@ -51,7 +49,7 @@ pub enum SourceDistError {
Client(#[from] puffin_client::Error),
// Cache writing error
#[error(transparent)]
#[error("Failed to write to source dist cache")]
Io(#[from] std::io::Error),
#[error("Cache (de)serialization failed")]
Serde(#[from] serde_json::Error),
@ -96,12 +94,12 @@ impl BuiltWheelMetadata {
fn from_cached(
filename: &WheelFilename,
cached_data: &DiskFilenameAndMetadata,
cache: &Path,
cache: &Cache,
source_dist: &SourceDist,
) -> Self {
// TODO(konstin): Change this when the built wheel naming scheme is fixed
let wheel_dir = cache
.join(BUILT_WHEELS_CACHE)
.bucket(CacheBucket::BuiltWheels)
.join(source_dist.distribution_id());
Self {
@ -183,7 +181,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
let wheel_dir = self
.build_context
.cache()
.join(BUILT_WHEELS_CACHE)
.bucket(CacheBucket::BuiltWheels)
.join(source_dist.distribution_id());
fs::create_dir_all(&wheel_dir).await?;
@ -231,11 +229,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
cache_shard: WheelMetadataCache<'data>,
subdirectory: Option<&'data Path>,
) -> Result<BuiltWheelMetadata, SourceDistError> {
let cache_dir = self
.build_context
.cache()
.join(cache_shard.built_wheel_dir(filename));
let cache_file = METADATA_JSON;
let cache_entry = self.build_context.cache().entry(
CacheBucket::BuiltWheelMetadata,
cache_shard.built_wheel_dir(filename),
METADATA_JSON.to_string(),
);
let response_callback = |response| async {
debug!("Downloading and building source distribution: {source_dist}");
@ -256,7 +254,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
reporter.on_download_progress(&Download::SourceDist(download.clone()));
}
let (disk_filename, wheel_filename, metadata) = self
let (_wheel_filename, disk_filename, wheel_filename, metadata) = self
.build_source_dist(
&download.dist,
temp_dir,
@ -285,7 +283,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
let req = self.cached_client.uncached().get(url.clone()).build()?;
let metadatas = self
.cached_client
.get_cached_with_callback(req, &cache_dir, cache_file, response_callback)
.get_cached_with_callback(req, &cache_entry, response_callback)
.await
.map_err(|err| match err {
CachedClientError::Callback(err) => err,
@ -318,7 +316,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
.await
.map_err(puffin_client::Error::RequestMiddlewareError)?;
let (temp_dir, sdist_file) = self.download_source_dist_url(response, filename).await?;
let (disk_filename, wheel_filename, metadata) = self
let (_wheel_dir, disk_filename, wheel_filename, metadata) = self
.build_source_dist(source_dist, temp_dir, &sdist_file, subdirectory)
.await
.map_err(SourceDistError::Build)?;
@ -337,7 +335,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
// have to build a source dist next.
// Just return if the response wasn't cacheable or there were other errors that
// `CachedClient` already complained about
if let Ok(cached) = fs::read(cache_dir.join(cache_file)).await {
if let Ok(cached) = fs::read(cache_entry.path()).await {
// If the file exists and it was just read or written by `CachedClient`, we assume it must
// be correct.
let mut cached = serde_json::from_slice::<DataWithCachePolicy<Metadata21s>>(&cached)?;
@ -345,7 +343,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
cached
.data
.insert(wheel_filename.clone(), cached_data.clone());
fs::write(cache_file, serde_json::to_vec(&cached)?).await?;
fs::write(cache_entry.path(), serde_json::to_vec(&cached)?).await?;
};
Ok(BuiltWheelMetadata::from_cached(
@ -369,22 +367,14 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
.expect("Exact commit after checkout")
.to_string();
let cache_shard = WheelMetadataCache::Git(&git_source_dist.url);
let cache_dir = self
.build_context
.cache()
.join(cache_shard.built_wheel_dir(&git_sha));
let cache_file = cache_dir.join(METADATA_JSON);
let cache_entry = self.build_context.cache().entry(
CacheBucket::BuiltWheelMetadata,
cache_shard.built_wheel_dir(&git_sha),
METADATA_JSON.to_string(),
);
// TODO(konstin): Change this when the built wheel naming scheme is fixed.
let wheel_dir = self
.build_context
.cache()
.join(BUILT_WHEELS_CACHE)
.join(git_source_dist.distribution_id());
fs::create_dir_all(&wheel_dir).await?;
let mut metadatas = if cache_file.is_file() {
let cached = fs::read(&cache_file).await?;
let mut metadatas = if cache_entry.path().is_file() {
let cached = fs::read(&cache_entry.path()).await?;
let metadatas = serde_json::from_slice::<Metadata21s>(&cached)?;
// Do we have a previous compatible build of this source dist?
if let Some((filename, cached_data)) = metadatas
@ -408,7 +398,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
.as_ref()
.map(|reporter| reporter.on_build_start(source_dist));
let (disk_filename, filename, metadata) = self
let (wheel_dir, disk_filename, filename, metadata) = self
.build_source_dist(source_dist, None, fetch.path(), subdirectory.as_deref())
.await
.map_err(SourceDistError::Build)?;
@ -429,8 +419,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
},
);
let cached = serde_json::to_vec(&metadatas)?;
fs::create_dir_all(cache_dir).await?;
fs::write(cache_file, cached).await?;
fs::create_dir_all(&cache_entry.dir).await?;
fs::write(cache_entry.path(), cached).await?;
if let Some(task) = task {
if let Some(reporter) = self.reporter.as_ref() {
@ -457,7 +447,9 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
let mut reader = tokio::io::BufReader::new(reader.compat());
// Download the source distribution.
let temp_dir = tempfile::tempdir_in(self.build_context.cache())?;
let cache_dir = self.build_context.cache().bucket(BuiltWheels);
fs::create_dir_all(&cache_dir).await?;
let temp_dir = tempfile::tempdir_in(cache_dir)?;
let sdist_file = temp_dir.path().join(source_dist_filename);
let mut writer = tokio::fs::File::create(&sdist_file).await?;
tokio::io::copy(&mut reader, &mut writer).await?;
@ -469,7 +461,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
url: &Url,
) -> Result<(Fetch, Option<PathBuf>), SourceDistError> {
debug!("Fetching source distribution from Git: {url}");
let git_dir = self.build_context.cache().join(GIT_CACHE);
let git_dir = self.build_context.cache().bucket(CacheBucket::Git);
// Avoid races between different processes, too.
let locks_dir = git_dir.join("locks");
@ -499,7 +491,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
temp_dir: Option<TempDir>,
source_dist: &Path,
subdirectory: Option<&Path>,
) -> anyhow::Result<(String, WheelFilename, Metadata21)> {
) -> anyhow::Result<(PathBuf, String, WheelFilename, Metadata21)> {
debug!("Building: {dist}");
if self.build_context.no_build() {
@ -510,7 +502,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
let wheel_dir = self
.build_context
.cache()
.join(BUILT_WHEELS_CACHE)
.bucket(CacheBucket::BuiltWheels)
.join(dist.distribution_id());
fs::create_dir_all(&wheel_dir).await?;
@ -535,7 +527,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
let metadata = Metadata21::parse(dist_info.as_bytes())?;
debug!("Finished building: {dist}");
Ok((disk_filename, filename, metadata))
Ok((wheel_dir, disk_filename, filename, metadata))
}
}

View file

@ -4,9 +4,7 @@ use fs_err as fs;
use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, Dist, Metadata, SourceDist, VersionOrUrl};
use puffin_cache::{digest, CanonicalUrl};
static WHEEL_CACHE: &str = "wheels-v0";
use puffin_cache::{digest, Cache, CacheBucket, CanonicalUrl};
#[derive(Debug)]
pub(crate) struct WheelCache {
@ -15,9 +13,9 @@ pub(crate) struct WheelCache {
impl WheelCache {
/// Create a handle to the wheel cache.
pub(crate) fn new(root: &Path) -> Self {
pub(crate) fn new(cache: &Cache) -> Self {
Self {
root: root.join(WHEEL_CACHE),
root: cache.bucket(CacheBucket::Wheels),
}
}

View file

@ -1,14 +1,14 @@
use std::path::Path;
use std::str::FromStr;
use anyhow::{bail, Context, Result};
use distribution_filename::WheelFilename;
use tracing::debug;
use distribution_filename::WheelFilename;
use distribution_types::direct_url::DirectUrl;
use distribution_types::{CachedDist, InstalledDist, RemoteSource};
use pep508_rs::{Requirement, VersionOrUrl};
use platform_tags::Tags;
use puffin_cache::Cache;
use puffin_interpreter::Virtualenv;
use crate::url_index::UrlIndex;
@ -34,7 +34,7 @@ impl InstallPlan {
/// need to be downloaded, and those that should be removed.
pub fn try_from_requirements(
requirements: &[Requirement],
cache: &Path,
cache: &Cache,
venv: &Virtualenv,
tags: &Tags,
) -> Result<Self> {

View file

@ -1,11 +1,11 @@
use std::collections::HashMap;
use std::path::Path;
use fs_err as fs;
use tracing::warn;
use distribution_types::{CachedRegistryDist, Metadata};
use platform_tags::Tags;
use puffin_cache::Cache;
use puffin_normalize::PackageName;
use crate::cache::{CacheShard, WheelCache};
@ -16,10 +16,10 @@ pub struct RegistryIndex(HashMap<PackageName, CachedRegistryDist>);
impl RegistryIndex {
/// Build an index of cached distributions from a directory.
pub fn try_from_directory(path: &Path, tags: &Tags) -> Self {
pub fn try_from_directory(cache: &Cache, tags: &Tags) -> Self {
let mut index = HashMap::new();
let cache = WheelCache::new(path);
let cache = WheelCache::new(cache);
let Ok(dir) = cache.read_dir(CacheShard::Registry) else {
return Self(index);
};
@ -31,7 +31,7 @@ impl RegistryIndex {
Err(err) => {
warn!(
"Failed to read entry of cache at {}: {}",
path.display(),
cache.root().display(),
err
);
continue;

View file

@ -1,10 +1,10 @@
use std::cmp::Reverse;
use std::path::Path;
use anyhow::Result;
use tracing::debug;
use distribution_types::{CachedDist, Dist, Identifier, RemoteSource};
use puffin_cache::Cache;
use puffin_distribution::{LocalWheel, Unzip};
use crate::cache::WheelCache;
@ -27,10 +27,10 @@ impl Unzipper {
pub async fn unzip(
&self,
downloads: Vec<LocalWheel>,
target: &Path,
cache: &Cache,
) -> Result<Vec<CachedDist>> {
// Create the wheel cache subdirectory, if necessary.
let wheel_cache = WheelCache::new(target);
let wheel_cache = WheelCache::new(cache);
wheel_cache.init()?;
// Sort the wheels by size.

View file

@ -1,4 +1,4 @@
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use fxhash::FxHashMap;
use tracing::warn;
@ -6,6 +6,7 @@ use url::Url;
use distribution_filename::WheelFilename;
use distribution_types::{CachedDirectUrlDist, Identifier};
use puffin_cache::Cache;
use crate::cache::{CacheShard, WheelCache};
@ -16,10 +17,10 @@ pub(crate) struct UrlIndex(FxHashMap<String, PathBuf>);
impl UrlIndex {
/// Build an index of cached distributions from a directory.
pub(crate) fn try_from_directory(path: &Path) -> Self {
pub(crate) fn try_from_directory(cache: &Cache) -> Self {
let mut index = FxHashMap::default();
let cache = WheelCache::new(path);
let cache = WheelCache::new(cache);
let Ok(dir) = cache.read_dir(CacheShard::Url) else {
return Self(index);
};
@ -30,7 +31,7 @@ impl UrlIndex {
Err(err) => {
warn!(
"Failed to read entry of cache at {}: {}",
path.display(),
cache.root().display(),
err
);
continue;

View file

@ -16,6 +16,7 @@ workspace = true
pep440_rs = { path = "../pep440-rs" }
pep508_rs = { path = "../pep508-rs", features = ["serde"] }
platform-host = { path = "../platform-host" }
puffin-cache = { path = "../puffin-cache" }
cacache = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }

View file

@ -24,16 +24,8 @@ pub struct Interpreter {
impl Interpreter {
/// Detect the interpreter info for the given Python executable.
pub fn query(
executable: &Path,
platform: Platform,
cache: Option<&Path>,
) -> Result<Self, Error> {
let info = if let Some(cache) = cache {
InterpreterQueryResult::query_cached(executable, cache)?
} else {
InterpreterQueryResult::query(executable)?
};
pub fn query(executable: &Path, platform: Platform, cache: &Path) -> Result<Self, Error> {
let info = InterpreterQueryResult::query_cached(executable, cache)?;
debug_assert!(
info.base_prefix == info.base_exec_prefix,
"Not a venv python: {}, prefix: {}",

View file

@ -4,6 +4,7 @@ use std::path::{Path, PathBuf};
use tracing::debug;
use platform_host::Platform;
use puffin_cache::{Cache, CacheBucket};
use crate::python_platform::PythonPlatform;
use crate::{Error, Interpreter};
@ -17,13 +18,17 @@ pub struct Virtualenv {
impl Virtualenv {
/// Venv the current Python executable from the host environment.
pub fn from_env(platform: Platform, cache: Option<&Path>) -> Result<Self, Error> {
pub fn from_env(platform: Platform, cache: &Cache) -> Result<Self, Error> {
let platform = PythonPlatform::from(platform);
let Some(venv) = detect_virtual_env(&platform)? else {
return Err(Error::NotFound);
};
let executable = platform.venv_python(&venv);
let interpreter = Interpreter::query(&executable, platform.0, cache)?;
let interpreter = Interpreter::query(
&executable,
platform.0,
&cache.bucket(CacheBucket::Interpreter),
)?;
Ok(Self {
root: venv,
@ -31,14 +36,14 @@ impl Virtualenv {
})
}
pub fn from_virtualenv(
platform: Platform,
root: &Path,
cache: Option<&Path>,
) -> Result<Self, Error> {
pub fn from_virtualenv(platform: Platform, root: &Path, cache: &Cache) -> Result<Self, Error> {
let platform = PythonPlatform::from(platform);
let executable = platform.venv_python(root);
let interpreter = Interpreter::query(&executable, platform.0, cache)?;
let interpreter = Interpreter::query(
&executable,
platform.0,
&cache.bucket(CacheBucket::Interpreter),
)?;
Ok(Self {
root: root.to_path_buf(),

View file

@ -11,11 +11,11 @@ use std::str::FromStr;
use anyhow::Result;
use chrono::{DateTime, Utc};
use once_cell::sync::Lazy;
use tempfile::tempdir;
use pep508_rs::{MarkerEnvironment, Requirement, StringVersion};
use platform_host::{Arch, Os, Platform};
use platform_tags::Tags;
use puffin_cache::Cache;
use puffin_client::RegistryClientBuilder;
use puffin_interpreter::{Interpreter, Virtualenv};
use puffin_resolver::{
@ -31,12 +31,12 @@ static EXCLUDE_NEWER: Lazy<DateTime<Utc>> = Lazy::new(|| {
});
struct DummyContext {
cache: PathBuf,
cache: Cache,
interpreter: Interpreter,
}
impl BuildContext for DummyContext {
fn cache(&self) -> &Path {
fn cache(&self) -> &Cache {
&self.cache
}
@ -80,10 +80,9 @@ async fn resolve(
markers: &'static MarkerEnvironment,
tags: &Tags,
) -> Result<Graph> {
let temp_dir = tempdir()?;
let client = RegistryClientBuilder::new(temp_dir.path()).build();
let client = RegistryClientBuilder::new(Cache::temp()?).build();
let build_context = DummyContext {
cache: temp_dir.path().to_path_buf(),
cache: Cache::temp()?,
interpreter: Interpreter::artificial(
Platform::current()?,
markers.clone(),

View file

@ -14,6 +14,7 @@ workspace = true
[dependencies]
pep508_rs = { path = "../pep508-rs" }
puffin-cache = { path = "../puffin-cache" }
puffin-interpreter = { path = "../puffin-interpreter" }
anyhow = { workspace = true }

View file

@ -7,6 +7,7 @@ use std::pin::Pin;
use anyhow::Result;
use pep508_rs::Requirement;
use puffin_cache::Cache;
use puffin_interpreter::{Interpreter, Virtualenv};
/// Avoid cyclic crate dependencies between resolver, installer and builder.
@ -49,8 +50,7 @@ use puffin_interpreter::{Interpreter, Virtualenv};
// TODO(konstin): Proper error types
pub trait BuildContext {
// TODO(konstin): Add a cache abstraction
fn cache(&self) -> &Path;
fn cache(&self) -> &Cache;
/// All (potentially nested) source distribution builds use the same base python and can reuse
/// its metadata (e.g. wheel compatibility tags).