From 162952bf6417de89d63cde267aaed7ca954dd291 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sat, 7 Oct 2023 10:24:52 -0400 Subject: [PATCH] Add a content-addressed cache for wheels (#38) Closes https://github.com/astral-sh/puffin/issues/4. --- Cargo.lock | 1 + README.md | 22 ++++++++- crates/puffin-cli/src/commands/sync.rs | 2 +- crates/puffin-client/src/api.rs | 2 +- crates/puffin-installer/Cargo.toml | 1 + crates/puffin-installer/src/lib.rs | 67 ++++++++++++++++++-------- requirements.txt | 30 ++++++++++-- 7 files changed, 97 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48c5c17c8..e387b1c91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1713,6 +1713,7 @@ name = "puffin-installer" version = "0.1.0" dependencies = [ "anyhow", + "cacache", "install-wheel-rs", "puffin-client", "puffin-interpreter", diff --git a/README.md b/README.md index 584fdafb5..0d920551b 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,26 @@ cargo run -p puffin-cli -- sync requirements.txt ## Benchmarks +### Resolution + +To compare a warm run of `puffin` to `pip-compile`: + +```shell +hyperfine --runs 10 --warmup 3 --prepare "rm -f /tmp/tmp.txt" \ + "./target/release/puffin-cli compile requirements.txt" \ + "pip-compile requirements.txt -o /tmp/tmp.txt" +``` + +To compare a cold run of `puffin` to `pip-compile`: + +```shell +hyperfine --runs 10 --warmup 3 --prepare "rm -f /tmp/tmp.txt" \ + "./target/release/puffin-cli compile requirements.txt --no-cache" \ + "pip-compile requirements.txt --rebuild --pip-args '--no-cache-dir' -o /tmp/tmp.txt" +``` + +### Installation + To compare a warm run of `puffin` to `pip`: ```shell @@ -31,7 +51,7 @@ To compare a cold run of `puffin` to `pip`: ```shell hyperfine --runs 10 --warmup 3 \ "./target/release/puffin-cli sync requirements.txt --no-cache" \ - "pip install -r requirements.txt --ignore-installed --no-cache-dir" + "pip install -r requirements.txt --ignore-installed --no-cache-dir --no-deps" ``` ## License diff --git a/crates/puffin-cli/src/commands/sync.rs b/crates/puffin-cli/src/commands/sync.rs index 79554904c..7238db719 100644 --- a/crates/puffin-cli/src/commands/sync.rs +++ b/crates/puffin-cli/src/commands/sync.rs @@ -54,7 +54,7 @@ pub(crate) async fn sync(src: &Path, cache: Option<&Path>) -> Result // Install into the current environment. let wheels = resolution.into_files().collect::>(); - puffin_installer::install(&wheels, &python, &client).await?; + puffin_installer::install(&wheels, &python, &client, cache).await?; #[allow(clippy::print_stdout)] { diff --git a/crates/puffin-client/src/api.rs b/crates/puffin-client/src/api.rs index 581290069..9319dfb8b 100644 --- a/crates/puffin-client/src/api.rs +++ b/crates/puffin-client/src/api.rs @@ -147,7 +147,7 @@ pub struct File { pub filename: String, pub hashes: Hashes, pub requires_python: Option, - pub size: i64, + pub size: usize, pub upload_time: String, pub url: String, pub yanked: Yanked, diff --git a/crates/puffin-installer/Cargo.toml b/crates/puffin-installer/Cargo.toml index 036fa2f11..51afe3839 100644 --- a/crates/puffin-installer/Cargo.toml +++ b/crates/puffin-installer/Cargo.toml @@ -21,3 +21,4 @@ tracing = { workspace = true } url = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } +cacache = { version = "11.7.1", default-features = false, features = ["tokio-runtime"] } diff --git a/crates/puffin-installer/src/lib.rs b/crates/puffin-installer/src/lib.rs index 07b45537d..e60ff7fb5 100644 --- a/crates/puffin-installer/src/lib.rs +++ b/crates/puffin-installer/src/lib.rs @@ -2,8 +2,10 @@ use std::path::Path; use std::str::FromStr; use anyhow::Result; +use cacache::{Algorithm, Integrity}; use tokio::task::JoinSet; use tokio_util::compat::FuturesAsyncReadCompatExt; +use tracing::debug; use url::Url; use install_wheel_rs::{install_wheel, InstallLocation}; @@ -16,21 +18,20 @@ pub async fn install( wheels: &[File], python: &PythonExecutable, client: &PypiClient, + cache: Option<&Path>, ) -> Result<()> { - // Create a temporary directory, in which we'll store the wheels. - let tmp_dir = tempfile::tempdir()?; - - // Download the wheels in parallel. - let mut downloads = JoinSet::new(); + // Fetch the wheels in parallel. + let mut fetches = JoinSet::new(); + let mut results = Vec::with_capacity(wheels.len()); for wheel in wheels { - downloads.spawn(do_download( + fetches.spawn(fetch_wheel( wheel.clone(), client.clone(), - tmp_dir.path().join(&wheel.hashes.sha256), + cache.map(Path::to_path_buf), )); } - while let Some(result) = downloads.join_next().await.transpose()? { - result?; + while let Some(result) = fetches.join_next().await.transpose()? { + results.push(result?); } // Install each wheel. @@ -39,14 +40,14 @@ pub async fn install( python_version: python.simple_version(), }; let locked_dir = location.acquire_lock()?; - for wheel in wheels { - let path = tmp_dir.path().join(&wheel.hashes.sha256); - let filename = WheelFilename::from_str(&wheel.filename)?; + for wheel in results { + let reader = std::io::Cursor::new(wheel.buffer); + let filename = WheelFilename::from_str(&wheel.file.filename)?; // TODO(charlie): Should this be async? install_wheel( &locked_dir, - std::fs::File::open(path)?, + reader, &filename, false, false, @@ -59,15 +60,41 @@ pub async fn install( Ok(()) } +#[derive(Debug)] +struct FetchedWheel { + file: File, + buffer: Vec, +} + /// Download a wheel to a given path. -async fn do_download(wheel: File, client: PypiClient, path: impl AsRef) -> Result { - // TODO(charlie): Store these in a content-addressed cache. - let url = Url::parse(&wheel.url)?; +async fn fetch_wheel( + file: File, + client: PypiClient, + cache: Option>, +) -> Result { + // Parse the wheel's SRI. + let sri = Integrity::from_hex(&file.hashes.sha256, Algorithm::Sha256)?; + + // Read from the cache, if possible. + if let Some(cache) = cache.as_ref() { + if let Ok(buffer) = cacache::read_hash(&cache, &sri).await { + debug!("Extracted wheel from cache: {:?}", file.filename); + return Ok(FetchedWheel { file, buffer }); + } + } + + let url = Url::parse(&file.url)?; let reader = client.stream_external(&url).await?; - // TODO(charlie): Stream the unzip. - let mut writer = tokio::fs::File::create(path).await?; - tokio::io::copy(&mut reader.compat(), &mut writer).await?; + // Read into a buffer. + let mut buffer = Vec::with_capacity(file.size); + let mut reader = tokio::io::BufReader::new(reader.compat()); + tokio::io::copy(&mut reader, &mut buffer).await?; - Ok(wheel) + // Write the buffer to the cache, if possible. + if let Some(cache) = cache.as_ref() { + cacache::write_hash(&cache, &buffer).await?; + } + + Ok(FetchedWheel { file, buffer }) } diff --git a/requirements.txt b/requirements.txt index a4a3fdeb9..e62f440f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,26 @@ -click==8.1.7 -pathspec==0.11.2 +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --pip-args='--no-cache-dir' requirements.in +# +attrs==23.1.0 + # via + # cattrs + # lsprotocol +cattrs==23.1.2 + # via lsprotocol +lsprotocol==2023.0.0b1 + # via + # -r requirements.in + # pygls packaging==23.2 -platformdirs==3.11.0 -black==23.9.1 -mypy-extensions==1.0.0 + # via -r requirements.in +pygls==1.1.1 + # via -r requirements.in +ruff==0.0.292 + # via -r requirements.in +typeguard==3.0.2 + # via pygls +typing-extensions==4.8.0 + # via -r requirements.in