Add a content-addressed cache for wheels (#38)

Closes https://github.com/astral-sh/puffin/issues/4.
This commit is contained in:
Charlie Marsh 2023-10-07 10:24:52 -04:00 committed by GitHub
parent 6c31631913
commit 162952bf64
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 97 additions and 28 deletions

1
Cargo.lock generated
View file

@ -1713,6 +1713,7 @@ name = "puffin-installer"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"cacache",
"install-wheel-rs", "install-wheel-rs",
"puffin-client", "puffin-client",
"puffin-interpreter", "puffin-interpreter",

View file

@ -18,6 +18,26 @@ cargo run -p puffin-cli -- sync requirements.txt
## Benchmarks ## Benchmarks
### Resolution
To compare a warm run of `puffin` to `pip-compile`:
```shell
hyperfine --runs 10 --warmup 3 --prepare "rm -f /tmp/tmp.txt" \
"./target/release/puffin-cli compile requirements.txt" \
"pip-compile requirements.txt -o /tmp/tmp.txt"
```
To compare a cold run of `puffin` to `pip-compile`:
```shell
hyperfine --runs 10 --warmup 3 --prepare "rm -f /tmp/tmp.txt" \
"./target/release/puffin-cli compile requirements.txt --no-cache" \
"pip-compile requirements.txt --rebuild --pip-args '--no-cache-dir' -o /tmp/tmp.txt"
```
### Installation
To compare a warm run of `puffin` to `pip`: To compare a warm run of `puffin` to `pip`:
```shell ```shell
@ -31,7 +51,7 @@ To compare a cold run of `puffin` to `pip`:
```shell ```shell
hyperfine --runs 10 --warmup 3 \ hyperfine --runs 10 --warmup 3 \
"./target/release/puffin-cli sync requirements.txt --no-cache" \ "./target/release/puffin-cli sync requirements.txt --no-cache" \
"pip install -r requirements.txt --ignore-installed --no-cache-dir" "pip install -r requirements.txt --ignore-installed --no-cache-dir --no-deps"
``` ```
## License ## License

View file

@ -54,7 +54,7 @@ pub(crate) async fn sync(src: &Path, cache: Option<&Path>) -> Result<ExitStatus>
// Install into the current environment. // Install into the current environment.
let wheels = resolution.into_files().collect::<Vec<_>>(); let wheels = resolution.into_files().collect::<Vec<_>>();
puffin_installer::install(&wheels, &python, &client).await?; puffin_installer::install(&wheels, &python, &client, cache).await?;
#[allow(clippy::print_stdout)] #[allow(clippy::print_stdout)]
{ {

View file

@ -147,7 +147,7 @@ pub struct File {
pub filename: String, pub filename: String,
pub hashes: Hashes, pub hashes: Hashes,
pub requires_python: Option<String>, pub requires_python: Option<String>,
pub size: i64, pub size: usize,
pub upload_time: String, pub upload_time: String,
pub url: String, pub url: String,
pub yanked: Yanked, pub yanked: Yanked,

View file

@ -21,3 +21,4 @@ tracing = { workspace = true }
url = { workspace = true } url = { workspace = true }
tokio = { workspace = true } tokio = { workspace = true }
tokio-util = { workspace = true } tokio-util = { workspace = true }
cacache = { version = "11.7.1", default-features = false, features = ["tokio-runtime"] }

View file

@ -2,8 +2,10 @@ use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
use anyhow::Result; use anyhow::Result;
use cacache::{Algorithm, Integrity};
use tokio::task::JoinSet; use tokio::task::JoinSet;
use tokio_util::compat::FuturesAsyncReadCompatExt; use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::debug;
use url::Url; use url::Url;
use install_wheel_rs::{install_wheel, InstallLocation}; use install_wheel_rs::{install_wheel, InstallLocation};
@ -16,21 +18,20 @@ pub async fn install(
wheels: &[File], wheels: &[File],
python: &PythonExecutable, python: &PythonExecutable,
client: &PypiClient, client: &PypiClient,
cache: Option<&Path>,
) -> Result<()> { ) -> Result<()> {
// Create a temporary directory, in which we'll store the wheels. // Fetch the wheels in parallel.
let tmp_dir = tempfile::tempdir()?; let mut fetches = JoinSet::new();
let mut results = Vec::with_capacity(wheels.len());
// Download the wheels in parallel.
let mut downloads = JoinSet::new();
for wheel in wheels { for wheel in wheels {
downloads.spawn(do_download( fetches.spawn(fetch_wheel(
wheel.clone(), wheel.clone(),
client.clone(), client.clone(),
tmp_dir.path().join(&wheel.hashes.sha256), cache.map(Path::to_path_buf),
)); ));
} }
while let Some(result) = downloads.join_next().await.transpose()? { while let Some(result) = fetches.join_next().await.transpose()? {
result?; results.push(result?);
} }
// Install each wheel. // Install each wheel.
@ -39,14 +40,14 @@ pub async fn install(
python_version: python.simple_version(), python_version: python.simple_version(),
}; };
let locked_dir = location.acquire_lock()?; let locked_dir = location.acquire_lock()?;
for wheel in wheels { for wheel in results {
let path = tmp_dir.path().join(&wheel.hashes.sha256); let reader = std::io::Cursor::new(wheel.buffer);
let filename = WheelFilename::from_str(&wheel.filename)?; let filename = WheelFilename::from_str(&wheel.file.filename)?;
// TODO(charlie): Should this be async? // TODO(charlie): Should this be async?
install_wheel( install_wheel(
&locked_dir, &locked_dir,
std::fs::File::open(path)?, reader,
&filename, &filename,
false, false,
false, false,
@ -59,15 +60,41 @@ pub async fn install(
Ok(()) Ok(())
} }
/// A wheel whose contents have been read fully into memory, paired with the
/// `File` record it was fetched for.
#[derive(Debug)]
struct FetchedWheel {
/// The `File` record for this wheel; its `filename` is parsed into a
/// `WheelFilename` when the wheel is installed.
file: File,
/// The raw wheel bytes, obtained either from the cache or from the
/// download stream; wrapped in a `Cursor` and handed to `install_wheel`.
buffer: Vec<u8>,
}
/// Fetch a wheel into an in-memory buffer, reading it from the cache when possible and downloading it otherwise.
async fn do_download(wheel: File, client: PypiClient, path: impl AsRef<Path>) -> Result<File> { async fn fetch_wheel(
// TODO(charlie): Store these in a content-addressed cache. file: File,
let url = Url::parse(&wheel.url)?; client: PypiClient,
cache: Option<impl AsRef<Path>>,
) -> Result<FetchedWheel> {
// Parse the wheel's SRI.
let sri = Integrity::from_hex(&file.hashes.sha256, Algorithm::Sha256)?;
// Read from the cache, if possible.
if let Some(cache) = cache.as_ref() {
if let Ok(buffer) = cacache::read_hash(&cache, &sri).await {
debug!("Extracted wheel from cache: {:?}", file.filename);
return Ok(FetchedWheel { file, buffer });
}
}
let url = Url::parse(&file.url)?;
let reader = client.stream_external(&url).await?; let reader = client.stream_external(&url).await?;
// TODO(charlie): Stream the unzip. // Read into a buffer.
let mut writer = tokio::fs::File::create(path).await?; let mut buffer = Vec::with_capacity(file.size);
tokio::io::copy(&mut reader.compat(), &mut writer).await?; let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
Ok(wheel) // Write the buffer to the cache, if possible.
if let Some(cache) = cache.as_ref() {
cacache::write_hash(&cache, &buffer).await?;
}
Ok(FetchedWheel { file, buffer })
} }

View file

@ -1,6 +1,26 @@
click==8.1.7 #
pathspec==0.11.2 # This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --pip-args='--no-cache-dir' requirements.in
#
attrs==23.1.0
# via
# cattrs
# lsprotocol
cattrs==23.1.2
# via lsprotocol
lsprotocol==2023.0.0b1
# via
# -r requirements.in
# pygls
packaging==23.2 packaging==23.2
platformdirs==3.11.0 # via -r requirements.in
black==23.9.1 pygls==1.1.1
mypy-extensions==1.0.0 # via -r requirements.in
ruff==0.0.292
# via -r requirements.in
typeguard==3.0.2
# via pygls
typing-extensions==4.8.0
# via -r requirements.in