Add a content-addressed cache for wheels (#38)

Closes https://github.com/astral-sh/puffin/issues/4.
This commit is contained in:
Charlie Marsh 2023-10-07 10:24:52 -04:00 committed by GitHub
parent 6c31631913
commit 162952bf64
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 97 additions and 28 deletions

1
Cargo.lock generated
View file

@ -1713,6 +1713,7 @@ name = "puffin-installer"
version = "0.1.0"
dependencies = [
"anyhow",
"cacache",
"install-wheel-rs",
"puffin-client",
"puffin-interpreter",

View file

@ -18,6 +18,26 @@ cargo run -p puffin-cli -- sync requirements.txt
## Benchmarks
### Resolution
To compare a warm run of `puffin` to `pip-compile`:
```shell
hyperfine --runs 10 --warmup 3 --prepare "rm -f /tmp/tmp.txt" \
"./target/release/puffin-cli compile requirements.txt" \
"pip-compile requirements.txt -o /tmp/tmp.txt"
```
To compare a cold run of `puffin` to `pip-compile`:
```shell
hyperfine --runs 10 --warmup 3 --prepare "rm -f /tmp/tmp.txt" \
"./target/release/puffin-cli compile requirements.txt --no-cache" \
"pip-compile requirements.txt --rebuild --pip-args '--no-cache-dir' -o /tmp/tmp.txt"
```
### Installation
To compare a warm run of `puffin` to `pip`:
```shell
@ -31,7 +51,7 @@ To compare a cold run of `puffin` to `pip`:
```shell
hyperfine --runs 10 --warmup 3 \
"./target/release/puffin-cli sync requirements.txt --no-cache" \
"pip install -r requirements.txt --ignore-installed --no-cache-dir"
"pip install -r requirements.txt --ignore-installed --no-cache-dir --no-deps"
```
## License

View file

@ -54,7 +54,7 @@ pub(crate) async fn sync(src: &Path, cache: Option<&Path>) -> Result<ExitStatus>
// Install into the current environment.
let wheels = resolution.into_files().collect::<Vec<_>>();
puffin_installer::install(&wheels, &python, &client).await?;
puffin_installer::install(&wheels, &python, &client, cache).await?;
#[allow(clippy::print_stdout)]
{

View file

@ -147,7 +147,7 @@ pub struct File {
pub filename: String,
pub hashes: Hashes,
pub requires_python: Option<String>,
pub size: i64,
pub size: usize,
pub upload_time: String,
pub url: String,
pub yanked: Yanked,

View file

@ -21,3 +21,4 @@ tracing = { workspace = true }
url = { workspace = true }
tokio = { workspace = true }
tokio-util = { workspace = true }
cacache = { version = "11.7.1", default-features = false, features = ["tokio-runtime"] }

View file

@ -2,8 +2,10 @@ use std::path::Path;
use std::str::FromStr;
use anyhow::Result;
use cacache::{Algorithm, Integrity};
use tokio::task::JoinSet;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::debug;
use url::Url;
use install_wheel_rs::{install_wheel, InstallLocation};
@ -16,21 +18,20 @@ pub async fn install(
wheels: &[File],
python: &PythonExecutable,
client: &PypiClient,
cache: Option<&Path>,
) -> Result<()> {
// Create a temporary directory, in which we'll store the wheels.
let tmp_dir = tempfile::tempdir()?;
// Download the wheels in parallel.
let mut downloads = JoinSet::new();
// Fetch the wheels in parallel.
let mut fetches = JoinSet::new();
let mut results = Vec::with_capacity(wheels.len());
for wheel in wheels {
downloads.spawn(do_download(
fetches.spawn(fetch_wheel(
wheel.clone(),
client.clone(),
tmp_dir.path().join(&wheel.hashes.sha256),
cache.map(Path::to_path_buf),
));
}
while let Some(result) = downloads.join_next().await.transpose()? {
result?;
while let Some(result) = fetches.join_next().await.transpose()? {
results.push(result?);
}
// Install each wheel.
@ -39,14 +40,14 @@ pub async fn install(
python_version: python.simple_version(),
};
let locked_dir = location.acquire_lock()?;
for wheel in wheels {
let path = tmp_dir.path().join(&wheel.hashes.sha256);
let filename = WheelFilename::from_str(&wheel.filename)?;
for wheel in results {
let reader = std::io::Cursor::new(wheel.buffer);
let filename = WheelFilename::from_str(&wheel.file.filename)?;
// TODO(charlie): Should this be async?
install_wheel(
&locked_dir,
std::fs::File::open(path)?,
reader,
&filename,
false,
false,
@ -59,15 +60,41 @@ pub async fn install(
Ok(())
}
#[derive(Debug)]
struct FetchedWheel {
file: File,
buffer: Vec<u8>,
}
/// Download a wheel to a given path.
async fn do_download(wheel: File, client: PypiClient, path: impl AsRef<Path>) -> Result<File> {
// TODO(charlie): Store these in a content-addressed cache.
let url = Url::parse(&wheel.url)?;
async fn fetch_wheel(
file: File,
client: PypiClient,
cache: Option<impl AsRef<Path>>,
) -> Result<FetchedWheel> {
// Parse the wheel's SRI.
let sri = Integrity::from_hex(&file.hashes.sha256, Algorithm::Sha256)?;
// Read from the cache, if possible.
if let Some(cache) = cache.as_ref() {
if let Ok(buffer) = cacache::read_hash(&cache, &sri).await {
debug!("Extracted wheel from cache: {:?}", file.filename);
return Ok(FetchedWheel { file, buffer });
}
}
let url = Url::parse(&file.url)?;
let reader = client.stream_external(&url).await?;
// TODO(charlie): Stream the unzip.
let mut writer = tokio::fs::File::create(path).await?;
tokio::io::copy(&mut reader.compat(), &mut writer).await?;
// Read into a buffer.
let mut buffer = Vec::with_capacity(file.size);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
Ok(wheel)
// Write the buffer to the cache, if possible.
if let Some(cache) = cache.as_ref() {
cacache::write_hash(&cache, &buffer).await?;
}
Ok(FetchedWheel { file, buffer })
}

View file

@ -1,6 +1,26 @@
click==8.1.7
pathspec==0.11.2
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --pip-args='--no-cache-dir' requirements.in
#
attrs==23.1.0
# via
# cattrs
# lsprotocol
cattrs==23.1.2
# via lsprotocol
lsprotocol==2023.0.0b1
# via
# -r requirements.in
# pygls
packaging==23.2
platformdirs==3.11.0
black==23.9.1
mypy-extensions==1.0.0
# via -r requirements.in
pygls==1.1.1
# via -r requirements.in
ruff==0.0.292
# via -r requirements.in
typeguard==3.0.2
# via pygls
typing-extensions==4.8.0
# via -r requirements.in