Add a content-addressed cache for wheels (#38)

Closes https://github.com/astral-sh/puffin/issues/4.
This commit is contained in:
Charlie Marsh 2023-10-07 10:24:52 -04:00 committed by GitHub
parent 6c31631913
commit 162952bf64
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 97 additions and 28 deletions

View file

@ -54,7 +54,7 @@ pub(crate) async fn sync(src: &Path, cache: Option<&Path>) -> Result<ExitStatus>
// Install into the current environment.
let wheels = resolution.into_files().collect::<Vec<_>>();
puffin_installer::install(&wheels, &python, &client).await?;
puffin_installer::install(&wheels, &python, &client, cache).await?;
#[allow(clippy::print_stdout)]
{

View file

@ -147,7 +147,7 @@ pub struct File {
pub filename: String,
pub hashes: Hashes,
pub requires_python: Option<String>,
pub size: i64,
pub size: usize,
pub upload_time: String,
pub url: String,
pub yanked: Yanked,

View file

@ -21,3 +21,4 @@ tracing = { workspace = true }
url = { workspace = true }
tokio = { workspace = true }
tokio-util = { workspace = true }
cacache = { version = "11.7.1", default-features = false, features = ["tokio-runtime"] }

View file

@ -2,8 +2,10 @@ use std::path::Path;
use std::str::FromStr;
use anyhow::Result;
use cacache::{Algorithm, Integrity};
use tokio::task::JoinSet;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::debug;
use url::Url;
use install_wheel_rs::{install_wheel, InstallLocation};
@ -16,21 +18,20 @@ pub async fn install(
wheels: &[File],
python: &PythonExecutable,
client: &PypiClient,
cache: Option<&Path>,
) -> Result<()> {
// Create a temporary directory, in which we'll store the wheels.
let tmp_dir = tempfile::tempdir()?;
// Download the wheels in parallel.
let mut downloads = JoinSet::new();
// Fetch the wheels in parallel.
let mut fetches = JoinSet::new();
let mut results = Vec::with_capacity(wheels.len());
for wheel in wheels {
downloads.spawn(do_download(
fetches.spawn(fetch_wheel(
wheel.clone(),
client.clone(),
tmp_dir.path().join(&wheel.hashes.sha256),
cache.map(Path::to_path_buf),
));
}
while let Some(result) = downloads.join_next().await.transpose()? {
result?;
while let Some(result) = fetches.join_next().await.transpose()? {
results.push(result?);
}
// Install each wheel.
@ -39,14 +40,14 @@ pub async fn install(
python_version: python.simple_version(),
};
let locked_dir = location.acquire_lock()?;
for wheel in wheels {
let path = tmp_dir.path().join(&wheel.hashes.sha256);
let filename = WheelFilename::from_str(&wheel.filename)?;
for wheel in results {
let reader = std::io::Cursor::new(wheel.buffer);
let filename = WheelFilename::from_str(&wheel.file.filename)?;
// TODO(charlie): Should this be async?
install_wheel(
&locked_dir,
std::fs::File::open(path)?,
reader,
&filename,
false,
false,
@ -59,15 +60,41 @@ pub async fn install(
Ok(())
}
#[derive(Debug)]
struct FetchedWheel {
file: File,
buffer: Vec<u8>,
}
/// Download a wheel to a given path.
async fn do_download(wheel: File, client: PypiClient, path: impl AsRef<Path>) -> Result<File> {
// TODO(charlie): Store these in a content-addressed cache.
let url = Url::parse(&wheel.url)?;
async fn fetch_wheel(
file: File,
client: PypiClient,
cache: Option<impl AsRef<Path>>,
) -> Result<FetchedWheel> {
// Parse the wheel's SRI.
let sri = Integrity::from_hex(&file.hashes.sha256, Algorithm::Sha256)?;
// Read from the cache, if possible.
if let Some(cache) = cache.as_ref() {
if let Ok(buffer) = cacache::read_hash(&cache, &sri).await {
debug!("Extracted wheel from cache: {:?}", file.filename);
return Ok(FetchedWheel { file, buffer });
}
}
let url = Url::parse(&file.url)?;
let reader = client.stream_external(&url).await?;
// TODO(charlie): Stream the unzip.
let mut writer = tokio::fs::File::create(path).await?;
tokio::io::copy(&mut reader.compat(), &mut writer).await?;
// Read into a buffer.
let mut buffer = Vec::with_capacity(file.size);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
Ok(wheel)
// Write the buffer to the cache, if possible.
if let Some(cache) = cache.as_ref() {
cacache::write_hash(&cache, &buffer).await?;
}
Ok(FetchedWheel { file, buffer })
}