Only store small wheels in-memory (#348)

Closes https://github.com/astral-sh/puffin/issues/246.
This commit is contained in:
Charlie Marsh 2023-11-06 16:50:00 -08:00 committed by GitHub
parent e952557bf1
commit 2c114592bd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 87 additions and 36 deletions

7
Cargo.lock generated
View file

@ -379,6 +379,12 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
[[package]]
name = "bytesize"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc"
[[package]]
name = "cacache"
version = "12.0.0"
@ -2350,6 +2356,7 @@ name = "puffin-installer"
version = "0.0.1"
dependencies = [
"anyhow",
"bytesize",
"cacache",
"distribution-filename",
"fs-err",

View file

@ -17,6 +17,7 @@ anyhow = { version = "1.0.75" }
async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", ref = "4cafe5afda889d53060e0565c949d4ffd6ef3786" }
async_zip = { version = "0.0.15", features = ["tokio", "deflate"] }
bitflags = { version = "2.4.1" }
bytesize = { version = "1.3.0" }
cacache = { version = "12.0.0", default-features = false, features = ["tokio-runtime"] }
camino = { version = "1.1.6", features = ["serde1"] }
clap = { version = "4.4.7" }

View file

@ -147,7 +147,10 @@ pub(crate) async fn sync_requirements(
let downloader = puffin_installer::Downloader::new(&client, cache)
.with_reporter(DownloadReporter::from(printer).with_length(remote.len() as u64));
let downloads = downloader.download(remote).await?;
let downloads = downloader
.download(remote)
.await
.context("Failed to download distributions")?;
let s = if downloads.len() == 1 { "" } else { "s" };
writeln!(
@ -188,7 +191,10 @@ pub(crate) async fn sync_requirements(
let builder = Builder::new(&build_dispatch)
.with_reporter(BuildReporter::from(printer).with_length(sdists.len() as u64));
let wheels = builder.build(sdists).await?;
let wheels = builder
.build(sdists)
.await
.context("Failed to build source distributions")?;
let s = if wheels.len() == 1 { "" } else { "s" };
writeln!(
@ -219,7 +225,7 @@ pub(crate) async fn sync_requirements(
let unzips = unzipper
.unzip(downloads, cache)
.await
.context("Failed to download and unpack wheels")?;
.context("Failed to unpack wheels")?;
let s = if unzips.len() == 1 { "" } else { "s" };
writeln!(

View file

@ -0,0 +1,23 @@
---
source: crates/puffin-cli/tests/pip_sync.rs
assertion_line: 554
info:
program: puffin
args:
- pip-sync
- requirements.txt
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpZjdQsr
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpkhiPUZ/.venv
---
success: false
exit_code: 2
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
error: Failed to download and unpack wheels
Caused by: No such file or directory (os error 2)

View file

@ -115,7 +115,7 @@ impl RemoteDistribution {
Self::Url(_, url) => {
let filename = url
.path_segments()
.and_then(std::iter::Iterator::last)
.and_then(Iterator::last)
.ok_or_else(|| anyhow!("Could not parse filename from URL: {}", url))?;
Ok(Cow::Owned(filename.to_owned()))
}

View file

@ -24,6 +24,7 @@ puffin-traits = { path = "../puffin-traits" }
pypi-types = { path = "../pypi-types" }
anyhow = { workspace = true }
bytesize = { workspace = true }
cacache = { workspace = true }
fs-err = { workspace = true }
fxhash = { workspace = true }

View file

@ -3,7 +3,7 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use anyhow::Result;
use cacache::{Algorithm, Integrity};
use bytesize::ByteSize;
use tokio::task::JoinSet;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::debug;
@ -58,8 +58,6 @@ impl<'a> Downloader<'a> {
let mut fetches = JoinSet::new();
let mut downloads = Vec::with_capacity(distributions.len());
for distribution in distributions {
debug!("Downloading wheel: {distribution}");
fetches.spawn(fetch_distribution(
distribution.clone(),
self.client.clone(),
@ -100,47 +98,56 @@ async fn fetch_distribution(
if distribution.is_wheel() {
match &distribution {
RemoteDistribution::Registry(.., file) => {
// Parse the wheel's SRI.
let sri = Integrity::from_hex(&file.hashes.sha256, Algorithm::Sha256)?;
// Read from the cache, if possible.
if let Ok(buffer) = cacache::read_hash(&cache, &sri).await {
debug!("Extracted wheel from cache: {distribution}");
return Ok(Download::Wheel(Wheel::InMemory(InMemoryWheel {
remote: distribution,
buffer,
})));
}
// Fetch the wheel.
let url = Url::parse(&file.url)?;
let reader = client.stream_external(&url).await?;
// Read into a buffer.
let mut buffer = Vec::with_capacity(file.size);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
// If the file is at least 5MB, write it to disk; otherwise, keep it in memory.
let file_size = ByteSize::b(file.size as u64);
if file_size >= ByteSize::mb(5) {
debug!("Fetching disk-based wheel from registry: {distribution} ({file_size})");
// Write the buffer to the cache.
cacache::write_hash(&cache, &buffer).await?;
// Download the wheel to a temporary file.
let temp_dir = tempfile::tempdir_in(cache)?.into_path();
let wheel_filename = distribution.filename()?;
let wheel_file = temp_dir.join(wheel_filename.as_ref());
let mut writer = tokio::fs::File::create(&wheel_file).await?;
tokio::io::copy(&mut reader.compat(), &mut writer).await?;
Ok(Download::Wheel(Wheel::InMemory(InMemoryWheel {
remote: distribution,
buffer,
})))
Ok(Download::Wheel(Wheel::Disk(DiskWheel {
remote: distribution,
path: wheel_file,
})))
} else {
debug!("Fetching in-memory wheel from registry: {distribution} ({file_size})");
// Read into a buffer.
let mut buffer = Vec::with_capacity(file.size);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
Ok(Download::Wheel(Wheel::InMemory(InMemoryWheel {
remote: distribution,
buffer,
})))
}
}
RemoteDistribution::Url(.., url) => {
debug!("Fetching disk-based wheel from URL: {url}");
// Fetch the wheel.
let reader = client.stream_external(url).await?;
// Read into a buffer.
let mut buffer = Vec::with_capacity(1024 * 1024);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
// Download the wheel to a temporary file.
let temp_dir = tempfile::tempdir_in(cache)?.into_path();
let wheel_filename = distribution.filename()?;
let wheel_file = temp_dir.join(wheel_filename.as_ref());
let mut writer = tokio::fs::File::create(&wheel_file).await?;
tokio::io::copy(&mut reader.compat(), &mut writer).await?;
Ok(Download::Wheel(Wheel::InMemory(InMemoryWheel {
Ok(Download::Wheel(Wheel::Disk(DiskWheel {
remote: distribution,
buffer,
path: wheel_file,
})))
}
}

View file

@ -96,7 +96,7 @@ impl Unzipper {
/// Unpack a single wheel by handing a reader over its contents, together with
/// `target`, to `unzip_archive`.
///
/// An in-memory wheel is wrapped in a `Cursor` over its buffer; a disk-backed
/// wheel is opened from its stored path. Any error from opening the file or
/// from `unzip_archive` is returned to the caller.
fn unzip_wheel(wheel: Wheel, target: &Path) -> Result<()> {
match wheel {
Wheel::InMemory(wheel) => unzip_archive(std::io::Cursor::new(wheel.buffer), target),
// NOTE(review): two `Wheel::Disk` arms appear below — presumably the removed and
// added lines of a diff shown together. If compiled as written, only the first
// arm is reachable; confirm which variant is intended.
Wheel::Disk(wheel) => unzip_archive(std::fs::File::open(wheel.path)?, target),
Wheel::Disk(wheel) => unzip_archive(fs_err::File::open(wheel.path)?, target),
}
}

View file

@ -123,6 +123,12 @@ impl HasLength for File {
}
}
/// Length support for [`fs_err::File`], sourced from the file's metadata.
impl HasLength for fs_err::File {
    /// Returns the length reported by the file's metadata.
    ///
    /// # Panics
    ///
    /// Panics if the metadata lookup fails, since the trait signature leaves
    /// no way to surface the I/O error to the caller.
    fn len(&self) -> u64 {
        let metadata = self.metadata().unwrap();
        metadata.len()
    }
}
impl HasLength for Cursor<Vec<u8>> {
fn len(&self) -> u64 {
self.get_ref().len() as u64