diff --git a/Cargo.lock b/Cargo.lock index 3aac18739..a2c877178 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -379,6 +379,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +[[package]] +name = "bytesize" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" + [[package]] name = "cacache" version = "12.0.0" @@ -2350,6 +2356,7 @@ name = "puffin-installer" version = "0.0.1" dependencies = [ "anyhow", + "bytesize", "cacache", "distribution-filename", "fs-err", diff --git a/Cargo.toml b/Cargo.toml index 07621ec03..f35a77d2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ anyhow = { version = "1.0.75" } async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", ref = "4cafe5afda889d53060e0565c949d4ffd6ef3786" } async_zip = { version = "0.0.15", features = ["tokio", "deflate"] } bitflags = { version = "2.4.1" } +bytesize = { version = "1.3.0" } cacache = { version = "12.0.0", default-features = false, features = ["tokio-runtime"] } camino = { version = "1.1.6", features = ["serde1"] } clap = { version = "4.4.7" } diff --git a/crates/puffin-cli/src/commands/pip_sync.rs b/crates/puffin-cli/src/commands/pip_sync.rs index 1ce88f916..9eb7d895b 100644 --- a/crates/puffin-cli/src/commands/pip_sync.rs +++ b/crates/puffin-cli/src/commands/pip_sync.rs @@ -147,7 +147,10 @@ pub(crate) async fn sync_requirements( let downloader = puffin_installer::Downloader::new(&client, cache) .with_reporter(DownloadReporter::from(printer).with_length(remote.len() as u64)); - let downloads = downloader.download(remote).await?; + let downloads = downloader + .download(remote) + .await + .context("Failed to download distributions")?; let s = if downloads.len() == 1 { "" } else { "s" }; writeln!( @@ -188,7 +191,10 @@ pub(crate) async fn sync_requirements( let builder = Builder::new(&build_dispatch) .with_reporter(BuildReporter::from(printer).with_length(sdists.len() as u64)); - let wheels = builder.build(sdists).await?; + let wheels = builder + .build(sdists) + .await + .context("Failed to build source distributions")?; let s = if wheels.len() == 1 { "" } else { "s" }; writeln!( @@ -219,7 +225,7 @@ pub(crate) async fn sync_requirements( let unzips = unzipper .unzip(downloads, cache) .await - .context("Failed to download and unpack wheels")?; + .context("Failed to unpack wheels")?; let s = if unzips.len() == 1 { "" } else { "s" }; writeln!( diff --git a/crates/puffin-cli/tests/snapshots/pip_sync__install_url.snap.new b/crates/puffin-cli/tests/snapshots/pip_sync__install_url.snap.new new file mode 100644 index 000000000..6ff69cf8b --- /dev/null +++ b/crates/puffin-cli/tests/snapshots/pip_sync__install_url.snap.new @@ -0,0 +1,23 @@ +--- +source: crates/puffin-cli/tests/pip_sync.rs +assertion_line: 554 +info: + program: puffin + args: + - pip-sync + - requirements.txt + - "--cache-dir" + - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpZjdQsr + env: + VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpkhiPUZ/.venv +--- +success: false +exit_code: 2 +----- stdout ----- + +----- stderr ----- +Resolved 1 package in [TIME] +Downloaded 1 package in [TIME] +error: Failed to download and unpack wheels + Caused by: No such file or directory (os error 2) + diff --git a/crates/puffin-distribution/src/lib.rs b/crates/puffin-distribution/src/lib.rs index d6b23a144..8afabd046 100644 --- a/crates/puffin-distribution/src/lib.rs +++ b/crates/puffin-distribution/src/lib.rs @@ -115,7 +115,7 @@ impl RemoteDistribution { Self::Url(_, url) => { let filename = url .path_segments() - .and_then(std::iter::Iterator::last) + .and_then(Iterator::last) .ok_or_else(|| anyhow!("Could not parse filename from URL: {}", url))?; Ok(Cow::Owned(filename.to_owned())) } diff --git a/crates/puffin-installer/Cargo.toml b/crates/puffin-installer/Cargo.toml index eb7c536a5..c13bfefc7 100644 --- a/crates/puffin-installer/Cargo.toml +++ b/crates/puffin-installer/Cargo.toml @@ -24,6 +24,7 @@ puffin-traits = { path = "../puffin-traits" } pypi-types = { path = "../pypi-types" } anyhow = { workspace = true } +bytesize = { workspace = true } cacache = { workspace = true } fs-err = { workspace = true } fxhash = { workspace = true } diff --git a/crates/puffin-installer/src/downloader.rs b/crates/puffin-installer/src/downloader.rs index 0d3d391bc..c1d054caf 100644 --- a/crates/puffin-installer/src/downloader.rs +++ b/crates/puffin-installer/src/downloader.rs @@ -3,7 +3,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::Result; -use cacache::{Algorithm, Integrity}; +use bytesize::ByteSize; use tokio::task::JoinSet; use tokio_util::compat::FuturesAsyncReadCompatExt; use tracing::debug; @@ -58,8 +58,6 @@ impl<'a> Downloader<'a> { let mut fetches = JoinSet::new(); let mut downloads = Vec::with_capacity(distributions.len()); for distribution in distributions { - debug!("Downloading wheel: {distribution}"); - fetches.spawn(fetch_distribution( distribution.clone(), self.client.clone(), @@ -100,47 +98,56 @@ async fn fetch_distribution( if distribution.is_wheel() { match &distribution { RemoteDistribution::Registry(.., file) => { - // Parse the wheel's SRI. - let sri = Integrity::from_hex(&file.hashes.sha256, Algorithm::Sha256)?; - - // Read from the cache, if possible. - if let Ok(buffer) = cacache::read_hash(&cache, &sri).await { - debug!("Extracted wheel from cache: {distribution}"); - return Ok(Download::Wheel(Wheel::InMemory(InMemoryWheel { - remote: distribution, - buffer, - }))); - } - // Fetch the wheel. let url = Url::parse(&file.url)?; let reader = client.stream_external(&url).await?; - // Read into a buffer. - let mut buffer = Vec::with_capacity(file.size); - let mut reader = tokio::io::BufReader::new(reader.compat()); - tokio::io::copy(&mut reader, &mut buffer).await?; + // If the file is greater than 5MB, write it to disk; otherwise, keep it in memory. + let file_size = ByteSize::b(file.size as u64); + if file_size >= ByteSize::mb(5) { + debug!("Fetching disk-based wheel from registry: {distribution} ({file_size})"); - // Write the buffer to the cache. - cacache::write_hash(&cache, &buffer).await?; + // Download the wheel to a temporary file. + let temp_dir = tempfile::tempdir_in(cache)?.into_path(); + let wheel_filename = distribution.filename()?; + let wheel_file = temp_dir.join(wheel_filename.as_ref()); + let mut writer = tokio::fs::File::create(&wheel_file).await?; + tokio::io::copy(&mut reader.compat(), &mut writer).await?; - Ok(Download::Wheel(Wheel::InMemory(InMemoryWheel { - remote: distribution, - buffer, - }))) + Ok(Download::Wheel(Wheel::Disk(DiskWheel { + remote: distribution, + path: wheel_file, + }))) + } else { + debug!("Fetching in-memory wheel from registry: {distribution} ({file_size})"); + + // Read into a buffer. + let mut buffer = Vec::with_capacity(file.size); + let mut reader = tokio::io::BufReader::new(reader.compat()); + tokio::io::copy(&mut reader, &mut buffer).await?; + + Ok(Download::Wheel(Wheel::InMemory(InMemoryWheel { + remote: distribution, + buffer, + }))) + } } RemoteDistribution::Url(.., url) => { + debug!("Fetching disk-based wheel from URL: {url}"); + // Fetch the wheel. let reader = client.stream_external(url).await?; - // Read into a buffer. - let mut buffer = Vec::with_capacity(1024 * 1024); - let mut reader = tokio::io::BufReader::new(reader.compat()); - tokio::io::copy(&mut reader, &mut buffer).await?; + // Download the wheel to a temporary file. + let temp_dir = tempfile::tempdir_in(cache)?.into_path(); + let wheel_filename = distribution.filename()?; + let wheel_file = temp_dir.join(wheel_filename.as_ref()); + let mut writer = tokio::fs::File::create(&wheel_file).await?; + tokio::io::copy(&mut reader.compat(), &mut writer).await?; - Ok(Download::Wheel(Wheel::InMemory(InMemoryWheel { + Ok(Download::Wheel(Wheel::Disk(DiskWheel { remote: distribution, - buffer, + path: wheel_file, }))) } } diff --git a/crates/puffin-installer/src/unzipper.rs b/crates/puffin-installer/src/unzipper.rs index 5a712d86d..fc6c9ef3b 100644 --- a/crates/puffin-installer/src/unzipper.rs +++ b/crates/puffin-installer/src/unzipper.rs @@ -96,7 +96,7 @@ impl Unzipper { fn unzip_wheel(wheel: Wheel, target: &Path) -> Result<()> { match wheel { Wheel::InMemory(wheel) => unzip_archive(std::io::Cursor::new(wheel.buffer), target), - Wheel::Disk(wheel) => unzip_archive(std::fs::File::open(wheel.path)?, target), + Wheel::Disk(wheel) => unzip_archive(fs_err::File::open(wheel.path)?, target), } } diff --git a/crates/puffin-installer/src/vendor/cloneable_seekable_reader.rs b/crates/puffin-installer/src/vendor/cloneable_seekable_reader.rs index 4f9db7939..cbe8a8867 100644 --- a/crates/puffin-installer/src/vendor/cloneable_seekable_reader.rs +++ b/crates/puffin-installer/src/vendor/cloneable_seekable_reader.rs @@ -123,6 +123,12 @@ impl HasLength for File { } } +impl HasLength for fs_err::File { + fn len(&self) -> u64 { + self.metadata().unwrap().len() + } +} + impl HasLength for Cursor> { fn len(&self) -> u64 { self.get_ref().len() as u64