Sort wheels by size when downloading and unzipping (#210)

I just learned about this from PackagingCon, and locally, it shows a
nice speedup:

```
❯ hyperfine --warmup 3 --prepare "rm -rf .venv && ./target/release/puffin venv .venv" "./target/release/puffin pip-sync ./scripts/benchmarks/requirements-large.txt --no-cache" "./target/release/main pip-sync ./scripts/benchmarks/requirements-large.txt --no-cache"
Benchmark 1: ./target/release/puffin pip-sync ./scripts/benchmarks/requirements-large.txt --no-cache
  Time (mean ± σ):      3.958 s ±  0.250 s    [User: 1.323 s, System: 5.840 s]
  Range (min … max):    3.652 s …  4.402 s    10 runs

Benchmark 2: ./target/release/main pip-sync ./scripts/benchmarks/requirements-large.txt --no-cache
  Time (mean ± σ):      4.214 s ±  0.451 s    [User: 1.322 s, System: 5.976 s]
  Range (min … max):    3.708 s …  5.268 s    10 runs

Summary
  './target/release/puffin pip-sync ./scripts/benchmarks/requirements-large.txt --no-cache' ran
    1.06 ± 0.13 times faster than './target/release/main pip-sync ./scripts/benchmarks/requirements-large.txt --no-cache'
```
This commit is contained in:
Charlie Marsh 2023-10-26 13:50:56 -07:00 committed by GitHub
parent 12e6b46ae8
commit d5c3ff789a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 15 additions and 5 deletions

View file

@ -149,16 +149,16 @@ pub(crate) async fn sync_requirements(
.with_reporter(DownloadReporter::from(printer).with_length(remote.len() as u64));
let downloads = downloader
.download(&remote, cache.unwrap_or(staging.path()))
.download(remote, cache.unwrap_or(staging.path()))
.await?;
let s = if remote.len() == 1 { "" } else { "s" };
let s = if downloads.len() == 1 { "" } else { "s" };
writeln!(
printer,
"{}",
format!(
"Downloaded {} in {}",
format!("{} package{}", remote.len(), s).bold(),
format!("{} package{}", downloads.len(), s).bold(),
elapsed(start.elapsed())
)
.dimmed()

View file

@ -149,7 +149,7 @@ impl BuildContext for BuildDispatch {
remote.iter().map(ToString::to_string).join(", ")
);
Downloader::new(&self.client, self.cache.as_deref())
.download(&remote, self.cache.as_deref().unwrap_or(staging.path()))
.download(remote, self.cache.as_deref().unwrap_or(staging.path()))
.await
.context("Failed to download build dependencies")?
};

View file

@ -1,3 +1,4 @@
use std::cmp::Reverse;
use std::path::Path;
use anyhow::Result;
@ -42,13 +43,17 @@ impl<'a> Downloader<'a> {
/// Download a set of wheels, returning them as in-memory distributions.
pub async fn download(
&'a self,
wheels: &'a [RemoteDistribution],
wheels: Vec<RemoteDistribution>,
target: &'a Path,
) -> Result<Vec<InMemoryDistribution>> {
// Create the wheel cache subdirectory, if necessary.
let wheel_cache = WheelCache::new(target);
wheel_cache.init()?;
// Sort the wheels by size, largest first.
let mut wheels = wheels;
wheels.sort_unstable_by_key(|wheel| Reverse(wheel.file().size));
// Phase 1: Fetch the wheels in parallel.
let mut fetches = JoinSet::new();
let mut downloads = Vec::with_capacity(wheels.len());

View file

@ -1,3 +1,4 @@
use std::cmp::Reverse;
use std::path::Path;
use anyhow::Result;
@ -38,6 +39,10 @@ impl Unzipper {
let wheel_cache = WheelCache::new(target);
wheel_cache.init()?;
// Sort the wheels by size, largest first.
let mut downloads = downloads;
downloads.sort_unstable_by_key(|wheel| Reverse(wheel.buffer.len()));
let staging = tempfile::tempdir_in(wheel_cache.root())?;
// Unpack the wheels into the cache.