From 35fd86631b6a57e5c453055cad159abd672e20ee Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 20 Nov 2023 03:22:52 -0800 Subject: [PATCH] Unify distribution operations into a single crate (#460) ## Summary This PR unifies the behavior that lived in the resolver's `distribution` crates with the behaviors that were spread between the various structs in the installer crate into a single `Fetcher` struct that is intended to manage all interactions with distributions. Specifically, the interface of this struct is such that it can access distribution metadata, download distributions, return those downloads, etc., all with a common cache. Overall, this is mostly just DRYing up code that was repeated between the two crates, and putting it behind a reasonable shared interface. --- Cargo.lock | 41 ++- crates/README.md | 11 +- crates/distribution-types/src/lib.rs | 24 ++ crates/puffin-cli/Cargo.toml | 1 + crates/puffin-cli/src/commands/pip_sync.rs | 4 +- crates/puffin-cli/src/commands/reporters.rs | 12 +- crates/puffin-dispatch/Cargo.toml | 5 +- crates/puffin-dispatch/src/lib.rs | 4 +- crates/puffin-distribution/Cargo.toml | 31 ++ crates/puffin-distribution/src/download.rs | 186 ++++++++++ crates/puffin-distribution/src/fetcher.rs | 334 ++++++++++++++++++ crates/puffin-distribution/src/lib.rs | 10 + crates/puffin-distribution/src/reporter.rs | 39 ++ crates/puffin-distribution/src/unzip.rs | 81 +++++ .../src/vendor/cloneable_seekable_reader.rs | 6 + .../src/vendor/mod.rs | 0 crates/puffin-installer/Cargo.toml | 5 +- crates/puffin-installer/src/builder.rs | 49 +-- crates/puffin-installer/src/downloader.rs | 239 +------------ crates/puffin-installer/src/lib.rs | 3 +- crates/puffin-installer/src/unzipper.rs | 71 +--- crates/puffin-macros/Cargo.toml | 2 - crates/puffin-resolver/Cargo.toml | 1 + .../src/distribution/built_dist.rs | 69 ---- .../src/distribution/cached_wheel.rs | 63 ---- .../puffin-resolver/src/distribution/mod.rs | 6 - .../src/distribution/source_dist.rs | 
235 ------------ crates/puffin-resolver/src/error.rs | 37 +- crates/puffin-resolver/src/lib.rs | 1 - crates/puffin-resolver/src/resolver.rs | 204 ++++------- 30 files changed, 880 insertions(+), 894 deletions(-) create mode 100644 crates/puffin-distribution/Cargo.toml create mode 100644 crates/puffin-distribution/src/download.rs create mode 100644 crates/puffin-distribution/src/fetcher.rs create mode 100644 crates/puffin-distribution/src/lib.rs create mode 100644 crates/puffin-distribution/src/reporter.rs create mode 100644 crates/puffin-distribution/src/unzip.rs rename crates/{puffin-installer => puffin-distribution}/src/vendor/cloneable_seekable_reader.rs (98%) rename crates/{puffin-installer => puffin-distribution}/src/vendor/mod.rs (100%) delete mode 100644 crates/puffin-resolver/src/distribution/built_dist.rs delete mode 100644 crates/puffin-resolver/src/distribution/cached_wheel.rs delete mode 100644 crates/puffin-resolver/src/distribution/mod.rs delete mode 100644 crates/puffin-resolver/src/distribution/source_dist.rs diff --git a/Cargo.lock b/Cargo.lock index 4a3fafc82..140d6c85e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -988,10 +988,11 @@ dependencies = [ [[package]] name = "fs-err" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0845fa252299212f0389d64ba26f34fa32cfe41588355f21ed507c59a0f64541" +checksum = "fb5fd9bcbe8b1087cbd395b51498c01bc997cef73e778a80b77a811af5e2d29f" dependencies = [ + "autocfg", "tokio", ] @@ -2410,6 +2411,7 @@ dependencies = [ "puffin-cache", "puffin-client", "puffin-dispatch", + "puffin-distribution", "puffin-installer", "puffin-interpreter", "puffin-normalize", @@ -2503,6 +2505,7 @@ version = "0.0.1" dependencies = [ "anyhow", "distribution-types", + "fs-err", "gourgeist", "itertools 0.11.0", "pep508_rs", @@ -2510,15 +2513,41 @@ dependencies = [ "platform-tags", "puffin-build", "puffin-client", + "puffin-distribution", "puffin-installer", "puffin-interpreter", 
"puffin-resolver", "puffin-traits", "pypi-types", "tempfile", + "tokio", "tracing", ] +[[package]] +name = "puffin-distribution" +version = "0.0.1" +dependencies = [ + "anyhow", + "bytesize", + "distribution-filename", + "distribution-types", + "fs-err", + "install-wheel-rs", + "platform-tags", + "puffin-client", + "puffin-git", + "puffin-traits", + "pypi-types", + "rayon", + "tempfile", + "tokio", + "tokio-util", + "tracing", + "url", + "zip", +] + [[package]] name = "puffin-git" version = "0.0.1" @@ -2543,8 +2572,6 @@ name = "puffin-installer" version = "0.0.1" dependencies = [ "anyhow", - "bytesize", - "cacache", "distribution-filename", "distribution-types", "fs-err", @@ -2554,6 +2581,7 @@ dependencies = [ "pep508_rs", "puffin-cache", "puffin-client", + "puffin-distribution", "puffin-git", "puffin-interpreter", "puffin-normalize", @@ -2563,10 +2591,8 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "tokio-util", "tracing", "url", - "zip", ] [[package]] @@ -2590,10 +2616,8 @@ dependencies = [ name = "puffin-macros" version = "0.0.1" dependencies = [ - "colored", "fxhash", "once_cell", - "tracing", ] [[package]] @@ -2633,6 +2657,7 @@ dependencies = [ "pubgrub", "puffin-cache", "puffin-client", + "puffin-distribution", "puffin-git", "puffin-interpreter", "puffin-macros", diff --git a/crates/README.md b/crates/README.md index bed8d9052..26bb06d0f 100644 --- a/crates/README.md +++ b/crates/README.md @@ -9,6 +9,11 @@ Functionality for benchmarking Puffin. Parse built distribution (wheel) and source distribution (sdist) filenames to extract structured metadata. +## [distribution-types](./distribution-types) + +Abstractions for representing built distributions (wheels) and source distributions (sdists), and +the sources from which they can be downloaded. + ## [gourgeist](./gourgeist) A `venv` replacement to create virtual environments in Rust. @@ -58,10 +63,10 @@ Development utilities for Puffin. 
A centralized `struct` for resolving and building source distributions in isolated environments. Implements the traits defined in `puffin-traits`. -## [distribution-types](./distribution-types) +## [puffin-distribution](./puffin-distribution) -Abstractions for representing built distributions (wheels) and source distributions (sdists), and -the sources from which they can be downloaded. +Client for interacting with built distributions (wheels) and source distributions (sdists). +Capable of fetching metadata, distribution contents, etc. ## [puffin-git](./puffin-git) diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index db5c59c4a..cadbb14f7 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -139,6 +139,30 @@ impl Dist { Dist::Source(source) => source.file(), } } + + #[must_use] + pub fn with_url(self, url: Url) -> Self { + match self { + Self::Built(built) => Self::Built(match built { + BuiltDist::DirectUrl(dist) => BuiltDist::DirectUrl(DirectUrlBuiltDist { + name: dist.name, + url, + }), + dist @ BuiltDist::Registry(_) => dist, + }), + Self::Source(source) => Self::Source(match source { + SourceDist::DirectUrl(dist) => SourceDist::DirectUrl(DirectUrlSourceDist { + name: dist.name, + url, + }), + SourceDist::Git(dist) => SourceDist::Git(GitSourceDist { + name: dist.name, + url, + }), + dist @ SourceDist::Registry(_) => dist, + }), + } + } } impl BuiltDist { diff --git a/crates/puffin-cli/Cargo.toml b/crates/puffin-cli/Cargo.toml index 986c100bd..ed60a7b57 100644 --- a/crates/puffin-cli/Cargo.toml +++ b/crates/puffin-cli/Cargo.toml @@ -26,6 +26,7 @@ puffin-cache = { path = "../puffin-cache", features = ["clap"] } puffin-client = { path = "../puffin-client" } puffin-dispatch = { path = "../puffin-dispatch" } distribution-types = { path = "../distribution-types" } +puffin-distribution = { path = "../puffin-distribution" } puffin-installer = { path = "../puffin-installer" } 
puffin-interpreter = { path = "../puffin-interpreter" } puffin-normalize = { path = "../puffin-normalize" } diff --git a/crates/puffin-cli/src/commands/pip_sync.rs b/crates/puffin-cli/src/commands/pip_sync.rs index a8e8c8625..80fa18504 100644 --- a/crates/puffin-cli/src/commands/pip_sync.rs +++ b/crates/puffin-cli/src/commands/pip_sync.rs @@ -210,8 +210,8 @@ pub(crate) async fn sync_requirements( downloads .into_iter() .partition_map(|download| match download { - puffin_installer::Download::Wheel(wheel) => Either::Left(wheel), - puffin_installer::Download::SourceDist(sdist) => Either::Right(sdist), + puffin_distribution::Download::Wheel(wheel) => Either::Left(wheel), + puffin_distribution::Download::SourceDist(sdist) => Either::Right(sdist), }); // Build any missing source distributions. diff --git a/crates/puffin-cli/src/commands/reporters.rs b/crates/puffin-cli/src/commands/reporters.rs index 441202a83..6339f625d 100644 --- a/crates/puffin-cli/src/commands/reporters.rs +++ b/crates/puffin-cli/src/commands/reporters.rs @@ -1,12 +1,12 @@ -use colored::Colorize; use std::sync::{Arc, Mutex}; use std::time::Duration; +use colored::Colorize; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use url::Url; -use distribution_types::{CachedDist, Dist, Metadata, SourceDist, VersionOrUrl}; -use puffin_installer::Download; +use distribution_types::{CachedDist, Dist, Metadata, VersionOrUrl}; +use puffin_distribution::Download; use puffin_normalize::ExtraName; use puffin_normalize::PackageName; @@ -246,7 +246,7 @@ impl puffin_resolver::ResolverReporter for ResolverReporter { self.progress.finish_and_clear(); } - fn on_build_start(&self, dist: &SourceDist) -> usize { + fn on_build_start(&self, dist: &Dist) -> usize { let progress = self.multi_progress.insert_before( &self.progress, ProgressBar::with_draw_target(None, self.printer.target()), @@ -264,7 +264,7 @@ impl puffin_resolver::ResolverReporter for ResolverReporter { bars.len() - 1 } - fn on_build_complete(&self, 
dist: &SourceDist, index: usize) { + fn on_build_complete(&self, dist: &Dist, index: usize) { let bars = self.bars.lock().unwrap(); let progress = &bars[index]; progress.finish_with_message(format!( @@ -311,7 +311,7 @@ trait ColorDisplay { fn to_color_string(&self) -> String; } -impl ColorDisplay for &SourceDist { +impl ColorDisplay for &Dist { fn to_color_string(&self) -> String { let name = self.name(); let version_or_url = self.version_or_url(); diff --git a/crates/puffin-dispatch/Cargo.toml b/crates/puffin-dispatch/Cargo.toml index da0bb1720..b1806121c 100644 --- a/crates/puffin-dispatch/Cargo.toml +++ b/crates/puffin-dispatch/Cargo.toml @@ -11,13 +11,14 @@ authors = { workspace = true } license = { workspace = true } [dependencies] +distribution-types = { path = "../distribution-types" } gourgeist = { path = "../gourgeist" } pep508_rs = { path = "../pep508-rs" } platform-host = { path = "../platform-host" } platform-tags = { path = "../platform-tags" } puffin-build = { path = "../puffin-build" } puffin-client = { path = "../puffin-client" } -distribution-types = { path = "../distribution-types" } +puffin-distribution = { path = "../puffin-distribution" } puffin-installer = { path = "../puffin-installer" } puffin-interpreter = { path = "../puffin-interpreter" } puffin-resolver = { path = "../puffin-resolver" } @@ -25,6 +26,8 @@ puffin-traits = { path = "../puffin-traits" } pypi-types = { path = "../pypi-types" } anyhow = { workspace = true } +fs-err = { workspace = true } itertools = { workspace = true } tempfile = { workspace = true } +tokio = { workspace = true } tracing = { workspace = true } diff --git a/crates/puffin-dispatch/src/lib.rs b/crates/puffin-dispatch/src/lib.rs index 6822d1310..991cf83b4 100644 --- a/crates/puffin-dispatch/src/lib.rs +++ b/crates/puffin-dispatch/src/lib.rs @@ -164,8 +164,8 @@ impl BuildContext for BuildDispatch { downloads .into_iter() .partition_map(|download| match download { - puffin_installer::Download::Wheel(wheel) => 
Either::Left(wheel), - puffin_installer::Download::SourceDist(sdist) => Either::Right(sdist), + puffin_distribution::Download::Wheel(wheel) => Either::Left(wheel), + puffin_distribution::Download::SourceDist(sdist) => Either::Right(sdist), }); // Build any missing source distributions. diff --git a/crates/puffin-distribution/Cargo.toml b/crates/puffin-distribution/Cargo.toml new file mode 100644 index 000000000..992d94774 --- /dev/null +++ b/crates/puffin-distribution/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "puffin-distribution" +version = "0.0.1" +edition = { workspace = true } +rust-version = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +license = { workspace = true } + +[dependencies] +distribution-filename = { path = "../distribution-filename" } +distribution-types = { path = "../distribution-types" } +install-wheel-rs = { path = "../install-wheel-rs" } +platform-tags = { path = "../platform-tags" } +puffin-client = { path = "../puffin-client" } +puffin-git = { path = "../puffin-git" } +puffin-traits = { path = "../puffin-traits" } +pypi-types = { path = "../pypi-types" } + +anyhow = { workspace = true } +bytesize = { workspace = true } +fs-err = { workspace = true } +rayon = { workspace = true } +tempfile = { workspace = true } +tokio = { workspace = true } +tokio-util = { workspace = true, features = ["compat"] } +tracing = { workspace = true } +url = { workspace = true } +zip = { workspace = true } diff --git a/crates/puffin-distribution/src/download.rs b/crates/puffin-distribution/src/download.rs new file mode 100644 index 000000000..09920afbc --- /dev/null +++ b/crates/puffin-distribution/src/download.rs @@ -0,0 +1,186 @@ +use std::path::PathBuf; +use std::str::FromStr; + +use anyhow::{format_err, Context, Result}; +use tempfile::TempDir; +use zip::ZipArchive; + +use distribution_filename::WheelFilename; +use distribution_types::{Dist, 
RemoteSource}; +use install_wheel_rs::find_dist_info; +use pypi_types::Metadata21; + +/// A downloaded wheel that's stored in-memory. +#[derive(Debug)] +pub struct InMemoryWheel { + /// The remote distribution from which this wheel was downloaded. + pub(crate) dist: Dist, + /// The contents of the wheel. + pub(crate) buffer: Vec, +} + +/// A downloaded wheel that's stored on-disk. +#[derive(Debug)] +pub struct DiskWheel { + /// The remote distribution from which this wheel was downloaded. + pub(crate) dist: Dist, + /// The path to the downloaded wheel. + pub(crate) path: PathBuf, + /// The download location, to be dropped after use. + #[allow(dead_code)] + pub(crate) temp_dir: Option, +} + +/// A downloaded wheel. +#[derive(Debug)] +pub enum WheelDownload { + InMemory(InMemoryWheel), + Disk(DiskWheel), +} + +/// A downloaded source distribution. +#[derive(Debug)] +pub struct SourceDistDownload { + /// The remote distribution from which this source distribution was downloaded. + pub(crate) dist: Dist, + /// The path to the downloaded archive or directory. + pub(crate) sdist_file: PathBuf, + /// The subdirectory within the archive or directory. + pub(crate) subdirectory: Option, + /// We can't use source dist archives directly; we build them into wheels which we persist and then drop + /// the source distribution. This field is `None` for git dependencies, which we keep in the cache. + #[allow(dead_code)] + pub(crate) temp_dir: Option, +} + +/// A downloaded distribution, either a wheel or a source distribution. 
+#[derive(Debug)] +pub enum Download { + Wheel(WheelDownload), + SourceDist(SourceDistDownload), +} + +impl std::fmt::Display for Download { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Download::Wheel(wheel) => write!(f, "{wheel}"), + Download::SourceDist(sdist) => write!(f, "{sdist}"), + } + } +} + +impl std::fmt::Display for WheelDownload { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + WheelDownload::InMemory(wheel) => write!(f, "{}", wheel.dist), + WheelDownload::Disk(wheel) => write!(f, "{}", wheel.dist), + } + } +} + +impl std::fmt::Display for SourceDistDownload { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.dist) + } +} + +impl InMemoryWheel { + /// Read the [`Metadata21`] from a wheel. + pub fn read_dist_info(&self) -> Result { + let mut archive = ZipArchive::new(std::io::Cursor::new(&self.buffer))?; + let filename = self.filename()?; + let dist_info_dir = + find_dist_info(&filename, archive.file_names().map(|name| (name, name))) + .map_err(|err| format_err!("Invalid wheel {filename}: {err}"))? + .1; + let dist_info = + std::io::read_to_string(archive.by_name(&format!("{dist_info_dir}/METADATA"))?)?; + Ok(Metadata21::parse(dist_info.as_bytes())?) + } +} + +impl DiskWheel { + /// Read the [`Metadata21`] from a wheel. + pub fn read_dist_info(&self) -> Result { + let mut archive = ZipArchive::new(fs_err::File::open(&self.path)?)?; + let filename = self.filename()?; + let dist_info_dir = + find_dist_info(&filename, archive.file_names().map(|name| (name, name))) + .map_err(|err| format_err!("Invalid wheel {filename}: {err}"))? + .1; + let dist_info = + std::io::read_to_string(archive.by_name(&format!("{dist_info_dir}/METADATA"))?)?; + Ok(Metadata21::parse(dist_info.as_bytes())?) + } +} + +impl WheelDownload { + /// Read the [`Metadata21`] from a wheel. 
+ pub fn read_dist_info(&self) -> Result { + match self { + WheelDownload::InMemory(wheel) => wheel.read_dist_info(), + WheelDownload::Disk(wheel) => wheel.read_dist_info(), + } + } +} + +impl DiskWheel { + /// Return the [`Dist`] from which this wheel was downloaded. + pub fn remote(&self) -> &Dist { + &self.dist + } + + /// Return the [`WheelFilename`] of this wheel. + pub fn filename(&self) -> Result { + // If the wheel was downloaded to disk, it's either a download of a remote wheel, or a + // built source distribution, both of which imply a valid wheel filename. + let filename = WheelFilename::from_str( + self.path + .file_name() + .context("Missing filename")? + .to_str() + .context("Invalid filename")?, + )?; + Ok(filename) + } +} + +impl InMemoryWheel { + /// Return the [`Dist`] from which this wheel was downloaded. + pub fn remote(&self) -> &Dist { + &self.dist + } + + /// Return the [`WheelFilename`] of this wheel. + pub fn filename(&self) -> Result { + // If the wheel is an in-memory buffer, it's assumed that the underlying distribution is + // itself a wheel, which in turn requires that the filename be parseable. + let filename = WheelFilename::from_str(self.dist.filename()?)?; + Ok(filename) + } +} + +impl WheelDownload { + /// Return the [`Dist`] from which this wheel was downloaded. + pub fn remote(&self) -> &Dist { + match self { + WheelDownload::InMemory(wheel) => wheel.remote(), + WheelDownload::Disk(wheel) => wheel.remote(), + } + } + + /// Return the [`WheelFilename`] of this wheel. + pub fn filename(&self) -> Result { + match self { + WheelDownload::InMemory(wheel) => wheel.filename(), + WheelDownload::Disk(wheel) => wheel.filename(), + } + } +} + +impl SourceDistDownload { + /// Return the [`Dist`] from which this source distribution was downloaded. 
+ pub fn remote(&self) -> &Dist { + &self.dist + } +} diff --git a/crates/puffin-distribution/src/fetcher.rs b/crates/puffin-distribution/src/fetcher.rs new file mode 100644 index 000000000..4ac96b006 --- /dev/null +++ b/crates/puffin-distribution/src/fetcher.rs @@ -0,0 +1,334 @@ +use std::path::Path; +use std::str::FromStr; +use std::sync::Arc; + +use anyhow::Result; +use bytesize::ByteSize; +use fs_err::tokio as fs; +use tokio_util::compat::FuturesAsyncReadCompatExt; +use tracing::debug; +use url::Url; + +use distribution_filename::WheelFilename; +use distribution_types::direct_url::{DirectArchiveUrl, DirectGitUrl}; +use distribution_types::{BuiltDist, Dist, Identifier, Metadata, RemoteSource, SourceDist}; +use platform_tags::Tags; +use puffin_client::RegistryClient; +use puffin_git::{GitSource, GitUrl}; +use puffin_traits::BuildContext; +use pypi_types::Metadata21; + +use crate::reporter::Facade; +use crate::{DiskWheel, Download, InMemoryWheel, Reporter, SourceDistDownload, WheelDownload}; + +const GIT_CACHE: &str = "git-v0"; +const BUILT_WHEELS_CACHE: &str = "built-wheels-v0"; + +/// A high-level interface for accessing distribution metadata and source contents. +pub struct Fetcher<'a> { + cache: &'a Path, + reporter: Option>, +} + +impl<'a> Fetcher<'a> { + /// Initialize a [`Fetcher`]. + pub fn new(cache: &'a Path) -> Self { + Self { + cache, + reporter: None, + } + } + + /// Set the [`Reporter`] to use for this source distribution fetcher. + #[must_use] + pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self { + Self { + reporter: Some(Arc::new(reporter)), + ..self + } + } + + /// Return the [`Metadata21`] for a distribution, if it exists in the cache. + pub fn find_metadata(&self, dist: &Dist, tags: &Tags) -> Result> { + self.find_in_cache(dist, tags) + .map(|wheel| wheel.read_dist_info()) + .transpose() + } + + /// Fetch the [`Metadata21`] for a distribution. 
+ /// + /// If the given [`Dist`] is a source distribution, the distribution will be downloaded, built, + /// and cached. + pub async fn fetch_metadata( + &self, + dist: &Dist, + client: &RegistryClient, + build_context: &impl BuildContext, + ) -> Result { + match dist { + // Fetch the metadata directly from the registry. + Dist::Built(BuiltDist::Registry(wheel)) => { + let metadata = client.wheel_metadata(wheel.file.clone()).await?; + Ok(metadata) + } + // Fetch the distribution, then read the metadata (for built distributions), or build + // the distribution and _then_ read the metadata (for source distributions). + dist => match self.fetch_dist(dist, client).await? { + Download::Wheel(wheel) => wheel.read_dist_info(), + Download::SourceDist(sdist) => { + let wheel = self.build_sdist(sdist, build_context).await?; + wheel.read_dist_info() + } + }, + } + } + + /// Download a distribution. + pub async fn fetch_dist(&self, dist: &Dist, client: &RegistryClient) -> Result { + match &dist { + Dist::Built(BuiltDist::Registry(wheel)) => { + // Fetch the wheel. + let url = Url::parse(&wheel.file.url)?; + let reader = client.stream_external(&url).await?; + + // If the file is greater than 5MB, write it to disk; otherwise, keep it in memory. + let small_size = if let Some(size) = wheel.file.size { + let byte_size = ByteSize::b(size as u64); + if byte_size < ByteSize::mb(5) { + Some(size) + } else { + None + } + } else { + None + }; + if let Some(small_size) = small_size { + debug!( + "Fetching in-memory wheel from registry: {dist} ({})", + ByteSize::b(small_size as u64) + ); + + // Read into a buffer. 
+ let mut buffer = Vec::with_capacity(small_size); + let mut reader = tokio::io::BufReader::new(reader.compat()); + tokio::io::copy(&mut reader, &mut buffer).await?; + + Ok(Download::Wheel(WheelDownload::InMemory(InMemoryWheel { + dist: dist.clone(), + buffer, + }))) + } else { + let size = + wheel.file.size.map_or("unknown size".to_string(), |size| size.to_string()); + debug!("Fetching disk-based wheel from registry: {dist} ({size})"); + + // Download the wheel to a temporary file. + let temp_dir = tempfile::tempdir_in(self.cache)?; + let wheel_filename = &wheel.file.filename; + let wheel_file = temp_dir.path().join(wheel_filename); + let mut writer = tokio::fs::File::create(&wheel_file).await?; + tokio::io::copy(&mut reader.compat(), &mut writer).await?; + + Ok(Download::Wheel(WheelDownload::Disk(DiskWheel { + dist: dist.clone(), + path: wheel_file, + temp_dir: Some(temp_dir), + }))) + } + } + + Dist::Built(BuiltDist::DirectUrl(wheel)) => { + debug!("Fetching disk-based wheel from URL: {}", &wheel.url); + + // Fetch the wheel. + let reader = client.stream_external(&wheel.url).await?; + + // Download the wheel to a temporary file. + let temp_dir = tempfile::tempdir_in(self.cache)?; + let wheel_filename = wheel.filename()?; + let wheel_file = temp_dir.path().join(wheel_filename); + let mut writer = tokio::fs::File::create(&wheel_file).await?; + tokio::io::copy(&mut reader.compat(), &mut writer).await?; + + Ok(Download::Wheel(WheelDownload::Disk(DiskWheel { + dist: dist.clone(), + path: wheel_file, + temp_dir: Some(temp_dir), + }))) + } + + Dist::Source(SourceDist::Registry(sdist)) => { + debug!( + "Fetching source distribution from registry: {}", + &sdist.file.url + ); + + let url = Url::parse(&sdist.file.url)?; + let reader = client.stream_external(&url).await?; + + // Download the source distribution. 
+ let temp_dir = tempfile::tempdir_in(self.cache)?; + let sdist_filename = sdist.filename()?; + let sdist_file = temp_dir.path().join(sdist_filename); + let mut writer = tokio::fs::File::create(&sdist_file).await?; + tokio::io::copy(&mut reader.compat(), &mut writer).await?; + + Ok(Download::SourceDist(SourceDistDownload { + dist: dist.clone(), + sdist_file, + subdirectory: None, + temp_dir: Some(temp_dir), + })) + } + + Dist::Source(SourceDist::DirectUrl(sdist)) => { + debug!("Fetching source distribution from URL: {}", sdist.url); + + let DirectArchiveUrl { url, subdirectory } = DirectArchiveUrl::from(&sdist.url); + + let reader = client.stream_external(&url).await?; + let mut reader = tokio::io::BufReader::new(reader.compat()); + + // Download the source distribution. + let temp_dir = tempfile::tempdir_in(self.cache)?; + let sdist_filename = sdist.filename()?; + let sdist_file = temp_dir.path().join(sdist_filename); + let mut writer = tokio::fs::File::create(&sdist_file).await?; + tokio::io::copy(&mut reader, &mut writer).await?; + + Ok(Download::SourceDist(SourceDistDownload { + dist: dist.clone(), + sdist_file, + subdirectory, + temp_dir: Some(temp_dir), + })) + } + + Dist::Source(SourceDist::Git(sdist)) => { + debug!("Fetching source distribution from Git: {}", sdist.url); + + let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(&sdist.url)?; + + let git_dir = self.cache.join(GIT_CACHE); + let source = GitSource::new(url, git_dir); + let sdist_file = tokio::task::spawn_blocking(move || source.fetch()) + .await?? + .into(); + + Ok(Download::SourceDist(SourceDistDownload { + dist: dist.clone(), + sdist_file, + subdirectory, + temp_dir: None, + })) + } + } + } + + /// Build a downloaded source distribution. 
+ pub async fn build_sdist( + &self, + dist: SourceDistDownload, + build_context: &impl BuildContext, + ) -> Result { + let task = self + .reporter + .as_ref() + .map(|reporter| reporter.on_build_start(&dist.dist)); + + // Create a directory for the wheel. + let wheel_dir = self + .cache + .join(BUILT_WHEELS_CACHE) + .join(dist.dist.package_id()); + fs::create_dir_all(&wheel_dir).await?; + + // Build the wheel. + // TODO(charlie): If this is a Git dependency, we should do another checkout. If the same + // repository is used by multiple dependencies, at multiple commits, the local checkout may now + // point to the wrong commit. + let disk_filename = build_context + .build_source( + &dist.sdist_file, + dist.subdirectory.as_deref(), + &wheel_dir, + &dist.dist.to_string(), + ) + .await?; + let wheel_filename = wheel_dir.join(disk_filename); + + if let Some(task) = task { + if let Some(reporter) = self.reporter.as_ref() { + reporter.on_build_complete(&dist.dist, task); + } + } + + Ok(WheelDownload::Disk(DiskWheel { + dist: dist.dist, + path: wheel_filename, + temp_dir: None, + })) + } + + /// Find a built wheel in the cache. + fn find_in_cache(&self, dist: &Dist, tags: &Tags) -> Option { + let wheel_dir = self + .cache + .join(BUILT_WHEELS_CACHE) + .join(dist.distribution_id()); + let read_dir = fs_err::read_dir(wheel_dir).ok()?; + for entry in read_dir { + let Ok(entry) = entry else { + continue; + }; + let Ok(filename) = + WheelFilename::from_str(entry.file_name().to_string_lossy().as_ref()) + else { + continue; + }; + if filename.is_compatible(tags) { + return Some(DiskWheel { + dist: dist.clone(), + path: entry.path(), + temp_dir: None, + }); + } + } + None + } + + /// Given a remote source distribution, return a precise variant, if possible. + /// + /// For example, given a Git dependency with a reference to a branch or tag, return a URL + /// with a precise reference to the current commit of that branch or tag. 
+ /// + /// This method takes into account various normalizations that are independent from the Git + /// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+` + /// prefix kinds. + pub async fn precise(&self, dist: &Dist) -> Result> { + let Dist::Source(SourceDist::Git(sdist)) = dist else { + return Ok(None); + }; + + let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(&sdist.url)?; + + // If the commit already contains a complete SHA, short-circuit. + if url.precise().is_some() { + return Ok(None); + } + + // Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial + // commit, etc.). + let git_dir = self.cache.join(GIT_CACHE); + let source = if let Some(reporter) = self.reporter.clone() { + GitSource::new(url, git_dir).with_reporter(Facade::from(reporter)) + } else { + GitSource::new(url, git_dir) + }; + let precise = tokio::task::spawn_blocking(move || source.fetch()).await??; + let url = GitUrl::from(precise); + + // Re-encode as a URL. + Ok(Some(DirectGitUrl { url, subdirectory }.into())) + } +} diff --git a/crates/puffin-distribution/src/lib.rs b/crates/puffin-distribution/src/lib.rs new file mode 100644 index 000000000..95456738d --- /dev/null +++ b/crates/puffin-distribution/src/lib.rs @@ -0,0 +1,10 @@ +pub use crate::download::{DiskWheel, Download, InMemoryWheel, SourceDistDownload, WheelDownload}; +pub use crate::fetcher::Fetcher; +pub use crate::reporter::Reporter; +pub use crate::unzip::Unzip; + +mod download; +mod fetcher; +mod reporter; +mod unzip; +mod vendor; diff --git a/crates/puffin-distribution/src/reporter.rs b/crates/puffin-distribution/src/reporter.rs new file mode 100644 index 000000000..e4d4b850d --- /dev/null +++ b/crates/puffin-distribution/src/reporter.rs @@ -0,0 +1,39 @@ +use std::sync::Arc; + +use distribution_types::Dist; +use url::Url; + +pub trait Reporter: Send + Sync { + /// Callback to invoke when a source distribution build is kicked off. 
+ fn on_build_start(&self, dist: &Dist) -> usize; + + /// Callback to invoke when a source distribution build is complete. + fn on_build_complete(&self, dist: &Dist, id: usize); + + /// Callback to invoke when a repository checkout begins. + fn on_checkout_start(&self, url: &Url, rev: &str) -> usize; + + /// Callback to invoke when a repository checkout completes. + fn on_checkout_complete(&self, url: &Url, rev: &str, index: usize); +} + +/// A facade for converting from [`Reporter`] to [`puffin_git::Reporter`]. +pub(crate) struct Facade { + reporter: Arc, +} + +impl From> for Facade { + fn from(reporter: Arc) -> Self { + Self { reporter } + } +} + +impl puffin_git::Reporter for Facade { + fn on_checkout_start(&self, url: &Url, rev: &str) -> usize { + self.reporter.on_checkout_start(url, rev) + } + + fn on_checkout_complete(&self, url: &Url, rev: &str, index: usize) { + self.reporter.on_checkout_complete(url, rev, index); + } +} diff --git a/crates/puffin-distribution/src/unzip.rs b/crates/puffin-distribution/src/unzip.rs new file mode 100644 index 000000000..96e0d857b --- /dev/null +++ b/crates/puffin-distribution/src/unzip.rs @@ -0,0 +1,81 @@ +use std::io::{Read, Seek}; +use std::path::Path; + +use anyhow::Result; +use rayon::prelude::*; +use zip::ZipArchive; + +use crate::vendor::{CloneableSeekableReader, HasLength}; +use crate::{DiskWheel, InMemoryWheel, WheelDownload}; + +pub trait Unzip { + /// Unzip a wheel into the target directory. 
+ fn unzip(&self, target: &Path) -> Result<()>; +} + +impl Unzip for DiskWheel { + fn unzip(&self, target: &Path) -> Result<()> { + unzip_archive(fs_err::File::open(&self.path)?, target) + } +} + +impl Unzip for InMemoryWheel { + fn unzip(&self, target: &Path) -> Result<()> { + unzip_archive(std::io::Cursor::new(&self.buffer), target) + } +} + +impl Unzip for WheelDownload { + fn unzip(&self, target: &Path) -> Result<()> { + match self { + WheelDownload::InMemory(wheel) => wheel.unzip(target), + WheelDownload::Disk(wheel) => wheel.unzip(target), + } + } +} + +/// Unzip a zip archive into the target directory. +fn unzip_archive(reader: R, target: &Path) -> Result<()> { + // Unzip in parallel. + let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?; + (0..archive.len()) + .par_bridge() + .map(|file_number| { + let mut archive = archive.clone(); + let mut file = archive.by_index(file_number)?; + + // Determine the path of the file within the wheel. + let file_path = match file.enclosed_name() { + Some(path) => path.to_owned(), + None => return Ok(()), + }; + + // Create necessary parent directories. + let path = target.join(file_path); + if file.is_dir() { + fs_err::create_dir_all(path)?; + return Ok(()); + } + if let Some(parent) = path.parent() { + fs_err::create_dir_all(parent)?; + } + + // Write the file. + let mut outfile = fs_err::File::create(&path)?; + std::io::copy(&mut file, &mut outfile)?; + + // Set permissions. 
+ #[cfg(unix)] + { + use std::fs::Permissions; + use std::os::unix::fs::PermissionsExt; + + if let Some(mode) = file.unix_mode() { + std::fs::set_permissions(&path, Permissions::from_mode(mode))?; + } + } + + Ok(()) + }) + .collect::>() +} diff --git a/crates/puffin-installer/src/vendor/cloneable_seekable_reader.rs b/crates/puffin-distribution/src/vendor/cloneable_seekable_reader.rs similarity index 98% rename from crates/puffin-installer/src/vendor/cloneable_seekable_reader.rs rename to crates/puffin-distribution/src/vendor/cloneable_seekable_reader.rs index cbe8a8867..8c83a767d 100644 --- a/crates/puffin-installer/src/vendor/cloneable_seekable_reader.rs +++ b/crates/puffin-distribution/src/vendor/cloneable_seekable_reader.rs @@ -135,6 +135,12 @@ impl HasLength for Cursor> { } } +impl HasLength for Cursor<&Vec> { + fn len(&self) -> u64 { + self.get_ref().len() as u64 + } +} + #[cfg(test)] mod test { use std::io::{Cursor, Read, Seek, SeekFrom}; diff --git a/crates/puffin-installer/src/vendor/mod.rs b/crates/puffin-distribution/src/vendor/mod.rs similarity index 100% rename from crates/puffin-installer/src/vendor/mod.rs rename to crates/puffin-distribution/src/vendor/mod.rs diff --git a/crates/puffin-installer/Cargo.toml b/crates/puffin-installer/Cargo.toml index 271ed8fe3..858278043 100644 --- a/crates/puffin-installer/Cargo.toml +++ b/crates/puffin-installer/Cargo.toml @@ -17,6 +17,7 @@ pep508_rs = { path = "../pep508-rs" } puffin-cache = { path = "../puffin-cache" } puffin-client = { path = "../puffin-client" } distribution-types = { path = "../distribution-types" } +puffin-distribution = { path = "../puffin-distribution" } puffin-git = { path = "../puffin-git" } puffin-interpreter = { path = "../puffin-interpreter" } puffin-normalize = { path = "../puffin-normalize" } @@ -24,15 +25,11 @@ puffin-traits = { path = "../puffin-traits" } pypi-types = { path = "../pypi-types" } anyhow = { workspace = true } -bytesize = { workspace = true } -cacache = { workspace = 
true } fs-err = { workspace = true } fxhash = { workspace = true } rayon = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } -tokio-util = { workspace = true } tracing = { workspace = true } url = { workspace = true } -zip = { workspace = true } diff --git a/crates/puffin-installer/src/builder.rs b/crates/puffin-installer/src/builder.rs index 09ff44548..56f49004d 100644 --- a/crates/puffin-installer/src/builder.rs +++ b/crates/puffin-installer/src/builder.rs @@ -1,20 +1,14 @@ //! Build source distributions from downloaded archives. -//! -//! TODO(charlie): Unify with `crates/puffin-resolver/src/distribution/source_distribution.rs`. use std::cmp::Reverse; use anyhow::Result; -use fs_err::tokio as fs; use tracing::debug; -use distribution_types::{Dist, Metadata, RemoteSource}; +use distribution_types::{Dist, RemoteSource}; +use puffin_distribution::{Fetcher, SourceDistDownload, WheelDownload}; use puffin_traits::BuildContext; -use crate::downloader::{DiskWheel, SourceDistDownload, WheelDownload}; - -const BUILT_WHEELS_CACHE: &str = "built-wheels-v0"; - pub struct Builder<'a, T: BuildContext + Send + Sync> { build_context: &'a T, reporter: Option>, @@ -43,7 +37,7 @@ impl<'a, T: BuildContext + Send + Sync> Builder<'a, T> { // Sort the distributions by size. let mut dists = dists; dists.sort_unstable_by_key(|distribution| { - Reverse(distribution.dist.size().unwrap_or(usize::MAX)) + Reverse(distribution.remote().size().unwrap_or(usize::MAX)) }); // Build the distributions serially. 
@@ -51,7 +45,9 @@ impl<'a, T: BuildContext + Send + Sync> Builder<'a, T> { for dist in dists { debug!("Building source distribution: {dist}"); - let result = build_sdist(dist, self.build_context).await?; + let result = Fetcher::new(self.build_context.cache()) + .build_sdist(dist, self.build_context) + .await?; if let Some(reporter) = self.reporter.as_ref() { reporter.on_progress(result.remote()); @@ -68,39 +64,6 @@ impl<'a, T: BuildContext + Send + Sync> Builder<'a, T> { } } -/// Build a source distribution into a wheel. -async fn build_sdist( - dist: SourceDistDownload, - build_context: &T, -) -> Result { - // Create a directory for the wheel. - let wheel_dir = build_context - .cache() - .join(BUILT_WHEELS_CACHE) - .join(dist.dist.package_id()); - fs::create_dir_all(&wheel_dir).await?; - - // Build the wheel. - // TODO(charlie): If this is a Git dependency, we should do another checkout. If the same - // repository is used by multiple dependencies, at multiple commits, the local checkout may now - // point to the wrong commit. - let disk_filename = build_context - .build_source( - &dist.sdist_file, - dist.subdirectory.as_deref(), - &wheel_dir, - &dist.dist.to_string(), - ) - .await?; - let wheel_filename = wheel_dir.join(disk_filename); - - Ok(WheelDownload::Disk(DiskWheel { - dist: dist.dist, - path: wheel_filename, - temp_dir: None, - })) -} - pub trait Reporter: Send + Sync { /// Callback to invoke when a source distribution is built. 
fn on_progress(&self, dist: &Dist); diff --git a/crates/puffin-installer/src/downloader.rs b/crates/puffin-installer/src/downloader.rs index b1d2366cf..03c524fcf 100644 --- a/crates/puffin-installer/src/downloader.rs +++ b/crates/puffin-installer/src/downloader.rs @@ -3,28 +3,19 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::{bail, Result}; -use bytesize::ByteSize; -use tempfile::TempDir; use tokio::task::JoinSet; -use tokio_util::compat::FuturesAsyncReadCompatExt; -use tracing::debug; -use url::Url; -use distribution_types::direct_url::{DirectArchiveUrl, DirectGitUrl}; -use distribution_types::{BuiltDist, Dist, RemoteSource, SourceDist}; +use distribution_types::{Dist, RemoteSource}; use puffin_client::RegistryClient; -use puffin_git::GitSource; +use puffin_distribution::{Download, Fetcher}; use crate::locks::Locks; -const GIT_CACHE: &str = "git-v0"; - pub struct Downloader<'a> { client: &'a RegistryClient, cache: &'a Path, locks: Arc, reporter: Option>, - /// Block building source distributions by not downloading them no_build: bool, } @@ -49,7 +40,7 @@ impl<'a> Downloader<'a> { } } - /// Optionally, block downloading source distributions + /// Optionally, block downloading source distributions. #[must_use] pub fn with_no_build(self, no_build: bool) -> Self { Self { no_build, ..self } @@ -109,144 +100,8 @@ async fn fetch( ) -> Result { let lock = locks.acquire(&dist).await; let _guard = lock.lock().await; - - match &dist { - Dist::Built(BuiltDist::Registry(wheel)) => { - // Fetch the wheel. - let url = Url::parse(&wheel.file.url)?; - let reader = client.stream_external(&url).await?; - - // If the file is greater than 5MB, write it to disk; otherwise, keep it in memory. 
- let small_size = if let Some(size) = wheel.file.size { - let byte_size = ByteSize::b(size as u64); - if byte_size < ByteSize::mb(5) { - Some(size) - } else { - None - } - } else { - None - }; - if let Some(small_size) = small_size { - debug!( - "Fetching in-memory wheel from registry: {dist} ({})", - ByteSize::b(small_size as u64) - ); - - // Read into a buffer. - let mut buffer = Vec::with_capacity(small_size); - let mut reader = tokio::io::BufReader::new(reader.compat()); - tokio::io::copy(&mut reader, &mut buffer).await?; - - Ok(Download::Wheel(WheelDownload::InMemory(InMemoryWheel { - dist, - buffer, - }))) - } else { - let size = small_size.map_or("unknown size".to_string(), |size| size.to_string()); - debug!("Fetching disk-based wheel from registry: {dist} ({size})"); - - // Download the wheel to a temporary file. - let temp_dir = tempfile::tempdir_in(cache)?; - let wheel_filename = &wheel.file.filename; - let wheel_file = temp_dir.path().join(wheel_filename); - let mut writer = tokio::fs::File::create(&wheel_file).await?; - tokio::io::copy(&mut reader.compat(), &mut writer).await?; - - Ok(Download::Wheel(WheelDownload::Disk(DiskWheel { - dist, - path: wheel_file, - temp_dir: Some(temp_dir), - }))) - } - } - - Dist::Built(BuiltDist::DirectUrl(wheel)) => { - debug!("Fetching disk-based wheel from URL: {}", &wheel.url); - - // Fetch the wheel. - let reader = client.stream_external(&wheel.url).await?; - - // Download the wheel to a temporary file. 
- let temp_dir = tempfile::tempdir_in(cache)?; - let wheel_filename = wheel.filename()?; - let wheel_file = temp_dir.path().join(wheel_filename); - let mut writer = tokio::fs::File::create(&wheel_file).await?; - tokio::io::copy(&mut reader.compat(), &mut writer).await?; - - Ok(Download::Wheel(WheelDownload::Disk(DiskWheel { - dist, - path: wheel_file, - temp_dir: Some(temp_dir), - }))) - } - - Dist::Source(SourceDist::Registry(sdist)) => { - debug!( - "Fetching source distribution from registry: {}", - &sdist.file.url - ); - - let url = Url::parse(&sdist.file.url)?; - let reader = client.stream_external(&url).await?; - - // Download the source distribution. - let temp_dir = tempfile::tempdir_in(cache)?; - let sdist_filename = sdist.filename()?; - let sdist_file = temp_dir.path().join(sdist_filename); - let mut writer = tokio::fs::File::create(&sdist_file).await?; - tokio::io::copy(&mut reader.compat(), &mut writer).await?; - - Ok(Download::SourceDist(SourceDistDownload { - dist, - sdist_file, - subdirectory: None, - temp_dir: Some(temp_dir), - })) - } - - Dist::Source(SourceDist::DirectUrl(sdist)) => { - debug!("Fetching source distribution from URL: {}", sdist.url); - - let DirectArchiveUrl { url, subdirectory } = DirectArchiveUrl::from(&sdist.url); - - let reader = client.stream_external(&url).await?; - let mut reader = tokio::io::BufReader::new(reader.compat()); - - // Download the source distribution. 
- let temp_dir = tempfile::tempdir_in(cache)?; - let sdist_filename = sdist.filename()?; - let sdist_file = temp_dir.path().join(sdist_filename); - let mut writer = tokio::fs::File::create(&sdist_file).await?; - tokio::io::copy(&mut reader, &mut writer).await?; - - Ok(Download::SourceDist(SourceDistDownload { - dist, - sdist_file, - subdirectory, - temp_dir: Some(temp_dir), - })) - } - - Dist::Source(SourceDist::Git(sdist)) => { - debug!("Fetching source distribution from Git: {}", sdist.url); - - let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(&sdist.url)?; - - let git_dir = cache.join(GIT_CACHE); - let source = GitSource::new(url, git_dir); - let sdist_file = tokio::task::spawn_blocking(move || source.fetch()) - .await?? - .into(); - - Ok(Download::SourceDist(SourceDistDownload { - dist, - sdist_file, - subdirectory, - temp_dir: None, - })) - } - } + let metadata = Fetcher::new(&cache).fetch_dist(&dist, &client).await?; + Ok(metadata) } pub trait Reporter: Send + Sync { @@ -256,87 +111,3 @@ pub trait Reporter: Send + Sync { /// Callback to invoke when the operation is complete. fn on_download_complete(&self); } - -/// A downloaded wheel that's stored in-memory. -#[derive(Debug)] -pub struct InMemoryWheel { - /// The remote distribution from which this wheel was downloaded. - pub(crate) dist: Dist, - /// The contents of the wheel. - pub(crate) buffer: Vec, -} - -/// A downloaded wheel that's stored on-disk. -#[derive(Debug)] -pub struct DiskWheel { - /// The remote distribution from which this wheel was downloaded. - pub(crate) dist: Dist, - /// The path to the downloaded wheel. - pub(crate) path: PathBuf, - /// The download location, to be dropped after use. - #[allow(dead_code)] // We only want the drop implementation - pub(crate) temp_dir: Option, -} - -/// A downloaded wheel. 
-#[derive(Debug)] -pub enum WheelDownload { - InMemory(InMemoryWheel), - Disk(DiskWheel), -} - -impl WheelDownload { - /// Return the [`Dist`] from which this wheel was downloaded. - pub fn remote(&self) -> &Dist { - match self { - WheelDownload::InMemory(wheel) => &wheel.dist, - WheelDownload::Disk(wheel) => &wheel.dist, - } - } -} - -/// A downloaded source distribution. -#[derive(Debug)] -pub struct SourceDistDownload { - /// The remote distribution from which this source distribution was downloaded. - pub(crate) dist: Dist, - /// The path to the downloaded archive or directory. - pub(crate) sdist_file: PathBuf, - /// The subdirectory within the archive or directory. - pub(crate) subdirectory: Option, - /// We can't use source dist archives, we build them into wheels which we persist and then drop - /// the source distribution. This field is non for git dependencies, which we keep in the cache. - #[allow(dead_code)] // We only keep it for the drop impl - pub(crate) temp_dir: Option, -} - -/// A downloaded distribution, either a wheel or a source distribution. 
-#[derive(Debug)] -pub enum Download { - Wheel(WheelDownload), - SourceDist(SourceDistDownload), -} - -impl std::fmt::Display for Download { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Download::Wheel(wheel) => write!(f, "{wheel}"), - Download::SourceDist(sdist) => write!(f, "{sdist}"), - } - } -} - -impl std::fmt::Display for WheelDownload { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - WheelDownload::InMemory(wheel) => write!(f, "{}", wheel.dist), - WheelDownload::Disk(wheel) => write!(f, "{}", wheel.dist), - } - } -} - -impl std::fmt::Display for SourceDistDownload { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.dist) - } -} diff --git a/crates/puffin-installer/src/lib.rs b/crates/puffin-installer/src/lib.rs index 40e07fcdd..cad938753 100644 --- a/crates/puffin-installer/src/lib.rs +++ b/crates/puffin-installer/src/lib.rs @@ -1,5 +1,5 @@ pub use builder::{Builder, Reporter as BuildReporter}; -pub use downloader::{Download, Downloader, Reporter as DownloadReporter}; +pub use downloader::{Downloader, Reporter as DownloadReporter}; pub use installer::{Installer, Reporter as InstallReporter}; pub use plan::InstallPlan; pub use registry_index::RegistryIndex; @@ -18,4 +18,3 @@ mod site_packages; mod uninstall; mod unzipper; mod url_index; -mod vendor; diff --git a/crates/puffin-installer/src/unzipper.rs b/crates/puffin-installer/src/unzipper.rs index 13f783f05..edfad4901 100644 --- a/crates/puffin-installer/src/unzipper.rs +++ b/crates/puffin-installer/src/unzipper.rs @@ -1,18 +1,13 @@ use std::cmp::Reverse; -use std::io::{Read, Seek}; use std::path::Path; use anyhow::Result; -use rayon::iter::ParallelBridge; -use rayon::iter::ParallelIterator; use tracing::debug; -use zip::ZipArchive; -use distribution_types::{CachedDist, Dist, Identifier}; +use distribution_types::{CachedDist, Dist, Identifier, RemoteSource}; +use 
puffin_distribution::{Unzip, WheelDownload}; use crate::cache::WheelCache; -use crate::downloader::WheelDownload; -use crate::vendor::{CloneableSeekableReader, HasLength}; #[derive(Default)] pub struct Unzipper { @@ -40,10 +35,8 @@ impl Unzipper { // Sort the wheels by size. let mut downloads = downloads; - downloads.sort_unstable_by_key(|wheel| match wheel { - WheelDownload::Disk(_) => Reverse(usize::MIN), - WheelDownload::InMemory(wheel) => Reverse(wheel.buffer.len()), - }); + downloads + .sort_unstable_by_key(|wheel| Reverse(wheel.remote().size().unwrap_or(usize::MIN))); let staging = tempfile::tempdir_in(wheel_cache.root())?; @@ -57,7 +50,7 @@ impl Unzipper { // Unzip the wheel. tokio::task::spawn_blocking({ let target = staging.path().join(remote.distribution_id()); - move || unzip_wheel(download, &target) + move || download.unzip(&target) }) .await??; @@ -93,60 +86,6 @@ impl Unzipper { } } -/// Unzip a wheel into the target directory. -fn unzip_wheel(wheel: WheelDownload, target: &Path) -> Result<()> { - match wheel { - WheelDownload::InMemory(wheel) => unzip_archive(std::io::Cursor::new(wheel.buffer), target), - WheelDownload::Disk(wheel) => unzip_archive(fs_err::File::open(wheel.path)?, target), - } -} - -/// Unzip a zip archive into the target directory. -fn unzip_archive(reader: R, target: &Path) -> Result<()> { - // Unzip in parallel. - let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?; - (0..archive.len()) - .par_bridge() - .map(|file_number| { - let mut archive = archive.clone(); - let mut file = archive.by_index(file_number)?; - - // Determine the path of the file within the wheel. - let file_path = match file.enclosed_name() { - Some(path) => path.to_owned(), - None => return Ok(()), - }; - - // Create necessary parent directories. 
- let path = target.join(file_path); - if file.is_dir() { - fs_err::create_dir_all(path)?; - return Ok(()); - } - if let Some(parent) = path.parent() { - fs_err::create_dir_all(parent)?; - } - - // Write the file. - let mut outfile = fs_err::File::create(&path)?; - std::io::copy(&mut file, &mut outfile)?; - - // Set permissions. - #[cfg(unix)] - { - use std::fs::Permissions; - use std::os::unix::fs::PermissionsExt; - - if let Some(mode) = file.unix_mode() { - std::fs::set_permissions(&path, Permissions::from_mode(mode))?; - } - } - - Ok(()) - }) - .collect::>() -} - pub trait Reporter: Send + Sync { /// Callback to invoke when a wheel is unzipped. fn on_unzip_progress(&self, dist: &Dist); diff --git a/crates/puffin-macros/Cargo.toml b/crates/puffin-macros/Cargo.toml index 781d063fe..d644b3e14 100644 --- a/crates/puffin-macros/Cargo.toml +++ b/crates/puffin-macros/Cargo.toml @@ -10,7 +10,5 @@ authors = { workspace = true } license = { workspace = true } [dependencies] -colored = { workspace = true } fxhash = { workspace = true } once_cell = { workspace = true } -tracing = { workspace = true } diff --git a/crates/puffin-resolver/Cargo.toml b/crates/puffin-resolver/Cargo.toml index 2f5ca4e32..fcdd4135c 100644 --- a/crates/puffin-resolver/Cargo.toml +++ b/crates/puffin-resolver/Cargo.toml @@ -19,6 +19,7 @@ platform-tags = { path = "../platform-tags" } puffin-cache = { path = "../puffin-cache" } puffin-client = { path = "../puffin-client" } distribution-types = { path = "../distribution-types" } +puffin-distribution = { path = "../puffin-distribution" } puffin-git = { path = "../puffin-git" } puffin-interpreter = { path = "../puffin-interpreter" } puffin-macros = { path = "../puffin-macros" } diff --git a/crates/puffin-resolver/src/distribution/built_dist.rs b/crates/puffin-resolver/src/distribution/built_dist.rs deleted file mode 100644 index 3465531fc..000000000 --- a/crates/puffin-resolver/src/distribution/built_dist.rs +++ /dev/null @@ -1,69 +0,0 @@ -use 
std::path::Path; -use std::str::FromStr; - -use anyhow::{Context, Result}; -use fs_err::tokio as fs; -use tokio_util::compat::FuturesAsyncReadCompatExt; -use tracing::debug; - -use distribution_filename::WheelFilename; -use distribution_types::{DirectUrlBuiltDist, Identifier, RemoteSource}; -use platform_tags::Tags; -use puffin_client::RegistryClient; -use pypi_types::Metadata21; - -use crate::distribution::cached_wheel::CachedWheel; - -const REMOTE_WHEELS_CACHE: &str = "remote-wheels-v0"; - -/// Fetch a built distribution from a remote source, or from a local cache. -pub(crate) struct BuiltDistFetcher<'a>(&'a Path); - -impl<'a> BuiltDistFetcher<'a> { - /// Initialize a [`BuiltDistFetcher`] from a [`BuildContext`]. - pub(crate) fn new(cache: &'a Path) -> Self { - Self(cache) - } - - /// Read the [`Metadata21`] from a wheel, if it exists in the cache. - pub(crate) fn find_dist_info( - &self, - dist: &DirectUrlBuiltDist, - tags: &Tags, - ) -> Result> { - CachedWheel::find_in_cache(dist, tags, self.0.join(REMOTE_WHEELS_CACHE)) - .as_ref() - .map(|wheel| CachedWheel::read_dist_info(wheel).context("Failed to read dist info")) - .transpose() - } - - /// Download a wheel, storing it in the cache. - pub(crate) async fn download_wheel( - &self, - dist: &DirectUrlBuiltDist, - client: &RegistryClient, - ) -> Result { - debug!("Downloading: {dist}"); - let reader = client.stream_external(&dist.url).await?; - - // Create a directory for the wheel. - let wheel_dir = self - .0 - .join(REMOTE_WHEELS_CACHE) - .join(dist.distribution_id()); - fs::create_dir_all(&wheel_dir).await?; - - // Download the wheel. - let wheel_filename = dist.filename()?; - let wheel_file = wheel_dir.join(wheel_filename); - let mut writer = tokio::fs::File::create(&wheel_file).await?; - tokio::io::copy(&mut reader.compat(), &mut writer).await?; - - // Read the metadata from the wheel. 
- let wheel = CachedWheel::new(wheel_file, WheelFilename::from_str(wheel_filename)?); - let metadata = wheel.read_dist_info()?; - - debug!("Finished downloading: {dist}"); - Ok(metadata) - } -} diff --git a/crates/puffin-resolver/src/distribution/cached_wheel.rs b/crates/puffin-resolver/src/distribution/cached_wheel.rs deleted file mode 100644 index f9f838fd8..000000000 --- a/crates/puffin-resolver/src/distribution/cached_wheel.rs +++ /dev/null @@ -1,63 +0,0 @@ -use std::path::{Path, PathBuf}; -use std::str::FromStr; - -use anyhow::{format_err, Result}; -use zip::ZipArchive; - -use distribution_filename::WheelFilename; -use distribution_types::Identifier; -use install_wheel_rs::find_dist_info; -use platform_tags::Tags; -use pypi_types::Metadata21; - -/// A cached wheel built from a remote source. -#[derive(Debug)] -pub(super) struct CachedWheel { - path: PathBuf, - filename: WheelFilename, -} - -impl CachedWheel { - pub(super) fn new(path: PathBuf, filename: WheelFilename) -> Self { - Self { path, filename } - } - - /// Search for a wheel matching the tags that was built from the given distribution. - pub(super) fn find_in_cache( - dist: &T, - tags: &Tags, - cache: impl AsRef, - ) -> Option { - let wheel_dir = cache.as_ref().join(dist.distribution_id()); - let Ok(read_dir) = fs_err::read_dir(wheel_dir) else { - return None; - }; - for entry in read_dir { - let Ok(entry) = entry else { - continue; - }; - let Ok(filename) = - WheelFilename::from_str(entry.file_name().to_string_lossy().as_ref()) - else { - continue; - }; - if filename.is_compatible(tags) { - let path = entry.path().clone(); - return Some(CachedWheel { path, filename }); - } - } - None - } - - /// Read the [`Metadata21`] from a wheel. 
- pub(super) fn read_dist_info(&self) -> Result { - let mut archive = ZipArchive::new(fs_err::File::open(&self.path)?)?; - let filename = &self.filename; - let dist_info_dir = find_dist_info(filename, archive.file_names().map(|name| (name, name))) - .map_err(|err| format_err!("Invalid wheel {filename}: {err}"))? - .1; - let dist_info = - std::io::read_to_string(archive.by_name(&format!("{dist_info_dir}/METADATA"))?)?; - Ok(Metadata21::parse(dist_info.as_bytes())?) - } -} diff --git a/crates/puffin-resolver/src/distribution/mod.rs b/crates/puffin-resolver/src/distribution/mod.rs deleted file mode 100644 index f5eb2bbb3..000000000 --- a/crates/puffin-resolver/src/distribution/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -pub(crate) use built_dist::BuiltDistFetcher; -pub(crate) use source_dist::{Reporter as SourceDistributionReporter, SourceDistFetcher}; - -mod built_dist; -mod cached_wheel; -mod source_dist; diff --git a/crates/puffin-resolver/src/distribution/source_dist.rs b/crates/puffin-resolver/src/distribution/source_dist.rs deleted file mode 100644 index a48546cbf..000000000 --- a/crates/puffin-resolver/src/distribution/source_dist.rs +++ /dev/null @@ -1,235 +0,0 @@ -//! Fetch and build source distributions from remote sources. -//! -//! TODO(charlie): Unify with `crates/puffin-installer/src/sdist_builder.rs`. 
- -use std::str::FromStr; -use std::sync::Arc; - -use anyhow::{bail, Result}; -use fs_err::tokio as fs; -use tokio_util::compat::FuturesAsyncReadCompatExt; -use tracing::debug; -use url::Url; - -use distribution_filename::WheelFilename; -use distribution_types::direct_url::{DirectArchiveUrl, DirectGitUrl}; -use distribution_types::{Identifier, RemoteSource, SourceDist}; -use platform_tags::Tags; -use puffin_client::RegistryClient; -use puffin_git::{GitSource, GitUrl}; -use puffin_traits::BuildContext; -use pypi_types::Metadata21; - -use crate::distribution::cached_wheel::CachedWheel; - -const BUILT_WHEELS_CACHE: &str = "built-wheels-v0"; - -const GIT_CACHE: &str = "git-v0"; - -/// Fetch and build a source distribution from a remote source, or from a local cache. -pub(crate) struct SourceDistFetcher<'a, T: BuildContext> { - build_context: &'a T, - reporter: Option>, -} - -impl<'a, T: BuildContext> SourceDistFetcher<'a, T> { - /// Initialize a [`SourceDistFetcher`] from a [`BuildContext`]. - pub(crate) fn new(build_context: &'a T) -> Self { - Self { - build_context, - reporter: None, - } - } - - /// Set the [`Reporter`] to use for this source distribution fetcher. - #[must_use] - pub(crate) fn with_reporter(self, reporter: impl Reporter + 'static) -> Self { - Self { - reporter: Some(Arc::new(reporter)), - ..self - } - } - - /// Read the [`Metadata21`] from a built source distribution, if it exists in the cache. - pub(crate) fn find_dist_info( - &self, - dist: &SourceDist, - tags: &Tags, - ) -> Result> { - CachedWheel::find_in_cache( - dist, - tags, - self.build_context.cache().join(BUILT_WHEELS_CACHE), - ) - .as_ref() - .map(CachedWheel::read_dist_info) - .transpose() - } - - /// Download and build a source distribution, storing the built wheel in the cache. 
- pub(crate) async fn download_and_build_sdist( - &self, - dist: &SourceDist, - client: &RegistryClient, - ) -> Result { - debug!("Building: {dist}"); - - if self.build_context.no_build() { - bail!("Building source distributions is disabled"); - } - - let (temp_dir, sdist_file, subdirectory) = match dist { - SourceDist::Registry(sdist) => { - debug!( - "Fetching source distribution from registry: {}", - sdist.file.url - ); - - let url = Url::parse(&sdist.file.url)?; - let reader = client.stream_external(&url).await?; - - // Download the source distribution. - let temp_dir = tempfile::tempdir_in(self.build_context.cache())?; - let sdist_filename = sdist.filename()?; - let sdist_file = temp_dir.path().join(sdist_filename); - let mut writer = tokio::fs::File::create(&sdist_file).await?; - tokio::io::copy(&mut reader.compat(), &mut writer).await?; - - (Some(temp_dir), sdist_file, None) - } - - SourceDist::DirectUrl(sdist) => { - debug!("Fetching source distribution from URL: {}", sdist.url); - - let DirectArchiveUrl { url, subdirectory } = DirectArchiveUrl::from(&sdist.url); - - let reader = client.stream_external(&url).await?; - let mut reader = tokio::io::BufReader::new(reader.compat()); - - // Download the source distribution. 
- let temp_dir = tempfile::tempdir_in(self.build_context.cache())?; - let sdist_filename = sdist.filename()?; - let sdist_file = temp_dir.path().join(sdist_filename); - let mut writer = tokio::fs::File::create(&sdist_file).await?; - tokio::io::copy(&mut reader, &mut writer).await?; - - (Some(temp_dir), sdist_file, subdirectory) - } - - SourceDist::Git(sdist) => { - debug!("Fetching source distribution from Git: {}", sdist.url); - - let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(&sdist.url)?; - - let git_dir = self.build_context.cache().join(GIT_CACHE); - let source = if let Some(reporter) = &self.reporter { - GitSource::new(url, git_dir).with_reporter(Facade::from(reporter.clone())) - } else { - GitSource::new(url, git_dir) - }; - let sdist_file = tokio::task::spawn_blocking(move || source.fetch()) - .await?? - .into(); - - (None, sdist_file, subdirectory) - } - }; - - // Create a directory for the wheel. - let wheel_dir = self - .build_context - .cache() - .join(BUILT_WHEELS_CACHE) - .join(dist.distribution_id()); - fs::create_dir_all(&wheel_dir).await?; - - // Build the wheel. - let disk_filename = self - .build_context - .build_source( - &sdist_file, - subdirectory.as_deref(), - &wheel_dir, - &dist.to_string(), - ) - .await?; - - if let Some(temp_dir) = temp_dir { - temp_dir.close()?; - } - - // Read the metadata from the wheel. - let wheel = CachedWheel::new( - wheel_dir.join(&disk_filename), - WheelFilename::from_str(&disk_filename)?, - ); - let metadata = wheel.read_dist_info()?; - - debug!("Finished building: {dist}"); - Ok(metadata) - } - - /// Given a remote source distribution, return a precise variant, if possible. - /// - /// For example, given a Git dependency with a reference to a branch or tag, return a URL - /// with a precise reference to the current commit of that branch or tag. - /// - /// This method takes into account various normalizations that are independent from the Git - /// layer. 
For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+` - /// prefix kinds. - pub(crate) async fn precise(&self, dist: &SourceDist) -> Result> { - let SourceDist::Git(sdist) = dist else { - return Ok(None); - }; - - let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(&sdist.url)?; - - // If the commit already contains a complete SHA, short-circuit. - if url.precise().is_some() { - return Ok(None); - } - - // Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial - // commit, etc.). - let git_dir = self.build_context.cache().join(GIT_CACHE); - let source = if let Some(reporter) = &self.reporter { - GitSource::new(url, git_dir).with_reporter(Facade::from(reporter.clone())) - } else { - GitSource::new(url, git_dir) - }; - let precise = tokio::task::spawn_blocking(move || source.fetch()).await??; - let url = GitUrl::from(precise); - - // Re-encode as a URL. - Ok(Some(DirectGitUrl { url, subdirectory }.into())) - } -} - -pub(crate) trait Reporter: Send + Sync { - /// Callback to invoke when a repository checkout begins. - fn on_checkout_start(&self, url: &Url, rev: &str) -> usize; - - /// Callback to invoke when a repository checkout completes. - fn on_checkout_complete(&self, url: &Url, rev: &str, index: usize); -} - -/// A facade for converting from [`Reporter`] to [`puffin_git::Reporter`]. 
-struct Facade { - reporter: Arc, -} - -impl From> for Facade { - fn from(reporter: Arc) -> Self { - Self { reporter } - } -} - -impl puffin_git::Reporter for Facade { - fn on_checkout_start(&self, url: &Url, rev: &str) -> usize { - self.reporter.on_checkout_start(url, rev) - } - - fn on_checkout_complete(&self, url: &Url, rev: &str, index: usize) { - self.reporter.on_checkout_complete(url, rev, index); - } -} diff --git a/crates/puffin-resolver/src/error.rs b/crates/puffin-resolver/src/error.rs index 431e9cf5c..08aa452b8 100644 --- a/crates/puffin-resolver/src/error.rs +++ b/crates/puffin-resolver/src/error.rs @@ -5,7 +5,7 @@ use pubgrub::report::Reporter; use thiserror::Error; use url::Url; -use distribution_types::{BuiltDist, SourceDist}; +use distribution_types::{BuiltDist, Dist, SourceDist}; use pep508_rs::Requirement; use puffin_normalize::PackageName; @@ -115,33 +115,28 @@ impl From>> f } impl ResolveError { - pub fn from_source_dist(dist: SourceDist, err: anyhow::Error) -> Self { + pub fn from_dist(dist: Dist, err: anyhow::Error) -> Self { match dist { - SourceDist::Registry(sdist) => Self::RegistrySourceDist { - filename: sdist.file.filename.clone(), - err, - }, - SourceDist::DirectUrl(sdist) => Self::UrlSourceDist { - url: sdist.url.clone(), - err, - }, - SourceDist::Git(sdist) => Self::UrlSourceDist { - url: sdist.url.clone(), - err, - }, - } - } - - pub fn from_built_dist(dist: BuiltDist, err: anyhow::Error) -> Self { - match dist { - BuiltDist::Registry(wheel) => Self::RegistryBuiltDist { + Dist::Built(BuiltDist::Registry(wheel)) => Self::RegistryBuiltDist { filename: wheel.file.filename.clone(), err, }, - BuiltDist::DirectUrl(wheel) => Self::UrlBuiltDist { + Dist::Built(BuiltDist::DirectUrl(wheel)) => Self::UrlBuiltDist { url: wheel.url.clone(), err, }, + Dist::Source(SourceDist::Registry(sdist)) => Self::RegistrySourceDist { + filename: sdist.file.filename.clone(), + err, + }, + Dist::Source(SourceDist::DirectUrl(sdist)) => Self::UrlSourceDist { + 
url: sdist.url.clone(), + err, + }, + Dist::Source(SourceDist::Git(sdist)) => Self::UrlSourceDist { + url: sdist.url.clone(), + err, + }, } } } diff --git a/crates/puffin-resolver/src/lib.rs b/crates/puffin-resolver/src/lib.rs index 58050b735..f78826c53 100644 --- a/crates/puffin-resolver/src/lib.rs +++ b/crates/puffin-resolver/src/lib.rs @@ -9,7 +9,6 @@ pub use resolution_options::ResolutionOptions; pub use resolver::{BuildId, Reporter as ResolverReporter, Resolver}; mod candidate_selector; -mod distribution; mod error; mod file; mod finder; diff --git a/crates/puffin-resolver/src/resolver.rs b/crates/puffin-resolver/src/resolver.rs index df5addeb7..205b82737 100644 --- a/crates/puffin-resolver/src/resolver.rs +++ b/crates/puffin-resolver/src/resolver.rs @@ -1,5 +1,6 @@ //! Given a set of requirements, find a set of compatible packages. +use std::borrow::Cow; use std::sync::Arc; use anyhow::Result; @@ -12,25 +13,22 @@ use pubgrub::range::Range; use pubgrub::solver::{Incompatibility, State}; use pubgrub::type_aliases::DependencyConstraints; use tokio::select; -use tracing::{debug, error, trace}; +use tracing::{debug, trace}; use url::Url; use waitmap::WaitMap; use distribution_filename::WheelFilename; -use distribution_types::{ - BuiltDist, DirectUrlSourceDist, Dist, GitSourceDist, Identifier, Metadata, SourceDist, - VersionOrUrl, -}; +use distribution_types::{BuiltDist, Dist, Identifier, Metadata, SourceDist, VersionOrUrl}; use pep508_rs::{MarkerEnvironment, Requirement}; use platform_tags::Tags; use puffin_cache::CanonicalUrl; use puffin_client::RegistryClient; +use puffin_distribution::Fetcher; use puffin_normalize::{ExtraName, PackageName}; use puffin_traits::BuildContext; use pypi_types::{File, IndexUrl, Metadata21, SimpleJson}; use crate::candidate_selector::CandidateSelector; -use crate::distribution::{BuiltDistFetcher, SourceDistFetcher, SourceDistributionReporter}; use crate::error::ResolveError; use crate::file::DistFile; use crate::locks::Locks; @@ -42,7 
+40,7 @@ use crate::resolution::Graph; use crate::version_map::VersionMap; use crate::ResolutionOptions; -pub struct Resolver<'a, Context: BuildContext + Sync> { +pub struct Resolver<'a, Context: BuildContext + Send + Sync> { project: Option, requirements: Vec, constraints: Vec, @@ -58,7 +56,7 @@ pub struct Resolver<'a, Context: BuildContext + Sync> { reporter: Option>, } -impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { +impl<'a, Context: BuildContext + Send + Sync> Resolver<'a, Context> { /// Initialize a new resolver. pub fn new( manifest: Manifest, @@ -607,42 +605,65 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { .await } - // Fetch wheel metadata. - Request::Dist(Dist::Built(distribution)) => { - let metadata = - match &distribution { - BuiltDist::Registry(wheel) => { - self.client - .wheel_metadata(wheel.file.clone()) - .map_err(ResolveError::Client) - .await? - } - BuiltDist::DirectUrl(wheel) => { - let fetcher = BuiltDistFetcher::new(self.build_context.cache()); - match fetcher.find_dist_info(wheel, self.tags) { - Ok(Some(metadata)) => { - debug!("Found wheel metadata in cache: {wheel}"); - metadata - } - Ok(None) => { - debug!("Downloading wheel: {wheel}"); - fetcher.download_wheel(wheel, self.client).await.map_err( - |err| { - ResolveError::from_built_dist(distribution.clone(), err) - }, - )? - } - Err(err) => { - error!("Failed to read wheel from cache: {err}"); - fetcher.download_wheel(wheel, self.client).await.map_err( - |err| { - ResolveError::from_built_dist(distribution.clone(), err) - }, - )? - } - } - } - }; + // Fetch wheel metadata from the registry if possible. This is a fast-path to avoid + // reading from the cache in the common case: we cache wheel metadata in the HTTP + // cache, rather than downloading the wheel itself. 
+ Request::Dist(Dist::Built(BuiltDist::Registry(wheel))) => { + let metadata = self + .client + .wheel_metadata(wheel.file.clone()) + .map_err(ResolveError::Client) + .await?; + if metadata.name != *wheel.name() { + return Err(ResolveError::NameMismatch { + metadata: metadata.name, + given: wheel.name().clone(), + }); + } + Ok(Response::Dist( + Dist::Built(BuiltDist::Registry(wheel)), + metadata, + None, + )) + } + + // Fetch distribution metadata. + Request::Dist(distribution) => { + let lock = self.locks.acquire(&distribution).await; + let _guard = lock.lock().await; + + let fetcher = if let Some(reporter) = self.reporter.clone() { + Fetcher::new(self.build_context.cache()).with_reporter(Facade { reporter }) + } else { + Fetcher::new(self.build_context.cache()) + }; + + let precise_url = fetcher + .precise(&distribution) + .await + .map_err(|err| ResolveError::from_dist(distribution.clone(), err))?; + + // Insert the `precise`, if it exists. + let precise_distribution = match precise_url.as_ref() { + Some(url) => Cow::Owned(distribution.clone().with_url(url.clone())), + None => Cow::Borrowed(&distribution), + }; + + // Fetch the metadata for the distribution. + let metadata = { + if let Ok(Some(metadata)) = + fetcher.find_metadata(&precise_distribution, self.tags) + { + debug!("Found distribution metadata in cache: {precise_distribution}"); + metadata + } else { + debug!("Downloading distribution: {precise_distribution}"); + fetcher + .fetch_metadata(&precise_distribution, self.client, self.build_context) + .await + .map_err(|err| ResolveError::from_dist(distribution.clone(), err))? + } + }; if metadata.name != *distribution.name() { return Err(ResolveError::NameMismatch { @@ -651,84 +672,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { }); } - Ok(Response::Dist(Dist::Built(distribution), metadata, None)) - } - - // Fetch source distribution metadata. 
- Request::Dist(Dist::Source(sdist)) => { - let lock = self.locks.acquire(&sdist).await; - let _guard = lock.lock().await; - - let fetcher = if let Some(reporter) = &self.reporter { - SourceDistFetcher::new(self.build_context).with_reporter(Facade { - reporter: reporter.clone(), - }) - } else { - SourceDistFetcher::new(self.build_context) - }; - - let precise = fetcher - .precise(&sdist) - .await - .map_err(|err| ResolveError::from_source_dist(sdist.clone(), err))?; - - let task = self - .reporter - .as_ref() - .map(|reporter| reporter.on_build_start(&sdist)); - - let metadata = { - // Insert the `precise`, if it exists. - let sdist = match sdist.clone() { - SourceDist::DirectUrl(sdist) => { - SourceDist::DirectUrl(DirectUrlSourceDist { - url: precise.clone().unwrap_or_else(|| sdist.url.clone()), - ..sdist - }) - } - SourceDist::Git(sdist) => SourceDist::Git(GitSourceDist { - url: precise.clone().unwrap_or_else(|| sdist.url.clone()), - ..sdist - }), - sdist @ SourceDist::Registry(_) => sdist, - }; - - match fetcher.find_dist_info(&sdist, self.tags) { - Ok(Some(metadata)) => { - debug!("Found source distribution metadata in cache: {sdist}"); - metadata - } - Ok(None) => { - debug!("Downloading source distribution: {sdist}"); - fetcher - .download_and_build_sdist(&sdist, self.client) - .await - .map_err(|err| ResolveError::from_source_dist(sdist.clone(), err))? - } - Err(err) => { - error!("Failed to read source distribution from cache: {err}",); - fetcher - .download_and_build_sdist(&sdist, self.client) - .await - .map_err(|err| ResolveError::from_source_dist(sdist.clone(), err))? 
- } - } - }; - - if metadata.name != *sdist.name() { - return Err(ResolveError::NameMismatch { - metadata: metadata.name, - given: sdist.name().clone(), - }); - } - - if let Some(task) = task { - if let Some(reporter) = self.reporter.as_ref() { - reporter.on_build_complete(&sdist, task); - } - } - - Ok(Response::Dist(Dist::Source(sdist), metadata, precise)) + Ok(Response::Dist(distribution, metadata, precise_url)) } } } @@ -768,10 +712,10 @@ pub trait Reporter: Send + Sync { fn on_complete(&self); /// Callback to invoke when a source distribution build is kicked off. - fn on_build_start(&self, dist: &SourceDist) -> usize; + fn on_build_start(&self, dist: &Dist) -> usize; /// Callback to invoke when a source distribution build is complete. - fn on_build_complete(&self, dist: &SourceDist, id: usize); + fn on_build_complete(&self, dist: &Dist, id: usize); /// Callback to invoke when a repository checkout begins. fn on_checkout_start(&self, url: &Url, rev: &str) -> usize; @@ -780,12 +724,20 @@ pub trait Reporter: Send + Sync { fn on_checkout_complete(&self, url: &Url, rev: &str, index: usize); } -/// A facade for converting from [`Reporter`] to [`puffin_git::Reporter`]. +/// A facade for converting from [`Reporter`] to [`puffin_distribution::Reporter`]. struct Facade { reporter: Arc, } -impl SourceDistributionReporter for Facade { +impl puffin_distribution::Reporter for Facade { + fn on_build_start(&self, dist: &Dist) -> usize { + self.reporter.on_build_start(dist) + } + + fn on_build_complete(&self, dist: &Dist, id: usize) { + self.reporter.on_build_complete(dist, id); + } + fn on_checkout_start(&self, url: &Url, rev: &str) -> usize { self.reporter.on_checkout_start(url, rev) }