From 8032d4606e24f15c97d66a9a58a1a7fe854d691d Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 5 Oct 2023 00:31:47 -0400 Subject: [PATCH] Misc. changes --- crates/puffin-cli/src/commands/install.rs | 175 ++++++++++++---------- crates/puffin-client/Cargo.toml | 3 +- crates/puffin-client/src/api.rs | 46 ++++-- crates/puffin-client/src/lib.rs | 2 +- 4 files changed, 134 insertions(+), 92 deletions(-) diff --git a/crates/puffin-cli/src/commands/install.rs b/crates/puffin-cli/src/commands/install.rs index ab16703cf..9d9f3db96 100644 --- a/crates/puffin-cli/src/commands/install.rs +++ b/crates/puffin-cli/src/commands/install.rs @@ -3,17 +3,29 @@ use std::path::Path; use std::str::FromStr; use anyhow::Result; +use futures::future::Either; use futures::{StreamExt, TryFutureExt}; use pep440_rs::Version; use pep508_rs::{MarkerEnvironment, Requirement, StringVersion, VersionOrUrl}; -use tracing::trace; +use tracing::debug; -use puffin_client::{PypiClientBuilder, SimpleJson}; +use puffin_client::{File, PypiClientBuilder, SimpleJson}; +use puffin_requirements::metadata::Metadata21; use puffin_requirements::package_name::PackageName; use puffin_requirements::wheel::WheelName; use crate::commands::ExitStatus; +enum Request { + Package(Requirement), + Version(Requirement, File), +} + +enum Response { + Package(SimpleJson, Requirement), + Version(Metadata21, Requirement), +} + pub(crate) async fn install(src: &Path) -> Result { // TODO(charlie): Fetch from the environment. let env = MarkerEnvironment { @@ -37,102 +49,107 @@ pub(crate) async fn install(src: &Path) -> Result { let requirements = puffin_requirements::Requirements::from_str(&requirements_txt)?; // Instantiate a client. - let client = PypiClientBuilder::default().build(); + let pypi_client = PypiClientBuilder::default().build(); + let proxy_client = PypiClientBuilder::default().build(); - // Fetch metadata in parallel. + // A channel to fetch package metadata (e.g., given `flask`, fetch all versions) and version + // metadata (e.g., given `flask==1.0.0`, fetch the metadata for that version). let (package_sink, package_stream) = futures::channel::mpsc::unbounded(); - let mut resolution: HashMap = HashMap::with_capacity(requirements.len()); - - // Create a stream of futures that fetch metadata for each requirement. + // Initialize the package stream. let mut package_stream = package_stream - .map(|requirement: Requirement| { - client - .simple(requirement.name.clone()) - .map_ok(move |metadata| (metadata, requirement)) + .map(|request: Request| match request { + Request::Package(requirement) => Either::Left( + pypi_client + .simple(requirement.name.clone()) + .map_ok(move |metadata| Response::Package(metadata, requirement)), + ), + Request::Version(requirement, file) => Either::Right( + proxy_client + .file(file.clone()) + .map_ok(move |metadata| Response::Version(metadata, requirement)), + ), }) - .buffer_unordered(48) - .ready_chunks(48); + .buffer_unordered(32) + .ready_chunks(32); - // Push all the requirements into the sink. + // Push all the requirements into the package sink. let mut in_flight: HashSet = HashSet::with_capacity(requirements.len()); for requirement in requirements.iter() { - package_sink.unbounded_send(requirement.clone())?; + debug!("--> adding root dependency: {}", requirement); + package_sink.unbounded_send(Request::Package(requirement.clone()))?; in_flight.insert(PackageName::normalize(&requirement.name)); } + // Resolve the requirements. + let mut resolution: HashMap = HashMap::with_capacity(requirements.len()); + while let Some(chunk) = package_stream.next().await { for result in chunk { - let (metadata, requirement): (SimpleJson, Requirement) = result?; + let result: Response = result?; + match result { + Response::Package(metadata, requirement) => { + // TODO(charlie): Support URLs. Right now, we treat a URL as an unpinned dependency. + let specifiers = + requirement + .version_or_url + .as_ref() + .and_then(|version_or_url| match version_or_url { + VersionOrUrl::VersionSpecifier(specifiers) => Some(specifiers), + VersionOrUrl::Url(_) => None, + }); - // Remove this requirement from the in-flight set. - let normalized_name = PackageName::normalize(&requirement.name); - in_flight.remove(&normalized_name); + // Pick a version that satisfies the requirement. + let Some(file) = metadata.files.iter().rev().find(|file| { + // We only support wheels for now. + let Ok(name) = WheelName::from_str(file.filename.as_str()) else { + return false; + }; - // TODO(charlie): Support URLs. Right now, we treat a URL as an unpinned dependency. - let specifiers = requirement - .version_or_url - .as_ref() - .and_then(|version_or_url| match version_or_url { - VersionOrUrl::VersionSpecifier(specifiers) => Some(specifiers), - VersionOrUrl::Url(_) => None, - }); + specifiers + .iter() + .all(|specifier| specifier.contains(&name.version)) + }) else { + continue; + }; - // Pick a version that satisfies the requirement. - let Some(file) = metadata.files.iter().rev().find(|file| { - // We only support wheels for now. - let Ok(name) = WheelName::from_str(file.filename.as_str()) else { - return false; - }; - - specifiers - .iter() - .all(|specifier| specifier.contains(&name.version)) - }) else { - continue; - }; - - // Fetch the metadata for this specific version. - let metadata = client.file(file).await?; - trace!( - "Selecting {version} for {requirement}", - version = metadata.version, - requirement = requirement - ); - - // Add to the resolved set. - let normalized_name = PackageName::normalize(&requirement.name); - resolution.insert(normalized_name, metadata.version); - - // Enqueue its dependencies. - for dependency in metadata.requires_dist { - if !dependency - .evaluate_markers(&env, requirement.extras.clone().unwrap_or_default()) - { - trace!("Ignoring {dependency} because it doesn't match the environment"); - continue; + package_sink.unbounded_send(Request::Version(requirement, file.clone()))?; } + Response::Version(metadata, requirement) => { + debug!( + "--> selected version {} for {}", + metadata.version, requirement + ); - if dependency - .extras - .as_ref() - .is_some_and(|extras| !extras.is_empty()) - { - trace!("Ignoring {dependency} because it has extras"); - continue; + // Add to the resolved set. + let normalized_name = PackageName::normalize(&requirement.name); + in_flight.remove(&normalized_name); + resolution.insert(normalized_name, metadata.version); + + // Enqueue its dependencies. + for dependency in metadata.requires_dist { + if !dependency + .evaluate_markers(&env, requirement.extras.clone().unwrap_or_default()) + { + debug!("--> ignoring {dependency} due to environment mismatch"); + continue; + } + + let normalized_name = PackageName::normalize(&dependency.name); + + if resolution.contains_key(&normalized_name) { + continue; + } + + if !in_flight.insert(normalized_name) { + continue; + } + + debug!("--> adding transitive dependency: {}", dependency); + + package_sink.unbounded_send(Request::Package(dependency))?; + } } - - let normalized_name = PackageName::normalize(&dependency.name); - if resolution.contains_key(&normalized_name) { - continue; - } - - if !in_flight.insert(normalized_name) { - continue; - } - - trace!("Enqueueing {dependency}"); - package_sink.unbounded_send(dependency)?; } } diff --git a/crates/puffin-client/Cargo.toml b/crates/puffin-client/Cargo.toml index 59eb01d79..a7bbfc38b 100644 --- a/crates/puffin-client/Cargo.toml +++ b/crates/puffin-client/Cargo.toml @@ -9,10 +9,11 @@ edition = "2021" puffin-requirements = { path = "../puffin-requirements" } http-cache-reqwest = { version = "0.11.3" } -reqwest = { version = "0.11.22", features = ["json", "gzip", "stream"] } +reqwest = { version = "0.11.22", features = ["json", "gzip", "brotli"] } reqwest-middleware = { version = "0.2.3" } reqwest-retry = { version = "0.3.0" } serde = { version = "1.0.188" } serde_json = { version = "1.0.107" } thiserror = { version = "1.0.49" } url = { version = "2.4.1" } +tracing = "0.1.37" diff --git a/crates/puffin-client/src/api.rs b/crates/puffin-client/src/api.rs index 85dcfe9a7..7c665f36e 100644 --- a/crates/puffin-client/src/api.rs +++ b/crates/puffin-client/src/api.rs @@ -1,5 +1,8 @@ +use std::fmt::Debug; + use reqwest::StatusCode; use serde::{Deserialize, Serialize}; +use tracing::trace; use url::Url; use puffin_requirements::metadata::Metadata21; @@ -13,6 +16,8 @@ impl PypiClient { &self, package_name: impl AsRef, ) -> Result { + let start = std::time::Instant::now(); + // Format the URL for PyPI. let mut url = self.registry.join("simple")?; url.path_segments_mut() @@ -21,10 +26,20 @@ impl PypiClient { url.path_segments_mut().unwrap().push(""); url.set_query(Some("format=application/vnd.pypi.simple.v1+json")); + trace!( + "fetching metadata for {} from {}", + package_name.as_ref(), + url + ); + // Fetch from the registry. - let text = self.simple_impl(package_name, &url).await?; - serde_json::from_str(&text) - .map_err(move |e| PypiClientError::from_json_err(e, url.to_string())) + let text = self.simple_impl(&package_name, &url).await?; + let payload = serde_json::from_str(&text) + .map_err(move |e| PypiClientError::from_json_err(e, "".to_string())); + + trace!("fetched metadata for {} in {:?}", url, start.elapsed()); + + payload } async fn simple_impl( @@ -35,6 +50,7 @@ impl PypiClient { Ok(self .client .get(url.clone()) + .header("Accept-Encoding", "gzip") .send() .await? .error_for_status() @@ -52,7 +68,9 @@ impl PypiClient { .await?) } - pub async fn file(&self, file: &File) -> Result { + pub async fn file(&self, file: File) -> Result { + let start = std::time::Instant::now(); + // Send to the proxy. let url = self.proxy.join( file.url @@ -60,9 +78,15 @@ impl PypiClient { .unwrap(), )?; + trace!("fetching file {} from {}", file.filename, url); + // Fetch from the registry. let text = self.file_impl(&file.filename, &url).await?; - Metadata21::parse(text.as_bytes()).map_err(std::convert::Into::into) + let payload = Metadata21::parse(text.as_bytes()).map_err(std::convert::Into::into); + + trace!("fetched file {} in {:?}", url, start.elapsed()); + + payload } async fn file_impl( @@ -91,7 +115,7 @@ impl PypiClient { } } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct SimpleJson { pub files: Vec, pub meta: Meta, @@ -99,7 +123,7 @@ pub struct SimpleJson { pub versions: Vec, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] pub struct File { pub core_metadata: Metadata, @@ -113,26 +137,26 @@ pub struct File { pub yanked: Yanked, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(untagged)] pub enum Metadata { Bool(bool), Hashes(Hashes), } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(untagged)] pub enum Yanked { Bool(bool), Reason(String), } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct Hashes { pub sha256: String, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] pub struct Meta { #[serde(rename = "_last-serial")] diff --git a/crates/puffin-client/src/lib.rs b/crates/puffin-client/src/lib.rs index 5d783c71d..6427814d3 100644 --- a/crates/puffin-client/src/lib.rs +++ b/crates/puffin-client/src/lib.rs @@ -1,4 +1,4 @@ -pub use api::SimpleJson; +pub use api::{File, SimpleJson}; pub use client::PypiClientBuilder; mod api;