diff --git a/Cargo.lock b/Cargo.lock index f230688cf..54d26bbd7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,6 +179,19 @@ dependencies = [ "tempfile", ] +[[package]] +name = "async-compression" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a" +dependencies = [ + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", +] + [[package]] name = "async-compression" version = "0.4.4" @@ -204,6 +217,40 @@ dependencies = [ "syn 2.0.38", ] +[[package]] +name = "async_http_range_reader" +version = "0.3.0" +source = "git+https://github.com/baszalmstra/async_http_range_reader#4cafe5afda889d53060e0565c949d4ffd6ef3786" +dependencies = [ + "bisection", + "futures", + "http-content-range", + "itertools", + "memmap2 0.9.0", + "reqwest", + "thiserror", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", +] + +[[package]] +name = "async_zip" +version = "0.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "795310de3218cde15219fc98c1cf7d8fe9db4865aab27fcf1d535d6cb61c6b54" +dependencies = [ + "async-compression 0.3.15", + "crc32fast", + "futures-util", + "log", + "pin-project", + "thiserror", + "tokio", + "tokio-util", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -255,6 +302,12 @@ dependencies = [ "serde", ] +[[package]] +name = "bisection" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "021e079a1bab0ecce6cf4b4b74c0c37afa4a697136eb3b127875c84a8f04a8c3" + [[package]] name = "bitflags" version = "1.3.2" @@ -337,7 +390,7 @@ dependencies = [ "futures", "hex", "libc", - "memmap2", + "memmap2 0.5.10", "miette", "reflink-copy", "serde", @@ -1189,6 +1242,12 @@ dependencies = [ "time", ] +[[package]] +name = "http-content-range" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9f0d1a8ef218a86416107794b34cc446958d9203556c312bb41eab4c924c1d2e" + [[package]] name = "http-serde" version = "1.1.3" @@ -1598,6 +1657,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memmap2" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deaba38d7abf1d4cca21cc89e932e542ba2b9258664d2a9ef0e61512039c9375" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.9.0" @@ -1896,6 +1964,26 @@ dependencies = [ "indexmap 2.0.2", ] +[[package]] +name = "pin-project" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + [[package]] name = "pin-project-lite" version = "0.2.13" @@ -2167,8 +2255,15 @@ dependencies = [ name = "puffin-client" version = "0.0.1" dependencies = [ + "anyhow", + "async_http_range_reader", + "async_zip", + "distribution-filename", + "fs-err", "futures", "http-cache-reqwest", + "install-wheel-rs", + "puffin-cache", "puffin-normalize", "puffin-package", "reqwest", @@ -2176,8 +2271,10 @@ dependencies = [ "reqwest-retry", "serde", "serde_json", + "tempfile", "thiserror", "tokio", + "tokio-util", "tracing", "url", ] @@ -2190,6 +2287,7 @@ dependencies = [ "clap", "colored", "directories", + "distribution-filename", "fs-err", "futures", "gourgeist", @@ -2209,6 +2307,7 @@ dependencies = [ "tracing", "tracing-indicatif", "tracing-subscriber", + "url", "which", ] @@ -2675,7 +2774,7 @@ version = "0.11.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" dependencies = 
[ - "async-compression", + "async-compression 0.4.4", "base64 0.21.5", "bytes", "encoding_rs", @@ -3409,6 +3508,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 89fcfa341..219dfbb01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,8 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] anyhow = { version = "1.0.75" } +async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", rev = "4cafe5afda889d53060e0565c949d4ffd6ef3786" } +async_zip = { version = "0.0.15", features = ["tokio", "deflate"] } bitflags = { version = "2.4.0" } cacache = { version = "11.7.1", default-features = false, features = ["tokio-runtime"] } camino = { version = "1.1.6", features = ["serde1"] } diff --git a/crates/install-wheel-rs/src/lib.rs b/crates/install-wheel-rs/src/lib.rs index dfef4256b..6615d8341 100644 --- a/crates/install-wheel-rs/src/lib.rs +++ b/crates/install-wheel-rs/src/lib.rs @@ -2,6 +2,7 @@ use std::io; +use distribution_filename::WheelFilename; use platform_info::PlatformInfoError; use thiserror::Error; use zip::result::ZipError; @@ -69,3 +70,45 @@ impl Error { } } } + +/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or +/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase. 
+/// Either way, we just search the wheel for the name +pub fn find_dist_info_metadata<'a, T: Copy>( + filename: &WheelFilename, + files: impl Iterator<Item = (T, &'a str)>, +) -> Result<(T, &'a str), String> { + let dist_info_matcher = format!( + "{}-{}", + filename.distribution.as_dist_info_name(), + filename.version + ); + let metadatas: Vec<_> = files + .filter_map(|(payload, path)| { + let (dir, file) = path.split_once('/')?; + let dir = dir.strip_suffix(".dist-info")?; + if dir.to_lowercase() == dist_info_matcher && file == "METADATA" { + Some((payload, path)) + } else { + None + } + }) + .collect(); + let (payload, path) = match metadatas[..] { + [] => { + return Err("no .dist-info directory".to_string()); + } + [(payload, path)] => (payload, path), + _ => { + return Err(format!( + "multiple .dist-info directories: {}", + metadatas + .into_iter() + .map(|(_, path)| path.to_string()) + .collect::<Vec<_>>() + .join(", ") + )); + } + }; + Ok((payload, path)) +} diff --git a/crates/puffin-client/Cargo.toml b/crates/puffin-client/Cargo.toml index 0fe743c50..bbdfa80ad 100644 --- a/crates/puffin-client/Cargo.toml +++ b/crates/puffin-client/Cargo.toml @@ -4,10 +4,16 @@ [dependencies] +distribution-filename = { path = "../distribution-filename" } +install-wheel-rs = { path = "../install-wheel-rs" } +puffin-cache = { path = "../puffin-cache" } puffin-normalize = { path = "../puffin-normalize" } puffin-package = { path = "../puffin-package" } +async_http_range_reader = { workspace = true } +async_zip = { workspace = true } futures = { workspace = true } +fs-err = { workspace = true, features = ["tokio"] } http-cache-reqwest = { workspace = true } reqwest = { workspace = true } reqwest-middleware = { workspace = true } @@ -15,6 +21,11 @@ reqwest-retry = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } thiserror = { workspace = true } -tokio = { workspace = true } +tempfile = { workspace = true } +tokio = { workspace = true, 
features = ["fs"] } +tokio-util = { workspace = true } tracing = { workspace = true } url = { workspace = true } + +[dev-dependencies] +anyhow = { workspace = true } diff --git a/crates/puffin-client/src/client.rs b/crates/puffin-client/src/client.rs index 5a756a216..1c1429b46 100644 --- a/crates/puffin-client/src/client.rs +++ b/crates/puffin-client/src/client.rs @@ -1,20 +1,32 @@ use std::fmt::Debug; use std::path::PathBuf; +use async_http_range_reader::{ + AsyncHttpRangeReader, AsyncHttpRangeReaderError, CheckSupportMethod, +}; +use async_zip::tokio::read::seek::ZipFileReader; use futures::{AsyncRead, StreamExt, TryStreamExt}; use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions}; -use reqwest::ClientBuilder; -use reqwest::StatusCode; +use reqwest::header::HeaderMap; +use reqwest::{header, Client, ClientBuilder, StatusCode}; use reqwest_middleware::ClientWithMiddleware; use reqwest_retry::policies::ExponentialBackoff; use reqwest_retry::RetryTransientMiddleware; -use tracing::trace; +use tempfile::tempfile; +use tokio::io::BufWriter; +use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; +use tracing::{debug, trace}; use url::Url; +use distribution_filename::WheelFilename; +use install_wheel_rs::find_dist_info_metadata; use puffin_normalize::PackageName; use puffin_package::pypi_types::{File, Metadata21, SimpleJson}; use crate::error::Error; +use crate::remote_metadata::{ + wheel_metadata_from_remote_zip, wheel_metadata_get_cached, wheel_metadata_write_cache, +}; /// A builder for an [`RegistryClient`]. 
#[derive(Debug, Clone)] @@ -96,10 +108,10 @@ impl RegistryClientBuilder { let mut client_builder = reqwest_middleware::ClientBuilder::new(client_raw.clone()).with(retry_strategy); - if let Some(path) = self.cache { + if let Some(path) = &self.cache { client_builder = client_builder.with(Cache(HttpCache { mode: CacheMode::Default, - manager: CACacheManager { path }, + manager: CACacheManager { path: path.clone() }, options: HttpCacheOptions::default(), })); } @@ -108,15 +120,16 @@ impl RegistryClientBuilder { let retry_strategy = RetryTransientMiddleware::new_with_policy(retry_policy); let uncached_client_builder = - reqwest_middleware::ClientBuilder::new(client_raw).with(retry_strategy); + reqwest_middleware::ClientBuilder::new(client_raw.clone()).with(retry_strategy); RegistryClient { index: self.index, extra_index: self.extra_index, no_index: self.no_index, - proxy: self.proxy, client: client_builder.build(), + client_raw, uncached_client: uncached_client_builder.build(), + cache: self.cache, } } } @@ -128,9 +141,11 @@ pub struct RegistryClient { pub(crate) extra_index: Vec, /// Ignore the package index, instead relying on local archives and caches. pub(crate) no_index: bool, - pub(crate) proxy: Url, pub(crate) client: ClientWithMiddleware, pub(crate) uncached_client: ClientWithMiddleware, + pub(crate) client_raw: Client, + /// Used for the remote wheel METADATA cache + pub(crate) cache: Option, } impl RegistryClient { @@ -184,33 +199,110 @@ impl RegistryClient { } /// Fetch the metadata from a wheel file. - pub async fn file(&self, file: File) -> Result { + pub async fn wheel_metadata( + &self, + file: File, + filename: WheelFilename, + ) -> Result { if self.no_index { return Err(Error::NoIndex(file.filename)); } - // Per PEP 658, if `data-dist-info-metadata` is available, we can request it directly; - // otherwise, send to our dedicated caching proxy. - let url = if file.data_dist_info_metadata.is_available() { - Url::parse(&format!("{}.metadata", file.url))? 
+ // If the metadata file is available at its own url (PEP 658), download it from there + let url = Url::parse(&file.url)?; + if file.data_dist_info_metadata.is_available() { + let url = Url::parse(&format!("{}.metadata", file.url))?; + trace!("Fetching file {} from {}", file.filename, url); + let text = self.wheel_metadata_impl(&url).await.map_err(|err| { + if err.status() == Some(StatusCode::NOT_FOUND) { + Error::FileNotFound(file.filename, err) + } else { + err.into() + } + })?; + + Ok(Metadata21::parse(text.as_bytes())?) + // If we lack PEP 658 support, try using HTTP range requests to read only the + // `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel + // into the cache and read from there } else { - self.proxy.join(file.url.parse::()?.path())? - }; - - trace!("Fetching file {} from {}", file.filename, url); - - // Fetch from the index. - let text = self.file_impl(&url).await.map_err(|err| { - if err.status() == Some(StatusCode::NOT_FOUND) { - Error::FileNotFound(file.filename, err) - } else { - err.into() - } - })?; - Metadata21::parse(text.as_bytes()).map_err(std::convert::Into::into) + self.wheel_metadata_no_index(&filename, &url).await + } } - async fn file_impl(&self, url: &Url) -> Result { + /// Get the wheel metadata if it isn't available in an index through PEP 658 + pub async fn wheel_metadata_no_index( + &self, + filename: &WheelFilename, + url: &Url, + ) -> Result { + Ok( + if let Some(cached_metadata) = + wheel_metadata_get_cached(url, self.cache.as_deref()).await + { + debug!("Cache hit for wheel metadata for {url}"); + cached_metadata + } else if let Some((mut reader, headers)) = self.range_reader(url.clone()).await? 
{ + debug!("Using remote zip reader for wheel metadata for {url}"); + let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?; + let metadata = Metadata21::parse(text.as_bytes())?; + let is_immutable = headers + .get(header::CACHE_CONTROL) + .and_then(|header| header.to_str().ok()) + .unwrap_or_default() + .split(',') + .any(|entry| entry.trim().to_lowercase() == "immutable"); + if is_immutable { + debug!("Immutable (cacheable) wheel metadata for {url}"); + wheel_metadata_write_cache(url, self.cache.as_deref(), &metadata).await?; + } + metadata + } else { + debug!("Downloading whole wheel to extract metadata from {url}"); + // TODO(konstin): Download the wheel into a cache shared with the installer instead + // Note that this branch is only hit when you're not using PyPI and the server where + // you host your wheels for some reason doesn't support range requests + // (tbh we should probably warn here and tell users to get a better registry because + // their current one makes resolution unnecessarily slow) + let temp_download = tempfile()?; + let mut writer = BufWriter::new(tokio::fs::File::from_std(temp_download)); + let mut reader = self.stream_external(url).await?.compat(); + tokio::io::copy(&mut reader, &mut writer).await?; + let temp_download = writer.into_inner(); + + let mut reader = ZipFileReader::new(temp_download.compat()) + .await + .map_err(|err| Error::Zip(filename.clone(), err))?; + + let ((metadata_idx, _metadata_entry), _path) = find_dist_info_metadata( + filename, + reader + .file() + .entries() + .iter() + .enumerate() + .filter_map(|(idx, e)| { + Some(((idx, e), e.entry().filename().as_str().ok()?)) + }), + ) + .map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?; + + // Read the contents of the METADATA file + let mut contents = Vec::new(); + reader + .reader_with_entry(metadata_idx) + .await + .map_err(|err| Error::Zip(filename.clone(), err))? 
+ .read_to_end_checked(&mut contents) + .await + .map_err(|err| Error::Zip(filename.clone(), err))?; + + Metadata21::parse(&contents)? + }, + ) + } + + async fn wheel_metadata_impl(&self, url: &Url) -> Result { Ok(self .client .get(url.clone()) @@ -244,4 +336,23 @@ impl RegistryClient { .into_async_read(), )) } + + /// An async for individual files inside a remote zip file, if the server supports it. Returns + /// the headers of the initial request for caching. + async fn range_reader( + &self, + url: Url, + ) -> Result, Error> { + let response = AsyncHttpRangeReader::new( + self.client_raw.clone(), + url.clone(), + CheckSupportMethod::Head, + ) + .await; + match response { + Ok((reader, headers)) => Ok(Some((reader, headers))), + Err(AsyncHttpRangeReaderError::HttpRangeRequestUnsupported) => Ok(None), + Err(err) => Err(err.into()), + } + } } diff --git a/crates/puffin-client/src/error.rs b/crates/puffin-client/src/error.rs index a4bf42303..7d97b6cbe 100644 --- a/crates/puffin-client/src/error.rs +++ b/crates/puffin-client/src/error.rs @@ -1,5 +1,10 @@ +use std::io; + +use async_http_range_reader::AsyncHttpRangeReaderError; +use async_zip::error::ZipError; use thiserror::Error; +use distribution_filename::WheelFilename; use puffin_package::pypi_types; #[derive(Debug, Error)] @@ -41,6 +46,18 @@ pub enum Error { source: serde_json::Error, url: String, }, + + #[error(transparent)] + AsyncHttpRangeReader(#[from] AsyncHttpRangeReaderError), + + #[error("Expected a single .dist-info directory in {0}, found {1}")] + InvalidDistInfo(WheelFilename, String), + + #[error("The wheel {0} is not a valid zip file")] + Zip(WheelFilename, #[source] ZipError), + + #[error(transparent)] + IO(#[from] io::Error), } impl Error { diff --git a/crates/puffin-client/src/lib.rs b/crates/puffin-client/src/lib.rs index 3262b0e3e..5c2abb19b 100644 --- a/crates/puffin-client/src/lib.rs +++ b/crates/puffin-client/src/lib.rs @@ -3,3 +3,4 @@ pub use error::Error; mod client; mod error; +mod 
remote_metadata; diff --git a/crates/puffin-client/src/remote_metadata.rs b/crates/puffin-client/src/remote_metadata.rs new file mode 100644 index 000000000..516558eb6 --- /dev/null +++ b/crates/puffin-client/src/remote_metadata.rs @@ -0,0 +1,148 @@ +use std::io; +use std::path::Path; + +use async_http_range_reader::AsyncHttpRangeReader; +use async_zip::tokio::read::seek::ZipFileReader; +use fs_err::tokio as fs; +use tokio_util::compat::TokioAsyncReadCompatExt; +use url::Url; + +use distribution_filename::WheelFilename; +use install_wheel_rs::find_dist_info_metadata; +use puffin_cache::CanonicalUrl; +use puffin_package::pypi_types::Metadata21; + +use crate::Error; + +const WHEEL_METADATA_FROM_ZIP_CACHE: &str = "wheel-metadata-v0"; + +/// Try to read the cached METADATA previously extracted from a remote zip, if it exists +pub(crate) async fn wheel_metadata_get_cached( + url: &Url, + cache: Option<&Path>, +) -> Option { + // TODO(konstin): Actual good cache layout + let path = cache? + .join(WHEEL_METADATA_FROM_ZIP_CACHE) + .join(puffin_cache::digest(&CanonicalUrl::new(url))); + if !path.is_file() { + return None; + } + let data = fs::read(path).await.ok()?; + serde_json::from_slice(&data).ok() +} + +/// Write the cached METADATA extracted from a remote zip to the cache +pub(crate) async fn wheel_metadata_write_cache( + url: &Url, + cache: Option<&Path>, + metadata: &Metadata21, +) -> io::Result<()> { + let Some(cache) = cache else { + return Ok(()); + }; + // TODO(konstin): Actual good cache layout + let dir = cache.join(WHEEL_METADATA_FROM_ZIP_CACHE); + fs::create_dir_all(&dir).await?; + let path = dir.join(puffin_cache::digest(&CanonicalUrl::new(url))); + fs::write(path, serde_json::to_vec(metadata)?).await +} + +/// Read the `.dist-info/METADATA` file from a async remote zip reader, so we avoid downloading the +/// entire wheel just for the one file. 
+/// +/// This method is derived from `prefix-dev/rip`, which is available under the following BSD-3 +/// Clause license: +/// +/// ```text +/// BSD 3-Clause License +/// +/// Copyright (c) 2023, prefix.dev GmbH +/// +/// Redistribution and use in source and binary forms, with or without +/// modification, are permitted provided that the following conditions are met: +/// +/// 1. Redistributions of source code must retain the above copyright notice, this +/// list of conditions and the following disclaimer. +/// +/// 2. Redistributions in binary form must reproduce the above copyright notice, +/// this list of conditions and the following disclaimer in the documentation +/// and/or other materials provided with the distribution. +/// +/// 3. Neither the name of the copyright holder nor the names of its +/// contributors may be used to endorse or promote products derived from +/// this software without specific prior written permission. +/// +/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +/// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +/// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +/// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +/// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +/// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +/// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+/// ``` +/// +/// Additional work and modifications to the originating source are available under the +/// Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or <https://www.apache.org/licenses/LICENSE-2.0>) +/// or MIT license ([LICENSE-MIT](LICENSE-MIT) or <https://opensource.org/licenses/MIT>), as per the +/// rest of the crate. +pub(crate) async fn wheel_metadata_from_remote_zip( + filename: &WheelFilename, + reader: &mut AsyncHttpRangeReader, +) -> Result<String, Error> { + // Make sure we have the back part of the stream. + // Best guess for the central directory size inside the zip + const CENTRAL_DIRECTORY_SIZE: u64 = 16384; + // Because the zip index is at the back + reader + .prefetch(reader.len().saturating_sub(CENTRAL_DIRECTORY_SIZE)..reader.len()) + .await; + + // Construct a zip reader to use the stream. + let mut reader = ZipFileReader::new(reader.compat()) + .await + .map_err(|err| Error::Zip(filename.clone(), err))?; + + let ((metadata_idx, metadata_entry), _path) = find_dist_info_metadata( + filename, + reader + .file() + .entries() + .iter() + .enumerate() + .filter_map(|(idx, e)| Some(((idx, e), e.entry().filename().as_str().ok()?))), + ) + .map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?; + + let offset = metadata_entry.header_offset(); + let size = metadata_entry.entry().compressed_size() + + 30 // Header size in bytes + + metadata_entry.entry().filename().as_bytes().len() as u64; + + // The zip archive uses a BufReader which reads in chunks of 8192. To ensure we prefetch + // enough data we round the size up to the nearest multiple of the buffer size. + let buffer_size = 8192; + let size = ((size + buffer_size - 1) / buffer_size) * buffer_size; + + // Fetch the bytes from the zip archive that contain the requested file. + reader + .inner_mut() + .get_mut() + .prefetch(offset..offset + size) + .await; + + // Read the contents of the METADATA file + let mut contents = String::new(); + reader + .reader_with_entry(metadata_idx) + .await + .map_err(|err| Error::Zip(filename.clone(), err))? 
+ .read_to_string_checked(&mut contents) + .await + .map_err(|err| Error::Zip(filename.clone(), err))?; + + Ok(contents) +} diff --git a/crates/puffin-client/tests/remote_metadata.rs b/crates/puffin-client/tests/remote_metadata.rs new file mode 100644 index 000000000..ca8c57aab --- /dev/null +++ b/crates/puffin-client/tests/remote_metadata.rs @@ -0,0 +1,28 @@ +use std::str::FromStr; + +use anyhow::Result; +use tempfile::tempdir; +use url::Url; + +use distribution_filename::WheelFilename; +use puffin_client::RegistryClientBuilder; + +#[tokio::test] +async fn remote_metadata_with_and_without_cache() -> Result<()> { + let temp_cache = tempdir().unwrap(); + let client = RegistryClientBuilder::default() + .cache(Some(temp_cache.path().to_path_buf())) + .build(); + // The first run is without cache (the tempdir is empty), the second has the cache from the + // first run + for _ in 0..2 { + let url = "https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl"; + let filename = WheelFilename::from_str(url.rsplit_once('/').unwrap().1).unwrap(); + let metadata = client + .wheel_metadata_no_index(&filename, &Url::parse(url).unwrap()) + .await + .unwrap(); + assert_eq!(metadata.summary.unwrap(), "Fast, Extensible Progress Meter"); + } + Ok(()) +} diff --git a/crates/puffin-dev/Cargo.toml b/crates/puffin-dev/Cargo.toml index 1bbd85053..d10f76561 100644 --- a/crates/puffin-dev/Cargo.toml +++ b/crates/puffin-dev/Cargo.toml @@ -11,6 +11,7 @@ authors = { workspace = true } license = { workspace = true } [dependencies] +distribution-filename = { path = "../distribution-filename" } gourgeist = { path = "../gourgeist" } pep508_rs = { path = "../pep508-rs" } platform-host = { path = "../platform-host" } @@ -36,3 +37,4 @@ tracing = { workspace = true } tracing-indicatif = { workspace = true } tracing-subscriber = { workspace = true } which = { workspace = true } +url = { workspace = true } diff --git 
a/crates/puffin-dev/src/main.rs b/crates/puffin-dev/src/main.rs index eb63e7b91..dafe745e8 100644 --- a/crates/puffin-dev/src/main.rs +++ b/crates/puffin-dev/src/main.rs @@ -16,10 +16,12 @@ use resolve_many::ResolveManyArgs; use crate::build::{build, BuildArgs}; use crate::resolve_cli::ResolveCliArgs; +use crate::wheel_metadata::WheelMetadataArgs; mod build; mod resolve_cli; mod resolve_many; +mod wheel_metadata; #[derive(Parser)] enum Cli { @@ -34,6 +36,7 @@ enum Cli { ResolveMany(ResolveManyArgs), /// Resolve requirements passed on the CLI ResolveCli(ResolveCliArgs), + WheelMetadata(WheelMetadataArgs), } async fn run() -> Result<()> { @@ -49,6 +52,7 @@ async fn run() -> Result<()> { Cli::ResolveCli(args) => { resolve_cli::resolve_cli(args).await?; } + Cli::WheelMetadata(args) => wheel_metadata::wheel_metadata(args).await?, } Ok(()) } diff --git a/crates/puffin-dev/src/wheel_metadata.rs b/crates/puffin-dev/src/wheel_metadata.rs new file mode 100644 index 000000000..7b998fb5e --- /dev/null +++ b/crates/puffin-dev/src/wheel_metadata.rs @@ -0,0 +1,44 @@ +use std::path::PathBuf; +use std::str::FromStr; + +use clap::Parser; +use directories::ProjectDirs; +use url::Url; + +use distribution_filename::WheelFilename; +use puffin_client::RegistryClientBuilder; + +#[derive(Parser)] +pub(crate) struct WheelMetadataArgs { + url: Url, + /// Avoid reading from or writing to the cache. + #[arg(global = true, long, short)] + no_cache: bool, + /// Path to the cache directory. 
+ #[arg(global = true, long, env = "PUFFIN_CACHE_DIR")] + cache_dir: Option, +} + +pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> anyhow::Result<()> { + let project_dirs = ProjectDirs::from("", "", "puffin"); + let cache_dir = (!args.no_cache) + .then(|| { + args.cache_dir + .as_deref() + .or_else(|| project_dirs.as_ref().map(ProjectDirs::cache_dir)) + }) + .flatten(); + let client = RegistryClientBuilder::default().cache(cache_dir).build(); + + let filename = WheelFilename::from_str( + args.url + .path() + .rsplit_once('/') + .unwrap_or(("", args.url.path())) + .1, + )?; + + let metadata = client.wheel_metadata_no_index(&filename, &args.url).await?; + println!("{metadata:?}"); + Ok(()) +} diff --git a/crates/puffin-resolver/src/file.rs b/crates/puffin-resolver/src/file.rs index ccad6e18a..ca8a11dff 100644 --- a/crates/puffin-resolver/src/file.rs +++ b/crates/puffin-resolver/src/file.rs @@ -1,13 +1,14 @@ +use distribution_filename::{SourceDistributionFilename, WheelFilename}; use std::ops::Deref; use puffin_package::pypi_types::File; /// A distribution can either be a wheel or a source distribution. 
#[derive(Debug, Clone)] -pub(crate) struct WheelFile(File); +pub(crate) struct WheelFile(pub(crate) File, pub(crate) WheelFilename); #[derive(Debug, Clone)] -pub(crate) struct SdistFile(File); +pub(crate) struct SdistFile(pub(crate) File, pub(crate) SourceDistributionFilename); #[derive(Debug, Clone)] pub(crate) enum DistributionFile { @@ -31,18 +32,6 @@ impl Deref for SdistFile { } } -impl From for WheelFile { - fn from(file: File) -> Self { - Self(file) - } -} - -impl From for SdistFile { - fn from(file: File) -> Self { - Self(file) - } -} - impl From for File { fn from(wheel: WheelFile) -> Self { wheel.0 @@ -67,19 +56,6 @@ impl From for DistributionFile { } } -impl From for DistributionFile { - fn from(file: File) -> Self { - if std::path::Path::new(file.filename.as_str()) - .extension() - .map_or(false, |ext| ext.eq_ignore_ascii_case("whl")) - { - Self::Wheel(WheelFile::from(file)) - } else { - Self::Sdist(SdistFile::from(file)) - } - } -} - impl DistributionFile { pub(crate) fn filename(&self) -> &str { match self { diff --git a/crates/puffin-resolver/src/finder.rs b/crates/puffin-resolver/src/finder.rs index 4375f2392..384c63e6a 100644 --- a/crates/puffin-resolver/src/finder.rs +++ b/crates/puffin-resolver/src/finder.rs @@ -157,6 +157,7 @@ impl<'a> DistributionFinder<'a> { } #[derive(Debug)] +#[allow(clippy::large_enum_variant)] enum Request { /// A request to fetch the metadata for a package. Package(Requirement), diff --git a/crates/puffin-resolver/src/resolver.rs b/crates/puffin-resolver/src/resolver.rs index b77ef9055..09668aa76 100644 --- a/crates/puffin-resolver/src/resolver.rs +++ b/crates/puffin-resolver/src/resolver.rs @@ -548,31 +548,33 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { // distributions. 
let mut version_map: VersionMap = BTreeMap::new(); for file in metadata.files { - if let Ok(name) = WheelFilename::from_str(file.filename.as_str()) { - if name.is_compatible(self.tags) { - let version = PubGrubVersion::from(name.version); + if let Ok(filename) = WheelFilename::from_str(file.filename.as_str()) { + if filename.is_compatible(self.tags) { + let version = PubGrubVersion::from(filename.version.clone()); match version_map.entry(version) { std::collections::btree_map::Entry::Occupied(mut entry) => { if matches!(entry.get(), DistributionFile::Sdist(_)) { // Wheels get precedence over source distributions. - entry.insert(DistributionFile::from(WheelFile::from( - file, + entry.insert(DistributionFile::from(WheelFile( + file, filename, ))); } } std::collections::btree_map::Entry::Vacant(entry) => { - entry.insert(DistributionFile::from(WheelFile::from(file))); + entry.insert(DistributionFile::from(WheelFile( + file, filename, + ))); } } } - } else if let Ok(name) = + } else if let Ok(filename) = SourceDistributionFilename::parse(file.filename.as_str(), &package_name) { - let version = PubGrubVersion::from(name.version); + let version = PubGrubVersion::from(filename.version.clone()); if let std::collections::btree_map::Entry::Vacant(entry) = version_map.entry(version) { - entry.insert(DistributionFile::from(SdistFile::from(file))); + entry.insert(DistributionFile::from(SdistFile(file, filename))); } } } @@ -627,7 +629,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { Request::Wheel(package_name, file) => { let metadata = self .client - .file(file.clone().into()) + .wheel_metadata(file.0.clone(), file.1.clone()) .map_err(ResolveError::Client) .await?;