Fetch wheel metadata by async range requests on the remote wheel (#301)

Use range requests and async zip to extract the METADATA file from a
remote wheel.

We currently only cache when the remote declares the resource as
immutable, see
https://github.com/06chaynes/http-cache/issues/57 and
https://github.com/baszalmstra/async_http_range_reader/pull/1 . The
cache is stored as JSON with the description omitted; this improves
cache deserialization performance.
This commit is contained in:
konsti 2023-11-06 15:06:49 +01:00 committed by GitHub
parent 6f83a44fea
commit b2439b24a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 558 additions and 68 deletions

104
Cargo.lock generated
View file

@ -179,6 +179,19 @@ dependencies = [
"tempfile",
]
[[package]]
name = "async-compression"
version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a"
dependencies = [
"flate2",
"futures-core",
"futures-io",
"memchr",
"pin-project-lite",
]
[[package]]
name = "async-compression"
version = "0.4.4"
@ -204,6 +217,40 @@ dependencies = [
"syn 2.0.38",
]
[[package]]
name = "async_http_range_reader"
version = "0.3.0"
source = "git+https://github.com/baszalmstra/async_http_range_reader#4cafe5afda889d53060e0565c949d4ffd6ef3786"
dependencies = [
"bisection",
"futures",
"http-content-range",
"itertools",
"memmap2 0.9.0",
"reqwest",
"thiserror",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
]
[[package]]
name = "async_zip"
version = "0.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "795310de3218cde15219fc98c1cf7d8fe9db4865aab27fcf1d535d6cb61c6b54"
dependencies = [
"async-compression 0.3.15",
"crc32fast",
"futures-util",
"log",
"pin-project",
"thiserror",
"tokio",
"tokio-util",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@ -255,6 +302,12 @@ dependencies = [
"serde",
]
[[package]]
name = "bisection"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "021e079a1bab0ecce6cf4b4b74c0c37afa4a697136eb3b127875c84a8f04a8c3"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -337,7 +390,7 @@ dependencies = [
"futures",
"hex",
"libc",
"memmap2",
"memmap2 0.5.10",
"miette",
"reflink-copy",
"serde",
@ -1189,6 +1242,12 @@ dependencies = [
"time",
]
[[package]]
name = "http-content-range"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f0d1a8ef218a86416107794b34cc446958d9203556c312bb41eab4c924c1d2e"
[[package]]
name = "http-serde"
version = "1.1.3"
@ -1598,6 +1657,15 @@ dependencies = [
"libc",
]
[[package]]
name = "memmap2"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "deaba38d7abf1d4cca21cc89e932e542ba2b9258664d2a9ef0e61512039c9375"
dependencies = [
"libc",
]
[[package]]
name = "memoffset"
version = "0.9.0"
@ -1896,6 +1964,26 @@ dependencies = [
"indexmap 2.0.2",
]
[[package]]
name = "pin-project"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.38",
]
[[package]]
name = "pin-project-lite"
version = "0.2.13"
@ -2167,8 +2255,15 @@ dependencies = [
name = "puffin-client"
version = "0.0.1"
dependencies = [
"anyhow",
"async_http_range_reader",
"async_zip",
"distribution-filename",
"fs-err",
"futures",
"http-cache-reqwest",
"install-wheel-rs",
"puffin-cache",
"puffin-normalize",
"puffin-package",
"reqwest",
@ -2176,8 +2271,10 @@ dependencies = [
"reqwest-retry",
"serde",
"serde_json",
"tempfile",
"thiserror",
"tokio",
"tokio-util",
"tracing",
"url",
]
@ -2190,6 +2287,7 @@ dependencies = [
"clap",
"colored",
"directories",
"distribution-filename",
"fs-err",
"futures",
"gourgeist",
@ -2209,6 +2307,7 @@ dependencies = [
"tracing",
"tracing-indicatif",
"tracing-subscriber",
"url",
"which",
]
@ -2675,7 +2774,7 @@ version = "0.11.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b"
dependencies = [
"async-compression",
"async-compression 0.4.4",
"base64 0.21.5",
"bytes",
"encoding_rs",
@ -3409,6 +3508,7 @@ dependencies = [
"futures-core",
"pin-project-lite",
"tokio",
"tokio-util",
]
[[package]]

View file

@ -14,6 +14,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
anyhow = { version = "1.0.75" }
async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", ref = "4cafe5afda889d53060e0565c949d4ffd6ef3786" }
async_zip = { version = "0.0.15", features = ["tokio", "deflate"] }
bitflags = { version = "2.4.0" }
cacache = { version = "11.7.1", default-features = false, features = ["tokio-runtime"] }
camino = { version = "1.1.6", features = ["serde1"] }

View file

@ -2,6 +2,7 @@
use std::io;
use distribution_filename::WheelFilename;
use platform_info::PlatformInfoError;
use thiserror::Error;
use zip::result::ZipError;
@ -69,3 +70,45 @@ impl Error {
}
}
}
/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or
/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase.
/// Either way, we just search the wheel for the name.
///
/// Returns the single `(payload, path)` pair whose path is
/// `<distribution>-<version>.dist-info/METADATA`, or an error message when zero or several
/// candidate `.dist-info` directories are found.
pub fn find_dist_info_metadata<'a, T: Copy>(
    filename: &WheelFilename,
    files: impl Iterator<Item = (T, &'a str)>,
) -> Result<(T, &'a str), String> {
    // `<distribution>-<version>`, the expected stem of the `.dist-info` directory.
    let expected_dir = format!(
        "{}-{}",
        filename.distribution.as_dist_info_name(),
        filename.version
    );
    // Collect every top-level `<stem>.dist-info/METADATA` entry whose stem matches,
    // comparing the directory stem case-insensitively.
    let candidates: Vec<_> = files
        .filter_map(|(payload, path)| {
            let (dir, file) = path.split_once('/')?;
            if file != "METADATA" {
                return None;
            }
            let stem = dir.strip_suffix(".dist-info")?;
            (stem.to_lowercase() == expected_dir).then_some((payload, path))
        })
        .collect();
    // Exactly one matching `.dist-info` directory must exist.
    match candidates.as_slice() {
        [] => Err("no .dist-info directory".to_string()),
        &[hit] => Ok(hit),
        _ => Err(format!(
            "multiple .dist-info directories: {}",
            candidates
                .iter()
                .map(|(_, path)| (*path).to_string())
                .collect::<Vec<_>>()
                .join(", ")
        )),
    }
}

View file

@ -4,10 +4,16 @@ version = "0.0.1"
edition = "2021"
[dependencies]
distribution-filename = { path = "../distribution-filename" }
install-wheel-rs = { path = "../install-wheel-rs" }
puffin-cache = { path = "../puffin-cache" }
puffin-normalize = { path = "../puffin-normalize" }
puffin-package = { path = "../puffin-package" }
async_http_range_reader = { workspace = true }
async_zip = { workspace = true }
futures = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
http-cache-reqwest = { workspace = true }
reqwest = { workspace = true }
reqwest-middleware = { workspace = true }
@ -15,6 +21,11 @@ reqwest-retry = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tempfile = { workspace = true }
tokio = { workspace = true, features = ["fs"] }
tokio-util = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }
[dev-dependencies]
anyhow = { workspace = true }

View file

@ -1,20 +1,32 @@
use std::fmt::Debug;
use std::path::PathBuf;
use async_http_range_reader::{
AsyncHttpRangeReader, AsyncHttpRangeReaderError, CheckSupportMethod,
};
use async_zip::tokio::read::seek::ZipFileReader;
use futures::{AsyncRead, StreamExt, TryStreamExt};
use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions};
use reqwest::ClientBuilder;
use reqwest::StatusCode;
use reqwest::header::HeaderMap;
use reqwest::{header, Client, ClientBuilder, StatusCode};
use reqwest_middleware::ClientWithMiddleware;
use reqwest_retry::policies::ExponentialBackoff;
use reqwest_retry::RetryTransientMiddleware;
use tracing::trace;
use tempfile::tempfile;
use tokio::io::BufWriter;
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tracing::{debug, trace};
use url::Url;
use distribution_filename::WheelFilename;
use install_wheel_rs::find_dist_info_metadata;
use puffin_normalize::PackageName;
use puffin_package::pypi_types::{File, Metadata21, SimpleJson};
use crate::error::Error;
use crate::remote_metadata::{
wheel_metadata_from_remote_zip, wheel_metadata_get_cached, wheel_metadata_write_cache,
};
/// A builder for an [`RegistryClient`].
#[derive(Debug, Clone)]
@ -96,10 +108,10 @@ impl RegistryClientBuilder {
let mut client_builder =
reqwest_middleware::ClientBuilder::new(client_raw.clone()).with(retry_strategy);
if let Some(path) = self.cache {
if let Some(path) = &self.cache {
client_builder = client_builder.with(Cache(HttpCache {
mode: CacheMode::Default,
manager: CACacheManager { path },
manager: CACacheManager { path: path.clone() },
options: HttpCacheOptions::default(),
}));
}
@ -108,15 +120,16 @@ impl RegistryClientBuilder {
let retry_strategy = RetryTransientMiddleware::new_with_policy(retry_policy);
let uncached_client_builder =
reqwest_middleware::ClientBuilder::new(client_raw).with(retry_strategy);
reqwest_middleware::ClientBuilder::new(client_raw.clone()).with(retry_strategy);
RegistryClient {
index: self.index,
extra_index: self.extra_index,
no_index: self.no_index,
proxy: self.proxy,
client: client_builder.build(),
client_raw,
uncached_client: uncached_client_builder.build(),
cache: self.cache,
}
}
}
@ -128,9 +141,11 @@ pub struct RegistryClient {
pub(crate) extra_index: Vec<Url>,
/// Ignore the package index, instead relying on local archives and caches.
pub(crate) no_index: bool,
pub(crate) proxy: Url,
pub(crate) client: ClientWithMiddleware,
pub(crate) uncached_client: ClientWithMiddleware,
pub(crate) client_raw: Client,
/// Used for the remote wheel METADATA cache
pub(crate) cache: Option<PathBuf>,
}
impl RegistryClient {
@ -184,33 +199,110 @@ impl RegistryClient {
}
/// Fetch the metadata from a wheel file.
pub async fn file(&self, file: File) -> Result<Metadata21, Error> {
pub async fn wheel_metadata(
&self,
file: File,
filename: WheelFilename,
) -> Result<Metadata21, Error> {
if self.no_index {
return Err(Error::NoIndex(file.filename));
}
// Per PEP 658, if `data-dist-info-metadata` is available, we can request it directly;
// otherwise, send to our dedicated caching proxy.
let url = if file.data_dist_info_metadata.is_available() {
Url::parse(&format!("{}.metadata", file.url))?
// If the metadata file is available at its own url (PEP 658), download it from there
let url = Url::parse(&file.url)?;
if file.data_dist_info_metadata.is_available() {
let url = Url::parse(&format!("{}.metadata", file.url))?;
trace!("Fetching file {} from {}", file.filename, url);
let text = self.wheel_metadata_impl(&url).await.map_err(|err| {
if err.status() == Some(StatusCode::NOT_FOUND) {
Error::FileNotFound(file.filename, err)
} else {
err.into()
}
})?;
Ok(Metadata21::parse(text.as_bytes())?)
// If we lack PEP 658 support, try using HTTP range requests to read only the
// `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel
// into the cache and read from there
} else {
self.proxy.join(file.url.parse::<Url>()?.path())?
};
trace!("Fetching file {} from {}", file.filename, url);
// Fetch from the index.
let text = self.file_impl(&url).await.map_err(|err| {
if err.status() == Some(StatusCode::NOT_FOUND) {
Error::FileNotFound(file.filename, err)
} else {
err.into()
}
})?;
Metadata21::parse(text.as_bytes()).map_err(std::convert::Into::into)
self.wheel_metadata_no_index(&filename, &url).await
}
}
async fn file_impl(&self, url: &Url) -> Result<String, reqwest_middleware::Error> {
/// Get the wheel metadata when the index doesn't serve it separately through PEP 658.
///
/// Resolution order:
/// 1. The local METADATA cache (when one is configured).
/// 2. HTTP range requests, reading only `.dist-info/METADATA` out of the remote zip.
/// 3. Downloading the whole wheel to a temporary file and reading METADATA from it.
pub async fn wheel_metadata_no_index(
    &self,
    filename: &WheelFilename,
    url: &Url,
) -> Result<Metadata21, Error> {
    Ok(
        if let Some(cached_metadata) =
            wheel_metadata_get_cached(url, self.cache.as_deref()).await
        {
            debug!("Cache hit for wheel metadata for {url}");
            cached_metadata
        } else if let Some((mut reader, headers)) = self.range_reader(url.clone()).await? {
            debug!("Using remote zip reader for wheel metadata for {url}");
            let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
            let metadata = Metadata21::parse(text.as_bytes())?;
            // Only cache when the server marks the resource as immutable via
            // `Cache-Control: immutable`; anything else may change under us.
            let is_immutable = headers
                .get(header::CACHE_CONTROL)
                .and_then(|header| header.to_str().ok())
                .unwrap_or_default()
                .split(',')
                .any(|entry| entry.trim().to_lowercase() == "immutable");
            if is_immutable {
                debug!("Immutable (cacheable) wheel metadata for {url}");
                wheel_metadata_write_cache(url, self.cache.as_deref(), &metadata).await?;
            }
            metadata
        } else {
            debug!("Downloading whole wheel to extract metadata from {url}");
            // TODO(konstin): Download the wheel into a cache shared with the installer instead
            // Note that this branch is only hit when the index lacks PEP 658 support AND the
            // server hosting the wheels doesn't support range requests (we should probably
            // warn here and tell users to get a better registry, since their current one makes
            // resolution unnecessarily slow).
            let temp_download = tempfile()?;
            let mut writer = BufWriter::new(tokio::fs::File::from_std(temp_download));
            let mut reader = self.stream_external(url).await?.compat();
            // NOTE(review): this relies on `tokio::io::copy` flushing the `BufWriter` before
            // returning — `into_inner` below discards any unflushed buffer. Confirm against
            // the tokio docs.
            tokio::io::copy(&mut reader, &mut writer).await?;
            let temp_download = writer.into_inner();
            let mut reader = ZipFileReader::new(temp_download.compat())
                .await
                .map_err(|err| Error::Zip(filename.clone(), err))?;
            // Locate the `.dist-info/METADATA` entry among the zip's entries.
            let ((metadata_idx, _metadata_entry), _path) = find_dist_info_metadata(
                filename,
                reader
                    .file()
                    .entries()
                    .iter()
                    .enumerate()
                    .filter_map(|(idx, e)| {
                        Some(((idx, e), e.entry().filename().as_str().ok()?))
                    }),
            )
            .map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?;
            // Read the contents of the METADATA file
            let mut contents = Vec::new();
            reader
                .reader_with_entry(metadata_idx)
                .await
                .map_err(|err| Error::Zip(filename.clone(), err))?
                .read_to_end_checked(&mut contents)
                .await
                .map_err(|err| Error::Zip(filename.clone(), err))?;
            Metadata21::parse(&contents)?
        },
    )
}
async fn wheel_metadata_impl(&self, url: &Url) -> Result<String, reqwest_middleware::Error> {
Ok(self
.client
.get(url.clone())
@ -244,4 +336,23 @@ impl RegistryClient {
.into_async_read(),
))
}
/// An async reader for individual files inside a remote zip file, if the server supports
/// range requests. Returns the headers of the initial request for caching, or `None` when
/// the server lacks range request support.
async fn range_reader(
    &self,
    url: Url,
) -> Result<Option<(AsyncHttpRangeReader, HeaderMap)>, Error> {
    let response = AsyncHttpRangeReader::new(
        self.client_raw.clone(),
        // `url` is owned and not used afterwards, so pass it by value (the previous
        // `url.clone()` was redundant).
        url,
        CheckSupportMethod::Head,
    )
    .await;
    match response {
        Ok((reader, headers)) => Ok(Some((reader, headers))),
        // Missing range support is not an error: the caller falls back to a full download.
        Err(AsyncHttpRangeReaderError::HttpRangeRequestUnsupported) => Ok(None),
        Err(err) => Err(err.into()),
    }
}
}

View file

@ -1,5 +1,10 @@
use std::io;
use async_http_range_reader::AsyncHttpRangeReaderError;
use async_zip::error::ZipError;
use thiserror::Error;
use distribution_filename::WheelFilename;
use puffin_package::pypi_types;
#[derive(Debug, Error)]
@ -41,6 +46,18 @@ pub enum Error {
source: serde_json::Error,
url: String,
},
#[error(transparent)]
AsyncHttpRangeReader(#[from] AsyncHttpRangeReaderError),
#[error("Expected a single .dist-info directory in {0}, found {1}")]
InvalidDistInfo(WheelFilename, String),
#[error("The wheel {0} is not a valid zip file")]
Zip(WheelFilename, #[source] ZipError),
#[error(transparent)]
IO(#[from] io::Error),
}
impl Error {

View file

@ -3,3 +3,4 @@ pub use error::Error;
mod client;
mod error;
mod remote_metadata;

View file

@ -0,0 +1,148 @@
use std::io;
use std::path::Path;
use async_http_range_reader::AsyncHttpRangeReader;
use async_zip::tokio::read::seek::ZipFileReader;
use fs_err::tokio as fs;
use tokio_util::compat::TokioAsyncReadCompatExt;
use url::Url;
use distribution_filename::WheelFilename;
use install_wheel_rs::find_dist_info_metadata;
use puffin_cache::CanonicalUrl;
use puffin_package::pypi_types::Metadata21;
use crate::Error;
/// Cache subdirectory for METADATA extracted from remote zips. The `-v0` suffix presumably
/// versions the serialized format so stale entries can be invalidated by bumping it — confirm.
const WHEEL_METADATA_FROM_ZIP_CACHE: &str = "wheel-metadata-v0";
/// Try to read the cached METADATA previously extracted from a remote zip, if it exists.
///
/// Returns `None` when caching is disabled (`cache` is `None`), the entry is missing, or
/// the cached JSON fails to deserialize.
pub(crate) async fn wheel_metadata_get_cached(
    url: &Url,
    cache: Option<&Path>,
) -> Option<Metadata21> {
    // TODO(konstin): Actual good cache layout
    let path = cache?
        .join(WHEEL_METADATA_FROM_ZIP_CACHE)
        .join(puffin_cache::digest(&CanonicalUrl::new(url)));
    // A missing file simply surfaces as an `Err` from the async read, so the previous
    // blocking `path.is_file()` check inside this async fn was both redundant and a
    // synchronous syscall on the executor thread; any read failure is a cache miss.
    let data = fs::read(path).await.ok()?;
    serde_json::from_slice(&data).ok()
}
/// Persist the METADATA extracted from a remote zip to the on-disk cache.
///
/// A `None` cache means caching is disabled, in which case this is a no-op.
pub(crate) async fn wheel_metadata_write_cache(
    url: &Url,
    cache: Option<&Path>,
    metadata: &Metadata21,
) -> io::Result<()> {
    let dir = match cache {
        Some(cache) => cache.join(WHEEL_METADATA_FROM_ZIP_CACHE),
        // Caching disabled: nothing to persist.
        None => return Ok(()),
    };
    // TODO(konstin): Actual good cache layout
    fs::create_dir_all(&dir).await?;
    let entry = dir.join(puffin_cache::digest(&CanonicalUrl::new(url)));
    fs::write(entry, serde_json::to_vec(metadata)?).await
}
/// Read the `.dist-info/METADATA` file from an async remote zip reader, so we avoid
/// downloading the entire wheel just for the one file.
///
/// This method is derived from `prefix-dev/rip`, which is available under the following BSD-3
/// Clause license:
///
/// ```text
/// BSD 3-Clause License
///
/// Copyright (c) 2023, prefix.dev GmbH
///
/// Redistribution and use in source and binary forms, with or without
/// modification, are permitted provided that the following conditions are met:
///
/// 1. Redistributions of source code must retain the above copyright notice, this
/// list of conditions and the following disclaimer.
///
/// 2. Redistributions in binary form must reproduce the above copyright notice,
/// this list of conditions and the following disclaimer in the documentation
/// and/or other materials provided with the distribution.
///
/// 3. Neither the name of the copyright holder nor the names of its
/// contributors may be used to endorse or promote products derived from
/// this software without specific prior written permission.
///
/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
/// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
/// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
/// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
/// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
/// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
/// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/// ```
///
/// Additional work and modifications to the originating source are available under the
/// Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or <https://www.apache.org/licenses/LICENSE-2.0>)
/// or MIT license ([LICENSE-MIT](LICENSE-MIT) or <https://opensource.org/licenses/MIT>), as per the
/// rest of the crate.
pub(crate) async fn wheel_metadata_from_remote_zip(
    filename: &WheelFilename,
    reader: &mut AsyncHttpRangeReader,
) -> Result<String, Error> {
    // Make sure we have the back part of the stream, where the central directory
    // (the zip's index of entries) lives.
    // Best guess for the central directory size inside the zip
    const CENTRAL_DIRECTORY_SIZE: u64 = 16384;
    // Because the zip index is at the back
    reader
        .prefetch(reader.len().saturating_sub(CENTRAL_DIRECTORY_SIZE)..reader.len())
        .await;
    // Construct a zip reader to use the stream.
    let mut reader = ZipFileReader::new(reader.compat())
        .await
        .map_err(|err| Error::Zip(filename.clone(), err))?;
    // Locate the `.dist-info/METADATA` entry among the central directory entries.
    let ((metadata_idx, metadata_entry), _path) = find_dist_info_metadata(
        filename,
        reader
            .file()
            .entries()
            .iter()
            .enumerate()
            .filter_map(|(idx, e)| Some(((idx, e), e.entry().filename().as_str().ok()?))),
    )
    .map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?;
    // The byte range of the entry: fixed local header + filename + compressed data.
    // NOTE(review): this does not account for a possible "extra field" in the local header;
    // presumably the buffer-size rounding below absorbs that — confirm.
    let offset = metadata_entry.header_offset();
    let size = metadata_entry.entry().compressed_size()
        + 30 // Fixed local file header size in bytes
        + metadata_entry.entry().filename().as_bytes().len() as u64;
    // The zip archive uses a BufReader which reads in chunks of 8192. To ensure we prefetch
    // enough data we round the size up to the nearest multiple of the buffer size.
    let buffer_size = 8192;
    let size = ((size + buffer_size - 1) / buffer_size) * buffer_size;
    // Fetch the bytes from the zip archive that contain the requested file.
    reader
        .inner_mut()
        .get_mut()
        .prefetch(offset..offset + size)
        .await;
    // Read the contents of the METADATA file
    let mut contents = String::new();
    reader
        .reader_with_entry(metadata_idx)
        .await
        .map_err(|err| Error::Zip(filename.clone(), err))?
        .read_to_string_checked(&mut contents)
        .await
        .map_err(|err| Error::Zip(filename.clone(), err))?;
    Ok(contents)
}

View file

@ -0,0 +1,28 @@
use std::str::FromStr;
use anyhow::Result;
use tempfile::tempdir;
use url::Url;
use distribution_filename::WheelFilename;
use puffin_client::RegistryClientBuilder;
// Integration test: fetch wheel metadata via HTTP range requests, then again from the local
// cache. NOTE(review): this hits the live files.pythonhosted.org CDN, so it needs network
// access and depends on the remote wheel (and its summary text) staying unchanged.
#[tokio::test]
async fn remote_metadata_with_and_without_cache() -> Result<()> {
    let temp_cache = tempdir().unwrap();
    let client = RegistryClientBuilder::default()
        .cache(Some(temp_cache.path().to_path_buf()))
        .build();
    // The first run is without cache (the tempdir is empty), the second has the cache from the
    // first run
    for _ in 0..2 {
        let url = "https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl";
        // The wheel filename is the final path segment of the URL.
        let filename = WheelFilename::from_str(url.rsplit_once('/').unwrap().1).unwrap();
        let metadata = client
            .wheel_metadata_no_index(&filename, &Url::parse(url).unwrap())
            .await
            .unwrap();
        assert_eq!(metadata.summary.unwrap(), "Fast, Extensible Progress Meter");
    }
    Ok(())
}

View file

@ -11,6 +11,7 @@ authors = { workspace = true }
license = { workspace = true }
[dependencies]
distribution-filename = { path = "../distribution-filename" }
gourgeist = { path = "../gourgeist" }
pep508_rs = { path = "../pep508-rs" }
platform-host = { path = "../platform-host" }
@ -36,3 +37,4 @@ tracing = { workspace = true }
tracing-indicatif = { workspace = true }
tracing-subscriber = { workspace = true }
which = { workspace = true }
url = { workspace = true }

View file

@ -16,10 +16,12 @@ use resolve_many::ResolveManyArgs;
use crate::build::{build, BuildArgs};
use crate::resolve_cli::ResolveCliArgs;
use crate::wheel_metadata::WheelMetadataArgs;
mod build;
mod resolve_cli;
mod resolve_many;
mod wheel_metadata;
#[derive(Parser)]
enum Cli {
@ -34,6 +36,7 @@ enum Cli {
ResolveMany(ResolveManyArgs),
/// Resolve requirements passed on the CLI
ResolveCli(ResolveCliArgs),
WheelMetadata(WheelMetadataArgs),
}
async fn run() -> Result<()> {
@ -49,6 +52,7 @@ async fn run() -> Result<()> {
Cli::ResolveCli(args) => {
resolve_cli::resolve_cli(args).await?;
}
Cli::WheelMetadata(args) => wheel_metadata::wheel_metadata(args).await?,
}
Ok(())
}

View file

@ -0,0 +1,44 @@
use std::path::PathBuf;
use std::str::FromStr;
use clap::Parser;
use directories::ProjectDirs;
use url::Url;
use distribution_filename::WheelFilename;
use puffin_client::RegistryClientBuilder;
// Arguments for the `wheel-metadata` debug command.
//
// NOTE(review): clap turns field doc comments into CLI help text, so additional
// documentation is kept in regular `//` comments to leave the help output unchanged.
#[derive(Parser)]
pub(crate) struct WheelMetadataArgs {
    // URL of the remote wheel whose METADATA should be fetched and printed.
    url: Url,
    /// Avoid reading from or writing to the cache.
    #[arg(global = true, long, short)]
    no_cache: bool,
    /// Path to the cache directory.
    #[arg(global = true, long, env = "PUFFIN_CACHE_DIR")]
    cache_dir: Option<PathBuf>,
}
/// Debug command: fetch the METADATA of a remote wheel by URL and print it.
pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> anyhow::Result<()> {
    let project_dirs = ProjectDirs::from("", "", "puffin");
    // Respect `--no-cache`; otherwise prefer an explicit `--cache-dir` over the
    // platform-default cache location.
    let cache_dir = if args.no_cache {
        None
    } else {
        args.cache_dir
            .as_deref()
            .or_else(|| project_dirs.as_ref().map(ProjectDirs::cache_dir))
    };
    let client = RegistryClientBuilder::default().cache(cache_dir).build();
    // The wheel filename is the last segment of the URL path (or the whole path when
    // it contains no slash).
    let path = args.url.path();
    let name = path.rsplit_once('/').map_or(path, |(_, name)| name);
    let filename = WheelFilename::from_str(name)?;
    let metadata = client.wheel_metadata_no_index(&filename, &args.url).await?;
    println!("{metadata:?}");
    Ok(())
}

View file

@ -1,13 +1,14 @@
use distribution_filename::{SourceDistributionFilename, WheelFilename};
use std::ops::Deref;
use puffin_package::pypi_types::File;
/// A distribution can either be a wheel or a source distribution.
#[derive(Debug, Clone)]
pub(crate) struct WheelFile(File);
pub(crate) struct WheelFile(pub(crate) File, pub(crate) WheelFilename);
#[derive(Debug, Clone)]
pub(crate) struct SdistFile(File);
pub(crate) struct SdistFile(pub(crate) File, pub(crate) SourceDistributionFilename);
#[derive(Debug, Clone)]
pub(crate) enum DistributionFile {
@ -31,18 +32,6 @@ impl Deref for SdistFile {
}
}
impl From<File> for WheelFile {
fn from(file: File) -> Self {
Self(file)
}
}
impl From<File> for SdistFile {
fn from(file: File) -> Self {
Self(file)
}
}
impl From<WheelFile> for File {
fn from(wheel: WheelFile) -> Self {
wheel.0
@ -67,19 +56,6 @@ impl From<SdistFile> for DistributionFile {
}
}
impl From<File> for DistributionFile {
fn from(file: File) -> Self {
if std::path::Path::new(file.filename.as_str())
.extension()
.map_or(false, |ext| ext.eq_ignore_ascii_case("whl"))
{
Self::Wheel(WheelFile::from(file))
} else {
Self::Sdist(SdistFile::from(file))
}
}
}
impl DistributionFile {
pub(crate) fn filename(&self) -> &str {
match self {

View file

@ -157,6 +157,7 @@ impl<'a> DistributionFinder<'a> {
}
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
enum Request {
/// A request to fetch the metadata for a package.
Package(Requirement),

View file

@ -548,31 +548,33 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
// distributions.
let mut version_map: VersionMap = BTreeMap::new();
for file in metadata.files {
if let Ok(name) = WheelFilename::from_str(file.filename.as_str()) {
if name.is_compatible(self.tags) {
let version = PubGrubVersion::from(name.version);
if let Ok(filename) = WheelFilename::from_str(file.filename.as_str()) {
if filename.is_compatible(self.tags) {
let version = PubGrubVersion::from(filename.version.clone());
match version_map.entry(version) {
std::collections::btree_map::Entry::Occupied(mut entry) => {
if matches!(entry.get(), DistributionFile::Sdist(_)) {
// Wheels get precedence over source distributions.
entry.insert(DistributionFile::from(WheelFile::from(
file,
entry.insert(DistributionFile::from(WheelFile(
file, filename,
)));
}
}
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(DistributionFile::from(WheelFile::from(file)));
entry.insert(DistributionFile::from(WheelFile(
file, filename,
)));
}
}
}
} else if let Ok(name) =
} else if let Ok(filename) =
SourceDistributionFilename::parse(file.filename.as_str(), &package_name)
{
let version = PubGrubVersion::from(name.version);
let version = PubGrubVersion::from(filename.version.clone());
if let std::collections::btree_map::Entry::Vacant(entry) =
version_map.entry(version)
{
entry.insert(DistributionFile::from(SdistFile::from(file)));
entry.insert(DistributionFile::from(SdistFile(file, filename)));
}
}
}
@ -627,7 +629,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
Request::Wheel(package_name, file) => {
let metadata = self
.client
.file(file.clone().into())
.wheel_metadata(file.0.clone(), file.1.clone())
.map_err(ResolveError::Client)
.await?;