mirror of
https://github.com/astral-sh/uv.git
synced 2025-07-07 13:25:00 +00:00
Fetch wheel metadata by async range requests on the remote wheel (#301)
Use range requests and async zip to extract the METADATA file from a remote wheel. We currently only cache when the remote declares the resource as immutable, see https://github.com/06chaynes/http-cache/issues/57 and https://github.com/baszalmstra/async_http_range_reader/pull/1 . The cache is stored as JSON with the description omitted, which improves cache deserialization performance.
This commit is contained in:
parent
6f83a44fea
commit
b2439b24a1
15 changed files with 558 additions and 68 deletions
104
Cargo.lock
generated
104
Cargo.lock
generated
|
@ -179,6 +179,19 @@ dependencies = [
|
|||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-compression"
|
||||
version = "0.3.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a"
|
||||
dependencies = [
|
||||
"flate2",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"memchr",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-compression"
|
||||
version = "0.4.4"
|
||||
|
@ -204,6 +217,40 @@ dependencies = [
|
|||
"syn 2.0.38",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async_http_range_reader"
|
||||
version = "0.3.0"
|
||||
source = "git+https://github.com/baszalmstra/async_http_range_reader#4cafe5afda889d53060e0565c949d4ffd6ef3786"
|
||||
dependencies = [
|
||||
"bisection",
|
||||
"futures",
|
||||
"http-content-range",
|
||||
"itertools",
|
||||
"memmap2 0.9.0",
|
||||
"reqwest",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async_zip"
|
||||
version = "0.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "795310de3218cde15219fc98c1cf7d8fe9db4865aab27fcf1d535d6cb61c6b54"
|
||||
dependencies = [
|
||||
"async-compression 0.3.15",
|
||||
"crc32fast",
|
||||
"futures-util",
|
||||
"log",
|
||||
"pin-project",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
|
@ -255,6 +302,12 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bisection"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "021e079a1bab0ecce6cf4b4b74c0c37afa4a697136eb3b127875c84a8f04a8c3"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
|
@ -337,7 +390,7 @@ dependencies = [
|
|||
"futures",
|
||||
"hex",
|
||||
"libc",
|
||||
"memmap2",
|
||||
"memmap2 0.5.10",
|
||||
"miette",
|
||||
"reflink-copy",
|
||||
"serde",
|
||||
|
@ -1189,6 +1242,12 @@ dependencies = [
|
|||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http-content-range"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f0d1a8ef218a86416107794b34cc446958d9203556c312bb41eab4c924c1d2e"
|
||||
|
||||
[[package]]
|
||||
name = "http-serde"
|
||||
version = "1.1.3"
|
||||
|
@ -1598,6 +1657,15 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "deaba38d7abf1d4cca21cc89e932e542ba2b9258664d2a9ef0e61512039c9375"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.9.0"
|
||||
|
@ -1896,6 +1964,26 @@ dependencies = [
|
|||
"indexmap 2.0.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422"
|
||||
dependencies = [
|
||||
"pin-project-internal",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-internal"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.38",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.13"
|
||||
|
@ -2167,8 +2255,15 @@ dependencies = [
|
|||
name = "puffin-client"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async_http_range_reader",
|
||||
"async_zip",
|
||||
"distribution-filename",
|
||||
"fs-err",
|
||||
"futures",
|
||||
"http-cache-reqwest",
|
||||
"install-wheel-rs",
|
||||
"puffin-cache",
|
||||
"puffin-normalize",
|
||||
"puffin-package",
|
||||
"reqwest",
|
||||
|
@ -2176,8 +2271,10 @@ dependencies = [
|
|||
"reqwest-retry",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
|
@ -2190,6 +2287,7 @@ dependencies = [
|
|||
"clap",
|
||||
"colored",
|
||||
"directories",
|
||||
"distribution-filename",
|
||||
"fs-err",
|
||||
"futures",
|
||||
"gourgeist",
|
||||
|
@ -2209,6 +2307,7 @@ dependencies = [
|
|||
"tracing",
|
||||
"tracing-indicatif",
|
||||
"tracing-subscriber",
|
||||
"url",
|
||||
"which",
|
||||
]
|
||||
|
||||
|
@ -2675,7 +2774,7 @@ version = "0.11.22"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b"
|
||||
dependencies = [
|
||||
"async-compression",
|
||||
"async-compression 0.4.4",
|
||||
"base64 0.21.5",
|
||||
"bytes",
|
||||
"encoding_rs",
|
||||
|
@ -3409,6 +3508,7 @@ dependencies = [
|
|||
"futures-core",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
@ -14,6 +14,8 @@ license = "MIT OR Apache-2.0"
|
|||
|
||||
[workspace.dependencies]
|
||||
anyhow = { version = "1.0.75" }
|
||||
async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", ref = "4cafe5afda889d53060e0565c949d4ffd6ef3786" }
|
||||
async_zip = { version = "0.0.15", features = ["tokio", "deflate"] }
|
||||
bitflags = { version = "2.4.0" }
|
||||
cacache = { version = "11.7.1", default-features = false, features = ["tokio-runtime"] }
|
||||
camino = { version = "1.1.6", features = ["serde1"] }
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
use std::io;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
use platform_info::PlatformInfoError;
|
||||
use thiserror::Error;
|
||||
use zip::result::ZipError;
|
||||
|
@ -69,3 +70,45 @@ impl Error {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or
|
||||
/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase.
|
||||
/// Either way, we just search the wheel for the name
|
||||
pub fn find_dist_info_metadata<'a, T: Copy>(
|
||||
filename: &WheelFilename,
|
||||
files: impl Iterator<Item = (T, &'a str)>,
|
||||
) -> Result<(T, &'a str), String> {
|
||||
let dist_info_matcher = format!(
|
||||
"{}-{}",
|
||||
filename.distribution.as_dist_info_name(),
|
||||
filename.version
|
||||
);
|
||||
let metadatas: Vec<_> = files
|
||||
.filter_map(|(payload, path)| {
|
||||
let (dir, file) = path.split_once('/')?;
|
||||
let dir = dir.strip_suffix(".dist-info")?;
|
||||
if dir.to_lowercase() == dist_info_matcher && file == "METADATA" {
|
||||
Some((payload, path))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let (payload, path) = match metadatas[..] {
|
||||
[] => {
|
||||
return Err("no .dist-info directory".to_string());
|
||||
}
|
||||
[(payload, path)] => (payload, path),
|
||||
_ => {
|
||||
return Err(format!(
|
||||
"multiple .dist-info directories: {}",
|
||||
metadatas
|
||||
.into_iter()
|
||||
.map(|(_, path)| path.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
));
|
||||
}
|
||||
};
|
||||
Ok((payload, path))
|
||||
}
|
||||
|
|
|
@ -4,10 +4,16 @@ version = "0.0.1"
|
|||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
distribution-filename = { path = "../distribution-filename" }
|
||||
install-wheel-rs = { path = "../install-wheel-rs" }
|
||||
puffin-cache = { path = "../puffin-cache" }
|
||||
puffin-normalize = { path = "../puffin-normalize" }
|
||||
puffin-package = { path = "../puffin-package" }
|
||||
|
||||
async_http_range_reader = { workspace = true }
|
||||
async_zip = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
fs-err = { workspace = true, features = ["tokio"] }
|
||||
http-cache-reqwest = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
reqwest-middleware = { workspace = true }
|
||||
|
@ -15,6 +21,11 @@ reqwest-retry = { workspace = true }
|
|||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
tokio = { workspace = true, features = ["fs"] }
|
||||
tokio-util = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
url = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
anyhow = { workspace = true }
|
||||
|
|
|
@ -1,20 +1,32 @@
|
|||
use std::fmt::Debug;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use async_http_range_reader::{
|
||||
AsyncHttpRangeReader, AsyncHttpRangeReaderError, CheckSupportMethod,
|
||||
};
|
||||
use async_zip::tokio::read::seek::ZipFileReader;
|
||||
use futures::{AsyncRead, StreamExt, TryStreamExt};
|
||||
use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions};
|
||||
use reqwest::ClientBuilder;
|
||||
use reqwest::StatusCode;
|
||||
use reqwest::header::HeaderMap;
|
||||
use reqwest::{header, Client, ClientBuilder, StatusCode};
|
||||
use reqwest_middleware::ClientWithMiddleware;
|
||||
use reqwest_retry::policies::ExponentialBackoff;
|
||||
use reqwest_retry::RetryTransientMiddleware;
|
||||
use tracing::trace;
|
||||
use tempfile::tempfile;
|
||||
use tokio::io::BufWriter;
|
||||
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
|
||||
use tracing::{debug, trace};
|
||||
use url::Url;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
use install_wheel_rs::find_dist_info_metadata;
|
||||
use puffin_normalize::PackageName;
|
||||
use puffin_package::pypi_types::{File, Metadata21, SimpleJson};
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::remote_metadata::{
|
||||
wheel_metadata_from_remote_zip, wheel_metadata_get_cached, wheel_metadata_write_cache,
|
||||
};
|
||||
|
||||
/// A builder for an [`RegistryClient`].
|
||||
#[derive(Debug, Clone)]
|
||||
|
@ -96,10 +108,10 @@ impl RegistryClientBuilder {
|
|||
let mut client_builder =
|
||||
reqwest_middleware::ClientBuilder::new(client_raw.clone()).with(retry_strategy);
|
||||
|
||||
if let Some(path) = self.cache {
|
||||
if let Some(path) = &self.cache {
|
||||
client_builder = client_builder.with(Cache(HttpCache {
|
||||
mode: CacheMode::Default,
|
||||
manager: CACacheManager { path },
|
||||
manager: CACacheManager { path: path.clone() },
|
||||
options: HttpCacheOptions::default(),
|
||||
}));
|
||||
}
|
||||
|
@ -108,15 +120,16 @@ impl RegistryClientBuilder {
|
|||
let retry_strategy = RetryTransientMiddleware::new_with_policy(retry_policy);
|
||||
|
||||
let uncached_client_builder =
|
||||
reqwest_middleware::ClientBuilder::new(client_raw).with(retry_strategy);
|
||||
reqwest_middleware::ClientBuilder::new(client_raw.clone()).with(retry_strategy);
|
||||
|
||||
RegistryClient {
|
||||
index: self.index,
|
||||
extra_index: self.extra_index,
|
||||
no_index: self.no_index,
|
||||
proxy: self.proxy,
|
||||
client: client_builder.build(),
|
||||
client_raw,
|
||||
uncached_client: uncached_client_builder.build(),
|
||||
cache: self.cache,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -128,9 +141,11 @@ pub struct RegistryClient {
|
|||
pub(crate) extra_index: Vec<Url>,
|
||||
/// Ignore the package index, instead relying on local archives and caches.
|
||||
pub(crate) no_index: bool,
|
||||
pub(crate) proxy: Url,
|
||||
pub(crate) client: ClientWithMiddleware,
|
||||
pub(crate) uncached_client: ClientWithMiddleware,
|
||||
pub(crate) client_raw: Client,
|
||||
/// Used for the remote wheel METADATA cache
|
||||
pub(crate) cache: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl RegistryClient {
|
||||
|
@ -184,33 +199,110 @@ impl RegistryClient {
|
|||
}
|
||||
|
||||
/// Fetch the metadata from a wheel file.
|
||||
pub async fn file(&self, file: File) -> Result<Metadata21, Error> {
|
||||
pub async fn wheel_metadata(
|
||||
&self,
|
||||
file: File,
|
||||
filename: WheelFilename,
|
||||
) -> Result<Metadata21, Error> {
|
||||
if self.no_index {
|
||||
return Err(Error::NoIndex(file.filename));
|
||||
}
|
||||
|
||||
// Per PEP 658, if `data-dist-info-metadata` is available, we can request it directly;
|
||||
// otherwise, send to our dedicated caching proxy.
|
||||
let url = if file.data_dist_info_metadata.is_available() {
|
||||
Url::parse(&format!("{}.metadata", file.url))?
|
||||
// If the metadata file is available at its own url (PEP 658), download it from there
|
||||
let url = Url::parse(&file.url)?;
|
||||
if file.data_dist_info_metadata.is_available() {
|
||||
let url = Url::parse(&format!("{}.metadata", file.url))?;
|
||||
trace!("Fetching file {} from {}", file.filename, url);
|
||||
let text = self.wheel_metadata_impl(&url).await.map_err(|err| {
|
||||
if err.status() == Some(StatusCode::NOT_FOUND) {
|
||||
Error::FileNotFound(file.filename, err)
|
||||
} else {
|
||||
err.into()
|
||||
}
|
||||
})?;
|
||||
|
||||
Ok(Metadata21::parse(text.as_bytes())?)
|
||||
// If we lack PEP 658 support, try using HTTP range requests to read only the
|
||||
// `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel
|
||||
// into the cache and read from there
|
||||
} else {
|
||||
self.proxy.join(file.url.parse::<Url>()?.path())?
|
||||
};
|
||||
|
||||
trace!("Fetching file {} from {}", file.filename, url);
|
||||
|
||||
// Fetch from the index.
|
||||
let text = self.file_impl(&url).await.map_err(|err| {
|
||||
if err.status() == Some(StatusCode::NOT_FOUND) {
|
||||
Error::FileNotFound(file.filename, err)
|
||||
} else {
|
||||
err.into()
|
||||
}
|
||||
})?;
|
||||
Metadata21::parse(text.as_bytes()).map_err(std::convert::Into::into)
|
||||
self.wheel_metadata_no_index(&filename, &url).await
|
||||
}
|
||||
}
|
||||
|
||||
async fn file_impl(&self, url: &Url) -> Result<String, reqwest_middleware::Error> {
|
||||
/// Get the wheel metadata if it isn't available in an index through PEP 658
|
||||
pub async fn wheel_metadata_no_index(
|
||||
&self,
|
||||
filename: &WheelFilename,
|
||||
url: &Url,
|
||||
) -> Result<Metadata21, Error> {
|
||||
Ok(
|
||||
if let Some(cached_metadata) =
|
||||
wheel_metadata_get_cached(url, self.cache.as_deref()).await
|
||||
{
|
||||
debug!("Cache hit for wheel metadata for {url}");
|
||||
cached_metadata
|
||||
} else if let Some((mut reader, headers)) = self.range_reader(url.clone()).await? {
|
||||
debug!("Using remote zip reader for wheel metadata for {url}");
|
||||
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
|
||||
let metadata = Metadata21::parse(text.as_bytes())?;
|
||||
let is_immutable = headers
|
||||
.get(header::CACHE_CONTROL)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or_default()
|
||||
.split(',')
|
||||
.any(|entry| entry.trim().to_lowercase() == "immutable");
|
||||
if is_immutable {
|
||||
debug!("Immutable (cacheable) wheel metadata for {url}");
|
||||
wheel_metadata_write_cache(url, self.cache.as_deref(), &metadata).await?;
|
||||
}
|
||||
metadata
|
||||
} else {
|
||||
debug!("Downloading whole wheel to extract metadata from {url}");
|
||||
// TODO(konstin): Download the wheel into a cache shared with the installer instead
|
||||
// Note that this branch is only hit when you're not using and the server where
|
||||
// you host your wheels for some reasons doesn't support range requests
|
||||
// (tbh we should probably warn here and tekk users to get a better registry because
|
||||
// their current one makes resolution unnecessary slow)
|
||||
let temp_download = tempfile()?;
|
||||
let mut writer = BufWriter::new(tokio::fs::File::from_std(temp_download));
|
||||
let mut reader = self.stream_external(url).await?.compat();
|
||||
tokio::io::copy(&mut reader, &mut writer).await?;
|
||||
let temp_download = writer.into_inner();
|
||||
|
||||
let mut reader = ZipFileReader::new(temp_download.compat())
|
||||
.await
|
||||
.map_err(|err| Error::Zip(filename.clone(), err))?;
|
||||
|
||||
let ((metadata_idx, _metadata_entry), _path) = find_dist_info_metadata(
|
||||
filename,
|
||||
reader
|
||||
.file()
|
||||
.entries()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, e)| {
|
||||
Some(((idx, e), e.entry().filename().as_str().ok()?))
|
||||
}),
|
||||
)
|
||||
.map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?;
|
||||
|
||||
// Read the contents of the METADATA file
|
||||
let mut contents = Vec::new();
|
||||
reader
|
||||
.reader_with_entry(metadata_idx)
|
||||
.await
|
||||
.map_err(|err| Error::Zip(filename.clone(), err))?
|
||||
.read_to_end_checked(&mut contents)
|
||||
.await
|
||||
.map_err(|err| Error::Zip(filename.clone(), err))?;
|
||||
|
||||
Metadata21::parse(&contents)?
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
async fn wheel_metadata_impl(&self, url: &Url) -> Result<String, reqwest_middleware::Error> {
|
||||
Ok(self
|
||||
.client
|
||||
.get(url.clone())
|
||||
|
@ -244,4 +336,23 @@ impl RegistryClient {
|
|||
.into_async_read(),
|
||||
))
|
||||
}
|
||||
|
||||
/// An async for individual files inside a remote zip file, if the server supports it. Returns
|
||||
/// the headers of the initial request for caching.
|
||||
async fn range_reader(
|
||||
&self,
|
||||
url: Url,
|
||||
) -> Result<Option<(AsyncHttpRangeReader, HeaderMap)>, Error> {
|
||||
let response = AsyncHttpRangeReader::new(
|
||||
self.client_raw.clone(),
|
||||
url.clone(),
|
||||
CheckSupportMethod::Head,
|
||||
)
|
||||
.await;
|
||||
match response {
|
||||
Ok((reader, headers)) => Ok(Some((reader, headers))),
|
||||
Err(AsyncHttpRangeReaderError::HttpRangeRequestUnsupported) => Ok(None),
|
||||
Err(err) => Err(err.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
use std::io;
|
||||
|
||||
use async_http_range_reader::AsyncHttpRangeReaderError;
|
||||
use async_zip::error::ZipError;
|
||||
use thiserror::Error;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
use puffin_package::pypi_types;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
|
@ -41,6 +46,18 @@ pub enum Error {
|
|||
source: serde_json::Error,
|
||||
url: String,
|
||||
},
|
||||
|
||||
#[error(transparent)]
|
||||
AsyncHttpRangeReader(#[from] AsyncHttpRangeReaderError),
|
||||
|
||||
#[error("Expected a single .dist-info directory in {0}, found {1}")]
|
||||
InvalidDistInfo(WheelFilename, String),
|
||||
|
||||
#[error("The wheel {0} is not a valid zip file")]
|
||||
Zip(WheelFilename, #[source] ZipError),
|
||||
|
||||
#[error(transparent)]
|
||||
IO(#[from] io::Error),
|
||||
}
|
||||
|
||||
impl Error {
|
||||
|
|
|
@ -3,3 +3,4 @@ pub use error::Error;
|
|||
|
||||
mod client;
|
||||
mod error;
|
||||
mod remote_metadata;
|
||||
|
|
148
crates/puffin-client/src/remote_metadata.rs
Normal file
148
crates/puffin-client/src/remote_metadata.rs
Normal file
|
@ -0,0 +1,148 @@
|
|||
use std::io;
|
||||
use std::path::Path;
|
||||
|
||||
use async_http_range_reader::AsyncHttpRangeReader;
|
||||
use async_zip::tokio::read::seek::ZipFileReader;
|
||||
use fs_err::tokio as fs;
|
||||
use tokio_util::compat::TokioAsyncReadCompatExt;
|
||||
use url::Url;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
use install_wheel_rs::find_dist_info_metadata;
|
||||
use puffin_cache::CanonicalUrl;
|
||||
use puffin_package::pypi_types::Metadata21;
|
||||
|
||||
use crate::Error;
|
||||
|
||||
const WHEEL_METADATA_FROM_ZIP_CACHE: &str = "wheel-metadata-v0";
|
||||
|
||||
/// Try to read the cached METADATA previously extracted from a remote zip, if it exists
|
||||
pub(crate) async fn wheel_metadata_get_cached(
|
||||
url: &Url,
|
||||
cache: Option<&Path>,
|
||||
) -> Option<Metadata21> {
|
||||
// TODO(konstin): Actual good cache layout
|
||||
let path = cache?
|
||||
.join(WHEEL_METADATA_FROM_ZIP_CACHE)
|
||||
.join(puffin_cache::digest(&CanonicalUrl::new(url)));
|
||||
if !path.is_file() {
|
||||
return None;
|
||||
}
|
||||
let data = fs::read(path).await.ok()?;
|
||||
serde_json::from_slice(&data).ok()
|
||||
}
|
||||
|
||||
/// Write the cached METADATA extracted from a remote zip to the cache
|
||||
pub(crate) async fn wheel_metadata_write_cache(
|
||||
url: &Url,
|
||||
cache: Option<&Path>,
|
||||
metadata: &Metadata21,
|
||||
) -> io::Result<()> {
|
||||
let Some(cache) = cache else {
|
||||
return Ok(());
|
||||
};
|
||||
// TODO(konstin): Actual good cache layout
|
||||
let dir = cache.join(WHEEL_METADATA_FROM_ZIP_CACHE);
|
||||
fs::create_dir_all(&dir).await?;
|
||||
let path = dir.join(puffin_cache::digest(&CanonicalUrl::new(url)));
|
||||
fs::write(path, serde_json::to_vec(metadata)?).await
|
||||
}
|
||||
|
||||
/// Read the `.dist-info/METADATA` file from an async remote zip reader, so we avoid downloading the
|
||||
/// entire wheel just for the one file.
|
||||
///
|
||||
/// This method is derived from `prefix-dev/rip`, which is available under the following BSD-3
|
||||
/// Clause license:
|
||||
///
|
||||
/// ```text
|
||||
/// BSD 3-Clause License
|
||||
///
|
||||
/// Copyright (c) 2023, prefix.dev GmbH
|
||||
///
|
||||
/// Redistribution and use in source and binary forms, with or without
|
||||
/// modification, are permitted provided that the following conditions are met:
|
||||
///
|
||||
/// 1. Redistributions of source code must retain the above copyright notice, this
|
||||
/// list of conditions and the following disclaimer.
|
||||
///
|
||||
/// 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
/// this list of conditions and the following disclaimer in the documentation
|
||||
/// and/or other materials provided with the distribution.
|
||||
///
|
||||
/// 3. Neither the name of the copyright holder nor the names of its
|
||||
/// contributors may be used to endorse or promote products derived from
|
||||
/// this software without specific prior written permission.
|
||||
///
|
||||
/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
/// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
/// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
/// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
/// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
/// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
/// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
/// ```
|
||||
///
|
||||
/// Additional work and modifications to the originating source are available under the
|
||||
/// Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or <https://www.apache.org/licenses/LICENSE-2.0>)
|
||||
/// or MIT license ([LICENSE-MIT](LICENSE-MIT) or <https://opensource.org/licenses/MIT>), as per the
|
||||
/// rest of the crate.
|
||||
pub(crate) async fn wheel_metadata_from_remote_zip(
|
||||
filename: &WheelFilename,
|
||||
reader: &mut AsyncHttpRangeReader,
|
||||
) -> Result<String, Error> {
|
||||
// Make sure we have the back part of the stream.
|
||||
// Best guess for the central directory size inside the zip
|
||||
const CENTRAL_DIRECTORY_SIZE: u64 = 16384;
|
||||
// Because the zip index is at the back
|
||||
reader
|
||||
.prefetch(reader.len().saturating_sub(CENTRAL_DIRECTORY_SIZE)..reader.len())
|
||||
.await;
|
||||
|
||||
// Construct a zip reader to uses the stream.
|
||||
let mut reader = ZipFileReader::new(reader.compat())
|
||||
.await
|
||||
.map_err(|err| Error::Zip(filename.clone(), err))?;
|
||||
|
||||
let ((metadata_idx, metadata_entry), _path) = find_dist_info_metadata(
|
||||
filename,
|
||||
reader
|
||||
.file()
|
||||
.entries()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, e)| Some(((idx, e), e.entry().filename().as_str().ok()?))),
|
||||
)
|
||||
.map_err(|err| Error::InvalidDistInfo(filename.clone(), err))?;
|
||||
|
||||
let offset = metadata_entry.header_offset();
|
||||
let size = metadata_entry.entry().compressed_size()
|
||||
+ 30 // Header size in bytes
|
||||
+ metadata_entry.entry().filename().as_bytes().len() as u64;
|
||||
|
||||
// The zip archive uses as BufReader which reads in chunks of 8192. To ensure we prefetch
|
||||
// enough data we round the size up to the nearest multiple of the buffer size.
|
||||
let buffer_size = 8192;
|
||||
let size = ((size + buffer_size - 1) / buffer_size) * buffer_size;
|
||||
|
||||
// Fetch the bytes from the zip archive that contain the requested file.
|
||||
reader
|
||||
.inner_mut()
|
||||
.get_mut()
|
||||
.prefetch(offset..offset + size)
|
||||
.await;
|
||||
|
||||
// Read the contents of the METADATA file
|
||||
let mut contents = String::new();
|
||||
reader
|
||||
.reader_with_entry(metadata_idx)
|
||||
.await
|
||||
.map_err(|err| Error::Zip(filename.clone(), err))?
|
||||
.read_to_string_checked(&mut contents)
|
||||
.await
|
||||
.map_err(|err| Error::Zip(filename.clone(), err))?;
|
||||
|
||||
Ok(contents)
|
||||
}
|
28
crates/puffin-client/tests/remote_metadata.rs
Normal file
28
crates/puffin-client/tests/remote_metadata.rs
Normal file
|
@ -0,0 +1,28 @@
|
|||
use std::str::FromStr;
|
||||
|
||||
use anyhow::Result;
|
||||
use tempfile::tempdir;
|
||||
use url::Url;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
use puffin_client::RegistryClientBuilder;
|
||||
|
||||
#[tokio::test]
|
||||
async fn remote_metadata_with_and_without_cache() -> Result<()> {
|
||||
let temp_cache = tempdir().unwrap();
|
||||
let client = RegistryClientBuilder::default()
|
||||
.cache(Some(temp_cache.path().to_path_buf()))
|
||||
.build();
|
||||
// The first run is without cache (the tempdir is empty), the second has the cache from the
|
||||
// first run
|
||||
for _ in 0..2 {
|
||||
let url = "https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl";
|
||||
let filename = WheelFilename::from_str(url.rsplit_once('/').unwrap().1).unwrap();
|
||||
let metadata = client
|
||||
.wheel_metadata_no_index(&filename, &Url::parse(url).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(metadata.summary.unwrap(), "Fast, Extensible Progress Meter");
|
||||
}
|
||||
Ok(())
|
||||
}
|
|
@ -11,6 +11,7 @@ authors = { workspace = true }
|
|||
license = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
distribution-filename = { path = "../distribution-filename" }
|
||||
gourgeist = { path = "../gourgeist" }
|
||||
pep508_rs = { path = "../pep508-rs" }
|
||||
platform-host = { path = "../platform-host" }
|
||||
|
@ -36,3 +37,4 @@ tracing = { workspace = true }
|
|||
tracing-indicatif = { workspace = true }
|
||||
tracing-subscriber = { workspace = true }
|
||||
which = { workspace = true }
|
||||
url = { workspace = true }
|
||||
|
|
|
@ -16,10 +16,12 @@ use resolve_many::ResolveManyArgs;
|
|||
|
||||
use crate::build::{build, BuildArgs};
|
||||
use crate::resolve_cli::ResolveCliArgs;
|
||||
use crate::wheel_metadata::WheelMetadataArgs;
|
||||
|
||||
mod build;
|
||||
mod resolve_cli;
|
||||
mod resolve_many;
|
||||
mod wheel_metadata;
|
||||
|
||||
#[derive(Parser)]
|
||||
enum Cli {
|
||||
|
@ -34,6 +36,7 @@ enum Cli {
|
|||
ResolveMany(ResolveManyArgs),
|
||||
/// Resolve requirements passed on the CLI
|
||||
ResolveCli(ResolveCliArgs),
|
||||
WheelMetadata(WheelMetadataArgs),
|
||||
}
|
||||
|
||||
async fn run() -> Result<()> {
|
||||
|
@ -49,6 +52,7 @@ async fn run() -> Result<()> {
|
|||
Cli::ResolveCli(args) => {
|
||||
resolve_cli::resolve_cli(args).await?;
|
||||
}
|
||||
Cli::WheelMetadata(args) => wheel_metadata::wheel_metadata(args).await?,
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
44
crates/puffin-dev/src/wheel_metadata.rs
Normal file
44
crates/puffin-dev/src/wheel_metadata.rs
Normal file
|
@ -0,0 +1,44 @@
|
|||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
|
||||
use clap::Parser;
|
||||
use directories::ProjectDirs;
|
||||
use url::Url;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
use puffin_client::RegistryClientBuilder;
|
||||
|
||||
// Command-line arguments for fetching the metadata of a single remote wheel, parsed by clap
// through the `Parser` derive. Plain `//` comments are used for the notes added here because
// `///` doc comments on clap items end up in the generated help output.
#[derive(Parser)]
|
||||
pub(crate) struct WheelMetadataArgs {
|
||||
// URL of the wheel; it carries no `#[arg]` attribute, so clap treats it as a positional
// argument.
url: Url,
|
||||
/// Avoid reading from or writing to the cache.
|
||||
#[arg(global = true, long, short)]
|
||||
no_cache: bool,
|
||||
/// Path to the cache directory.
|
||||
#[arg(global = true, long, env = "PUFFIN_CACHE_DIR")]
|
||||
cache_dir: Option<PathBuf>,
|
||||
}
|
||||
|
||||
pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> anyhow::Result<()> {
|
||||
let project_dirs = ProjectDirs::from("", "", "puffin");
|
||||
let cache_dir = (!args.no_cache)
|
||||
.then(|| {
|
||||
args.cache_dir
|
||||
.as_deref()
|
||||
.or_else(|| project_dirs.as_ref().map(ProjectDirs::cache_dir))
|
||||
})
|
||||
.flatten();
|
||||
let client = RegistryClientBuilder::default().cache(cache_dir).build();
|
||||
|
||||
let filename = WheelFilename::from_str(
|
||||
args.url
|
||||
.path()
|
||||
.rsplit_once('/')
|
||||
.unwrap_or(("", args.url.path()))
|
||||
.1,
|
||||
)?;
|
||||
|
||||
let metadata = client.wheel_metadata_no_index(&filename, &args.url).await?;
|
||||
println!("{metadata:?}");
|
||||
Ok(())
|
||||
}
|
|
@ -1,13 +1,14 @@
|
|||
use distribution_filename::{SourceDistributionFilename, WheelFilename};
|
||||
use std::ops::Deref;
|
||||
|
||||
use puffin_package::pypi_types::File;
|
||||
|
||||
/// A distribution can either be a wheel or a source distribution.
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct WheelFile(File);
|
||||
pub(crate) struct WheelFile(pub(crate) File, pub(crate) WheelFilename);
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct SdistFile(File);
|
||||
pub(crate) struct SdistFile(pub(crate) File, pub(crate) SourceDistributionFilename);
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) enum DistributionFile {
|
||||
|
@ -31,18 +32,6 @@ impl Deref for SdistFile {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<File> for WheelFile {
|
||||
fn from(file: File) -> Self {
|
||||
Self(file)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<File> for SdistFile {
|
||||
fn from(file: File) -> Self {
|
||||
Self(file)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<WheelFile> for File {
|
||||
fn from(wheel: WheelFile) -> Self {
|
||||
wheel.0
|
||||
|
@ -67,19 +56,6 @@ impl From<SdistFile> for DistributionFile {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<File> for DistributionFile {
|
||||
fn from(file: File) -> Self {
|
||||
if std::path::Path::new(file.filename.as_str())
|
||||
.extension()
|
||||
.map_or(false, |ext| ext.eq_ignore_ascii_case("whl"))
|
||||
{
|
||||
Self::Wheel(WheelFile::from(file))
|
||||
} else {
|
||||
Self::Sdist(SdistFile::from(file))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DistributionFile {
|
||||
pub(crate) fn filename(&self) -> &str {
|
||||
match self {
|
||||
|
|
|
@ -157,6 +157,7 @@ impl<'a> DistributionFinder<'a> {
|
|||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
enum Request {
|
||||
/// A request to fetch the metadata for a package.
|
||||
Package(Requirement),
|
||||
|
|
|
@ -548,31 +548,33 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
|
|||
// distributions.
|
||||
let mut version_map: VersionMap = BTreeMap::new();
|
||||
for file in metadata.files {
|
||||
if let Ok(name) = WheelFilename::from_str(file.filename.as_str()) {
|
||||
if name.is_compatible(self.tags) {
|
||||
let version = PubGrubVersion::from(name.version);
|
||||
if let Ok(filename) = WheelFilename::from_str(file.filename.as_str()) {
|
||||
if filename.is_compatible(self.tags) {
|
||||
let version = PubGrubVersion::from(filename.version.clone());
|
||||
match version_map.entry(version) {
|
||||
std::collections::btree_map::Entry::Occupied(mut entry) => {
|
||||
if matches!(entry.get(), DistributionFile::Sdist(_)) {
|
||||
// Wheels get precedence over source distributions.
|
||||
entry.insert(DistributionFile::from(WheelFile::from(
|
||||
file,
|
||||
entry.insert(DistributionFile::from(WheelFile(
|
||||
file, filename,
|
||||
)));
|
||||
}
|
||||
}
|
||||
std::collections::btree_map::Entry::Vacant(entry) => {
|
||||
entry.insert(DistributionFile::from(WheelFile::from(file)));
|
||||
entry.insert(DistributionFile::from(WheelFile(
|
||||
file, filename,
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if let Ok(name) =
|
||||
} else if let Ok(filename) =
|
||||
SourceDistributionFilename::parse(file.filename.as_str(), &package_name)
|
||||
{
|
||||
let version = PubGrubVersion::from(name.version);
|
||||
let version = PubGrubVersion::from(filename.version.clone());
|
||||
if let std::collections::btree_map::Entry::Vacant(entry) =
|
||||
version_map.entry(version)
|
||||
{
|
||||
entry.insert(DistributionFile::from(SdistFile::from(file)));
|
||||
entry.insert(DistributionFile::from(SdistFile(file, filename)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -627,7 +629,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
|
|||
Request::Wheel(package_name, file) => {
|
||||
let metadata = self
|
||||
.client
|
||||
.file(file.clone().into())
|
||||
.wheel_metadata(file.0.clone(), file.1.clone())
|
||||
.map_err(ResolveError::Client)
|
||||
.await?;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue