Extract METADATA reading into a crate (#7231)

This is preparatory work for the upload functionality, which needs to
read the METADATA file and attach its parsed contents to the POST
request: We move finding the `.dist-info` from `install-wheel-rs` and
`uv-client` to a new `uv-metadata` crate, so it can be shared with the
publish crate.

I don't properly know if it's the right place since the upload code isn't
ready, but I'm PR-ing it now because it already had merge conflicts.
This commit is contained in:
konsti 2024-09-10 15:31:01 +02:00 committed by GitHub
parent 95a4beeed3
commit 2b3890f2b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 254 additions and 185 deletions

24
Cargo.lock generated
View file

@ -4661,7 +4661,6 @@ dependencies = [
"hyper", "hyper",
"hyper-util", "hyper-util",
"insta", "insta",
"install-wheel-rs",
"itertools 0.13.0", "itertools 0.13.0",
"jiff", "jiff",
"pep440_rs", "pep440_rs",
@ -4687,6 +4686,7 @@ dependencies = [
"uv-cache", "uv-cache",
"uv-configuration", "uv-configuration",
"uv-fs", "uv-fs",
"uv-metadata",
"uv-normalize", "uv-normalize",
"uv-version", "uv-version",
"uv-warnings", "uv-warnings",
@ -4794,7 +4794,6 @@ dependencies = [
"futures", "futures",
"indoc", "indoc",
"insta", "insta",
"install-wheel-rs",
"nanoid", "nanoid",
"pep440_rs", "pep440_rs",
"pep508_rs", "pep508_rs",
@ -4818,6 +4817,7 @@ dependencies = [
"uv-extract", "uv-extract",
"uv-fs", "uv-fs",
"uv-git", "uv-git",
"uv-metadata",
"uv-normalize", "uv-normalize",
"uv-types", "uv-types",
"uv-warnings", "uv-warnings",
@ -4939,6 +4939,24 @@ dependencies = [
"textwrap", "textwrap",
] ]
[[package]]
name = "uv-metadata"
version = "0.1.0"
dependencies = [
"async_zip",
"distribution-filename",
"fs-err",
"futures",
"pep440_rs",
"pypi-types",
"thiserror",
"tokio",
"tokio-util",
"tracing",
"uv-normalize",
"zip",
]
[[package]] [[package]]
name = "uv-normalize" name = "uv-normalize"
version = "0.0.1" version = "0.0.1"
@ -5066,7 +5084,6 @@ dependencies = [
"futures", "futures",
"indexmap", "indexmap",
"insta", "insta",
"install-wheel-rs",
"itertools 0.13.0", "itertools 0.13.0",
"jiff", "jiff",
"once-map", "once-map",
@ -5096,6 +5113,7 @@ dependencies = [
"uv-distribution", "uv-distribution",
"uv-fs", "uv-fs",
"uv-git", "uv-git",
"uv-metadata",
"uv-normalize", "uv-normalize",
"uv-pubgrub", "uv-pubgrub",
"uv-python", "uv-python",

View file

@ -41,12 +41,13 @@ uv-fs = { path = "crates/uv-fs" }
uv-git = { path = "crates/uv-git" } uv-git = { path = "crates/uv-git" }
uv-installer = { path = "crates/uv-installer" } uv-installer = { path = "crates/uv-installer" }
uv-macros = { path = "crates/uv-macros" } uv-macros = { path = "crates/uv-macros" }
uv-metadata = { path = "crates/uv-metadata" }
uv-normalize = { path = "crates/uv-normalize" } uv-normalize = { path = "crates/uv-normalize" }
uv-options-metadata = { path = "crates/uv-options-metadata" } uv-options-metadata = { path = "crates/uv-options-metadata" }
uv-pubgrub = { path = "crates/uv-pubgrub" }
uv-python = { path = "crates/uv-python" } uv-python = { path = "crates/uv-python" }
uv-requirements = { path = "crates/uv-requirements" } uv-requirements = { path = "crates/uv-requirements" }
uv-resolver = { path = "crates/uv-resolver" } uv-resolver = { path = "crates/uv-resolver" }
uv-pubgrub = { path = "crates/uv-pubgrub" }
uv-scripts = { path = "crates/uv-scripts" } uv-scripts = { path = "crates/uv-scripts" }
uv-settings = { path = "crates/uv-settings" } uv-settings = { path = "crates/uv-settings" }
uv-shell = { path = "crates/uv-shell" } uv-shell = { path = "crates/uv-shell" }
@ -64,7 +65,7 @@ async-channel = { version = "2.2.0" }
async-compression = { version = "0.4.6" } async-compression = { version = "0.4.6" }
async-trait = { version = "0.1.78" } async-trait = { version = "0.1.78" }
async_http_range_reader = { version = "0.8.0" } async_http_range_reader = { version = "0.8.0" }
async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "011b24604fa7bc223daaad7712c0694bac8f0a87", features = ["deflate"] } async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "011b24604fa7bc223daaad7712c0694bac8f0a87", features = ["deflate", "tokio"] }
axoupdater = { version = "0.7.0", default-features = false } axoupdater = { version = "0.7.0", default-features = false }
backoff = { version = "0.4.0" } backoff = { version = "0.4.0" }
base64 = { version = "0.22.0" } base64 = { version = "0.22.0" }

View file

@ -16,7 +16,6 @@ use uv_normalize::PackageName;
pub use wheel::{parse_wheel_file, read_record_file, LibKind}; pub use wheel::{parse_wheel_file, read_record_file, LibKind};
pub mod linker; pub mod linker;
pub mod metadata;
mod record; mod record;
mod script; mod script;
mod uninstall; mod uninstall;
@ -82,24 +81,10 @@ pub enum Error {
Pep440, Pep440,
#[error("Invalid direct_url.json")] #[error("Invalid direct_url.json")]
DirectUrlJson(#[from] serde_json::Error), DirectUrlJson(#[from] serde_json::Error),
#[error("No .dist-info directory found")]
MissingDistInfo,
#[error("Cannot uninstall package; `RECORD` file not found at: {}", _0.user_display())] #[error("Cannot uninstall package; `RECORD` file not found at: {}", _0.user_display())]
MissingRecord(PathBuf), MissingRecord(PathBuf),
#[error("Cannot uninstall package; `top_level.txt` file not found at: {}", _0.user_display())] #[error("Cannot uninstall package; `top_level.txt` file not found at: {}", _0.user_display())]
MissingTopLevel(PathBuf), MissingTopLevel(PathBuf),
#[error("Multiple .dist-info directories found: {0}")]
MultipleDistInfo(String),
#[error(
"The .dist-info directory {0} does not consist of the normalized package name and version"
)]
MissingDistInfoSegments(String),
#[error("The .dist-info directory {0} does not start with the normalized package name: {1}")]
MissingDistInfoPackageName(String, String),
#[error("The .dist-info directory {0} does not start with the normalized version: {1}")]
MissingDistInfoVersion(String, String),
#[error("The .dist-info directory name contains invalid characters")]
InvalidDistInfoPrefix,
#[error("Invalid wheel size")] #[error("Invalid wheel size")]
InvalidSize, InvalidSize,
#[error("Invalid package name")] #[error("Invalid package name")]

View file

@ -10,7 +10,6 @@ workspace = true
cache-key = { workspace = true } cache-key = { workspace = true }
distribution-filename = { workspace = true } distribution-filename = { workspace = true }
distribution-types = { workspace = true } distribution-types = { workspace = true }
install-wheel-rs = { workspace = true }
pep440_rs = { workspace = true } pep440_rs = { workspace = true }
pep508_rs = { workspace = true } pep508_rs = { workspace = true }
platform-tags = { workspace = true } platform-tags = { workspace = true }
@ -19,6 +18,7 @@ uv-auth = { workspace = true }
uv-cache = { workspace = true } uv-cache = { workspace = true }
uv-configuration = { workspace = true } uv-configuration = { workspace = true }
uv-fs = { workspace = true, features = ["tokio"] } uv-fs = { workspace = true, features = ["tokio"] }
uv-metadata = { workspace = true }
uv-normalize = { workspace = true } uv-normalize = { workspace = true }
uv-version = { workspace = true } uv-version = { workspace = true }
uv-warnings = { workspace = true } uv-warnings = { workspace = true }
@ -26,7 +26,7 @@ uv-warnings = { workspace = true }
anyhow = { workspace = true } anyhow = { workspace = true }
async-trait = { workspace = true } async-trait = { workspace = true }
async_http_range_reader = { workspace = true } async_http_range_reader = { workspace = true }
async_zip = { workspace = true, features = ["tokio"] } async_zip = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] } fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true } futures = { workspace = true }
html-escape = { workspace = true } html-escape = { workspace = true }

View file

@ -148,8 +148,8 @@ pub enum ErrorKind {
#[error("Expected an index URL, but received non-base URL: {0}")] #[error("Expected an index URL, but received non-base URL: {0}")]
CannotBeABase(Url), CannotBeABase(Url),
#[error(transparent)] #[error("Failed to read metadata: `{0}`")]
DistInfo(#[from] install_wheel_rs::Error), Metadata(String, #[source] uv_metadata::Error),
#[error("{0} isn't available locally, but making network requests to registries was banned")] #[error("{0} isn't available locally, but making network requests to registries was banned")]
NoIndex(String), NoIndex(String),

View file

@ -9,8 +9,6 @@ use http::HeaderMap;
use reqwest::{Client, Response, StatusCode}; use reqwest::{Client, Response, StatusCode};
use reqwest_middleware::ClientWithMiddleware; use reqwest_middleware::ClientWithMiddleware;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tokio::io::AsyncReadExt;
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tracing::{info_span, instrument, trace, warn, Instrument}; use tracing::{info_span, instrument, trace, warn, Instrument};
use url::Url; use url::Url;
@ -18,7 +16,6 @@ use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use distribution_types::{ use distribution_types::{
BuiltDist, File, FileLocation, IndexCapabilities, IndexUrl, IndexUrls, Name, BuiltDist, File, FileLocation, IndexCapabilities, IndexUrl, IndexUrls, Name,
}; };
use install_wheel_rs::metadata::{find_archive_dist_info, is_metadata_entry};
use pep440_rs::Version; use pep440_rs::Version;
use pep508_rs::MarkerEnvironment; use pep508_rs::MarkerEnvironment;
use platform_tags::Platform; use platform_tags::Platform;
@ -26,6 +23,7 @@ use pypi_types::{Metadata23, SimpleJson};
use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache}; use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
use uv_configuration::KeyringProviderType; use uv_configuration::KeyringProviderType;
use uv_configuration::{IndexStrategy, TrustedHost}; use uv_configuration::{IndexStrategy, TrustedHost};
use uv_metadata::{read_metadata_async_seek, read_metadata_async_stream};
use uv_normalize::PackageName; use uv_normalize::PackageName;
use crate::base_client::BaseClientBuilder; use crate::base_client::BaseClientBuilder;
@ -452,8 +450,18 @@ impl RegistryClient {
.await .await
.map_err(ErrorKind::Io)?; .map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file); let reader = tokio::io::BufReader::new(file);
read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader) let contents = read_metadata_async_seek(&wheel.filename, reader)
.await? .await
.map_err(|err| {
ErrorKind::Metadata(path.to_string_lossy().to_string(), err)
})?;
Metadata23::parse_metadata(&contents).map_err(|err| {
ErrorKind::MetadataParseError(
wheel.filename.clone(),
built_dist.to_string(),
Box::new(err),
)
})?
} }
WheelLocation::Url(url) => { WheelLocation::Url(url) => {
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url, capabilities) self.wheel_metadata_registry(&wheel.index, &wheel.file, &url, capabilities)
@ -476,7 +484,18 @@ impl RegistryClient {
.await .await
.map_err(ErrorKind::Io)?; .map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file); let reader = tokio::io::BufReader::new(file);
read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader).await? let contents = read_metadata_async_seek(&wheel.filename, reader)
.await
.map_err(|err| {
ErrorKind::Metadata(wheel.install_path.to_string_lossy().to_string(), err)
})?;
Metadata23::parse_metadata(&contents).map_err(|err| {
ErrorKind::MetadataParseError(
wheel.filename.clone(),
built_dist.to_string(),
Box::new(err),
)
})?
} }
}; };
@ -609,7 +628,7 @@ impl RegistryClient {
.await .await
.map_err(ErrorKind::AsyncHttpRangeReader)?; .map_err(ErrorKind::AsyncHttpRangeReader)?;
trace!("Getting metadata for {filename} by range request"); trace!("Getting metadata for {filename} by range request");
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?; let text = wheel_metadata_from_remote_zip(filename, url, &mut reader).await?;
let metadata = Metadata23::parse_metadata(text.as_bytes()).map_err(|err| { let metadata = Metadata23::parse_metadata(text.as_bytes()).map_err(|err| {
Error::from(ErrorKind::MetadataParseError( Error::from(ErrorKind::MetadataParseError(
filename.clone(), filename.clone(),
@ -675,7 +694,9 @@ impl RegistryClient {
.map_err(|err| self.handle_response_errors(err)) .map_err(|err| self.handle_response_errors(err))
.into_async_read(); .into_async_read();
read_metadata_async_stream(filename, url.to_string(), reader).await read_metadata_async_stream(filename, url.as_ref(), reader)
.await
.map_err(|err| ErrorKind::Metadata(url.to_string(), err))
} }
.instrument(info_span!("read_metadata_stream", wheel = %filename)) .instrument(info_span!("read_metadata_stream", wheel = %filename))
}; };
@ -701,88 +722,6 @@ impl RegistryClient {
} }
} }
/// Locate, read and parse the `METADATA` entry of a zipped wheel using a seekable reader.
///
/// `debug_source` is only used to label the error if parsing the metadata fails.
async fn read_metadata_async_seek(
    filename: &WheelFilename,
    debug_source: String,
    reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin,
) -> Result<Metadata23, Error> {
    let buffered = futures::io::BufReader::new(reader.compat());
    let mut archive = async_zip::base::read::seek::ZipFileReader::new(buffered)
        .await
        .map_err(|err| ErrorKind::Zip(filename.clone(), err))?;

    // Pair every entry index with its name; entries whose name is not valid UTF-8 are skipped.
    let named_entries = archive
        .file()
        .entries()
        .iter()
        .enumerate()
        .filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?)));
    let (metadata_idx, _) =
        find_archive_dist_info(filename, named_entries).map_err(ErrorKind::DistInfo)?;

    // Read the contents of the `METADATA` file.
    let mut entry_reader = archive
        .reader_with_entry(metadata_idx)
        .await
        .map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
    let mut contents = Vec::new();
    entry_reader
        .read_to_end_checked(&mut contents)
        .await
        .map_err(|err| ErrorKind::Zip(filename.clone(), err))?;

    match Metadata23::parse_metadata(&contents) {
        Ok(metadata) => Ok(metadata),
        Err(err) => {
            Err(ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err)).into())
        }
    }
}
/// Like [`read_metadata_async_seek`], but doesn't use seek.
///
/// Walks the zip entries in stream order and parses the first entry that
/// [`is_metadata_entry`] accepts for `filename`. `debug_source` labels the
/// error when the metadata cannot be parsed or no matching entry exists.
///
/// # Errors
///
/// * `ErrorKind::Zip` if the archive or an entry name cannot be read.
/// * `ErrorKind::DistInfo` if the `.dist-info` check on an entry path fails.
/// * `ErrorKind::Io` if reading the `METADATA` entry from the stream fails.
/// * `ErrorKind::MetadataParseError` if the `METADATA` contents are invalid.
/// * `ErrorKind::MetadataNotFound` if the stream ends without a `METADATA` entry.
async fn read_metadata_async_stream<R: futures::AsyncRead + Unpin>(
    filename: &WheelFilename,
    debug_source: String,
    reader: R,
) -> Result<Metadata23, Error> {
    let reader = futures::io::BufReader::with_capacity(128 * 1024, reader);
    let mut zip = async_zip::base::read::stream::ZipFileReader::new(reader);

    while let Some(mut entry) = zip
        .next_with_entry()
        .await
        .map_err(|err| ErrorKind::Zip(filename.clone(), err))?
    {
        // Find the `METADATA` entry.
        let path = entry
            .reader()
            .entry()
            .filename()
            .as_str()
            .map_err(|err| ErrorKind::Zip(filename.clone(), err))?;

        if is_metadata_entry(path, filename).map_err(ErrorKind::DistInfo)? {
            let mut reader = entry.reader_mut().compat();
            let mut contents = Vec::new();
            // A failing read is a recoverable I/O error on an external stream, not a bug:
            // surface it to the caller instead of panicking.
            reader
                .read_to_end(&mut contents)
                .await
                .map_err(ErrorKind::Io)?;

            let metadata = Metadata23::parse_metadata(&contents).map_err(|err| {
                ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err))
            })?;
            return Ok(metadata);
        }

        // Close current file to get access to the next one. See docs:
        // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/
        zip = entry
            .skip()
            .await
            .map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
    }

    Err(ErrorKind::MetadataNotFound(filename.clone(), debug_source).into())
}
#[derive( #[derive(
Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize, Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
)] )]

View file

@ -1,11 +1,10 @@
use crate::{Error, ErrorKind};
use async_http_range_reader::AsyncHttpRangeReader; use async_http_range_reader::AsyncHttpRangeReader;
use distribution_filename::WheelFilename;
use futures::io::BufReader; use futures::io::BufReader;
use tokio_util::compat::TokioAsyncReadCompatExt; use tokio_util::compat::TokioAsyncReadCompatExt;
use url::Url;
use distribution_filename::WheelFilename; use uv_metadata::find_archive_dist_info;
use install_wheel_rs::metadata::find_archive_dist_info;
use crate::{Error, ErrorKind};
/// Read the `.dist-info/METADATA` file from a async remote zip reader, so we avoid downloading the /// Read the `.dist-info/METADATA` file from a async remote zip reader, so we avoid downloading the
/// entire wheel just for the one file. /// entire wheel just for the one file.
@ -50,6 +49,7 @@ use crate::{Error, ErrorKind};
/// rest of the crate. /// rest of the crate.
pub(crate) async fn wheel_metadata_from_remote_zip( pub(crate) async fn wheel_metadata_from_remote_zip(
filename: &WheelFilename, filename: &WheelFilename,
debug_name: &Url,
reader: &mut AsyncHttpRangeReader, reader: &mut AsyncHttpRangeReader,
) -> Result<String, Error> { ) -> Result<String, Error> {
// Make sure we have the back part of the stream. // Make sure we have the back part of the stream.
@ -75,7 +75,7 @@ pub(crate) async fn wheel_metadata_from_remote_zip(
.enumerate() .enumerate()
.filter_map(|(idx, e)| Some(((idx, e), e.filename().as_str().ok()?))), .filter_map(|(idx, e)| Some(((idx, e), e.filename().as_str().ok()?))),
) )
.map_err(ErrorKind::DistInfo)?; .map_err(|err| ErrorKind::Metadata(debug_name.to_string(), err))?;
let offset = metadata_entry.header_offset(); let offset = metadata_entry.header_offset();
let size = metadata_entry.compressed_size() let size = metadata_entry.compressed_size()

View file

@ -16,7 +16,6 @@ workspace = true
cache-key = { workspace = true } cache-key = { workspace = true }
distribution-filename = { workspace = true } distribution-filename = { workspace = true }
distribution-types = { workspace = true } distribution-types = { workspace = true }
install-wheel-rs = { workspace = true }
pep440_rs = { workspace = true } pep440_rs = { workspace = true }
pep508_rs = { workspace = true } pep508_rs = { workspace = true }
platform-tags = { workspace = true } platform-tags = { workspace = true }
@ -28,6 +27,7 @@ uv-configuration = { workspace = true }
uv-extract = { workspace = true } uv-extract = { workspace = true }
uv-fs = { workspace = true, features = ["tokio"] } uv-fs = { workspace = true, features = ["tokio"] }
uv-git = { workspace = true } uv-git = { workspace = true }
uv-metadata = { workspace = true }
uv-normalize = { workspace = true } uv-normalize = { workspace = true }
uv-types = { workspace = true } uv-types = { workspace = true }
uv-warnings = { workspace = true } uv-warnings = { workspace = true }

View file

@ -4,6 +4,8 @@ use crate::Error;
use distribution_filename::WheelFilename; use distribution_filename::WheelFilename;
use distribution_types::{CachedDist, Dist, Hashed}; use distribution_types::{CachedDist, Dist, Hashed};
use pypi_types::{HashDigest, Metadata23}; use pypi_types::{HashDigest, Metadata23};
use uv_metadata::read_flat_wheel_metadata;
use uv_cache_info::CacheInfo; use uv_cache_info::CacheInfo;
/// A locally available wheel. /// A locally available wheel.
@ -41,6 +43,7 @@ impl LocalWheel {
/// Read the [`Metadata23`] from a wheel. /// Read the [`Metadata23`] from a wheel.
pub fn metadata(&self) -> Result<Metadata23, Error> { pub fn metadata(&self) -> Result<Metadata23, Error> {
read_flat_wheel_metadata(&self.filename, &self.archive) read_flat_wheel_metadata(&self.filename, &self.archive)
.map_err(|err| Error::WheelMetadata(self.archive.clone(), Box::new(err)))
} }
} }
@ -68,13 +71,3 @@ impl std::fmt::Display for LocalWheel {
write!(f, "{}", self.remote()) write!(f, "{}", self.remote())
} }
} }
/// Parse the [`Metadata23`] of a wheel that has already been unpacked to a directory.
///
/// The `.dist-info` directory is located first, then its `METADATA` file is read and parsed.
fn read_flat_wheel_metadata(
    filename: &WheelFilename,
    wheel: impl AsRef<Path>,
) -> Result<Metadata23, Error> {
    let wheel = wheel.as_ref();
    let prefix = install_wheel_rs::metadata::find_flat_dist_info(filename, wheel)?;
    let raw = install_wheel_rs::metadata::read_dist_info_metadata(&prefix, wheel)?;
    let parsed = Metadata23::parse_metadata(&raw)?;
    Ok(parsed)
}

View file

@ -63,8 +63,8 @@ pub enum Error {
VersionMismatch { given: Version, metadata: Version }, VersionMismatch { given: Version, metadata: Version },
#[error("Failed to parse metadata from built wheel")] #[error("Failed to parse metadata from built wheel")]
Metadata(#[from] pypi_types::MetadataError), Metadata(#[from] pypi_types::MetadataError),
#[error("Failed to read `dist-info` metadata from built wheel")] #[error("Failed to read metadata: `{}`", _0.user_display())]
DistInfo(#[from] install_wheel_rs::Error), WheelMetadata(PathBuf, #[source] Box<uv_metadata::Error>),
#[error("Failed to read zip archive from built wheel")] #[error("Failed to read zip archive from built wheel")]
Zip(#[from] ZipError), Zip(#[from] ZipError),
#[error("Source distribution directory contains neither readable `pyproject.toml` nor `setup.py`: `{}`", _0.user_display())] #[error("Source distribution directory contains neither readable `pyproject.toml` nor `setup.py`: `{}`", _0.user_display())]

View file

@ -19,7 +19,6 @@ use distribution_types::{
}; };
use fs_err::tokio as fs; use fs_err::tokio as fs;
use futures::{FutureExt, TryStreamExt}; use futures::{FutureExt, TryStreamExt};
use install_wheel_rs::metadata::read_archive_metadata;
use platform_tags::Tags; use platform_tags::Tags;
use pypi_types::{HashDigest, Metadata12, Metadata23, RequiresTxt}; use pypi_types::{HashDigest, Metadata12, Metadata23, RequiresTxt};
use reqwest::Response; use reqwest::Response;
@ -34,6 +33,7 @@ use uv_client::{
use uv_configuration::{BuildKind, BuildOutput}; use uv_configuration::{BuildKind, BuildOutput};
use uv_extract::hash::Hasher; use uv_extract::hash::Hasher;
use uv_fs::{rename_with_retry, write_atomic, LockedFile}; use uv_fs::{rename_with_retry, write_atomic, LockedFile};
use uv_metadata::read_archive_metadata;
use uv_types::{BuildContext, SourceBuildTrait}; use uv_types::{BuildContext, SourceBuildTrait};
use zip::ZipArchive; use zip::ZipArchive;
@ -1508,7 +1508,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
// Read the metadata from the wheel. // Read the metadata from the wheel.
let filename = WheelFilename::from_str(&disk_filename)?; let filename = WheelFilename::from_str(&disk_filename)?;
let metadata = read_wheel_metadata(&filename, cache_shard.join(&disk_filename))?; let metadata = read_wheel_metadata(&filename, &cache_shard.join(&disk_filename))?;
// Validate the metadata. // Validate the metadata.
validate(source, &metadata)?; validate(source, &metadata)?;
@ -2019,14 +2019,12 @@ async fn read_cached_metadata(cache_entry: &CacheEntry) -> Result<Option<Metadat
} }
/// Read the [`Metadata23`] from a built wheel. /// Read the [`Metadata23`] from a built wheel.
fn read_wheel_metadata( fn read_wheel_metadata(filename: &WheelFilename, wheel: &Path) -> Result<Metadata23, Error> {
filename: &WheelFilename,
wheel: impl Into<PathBuf>,
) -> Result<Metadata23, Error> {
let file = fs_err::File::open(wheel).map_err(Error::CacheRead)?; let file = fs_err::File::open(wheel).map_err(Error::CacheRead)?;
let reader = std::io::BufReader::new(file); let reader = std::io::BufReader::new(file);
let mut archive = ZipArchive::new(reader)?; let mut archive = ZipArchive::new(reader)?;
let dist_info = read_archive_metadata(filename, &mut archive)?; let dist_info = read_archive_metadata(filename, &mut archive)
.map_err(|err| Error::WheelMetadata(wheel.to_path_buf(), Box::new(err)))?;
Ok(Metadata23::parse_metadata(&dist_info)?) Ok(Metadata23::parse_metadata(&dist_info)?)
} }

View file

@ -17,7 +17,7 @@ distribution-filename = { workspace = true }
pypi-types = { workspace = true } pypi-types = { workspace = true }
async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] } async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] }
async_zip = { workspace = true, features = ["tokio"] } async_zip = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] } fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true } futures = { workspace = true }
md-5 = { workspace = true } md-5 = { workspace = true }

View file

@ -0,0 +1,28 @@
[package]
name = "uv-metadata"
version = "0.1.0"
edition.workspace = true
rust-version.workspace = true
homepage.workspace = true
documentation.workspace = true
repository.workspace = true
authors.workspace = true
license.workspace = true
[dependencies]
distribution-filename = { workspace = true }
pep440_rs = { workspace = true }
pypi-types = { workspace = true }
uv-normalize = { workspace = true }
async_zip = { workspace = true }
fs-err = { workspace = true }
futures = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
zip = { workspace = true }
[lints]
workspace = true

View file

@ -1,15 +1,51 @@
use std::io::{Read, Seek}; //! Read metadata from wheels and source distributions.
use std::path::Path; //!
use std::str::FromStr; //! This module reads all fields exhaustively. The fields are defined in the [Core metadata
//! specification](https://packaging.python.org/en/latest/specifications/core-metadata/).
use tracing::warn;
use zip::ZipArchive;
use distribution_filename::WheelFilename; use distribution_filename::WheelFilename;
use pep440_rs::Version; use pep440_rs::Version;
use uv_normalize::DistInfoName; use pypi_types::Metadata23;
use std::io;
use std::io::{Read, Seek};
use std::path::Path;
use std::str::FromStr;
use thiserror::Error;
use tokio::io::AsyncReadExt;
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tracing::warn;
use uv_normalize::{DistInfoName, InvalidNameError};
use zip::ZipArchive;
use crate::Error; /// The caller is responsible for attaching the path or url we failed to read.
#[derive(Debug, Error)]
pub enum Error {
#[error("Failed to read `dist-info` metadata from built wheel")]
DistInfo,
#[error("No .dist-info directory found")]
MissingDistInfo,
#[error("Multiple .dist-info directories found: {0}")]
MultipleDistInfo(String),
#[error(
"The .dist-info directory does not consist of the normalized package name and version: `{0}`"
)]
MissingDistInfoSegments(String),
#[error("The .dist-info directory {0} does not start with the normalized package name: {1}")]
MissingDistInfoPackageName(String, String),
#[error("The .dist-info directory {0} does not start with the normalized version: {1}")]
MissingDistInfoVersion(String, String),
#[error("The .dist-info directory name contains invalid characters")]
InvalidName(#[from] InvalidNameError),
#[error("The metadata at {0} is invalid")]
InvalidMetadata(String, pypi_types::MetadataError),
#[error("Failed to read from zip file")]
Zip(#[from] zip::result::ZipError),
#[error("Failed to read from zip file")]
AsyncZip(#[from] async_zip::error::ZipError),
// No `#[from]` to enforce manual review of `io::Error` sources.
#[error(transparent)]
Io(io::Error),
}
/// Find the `.dist-info` directory in a zipped wheel. /// Find the `.dist-info` directory in a zipped wheel.
/// ///
@ -123,13 +159,11 @@ pub fn read_archive_metadata(
let dist_info_prefix = let dist_info_prefix =
find_archive_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1; find_archive_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1;
let mut file = archive let mut file = archive.by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))?;
.by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))
.map_err(|err| Error::Zip(filename.to_string(), err))?;
#[allow(clippy::cast_possible_truncation)] #[allow(clippy::cast_possible_truncation)]
let mut buffer = Vec::with_capacity(file.size() as usize); let mut buffer = Vec::with_capacity(file.size() as usize);
file.read_to_end(&mut buffer)?; file.read_to_end(&mut buffer).map_err(Error::Io)?;
Ok(buffer) Ok(buffer)
} }
@ -142,7 +176,9 @@ pub fn find_flat_dist_info(
path: impl AsRef<Path>, path: impl AsRef<Path>,
) -> Result<String, Error> { ) -> Result<String, Error> {
// Iterate over `path` to find the `.dist-info` directory. It should be at the top-level. // Iterate over `path` to find the `.dist-info` directory. It should be at the top-level.
let Some(dist_info_prefix) = fs_err::read_dir(path.as_ref())?.find_map(|entry| { let Some(dist_info_prefix) = fs_err::read_dir(path.as_ref())
.map_err(Error::Io)?
.find_map(|entry| {
let entry = entry.ok()?; let entry = entry.ok()?;
let file_type = entry.file_type().ok()?; let file_type = entry.file_type().ok()?;
if file_type.is_dir() { if file_type.is_dir() {
@ -158,10 +194,9 @@ pub fn find_flat_dist_info(
} else { } else {
None None
} }
}) else { })
return Err(Error::InvalidWheel( else {
"Missing .dist-info directory".to_string(), return Err(Error::MissingDistInfo);
));
}; };
// Like `pip`, validate that the `.dist-info` directory is prefixed with the canonical // Like `pip`, validate that the `.dist-info` directory is prefixed with the canonical
@ -199,16 +234,86 @@ pub fn read_dist_info_metadata(
let metadata_file = wheel let metadata_file = wheel
.as_ref() .as_ref()
.join(format!("{dist_info_prefix}.dist-info/METADATA")); .join(format!("{dist_info_prefix}.dist-info/METADATA"));
Ok(fs_err::read(metadata_file)?) fs_err::read(metadata_file).map_err(Error::Io)
}
/// Return the raw bytes of a wheel's `METADATA` file, read from a seekable zip reader.
///
/// The bytes are returned unparsed. Any error returned here carries no path or URL.
pub async fn read_metadata_async_seek(
    filename: &WheelFilename,
    reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin,
) -> Result<Vec<u8>, Error> {
    let buffered = futures::io::BufReader::new(reader.compat());
    let mut archive = async_zip::base::read::seek::ZipFileReader::new(buffered).await?;

    // Pair every entry index with its name; entries whose name is not valid UTF-8 are skipped.
    let named_entries = archive
        .file()
        .entries()
        .iter()
        .enumerate()
        .filter_map(|(index, entry)| {
            let name = entry.filename().as_str().ok()?;
            Some((index, name))
        });
    let (metadata_idx, _) = find_archive_dist_info(filename, named_entries)?;

    // Read the contents of the `METADATA` file.
    let mut entry_reader = archive.reader_with_entry(metadata_idx).await?;
    let mut contents = Vec::new();
    entry_reader.read_to_end_checked(&mut contents).await?;

    Ok(contents)
}
/// Like [`read_metadata_async_seek`], but doesn't use seek.
pub async fn read_metadata_async_stream<R: futures::AsyncRead + Unpin>(
filename: &WheelFilename,
debug_path: &str,
reader: R,
) -> Result<Metadata23, Error> {
let reader = futures::io::BufReader::with_capacity(128 * 1024, reader);
let mut zip = async_zip::base::read::stream::ZipFileReader::new(reader);
while let Some(mut entry) = zip.next_with_entry().await? {
// Find the `METADATA` entry.
let path = entry.reader().entry().filename().as_str()?;
if is_metadata_entry(path, filename)? {
let mut reader = entry.reader_mut().compat();
let mut contents = Vec::new();
reader.read_to_end(&mut contents).await.unwrap();
let metadata = Metadata23::parse_metadata(&contents)
.map_err(|err| Error::InvalidMetadata(debug_path.to_string(), err))?;
return Ok(metadata);
}
// Close current file to get access to the next one. See docs:
// https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/
zip = entry.skip().await?;
}
Err(Error::MissingDistInfo)
}
/// Read the [`Metadata23`] from an unzipped wheel.
pub fn read_flat_wheel_metadata(
filename: &WheelFilename,
wheel: impl AsRef<Path>,
) -> Result<Metadata23, Error> {
let dist_info_prefix = find_flat_dist_info(filename, &wheel)?;
let metadata = read_dist_info_metadata(&dist_info_prefix, &wheel)?;
Metadata23::parse_metadata(&metadata).map_err(|err| {
Error::InvalidMetadata(format!("{dist_info_prefix}.dist-info/METADATA"), err)
})
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::str::FromStr; use super::find_archive_dist_info;
use distribution_filename::WheelFilename; use distribution_filename::WheelFilename;
use std::str::FromStr;
use crate::metadata::find_archive_dist_info;
#[test] #[test]
fn test_dot_in_name() { fn test_dot_in_name() {

View file

@ -16,7 +16,6 @@ workspace = true
cache-key = { workspace = true } cache-key = { workspace = true }
distribution-filename = { workspace = true } distribution-filename = { workspace = true }
distribution-types = { workspace = true } distribution-types = { workspace = true }
install-wheel-rs = { workspace = true }
once-map = { workspace = true } once-map = { workspace = true }
pep440_rs = { workspace = true } pep440_rs = { workspace = true }
pep508_rs = { workspace = true } pep508_rs = { workspace = true }
@ -28,6 +27,7 @@ uv-configuration = { workspace = true }
uv-distribution = { workspace = true } uv-distribution = { workspace = true }
uv-fs = { workspace = true, features = ["serde"] } uv-fs = { workspace = true, features = ["serde"] }
uv-git = { workspace = true } uv-git = { workspace = true }
uv-metadata = { workspace = true }
uv-normalize = { workspace = true } uv-normalize = { workspace = true }
uv-pubgrub = { workspace = true } uv-pubgrub = { workspace = true }
uv-python = { workspace = true } uv-python = { workspace = true }

View file

@ -39,7 +39,7 @@ pub enum MetadataResponse {
/// The wheel metadata was found, but the metadata was inconsistent. /// The wheel metadata was found, but the metadata was inconsistent.
InconsistentMetadata(Box<uv_distribution::Error>), InconsistentMetadata(Box<uv_distribution::Error>),
/// The wheel has an invalid structure. /// The wheel has an invalid structure.
InvalidStructure(Box<install_wheel_rs::Error>), InvalidStructure(Box<uv_metadata::Error>),
/// The wheel metadata was not found in the cache and the network is not available. /// The wheel metadata was not found in the cache and the network is not available.
Offline, Offline,
} }
@ -184,7 +184,7 @@ impl<'a, Context: BuildContext> ResolverProvider for DefaultResolverProvider<'a,
uv_client::ErrorKind::MetadataParseError(_, _, err) => { uv_client::ErrorKind::MetadataParseError(_, _, err) => {
Ok(MetadataResponse::InvalidMetadata(err)) Ok(MetadataResponse::InvalidMetadata(err))
} }
uv_client::ErrorKind::DistInfo(err) => { uv_client::ErrorKind::Metadata(_, err) => {
Ok(MetadataResponse::InvalidStructure(Box::new(err))) Ok(MetadataResponse::InvalidStructure(Box::new(err)))
} }
kind => Err(uv_client::Error::from(kind).into()), kind => Err(uv_client::Error::from(kind).into()),
@ -198,8 +198,8 @@ impl<'a, Context: BuildContext> ResolverProvider for DefaultResolverProvider<'a,
uv_distribution::Error::Metadata(err) => { uv_distribution::Error::Metadata(err) => {
Ok(MetadataResponse::InvalidMetadata(Box::new(err))) Ok(MetadataResponse::InvalidMetadata(Box::new(err)))
} }
uv_distribution::Error::DistInfo(err) => { uv_distribution::Error::WheelMetadata(_, err) => {
Ok(MetadataResponse::InvalidStructure(Box::new(err))) Ok(MetadataResponse::InvalidStructure(err))
} }
err => Err(err), err => Err(err),
}, },

View file

@ -2615,13 +2615,15 @@ fn incompatible_wheel() -> Result<()> {
.arg("requirements.txt") .arg("requirements.txt")
.arg("--strict"), @r###" .arg("--strict"), @r###"
success: false success: false
exit_code: 2 exit_code: 1
----- stdout ----- ----- stdout -----
----- stderr ----- ----- stderr -----
error: Failed to read `foo @ file://[TEMP_DIR]/foo-1.2.3-not-compatible-wheel.whl` × No solution found when resolving dependencies:
Caused by: Failed to unzip wheel: foo-1.2.3-not-compatible-wheel.whl Because foo has an invalid package format and you require foo, we can conclude that your requirements are unsatisfiable.
Caused by: unable to locate the end of central directory record
hint: The structure of foo was invalid:
Failed to read from zip file
"### "###
); );