Backtrack on distributions with invalid metadata (#2834)

## Summary

Closes https://github.com/astral-sh/uv/issues/2821.
This commit is contained in:
Charlie Marsh 2024-04-05 18:00:48 -04:00 committed by GitHub
parent f0b0e1943c
commit 00934044aa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 229 additions and 73 deletions

1
Cargo.lock generated
View file

@ -4754,6 +4754,7 @@ dependencies = [
"futures",
"indexmap",
"insta",
"install-wheel-rs",
"itertools 0.12.1",
"once-map",
"once_cell",

View file

@ -132,7 +132,7 @@ pub enum ErrorKind {
/// Dist-info error
#[error(transparent)]
InstallWheel(#[from] install_wheel_rs::Error),
DistInfo(#[from] install_wheel_rs::Error),
#[error("{0} isn't available locally, but making network requests to registries was banned.")]
NoIndex(String),

View file

@ -632,7 +632,7 @@ async fn read_metadata_async_seek(
.enumerate()
.filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))),
)
.map_err(ErrorKind::InstallWheel)?;
.map_err(ErrorKind::DistInfo)?;
// Read the contents of the `METADATA` file.
let mut contents = Vec::new();

View file

@ -75,7 +75,7 @@ pub(crate) async fn wheel_metadata_from_remote_zip(
.enumerate()
.filter_map(|(idx, e)| Some(((idx, e), e.filename().as_str().ok()?))),
)
.map_err(ErrorKind::InstallWheel)?;
.map_err(ErrorKind::DistInfo)?;
let offset = metadata_entry.header_offset();
let size = metadata_entry.compressed_size()

View file

@ -11,7 +11,7 @@ use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl};
use pypi_types::Metadata23;
use uv_client::RegistryClient;
use uv_distribution::{DistributionDatabase, Reporter};
use uv_resolver::InMemoryIndex;
use uv_resolver::{InMemoryIndex, MetadataResponse};
use uv_types::{BuildContext, Constraints, Overrides, RequestedRequirements};
/// A resolver for resolving lookahead requirements from direct URLs.
@ -134,7 +134,18 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> {
// Fetch the metadata for the distribution.
let requires_dist = {
let id = dist.package_id();
if let Some(metadata) = self.index.get_metadata(&id) {
if let Some(metadata) = self
.index
.get_metadata(&id)
.as_deref()
.and_then(|response| {
if let MetadataResponse::Found(metadata) = response {
Some(metadata)
} else {
None
}
})
{
// If the metadata is already in the index, return it.
metadata.requires_dist.clone()
} else {
@ -151,7 +162,8 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> {
let requires_dist = metadata.requires_dist.clone();
// Insert the metadata into the index.
self.index.insert_metadata(id, metadata);
self.index
.insert_metadata(id, MetadataResponse::Found(metadata));
requires_dist
}

View file

@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
@ -11,7 +11,7 @@ use pep508_rs::Requirement;
use uv_client::RegistryClient;
use uv_distribution::{DistributionDatabase, Reporter};
use uv_fs::Simplified;
use uv_resolver::InMemoryIndex;
use uv_resolver::{InMemoryIndex, MetadataResponse};
use uv_types::BuildContext;
use crate::ExtrasSpecification;
@ -87,16 +87,28 @@ impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> {
// Fetch the metadata for the distribution.
let metadata = {
let id = PackageId::from_url(source.url());
if let Some(metadata) = self.index.get_metadata(&id) {
if let Some(metadata) = self
.index
.get_metadata(&id)
.as_deref()
.and_then(|response| {
if let MetadataResponse::Found(metadata) = response {
Some(metadata)
} else {
None
}
})
{
// If the metadata is already in the index, return it.
metadata.deref().clone()
metadata.clone()
} else {
// Run the PEP 517 build process to extract metadata from the source distribution.
let source = BuildableSource::Url(source);
let metadata = self.database.build_wheel_metadata(&source).await?;
// Insert the metadata into the index.
self.index.insert_metadata(id, metadata.clone());
self.index
.insert_metadata(id, MetadataResponse::Found(metadata.clone()));
metadata
}

View file

@ -20,7 +20,7 @@ use pypi_types::Metadata10;
use uv_client::RegistryClient;
use uv_distribution::{DistributionDatabase, Reporter};
use uv_normalize::PackageName;
use uv_resolver::InMemoryIndex;
use uv_resolver::{InMemoryIndex, MetadataResponse};
use uv_types::BuildContext;
/// Like [`RequirementsSpecification`], but with concrete names for all requirements.
@ -236,7 +236,13 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont
// Fetch the metadata for the distribution.
let name = {
let id = PackageId::from_url(source.url());
if let Some(metadata) = index.get_metadata(&id) {
if let Some(metadata) = index.get_metadata(&id).as_deref().and_then(|response| {
if let MetadataResponse::Found(metadata) = response {
Some(metadata)
} else {
None
}
}) {
// If the metadata is already in the index, return it.
metadata.name.clone()
} else {
@ -247,7 +253,7 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont
let name = metadata.name.clone();
// Insert the metadata into the index.
index.insert_metadata(id, metadata);
index.insert_metadata(id, MetadataResponse::Found(metadata));
name
}

View file

@ -16,6 +16,7 @@ workspace = true
cache-key = { workspace = true }
distribution-filename = { workspace = true, features = ["serde"] }
distribution-types = { workspace = true }
install-wheel-rs = { workspace = true }
once-map = { workspace = true }
pep440_rs = { workspace = true }
pep508_rs = { workspace = true }

View file

@ -9,7 +9,7 @@ pub use python_requirement::PythonRequirement;
pub use resolution::{AnnotationStyle, Diagnostic, DisplayResolutionGraph, ResolutionGraph};
pub use resolution_mode::ResolutionMode;
pub use resolver::{
BuildId, DefaultResolverProvider, InMemoryIndex, PackageVersionsResult,
BuildId, DefaultResolverProvider, InMemoryIndex, MetadataResponse, PackageVersionsResult,
Reporter as ResolverReporter, Resolver, ResolverProvider, VersionsResponse,
WheelMetadataResult,
};

View file

@ -18,7 +18,7 @@ use distribution_types::{
use once_map::OnceMap;
use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use pypi_types::{Hashes, Metadata23};
use pypi_types::Hashes;
use uv_distribution::to_precise;
use uv_normalize::{ExtraName, PackageName};
@ -28,7 +28,7 @@ use crate::pins::FilePins;
use crate::preferences::Preferences;
use crate::pubgrub::{PubGrubDistribution, PubGrubPackage};
use crate::redirect::apply_redirect;
use crate::resolver::{InMemoryIndex, VersionsResponse};
use crate::resolver::{InMemoryIndex, MetadataResponse, VersionsResponse};
use crate::{Manifest, ResolveError};
/// Indicate the style of annotation comments, used to indicate the dependencies that requested each
@ -66,7 +66,7 @@ impl ResolutionGraph {
selection: &SelectedDependencies<UvDependencyProvider>,
pins: &FilePins,
packages: &OnceMap<PackageName, VersionsResponse>,
distributions: &OnceMap<PackageId, Metadata23>,
distributions: &OnceMap<PackageId, MetadataResponse>,
state: &State<UvDependencyProvider>,
preferences: &Preferences,
editables: Editables,
@ -164,13 +164,20 @@ impl ResolutionGraph {
});
}
} else {
let metadata = distributions.get(&dist.package_id()).unwrap_or_else(|| {
let response = distributions.get(&dist.package_id()).unwrap_or_else(|| {
panic!(
"Every package should have metadata: {:?}",
dist.package_id()
)
});
let MetadataResponse::Found(metadata) = &*response else {
panic!(
"Every package should have metadata: {:?}",
dist.package_id()
)
};
if metadata.provides_extras.contains(extra) {
extras
.entry(package_name.clone())
@ -211,13 +218,20 @@ impl ResolutionGraph {
});
}
} else {
let metadata = distributions.get(&dist.package_id()).unwrap_or_else(|| {
let response = distributions.get(&dist.package_id()).unwrap_or_else(|| {
panic!(
"Every package should have metadata: {:?}",
dist.package_id()
)
});
let MetadataResponse::Found(metadata) = &*response else {
panic!(
"Every package should have metadata: {:?}",
dist.package_id()
)
};
if metadata.provides_extras.contains(extra) {
extras
.entry(package_name.clone())
@ -417,10 +431,16 @@ impl ResolutionGraph {
}
VersionOrUrl::Url(verbatim_url) => PackageId::from_url(verbatim_url.raw()),
};
let md = index
let res = index
.distributions
.get(&package_id)
.expect("every package in resolution graph has metadata");
let MetadataResponse::Found(md) = &*res else {
panic!(
"Every package should have metadata: {:?}",
dist.package_id()
)
};
for req in manifest.apply(&md.requires_dist) {
let Some(ref marker_tree) = req.marker else {
continue;

View file

@ -2,10 +2,9 @@ use std::sync::Arc;
use distribution_types::PackageId;
use once_map::OnceMap;
use pypi_types::Metadata23;
use uv_normalize::PackageName;
use crate::resolver::provider::VersionsResponse;
use crate::resolver::provider::{MetadataResponse, VersionsResponse};
/// In-memory index of package metadata.
#[derive(Default)]
@ -15,18 +14,18 @@ pub struct InMemoryIndex {
pub(crate) packages: OnceMap<PackageName, VersionsResponse>,
/// A map from package ID to metadata for that distribution.
pub(crate) distributions: OnceMap<PackageId, Metadata23>,
pub(crate) distributions: OnceMap<PackageId, MetadataResponse>,
}
impl InMemoryIndex {
/// Insert a [`VersionsResponse`] into the index.
pub fn insert_package(&self, package_name: PackageName, metadata: VersionsResponse) {
self.packages.done(package_name, metadata);
pub fn insert_package(&self, package_name: PackageName, response: VersionsResponse) {
self.packages.done(package_name, response);
}
/// Insert a [`Metadata23`] into the index.
pub fn insert_metadata(&self, package_id: PackageId, metadata: Metadata23) {
self.distributions.done(package_id, metadata);
pub fn insert_metadata(&self, package_id: PackageId, response: MetadataResponse) {
self.distributions.done(package_id, response);
}
/// Get the [`VersionsResponse`] for a given package name, without waiting.
@ -34,8 +33,8 @@ impl InMemoryIndex {
self.packages.get(package_name)
}
/// Get the [`Metadata23`] for a given package ID, without waiting.
pub fn get_metadata(&self, package_id: &PackageId) -> Option<Arc<Metadata23>> {
/// Get the [`MetadataResponse`] for a given package ID, without waiting.
pub fn get_metadata(&self, package_id: &PackageId) -> Option<Arc<MetadataResponse>> {
self.distributions.get(package_id)
}
}

View file

@ -14,7 +14,7 @@ use pubgrub::range::Range;
use pubgrub::solver::{Incompatibility, State};
use rustc_hash::{FxHashMap, FxHashSet};
use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, info_span, instrument, trace, Instrument};
use tracing::{debug, info_span, instrument, trace, warn, Instrument};
use distribution_types::{
BuiltDist, Dist, DistributionMetadata, IncompatibleDist, IncompatibleSource, IncompatibleWheel,
@ -46,8 +46,8 @@ use crate::python_requirement::PythonRequirement;
use crate::resolution::ResolutionGraph;
pub use crate::resolver::index::InMemoryIndex;
pub use crate::resolver::provider::{
DefaultResolverProvider, PackageVersionsResult, ResolverProvider, VersionsResponse,
WheelMetadataResult,
DefaultResolverProvider, MetadataResponse, PackageVersionsResult, ResolverProvider,
VersionsResponse, WheelMetadataResult,
};
use crate::resolver::reporter::Facade;
pub use crate::resolver::reporter::{BuildId, Reporter};
@ -77,6 +77,10 @@ pub(crate) enum UnavailablePackage {
Offline,
/// The package was not found in the registry
NotFound,
/// The wheel metadata was found, but could not be parsed.
InvalidMetadata,
/// The wheel has an invalid structure.
InvalidStructure,
}
enum ResolverVersion {
@ -342,6 +346,12 @@ impl<
UnavailablePackage::NotFound => {
"was not found in the package registry"
}
UnavailablePackage::InvalidMetadata => {
"was found, but the metadata could not be parsed"
}
UnavailablePackage::InvalidStructure => {
"was found, but has an invalid format"
}
})
} else {
None
@ -590,12 +600,33 @@ impl<
}
let dist = PubGrubDistribution::from_url(package_name, url);
let metadata = self
let response = self
.index
.distributions
.wait(&dist.package_id())
.await
.ok_or(ResolveError::Unregistered)?;
// If we failed to fetch the metadata for a URL, we can't proceed.
let metadata = match &*response {
MetadataResponse::Found(metadata) => metadata,
MetadataResponse::Offline => {
self.unavailable_packages
.insert(package_name.clone(), UnavailablePackage::Offline);
return Ok(None);
}
MetadataResponse::InvalidMetadata(_) => {
self.unavailable_packages
.insert(package_name.clone(), UnavailablePackage::InvalidMetadata);
return Ok(None);
}
MetadataResponse::InvalidStructure(_) => {
self.unavailable_packages
.insert(package_name.clone(), UnavailablePackage::InvalidStructure);
return Ok(None);
}
};
let version = &metadata.version;
// The version is incompatible with the requirement.
@ -711,7 +742,6 @@ impl<
let version = candidate.version().clone();
// Emit a request to fetch the metadata for this version.
if self.index.distributions.register(candidate.package_id()) {
let request = match dist.for_resolution() {
ResolvedDistRef::Installable(dist) => Request::Dist(dist.clone()),
@ -866,7 +896,7 @@ impl<
}
// Wait for the metadata to be available.
let metadata = self
let response = self
.index
.distributions
.wait(&package_id)
@ -874,6 +904,26 @@ impl<
.await
.ok_or(ResolveError::Unregistered)?;
let metadata = match *response {
MetadataResponse::Found(ref metadata) => metadata,
MetadataResponse::Offline => {
return Ok(Dependencies::Unavailable(
"network connectivity is disabled, but the metadata wasn't found in the cache"
.to_string(),
));
}
MetadataResponse::InvalidMetadata(_) => {
return Ok(Dependencies::Unavailable(
"the package metadata could not be parsed".to_string(),
));
}
MetadataResponse::InvalidStructure(_) => {
return Ok(Dependencies::Unavailable(
"the package has an invalid format".to_string(),
));
}
};
let mut constraints = PubGrubDependencies::from_requirements(
&metadata.requires_dist,
&self.constraints,
@ -923,23 +973,41 @@ impl<
}
Some(Response::Installed { dist, metadata }) => {
trace!("Received installed distribution metadata for: {dist}");
self.index.distributions.done(dist.package_id(), metadata);
self.index
.distributions
.done(dist.package_id(), MetadataResponse::Found(metadata));
}
Some(Response::Dist {
dist: Dist::Built(dist),
metadata,
}) => {
trace!("Received built distribution metadata for: {dist}");
match &metadata {
MetadataResponse::InvalidMetadata(err) => {
warn!("Unable to extract metadata for {dist}: {err}");
}
MetadataResponse::InvalidStructure(err) => {
warn!("Unable to extract metadata for {dist}: {err}");
}
_ => {}
}
self.index.distributions.done(dist.package_id(), metadata);
}
Some(Response::Dist {
dist: Dist::Source(distribution),
dist: Dist::Source(dist),
metadata,
}) => {
trace!("Received source distribution metadata for: {distribution}");
self.index
.distributions
.done(distribution.package_id(), metadata);
trace!("Received source distribution metadata for: {dist}");
match &metadata {
MetadataResponse::InvalidMetadata(err) => {
warn!("Unable to extract metadata for {dist}: {err}");
}
MetadataResponse::InvalidStructure(err) => {
warn!("Unable to extract metadata for {dist}: {err}");
}
_ => {}
}
self.index.distributions.done(dist.package_id(), metadata);
}
None => {}
}
@ -1147,7 +1215,10 @@ enum Response {
/// The returned metadata for a package hosted on a registry.
Package(PackageName, VersionsResponse),
/// The returned metadata for a distribution.
Dist { dist: Dist, metadata: Metadata23 },
Dist {
dist: Dist,
metadata: MetadataResponse,
},
/// The returned metadata for an already-installed distribution.
Installed {
dist: InstalledDist,

View file

@ -16,7 +16,7 @@ use crate::version_map::VersionMap;
use crate::yanks::AllowedYanks;
pub type PackageVersionsResult = Result<VersionsResponse, uv_client::Error>;
pub type WheelMetadataResult = Result<Metadata23, uv_distribution::Error>;
pub type WheelMetadataResult = Result<MetadataResponse, uv_distribution::Error>;
/// The response when requesting versions for a package
#[derive(Debug)]
@ -31,6 +31,18 @@ pub enum VersionsResponse {
Offline,
}
#[derive(Debug)]
pub enum MetadataResponse {
/// The wheel metadata was found and parsed successfully.
Found(Metadata23),
/// The wheel metadata was found, but could not be parsed.
InvalidMetadata(Box<pypi_types::MetadataError>),
/// The wheel has an invalid structure.
InvalidStructure(Box<install_wheel_rs::Error>),
/// The wheel metadata was not found in the cache and the network is not available.
Offline,
}
pub trait ResolverProvider: Send + Sync {
/// Get the version map for a package.
fn get_package_versions<'io>(
@ -108,11 +120,7 @@ impl<'a, Context: BuildContext + Send + Sync> ResolverProvider
&'io self,
package_name: &'io PackageName,
) -> PackageVersionsResult {
let result = self.client.simple(package_name).await;
// If the "Simple API" request was successful, convert to `VersionMap` on the Tokio
// threadpool, since it can be slow.
match result {
match self.client.simple(package_name).await {
Ok(results) => Ok(VersionsResponse::Found(
results
.into_iter()
@ -161,8 +169,30 @@ impl<'a, Context: BuildContext + Send + Sync> ResolverProvider
}
}
/// Fetch the metadata for a distribution, building it if necessary.
async fn get_or_build_wheel_metadata<'io>(&'io self, dist: &'io Dist) -> WheelMetadataResult {
self.fetcher.get_or_build_wheel_metadata(dist).await
match self.fetcher.get_or_build_wheel_metadata(dist).await {
Ok(metadata) => Ok(MetadataResponse::Found(metadata)),
Err(err) => match err {
uv_distribution::Error::Client(client) => match client.into_kind() {
uv_client::ErrorKind::Offline(_) => Ok(MetadataResponse::Offline),
uv_client::ErrorKind::MetadataParseError(_, _, err) => {
Ok(MetadataResponse::InvalidMetadata(err))
}
uv_client::ErrorKind::DistInfo(err) => {
Ok(MetadataResponse::InvalidStructure(Box::new(err)))
}
kind => Err(uv_client::Error::from(kind).into()),
},
uv_distribution::Error::Metadata(err) => {
Ok(MetadataResponse::InvalidMetadata(Box::new(err)))
}
uv_distribution::Error::DistInfo(err) => {
Ok(MetadataResponse::InvalidStructure(Box::new(err)))
}
err => Err(err),
},
}
}
fn index_locations(&self) -> &IndexLocations {

View file

@ -4420,21 +4420,22 @@ fn offline_registry_backtrack() -> Result<()> {
"###
);
// Resolve with `--offline`, with a looser requirement. We should backtrack to `1.1.1`, but
// we don't right now.
// Resolve with `--offline`, with a looser requirement. We should backtrack to `1.1.1`.
let requirements_in = context.temp_dir.child("requirements.in");
requirements_in.write_str("iniconfig")?;
uv_snapshot!(context.compile()
.arg("requirements.in")
.arg("--offline"), @r###"
success: false
exit_code: 2
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by uv via the following command:
# uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2024-03-25T00:00:00Z requirements.in --offline
iniconfig==1.1.1
----- stderr -----
error: Failed to download: iniconfig==2.0.0
Caused by: Network connectivity is disabled, but the requested data wasn't found in the cache for: `https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl.metadata`
Resolved 1 package in [TIME]
"###
);
@ -4563,16 +4564,15 @@ fn invalid_metadata_requires_python() -> Result<()> {
.arg("--find-links")
.arg(context.workspace_root.join("scripts").join("links")), @r###"
success: false
exit_code: 2
exit_code: 1
----- stdout -----
----- stderr -----
error: Failed to download: validation==2.0.0
Caused by: Couldn't parse metadata of validation-2.0.0-py3-none-any.whl from validation==2.0.0
Caused by: Failed to parse version: Unexpected end of version specifier, expected operator:
12
^^
× No solution found when resolving dependencies:
Because validation==2.0.0 is unusable because its dependencies are
unusable because the package metadata could not be parsed and you
require validation==2.0.0, we can conclude that the requirements are
unsatisfiable.
"###
);
@ -4593,12 +4593,15 @@ fn invalid_metadata_multiple_dist_info() -> Result<()> {
.arg("--find-links")
.arg(context.workspace_root.join("scripts").join("links")), @r###"
success: false
exit_code: 2
exit_code: 1
----- stdout -----
----- stderr -----
error: Failed to download: validation==3.0.0
Caused by: Multiple .dist-info directories found: validation-2.0.0, validation-3.0.0
× No solution found when resolving dependencies:
Because validation==3.0.0 is unusable because its dependencies
are unusable because the package has an invalid format and you
require validation==3.0.0, we can conclude that the requirements are
unsatisfiable.
"###
);
@ -4613,19 +4616,21 @@ fn invalid_metadata_backtrack() -> Result<()> {
requirements_in.write_str("validation")?;
// `2.0.0` and `3.0.0` have invalid metadata. We should backtrack to `1.0.0` (the preceding
// version, which has valid metadata), but we don't right now.
// version, which has valid metadata).
uv_snapshot!(context.compile()
.arg("requirements.in")
.arg("--no-index")
.arg("--find-links")
.arg(context.workspace_root.join("scripts").join("links")), @r###"
success: false
exit_code: 2
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by uv via the following command:
# uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2024-03-25T00:00:00Z requirements.in --no-index
validation==1.0.0
----- stderr -----
error: Failed to download: validation==3.0.0
Caused by: Multiple .dist-info directories found: validation-2.0.0, validation-3.0.0
Resolved 1 package in [TIME]
"###
);

View file

@ -1100,8 +1100,7 @@ fn mismatched_name() -> Result<()> {
----- stdout -----
----- stderr -----
error: Failed to read: foo @ file://[TEMP_DIR]/foo-2.0.1-py3-none-any.whl
Caused by: The .dist-info directory tomli-2.0.1 does not start with the normalized package name: foo
error: Because foo was found, but has an invalid format and you require foo, we can conclude that the requirements are unsatisfiable.
"###
);