Add a proxy layer for extras (#3100)

Given requirements like:

```
black==23.1.0
black[colorama]
```

The resolver will (on `main`) add a dependency on Black, and then try to
use the most recent version of Black to satisfy `black[colorama]`. For
the sake of example, assume `black==24.0.0` is the most recent version. Once
the resolver selects this most recent version, it'll fetch the metadata, then
return the dependencies for `black==24.0.0` with the `colorama` extra
enabled. Finally, it will tack on `black==24.0.0` (a dependency on the
base package). The resolver will then detect a conflict between
`black==23.1.0` and `black==24.0.0`, throw out
`black[colorama]==24.0.0`, and try the next most-recent version.

This is both wasteful and problematic, since we're fetching
metadata for versions that will _never_ satisfy the resolver. In the
`apache-airflow[all]` case, I also ran into an issue whereby we were
attempting to build very old versions of `apache-airflow` due to
`apache-airflow[pandas]`, which in turn led to resolution failures.

The solution proposed here is to create a new proxy package with
exactly two dependencies: one on `black` and one on `black[colorama]`.
Both of these dependencies must be at the same version as the proxy package,
so the resolver knows much _earlier_ that (in the above example) the
extra variant _must_ match `23.1.0`.
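
To make the shape of that proxy concrete, here's a minimal sketch of the idea using simplified stand-in types (`Package` and `proxy_dependencies` are illustrative names, not uv's actual `PubGrubPackage` API; the real change is in the diff below):

```rust
// A simplified stand-in for uv's `PubGrubPackage`; real resolution uses uv's own types.
#[derive(Clone, Debug)]
enum Package {
    /// The base package (`black`) or an extra variant (`black[colorama]`).
    Base { name: String, extra: Option<String> },
    /// The proxy introduced for a requirement like `black[colorama]`.
    Proxy { name: String, extra: String },
}

/// Expand a proxy at a chosen `version` into its two dependencies: the base package and the
/// extra variant, each pinned to exactly the proxy's own version.
fn proxy_dependencies(package: &Package, version: &str) -> Vec<(Package, String)> {
    match package {
        Package::Proxy { name, extra } => vec![
            (
                Package::Base {
                    name: name.clone(),
                    extra: None,
                },
                format!("=={version}"),
            ),
            (
                Package::Base {
                    name: name.clone(),
                    extra: Some(extra.clone()),
                },
                format!("=={version}"),
            ),
        ],
        // Base packages get their dependencies from package metadata; not modeled here.
        Package::Base { .. } => vec![],
    }
}

fn main() {
    let proxy = Package::Proxy {
        name: "black".to_string(),
        extra: "colorama".to_string(),
    };
    // If the resolver picks 23.1.0 for the proxy, both edges are pinned to 23.1.0.
    for (dep, range) in proxy_dependencies(&proxy, "23.1.0") {
        println!("{dep:?} {range}");
    }
}
```

Because both edges are pinned to the proxy's own version, PubGrub learns immediately that `black[colorama]` can only be considered at versions the base requirement (`black==23.1.0`) already allows.
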
Charlie Marsh, 2024-04-18 21:04:59 -04:00 (committed by GitHub)
commit 2e88bb6f1b, parent 822ae19879
6 changed files with 128 additions and 98 deletions


@@ -1,11 +1,12 @@
 use std::collections::{BTreeMap, BTreeSet};
 use std::fmt::Formatter;
 use std::ops::Deref;
+use std::sync::Arc;

 use dashmap::{DashMap, DashSet};
 use indexmap::IndexMap;
 use pubgrub::range::Range;
-use pubgrub::report::{DefaultStringReporter, DerivationTree, Reporter};
+use pubgrub::report::{DefaultStringReporter, DerivationTree, External, Reporter};
 use rustc_hash::FxHashMap;

 use distribution_types::{
@@ -109,6 +110,45 @@ impl<T> From<tokio::sync::mpsc::error::SendError<T>> for ResolveError {
     }
 }

+/// Given a [`DerivationTree`], collapse any [`External::FromDependencyOf`] incompatibilities
+/// that wrap a [`PubGrubPackage::Extra`] package.
+fn collapse_extra_proxies(derivation_tree: &mut DerivationTree<PubGrubPackage, Range<Version>>) {
+    match derivation_tree {
+        DerivationTree::External(_) => {}
+        DerivationTree::Derived(derived) => {
+            match (
+                Arc::make_mut(&mut derived.cause1),
+                Arc::make_mut(&mut derived.cause2),
+            ) {
+                (
+                    DerivationTree::External(External::FromDependencyOf(
+                        PubGrubPackage::Extra(..),
+                        ..,
+                    )),
+                    ref mut cause,
+                ) => {
+                    collapse_extra_proxies(cause);
+                    *derivation_tree = cause.clone();
+                }
+                (
+                    ref mut cause,
+                    DerivationTree::External(External::FromDependencyOf(
+                        PubGrubPackage::Extra(..),
+                        ..,
+                    )),
+                ) => {
+                    collapse_extra_proxies(cause);
+                    *derivation_tree = cause.clone();
+                }
+                _ => {
+                    collapse_extra_proxies(Arc::make_mut(&mut derived.cause1));
+                    collapse_extra_proxies(Arc::make_mut(&mut derived.cause2));
+                }
+            }
+        }
+    }
+}
+
 impl From<pubgrub::error::PubGrubError<UvDependencyProvider>> for ResolveError {
     fn from(value: pubgrub::error::PubGrubError<UvDependencyProvider>) -> Self {
         match value {
@@ -119,7 +159,9 @@ impl From<pubgrub::error::PubGrubError<UvDependencyProvider>> for ResolveError {
                 unreachable!()
             }
             pubgrub::error::PubGrubError::Failure(inner) => Self::Failure(inner),
-            pubgrub::error::PubGrubError::NoSolution(derivation_tree) => {
+            pubgrub::error::PubGrubError::NoSolution(mut derivation_tree) => {
+                collapse_extra_proxies(&mut derivation_tree);
                 Self::NoSolution(NoSolutionError {
                     derivation_tree,
                     // The following should be populated before display for the best error messages
@@ -208,7 +250,8 @@ impl NoSolutionError {
                         BTreeSet::from([python_requirement.target().deref().clone()]),
                     );
                 }
-                PubGrubPackage::Package(name, ..) => {
+                PubGrubPackage::Extra(_, _, _) => {}
+                PubGrubPackage::Package(name, _, _) => {
                     // Avoid including available versions for packages that exist in the derivation
                     // tree, but were never visited during resolution. We _may_ have metadata for
                     // these packages, but it's non-deterministic, and omitting them ensures that
@@ -256,7 +299,7 @@ impl NoSolutionError {
     ) -> Self {
         let mut new = FxHashMap::default();
         for package in self.derivation_tree.packages() {
-            if let PubGrubPackage::Package(name, ..) = package {
+            if let PubGrubPackage::Package(name, _, _) = package {
                 if let Some(entry) = unavailable_packages.get(name) {
                     let reason = entry.value();
                     new.insert(name.clone(), reason.clone());
@@ -275,7 +318,7 @@ impl NoSolutionError {
     ) -> Self {
         let mut new = FxHashMap::default();
        for package in self.derivation_tree.packages() {
-            if let PubGrubPackage::Package(name, ..) = package {
+            if let PubGrubPackage::Package(name, _, _) = package {
                 if let Some(entry) = incomplete_packages.get(name) {
                     let versions = entry.value();
                     for entry in versions {


@@ -61,13 +61,31 @@ pub enum PubGrubPackage {
         /// _after_ a registry variant.
         Option<VerbatimUrl>,
     ),
+    /// A proxy package to represent a dependency with an extra (e.g., `black[colorama]`).
+    ///
+    /// For a given package `black`, and an extra `colorama`, we create a virtual package
+    /// with exactly two dependencies: `PubGrubPackage::Package("black", None)` and
+    /// `PubGrubPackage::Package("black", Some("colorama"))`. Both dependencies are pinned to the
+    /// same version, and the virtual package is discarded at the end of the resolution process.
+    ///
+    /// The benefit of the proxy package (versus `PubGrubPackage::Package("black", Some("colorama"))`
+    /// on its own) is that it enables us to avoid attempting to retrieve metadata for irrelevant
+    /// versions of the extra variant, by making it clear to PubGrub that the extra variant must
+    /// match the exact same version as the base variant. Without the proxy package, when provided
+    /// requirements like `black==23.0.1` and `black[colorama]`, PubGrub may attempt to retrieve
+    /// metadata for `black[colorama]` versions other than `23.0.1`.
+    Extra(PackageName, ExtraName, Option<VerbatimUrl>),
 }

 impl PubGrubPackage {
     /// Create a [`PubGrubPackage`] from a package name and optional extra name.
     pub(crate) fn from_package(name: PackageName, extra: Option<ExtraName>, urls: &Urls) -> Self {
         let url = urls.get(&name).cloned();
-        Self::Package(name, extra, url)
+        if let Some(extra) = extra {
+            Self::Extra(name, extra, url)
+        } else {
+            Self::Package(name, extra, url)
+        }
     }
 }
@@ -94,6 +112,7 @@ impl std::fmt::Display for PubGrubPackage {
             Self::Package(name, Some(extra), ..) => {
                 write!(f, "{name}[{extra}]")
             }
+            Self::Extra(name, extra, ..) => write!(f, "{name}[{extra}]"),
         }
     }
 }


@@ -27,7 +27,8 @@ impl PubGrubPriorities {
         match package {
             PubGrubPackage::Root(_) => {}
             PubGrubPackage::Python(_) => {}
-            PubGrubPackage::Package(name, _, None) => {
+            PubGrubPackage::Extra(name, _, None) | PubGrubPackage::Package(name, _, None) => {
                 match self.0.entry(name.clone()) {
                     std::collections::hash_map::Entry::Occupied(mut entry) => {
                         // Preserve the original index.
@@ -65,7 +66,7 @@
                     }
                 }
             }
-            PubGrubPackage::Package(name, _, Some(_)) => {
+            PubGrubPackage::Extra(name, _, Some(_)) | PubGrubPackage::Package(name, _, Some(_)) => {
                 match self.0.entry(name.clone()) {
                     std::collections::hash_map::Entry::Occupied(mut entry) => {
                         // Preserve the original index.
@@ -99,6 +100,7 @@ impl PubGrubPriorities {
         match package {
             PubGrubPackage::Root(_) => Some(PubGrubPriority::Root),
             PubGrubPackage::Python(_) => Some(PubGrubPriority::Root),
+            PubGrubPackage::Extra(name, _, _) => self.0.get(name).copied(),
             PubGrubPackage::Package(name, _, _) => self.0.get(name).copied(),
         }
     }


@@ -525,6 +525,7 @@ impl<
         match package {
             PubGrubPackage::Root(_) => {}
             PubGrubPackage::Python(_) => {}
+            PubGrubPackage::Extra(_, _, _) => {}
             PubGrubPackage::Package(name, _extra, None) => {
                 // Verify that the package is allowed under the hash-checking policy.
                 if !self.hasher.allows_package(name) {
@@ -561,7 +562,7 @@ impl<
         // Iterate over the potential packages, and fetch file metadata for any of them. These
         // represent our current best guesses for the versions that we _might_ select.
         for (package, range) in packages {
-            let PubGrubPackage::Package(package_name, _extra, None) = package else {
+            let PubGrubPackage::Package(package_name, None, None) = package else {
                 continue;
             };
             request_sink
@@ -604,16 +605,9 @@ impl<
                 }
             }
-            PubGrubPackage::Package(package_name, extra, Some(url)) => {
-                if let Some(extra) = extra {
-                    debug!(
-                        "Searching for a compatible version of {package_name}[{extra}] @ {url} ({range})",
-                    );
-                } else {
-                    debug!(
-                        "Searching for a compatible version of {package_name} @ {url} ({range})"
-                    );
-                }
+            PubGrubPackage::Extra(package_name, _, Some(url))
+            | PubGrubPackage::Package(package_name, _, Some(url)) => {
+                debug!("Searching for a compatible version of {package} @ {url} ({range})");

                 // If the dist is an editable, return the version from the editable metadata.
                 if let Some((_local, metadata)) = self.editables.get(package_name) {
@@ -702,7 +696,8 @@ impl<
                 Ok(Some(ResolverVersion::Available(version.clone())))
             }
-            PubGrubPackage::Package(package_name, extra, None) => {
+            PubGrubPackage::Extra(package_name, _, None)
+            | PubGrubPackage::Package(package_name, _, None) => {
                 // Wait for the metadata to be available.
                 let versions_response = self
                     .index
@@ -732,13 +727,7 @@ impl<
                     }
                 };

-                if let Some(extra) = extra {
-                    debug!(
-                        "Searching for a compatible version of {package_name}[{extra}] ({range})",
-                    );
-                } else {
-                    debug!("Searching for a compatible version of {package_name} ({range})");
-                }
+                debug!("Searching for a compatible version of {package} ({range})");

                 // Find a version.
                 let Some(candidate) = self.selector.select(
@@ -770,22 +759,13 @@ impl<
                     }
                     ResolvedDistRef::Installed(_) => Cow::Borrowed("installed"),
                 };
-                if let Some(extra) = extra {
-                    debug!(
-                        "Selecting: {}[{}]=={} ({})",
-                        candidate.name(),
-                        extra,
-                        candidate.version(),
-                        filename,
-                    );
-                } else {
-                    debug!(
-                        "Selecting: {}=={} ({})",
-                        candidate.name(),
-                        candidate.version(),
-                        filename,
-                    );
-                }
+                debug!(
+                    "Selecting: {}=={} ({})",
+                    package,
+                    candidate.version(),
+                    filename,
+                );

                 // We want to return a package pinned to a specific version; but we _also_ want to
                 // store the exact file that we selected to satisfy that version.
@@ -794,13 +774,16 @@ impl<
                 let version = candidate.version().clone();

                 // Emit a request to fetch the metadata for this version.
-                if self.index.distributions.register(candidate.version_id()) {
-                    let request = match dist.for_resolution() {
-                        ResolvedDistRef::Installable(dist) => Request::Dist(dist.clone()),
-                        ResolvedDistRef::Installed(dist) => Request::Installed(dist.clone()),
-                    };
-                    request_sink.send(request).await?;
+                if matches!(package, PubGrubPackage::Package(_, _, _)) {
+                    if self.index.distributions.register(candidate.version_id()) {
+                        let request = match dist.for_resolution() {
+                            ResolvedDistRef::Installable(dist) => Request::Dist(dist.clone()),
+                            ResolvedDistRef::Installed(dist) => Request::Installed(dist.clone()),
+                        };
+                        request_sink.send(request).await?;
+                    }
                 }

                 Ok(Some(ResolverVersion::Available(version)))
             }
@@ -896,7 +879,7 @@ impl<
         // Determine if the distribution is editable.
         if let Some((_local, metadata)) = self.editables.get(package_name) {
-            let mut constraints = PubGrubDependencies::from_requirements(
+            let constraints = PubGrubDependencies::from_requirements(
                 &metadata.requires_dist,
                 &self.constraints,
                 &self.overrides,
@@ -917,14 +900,6 @@ impl<
                 self.visit_package(dep_package, request_sink).await?;
             }

-            // If a package has an extra, insert a constraint on the base package.
-            if extra.is_some() {
-                constraints.push(
-                    PubGrubPackage::Package(package_name.clone(), None, url.clone()),
-                    Range::singleton(version.clone()),
-                );
-            }
-
             return Ok(Dependencies::Available(constraints.into()));
         }
@@ -1013,7 +988,7 @@ impl<
                     }
                 };

-                let mut constraints = PubGrubDependencies::from_requirements(
+                let constraints = PubGrubDependencies::from_requirements(
                     &metadata.requires_dist,
                     &self.constraints,
                     &self.overrides,
@@ -1034,16 +1009,20 @@ impl<
                     self.visit_package(package, request_sink).await?;
                 }

-                // If a package has an extra, insert a constraint on the base package.
-                if extra.is_some() {
-                    constraints.push(
-                        PubGrubPackage::Package(package_name.clone(), None, url.clone()),
-                        Range::singleton(version.clone()),
-                    );
-                }
-
                 Ok(Dependencies::Available(constraints.into()))
             }
+
+            // Add a dependency on both the extra and base package.
+            PubGrubPackage::Extra(package_name, extra, url) => Ok(Dependencies::Available(vec![
+                (
+                    PubGrubPackage::Package(package_name.clone(), None, url.clone()),
+                    Range::singleton(version.clone()),
+                ),
+                (
+                    PubGrubPackage::Package(package_name.clone(), Some(extra.clone()), url.clone()),
+                    Range::singleton(version.clone()),
+                ),
+            ])),
         }
     }
@@ -1251,6 +1230,7 @@ impl<
         match package {
             PubGrubPackage::Root(_) => {}
             PubGrubPackage::Python(_) => {}
+            PubGrubPackage::Extra(_, _, _) => {}
             PubGrubPackage::Package(package_name, _extra, Some(url)) => {
                 reporter.on_progress(package_name, &VersionOrUrl::Url(url));
             }


@@ -3904,8 +3904,6 @@ fn find_links_requirements_txt() -> Result<()> {
 /// `extras==0.0.2` fails to build (i.e., it always throws). Since `extras==0.0.1` is pinned, we
 /// should never even attempt to build `extras==0.0.2`, despite an unpinned `extras[dev]`
 /// requirement.
-///
-/// This resolution should succeed, but currently fails.
 #[test]
 fn avoid_irrelevant_extras() -> Result<()> {
     let context = TestContext::new("3.12");
@@ -3915,39 +3913,27 @@ fn avoid_irrelevant_extras() -> Result<()> {
         extras[dev]
     "})?;

-    let filters = std::iter::once((r"exit code: 1", "exit status: 1"))
-        .chain(context.filters())
-        .collect::<Vec<_>>();
-    uv_snapshot!(filters, context.compile()
+    uv_snapshot!(context.compile()
         .arg("requirements.in")
         .arg("--find-links")
         .arg(context.workspace_root.join("scripts").join("links")), @r###"
-    success: false
-    exit_code: 2
+    success: true
+    exit_code: 0
     ----- stdout -----
+    # This file was autogenerated by uv via the following command:
+    # uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2024-03-25T00:00:00Z requirements.in
+    anyio==4.3.0
+        # via extras
+    extras==0.0.1
+    idna==3.6
+        # via anyio
+    iniconfig==2.0.0
+        # via extras
+    sniffio==1.3.1
+        # via anyio

     ----- stderr -----
-    error: Failed to download and build: extras==0.0.2
-      Caused by: Failed to build: extras==0.0.2
-      Caused by: Build backend failed to determine extra requires with `build_wheel()` with exit status: 1
-    --- stdout:
-    --- stderr:
-    Traceback (most recent call last):
-      File "<string>", line 14, in <module>
-      File "[CACHE_DIR]/[TMP]/build_meta.py", line 325, in get_requires_for_build_wheel
-        return self._get_build_requires(config_settings, requirements=['wheel'])
-               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-      File "[CACHE_DIR]/[TMP]/build_meta.py", line 295, in _get_build_requires
-        self.run_setup()
-      File "[CACHE_DIR]/[TMP]/build_meta.py", line 487, in run_setup
-        super().run_setup(setup_script=setup_script)
-      File "[CACHE_DIR]/[TMP]/build_meta.py", line 311, in run_setup
-        exec(code, locals())
-      File "<string>", line 3, in <module>
-    ZeroDivisionError: division by zero
-    ---
+    Resolved 5 packages in [TIME]
     "###);

     Ok(())


@@ -932,7 +932,7 @@ fn extra_incompatible_with_extra() {
     ----- stderr -----
     × No solution found when resolving dependencies:
       Because only package-a[extra-c]==1.0.0 is available and package-a[extra-c]==1.0.0 depends on package-b==2.0.0, we can conclude that all versions of package-a[extra-c] depend on package-b==2.0.0.
-      And because package-a[extra-b]==1.0.0 depends on package-b==1.0.0 and only package-a[extra-b]==1.0.0 is available, we can conclude that all versions of package-a[extra-c] and all versions of package-a[extra-b] are incompatible.
+      And because package-a[extra-b]==1.0.0 depends on package-b==1.0.0 and only package-a[extra-b]==1.0.0 is available, we can conclude that all versions of package-a[extra-b] and all versions of package-a[extra-c] are incompatible.
       And because you require package-a[extra-b] and you require package-a[extra-c], we can conclude that the requirements are unsatisfiable.
     "###);
@@ -1047,7 +1047,7 @@ fn extra_incompatible_with_root() {
     ----- stderr -----
     × No solution found when resolving dependencies:
-      Because package-a[extra]==1.0.0 depends on package-b==1.0.0 and only package-a[extra]==1.0.0 is available, we can conclude that all versions of package-a[extra] depend on package-b==1.0.0.
+      Because only package-a[extra]==1.0.0 is available and package-a[extra]==1.0.0 depends on package-b==1.0.0, we can conclude that all versions of package-a[extra] depend on package-b==1.0.0.
       And because you require package-a[extra] and you require package-b==2.0.0, we can conclude that the requirements are unsatisfiable.
     "###);