make universal resolver fork only when markers are disjoint (#4135)

The basic idea here is to make it so forking can only ever result in a
resolution that, for a particular marker environment, will only install
at most one version of a package. We can guarantee this by ensuring we
only fork on conflicting dependency specifications only when their
corresponding markers are completely disjoint. If they aren't, then
resolution _must_ find a single version of the package in the
intersection of the two dependency specifications.

A test for this case has been added to packse here:
https://github.com/astral-sh/packse/pull/182. Previously, that test
would result in a resolution with two different unconditional versions
of the same package. With this change, resolution fails (as it should).

A commit-by-commit review should be helpful here, since the first commit
is a refactor to make the second commit a bit more digestible.
This commit is contained in:
Andrew Gallant 2024-06-07 19:40:55 -04:00 committed by GitHub
parent 0db1bf4df7
commit c46fa74e65
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 484 additions and 204 deletions

View file

@ -26,7 +26,7 @@ use distribution_types::{
};
pub(crate) use locals::Locals;
use pep440_rs::{Version, MIN_VERSION};
use pep508_rs::MarkerEnvironment;
use pep508_rs::{MarkerEnvironment, MarkerTree};
use platform_tags::Tags;
use pypi_types::{Metadata23, Requirement};
pub(crate) use urls::Urls;
@ -476,45 +476,27 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
{
// Retrieve that package dependencies.
let package = state.next.clone();
let forks = self.get_dependencies_forking(
let forked_deps = self.get_dependencies_forking(
&package,
&version,
&mut state.priorities,
&request_sink,
)?;
let forks_len = forks.len();
// This is a somewhat tortured technique to ensure
// that our resolver state is only cloned as much
// as it needs to be. And *especially*, in the case
// when no forks occur, the state should not be
// cloned at all. We basically move the state into
// `forked_states`, and then only clone it if there
// it at least one more fork to visit.
let mut cur_state = Some(state);
for (i, fork) in forks.into_iter().enumerate() {
let is_last = i == forks_len - 1;
let dependencies = match fork {
Dependencies::Unavailable(reason) => {
let mut forked_state = cur_state.take().unwrap();
if !is_last {
cur_state = Some(forked_state.clone());
}
forked_state.pubgrub.add_incompatibility(
Incompatibility::custom_version(
package.clone(),
version.clone(),
UnavailableReason::Version(reason),
),
);
forked_states.push(forked_state);
continue;
}
Dependencies::Available(constraints)
if constraints
.iter()
.any(|(dependency, _)| dependency == &package) =>
match forked_deps {
ForkedDependencies::Unavailable(reason) => {
state
.pubgrub
.add_incompatibility(Incompatibility::custom_version(
package.clone(),
version.clone(),
UnavailableReason::Version(reason),
));
forked_states.push(state);
}
ForkedDependencies::Unforked(constraints) => {
if constraints
.iter()
.any(|(dependency, _)| dependency == &package)
{
if enabled!(Level::DEBUG) {
prefetcher.log_tried_versions();
@ -525,28 +507,69 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
}
.into());
}
Dependencies::Available(constraints) => constraints,
};
let mut forked_state = cur_state.take().unwrap();
if !is_last {
cur_state = Some(forked_state.clone());
}
// Add that package and version if the dependencies are not problematic.
let dep_incompats =
forked_state.pubgrub.add_incompatibility_from_dependencies(
// Add that package and version if the dependencies are not problematic.
let dep_incompats =
state.pubgrub.add_incompatibility_from_dependencies(
package.clone(),
version.clone(),
constraints,
);
state.pubgrub.partial_solution.add_version(
package.clone(),
version.clone(),
dependencies,
dep_incompats,
&state.pubgrub.incompatibility_store,
);
forked_state.pubgrub.partial_solution.add_version(
package.clone(),
version.clone(),
dep_incompats,
&forked_state.pubgrub.incompatibility_store,
);
forked_states.push(forked_state);
forked_states.push(state);
}
ForkedDependencies::Forked(forks) => {
assert!(forks.len() >= 2);
// This is a somewhat tortured technique to ensure
// that our resolver state is only cloned as much
// as it needs to be. We basically move the state
// into `forked_states`, and then only clone it if
// there it at least one more fork to visit.
let mut cur_state = Some(state);
let forks_len = forks.len();
for (i, fork) in forks.into_iter().enumerate() {
if fork
.dependencies
.iter()
.any(|(dependency, _)| dependency == &package)
{
if enabled!(Level::DEBUG) {
prefetcher.log_tried_versions();
}
return Err(PubGrubError::SelfDependency {
package: package.clone(),
version: version.clone(),
}
.into());
}
let is_last = i == forks_len - 1;
let mut forked_state = cur_state.take().unwrap();
if !is_last {
cur_state = Some(forked_state.clone());
}
// Add that package and version if the dependencies are not problematic.
let dep_incompats =
forked_state.pubgrub.add_incompatibility_from_dependencies(
package.clone(),
version.clone(),
fork.dependencies,
);
forked_state.pubgrub.partial_solution.add_version(
package.clone(),
version.clone(),
dep_incompats,
&forked_state.pubgrub.incompatibility_store,
);
forked_states.push(forked_state);
}
}
}
continue 'FORK;
}
@ -896,62 +919,15 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
version: &Version,
priorities: &mut PubGrubPriorities,
request_sink: &Sender<Request>,
) -> Result<Vec<Dependencies>, ResolveError> {
type Dep = (PubGrubPackage, Range<Version>);
) -> Result<ForkedDependencies, ResolveError> {
let result = self.get_dependencies(package, version, priorities, request_sink);
if self.markers.is_some() {
return result.map(|deps| vec![deps]);
return result.map(|deps| match deps {
Dependencies::Available(deps) => ForkedDependencies::Unforked(deps),
Dependencies::Unavailable(err) => ForkedDependencies::Unavailable(err),
});
}
let deps: Vec<Dep> = match result? {
Dependencies::Available(deps) => deps,
Dependencies::Unavailable(err) => return Ok(vec![Dependencies::Unavailable(err)]),
};
let mut by_grouping: FxHashMap<&PackageName, FxHashMap<&Range<Version>, Vec<&Dep>>> =
FxHashMap::default();
for dep in &deps {
let (ref pkg, ref range) = *dep;
let name = match &**pkg {
// A root can never be a dependency of another package, and a `Python` pubgrub
// package is never returned by `get_dependencies`. So these cases never occur.
// TODO(charlie): This might be overly conservative for `Extra` and `Group`. If
// multiple groups are enabled, we shouldn't need to fork. Similarly, if multiple
// extras are enabled, we shouldn't need to fork.
PubGrubPackageInner::Root(_) | PubGrubPackageInner::Python(_) => unreachable!(),
PubGrubPackageInner::Package { ref name, .. }
| PubGrubPackageInner::Marker { ref name, .. }
| PubGrubPackageInner::Extra { ref name, .. }
| PubGrubPackageInner::Dev { ref name, .. } => name,
};
by_grouping
.entry(name)
.or_default()
.entry(range)
.or_default()
.push(dep);
}
let mut forks: Vec<Vec<Dep>> = vec![vec![]];
for (_, groups) in by_grouping {
if groups.len() <= 1 {
for deps in groups.into_values() {
for fork in &mut forks {
fork.extend(deps.iter().map(|dep| (*dep).clone()));
}
}
} else {
let mut new_forks: Vec<Vec<Dep>> = vec![];
for deps in groups.into_values() {
let mut new_forks_for_group = forks.clone();
for fork in &mut new_forks_for_group {
fork.extend(deps.iter().map(|dep| (*dep).clone()));
}
new_forks.extend(new_forks_for_group);
}
forks = new_forks;
}
}
Ok(forks.into_iter().map(Dependencies::Available).collect())
Ok(result?.fork())
}
/// Given a candidate package and version, return its dependencies.
@ -1814,6 +1790,246 @@ enum Dependencies {
Available(Vec<(PubGrubPackage, Range<Version>)>),
}
impl Dependencies {
fn fork(self) -> ForkedDependencies {
use std::collections::hash_map::Entry;
let deps = match self {
Dependencies::Available(deps) => deps,
Dependencies::Unavailable(err) => return ForkedDependencies::Unavailable(err),
};
let mut by_name: FxHashMap<&PackageName, PossibleForks> = FxHashMap::default();
for (index, (ref pkg, _)) in deps.iter().enumerate() {
let (name, marker) = match &**pkg {
// A root can never be a dependency of another package, and a `Python` pubgrub
// package is never returned by `get_dependencies`. So these cases never occur.
PubGrubPackageInner::Root(_) | PubGrubPackageInner::Python(_) => unreachable!(),
PubGrubPackageInner::Package { name, marker, .. }
| PubGrubPackageInner::Extra { name, marker, .. }
| PubGrubPackageInner::Dev { name, marker, .. } => (name, marker.as_ref()),
PubGrubPackageInner::Marker { name, marker, .. } => (name, Some(marker)),
};
let Some(marker) = marker else {
// When no marker is found, it implies there is a dependency on
// this package that is unconditional with respect to marker
// expressions. Therefore, it should never be the cause of a
// fork since it is necessarily overlapping with every other
// possible marker expression that isn't pathological.
match by_name.entry(name) {
Entry::Vacant(e) => {
e.insert(PossibleForks::NoForkPossible(vec![index]));
}
Entry::Occupied(mut e) => {
e.get_mut().push_unconditional_package(index);
}
}
continue;
};
let possible_forks = match by_name.entry(name) {
// If one doesn't exist, then this is the first dependency
// with this package name. And since it has a marker, we can
// add it as the initial instance of a possibly forking set of
// dependencies. (A fork will only actually happen if another
// dependency is found with the same package name *and* where
// its marker expression is disjoint with this one.)
Entry::Vacant(e) => {
let possible_fork = PossibleFork {
packages: vec![(index, marker)],
};
let fork_groups = PossibleForkGroups {
forks: vec![possible_fork],
};
e.insert(PossibleForks::PossiblyForking(fork_groups));
continue;
}
// Now that we have a marker, look for an existing entry. If
// one already exists and is "no fork possible," then we know
// we can't fork.
Entry::Occupied(e) => match *e.into_mut() {
PossibleForks::NoForkPossible(ref mut indices) => {
indices.push(index);
continue;
}
PossibleForks::PossiblyForking(ref mut possible_forks) => possible_forks,
},
};
// At this point, we know we 1) have a duplicate dependency on
// a package and 2) the original and this one both have marker
// expressions. This still doesn't guarantee that a fork occurs
// though. A fork can only occur when the marker expressions from
// (2) are provably disjoint. Otherwise, we could end up with
// a resolution that would result in installing two different
// versions of the same package. Specifically, this could occur in
// precisely the cases where the marker expressions intersect.
//
// By construction, the marker expressions *in* each fork group
// have some non-empty intersection, and the marker expressions
// *between* each fork group are completely disjoint. So what we do
// is look for a group in which there is some overlap. If so, this
// package gets added to that fork group. Otherwise, we create a
// new fork group.
// possible_forks.push(PossibleFork { packages: vec![] });
let Some(possible_fork) = possible_forks.find_overlapping_fork_group(marker) else {
// Create a new fork since there was no overlap.
possible_forks.forks.push(PossibleFork {
packages: vec![(index, marker)],
});
continue;
};
// Add to an existing fork since there was overlap.
possible_fork.packages.push((index, marker));
}
// If all possible forks have exactly 1 group, then there is no forking.
if !by_name.values().any(PossibleForks::has_fork) {
return ForkedDependencies::Unforked(deps);
}
let mut forks = vec![Fork {
dependencies: vec![],
}];
for (_, possible_forks) in by_name {
let fork_groups = match possible_forks {
PossibleForks::PossiblyForking(fork_groups) => fork_groups,
PossibleForks::NoForkPossible(indices) => {
// No fork is provoked by this package, so just add
// everything in this group to each of the forks.
for index in indices {
for fork in &mut forks {
fork.dependencies.push(deps[index].clone());
}
}
continue;
}
};
let mut new_forks: Vec<Fork> = vec![];
for group in fork_groups.forks {
let mut new_forks_for_group = forks.clone();
for (index, _) in group.packages {
for fork in &mut new_forks_for_group {
fork.dependencies.push(deps[index].clone());
}
}
new_forks.extend(new_forks_for_group);
}
forks = new_forks;
}
ForkedDependencies::Forked(forks)
}
}
#[derive(Debug)]
enum ForkedDependencies {
/// Package dependencies are not available.
Unavailable(UnavailableVersion),
/// No forking occurred.
Unforked(Vec<(PubGrubPackage, Range<Version>)>),
/// Forked containers for all available package versions.
///
/// Note that there is always at least two forks. If there would
/// be fewer than 2 forks, then there is no fork at all and the
/// `Unforked` variant is used instead.
Forked(Vec<Fork>),
}
#[derive(Clone, Debug)]
struct Fork {
dependencies: Vec<(PubGrubPackage, Range<Version>)>,
}
#[derive(Debug)]
enum PossibleForks<'a> {
NoForkPossible(Vec<usize>),
PossiblyForking(PossibleForkGroups<'a>),
}
impl<'a> PossibleForks<'a> {
/// Returns true if and only if this contains a fork assuming there are
/// no other dependencies to be considered.
fn has_fork(&self) -> bool {
let PossibleForks::PossiblyForking(ref fork_groups) = *self else {
return false;
};
fork_groups.forks.len() > 1
}
/// Pushes an unconditional index to a package.
///
/// If this previously contained possible forks, those are combined into
/// one single set of dependencies that can never be forked.
///
/// That is, adding an unconditional package means it is not disjoint with
/// all other possible dependencies using the same package name.
fn push_unconditional_package(&mut self, index: usize) {
self.make_no_forks_possible();
let PossibleForks::NoForkPossible(ref mut indices) = *self else {
unreachable!("all forks should be eliminated")
};
indices.push(index);
}
/// Convert this set of possible forks into something that can never fork.
///
/// This is useful in cases where a dependency on a package is found
/// without any marker expressions at all. In this case, it is never
/// possible for this package to provoke a fork. Since it is unconditional,
/// it implies it is never disjoint with any other dependency specification
/// on the same package. (Except for pathological cases of marker
/// expressions that always evaluate to false. But we generally ignore
/// those.)
fn make_no_forks_possible(&mut self) {
let PossibleForks::PossiblyForking(ref fork_groups) = *self else {
return;
};
let mut indices = vec![];
for possible_fork in &fork_groups.forks {
for &(index, _) in &possible_fork.packages {
indices.push(index);
}
}
*self = PossibleForks::NoForkPossible(indices);
}
}
#[derive(Debug)]
struct PossibleForkGroups<'a> {
forks: Vec<PossibleFork<'a>>,
}
impl<'a> PossibleForkGroups<'a> {
/// Given a marker expression, if there is a fork in this set of fork
/// groups with non-empty overlap with it, then that fork group is
/// returned. Otherwise, `None` is returned.
fn find_overlapping_fork_group<'g>(
&'g mut self,
marker: &MarkerTree,
) -> Option<&'g mut PossibleFork<'a>> {
self.forks
.iter_mut()
.find(|fork| fork.is_overlapping(marker))
}
}
#[derive(Debug)]
struct PossibleFork<'a> {
packages: Vec<(usize, &'a MarkerTree)>,
}
impl<'a> PossibleFork<'a> {
/// Returns true if and only if the given marker expression has a non-empty
/// intersection with *any* of the package markers within this possible
/// fork.
fn is_overlapping(&self, marker: &MarkerTree) -> bool {
use crate::marker::is_disjoint;
for (_, tree) in &self.packages {
if !is_disjoint(marker, tree) {
return true;
}
}
false
}
}
fn uncapitalize<T: AsRef<str>>(string: T) -> String {
let mut chars = string.as_ref().chars();
match chars.next() {