uv-resolver: rewrite forking to be based on overlapping markers

Closes #4732
Andrew Gallant 2024-07-31 10:09:24 -04:00 committed by Andrew Gallant
parent 8dbf43c85d
commit 04b7cf894a
2 changed files with 216 additions and 388 deletions


@@ -168,6 +168,11 @@ impl ResolutionGraph {
Range::from(requires_python.bound().clone()),
);
}
// The above simplification may turn some markers into
// "always false", in which case we should remove the
// edge, since it can never be traversed in any marker
// environment.
petgraph.retain_edges(|graph, edge| !graph[edge].is_false());
}
let fork_markers = if let [resolution] = resolutions {
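
A minimal standalone sketch of the edge-filtering pattern used by the `retain_edges` call above, with a plain boolean standing in for `MarkerTree::is_false` (the node names and edge values here are hypothetical, not uv-resolver data):

use petgraph::graph::DiGraph;

fn main() {
    // Edge weights stand in for simplified markers; `true` marks an edge
    // whose marker is "always false" and thus can never be traversed.
    let mut graph: DiGraph<&str, bool> = DiGraph::new();
    let a = graph.add_node("a");
    let b = graph.add_node("b");
    let c = graph.add_node("c");
    graph.add_edge(a, b, false);
    graph.add_edge(b, c, true);
    // Mirror of the `retain_edges` call above: keep only traversable edges.
    graph.retain_edges(|g, e| !g[e]);
    assert_eq!(graph.edge_count(), 1);
}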


@@ -705,38 +705,52 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
// there is at least one more fork to visit.
let mut cur_state = Some(current_state);
let forks_len = forks.len();
forks.into_iter().enumerate().map(move |(i, fork)| {
let is_last = i == forks_len - 1;
let forked_state = cur_state.take().unwrap();
if !is_last {
cur_state = Some(forked_state.clone());
}
forks
.into_iter()
.enumerate()
.map(move |(i, fork)| {
let is_last = i == forks_len - 1;
let forked_state = cur_state.take().unwrap();
if !is_last {
cur_state = Some(forked_state.clone());
}
let mut forked_state = forked_state.with_markers(fork.markers);
forked_state.add_package_version_dependencies(
for_package,
version,
&self.urls,
&self.locals,
fork.dependencies.clone(),
&self.git,
self.selector.resolution_strategy(),
)?;
// Emit a request to fetch the metadata for each registry package.
for dependency in &fork.dependencies {
let PubGrubDependency {
package,
version: _,
specifier: _,
url: _,
} = dependency;
let url = package
.name()
.and_then(|name| forked_state.fork_urls.get(name));
self.visit_package(package, url, request_sink)?;
}
Ok(forked_state)
})
// Drop any forked states whose markers are known to never
// match any marker environments.
.filter(|result| {
if let Ok(ref forked_state) = result {
let markers = forked_state.markers.fork_markers().expect("is a fork");
if markers.is_false() {
return false;
}
}
true
})
}
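
A small sketch of the "clone all but the last fork" pattern used above when fanning the current resolver state out across forks. `split_state` is a hypothetical helper, not a uv-resolver function:

/// Produce one copy of `state` per fork. The final fork takes ownership of
/// the original state, so only `forks - 1` clones are made rather than `forks`.
fn split_state<S: Clone>(state: S, forks: usize) -> Vec<S> {
    let mut cur = Some(state);
    (0..forks)
        .map(|i| {
            let is_last = i == forks - 1;
            let forked = cur.take().unwrap();
            if !is_last {
                cur = Some(forked.clone());
            }
            forked
        })
        .collect()
}

fn main() {
    let states = split_state(vec![1, 2, 3], 3);
    assert_eq!(states.len(), 3);
}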
/// Visit a [`PubGrubPackage`] prior to selection. This should be called on a [`PubGrubPackage`]
@@ -1167,7 +1181,9 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
let result = self.get_dependencies(package, version, fork_urls, markers, requires_python);
match markers {
ResolverMarkers::SpecificEnvironment(_) => result.map(|deps| match deps {
Dependencies::Available(deps) => ForkedDependencies::Unforked(deps),
Dependencies::Available(deps) | Dependencies::Unforkable(deps) => {
ForkedDependencies::Unforked(deps)
}
Dependencies::Unavailable(err) => ForkedDependencies::Unavailable(err),
}),
ResolverMarkers::Universal { .. } | ResolverMarkers::Fork(_) => Ok(result?.fork()),
@@ -1299,7 +1315,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
// If we're excluding transitive dependencies, short-circuit. (It's important that
// we fetched the metadata, though, since we need it to validate extras.)
if self.dependency_mode.is_direct() {
return Ok(Dependencies::Available(Vec::default()));
return Ok(Dependencies::Unforkable(Vec::default()));
}
let requirements = self.flatten_requirements(
@@ -1342,11 +1358,11 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
dependencies
}
PubGrubPackageInner::Python(_) => return Ok(Dependencies::Available(Vec::default())),
PubGrubPackageInner::Python(_) => return Ok(Dependencies::Unforkable(Vec::default())),
// Add a dependency on both the marker and base package.
PubGrubPackageInner::Marker { name, marker } => {
return Ok(Dependencies::Available(
return Ok(Dependencies::Unforkable(
[None, Some(marker)]
.into_iter()
.map(move |marker| PubGrubDependency {
@@ -1370,7 +1386,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
extra,
marker,
} => {
return Ok(Dependencies::Available(
return Ok(Dependencies::Unforkable(
[None, marker.as_ref()]
.into_iter()
.dedup()
@@ -1396,7 +1412,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
// Add a dependency on both the development dependency group and base package, with and
// without the marker.
PubGrubPackageInner::Dev { name, dev, marker } => {
return Ok(Dependencies::Available(
return Ok(Dependencies::Unforkable(
[None, marker.as_ref()]
.into_iter()
.dedup()
@@ -1420,7 +1436,32 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
}
};
Ok(Dependencies::Available(dependencies))
// At present, if we know that we are running resolution
// starting with an existing set of forks, then we
// explicitly and forcefully prevent any new forks from
// being created. The motivation for doing this is that our
// `is_definitively_empty_set` logic for detecting whether
// a marker expression matches the empty set isn't yet
// bulletproof. Moreover, when re-running resolution, it's
// possible for new forks to get generated that are
// ultimately superfluous due to empty-set markers. While we
// do filter these out, that filtering itself relies on
// `is_definitively_empty_set`.
//
// Ideally we wouldn't do this forcefully here, since if
// the input requirements change (e.g., `pyproject.toml`),
// then it could be correct to introduce a new fork.
// But in order to remove this, I think we need to make
// `is_definitively_empty_set` better than it is today.
if matches!(
self.markers,
ResolverMarkers::Universal {
fork_preferences: Some(_)
}
) {
Ok(Dependencies::Unforkable(dependencies))
} else {
Ok(Dependencies::Available(dependencies))
}
}
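
A simplified illustration of the gate above: when resolution is re-run with a previously recorded set of forks, dependencies are reported as unforkable so no new forks can be introduced. The enums here are hypothetical stand-ins for the uv-resolver types:

enum Markers {
    Universal { fork_preferences: Option<Vec<String>> },
    Fork,
}

enum Deps {
    Available(Vec<String>),
    Unforkable(Vec<String>),
}

fn classify(markers: &Markers, deps: Vec<String>) -> Deps {
    // Mirrors the `matches!` check above: an existing set of fork
    // preferences forces the dependencies to be treated as unforkable.
    if matches!(
        markers,
        Markers::Universal {
            fork_preferences: Some(_)
        }
    ) {
        Deps::Unforkable(deps)
    } else {
        Deps::Available(deps)
    }
}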
/// The regular and dev dependencies filtered by Python version and the markers of this fork,
@@ -2626,6 +2667,12 @@ enum Dependencies {
/// `PubGrubPackage` values in this list to have the same package name.
/// These conflicts are resolved via `Dependencies::fork`.
Available(Vec<PubGrubDependency>),
/// Dependencies that should never result in a fork.
///
/// For example, the dependencies of a `Marker` package will have the
/// same name and version, but differ according to marker expressions.
/// We never want this to result in a fork.
Unforkable(Vec<PubGrubDependency>),
}
impl Dependencies {
@@ -2636,155 +2683,38 @@ impl Dependencies {
/// name *and* those dependency specifications have corresponding marker
/// expressions that are completely disjoint with one another.
fn fork(self) -> ForkedDependencies {
use std::collections::hash_map::Entry;
let deps = match self {
Dependencies::Available(deps) => deps,
Dependencies::Unforkable(deps) => return ForkedDependencies::Unforked(deps),
Dependencies::Unavailable(err) => return ForkedDependencies::Unavailable(err),
};
let mut by_name: FxHashMap<&PackageName, PossibleForks> = FxHashMap::default();
for (index, dependency) in deps.iter().enumerate() {
// A root can never be a dependency of another package,
// and a `Python` pubgrub package is never returned by
// `get_dependencies`. So a pubgrub package always has a
// name in this context.
let name = dependency
let mut name_to_deps: BTreeMap<PackageName, Vec<PubGrubDependency>> = BTreeMap::new();
for dep in deps {
let name = dep
.package
.name()
.expect("dependency always has a name");
let marker = dependency.package.marker();
let Some(marker) = marker else {
// When no marker is found, it implies there is a dependency on
// this package that is unconditional with respect to marker
// expressions. Therefore, it should never be the cause of a
// fork since it is necessarily overlapping with every other
// possible marker expression that isn't pathological.
match by_name.entry(name) {
Entry::Vacant(e) => {
e.insert(PossibleForks::NoForkPossible(vec![index]));
}
Entry::Occupied(mut e) => {
e.get_mut().push_unconditional_package(index);
}
}
continue;
};
let possible_forks = match by_name.entry(name) {
// If one doesn't exist, then this is the first dependency
// with this package name. And since it has a marker, we can
// add it as the initial instance of a possibly forking set of
// dependencies. (A fork will only actually happen if another
// dependency is found with the same package name *and* where
// its marker expression is disjoint with this one.)
Entry::Vacant(e) => {
let possible_fork = PossibleFork {
packages: vec![(index, marker)],
};
let fork_groups = PossibleForkGroups {
forks: vec![possible_fork],
};
e.insert(PossibleForks::PossiblyForking(fork_groups));
continue;
}
// Now that we have a marker, look for an existing entry. If
// one already exists and is "no fork possible," then we know
// we can't fork.
Entry::Occupied(e) => match *e.into_mut() {
PossibleForks::NoForkPossible(ref mut indices) => {
indices.push(index);
continue;
}
PossibleForks::PossiblyForking(ref mut possible_forks) => possible_forks,
},
};
// At this point, we know we 1) have a duplicate dependency on
// a package and 2) the original and this one both have marker
// expressions. This still doesn't guarantee that a fork occurs
// though. A fork can only occur when the marker expressions from
// (2) are provably disjoint. Otherwise, we could end up with
// a resolution that would result in installing two different
// versions of the same package. Specifically, this could occur in
// precisely the cases where the marker expressions intersect.
//
// By construction, the marker expressions *in* each fork group
// have some non-empty intersection, and the marker expressions
// *between* each fork group are completely disjoint. So what we do
// is look for a group in which there is some overlap. If so, this
// package gets added to that fork group. Otherwise, we create a
// new fork group.
let Some(possible_fork) = possible_forks.find_overlapping_fork_group(marker) else {
// Create a new fork since there was no overlap.
possible_forks.forks.push(PossibleFork {
packages: vec![(index, marker)],
});
continue;
};
// Add to an existing fork since there was overlap.
possible_fork.packages.push((index, marker));
.expect("dependency always has a name")
.clone();
name_to_deps.entry(name).or_default().push(dep);
}
// If all possible forks have exactly 1 group, then there is no forking.
if !by_name.values().any(PossibleForks::has_fork) {
return ForkedDependencies::Unforked(deps);
}
let mut forks = vec![Fork {
dependencies: vec![],
markers: MarkerTree::TRUE,
}];
let mut diverging_packages = Vec::new();
for (name, possible_forks) in by_name {
let fork_groups = match possible_forks.finish() {
// 'finish()' guarantees that 'PossiblyForking' implies
// 'DefinitelyForking'.
PossibleForks::PossiblyForking(fork_groups) => fork_groups,
PossibleForks::NoForkPossible(indices) => {
// No fork is provoked by this package, so just add
// everything in this group to each of the forks.
for index in indices {
for fork in &mut forks {
fork.add_nonfork_package(deps[index].clone());
}
}
continue;
}
};
assert!(fork_groups.forks.len() >= 2, "expected definitive fork");
let mut new_forks: Vec<Fork> = vec![];
if let Some(markers) = fork_groups.remaining_universe() {
trace!("Adding split to cover possibly incomplete markers: {markers:?}");
let mut new_forks_for_remaining_universe = forks.clone();
for fork in &mut new_forks_for_remaining_universe {
fork.markers.and(markers.clone());
fork.remove_disjoint_packages();
}
new_forks.extend(new_forks_for_remaining_universe);
}
// Each group has a list of packages whose marker expressions are
// guaranteed to be overlapping. So we must union those marker
// expressions and then intersect them with each existing fork.
for group in fork_groups.forks {
let mut new_forks_for_group = forks.clone();
for fork in &mut new_forks_for_group {
fork.markers.and(group.union());
fork.remove_disjoint_packages();
for &(index, _) in &group.packages {
fork.dependencies.push(deps[index].clone());
}
}
new_forks.extend(new_forks_for_group);
}
forks = new_forks;
diverging_packages.push(name.clone());
}
// Prioritize the forks. Prefer solving forks with lower Python bounds, since they're more
// likely to produce solutions that work for forks with higher Python bounds (whereas the
// inverse is not true).
forks.sort();
ForkedDependencies::Forked {
forks,
let Forks {
mut forks,
diverging_packages,
} = Forks::new(name_to_deps);
if forks.is_empty() {
ForkedDependencies::Unforked(vec![])
} else if forks.len() == 1 {
ForkedDependencies::Unforked(forks.pop().unwrap().dependencies)
} else {
// Prioritize the forks. Prefer solving forks with lower Python
// bounds, since they're more likely to produce solutions that work
// for forks with higher Python bounds (whereas the inverse is not
// true).
forks.sort();
ForkedDependencies::Forked {
forks,
diverging_packages: diverging_packages.into_iter().collect(),
}
}
}
}
@@ -2815,6 +2745,96 @@ enum ForkedDependencies {
},
}
/// A list of forks determined from the dependencies of a single package.
///
/// Any time a marker expression is seen that is not true for all possible
/// marker environments, it is possible for it to introduce a new fork.
#[derive(Debug, Default)]
struct Forks {
/// The forks discovered among the dependencies.
forks: Vec<Fork>,
/// The package(s) that provoked at least one additional fork.
diverging_packages: BTreeSet<PackageName>,
}
impl Forks {
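/// Builds the list of forks from dependencies grouped by package name.
///
/// Starting from a single fork that covers every marker environment, a
/// dependency whose markers are neither always true nor always false may
/// split each fork it overlaps into the part covered by those markers and
/// the remainder. (A package with only a single dependency specification
/// never provokes a split; see the comment below.)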
fn new(name_to_deps: BTreeMap<PackageName, Vec<PubGrubDependency>>) -> Forks {
let mut forks = vec![Fork {
dependencies: vec![],
markers: MarkerTree::TRUE,
}];
let mut diverging_packages = BTreeSet::new();
for (name, mut deps) in name_to_deps {
assert!(!deps.is_empty(), "every name has at least one dependency");
// We never fork if there's only one dependency
// specification for a given package name. This particular
// strategy results in a "conservative" approach to forking
// that gives up correctness in some cases in exchange for
// more limited forking. More limited forking results in
// simpler, easier-to-understand lock files and faster
// resolving. The correctness we give up manifests when
// two transitive non-sibling dependencies conflict. In
// that case, we don't detect the fork ahead of time (at
// present).
if deps.len() == 1 {
let dep = deps.pop().unwrap();
let markers = dep.package.marker().cloned().unwrap_or(MarkerTree::TRUE);
for fork in &mut forks {
if !fork.markers.is_disjoint(&markers) {
fork.dependencies.push(dep.clone());
}
}
continue;
}
for dep in deps {
let mut markers = dep.package.marker().cloned().unwrap_or(MarkerTree::TRUE);
if markers.is_false() {
// If the markers can never be satisfied, then we
// can drop this dependency unceremoniously.
continue;
}
if markers.is_true() {
// Or, if the markers are always true, then we just
// add the dependency to every fork unconditionally.
for fork in &mut forks {
if !fork.markers.is_disjoint(&markers) {
fork.dependencies.push(dep.clone());
}
}
continue;
}
// Otherwise, we *should* need to add a new fork...
diverging_packages.insert(name.clone());
let mut new = vec![];
for mut fork in std::mem::take(&mut forks) {
if fork.markers.is_disjoint(&markers) {
new.push(fork);
continue;
}
let not_markers = markers.negate();
let mut new_markers = markers.clone();
new_markers.and(fork.markers.negate());
if !fork.markers.is_disjoint(&not_markers) {
let mut new_fork = fork.clone();
new_fork.intersect(not_markers);
new.push(new_fork);
}
fork.dependencies.push(dep.clone());
fork.intersect(markers);
new.push(fork);
markers = new_markers;
}
forks = new;
}
}
Forks {
forks,
diverging_packages,
}
}
}
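
A self-contained toy model of the splitting loop in `Forks::new` above. Markers are modeled as bitsets over four hypothetical environments, so negation is bitwise NOT and "not disjoint" is a non-zero AND. `ToyFork`, `split`, and `ALL` are stand-ins rather than uv-resolver items, and the sketch omits the `diverging_packages` bookkeeping and the narrowing of a dependency's markers as successive forks absorb it:

use std::collections::BTreeMap;

const ALL: u8 = 0b1111;

#[derive(Clone, Debug)]
struct ToyFork {
    markers: u8,
    deps: Vec<(&'static str, u8)>,
}

impl ToyFork {
    // Mirrors `Fork::intersect`: narrow the fork's markers and drop any
    // dependency whose markers become disjoint with them.
    fn intersect(&mut self, m: u8) {
        self.markers &= m;
        let markers = self.markers;
        self.deps.retain(|&(_, dep_markers)| markers & dep_markers != 0);
    }
}

fn split(name_to_deps: BTreeMap<&'static str, Vec<u8>>) -> Vec<ToyFork> {
    let mut forks = vec![ToyFork { markers: ALL, deps: vec![] }];
    for (name, mut deps) in name_to_deps {
        // A single specification never provokes a fork: add it to every
        // fork whose markers overlap its own.
        if deps.len() == 1 {
            let m = deps.pop().unwrap();
            for fork in &mut forks {
                if fork.markers & m != 0 {
                    fork.deps.push((name, m));
                }
            }
            continue;
        }
        for m in deps {
            if m == 0 {
                // Unsatisfiable markers: drop the dependency entirely.
                continue;
            }
            if m == ALL {
                // Always-true markers: add the dependency everywhere.
                for fork in &mut forks {
                    fork.deps.push((name, m));
                }
                continue;
            }
            // Otherwise, split every overlapping fork into "fork AND m"
            // and "fork AND NOT m".
            let mut new = Vec::new();
            for mut fork in std::mem::take(&mut forks) {
                if fork.markers & m == 0 {
                    new.push(fork);
                    continue;
                }
                let not_m = !m & ALL;
                if fork.markers & not_m != 0 {
                    let mut other = fork.clone();
                    other.intersect(not_m);
                    new.push(other);
                }
                fork.deps.push((name, m));
                fork.intersect(m);
                new.push(fork);
            }
            forks = new;
        }
    }
    forks
}

fn main() {
    // Two specifications of `a` with disjoint markers (environments 0-1
    // vs. 2-3) split the universal fork in two; `b` lands in both.
    let mut deps = BTreeMap::new();
    deps.insert("a", vec![0b0011, 0b1100]);
    deps.insert("b", vec![ALL]);
    let forks = split(deps);
    assert_eq!(forks.len(), 2);
    for fork in &forks {
        println!("{:04b}: {:?}", fork.markers, fork.deps);
    }
}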
/// A single fork in a list of dependencies.
///
/// A fork corresponds to the full list of dependencies for a package,
@@ -2845,6 +2865,18 @@ struct Fork {
markers: MarkerTree,
}
impl Fork {
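/// Intersects this fork's markers with the given markers, and drops any
/// dependencies whose markers become disjoint with the narrowed markers.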
fn intersect(&mut self, markers: MarkerTree) {
self.markers.and(markers);
self.dependencies.retain(|dep| {
let Some(markers) = dep.package.marker() else {
return true;
};
!self.markers.is_disjoint(markers)
});
}
}
impl Ord for Fork {
fn cmp(&self, other: &Self) -> Ordering {
// A higher `requires-python` requirement indicates a _lower-priority_ fork. We'd prefer
@@ -2889,215 +2921,6 @@ impl PartialOrd for Fork {
}
}
impl Fork {
/// Add the given dependency to this fork.
///
/// This works by assuming the given package did *not* provoke a fork.
///
/// It is only added if the markers on the given package are not disjoint
/// with this fork's markers.
fn add_nonfork_package(&mut self, dependency: PubGrubDependency) {
if dependency
.package
.marker()
.map_or(true, |marker| !marker.is_disjoint(&self.markers))
{
self.dependencies.push(dependency);
}
}
/// Removes any dependencies in this fork whose markers are disjoint with
/// its own markers.
fn remove_disjoint_packages(&mut self) {
self.dependencies.retain(|dependency| {
dependency
.package
.marker()
.map_or(true, |pkg_marker| !pkg_marker.is_disjoint(&self.markers))
});
}
}
/// Intermediate state that represents a *possible* grouping of forks
/// for one package name.
///
/// This accumulates state while examining a `Dependencies` list. In
/// particular, it accumulates conflicting dependency specifications and marker
/// expressions. As soon as a fork can be ruled out, this state is switched to
/// `NoForkPossible`. If, at the end of visiting all `Dependencies`, we still
/// have `PossibleForks::PossiblyForking`, then a fork exists if and only if
/// one of its groups has length bigger than `1`.
///
/// One common way for a fork to be known to be impossible is if there exists
/// conflicting dependency specifications where at least one is unconditional.
/// For example, `a<2` and `a>=2 ; sys_platform == 'foo'`. In this case, `a<2`
/// has a marker expression that is always true and thus never disjoint with
/// any other marker expression. Therefore, there can be no fork for `a`.
///
/// Note that we use indices into a `Dependencies` list to represent packages.
/// This avoids excessive cloning.
#[derive(Debug)]
enum PossibleForks<'a> {
/// A group of dependencies (all with the same package name) where it is
/// known that no forks exist.
NoForkPossible(Vec<usize>),
/// A group of groups of dependencies (all with the same package name) where
/// it is possible for each group to correspond to a fork.
PossiblyForking(PossibleForkGroups<'a>),
}
impl<'a> PossibleForks<'a> {
/// Returns true if and only if this contains a fork assuming there are
/// no other dependencies to be considered.
fn has_fork(&self) -> bool {
let PossibleForks::PossiblyForking(ref fork_groups) = *self else {
return false;
};
fork_groups.forks.len() > 1
}
/// Consumes this possible set of forks and converts a "possibly forking"
/// variant to a "no fork possible" variant if there are no actual forks.
///
/// This should be called when all dependencies for one package have been
/// considered. It will normalize this value such that `PossiblyForking`
/// means `DefinitelyForking`.
fn finish(mut self) -> PossibleForks<'a> {
let PossibleForks::PossiblyForking(ref fork_groups) = self else {
return self;
};
if fork_groups.forks.len() == 1 {
self.make_no_forks_possible();
return self;
}
self
}
/// Pushes the index of an unconditional dependency on a package.
///
/// If this previously contained possible forks, those are combined into
/// one single set of dependencies that can never be forked.
///
/// That is, an unconditional package is never disjoint with any other
/// possible dependency using the same package name.
fn push_unconditional_package(&mut self, index: usize) {
self.make_no_forks_possible();
let PossibleForks::NoForkPossible(ref mut indices) = *self else {
unreachable!("all forks should be eliminated")
};
indices.push(index);
}
/// Convert this set of possible forks into something that can never fork.
///
/// This is useful in cases where a dependency on a package is found
/// without any marker expressions at all. In this case, it is never
/// possible for this package to provoke a fork. Since it is unconditional,
/// it implies it is never disjoint with any other dependency specification
/// on the same package. (Except for pathological cases of marker
/// expressions that always evaluate to false. But we generally ignore
/// those.)
fn make_no_forks_possible(&mut self) {
let PossibleForks::PossiblyForking(ref fork_groups) = *self else {
return;
};
let mut indices = vec![];
for possible_fork in &fork_groups.forks {
for &(index, _) in &possible_fork.packages {
indices.push(index);
}
}
*self = PossibleForks::NoForkPossible(indices);
}
}
/// A list of groups of dependencies (all with the same package name), where
/// each group may correspond to a fork.
#[derive(Debug)]
struct PossibleForkGroups<'a> {
/// The list of forks.
forks: Vec<PossibleFork<'a>>,
}
impl<'a> PossibleForkGroups<'a> {
/// Given a marker expression, if there is a fork in this set of fork
/// groups with non-empty overlap with it, then that fork group is
/// returned. Otherwise, `None` is returned.
fn find_overlapping_fork_group<'g>(
&'g mut self,
marker: &MarkerTree,
) -> Option<&'g mut PossibleFork<'a>> {
self.forks
.iter_mut()
.find(|fork| fork.is_overlapping(marker))
}
/// Returns a marker tree corresponding to the set of marker expressions
/// outside of this fork group.
///
/// In many cases, it can be easily known that the set of marker
/// expressions referred to by this marker tree is empty. In this case,
/// `None` is returned. But note that if a marker tree is returned, it is
/// still possible for it to describe exactly zero marker environments.
fn remaining_universe(&self) -> Option<MarkerTree> {
let mut have = MarkerTree::FALSE;
for fork in &self.forks {
have.or(fork.union());
}
let missing = have.negate();
if missing.is_false() {
return None;
}
Some(missing)
}
}
/// Intermediate state representing a single possible fork.
///
/// The key invariant here is that, beyond a singleton fork, every package
/// in this fork has a marker expression that overlaps with at least one
/// other package's marker expression. That is, when considering whether a
/// dependency specification with a conflicting package name provokes a fork
/// or not, one must look at the existing possible groups of forks. If any of
/// those groups have a package with an overlapping marker expression, then
/// the conflicting package name cannot possibly introduce a new fork. But if
/// there is no existing fork with an overlapping marker expression, then the
/// conflict provokes a new fork.
///
/// As with other intermediate data types, we use indices into a list of
/// `Dependencies` to represent packages to avoid excessive cloning.
#[derive(Debug)]
struct PossibleFork<'a> {
packages: Vec<(usize, &'a MarkerTree)>,
}
impl<'a> PossibleFork<'a> {
/// Returns true if and only if the given marker expression has a non-empty
/// intersection with *any* of the package markers within this possible
/// fork.
fn is_overlapping(&self, candidate_package_markers: &MarkerTree) -> bool {
for (_, package_markers) in &self.packages {
if !candidate_package_markers.is_disjoint(package_markers) {
return true;
}
}
false
}
/// Returns the union of all the marker expressions in this possible fork.
///
/// Each marker expression in the union returned is guaranteed to be overlapping
/// with at least one other expression in the same union.
fn union(&self) -> MarkerTree {
let mut union = MarkerTree::FALSE;
for &(_, marker) in &self.packages {
union.or(marker.clone());
}
union
}
}
/// Returns true if and only if the given requirement's marker expression has a
/// possible true value given the `requires_python` specifier.
///