Choose most-compatible wheel in resolver and installer (#422)

## Summary

This PR implements logic to sort wheels by priority, where priority is
defined as preferring more "specific" wheels over less "specific"
wheels. For example, in the case of Black, my machine now selects
`black-23.11.0-cp311-cp311-macosx_11_0_arm64.whl`, whereas sorting by
lowest priority instead gives me `black-23.11.0-py3-none-any.whl`.

As part of this change, I've also modified the resolver to fallback to
using incompatible wheels when determining package metadata, if no
compatible wheels are available.

The `VersionMap` was also moved out of `resolver.rs` and into its own
file with a wrapper type, for clarity.

Closes https://github.com/astral-sh/puffin/issues/380.
Closes https://github.com/astral-sh/puffin/issues/421.
This commit is contained in:
Charlie Marsh 2023-11-15 10:22:11 -08:00 committed by GitHub
parent 1147a4de14
commit d3caf9ae86
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 254 additions and 87 deletions

1
Cargo.lock generated
View file

@ -2161,6 +2161,7 @@ dependencies = [
name = "platform-tags"
version = "0.0.1"
dependencies = [
"anyhow",
"fxhash",
"platform-host",
]

View file

@ -5,7 +5,7 @@ use thiserror::Error;
use url::Url;
use pep440_rs::Version;
use platform_tags::Tags;
use platform_tags::{TagPriority, Tags};
use puffin_normalize::{InvalidNameError, PackageName};
#[derive(Debug, Clone, Eq, PartialEq)]
@ -122,6 +122,12 @@ impl WheelFilename {
compatible_tags.is_compatible(&self.python_tag, &self.abi_tag, &self.platform_tag)
}
/// Return the [`TagPriority`] score of the wheel with the given tags, or `None` if the wheel is
/// incompatible.
pub fn compatibility(&self, compatible_tags: &Tags) -> Option<TagPriority> {
compatible_tags.compatibility(&self.python_tag, &self.abi_tag, &self.platform_tag)
}
/// Get the tag for this wheel.
pub fn get_tag(&self) -> String {
format!(

View file

@ -10,5 +10,7 @@ authors = { workspace = true }
license = { workspace = true }
[dependencies]
fxhash = { workspace = true }
platform-host = { path = "../platform-host" }
anyhow = { workspace = true }
fxhash = { workspace = true }

View file

@ -1,4 +1,7 @@
use fxhash::{FxHashMap, FxHashSet};
use std::num::NonZeroU32;
use anyhow::{Error, Result};
use fxhash::FxHashMap;
use platform_host::{Arch, Os, Platform, PlatformError};
@ -8,20 +11,24 @@ use platform_host::{Arch, Os, Platform, PlatformError};
/// wheel are compatible with the current environment.
#[derive(Debug)]
pub struct Tags {
/// python_tag |--> abi_tag |--> {platform_tag}
map: FxHashMap<String, FxHashMap<String, FxHashSet<String>>>,
/// python_tag |--> abi_tag |--> platform_tag |--> priority
map: FxHashMap<String, FxHashMap<String, FxHashMap<String, TagPriority>>>,
}
impl Tags {
/// Create a new set of tags.
///
/// Tags are prioritized based on their position in the given vector. Specifically, tags that
/// appear earlier in the vector are given higher priority than tags that appear later.
pub fn new(tags: Vec<(String, String, String)>) -> Self {
let mut map = FxHashMap::default();
for (py, abi, platform) in tags {
for (index, (py, abi, platform)) in tags.into_iter().rev().enumerate() {
map.entry(py.to_string())
.or_insert(FxHashMap::default())
.entry(abi.to_string())
.or_insert(FxHashSet::default())
.insert(platform.to_string());
.or_insert(FxHashMap::default())
.entry(platform.to_string())
.or_insert(TagPriority::try_from(index).expect("valid tag priority"));
}
Self { map }
}
@ -98,7 +105,10 @@ impl Tags {
}
/// Returns true when there exists at least one tag for this platform
/// whose individal components all appear in each of the slices given.
/// whose individual components all appear in each of the slices given.
///
/// Like [`Tags::compatibility`], but short-circuits as soon as a compatible
/// tag is found.
pub fn is_compatible(
&self,
wheel_python_tags: &[String],
@ -112,9 +122,8 @@ impl Tags {
// to avoid is looping over all of the platform tags. We avoid that
// with hashmap lookups.
let pythons = &self.map;
for wheel_py in wheel_python_tags {
let Some(abis) = pythons.get(wheel_py) else {
let Some(abis) = self.map.get(wheel_py) else {
continue;
};
for wheel_abi in wheel_abi_tags {
@ -122,7 +131,7 @@ impl Tags {
continue;
};
for wheel_platform in wheel_platform_tags {
if platforms.contains(wheel_platform) {
if platforms.contains_key(wheel_platform) {
return true;
}
}
@ -130,6 +139,48 @@ impl Tags {
}
false
}
/// Returns the [`TagPriority`] of the most-compatible platform tag, or `None` if there is no
/// compatible tag.
pub fn compatibility(
&self,
wheel_python_tags: &[String],
wheel_abi_tags: &[String],
wheel_platform_tags: &[String],
) -> Option<TagPriority> {
let mut max_priority = None;
for wheel_py in wheel_python_tags {
let Some(abis) = self.map.get(wheel_py) else {
continue;
};
for wheel_abi in wheel_abi_tags {
let Some(platforms) = abis.get(wheel_abi) else {
continue;
};
for wheel_platform in wheel_platform_tags {
let priority = platforms.get(wheel_platform).copied();
max_priority = max_priority.max(priority);
}
}
}
max_priority
}
}
/// The priority of a platform tag.
///
/// A wrapper around [`NonZeroU32`]. Higher values indicate higher priority.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct TagPriority(NonZeroU32);
impl TryFrom<usize> for TagPriority {
type Error = Error;
/// Create a [`TagPriority`] from a `usize`, where higher `usize` values are given higher
/// priority.
fn try_from(priority: usize) -> Result<Self> {
Ok(Self(NonZeroU32::try_from(1 + u32::try_from(priority)?)?))
}
}
/// Returns the compatible tags for the current [`Platform`] (e.g., `manylinux_2_17`,

View file

@ -535,7 +535,6 @@ fn compile_numpy_py38() -> Result<()> {
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
// Check that we select the wheel and not the sdist
.arg("--no-build")
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir), @r###"

View file

@ -8,7 +8,7 @@ use crate::file::DistFile;
use crate::prerelease_mode::PreReleaseStrategy;
use crate::pubgrub::PubGrubVersion;
use crate::resolution_mode::ResolutionStrategy;
use crate::resolver::VersionMap;
use crate::version_map::VersionMap;
use crate::Manifest;
#[derive(Debug)]

View file

@ -10,8 +10,9 @@ use futures::{StreamExt, TryFutureExt};
use fxhash::FxHashMap;
use distribution_filename::{SourceDistFilename, WheelFilename};
use pep440_rs::Version;
use pep508_rs::{Requirement, VersionOrUrl};
use platform_tags::Tags;
use platform_tags::{TagPriority, Tags};
use puffin_client::RegistryClient;
use puffin_distribution::Dist;
use puffin_interpreter::InterpreterInfo;
@ -134,7 +135,10 @@ impl<'a> DistFinder<'a> {
/// select a version that satisfies the requirement, preferring wheels to source distributions.
fn select(&self, requirement: &Requirement, files: Vec<File>) -> Option<Dist> {
let mut fallback = None;
let mut best_version: Option<Version> = None;
let mut best_wheel: Option<(Dist, TagPriority)> = None;
let mut best_sdist: Option<Dist> = None;
for file in files.into_iter().rev() {
// Only add dists compatible with the python version.
// This is relevant for source dists which give no other indication of their
@ -150,22 +154,56 @@ impl<'a> DistFinder<'a> {
{
continue;
}
// Find the most-compatible wheel.
if let Ok(wheel) = WheelFilename::from_str(file.filename.as_str()) {
if !wheel.is_compatible(self.tags) {
continue;
// If we iterated past the first-compatible version, break.
if best_version
.as_ref()
.is_some_and(|version| *version != wheel.version)
{
break;
}
if requirement.is_satisfied_by(&wheel.version) {
return Some(Dist::from_registry(wheel.name, wheel.version, file));
best_version = Some(wheel.version.clone());
if let Some(priority) = wheel.compatibility(self.tags) {
if best_wheel
.as_ref()
.map_or(true, |(.., existing)| priority > *existing)
{
best_wheel = Some((
Dist::from_registry(wheel.name, wheel.version, file),
priority,
));
}
}
}
} else if let Ok(sdist) =
SourceDistFilename::parse(file.filename.as_str(), &requirement.name)
{
if requirement.is_satisfied_by(&sdist.version) {
fallback = Some(Dist::from_registry(sdist.name, sdist.version, file));
continue;
}
// Find the most-compatible sdist, if no wheel was found.
if best_wheel.is_none() {
if let Ok(sdist) =
SourceDistFilename::parse(file.filename.as_str(), &requirement.name)
{
// If we iterated past the first-compatible version, break.
if best_version
.as_ref()
.is_some_and(|version| *version != sdist.version)
{
break;
}
if requirement.is_satisfied_by(&sdist.version) {
best_version = Some(sdist.version.clone());
best_sdist = Some(Dist::from_registry(sdist.name, sdist.version, file));
}
}
}
}
fallback
best_wheel.map_or(best_sdist, |(wheel, ..)| Some(wheel))
}
}

View file

@ -19,3 +19,4 @@ mod pubgrub;
mod resolution;
mod resolution_mode;
mod resolver;
mod version_map;

View file

@ -1,7 +1,5 @@
//! Given a set of requirements, find a set of compatible packages.
use std::collections::BTreeMap;
use std::str::FromStr;
use std::sync::Arc;
use anyhow::Result;
@ -17,7 +15,7 @@ use tracing::{debug, error, trace};
use url::Url;
use waitmap::WaitMap;
use distribution_filename::{SourceDistFilename, WheelFilename};
use distribution_filename::WheelFilename;
use pep508_rs::{MarkerEnvironment, Requirement};
use platform_tags::Tags;
use puffin_cache::CanonicalUrl;
@ -33,13 +31,14 @@ use pypi_types::{File, Metadata21, SimpleJson};
use crate::candidate_selector::CandidateSelector;
use crate::distribution::{BuiltDistFetcher, SourceDistFetcher, SourceDistributionReporter};
use crate::error::ResolveError;
use crate::file::{DistFile, SdistFile, WheelFile};
use crate::file::DistFile;
use crate::locks::Locks;
use crate::manifest::Manifest;
use crate::pubgrub::{
PubGrubDependencies, PubGrubPackage, PubGrubPriorities, PubGrubVersion, MIN_VERSION,
};
use crate::resolution::Graph;
use crate::version_map::VersionMap;
pub struct Resolver<'a, Context: BuildContext + Sync> {
project: Option<PackageName>,
@ -532,62 +531,13 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
match response? {
Response::Package(package_name, metadata) => {
trace!("Received package metadata for: {package_name}");
// Group the distributions by version and kind, discarding any incompatible
// distributions.
let mut version_map = VersionMap::new();
for file in metadata.files {
// Only add dists compatible with the python version.
// This is relevant for source dists which give no other indication of their
// compatibility and wheels which may be tagged `py3-none-any` but
// have `requires-python: ">=3.9"`
// TODO(konstin): https://github.com/astral-sh/puffin/issues/406
if !file
.requires_python
.as_ref()
.map_or(true, |requires_python| {
requires_python
.contains(self.build_context.interpreter_info().version())
})
{
continue;
}
// When resolving, exclude yanked files. TODO(konstin): When we fail
// resolving due to a dependency locked to yanked version, we should tell
// the user.
if file.yanked.is_yanked() {
continue;
}
if let Ok(filename) = WheelFilename::from_str(file.filename.as_str()) {
if filename.is_compatible(self.tags) {
let version = PubGrubVersion::from(filename.version);
match version_map.entry(version) {
std::collections::btree_map::Entry::Occupied(mut entry) => {
if matches!(entry.get(), DistFile::Sdist(_)) {
// Wheels get precedence over source distributions.
entry.insert(DistFile::from(WheelFile(file)));
}
}
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(DistFile::from(WheelFile(file)));
}
}
}
} else if let Ok(filename) =
SourceDistFilename::parse(file.filename.as_str(), &package_name)
{
let version = PubGrubVersion::from(filename.version);
if let std::collections::btree_map::Entry::Vacant(entry) =
version_map.entry(version)
{
entry.insert(DistFile::from(SdistFile(file)));
}
}
}
self.index
.packages
.insert(package_name.clone(), version_map);
let version_map = VersionMap::from_metadata(
metadata,
&package_name,
self.tags,
self.build_context.interpreter_info().version(),
);
self.index.packages.insert(package_name, version_map);
}
Response::Dist(Dist::Built(distribution), metadata, ..) => {
trace!("Received built distribution metadata for: {distribution}");
@ -836,8 +786,6 @@ enum Response {
Dist(Dist, Metadata21, Option<Url>),
}
pub(crate) type VersionMap = BTreeMap<PubGrubVersion, DistFile>;
/// In-memory index of in-flight network requests. Any request in an [`InFlight`] state will be
/// eventually be inserted into an [`Index`].
#[derive(Debug, Default)]

View file

@ -0,0 +1,121 @@
use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::str::FromStr;
use distribution_filename::{SourceDistFilename, WheelFilename};
use pep440_rs::Version;
use platform_tags::{TagPriority, Tags};
use puffin_normalize::PackageName;
use pypi_types::SimpleJson;
use crate::file::{DistFile, SdistFile, WheelFile};
use crate::pubgrub::PubGrubVersion;
/// A map from versions to distributions.
#[derive(Debug, Default)]
pub(crate) struct VersionMap(BTreeMap<PubGrubVersion, ScoreDistribution>);
impl VersionMap {
/// Initialize a [`VersionMap`] from the given metadata.
pub(crate) fn from_metadata(
metadata: SimpleJson,
package_name: &PackageName,
tags: &Tags,
python_version: &Version,
) -> Self {
let mut map = BTreeMap::default();
// Group the distributions by version and kind, discarding any incompatible
// distributions.
for file in metadata.files {
// Only add dists compatible with the python version. This is relevant for source
// distributions which give no other indication of their compatibility and wheels which
// may be tagged `py3-none-any` but have `requires-python: ">=3.9"`.
// TODO(konstin): https://github.com/astral-sh/puffin/issues/406
if !file
.requires_python
.as_ref()
.map_or(true, |requires_python| {
requires_python.contains(python_version)
})
{
continue;
}
// When resolving, exclude yanked files.
// TODO(konstin): When we fail resolving due to a dependency locked to yanked version,
// we should tell the user.
if file.yanked.is_yanked() {
continue;
}
if let Ok(filename) = WheelFilename::from_str(file.filename.as_str()) {
let priority = filename.compatibility(tags);
match map.entry(filename.version.into()) {
Entry::Occupied(mut entry) => {
match entry.get() {
ScoreDistribution::Sdist(_) => {
// Prefer wheels over source distributions.
entry.insert(ScoreDistribution::Wheel(
DistFile::from(WheelFile(file)),
priority,
));
}
ScoreDistribution::Wheel(.., existing) => {
// Prefer wheels with higher priority.
if priority > *existing {
entry.insert(ScoreDistribution::Wheel(
DistFile::from(WheelFile(file)),
priority,
));
}
}
}
}
Entry::Vacant(entry) => {
entry.insert(ScoreDistribution::Wheel(
DistFile::from(WheelFile(file)),
priority,
));
}
}
} else if let Ok(filename) =
SourceDistFilename::parse(file.filename.as_str(), package_name)
{
if let Entry::Vacant(entry) = map.entry(filename.version.into()) {
entry.insert(ScoreDistribution::Sdist(DistFile::from(SdistFile(file))));
}
}
}
Self(map)
}
/// Return the [`DistFile`] for the given version, if any.
pub(crate) fn get(&self, version: &PubGrubVersion) -> Option<&DistFile> {
self.0.get(version).map(|file| match file {
ScoreDistribution::Sdist(file) => file,
ScoreDistribution::Wheel(file, ..) => file,
})
}
/// Return an iterator over the versions and distributions.
pub(crate) fn iter(&self) -> impl DoubleEndedIterator<Item = (&PubGrubVersion, &DistFile)> {
self.0.iter().map(|(version, file)| {
(
version,
match file {
ScoreDistribution::Sdist(file) => file,
ScoreDistribution::Wheel(file, ..) => file,
},
)
})
}
}
#[derive(Debug)]
enum ScoreDistribution {
Sdist(DistFile),
Wheel(DistFile, Option<TagPriority>),
}