Filter and store all distributions upfront (#185)

Modifies the resolver to remove any incompatible distributions upfront,
and store them in an index by version. This will be necessary to support
`--upgrade` semantics.

This actually does cause a meaningful slowdown right now (since we now
iterate over all files, even if we otherwise never would've needed to
touch them), but we should be able to optimize it out later.
This commit is contained in:
Charlie Marsh 2023-10-25 18:06:44 -07:00 committed by GitHub
parent 5ed913af50
commit 61a61db154
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 373 additions and 223 deletions

View file

@ -0,0 +1,85 @@
use puffin_client::File;
use std::ops::Deref;
/// A distribution can either be a wheel or a source distribution.
#[derive(Debug, Clone)]
pub(crate) struct WheelFile(File);
#[derive(Debug, Clone)]
pub(crate) struct SdistFile(File);
#[derive(Debug, Clone)]
pub(crate) enum DistributionFile {
Wheel(WheelFile),
Sdist(SdistFile),
}
impl Deref for WheelFile {
type Target = File;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Deref for SdistFile {
type Target = File;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl From<File> for WheelFile {
fn from(file: File) -> Self {
Self(file)
}
}
impl From<File> for SdistFile {
fn from(file: File) -> Self {
Self(file)
}
}
impl From<WheelFile> for File {
fn from(wheel: WheelFile) -> Self {
wheel.0
}
}
impl From<SdistFile> for File {
fn from(sdist: SdistFile) -> Self {
sdist.0
}
}
impl From<WheelFile> for DistributionFile {
fn from(wheel: WheelFile) -> Self {
Self::Wheel(wheel)
}
}
impl From<SdistFile> for DistributionFile {
fn from(sdist: SdistFile) -> Self {
Self::Sdist(sdist)
}
}
impl DistributionFile {
pub(crate) fn filename(&self) -> &str {
match self {
Self::Wheel(wheel) => wheel.filename.as_str(),
Self::Sdist(sdist) => sdist.filename.as_str(),
}
}
}
impl From<DistributionFile> for File {
fn from(file: DistributionFile) -> Self {
match file {
DistributionFile::Wheel(wheel) => wheel.into(),
DistributionFile::Sdist(sdist) => sdist.into(),
}
}
}

View file

@ -1,14 +1,15 @@
pub use error::ResolveError;
pub use mode::ResolutionMode;
pub use resolution::PinnedPackage;
pub use resolver::Resolver;
pub use selector::ResolutionMode;
pub use source_distribution::BuiltSourceDistributionCache;
pub use wheel_finder::{Reporter, WheelFinder};
mod distribution;
mod error;
mod mode;
mod pubgrub;
mod resolution;
mod resolver;
mod selector;
mod source_distribution;
mod wheel_finder;

View file

@ -1,67 +0,0 @@
use fxhash::FxHashSet;
use itertools::Either;
use pep508_rs::Requirement;
use puffin_client::File;
use puffin_package::package_name::PackageName;
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "clap", derive(clap::ValueEnum))]
pub enum ResolutionMode {
/// Resolve the highest compatible version of each package.
#[default]
Highest,
/// Resolve the lowest compatible version of each package.
Lowest,
/// Resolve the lowest compatible version of any direct dependencies, and the highest
/// compatible version of any transitive dependencies.
LowestDirect,
}
#[derive(Debug, Clone)]
pub(crate) enum CandidateSelector {
/// Resolve the highest compatible version of each package.
Highest,
/// Resolve the lowest compatible version of each package.
Lowest,
/// Resolve the lowest compatible version of any direct dependencies, and the highest
/// compatible version of any transitive dependencies.
LowestDirect(FxHashSet<PackageName>),
}
impl CandidateSelector {
/// Return a candidate selector for the given resolution mode.
pub(crate) fn from_mode(mode: ResolutionMode, direct_dependencies: &[Requirement]) -> Self {
match mode {
ResolutionMode::Highest => Self::Highest,
ResolutionMode::Lowest => Self::Lowest,
ResolutionMode::LowestDirect => Self::LowestDirect(
direct_dependencies
.iter()
.map(|requirement| PackageName::normalize(&requirement.name))
.collect(),
),
}
}
}
impl CandidateSelector {
/// Return an iterator over the candidates for the given package name.
pub(crate) fn iter_candidates<'a>(
&self,
package_name: &PackageName,
candidates: &'a [File],
) -> impl Iterator<Item = &'a File> {
match self {
CandidateSelector::Highest => Either::Left(candidates.iter().rev()),
CandidateSelector::Lowest => Either::Right(candidates.iter()),
CandidateSelector::LowestDirect(direct_dependencies) => {
if direct_dependencies.contains(package_name) {
Either::Right(candidates.iter())
} else {
Either::Left(candidates.iter().rev())
}
}
}
}
}

View file

@ -2,6 +2,7 @@
use std::borrow::Borrow;
use std::collections::hash_map::Entry;
use std::collections::BTreeMap;
use std::future::Future;
use std::pin::Pin;
use std::str::FromStr;
@ -28,27 +29,29 @@ use puffin_package::metadata::Metadata21;
use puffin_package::package_name::PackageName;
use puffin_traits::BuildContext;
use crate::distribution::{DistributionFile, SdistFile, WheelFile};
use crate::error::ResolveError;
use crate::mode::{CandidateSelector, ResolutionMode};
use crate::pubgrub::package::PubGrubPackage;
use crate::pubgrub::version::{PubGrubVersion, MIN_VERSION};
use crate::pubgrub::{iter_requirements, version_range};
use crate::resolution::Graph;
use crate::resolution::{Graph, Resolution};
use crate::selector::{CandidateSelector, ResolutionMode};
use crate::source_distribution::{download_and_build_sdist, read_dist_info};
use crate::BuiltSourceDistributionCache;
pub struct Resolver<'a, Ctx: BuildContext> {
pub struct Resolver<'a, Context: BuildContext> {
requirements: Vec<Requirement>,
constraints: Vec<Requirement>,
resolution: Option<Resolution>,
markers: &'a MarkerEnvironment,
tags: &'a Tags,
client: &'a RegistryClient,
selector: CandidateSelector,
index: Arc<Index>,
build_context: &'a Ctx,
build_context: &'a Context,
}
impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> {
impl<'a, Context: BuildContext> Resolver<'a, Context> {
/// Initialize a new resolver.
pub fn new(
requirements: Vec<Requirement>,
@ -57,11 +60,12 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> {
markers: &'a MarkerEnvironment,
tags: &'a Tags,
client: &'a RegistryClient,
build_context: &'a Ctx,
build_context: &'a Context,
) -> Self {
Self {
selector: CandidateSelector::from_mode(mode, &requirements),
index: Arc::new(Index::default()),
resolution: None,
requirements,
constraints,
markers,
@ -71,6 +75,12 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> {
}
}
#[must_use]
pub fn with_resolution(mut self, resolution: Resolution) -> Self {
self.resolution = Some(resolution);
self
}
/// Resolve a set of requirements into a set of pinned versions.
pub async fn resolve(self) -> Result<Graph, ResolveError> {
// A channel to fetch package metadata (e.g., given `flask`, fetch all versions) and version
@ -271,46 +281,13 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> {
let Some(entry) = self.index.packages.get(package_name) else {
continue;
};
let simple_json = entry.value();
let version_map = entry.value();
// Try to find a wheel. If there isn't any, to a find a source distribution. If there
// isn't any either, short circuit and fail the resolution.
let Some((file, request)) = self
// Try to find a compatible version. If there aren't any compatible versions,
// short-circuit and return `None`.
let Some(candidate) = self
.selector
.iter_candidates(package_name, &simple_json.files)
.find_map(|file| {
let wheel_filename = WheelFilename::from_str(file.filename.as_str()).ok()?;
if !wheel_filename.is_compatible(self.tags) {
return None;
}
if range
.borrow()
.contains(&PubGrubVersion::from(wheel_filename.version.clone()))
{
Some((file, Request::Wheel(file.clone())))
} else {
None
}
})
.or_else(|| {
self.selector
.iter_candidates(package_name, &simple_json.files)
.find_map(|file| {
let sdist_filename =
SourceDistributionFilename::parse(&file.filename, package_name)
.ok()?;
if range
.borrow()
.contains(&PubGrubVersion::from(sdist_filename.version.clone()))
{
Some((file, Request::Sdist((file.clone(), sdist_filename))))
} else {
None
}
})
})
.select(package_name, range.borrow(), version_map)
else {
// Short circuit: we couldn't find _any_ compatible versions for a package.
let (package, _range) = potential_packages.swap_remove(index);
@ -318,8 +295,21 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> {
};
// Emit a request to fetch the metadata for this version.
if in_flight.insert(file.hashes.sha256.clone()) {
request_sink.unbounded_send(request)?;
match candidate.file {
DistributionFile::Wheel(file) => {
if in_flight.insert(file.hashes.sha256.clone()) {
request_sink.unbounded_send(Request::Wheel(file.clone()))?;
}
}
DistributionFile::Sdist(file) => {
if in_flight.insert(file.hashes.sha256.clone()) {
request_sink.unbounded_send(Request::Sdist(
file.clone(),
candidate.package_name.clone(),
candidate.version.clone().into(),
))?;
}
}
}
selection = index;
@ -335,7 +325,7 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> {
PubGrubPackage::Package(package_name, _) => {
// Wait for the metadata to be available.
let entry = self.index.packages.wait(package_name).await.unwrap();
let simple_json = entry.value();
let version_map = entry.value();
debug!(
"Searching for a compatible version of {} ({})",
@ -344,91 +334,50 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> {
);
// Find a compatible version.
let mut wheel = self
.selector
.iter_candidates(package_name, &simple_json.files)
.find_map(|file| {
let Ok(name) = WheelFilename::from_str(file.filename.as_str()) else {
return None;
};
let Some(candidate) =
self.selector
.select(package_name, range.borrow(), version_map)
else {
// Short circuit: we couldn't find _any_ compatible versions for a package.
return Ok((package, None));
};
if !name.is_compatible(self.tags) {
return None;
}
debug!(
"Selecting: {}=={} ({})",
candidate.package_name,
candidate.version,
candidate.file.filename()
);
if !range
.borrow()
.contains(&PubGrubVersion::from(name.version.clone()))
{
return None;
};
Some(Wheel {
file: file.clone(),
name: package_name.clone(),
version: name.version.clone(),
})
});
if wheel.is_none() {
if let Some((sdist_file, parsed_filename)) =
self.selector
.iter_candidates(package_name, &simple_json.files)
.filter_map(|file| {
let Ok(parsed_filename) =
SourceDistributionFilename::parse(&file.filename, package_name)
else {
return None;
};
if !range.borrow().contains(&PubGrubVersion::from(
parsed_filename.version.clone(),
)) {
return None;
};
Some((file, parsed_filename))
})
.max_by(|left, right| left.1.version.cmp(&right.1.version))
{
// Emit a request to fetch the metadata for this version.
if in_flight.insert(sdist_file.hashes.sha256.clone()) {
request_sink.unbounded_send(Request::Sdist((
sdist_file.clone(),
parsed_filename.clone(),
)))?;
}
// TODO(konstin): That's not a wheel
wheel = Some(Wheel {
file: sdist_file.clone(),
name: package_name.clone(),
version: parsed_filename.version.clone(),
});
}
}
if let Some(wheel) = wheel {
debug!(
"Selecting: {}=={} ({})",
wheel.name, wheel.version, wheel.file.filename
// We want to return a package pinned to a specific version; but we _also_ want to
// store the exact file that we selected to satisfy that version.
pins.entry(candidate.package_name.clone())
.or_default()
.insert(
candidate.version.clone().into(),
candidate.file.clone().into(),
);
// We want to return a package pinned to a specific version; but we _also_ want to
// store the exact file that we selected to satisfy that version.
pins.entry(wheel.name)
.or_default()
.insert(wheel.version.clone(), wheel.file.clone());
// Emit a request to fetch the metadata for this version.
if in_flight.insert(wheel.file.hashes.sha256.clone()) {
request_sink.unbounded_send(Request::Wheel(wheel.file.clone()))?;
// Emit a request to fetch the metadata for this version.
match candidate.file {
DistributionFile::Wheel(file) => {
if in_flight.insert(file.hashes.sha256.clone()) {
request_sink.unbounded_send(Request::Wheel(file.clone()))?;
}
}
DistributionFile::Sdist(file) => {
if in_flight.insert(file.hashes.sha256.clone()) {
request_sink.unbounded_send(Request::Sdist(
file.clone(),
candidate.package_name.clone(),
candidate.version.clone().into(),
))?;
}
}
Ok((package, Some(PubGrubVersion::from(wheel.version))))
} else {
// Short circuit: we couldn't find _any_ compatible versions for a package.
Ok((package, None))
}
let version = candidate.version.clone();
Ok((package, Some(version)))
}
};
}
@ -555,7 +504,36 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> {
match response? {
Response::Package(package_name, metadata) => {
trace!("Received package metadata for {}", package_name);
self.index.packages.insert(package_name.clone(), metadata);
// Group the distributions by version and kind, discarding any incompatible
// distributions.
let mut version_map: VersionMap = BTreeMap::new();
for file in metadata.files {
if let Ok(name) = WheelFilename::from_str(file.filename.as_str()) {
if name.is_compatible(self.tags) {
let version = PubGrubVersion::from(name.version);
if let std::collections::btree_map::Entry::Vacant(entry) =
version_map.entry(version)
{
entry.insert(DistributionFile::from(WheelFile::from(file)));
}
}
} else if let Ok(name) = SourceDistributionFilename::parse(
file.filename.as_str(),
&package_name,
) {
let version = PubGrubVersion::from(name.version);
if let std::collections::btree_map::Entry::Vacant(entry) =
version_map.entry(version)
{
entry.insert(DistributionFile::from(SdistFile::from(file)));
}
}
}
self.index
.packages
.insert(package_name.clone(), version_map);
}
Response::Wheel(file, metadata) => {
trace!("Received file metadata for {}", file.filename);
@ -589,19 +567,29 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> {
),
Request::Wheel(file) => Box::pin(
self.client
.file(file.clone())
.file(file.clone().into())
.map_ok(move |metadata| Response::Wheel(file, metadata))
.map_err(ResolveError::Client),
),
Request::Sdist((file, filename)) => Box::pin(async move {
Request::Sdist(file, package_name, version) => Box::pin(async move {
let cached_wheel = self.build_context.cache().and_then(|cache| {
BuiltSourceDistributionCache::new(cache).find_wheel(&filename, self.tags)
BuiltSourceDistributionCache::new(cache).find_wheel(
&package_name,
&version,
self.tags,
)
});
let metadata21 = if let Some(cached_wheel) = cached_wheel {
read_dist_info(cached_wheel).await
} else {
download_and_build_sdist(&file, self.client, self.build_context, &filename)
.await
download_and_build_sdist(
&file,
&package_name,
&version,
self.client,
self.build_context,
)
.await
}
.map_err(|err| ResolveError::SourceDistribution {
filename: file.filename.clone(),
@ -614,25 +602,15 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> {
}
}
#[derive(Debug, Clone)]
struct Wheel {
/// The underlying [`File`] for this wheel.
file: File,
/// The normalized name of the package.
name: PackageName,
/// The version of the package.
version: pep440_rs::Version,
}
/// Fetch the metadata for an item
#[derive(Debug)]
enum Request {
/// A request to fetch the metadata for a package.
Package(PackageName),
/// A request to fetch and build the source distribution for a specific package version
Sdist((File, SourceDistributionFilename)),
Sdist(SdistFile, PackageName, pep440_rs::Version),
/// A request to fetch the metadata for a specific version of a package.
Wheel(File),
Wheel(WheelFile),
}
#[derive(Debug)]
@ -640,15 +618,17 @@ enum Response {
/// The returned metadata for a package.
Package(PackageName, SimpleJson),
/// The returned metadata for a specific version of a package.
Wheel(File, Metadata21),
Wheel(WheelFile, Metadata21),
/// The returned metadata for an sdist build.
Sdist(File, Metadata21),
Sdist(SdistFile, Metadata21),
}
pub(crate) type VersionMap = BTreeMap<PubGrubVersion, DistributionFile>;
/// In-memory index of package metadata.
struct Index {
/// A map from package name to the metadata for that package.
packages: WaitMap<PackageName, SimpleJson>,
packages: WaitMap<PackageName, VersionMap>,
/// A map from wheel SHA to the metadata for that wheel.
versions: WaitMap<String, Metadata21>,

View file

@ -0,0 +1,148 @@
use fxhash::FxHashSet;
use pubgrub::range::Range;
use crate::distribution::DistributionFile;
use pep508_rs::Requirement;
use puffin_package::package_name::PackageName;
use crate::pubgrub::version::PubGrubVersion;
use crate::resolver::VersionMap;
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "clap", derive(clap::ValueEnum))]
pub enum ResolutionMode {
/// Resolve the highest compatible version of each package.
#[default]
Highest,
/// Resolve the lowest compatible version of each package.
Lowest,
/// Resolve the lowest compatible version of any direct dependencies, and the highest
/// compatible version of any transitive dependencies.
LowestDirect,
}
#[derive(Debug, Clone)]
pub(crate) enum CandidateSelector {
/// Resolve the highest compatible version of each package.
Highest,
/// Resolve the lowest compatible version of each package.
Lowest,
/// Resolve the lowest compatible version of any direct dependencies, and the highest
/// compatible version of any transitive dependencies.
LowestDirect(FxHashSet<PackageName>),
}
impl CandidateSelector {
/// Return a candidate selector for the given resolution mode.
pub(crate) fn from_mode(mode: ResolutionMode, direct_dependencies: &[Requirement]) -> Self {
match mode {
ResolutionMode::Highest => Self::Highest,
ResolutionMode::Lowest => Self::Lowest,
ResolutionMode::LowestDirect => Self::LowestDirect(
direct_dependencies
.iter()
.map(|requirement| PackageName::normalize(&requirement.name))
.collect(),
),
}
}
}
impl CandidateSelector {
/// Select a [`Candidate`] from a set of candidate versions and files.
pub(crate) fn select<'a>(
&self,
package_name: &'a PackageName,
range: &'a Range<PubGrubVersion>,
version_map: &'a VersionMap,
) -> Option<Candidate<'a>> {
match self {
CandidateSelector::Highest => {
CandidateSelector::select_highest(package_name, range, version_map)
}
CandidateSelector::Lowest => {
CandidateSelector::select_lowest(package_name, range, version_map)
}
CandidateSelector::LowestDirect(direct_dependencies) => {
if direct_dependencies.contains(package_name) {
CandidateSelector::select_lowest(package_name, range, version_map)
} else {
CandidateSelector::select_highest(package_name, range, version_map)
}
}
}
}
/// Select the highest-compatible [`Candidate`] from a set of candidate versions and files,
/// preferring wheels over sdists.
fn select_highest<'a>(
package_name: &'a PackageName,
range: &'a Range<PubGrubVersion>,
version_map: &'a VersionMap,
) -> Option<Candidate<'a>> {
let mut sdist = None;
for (version, file) in version_map.iter().rev() {
if range.contains(version) {
match file {
DistributionFile::Wheel(_) => {
return Some(Candidate {
package_name,
version,
file,
});
}
DistributionFile::Sdist(_) => {
sdist = Some(Candidate {
package_name,
version,
file,
});
}
}
}
}
sdist
}
/// Select the highest-compatible [`Candidate`] from a set of candidate versions and files,
/// preferring wheels over sdists.
fn select_lowest<'a>(
package_name: &'a PackageName,
range: &'a Range<PubGrubVersion>,
version_map: &'a VersionMap,
) -> Option<Candidate<'a>> {
let mut sdist = None;
for (version, file) in version_map {
if range.contains(version) {
match file {
DistributionFile::Wheel(_) => {
return Some(Candidate {
package_name,
version,
file,
});
}
DistributionFile::Sdist(_) => {
sdist = Some(Candidate {
package_name,
version,
file,
});
}
}
}
}
sdist
}
}
#[derive(Debug, Clone)]
pub(crate) struct Candidate<'a> {
/// The name of the package.
pub(crate) package_name: &'a PackageName,
/// The version of the package.
pub(crate) version: &'a PubGrubVersion,
/// The file of the package.
pub(crate) file: &'a DistributionFile,
}

View file

@ -10,7 +10,7 @@ use tracing::debug;
use url::Url;
use zip::ZipArchive;
use distribution_filename::{SourceDistributionFilename, WheelFilename};
use distribution_filename::WheelFilename;
use pep440_rs::Version;
use platform_tags::Tags;
use puffin_client::{File, RegistryClient};
@ -39,15 +39,18 @@ impl BuiltSourceDistributionCache {
/// Search for a wheel matching the tags that was built from the given source distribution.
pub fn find_wheel(
&self,
filename: &SourceDistributionFilename,
package_name: &PackageName,
version: &Version,
tags: &Tags,
) -> Option<PathBuf> {
let Ok(read_dir) = fs_err::read_dir(self.version(&filename.name, &filename.version)) else {
let Ok(read_dir) = fs_err::read_dir(self.version(package_name, version)) else {
return None;
};
for entry in read_dir {
let Ok(entry) = entry else { continue };
let Ok(entry) = entry else {
continue;
};
let Ok(wheel) = WheelFilename::from_str(entry.file_name().to_string_lossy().as_ref())
else {
continue;
@ -63,9 +66,10 @@ impl BuiltSourceDistributionCache {
pub(crate) async fn download_and_build_sdist(
file: &File,
package_name: &PackageName,
version: &Version,
client: &RegistryClient,
build_context: &impl BuildContext,
sdist_filename: &SourceDistributionFilename,
) -> Result<Metadata21> {
debug!("Building {}", &file.filename);
let url = Url::parse(&file.url)?;
@ -80,8 +84,7 @@ pub(crate) async fn download_and_build_sdist(
tokio::io::copy(&mut reader, &mut writer).await?;
let wheel_dir = if let Some(cache) = &build_context.cache() {
BuiltSourceDistributionCache::new(cache)
.version(&sdist_filename.name, &sdist_filename.version)
BuiltSourceDistributionCache::new(cache).version(package_name, version)
} else {
temp_dir.path().join("wheels")
};