diff --git a/crates/puffin-resolver/src/distribution.rs b/crates/puffin-resolver/src/distribution.rs new file mode 100644 index 000000000..0831d1575 --- /dev/null +++ b/crates/puffin-resolver/src/distribution.rs @@ -0,0 +1,85 @@ +use puffin_client::File; +use std::ops::Deref; + +/// A distribution can either be a wheel or a source distribution. +#[derive(Debug, Clone)] +pub(crate) struct WheelFile(File); + +#[derive(Debug, Clone)] +pub(crate) struct SdistFile(File); + +#[derive(Debug, Clone)] +pub(crate) enum DistributionFile { + Wheel(WheelFile), + Sdist(SdistFile), +} + +impl Deref for WheelFile { + type Target = File; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Deref for SdistFile { + type Target = File; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl From for WheelFile { + fn from(file: File) -> Self { + Self(file) + } +} + +impl From for SdistFile { + fn from(file: File) -> Self { + Self(file) + } +} + +impl From for File { + fn from(wheel: WheelFile) -> Self { + wheel.0 + } +} + +impl From for File { + fn from(sdist: SdistFile) -> Self { + sdist.0 + } +} + +impl From for DistributionFile { + fn from(wheel: WheelFile) -> Self { + Self::Wheel(wheel) + } +} + +impl From for DistributionFile { + fn from(sdist: SdistFile) -> Self { + Self::Sdist(sdist) + } +} + +impl DistributionFile { + pub(crate) fn filename(&self) -> &str { + match self { + Self::Wheel(wheel) => wheel.filename.as_str(), + Self::Sdist(sdist) => sdist.filename.as_str(), + } + } +} + +impl From for File { + fn from(file: DistributionFile) -> Self { + match file { + DistributionFile::Wheel(wheel) => wheel.into(), + DistributionFile::Sdist(sdist) => sdist.into(), + } + } +} diff --git a/crates/puffin-resolver/src/lib.rs b/crates/puffin-resolver/src/lib.rs index 54ff22c84..8198a7214 100644 --- a/crates/puffin-resolver/src/lib.rs +++ b/crates/puffin-resolver/src/lib.rs @@ -1,14 +1,15 @@ pub use error::ResolveError; -pub use mode::ResolutionMode; pub use resolution::PinnedPackage; pub use resolver::Resolver; +pub use selector::ResolutionMode; pub use source_distribution::BuiltSourceDistributionCache; pub use wheel_finder::{Reporter, WheelFinder}; +mod distribution; mod error; -mod mode; mod pubgrub; mod resolution; mod resolver; +mod selector; mod source_distribution; mod wheel_finder; diff --git a/crates/puffin-resolver/src/mode.rs b/crates/puffin-resolver/src/mode.rs deleted file mode 100644 index 064960a79..000000000 --- a/crates/puffin-resolver/src/mode.rs +++ /dev/null @@ -1,67 +0,0 @@ -use fxhash::FxHashSet; -use itertools::Either; - -use pep508_rs::Requirement; -use puffin_client::File; -use puffin_package::package_name::PackageName; - -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] -#[cfg_attr(feature = "clap", derive(clap::ValueEnum))] -pub enum ResolutionMode { - /// Resolve the highest compatible version of each package. - #[default] - Highest, - /// Resolve the lowest compatible version of each package. - Lowest, - /// Resolve the lowest compatible version of any direct dependencies, and the highest - /// compatible version of any transitive dependencies. - LowestDirect, -} - -#[derive(Debug, Clone)] -pub(crate) enum CandidateSelector { - /// Resolve the highest compatible version of each package. - Highest, - /// Resolve the lowest compatible version of each package. - Lowest, - /// Resolve the lowest compatible version of any direct dependencies, and the highest - /// compatible version of any transitive dependencies. - LowestDirect(FxHashSet), -} - -impl CandidateSelector { - /// Return a candidate selector for the given resolution mode. - pub(crate) fn from_mode(mode: ResolutionMode, direct_dependencies: &[Requirement]) -> Self { - match mode { - ResolutionMode::Highest => Self::Highest, - ResolutionMode::Lowest => Self::Lowest, - ResolutionMode::LowestDirect => Self::LowestDirect( - direct_dependencies - .iter() - .map(|requirement| PackageName::normalize(&requirement.name)) - .collect(), - ), - } - } -} - -impl CandidateSelector { - /// Return an iterator over the candidates for the given package name. - pub(crate) fn iter_candidates<'a>( - &self, - package_name: &PackageName, - candidates: &'a [File], - ) -> impl Iterator { - match self { - CandidateSelector::Highest => Either::Left(candidates.iter().rev()), - CandidateSelector::Lowest => Either::Right(candidates.iter()), - CandidateSelector::LowestDirect(direct_dependencies) => { - if direct_dependencies.contains(package_name) { - Either::Right(candidates.iter()) - } else { - Either::Left(candidates.iter().rev()) - } - } - } - } -} diff --git a/crates/puffin-resolver/src/resolver.rs b/crates/puffin-resolver/src/resolver.rs index 7ab00b643..b14311242 100644 --- a/crates/puffin-resolver/src/resolver.rs +++ b/crates/puffin-resolver/src/resolver.rs @@ -2,6 +2,7 @@ use std::borrow::Borrow; use std::collections::hash_map::Entry; +use std::collections::BTreeMap; use std::future::Future; use std::pin::Pin; use std::str::FromStr; @@ -28,27 +29,29 @@ use puffin_package::metadata::Metadata21; use puffin_package::package_name::PackageName; use puffin_traits::BuildContext; +use crate::distribution::{DistributionFile, SdistFile, WheelFile}; use crate::error::ResolveError; -use crate::mode::{CandidateSelector, ResolutionMode}; use crate::pubgrub::package::PubGrubPackage; use crate::pubgrub::version::{PubGrubVersion, MIN_VERSION}; use crate::pubgrub::{iter_requirements, version_range}; -use crate::resolution::Graph; +use crate::resolution::{Graph, Resolution}; +use crate::selector::{CandidateSelector, ResolutionMode}; use crate::source_distribution::{download_and_build_sdist, read_dist_info}; use crate::BuiltSourceDistributionCache; -pub struct Resolver<'a, Ctx: BuildContext> { +pub struct Resolver<'a, Context: BuildContext> { requirements: Vec, constraints: Vec, + resolution: Option, markers: &'a MarkerEnvironment, tags: &'a Tags, client: &'a RegistryClient, selector: CandidateSelector, index: Arc, - build_context: &'a Ctx, + build_context: &'a Context, } -impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> { +impl<'a, Context: BuildContext> Resolver<'a, Context> { /// Initialize a new resolver. pub fn new( requirements: Vec, @@ -57,11 +60,12 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> { markers: &'a MarkerEnvironment, tags: &'a Tags, client: &'a RegistryClient, - build_context: &'a Ctx, + build_context: &'a Context, ) -> Self { Self { selector: CandidateSelector::from_mode(mode, &requirements), index: Arc::new(Index::default()), + resolution: None, requirements, constraints, markers, @@ -71,6 +75,12 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> { } } + #[must_use] + pub fn with_resolution(mut self, resolution: Resolution) -> Self { + self.resolution = Some(resolution); + self + } + /// Resolve a set of requirements into a set of pinned versions. pub async fn resolve(self) -> Result { // A channel to fetch package metadata (e.g., given `flask`, fetch all versions) and version @@ -271,46 +281,13 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> { let Some(entry) = self.index.packages.get(package_name) else { continue; }; - let simple_json = entry.value(); + let version_map = entry.value(); - // Try to find a wheel. If there isn't any, to a find a source distribution. If there - // isn't any either, short circuit and fail the resolution. - let Some((file, request)) = self + // Try to find a compatible version. If there aren't any compatible versions, + // short-circuit and return `None`. + let Some(candidate) = self .selector - .iter_candidates(package_name, &simple_json.files) - .find_map(|file| { - let wheel_filename = WheelFilename::from_str(file.filename.as_str()).ok()?; - if !wheel_filename.is_compatible(self.tags) { - return None; - } - - if range - .borrow() - .contains(&PubGrubVersion::from(wheel_filename.version.clone())) - { - Some((file, Request::Wheel(file.clone()))) - } else { - None - } - }) - .or_else(|| { - self.selector - .iter_candidates(package_name, &simple_json.files) - .find_map(|file| { - let sdist_filename = - SourceDistributionFilename::parse(&file.filename, package_name) - .ok()?; - - if range - .borrow() - .contains(&PubGrubVersion::from(sdist_filename.version.clone())) - { - Some((file, Request::Sdist((file.clone(), sdist_filename)))) - } else { - None - } - }) - }) + .select(package_name, range.borrow(), version_map) else { // Short circuit: we couldn't find _any_ compatible versions for a package. let (package, _range) = potential_packages.swap_remove(index); @@ -318,8 +295,21 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> { }; // Emit a request to fetch the metadata for this version. - if in_flight.insert(file.hashes.sha256.clone()) { - request_sink.unbounded_send(request)?; + match candidate.file { + DistributionFile::Wheel(file) => { + if in_flight.insert(file.hashes.sha256.clone()) { + request_sink.unbounded_send(Request::Wheel(file.clone()))?; + } + } + DistributionFile::Sdist(file) => { + if in_flight.insert(file.hashes.sha256.clone()) { + request_sink.unbounded_send(Request::Sdist( + file.clone(), + candidate.package_name.clone(), + candidate.version.clone().into(), + ))?; + } + } } selection = index; @@ -335,7 +325,7 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> { PubGrubPackage::Package(package_name, _) => { // Wait for the metadata to be available. let entry = self.index.packages.wait(package_name).await.unwrap(); - let simple_json = entry.value(); + let version_map = entry.value(); debug!( "Searching for a compatible version of {} ({})", @@ -344,91 +334,50 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> { ); // Find a compatible version. - let mut wheel = self - .selector - .iter_candidates(package_name, &simple_json.files) - .find_map(|file| { - let Ok(name) = WheelFilename::from_str(file.filename.as_str()) else { - return None; - }; + let Some(candidate) = + self.selector + .select(package_name, range.borrow(), version_map) + else { + // Short circuit: we couldn't find _any_ compatible versions for a package. + return Ok((package, None)); + }; - if !name.is_compatible(self.tags) { - return None; - } + debug!( + "Selecting: {}=={} ({})", + candidate.package_name, + candidate.version, + candidate.file.filename() + ); - if !range - .borrow() - .contains(&PubGrubVersion::from(name.version.clone())) - { - return None; - }; - - Some(Wheel { - file: file.clone(), - name: package_name.clone(), - version: name.version.clone(), - }) - }); - - if wheel.is_none() { - if let Some((sdist_file, parsed_filename)) = - self.selector - .iter_candidates(package_name, &simple_json.files) - .filter_map(|file| { - let Ok(parsed_filename) = - SourceDistributionFilename::parse(&file.filename, package_name) - else { - return None; - }; - - if !range.borrow().contains(&PubGrubVersion::from( - parsed_filename.version.clone(), - )) { - return None; - }; - - Some((file, parsed_filename)) - }) - .max_by(|left, right| left.1.version.cmp(&right.1.version)) - { - // Emit a request to fetch the metadata for this version. - if in_flight.insert(sdist_file.hashes.sha256.clone()) { - request_sink.unbounded_send(Request::Sdist(( - sdist_file.clone(), - parsed_filename.clone(), - )))?; - } - // TODO(konstin): That's not a wheel - wheel = Some(Wheel { - file: sdist_file.clone(), - name: package_name.clone(), - version: parsed_filename.version.clone(), - }); - } - } - - if let Some(wheel) = wheel { - debug!( - "Selecting: {}=={} ({})", - wheel.name, wheel.version, wheel.file.filename + // We want to return a package pinned to a specific version; but we _also_ want to + // store the exact file that we selected to satisfy that version. + pins.entry(candidate.package_name.clone()) + .or_default() + .insert( + candidate.version.clone().into(), + candidate.file.clone().into(), ); - // We want to return a package pinned to a specific version; but we _also_ want to - // store the exact file that we selected to satisfy that version. - pins.entry(wheel.name) - .or_default() - .insert(wheel.version.clone(), wheel.file.clone()); - - // Emit a request to fetch the metadata for this version. - if in_flight.insert(wheel.file.hashes.sha256.clone()) { - request_sink.unbounded_send(Request::Wheel(wheel.file.clone()))?; + // Emit a request to fetch the metadata for this version. + match candidate.file { + DistributionFile::Wheel(file) => { + if in_flight.insert(file.hashes.sha256.clone()) { + request_sink.unbounded_send(Request::Wheel(file.clone()))?; + } + } + DistributionFile::Sdist(file) => { + if in_flight.insert(file.hashes.sha256.clone()) { + request_sink.unbounded_send(Request::Sdist( + file.clone(), + candidate.package_name.clone(), + candidate.version.clone().into(), + ))?; + } } - - Ok((package, Some(PubGrubVersion::from(wheel.version)))) - } else { - // Short circuit: we couldn't find _any_ compatible versions for a package. - Ok((package, None)) } + + let version = candidate.version.clone(); + Ok((package, Some(version))) } }; } @@ -555,7 +504,36 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> { match response? { Response::Package(package_name, metadata) => { trace!("Received package metadata for {}", package_name); - self.index.packages.insert(package_name.clone(), metadata); + + // Group the distributions by version and kind, discarding any incompatible + // distributions. + let mut version_map: VersionMap = BTreeMap::new(); + for file in metadata.files { + if let Ok(name) = WheelFilename::from_str(file.filename.as_str()) { + if name.is_compatible(self.tags) { + let version = PubGrubVersion::from(name.version); + if let std::collections::btree_map::Entry::Vacant(entry) = + version_map.entry(version) + { + entry.insert(DistributionFile::from(WheelFile::from(file))); + } + } + } else if let Ok(name) = SourceDistributionFilename::parse( + file.filename.as_str(), + &package_name, + ) { + let version = PubGrubVersion::from(name.version); + if let std::collections::btree_map::Entry::Vacant(entry) = + version_map.entry(version) + { + entry.insert(DistributionFile::from(SdistFile::from(file))); + } + } + } + + self.index + .packages + .insert(package_name.clone(), version_map); } Response::Wheel(file, metadata) => { trace!("Received file metadata for {}", file.filename); @@ -589,19 +567,29 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> { ), Request::Wheel(file) => Box::pin( self.client - .file(file.clone()) + .file(file.clone().into()) .map_ok(move |metadata| Response::Wheel(file, metadata)) .map_err(ResolveError::Client), ), - Request::Sdist((file, filename)) => Box::pin(async move { + Request::Sdist(file, package_name, version) => Box::pin(async move { let cached_wheel = self.build_context.cache().and_then(|cache| { - BuiltSourceDistributionCache::new(cache).find_wheel(&filename, self.tags) + BuiltSourceDistributionCache::new(cache).find_wheel( + &package_name, + &version, + self.tags, + ) }); let metadata21 = if let Some(cached_wheel) = cached_wheel { read_dist_info(cached_wheel).await } else { - download_and_build_sdist(&file, self.client, self.build_context, &filename) - .await + download_and_build_sdist( + &file, + &package_name, + &version, + self.client, + self.build_context, + ) + .await } .map_err(|err| ResolveError::SourceDistribution { filename: file.filename.clone(), @@ -614,25 +602,15 @@ impl<'a, Ctx: BuildContext> Resolver<'a, Ctx> { } } -#[derive(Debug, Clone)] -struct Wheel { - /// The underlying [`File`] for this wheel. - file: File, - /// The normalized name of the package. - name: PackageName, - /// The version of the package. - version: pep440_rs::Version, -} - /// Fetch the metadata for an item #[derive(Debug)] enum Request { /// A request to fetch the metadata for a package. Package(PackageName), /// A request to fetch and build the source distribution for a specific package version - Sdist((File, SourceDistributionFilename)), + Sdist(SdistFile, PackageName, pep440_rs::Version), /// A request to fetch the metadata for a specific version of a package. - Wheel(File), + Wheel(WheelFile), } #[derive(Debug)] @@ -640,15 +618,17 @@ enum Response { /// The returned metadata for a package. Package(PackageName, SimpleJson), /// The returned metadata for a specific version of a package. - Wheel(File, Metadata21), + Wheel(WheelFile, Metadata21), /// The returned metadata for an sdist build. - Sdist(File, Metadata21), + Sdist(SdistFile, Metadata21), } +pub(crate) type VersionMap = BTreeMap; + /// In-memory index of package metadata. struct Index { /// A map from package name to the metadata for that package. - packages: WaitMap, + packages: WaitMap, /// A map from wheel SHA to the metadata for that wheel. versions: WaitMap, diff --git a/crates/puffin-resolver/src/selector.rs b/crates/puffin-resolver/src/selector.rs new file mode 100644 index 000000000..79e972ad1 --- /dev/null +++ b/crates/puffin-resolver/src/selector.rs @@ -0,0 +1,148 @@ +use fxhash::FxHashSet; +use pubgrub::range::Range; + +use crate::distribution::DistributionFile; +use pep508_rs::Requirement; + +use puffin_package::package_name::PackageName; + +use crate::pubgrub::version::PubGrubVersion; +use crate::resolver::VersionMap; + +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(feature = "clap", derive(clap::ValueEnum))] +pub enum ResolutionMode { + /// Resolve the highest compatible version of each package. + #[default] + Highest, + /// Resolve the lowest compatible version of each package. + Lowest, + /// Resolve the lowest compatible version of any direct dependencies, and the highest + /// compatible version of any transitive dependencies. + LowestDirect, +} + +#[derive(Debug, Clone)] +pub(crate) enum CandidateSelector { + /// Resolve the highest compatible version of each package. + Highest, + /// Resolve the lowest compatible version of each package. + Lowest, + /// Resolve the lowest compatible version of any direct dependencies, and the highest + /// compatible version of any transitive dependencies. + LowestDirect(FxHashSet), +} + +impl CandidateSelector { + /// Return a candidate selector for the given resolution mode. + pub(crate) fn from_mode(mode: ResolutionMode, direct_dependencies: &[Requirement]) -> Self { + match mode { + ResolutionMode::Highest => Self::Highest, + ResolutionMode::Lowest => Self::Lowest, + ResolutionMode::LowestDirect => Self::LowestDirect( + direct_dependencies + .iter() + .map(|requirement| PackageName::normalize(&requirement.name)) + .collect(), + ), + } + } +} + +impl CandidateSelector { + /// Select a [`Candidate`] from a set of candidate versions and files. + pub(crate) fn select<'a>( + &self, + package_name: &'a PackageName, + range: &'a Range, + version_map: &'a VersionMap, + ) -> Option> { + match self { + CandidateSelector::Highest => { + CandidateSelector::select_highest(package_name, range, version_map) + } + CandidateSelector::Lowest => { + CandidateSelector::select_lowest(package_name, range, version_map) + } + CandidateSelector::LowestDirect(direct_dependencies) => { + if direct_dependencies.contains(package_name) { + CandidateSelector::select_lowest(package_name, range, version_map) + } else { + CandidateSelector::select_highest(package_name, range, version_map) + } + } + } + } + + /// Select the highest-compatible [`Candidate`] from a set of candidate versions and files, + /// preferring wheels over sdists. + fn select_highest<'a>( + package_name: &'a PackageName, + range: &'a Range, + version_map: &'a VersionMap, + ) -> Option> { + let mut sdist = None; + for (version, file) in version_map.iter().rev() { + if range.contains(version) { + match file { + DistributionFile::Wheel(_) => { + return Some(Candidate { + package_name, + version, + file, + }); + } + DistributionFile::Sdist(_) => { + sdist = Some(Candidate { + package_name, + version, + file, + }); + } + } + } + } + sdist + } + + /// Select the highest-compatible [`Candidate`] from a set of candidate versions and files, + /// preferring wheels over sdists. + fn select_lowest<'a>( + package_name: &'a PackageName, + range: &'a Range, + version_map: &'a VersionMap, + ) -> Option> { + let mut sdist = None; + for (version, file) in version_map { + if range.contains(version) { + match file { + DistributionFile::Wheel(_) => { + return Some(Candidate { + package_name, + version, + file, + }); + } + DistributionFile::Sdist(_) => { + sdist = Some(Candidate { + package_name, + version, + file, + }); + } + } + } + } + sdist + } +} + +#[derive(Debug, Clone)] +pub(crate) struct Candidate<'a> { + /// The name of the package. + pub(crate) package_name: &'a PackageName, + /// The version of the package. + pub(crate) version: &'a PubGrubVersion, + /// The file of the package. + pub(crate) file: &'a DistributionFile, +} diff --git a/crates/puffin-resolver/src/source_distribution.rs b/crates/puffin-resolver/src/source_distribution.rs index 69d37e246..3dcc57a69 100644 --- a/crates/puffin-resolver/src/source_distribution.rs +++ b/crates/puffin-resolver/src/source_distribution.rs @@ -10,7 +10,7 @@ use tracing::debug; use url::Url; use zip::ZipArchive; -use distribution_filename::{SourceDistributionFilename, WheelFilename}; +use distribution_filename::WheelFilename; use pep440_rs::Version; use platform_tags::Tags; use puffin_client::{File, RegistryClient}; @@ -39,15 +39,18 @@ impl BuiltSourceDistributionCache { /// Search for a wheel matching the tags that was built from the given source distribution. pub fn find_wheel( &self, - filename: &SourceDistributionFilename, + package_name: &PackageName, + version: &Version, tags: &Tags, ) -> Option { - let Ok(read_dir) = fs_err::read_dir(self.version(&filename.name, &filename.version)) else { + let Ok(read_dir) = fs_err::read_dir(self.version(package_name, version)) else { return None; }; for entry in read_dir { - let Ok(entry) = entry else { continue }; + let Ok(entry) = entry else { + continue; + }; let Ok(wheel) = WheelFilename::from_str(entry.file_name().to_string_lossy().as_ref()) else { continue; @@ -63,9 +66,10 @@ impl BuiltSourceDistributionCache { pub(crate) async fn download_and_build_sdist( file: &File, + package_name: &PackageName, + version: &Version, client: &RegistryClient, build_context: &impl BuildContext, - sdist_filename: &SourceDistributionFilename, ) -> Result { debug!("Building {}", &file.filename); let url = Url::parse(&file.url)?; @@ -80,8 +84,7 @@ pub(crate) async fn download_and_build_sdist( tokio::io::copy(&mut reader, &mut writer).await?; let wheel_dir = if let Some(cache) = &build_context.cache() { - BuiltSourceDistributionCache::new(cache) - .version(&sdist_filename.name, &sdist_filename.version) + BuiltSourceDistributionCache::new(cache).version(package_name, version) } else { temp_dir.path().join("wheels") };