Add an extra struct around the package-to-flat index map (#923)

## Summary

`FlatIndex` is now the thing that's keyed on `PackageName`, while
`FlatDistributions` is what used to be called `FlatIndex` (a map from
version to `PrioritizedDistribution`, for a single package). I find this
a bit clearer, since we can also remove the `from_files` that doesn't
return `Self`, which I had trouble following.
This commit is contained in:
Charlie Marsh 2024-01-15 09:48:10 -05:00 committed by GitHub
parent 9a3f3d385c
commit e6d7124147
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 66 additions and 53 deletions

View file

@ -139,8 +139,7 @@ pub(crate) async fn pip_sync(
} else { } else {
let start = std::time::Instant::now(); let start = std::time::Instant::now();
let flat_index_files = client.flat_index().await?; let flat_index = FlatIndex::from_files(client.flat_index().await?, tags);
let flat_index = FlatIndex::from_files(flat_index_files, tags);
let wheel_finder = let wheel_finder =
puffin_resolver::DistFinder::new(tags, &client, venv.interpreter(), &flat_index) puffin_resolver::DistFinder::new(tags, &client, venv.interpreter(), &flat_index)

View file

@ -13,30 +13,30 @@ use pep440_rs::Version;
use platform_tags::Tags; use platform_tags::Tags;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
pub type FlatIndexEntry = (DistFilename, File, IndexUrl);
/// A set of [`PrioritizedDistribution`] from a `--find-links` entry, indexed by [`PackageName`]
/// and [`Version`].
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
pub struct FlatIndex(pub BTreeMap<Version, PrioritizedDistribution>); pub struct FlatIndex(FxHashMap<PackageName, FlatDistributions>);
impl FlatIndex { impl FlatIndex {
/// Collect all the files from `--find-links` into a override hashmap we can pass into version map creation. /// Collect all files from a `--find-links` target into a [`FlatIndex`].
#[instrument(skip_all)] #[instrument(skip_all)]
pub fn from_files( pub fn from_files(dists: Vec<FlatIndexEntry>, tags: &Tags) -> Self {
dists: Vec<(DistFilename, File, IndexUrl)>, let mut flat_index = FxHashMap::default();
tags: &Tags,
) -> FxHashMap<PackageName, Self> {
// If we have packages of the same name from find links, gives them priority, otherwise start empty
let mut flat_index: FxHashMap<PackageName, Self> = FxHashMap::default();
// Collect compatible distributions. // Collect compatible distributions.
for (filename, file, index) in dists { for (filename, file, index) in dists {
let version_map = flat_index.entry(filename.name().clone()).or_default(); let distributions = flat_index.entry(filename.name().clone()).or_default();
Self::add_file(version_map, file, filename, tags, index); Self::add_file(distributions, file, filename, tags, index);
} }
flat_index Self(flat_index)
} }
fn add_file( fn add_file(
version_map: &mut FlatIndex, distributions: &mut FlatDistributions,
file: File, file: File,
filename: DistFilename, filename: DistFilename,
tags: &Tags, tags: &Tags,
@ -54,7 +54,7 @@ impl FlatIndex {
file, file,
index, index,
})); }));
match version_map.0.entry(version) { match distributions.0.entry(version) {
Entry::Occupied(mut entry) => { Entry::Occupied(mut entry) => {
entry.get_mut().insert_built(dist, None, None, priority); entry.get_mut().insert_built(dist, None, None, priority);
} }
@ -71,7 +71,7 @@ impl FlatIndex {
file, file,
index, index,
})); }));
match version_map.0.entry(filename.version.clone()) { match distributions.0.entry(filename.version.clone()) {
Entry::Occupied(mut entry) => { Entry::Occupied(mut entry) => {
entry.get_mut().insert_source(dist, None, None); entry.get_mut().insert_source(dist, None, None);
} }
@ -83,7 +83,25 @@ impl FlatIndex {
} }
} }
/// Get the [`FlatDistributions`] for the given package name.
pub fn get(&self, package_name: &PackageName) -> Option<&FlatDistributions> {
self.0.get(package_name)
}
}
/// A set of [`PrioritizedDistribution`] from a `--find-links` entry for a single package, indexed
/// by [`Version`].
#[derive(Debug, Clone, Default)]
pub struct FlatDistributions(BTreeMap<Version, PrioritizedDistribution>);
impl FlatDistributions {
pub fn iter(&self) -> impl Iterator<Item = (&Version, &PrioritizedDistribution)> { pub fn iter(&self) -> impl Iterator<Item = (&Version, &PrioritizedDistribution)> {
self.0.iter() self.0.iter()
} }
} }
impl From<FlatDistributions> for BTreeMap<Version, PrioritizedDistribution> {
fn from(distributions: FlatDistributions) -> Self {
distributions.0
}
}

View file

@ -1,6 +1,6 @@
pub use cached_client::{CachedClient, CachedClientError, DataWithCachePolicy}; pub use cached_client::{CachedClient, CachedClientError, DataWithCachePolicy};
pub use error::Error; pub use error::Error;
pub use flat_index::FlatIndex; pub use flat_index::{FlatDistributions, FlatIndex, FlatIndexEntry};
pub use registry_client::{ pub use registry_client::{
read_metadata_async, RegistryClient, RegistryClientBuilder, SimpleMetadata, VersionFiles, read_metadata_async, RegistryClient, RegistryClientBuilder, SimpleMetadata, VersionFiles,
}; };

View file

@ -30,7 +30,7 @@ use pypi_types::{BaseUrl, Hashes, Metadata21, SimpleJson};
use crate::html::SimpleHtml; use crate::html::SimpleHtml;
use crate::remote_metadata::wheel_metadata_from_remote_zip; use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::{CachedClient, CachedClientError, Error}; use crate::{CachedClient, CachedClientError, Error, FlatIndexEntry};
/// A builder for an [`RegistryClient`]. /// A builder for an [`RegistryClient`].
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -118,7 +118,7 @@ impl RegistryClient {
/// Read the directories and flat remote indexes from `--find-links`. /// Read the directories and flat remote indexes from `--find-links`.
#[allow(clippy::result_large_err)] #[allow(clippy::result_large_err)]
pub async fn flat_index(&self) -> Result<Vec<(DistFilename, File, IndexUrl)>, Error> { pub async fn flat_index(&self) -> Result<Vec<FlatIndexEntry>, Error> {
let mut dists = Vec::new(); let mut dists = Vec::new();
// TODO(konstin): Parallelize reads over flat indexes. // TODO(konstin): Parallelize reads over flat indexes.
for flat_index in self.index_locations.flat_indexes() { for flat_index in self.index_locations.flat_indexes() {
@ -144,7 +144,7 @@ impl RegistryClient {
} }
/// Read a flat remote index from a `--find-links` URL. /// Read a flat remote index from a `--find-links` URL.
async fn read_flat_url(&self, url: &Url) -> Result<Vec<(DistFilename, File, IndexUrl)>, Error> { async fn read_flat_url(&self, url: &Url) -> Result<Vec<FlatIndexEntry>, Error> {
let cache_entry = self.cache.entry( let cache_entry = self.cache.entry(
CacheBucket::FlatIndex, CacheBucket::FlatIndex,
"html", "html",
@ -198,9 +198,7 @@ impl RegistryClient {
} }
/// Read a flat remote index from a `--find-links` directory. /// Read a flat remote index from a `--find-links` directory.
fn read_flat_index_dir( fn read_flat_index_dir(path: &PathBuf) -> Result<Vec<FlatIndexEntry>, io::Error> {
path: &PathBuf,
) -> Result<Vec<(DistFilename, File, IndexUrl)>, io::Error> {
// Absolute paths are required for the URL conversion. // Absolute paths are required for the URL conversion.
let path = fs_err::canonicalize(path)?; let path = fs_err::canonicalize(path)?;
let url = Url::from_directory_path(&path).expect("URL is already absolute"); let url = Url::from_directory_path(&path).expect("URL is already absolute");

View file

@ -18,7 +18,7 @@ use pep508_rs::Requirement;
use platform_host::Platform; use platform_host::Platform;
use platform_tags::Tags; use platform_tags::Tags;
use puffin_cache::{Cache, CacheArgs}; use puffin_cache::{Cache, CacheArgs};
use puffin_client::{RegistryClient, RegistryClientBuilder}; use puffin_client::{FlatIndex, RegistryClient, RegistryClientBuilder};
use puffin_dispatch::BuildDispatch; use puffin_dispatch::BuildDispatch;
use puffin_distribution::RegistryWheelIndex; use puffin_distribution::RegistryWheelIndex;
use puffin_installer::Downloader; use puffin_installer::Downloader;
@ -104,7 +104,7 @@ async fn install_chunk(
index_locations: &IndexLocations, index_locations: &IndexLocations,
) -> Result<()> { ) -> Result<()> {
let resolution: Vec<_> = let resolution: Vec<_> =
DistFinder::new(tags, client, venv.interpreter(), &FxHashMap::default()) DistFinder::new(tags, client, venv.interpreter(), &FlatIndex::default())
.resolve_stream(requirements) .resolve_stream(requirements)
.collect() .collect()
.await; .await;

View file

@ -10,7 +10,7 @@ use distribution_filename::DistFilename;
use distribution_types::{Dist, IndexUrl, Resolution}; use distribution_types::{Dist, IndexUrl, Resolution};
use pep508_rs::{Requirement, VersionOrUrl}; use pep508_rs::{Requirement, VersionOrUrl};
use platform_tags::Tags; use platform_tags::Tags;
use puffin_client::{FlatIndex, RegistryClient, SimpleMetadata}; use puffin_client::{FlatDistributions, FlatIndex, RegistryClient, SimpleMetadata};
use puffin_interpreter::Interpreter; use puffin_interpreter::Interpreter;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
@ -21,7 +21,7 @@ pub struct DistFinder<'a> {
client: &'a RegistryClient, client: &'a RegistryClient,
reporter: Option<Box<dyn Reporter>>, reporter: Option<Box<dyn Reporter>>,
interpreter: &'a Interpreter, interpreter: &'a Interpreter,
flat_index: &'a FxHashMap<PackageName, FlatIndex>, flat_index: &'a FlatIndex,
} }
impl<'a> DistFinder<'a> { impl<'a> DistFinder<'a> {
@ -30,7 +30,7 @@ impl<'a> DistFinder<'a> {
tags: &'a Tags, tags: &'a Tags,
client: &'a RegistryClient, client: &'a RegistryClient,
interpreter: &'a Interpreter, interpreter: &'a Interpreter,
flat_index: &'a FxHashMap<PackageName, FlatIndex>, flat_index: &'a FlatIndex,
) -> Self { ) -> Self {
Self { Self {
tags, tags,
@ -56,7 +56,7 @@ impl<'a> DistFinder<'a> {
async fn resolve_requirement( async fn resolve_requirement(
&self, &self,
requirement: &Requirement, requirement: &Requirement,
flat_index: Option<&FlatIndex>, flat_index: Option<&FlatDistributions>,
) -> Result<(PackageName, Dist), ResolveError> { ) -> Result<(PackageName, Dist), ResolveError> {
match requirement.version_or_url.as_ref() { match requirement.version_or_url.as_ref() {
None | Some(VersionOrUrl::VersionSpecifier(_)) => { None | Some(VersionOrUrl::VersionSpecifier(_)) => {
@ -118,7 +118,7 @@ impl<'a> DistFinder<'a> {
requirement: &Requirement, requirement: &Requirement,
metadata: SimpleMetadata, metadata: SimpleMetadata,
index: &IndexUrl, index: &IndexUrl,
flat_index: Option<&FlatIndex>, flat_index: Option<&FlatDistributions>,
) -> Option<Dist> { ) -> Option<Dist> {
// Prioritize the flat index by initializing the "best" matches with its entries. // Prioritize the flat index by initializing the "best" matches with its entries.
let matching_override = if let Some(flat_index) = flat_index { let matching_override = if let Some(flat_index) = flat_index {

View file

@ -24,7 +24,7 @@ use distribution_types::{
use pep440_rs::{Version, VersionSpecifiers, MIN_VERSION}; use pep440_rs::{Version, VersionSpecifiers, MIN_VERSION};
use pep508_rs::{MarkerEnvironment, Requirement}; use pep508_rs::{MarkerEnvironment, Requirement};
use platform_tags::Tags; use platform_tags::Tags;
use puffin_client::RegistryClient; use puffin_client::{FlatIndex, RegistryClient};
use puffin_distribution::DistributionDatabase; use puffin_distribution::DistributionDatabase;
use puffin_interpreter::Interpreter; use puffin_interpreter::Interpreter;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
@ -89,6 +89,7 @@ impl<'a, Context: BuildContext + Send + Sync> Resolver<'a, DefaultResolverProvid
let provider = DefaultResolverProvider::new( let provider = DefaultResolverProvider::new(
client, client,
DistributionDatabase::new(build_context.cache(), tags, client, build_context), DistributionDatabase::new(build_context.cache(), tags, client, build_context),
FlatIndex::from_files(client.flat_index().await?, tags),
tags, tags,
PythonRequirement::new(interpreter, markers), PythonRequirement::new(interpreter, markers),
options.exclude_newer, options.exclude_newer,
@ -97,8 +98,7 @@ impl<'a, Context: BuildContext + Send + Sync> Resolver<'a, DefaultResolverProvid
.iter() .iter()
.chain(manifest.constraints.iter()) .chain(manifest.constraints.iter())
.collect(), .collect(),
) );
.await?;
Ok(Self::new_custom_io( Ok(Self::new_custom_io(
manifest, manifest,
options, options,

View file

@ -3,7 +3,6 @@ use std::future::Future;
use anyhow::Result; use anyhow::Result;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use futures::FutureExt; use futures::FutureExt;
use rustc_hash::FxHashMap;
use url::Url; use url::Url;
use distribution_types::Dist; use distribution_types::Dist;
@ -46,10 +45,12 @@ pub trait ResolverProvider: Send + Sync {
/// The main IO backend for the resolver, which does cached requests network requests using the /// The main IO backend for the resolver, which does cached requests network requests using the
/// [`RegistryClient`] and [`DistributionDatabase`]. /// [`RegistryClient`] and [`DistributionDatabase`].
pub struct DefaultResolverProvider<'a, Context: BuildContext + Send + Sync> { pub struct DefaultResolverProvider<'a, Context: BuildContext + Send + Sync> {
/// The [`RegistryClient`] used to query the index.
client: &'a RegistryClient, client: &'a RegistryClient,
/// These are the entries from `--find-links` that act as overrides for index responses. /// The [`DistributionDatabase`] used to build source distributions.
flat_index: FxHashMap<PackageName, FlatIndex>,
fetcher: DistributionDatabase<'a, Context>, fetcher: DistributionDatabase<'a, Context>,
/// These are the entries from `--find-links` that act as overrides for index responses.
flat_index: FlatIndex,
tags: &'a Tags, tags: &'a Tags,
python_requirement: PythonRequirement<'a>, python_requirement: PythonRequirement<'a>,
exclude_newer: Option<DateTime<Utc>>, exclude_newer: Option<DateTime<Utc>>,
@ -58,26 +59,24 @@ pub struct DefaultResolverProvider<'a, Context: BuildContext + Send + Sync> {
impl<'a, Context: BuildContext + Send + Sync> DefaultResolverProvider<'a, Context> { impl<'a, Context: BuildContext + Send + Sync> DefaultResolverProvider<'a, Context> {
/// Reads the flat index entries and builds the provider. /// Reads the flat index entries and builds the provider.
pub async fn new( pub fn new(
client: &'a RegistryClient, client: &'a RegistryClient,
fetcher: DistributionDatabase<'a, Context>, fetcher: DistributionDatabase<'a, Context>,
flat_index: FlatIndex,
tags: &'a Tags, tags: &'a Tags,
python_requirement: PythonRequirement<'a>, python_requirement: PythonRequirement<'a>,
exclude_newer: Option<DateTime<Utc>>, exclude_newer: Option<DateTime<Utc>>,
allowed_yanks: AllowedYanks, allowed_yanks: AllowedYanks,
) -> Result<Self, puffin_client::Error> { ) -> Self {
let flat_index_dists = client.flat_index().await?; Self {
let flat_index = FlatIndex::from_files(flat_index_dists, tags);
Ok(Self {
client, client,
flat_index,
fetcher, fetcher,
flat_index,
tags, tags,
python_requirement, python_requirement,
exclude_newer, exclude_newer,
allowed_yanks, allowed_yanks,
}) }
} }
} }
@ -88,7 +87,6 @@ impl<'a, Context: BuildContext + Send + Sync> ResolverProvider
&'io self, &'io self,
package_name: &'io PackageName, package_name: &'io PackageName,
) -> impl Future<Output = VersionMapResponse> + Send + 'io { ) -> impl Future<Output = VersionMapResponse> + Send + 'io {
let flat_index_override = self.flat_index.get(package_name).cloned();
self.client self.client
.simple(package_name) .simple(package_name)
.map(move |result| match result { .map(move |result| match result {
@ -100,10 +98,10 @@ impl<'a, Context: BuildContext + Send + Sync> ResolverProvider
&self.python_requirement, &self.python_requirement,
&self.allowed_yanks, &self.allowed_yanks,
self.exclude_newer.as_ref(), self.exclude_newer.as_ref(),
flat_index_override, self.flat_index.get(package_name).cloned(),
)), )),
Err(err @ puffin_client::Error::PackageNotFound(_)) => { Err(err @ puffin_client::Error::PackageNotFound(_)) => {
if let Some(flat_index) = flat_index_override { if let Some(flat_index) = self.flat_index.get(package_name).cloned() {
Ok(VersionMap::from(flat_index)) Ok(VersionMap::from(flat_index))
} else { } else {
Err(err) Err(err)

View file

@ -8,7 +8,7 @@ use distribution_filename::DistFilename;
use distribution_types::{Dist, IndexUrl, PrioritizedDistribution, ResolvableDist}; use distribution_types::{Dist, IndexUrl, PrioritizedDistribution, ResolvableDist};
use pep440_rs::Version; use pep440_rs::Version;
use platform_tags::Tags; use platform_tags::Tags;
use puffin_client::{FlatIndex, SimpleMetadata}; use puffin_client::{FlatDistributions, SimpleMetadata};
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
use puffin_warnings::warn_user_once; use puffin_warnings::warn_user_once;
use pypi_types::{Hashes, Yanked}; use pypi_types::{Hashes, Yanked};
@ -32,11 +32,11 @@ impl VersionMap {
python_requirement: &PythonRequirement, python_requirement: &PythonRequirement,
allowed_yanks: &AllowedYanks, allowed_yanks: &AllowedYanks,
exclude_newer: Option<&DateTime<Utc>>, exclude_newer: Option<&DateTime<Utc>>,
flat_index: Option<FlatIndex>, flat_index: Option<FlatDistributions>,
) -> Self { ) -> Self {
// If we have packages of the same name from find links, gives them priority, otherwise start empty // If we have packages of the same name from find links, gives them priority, otherwise start empty
let mut version_map: BTreeMap<Version, PrioritizedDistribution> = let mut version_map: BTreeMap<Version, PrioritizedDistribution> =
flat_index.map(|overrides| overrides.0).unwrap_or_default(); flat_index.map(Into::into).unwrap_or_default();
// Collect compatible distributions. // Collect compatible distributions.
for (version, files) in metadata { for (version, files) in metadata {
@ -155,8 +155,8 @@ impl VersionMap {
} }
} }
impl From<FlatIndex> for VersionMap { impl From<FlatDistributions> for VersionMap {
fn from(flat_index: FlatIndex) -> Self { fn from(flat_index: FlatDistributions) -> Self {
Self(flat_index.0) Self(flat_index.into())
} }
} }