Add an extra struct around the package-to-flat index map (#923)

## Summary

`FlatIndex` is now the thing that's keyed on `PackageName`, while
`FlatDistributions` is what used to be called `FlatIndex` (a map from
version to `PrioritizedDistribution`, for a single package). I find this
a bit clearer, since we can also remove the `from_files` that doesn't
return `Self`, which I had trouble following.
This commit is contained in:
Charlie Marsh 2024-01-15 09:48:10 -05:00 committed by GitHub
parent 9a3f3d385c
commit e6d7124147
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 66 additions and 53 deletions

View file

@ -139,8 +139,7 @@ pub(crate) async fn pip_sync(
} else {
let start = std::time::Instant::now();
let flat_index_files = client.flat_index().await?;
let flat_index = FlatIndex::from_files(flat_index_files, tags);
let flat_index = FlatIndex::from_files(client.flat_index().await?, tags);
let wheel_finder =
puffin_resolver::DistFinder::new(tags, &client, venv.interpreter(), &flat_index)

View file

@ -13,30 +13,30 @@ use pep440_rs::Version;
use platform_tags::Tags;
use puffin_normalize::PackageName;
pub type FlatIndexEntry = (DistFilename, File, IndexUrl);
/// A set of [`PrioritizedDistribution`] from a `--find-links` entry, indexed by [`PackageName`]
/// and [`Version`].
#[derive(Debug, Clone, Default)]
pub struct FlatIndex(pub BTreeMap<Version, PrioritizedDistribution>);
pub struct FlatIndex(FxHashMap<PackageName, FlatDistributions>);
impl FlatIndex {
/// Collect all the files from `--find-links` into a override hashmap we can pass into version map creation.
/// Collect all files from a `--find-links` target into a [`FlatIndex`].
#[instrument(skip_all)]
pub fn from_files(
dists: Vec<(DistFilename, File, IndexUrl)>,
tags: &Tags,
) -> FxHashMap<PackageName, Self> {
// If we have packages of the same name from find links, gives them priority, otherwise start empty
let mut flat_index: FxHashMap<PackageName, Self> = FxHashMap::default();
pub fn from_files(dists: Vec<FlatIndexEntry>, tags: &Tags) -> Self {
let mut flat_index = FxHashMap::default();
// Collect compatible distributions.
for (filename, file, index) in dists {
let version_map = flat_index.entry(filename.name().clone()).or_default();
Self::add_file(version_map, file, filename, tags, index);
let distributions = flat_index.entry(filename.name().clone()).or_default();
Self::add_file(distributions, file, filename, tags, index);
}
flat_index
Self(flat_index)
}
fn add_file(
version_map: &mut FlatIndex,
distributions: &mut FlatDistributions,
file: File,
filename: DistFilename,
tags: &Tags,
@ -54,7 +54,7 @@ impl FlatIndex {
file,
index,
}));
match version_map.0.entry(version) {
match distributions.0.entry(version) {
Entry::Occupied(mut entry) => {
entry.get_mut().insert_built(dist, None, None, priority);
}
@ -71,7 +71,7 @@ impl FlatIndex {
file,
index,
}));
match version_map.0.entry(filename.version.clone()) {
match distributions.0.entry(filename.version.clone()) {
Entry::Occupied(mut entry) => {
entry.get_mut().insert_source(dist, None, None);
}
@ -83,7 +83,25 @@ impl FlatIndex {
}
}
/// Get the [`FlatDistributions`] for the given package name.
pub fn get(&self, package_name: &PackageName) -> Option<&FlatDistributions> {
self.0.get(package_name)
}
}
/// A set of [`PrioritizedDistribution`] from a `--find-links` entry for a single package, indexed
/// by [`Version`].
#[derive(Debug, Clone, Default)]
pub struct FlatDistributions(BTreeMap<Version, PrioritizedDistribution>);
impl FlatDistributions {
/// Returns an iterator over the `(version, distribution)` pairs for this package, in ascending
/// [`Version`] order (the iteration order of the underlying [`BTreeMap`]).
pub fn iter(&self) -> impl Iterator<Item = (&Version, &PrioritizedDistribution)> {
self.0.iter()
}
}
/// Unwraps a [`FlatDistributions`] into its inner version-to-distribution map.
impl From<FlatDistributions> for BTreeMap<Version, PrioritizedDistribution> {
/// Moves the inner map out of the wrapper; no entries are cloned.
fn from(distributions: FlatDistributions) -> Self {
distributions.0
}
}

View file

@ -1,6 +1,6 @@
pub use cached_client::{CachedClient, CachedClientError, DataWithCachePolicy};
pub use error::Error;
pub use flat_index::FlatIndex;
pub use flat_index::{FlatDistributions, FlatIndex, FlatIndexEntry};
pub use registry_client::{
read_metadata_async, RegistryClient, RegistryClientBuilder, SimpleMetadata, VersionFiles,
};

View file

@ -30,7 +30,7 @@ use pypi_types::{BaseUrl, Hashes, Metadata21, SimpleJson};
use crate::html::SimpleHtml;
use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::{CachedClient, CachedClientError, Error};
use crate::{CachedClient, CachedClientError, Error, FlatIndexEntry};
/// A builder for a [`RegistryClient`].
#[derive(Debug, Clone)]
@ -118,7 +118,7 @@ impl RegistryClient {
/// Read the directories and flat remote indexes from `--find-links`.
#[allow(clippy::result_large_err)]
pub async fn flat_index(&self) -> Result<Vec<(DistFilename, File, IndexUrl)>, Error> {
pub async fn flat_index(&self) -> Result<Vec<FlatIndexEntry>, Error> {
let mut dists = Vec::new();
// TODO(konstin): Parallelize reads over flat indexes.
for flat_index in self.index_locations.flat_indexes() {
@ -144,7 +144,7 @@ impl RegistryClient {
}
/// Read a flat remote index from a `--find-links` URL.
async fn read_flat_url(&self, url: &Url) -> Result<Vec<(DistFilename, File, IndexUrl)>, Error> {
async fn read_flat_url(&self, url: &Url) -> Result<Vec<FlatIndexEntry>, Error> {
let cache_entry = self.cache.entry(
CacheBucket::FlatIndex,
"html",
@ -198,9 +198,7 @@ impl RegistryClient {
}
/// Read a flat remote index from a `--find-links` directory.
fn read_flat_index_dir(
path: &PathBuf,
) -> Result<Vec<(DistFilename, File, IndexUrl)>, io::Error> {
fn read_flat_index_dir(path: &PathBuf) -> Result<Vec<FlatIndexEntry>, io::Error> {
// Absolute paths are required for the URL conversion.
let path = fs_err::canonicalize(path)?;
let url = Url::from_directory_path(&path).expect("URL is already absolute");

View file

@ -18,7 +18,7 @@ use pep508_rs::Requirement;
use platform_host::Platform;
use platform_tags::Tags;
use puffin_cache::{Cache, CacheArgs};
use puffin_client::{RegistryClient, RegistryClientBuilder};
use puffin_client::{FlatIndex, RegistryClient, RegistryClientBuilder};
use puffin_dispatch::BuildDispatch;
use puffin_distribution::RegistryWheelIndex;
use puffin_installer::Downloader;
@ -104,7 +104,7 @@ async fn install_chunk(
index_locations: &IndexLocations,
) -> Result<()> {
let resolution: Vec<_> =
DistFinder::new(tags, client, venv.interpreter(), &FxHashMap::default())
DistFinder::new(tags, client, venv.interpreter(), &FlatIndex::default())
.resolve_stream(requirements)
.collect()
.await;

View file

@ -10,7 +10,7 @@ use distribution_filename::DistFilename;
use distribution_types::{Dist, IndexUrl, Resolution};
use pep508_rs::{Requirement, VersionOrUrl};
use platform_tags::Tags;
use puffin_client::{FlatIndex, RegistryClient, SimpleMetadata};
use puffin_client::{FlatDistributions, FlatIndex, RegistryClient, SimpleMetadata};
use puffin_interpreter::Interpreter;
use puffin_normalize::PackageName;
@ -21,7 +21,7 @@ pub struct DistFinder<'a> {
client: &'a RegistryClient,
reporter: Option<Box<dyn Reporter>>,
interpreter: &'a Interpreter,
flat_index: &'a FxHashMap<PackageName, FlatIndex>,
flat_index: &'a FlatIndex,
}
impl<'a> DistFinder<'a> {
@ -30,7 +30,7 @@ impl<'a> DistFinder<'a> {
tags: &'a Tags,
client: &'a RegistryClient,
interpreter: &'a Interpreter,
flat_index: &'a FxHashMap<PackageName, FlatIndex>,
flat_index: &'a FlatIndex,
) -> Self {
Self {
tags,
@ -56,7 +56,7 @@ impl<'a> DistFinder<'a> {
async fn resolve_requirement(
&self,
requirement: &Requirement,
flat_index: Option<&FlatIndex>,
flat_index: Option<&FlatDistributions>,
) -> Result<(PackageName, Dist), ResolveError> {
match requirement.version_or_url.as_ref() {
None | Some(VersionOrUrl::VersionSpecifier(_)) => {
@ -118,7 +118,7 @@ impl<'a> DistFinder<'a> {
requirement: &Requirement,
metadata: SimpleMetadata,
index: &IndexUrl,
flat_index: Option<&FlatIndex>,
flat_index: Option<&FlatDistributions>,
) -> Option<Dist> {
// Prioritize the flat index by initializing the "best" matches with its entries.
let matching_override = if let Some(flat_index) = flat_index {

View file

@ -24,7 +24,7 @@ use distribution_types::{
use pep440_rs::{Version, VersionSpecifiers, MIN_VERSION};
use pep508_rs::{MarkerEnvironment, Requirement};
use platform_tags::Tags;
use puffin_client::RegistryClient;
use puffin_client::{FlatIndex, RegistryClient};
use puffin_distribution::DistributionDatabase;
use puffin_interpreter::Interpreter;
use puffin_normalize::PackageName;
@ -89,6 +89,7 @@ impl<'a, Context: BuildContext + Send + Sync> Resolver<'a, DefaultResolverProvid
let provider = DefaultResolverProvider::new(
client,
DistributionDatabase::new(build_context.cache(), tags, client, build_context),
FlatIndex::from_files(client.flat_index().await?, tags),
tags,
PythonRequirement::new(interpreter, markers),
options.exclude_newer,
@ -97,8 +98,7 @@ impl<'a, Context: BuildContext + Send + Sync> Resolver<'a, DefaultResolverProvid
.iter()
.chain(manifest.constraints.iter())
.collect(),
)
.await?;
);
Ok(Self::new_custom_io(
manifest,
options,

View file

@ -3,7 +3,6 @@ use std::future::Future;
use anyhow::Result;
use chrono::{DateTime, Utc};
use futures::FutureExt;
use rustc_hash::FxHashMap;
use url::Url;
use distribution_types::Dist;
@ -46,10 +45,12 @@ pub trait ResolverProvider: Send + Sync {
/// The main IO backend for the resolver, which performs cached network requests using the
/// [`RegistryClient`] and [`DistributionDatabase`].
pub struct DefaultResolverProvider<'a, Context: BuildContext + Send + Sync> {
/// The [`RegistryClient`] used to query the index.
client: &'a RegistryClient,
/// These are the entries from `--find-links` that act as overrides for index responses.
flat_index: FxHashMap<PackageName, FlatIndex>,
/// The [`DistributionDatabase`] used to build source distributions.
fetcher: DistributionDatabase<'a, Context>,
/// These are the entries from `--find-links` that act as overrides for index responses.
flat_index: FlatIndex,
tags: &'a Tags,
python_requirement: PythonRequirement<'a>,
exclude_newer: Option<DateTime<Utc>>,
@ -58,26 +59,24 @@ pub struct DefaultResolverProvider<'a, Context: BuildContext + Send + Sync> {
impl<'a, Context: BuildContext + Send + Sync> DefaultResolverProvider<'a, Context> {
/// Builds the provider from an already-constructed [`FlatIndex`] of `--find-links` entries.
pub async fn new(
pub fn new(
client: &'a RegistryClient,
fetcher: DistributionDatabase<'a, Context>,
flat_index: FlatIndex,
tags: &'a Tags,
python_requirement: PythonRequirement<'a>,
exclude_newer: Option<DateTime<Utc>>,
allowed_yanks: AllowedYanks,
) -> Result<Self, puffin_client::Error> {
let flat_index_dists = client.flat_index().await?;
let flat_index = FlatIndex::from_files(flat_index_dists, tags);
Ok(Self {
) -> Self {
Self {
client,
flat_index,
fetcher,
flat_index,
tags,
python_requirement,
exclude_newer,
allowed_yanks,
})
}
}
}
@ -88,7 +87,6 @@ impl<'a, Context: BuildContext + Send + Sync> ResolverProvider
&'io self,
package_name: &'io PackageName,
) -> impl Future<Output = VersionMapResponse> + Send + 'io {
let flat_index_override = self.flat_index.get(package_name).cloned();
self.client
.simple(package_name)
.map(move |result| match result {
@ -100,10 +98,10 @@ impl<'a, Context: BuildContext + Send + Sync> ResolverProvider
&self.python_requirement,
&self.allowed_yanks,
self.exclude_newer.as_ref(),
flat_index_override,
self.flat_index.get(package_name).cloned(),
)),
Err(err @ puffin_client::Error::PackageNotFound(_)) => {
if let Some(flat_index) = flat_index_override {
if let Some(flat_index) = self.flat_index.get(package_name).cloned() {
Ok(VersionMap::from(flat_index))
} else {
Err(err)

View file

@ -8,7 +8,7 @@ use distribution_filename::DistFilename;
use distribution_types::{Dist, IndexUrl, PrioritizedDistribution, ResolvableDist};
use pep440_rs::Version;
use platform_tags::Tags;
use puffin_client::{FlatIndex, SimpleMetadata};
use puffin_client::{FlatDistributions, SimpleMetadata};
use puffin_normalize::PackageName;
use puffin_warnings::warn_user_once;
use pypi_types::{Hashes, Yanked};
@ -32,11 +32,11 @@ impl VersionMap {
python_requirement: &PythonRequirement,
allowed_yanks: &AllowedYanks,
exclude_newer: Option<&DateTime<Utc>>,
flat_index: Option<FlatIndex>,
flat_index: Option<FlatDistributions>,
) -> Self {
// If we have packages of the same name from `--find-links`, give them priority; otherwise, start empty.
let mut version_map: BTreeMap<Version, PrioritizedDistribution> =
flat_index.map(|overrides| overrides.0).unwrap_or_default();
flat_index.map(Into::into).unwrap_or_default();
// Collect compatible distributions.
for (version, files) in metadata {
@ -155,8 +155,8 @@ impl VersionMap {
}
}
impl From<FlatIndex> for VersionMap {
fn from(flat_index: FlatIndex) -> Self {
Self(flat_index.0)
/// Builds a [`VersionMap`] directly from the `--find-links` distributions of a single package.
impl From<FlatDistributions> for VersionMap {
fn from(flat_index: FlatDistributions) -> Self {
// Convert the wrapper into the map type stored by `VersionMap`.
Self(flat_index.into())
}
}