Implement --find-links as flat indexes (directories in pip-compile) (#912)

Add directory `--find-links` support for local paths to pip-compile.

It seems that pip joins all sources and then picks the best package. We
explicitly give find links packages precedence if the same exists on an
index and locally by prefilling the `VersionMap`, otherwise they are
added as another index and the existing rules of precedence apply.

Internally, the feature is called _flat index_, which is more meaningful
than _find links_: We're not looking for links, we're picking up local
directories, and (TBD) will support another index format that's just a
flat list of files instead of a nested index.

`RegistryBuiltDist` and `RegistrySourceDist` now use `WheelFilename` and
`SourceDistFilename` respectively. The `File` inside `RegistryBuiltDist`
and `RegistrySourceDist` gained the ability to represent both a url and
a path so that `--find-links` with a url and with a path works the same,
both being locked as `<package_name>@<version>` instead of
`<package_name> @ <url>`. (This is more of a detail; this PR in general
still works if we strip that and have directory find links represented as
`<package_name> @ file:///path/to/file.ext`.)

`PrioritizedDistribution` and `FlatIndex` have been moved to locations
where we can use them in the upstack PR.

I added a `scripts/wheels` directory with stripped down wheels to use
for testing.

We're lacking tests for correct tag priority precedence with flat
indexes; I only confirmed this manually since it is not covered in the
pip-compile or pip-sync output.

Closes #876
This commit is contained in:
konsti 2024-01-15 03:04:10 +01:00 committed by GitHub
parent 5ffbfadf66
commit e9b6b6fa36
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
42 changed files with 1069 additions and 483 deletions

View file

@ -5,10 +5,9 @@ use chrono::{DateTime, Utc};
use tracing::{instrument, warn};
use distribution_filename::DistFilename;
use distribution_types::{Dist, IndexUrl};
use pep440_rs::VersionSpecifiers;
use platform_tags::{TagPriority, Tags};
use puffin_client::SimpleMetadata;
use distribution_types::{Dist, IndexUrl, PrioritizedDistribution, ResolvableDist};
use platform_tags::Tags;
use puffin_client::{FlatIndex, SimpleMetadata};
use puffin_normalize::PackageName;
use puffin_warnings::warn_user_once;
use pypi_types::{Hashes, Yanked};
@ -18,12 +17,12 @@ use crate::python_requirement::PythonRequirement;
use crate::yanks::AllowedYanks;
/// A map from versions to distributions.
#[derive(Debug, Default)]
#[derive(Debug, Default, Clone)]
pub struct VersionMap(BTreeMap<PubGrubVersion, PrioritizedDistribution>);
impl VersionMap {
/// Initialize a [`VersionMap`] from the given metadata.
#[instrument(skip_all, fields(package_name = % package_name))]
#[instrument(skip_all, fields(package_name))]
#[allow(clippy::too_many_arguments)]
pub(crate) fn from_metadata(
metadata: SimpleMetadata,
@ -33,9 +32,11 @@ impl VersionMap {
python_requirement: &PythonRequirement,
allowed_yanks: &AllowedYanks,
exclude_newer: Option<&DateTime<Utc>>,
flat_index: Option<FlatIndex<PubGrubVersion>>,
) -> Self {
// If we have packages of the same name from find links, give them priority; otherwise start empty.
let mut version_map: BTreeMap<PubGrubVersion, PrioritizedDistribution> =
BTreeMap::default();
flat_index.map(|overrides| overrides.0).unwrap_or_default();
// Collect compatible distributions.
for (version, files) in metadata {
@ -82,22 +83,24 @@ impl VersionMap {
})
});
let dist = Dist::from_registry(
filename.name.clone(),
filename.version.clone(),
DistFilename::WheelFilename(filename),
file,
index.clone(),
);
match version_map.entry(version.clone().into()) {
Entry::Occupied(mut entry) => {
entry
.get_mut()
.insert_built(dist, requires_python, hash, priority);
entry.get_mut().insert_built(
dist,
requires_python,
Some(hash),
priority,
);
}
Entry::Vacant(entry) => {
entry.insert(PrioritizedDistribution::from_built(
dist,
requires_python,
hash,
Some(hash),
priority,
));
}
@ -105,20 +108,21 @@ impl VersionMap {
}
DistFilename::SourceDistFilename(filename) => {
let dist = Dist::from_registry(
filename.name.clone(),
filename.version.clone(),
DistFilename::SourceDistFilename(filename),
file,
index.clone(),
);
match version_map.entry(version.clone().into()) {
Entry::Occupied(mut entry) => {
entry.get_mut().insert_source(dist, requires_python, hash);
entry
.get_mut()
.insert_source(dist, requires_python, Some(hash));
}
Entry::Vacant(entry) => {
entry.insert(PrioritizedDistribution::from_source(
dist,
requires_python,
hash,
Some(hash),
));
}
}
@ -131,200 +135,24 @@ impl VersionMap {
}
/// Return the [`DistFile`] for the given version, if any.
pub(crate) fn get(&self, version: &PubGrubVersion) -> Option<ResolvableFile> {
pub(crate) fn get(&self, version: &PubGrubVersion) -> Option<ResolvableDist> {
self.0.get(version).and_then(PrioritizedDistribution::get)
}
/// Return an iterator over the versions and distributions.
pub(crate) fn iter(
&self,
) -> impl DoubleEndedIterator<Item = (&PubGrubVersion, ResolvableFile)> {
) -> impl DoubleEndedIterator<Item = (&PubGrubVersion, ResolvableDist)> {
self.0
.iter()
.filter_map(|(version, file)| Some((version, file.get()?)))
.filter_map(|(version, dist)| Some((version, dist.get()?)))
}
/// Return the [`Hashes`] for the given version, if any.
pub(crate) fn hashes(&self, version: &PubGrubVersion) -> Vec<Hashes> {
self.0
.get(version)
.map(|file| file.hashes.clone())
.map(|file| file.hashes().to_vec())
.unwrap_or_default()
}
}
/// Attach its requires-python to a [`Dist`], since downstream needs this information to filter
/// [`PrioritizedDistribution`].
#[derive(Debug)]
pub(crate) struct DistRequiresPython {
/// The distribution itself.
pub(crate) dist: Dist,
/// The version specifiers attached to `dist`, if any; presumably the distribution's
/// `Requires-Python` constraint (TODO confirm against the code that populates it).
pub(crate) requires_python: Option<VersionSpecifiers>,
}
/// The candidate distributions tracked for a single package version: at most one source
/// distribution, one platform-compatible wheel (with its tag priority) and one
/// platform-incompatible wheel, plus the hashes collected for that version.
#[derive(Debug)]
struct PrioritizedDistribution {
/// An arbitrary source distribution for the package version.
source: Option<DistRequiresPython>,
/// The highest-priority, platform-compatible wheel for the package version.
compatible_wheel: Option<(DistRequiresPython, TagPriority)>,
/// An arbitrary, platform-incompatible wheel for the package version.
incompatible_wheel: Option<DistRequiresPython>,
/// The hashes for each distribution.
hashes: Vec<Hashes>,
}
impl PrioritizedDistribution {
    /// Build a [`PrioritizedDistribution`] seeded with a single wheel.
    ///
    /// When `priority` is `Some`, the wheel is stored in the compatible slot together with that
    /// priority; when it is `None`, the wheel is stored in the incompatible slot. In both cases
    /// the source slot starts empty and `hash` becomes the only recorded hash.
    fn from_built(
        dist: Dist,
        requires_python: Option<VersionSpecifiers>,
        hash: Hashes,
        priority: Option<TagPriority>,
    ) -> Self {
        let wheel = DistRequiresPython {
            dist,
            requires_python,
        };
        // Exactly one of the two wheel slots is filled, selected by the presence of a priority.
        let (compatible_wheel, incompatible_wheel) = match priority {
            Some(priority) => (Some((wheel, priority)), None),
            None => (None, Some(wheel)),
        };
        Self {
            source: None,
            compatible_wheel,
            incompatible_wheel,
            hashes: vec![hash],
        }
    }

    /// Build a [`PrioritizedDistribution`] seeded with a single source distribution.
    ///
    /// Both wheel slots start empty and `hash` becomes the only recorded hash.
    fn from_source(dist: Dist, requires_python: Option<VersionSpecifiers>, hash: Hashes) -> Self {
        let source_dist = DistRequiresPython {
            dist,
            requires_python,
        };
        Self {
            source: Some(source_dist),
            compatible_wheel: None,
            incompatible_wheel: None,
            hashes: vec![hash],
        }
    }

    /// Offer another wheel for this package version.
    ///
    /// A wheel with a priority replaces the stored compatible wheel only if that slot is empty
    /// or the new priority is strictly greater than the stored one. A wheel without a priority
    /// is kept only if no incompatible wheel has been stored yet. `hash` is always recorded,
    /// whether or not the wheel itself was kept.
    fn insert_built(
        &mut self,
        dist: Dist,
        requires_python: Option<VersionSpecifiers>,
        hash: Hashes,
        priority: Option<TagPriority>,
    ) {
        match priority {
            Some(priority) => {
                // Ties keep the wheel that is already stored.
                let wins = match &self.compatible_wheel {
                    Some((_, existing_priority)) => priority > *existing_priority,
                    None => true,
                };
                if wins {
                    let wheel = DistRequiresPython {
                        dist,
                        requires_python,
                    };
                    self.compatible_wheel = Some((wheel, priority));
                }
            }
            None => {
                // The first incompatible wheel seen is the one that is kept.
                if self.incompatible_wheel.is_none() {
                    self.incompatible_wheel = Some(DistRequiresPython {
                        dist,
                        requires_python,
                    });
                }
            }
        }
        self.hashes.push(hash);
    }

    /// Offer another source distribution for this package version.
    ///
    /// Only the first source distribution is kept; `hash` is always recorded.
    fn insert_source(
        &mut self,
        dist: Dist,
        requires_python: Option<VersionSpecifiers>,
        hash: Hashes,
    ) {
        self.source.get_or_insert_with(|| DistRequiresPython {
            dist,
            requires_python,
        });
        self.hashes.push(hash);
    }

    /// Pick the distribution(s) to use for this package version, if any are usable.
    ///
    /// Returns `None` when there is neither a compatible wheel nor a source distribution.
    fn get(&self) -> Option<ResolvableFile> {
        // A platform-compatible wheel always wins.
        if let Some((wheel, _)) = &self.compatible_wheel {
            return Some(ResolvableFile::CompatibleWheel(wheel));
        }

        // Without a compatible wheel, a source distribution is required; an incompatible wheel
        // alone is not returned.
        let source_dist = self.source.as_ref()?;

        Some(match &self.incompatible_wheel {
            // Pair the source distribution with the incompatible wheel. We assume that all
            // distributions have the same metadata for a given package version, and that a
            // source distribution can be built, but using the wheel is faster.
            Some(wheel) => ResolvableFile::IncompatibleWheel(source_dist, wheel),
            None => ResolvableFile::SourceDist(source_dist),
        })
    }
}
/// The borrowed distribution(s) selected for a package version, distinguishing the one used
/// for resolution from the one used for installation.
#[derive(Debug, Clone)]
pub(crate) enum ResolvableFile<'a> {
/// The distribution should be resolved and installed using a source distribution.
SourceDist(&'a DistRequiresPython),
/// The distribution should be resolved and installed using a wheel distribution.
CompatibleWheel(&'a DistRequiresPython),
/// The distribution should be resolved using an incompatible wheel distribution, but
/// installed using a source distribution.
/// The first field is the source distribution, the second the incompatible wheel.
IncompatibleWheel(&'a DistRequiresPython, &'a DistRequiresPython),
}
impl<'a> ResolvableFile<'a> {
/// Return the [`DistFile`] to use during resolution.
pub(crate) fn resolve(&self) -> &DistRequiresPython {
match *self {
ResolvableFile::SourceDist(sdist) => sdist,
ResolvableFile::CompatibleWheel(wheel) => wheel,
ResolvableFile::IncompatibleWheel(_, wheel) => wheel,
}
}
/// Return the [`DistFile`] to use during installation.
pub(crate) fn install(&self) -> &DistRequiresPython {
match *self {
ResolvableFile::SourceDist(sdist) => sdist,
ResolvableFile::CompatibleWheel(wheel) => wheel,
ResolvableFile::IncompatibleWheel(sdist, _) => sdist,
}
}
}