Track file index (#452)

Track the index (or at least its URL) from which each file was obtained,
and thread it through the source code.

Fixes #448
This commit is contained in:
konsti 2023-11-20 09:48:16 +01:00 committed by GitHub
parent 6fd582f8b9
commit 46bb18f06e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 86 additions and 39 deletions

1
Cargo.lock generated
View file

@ -2765,6 +2765,7 @@ dependencies = [
"test-case",
"thiserror",
"tracing",
"url",
]
[[package]]

View file

@ -5,7 +5,7 @@ use url::Url;
use pep440_rs::Version;
use puffin_normalize::PackageName;
use pypi_types::File;
use pypi_types::{File, IndexUrl};
pub use crate::any::*;
pub use crate::cached::*;
@ -62,6 +62,7 @@ pub struct RegistryBuiltDist {
pub name: PackageName,
pub version: Version,
pub file: File,
pub index: IndexUrl,
}
/// A built distribution (wheel) that exists at an arbitrary URL.
@ -77,6 +78,7 @@ pub struct RegistrySourceDist {
pub name: PackageName,
pub version: Version,
pub file: File,
pub index: IndexUrl,
}
/// A source distribution that exists at an arbitrary URL.
@ -95,7 +97,7 @@ pub struct GitSourceDist {
impl Dist {
/// Create a [`Dist`] for a registry-based distribution.
pub fn from_registry(name: PackageName, version: Version, file: File) -> Self {
pub fn from_registry(name: PackageName, version: Version, file: File, index: IndexUrl) -> Self {
if Path::new(&file.filename)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
@ -104,12 +106,14 @@ impl Dist {
name,
version,
file,
index,
}))
} else {
Self::Source(SourceDist::Registry(RegistrySourceDist {
name,
version,
file,
index,
}))
}
}

View file

@ -19,7 +19,7 @@ use url::Url;
use distribution_filename::WheelFilename;
use install_wheel_rs::find_dist_info;
use puffin_normalize::PackageName;
use pypi_types::{File, Metadata21, SimpleJson};
use pypi_types::{File, IndexUrl, Metadata21, SimpleJson};
use crate::cached_client::CachedClient;
use crate::error::Error;
@ -30,8 +30,8 @@ use crate::remote_metadata::{
/// A builder for a [`RegistryClient`].
#[derive(Debug, Clone)]
pub struct RegistryClientBuilder {
index: Url,
extra_index: Vec<Url>,
index: IndexUrl,
extra_index: Vec<IndexUrl>,
no_index: bool,
proxy: Url,
retries: u32,
@ -41,7 +41,7 @@ pub struct RegistryClientBuilder {
impl RegistryClientBuilder {
pub fn new(cache: impl Into<PathBuf>) -> Self {
Self {
index: Url::parse("https://pypi.org/simple").unwrap(),
index: IndexUrl::from(Url::parse("https://pypi.org/simple").unwrap()),
extra_index: vec![],
no_index: false,
proxy: Url::parse("https://pypi-metadata.ruff.rs").unwrap(),
@ -54,13 +54,13 @@ impl RegistryClientBuilder {
impl RegistryClientBuilder {
#[must_use]
pub fn index(mut self, index: Url) -> Self {
self.index = index;
self.index = IndexUrl::from(index);
self
}
#[must_use]
pub fn extra_index(mut self, extra_index: Vec<Url>) -> Self {
self.extra_index = extra_index;
self.extra_index = extra_index.into_iter().map(IndexUrl::from).collect();
self
}
@ -135,8 +135,8 @@ impl RegistryClientBuilder {
// TODO(konstin): Clean up the clients once we moved everything to common caching
#[derive(Debug, Clone)]
pub struct RegistryClient {
pub(crate) index: Url,
pub(crate) extra_index: Vec<Url>,
pub(crate) index: IndexUrl,
pub(crate) extra_index: Vec<IndexUrl>,
/// Ignore the package index, instead relying on local archives and caches.
pub(crate) no_index: bool,
pub(crate) client: ClientWithMiddleware,
@ -149,14 +149,14 @@ pub struct RegistryClient {
impl RegistryClient {
/// Fetch a package from the `PyPI` simple API.
pub async fn simple(&self, package_name: PackageName) -> Result<SimpleJson, Error> {
pub async fn simple(&self, package_name: PackageName) -> Result<(IndexUrl, SimpleJson), Error> {
if self.no_index {
return Err(Error::NoIndex(package_name.as_ref().to_string()));
}
for index in std::iter::once(&self.index).chain(self.extra_index.iter()) {
// Format the URL for PyPI.
let mut url = index.clone();
let mut url: Url = index.clone().into();
url.path_segments_mut().unwrap().push(package_name.as_ref());
url.path_segments_mut().unwrap().push("");
url.set_query(Some("format=application/vnd.pypi.simple.v1+json"));
@ -170,8 +170,9 @@ impl RegistryClient {
// Fetch from the index.
match self.simple_impl(&url).await {
Ok(text) => {
return serde_json::from_str(&text)
.map_err(move |e| Error::from_json_err(e, url));
let data = serde_json::from_str(&text)
.map_err(move |e| Error::from_json_err(e, url))?;
return Ok((index.clone(), data));
}
Err(err) => {
if err.status() == Some(StatusCode::NOT_FOUND) {

View file

@ -17,7 +17,7 @@ use platform_tags::{TagPriority, Tags};
use puffin_client::RegistryClient;
use puffin_interpreter::InterpreterInfo;
use puffin_normalize::PackageName;
use pypi_types::{File, SimpleJson};
use pypi_types::{File, IndexUrl, SimpleJson};
use crate::error::ResolveError;
use crate::resolution::Resolution;
@ -68,7 +68,9 @@ impl<'a> DistFinder<'a> {
Request::Package(requirement) => self
.client
.simple(requirement.name.clone())
.map_ok(move |metadata| Response::Package(requirement, metadata)),
.map_ok(move |(index, metadata)| {
Response::Package(requirement, index, metadata)
}),
})
.buffer_unordered(32)
.ready_chunks(32);
@ -104,9 +106,10 @@ impl<'a> DistFinder<'a> {
for result in chunk {
let result: Response = result?;
match result {
Response::Package(requirement, metadata) => {
Response::Package(requirement, index, metadata) => {
// Pick a version that satisfies the requirement.
let Some(distribution) = self.select(&requirement, metadata.files) else {
let Some(distribution) = self.select(&requirement, &index, metadata.files)
else {
return Err(ResolveError::NotFound(requirement));
};
@ -134,7 +137,12 @@ impl<'a> DistFinder<'a> {
}
/// Select a version that satisfies the requirement, preferring wheels to source distributions.
fn select(&self, requirement: &Requirement, files: Vec<File>) -> Option<Dist> {
fn select(
&self,
requirement: &Requirement,
index: &IndexUrl,
files: Vec<File>,
) -> Option<Dist> {
let mut best_version: Option<Version> = None;
let mut best_wheel: Option<(Dist, TagPriority)> = None;
let mut best_sdist: Option<Dist> = None;
@ -173,7 +181,7 @@ impl<'a> DistFinder<'a> {
.map_or(true, |(.., existing)| priority > *existing)
{
best_wheel = Some((
Dist::from_registry(wheel.name, wheel.version, file),
Dist::from_registry(wheel.name, wheel.version, file, index.clone()),
priority,
));
}
@ -197,7 +205,12 @@ impl<'a> DistFinder<'a> {
if requirement.is_satisfied_by(&sdist.version) {
best_version = Some(sdist.version.clone());
best_sdist = Some(Dist::from_registry(sdist.name, sdist.version, file));
best_sdist = Some(Dist::from_registry(
sdist.name,
sdist.version,
file,
index.clone(),
));
}
}
}
@ -216,7 +229,7 @@ enum Request {
#[derive(Debug)]
enum Response {
/// The returned metadata for a package.
Package(Requirement, SimpleJson),
Package(Requirement, IndexUrl, SimpleJson),
}
pub trait Reporter: Send + Sync {

View file

@ -14,7 +14,7 @@ use distribution_types::{BuiltDist, Dist, Metadata, SourceDist};
use pep440_rs::{Version, VersionSpecifier, VersionSpecifiers};
use pep508_rs::{Requirement, VersionOrUrl};
use puffin_normalize::PackageName;
use pypi_types::File;
use pypi_types::{File, IndexUrl};
use crate::pubgrub::{PubGrubPackage, PubGrubPriority, PubGrubVersion};
@ -58,7 +58,7 @@ impl Graph {
/// Create a new graph from the resolved `PubGrub` state.
pub fn from_state(
selection: &SelectedDependencies<PubGrubPackage, PubGrubVersion>,
pins: &FxHashMap<PackageName, FxHashMap<Version, File>>,
pins: &FxHashMap<PackageName, FxHashMap<Version, (IndexUrl, File)>>,
redirects: &WaitMap<Url, Url>,
state: &State<PubGrubPackage, Range<PubGrubVersion>, PubGrubPriority>,
) -> Self {
@ -73,12 +73,13 @@ impl Graph {
match package {
PubGrubPackage::Package(package_name, None, None) => {
let version = Version::from(version.clone());
let file = pins
let (index, file) = pins
.get(package_name)
.and_then(|versions| versions.get(&version))
.unwrap()
.clone();
let pinned_package = Dist::from_registry(package_name.clone(), version, file);
let pinned_package =
Dist::from_registry(package_name.clone(), version, file, index);
let index = graph.add_node(pinned_package);
inverse.insert(package_name, index);

View file

@ -27,7 +27,7 @@ use puffin_cache::CanonicalUrl;
use puffin_client::RegistryClient;
use puffin_normalize::{ExtraName, PackageName};
use puffin_traits::BuildContext;
use pypi_types::{File, Metadata21, SimpleJson};
use pypi_types::{File, IndexUrl, Metadata21, SimpleJson};
use crate::candidate_selector::CandidateSelector;
use crate::distribution::{BuiltDistFetcher, SourceDistFetcher, SourceDistributionReporter};
@ -330,7 +330,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
let Some(entry) = self.index.packages.get(package_name) else {
continue;
};
let version_map = entry.value();
let (index, version_map) = entry.value();
// Try to find a compatible version. If there aren't any compatible versions,
// short-circuit and return `None`.
@ -345,6 +345,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
candidate.package_name,
candidate.version.into(),
candidate.file.into(),
index.clone(),
);
request_sink.unbounded_send(Request::Dist(distribution))?;
}
@ -358,7 +359,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
&self,
package: &PubGrubPackage,
range: &Range<PubGrubVersion>,
pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, File>>,
pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, (IndexUrl, File)>>,
in_flight: &mut InFlight,
request_sink: &futures::channel::mpsc::UnboundedSender<Request>,
) -> Result<Option<PubGrubVersion>, ResolveError> {
@ -405,7 +406,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
PubGrubPackage::Package(package_name, _extra, None) => {
// Wait for the metadata to be available.
let entry = self.index.packages.wait(package_name).await.unwrap();
let version_map = entry.value();
let (index, version_map) = entry.value();
debug!("Searching for a compatible version of {package_name} ({range})");
@ -428,7 +429,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
.or_default()
.insert(
candidate.version.clone().into(),
candidate.file.clone().into(),
(index.clone(), candidate.file.clone().into()),
);
let version = candidate.version.clone();
@ -439,6 +440,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
candidate.package_name,
candidate.version.into(),
candidate.file.into(),
index.clone(),
);
request_sink.unbounded_send(Request::Dist(distribution))?;
}
@ -453,7 +455,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
&self,
package: &PubGrubPackage,
version: &PubGrubVersion,
pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, File>>,
pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, (IndexUrl, File)>>,
priorities: &mut PubGrubPriorities,
in_flight: &mut InFlight,
request_sink: &futures::channel::mpsc::UnboundedSender<Request>,
@ -498,7 +500,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
.unwrap(),
None => {
let versions = pins.get(package_name).unwrap();
let file = versions.get(version.into()).unwrap();
let (_index, file) = versions.get(version.into()).unwrap();
self.index
.distributions
.wait(&file.distribution_id())
@ -550,7 +552,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
while let Some(response) = response_stream.next().await {
match response? {
Response::Package(package_name, metadata) => {
Response::Package(package_name, index, metadata) => {
trace!("Received package metadata for: {package_name}");
let version_map = VersionMap::from_metadata(
metadata,
@ -559,7 +561,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
self.build_context.interpreter_info().version(),
self.exclude_newer.as_ref(),
);
self.index.packages.insert(package_name, version_map);
self.index
.packages
.insert(package_name, (index, version_map));
}
Response::Dist(Dist::Built(distribution), metadata, ..) => {
trace!("Received built distribution metadata for: {distribution}");
@ -596,7 +600,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
Request::Package(package_name) => {
self.client
.simple(package_name.clone())
.map_ok(move |metadata| Response::Package(package_name, metadata))
.map_ok(move |(index, metadata)| {
Response::Package(package_name, index, metadata)
})
.map_err(ResolveError::Client)
.await
}
@ -803,7 +809,7 @@ enum Request {
#[allow(clippy::large_enum_variant)]
enum Response {
/// The returned metadata for a package hosted on a registry.
Package(PackageName, SimpleJson),
Package(PackageName, IndexUrl, SimpleJson),
/// The returned metadata for a distribution.
Dist(Dist, Metadata21, Option<Url>),
}
@ -839,8 +845,9 @@ impl InFlight {
/// In-memory index of package metadata.
struct Index {
/// A map from package name to the metadata for that package.
packages: WaitMap<PackageName, VersionMap>,
/// A map from package name to the metadata for that package and the index where the metadata
/// came from.
packages: WaitMap<PackageName, (IndexUrl, VersionMap)>,
/// A map from distribution SHA to metadata for that distribution.
distributions: WaitMap<String, Metadata21>,

View file

@ -22,6 +22,7 @@ rfc2047-decoder = { workspace = true }
serde = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }
[dev-dependencies]
indoc = { version = "2.0.4" }

View file

@ -0,0 +1,17 @@
use url::Url;
/// The URL of a package index.
///
/// Wrapped in a dedicated newtype so that an index URL cannot be mixed up with the URL of an
/// individual file.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct IndexUrl(Url);
impl From<Url> for IndexUrl {
fn from(url: Url) -> Self {
Self(url)
}
}
impl From<IndexUrl> for Url {
fn from(index: IndexUrl) -> Self {
index.0
}
}

View file

@ -1,9 +1,11 @@
pub use direct_url::{ArchiveInfo, DirectUrl, VcsInfo, VcsKind};
pub use index_url::IndexUrl;
pub use lenient_requirement::LenientVersionSpecifiers;
pub use metadata::{Error, Metadata21};
pub use simple_json::{File, SimpleJson, Yanked};
mod direct_url;
mod index_url;
mod lenient_requirement;
mod metadata;
mod simple_json;