Track file index (#452)

Track the index (or at least its URL) that each file was obtained from,
and carry it along throughout the source code.

Fixes #448
This commit is contained in:
konsti 2023-11-20 09:48:16 +01:00 committed by GitHub
parent 6fd582f8b9
commit 46bb18f06e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 86 additions and 39 deletions

1
Cargo.lock generated
View file

@ -2765,6 +2765,7 @@ dependencies = [
"test-case", "test-case",
"thiserror", "thiserror",
"tracing", "tracing",
"url",
] ]
[[package]] [[package]]

View file

@ -5,7 +5,7 @@ use url::Url;
use pep440_rs::Version; use pep440_rs::Version;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
use pypi_types::File; use pypi_types::{File, IndexUrl};
pub use crate::any::*; pub use crate::any::*;
pub use crate::cached::*; pub use crate::cached::*;
@ -62,6 +62,7 @@ pub struct RegistryBuiltDist {
pub name: PackageName, pub name: PackageName,
pub version: Version, pub version: Version,
pub file: File, pub file: File,
pub index: IndexUrl,
} }
/// A built distribution (wheel) that exists at an arbitrary URL. /// A built distribution (wheel) that exists at an arbitrary URL.
@ -77,6 +78,7 @@ pub struct RegistrySourceDist {
pub name: PackageName, pub name: PackageName,
pub version: Version, pub version: Version,
pub file: File, pub file: File,
pub index: IndexUrl,
} }
/// A source distribution that exists at an arbitrary URL. /// A source distribution that exists at an arbitrary URL.
@ -95,7 +97,7 @@ pub struct GitSourceDist {
impl Dist { impl Dist {
/// Create a [`Dist`] for a registry-based distribution. /// Create a [`Dist`] for a registry-based distribution.
pub fn from_registry(name: PackageName, version: Version, file: File) -> Self { pub fn from_registry(name: PackageName, version: Version, file: File, index: IndexUrl) -> Self {
if Path::new(&file.filename) if Path::new(&file.filename)
.extension() .extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
@ -104,12 +106,14 @@ impl Dist {
name, name,
version, version,
file, file,
index,
})) }))
} else { } else {
Self::Source(SourceDist::Registry(RegistrySourceDist { Self::Source(SourceDist::Registry(RegistrySourceDist {
name, name,
version, version,
file, file,
index,
})) }))
} }
} }

View file

@ -19,7 +19,7 @@ use url::Url;
use distribution_filename::WheelFilename; use distribution_filename::WheelFilename;
use install_wheel_rs::find_dist_info; use install_wheel_rs::find_dist_info;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
use pypi_types::{File, Metadata21, SimpleJson}; use pypi_types::{File, IndexUrl, Metadata21, SimpleJson};
use crate::cached_client::CachedClient; use crate::cached_client::CachedClient;
use crate::error::Error; use crate::error::Error;
@ -30,8 +30,8 @@ use crate::remote_metadata::{
/// A builder for an [`RegistryClient`]. /// A builder for an [`RegistryClient`].
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct RegistryClientBuilder { pub struct RegistryClientBuilder {
index: Url, index: IndexUrl,
extra_index: Vec<Url>, extra_index: Vec<IndexUrl>,
no_index: bool, no_index: bool,
proxy: Url, proxy: Url,
retries: u32, retries: u32,
@ -41,7 +41,7 @@ pub struct RegistryClientBuilder {
impl RegistryClientBuilder { impl RegistryClientBuilder {
pub fn new(cache: impl Into<PathBuf>) -> Self { pub fn new(cache: impl Into<PathBuf>) -> Self {
Self { Self {
index: Url::parse("https://pypi.org/simple").unwrap(), index: IndexUrl::from(Url::parse("https://pypi.org/simple").unwrap()),
extra_index: vec![], extra_index: vec![],
no_index: false, no_index: false,
proxy: Url::parse("https://pypi-metadata.ruff.rs").unwrap(), proxy: Url::parse("https://pypi-metadata.ruff.rs").unwrap(),
@ -54,13 +54,13 @@ impl RegistryClientBuilder {
impl RegistryClientBuilder { impl RegistryClientBuilder {
#[must_use] #[must_use]
pub fn index(mut self, index: Url) -> Self { pub fn index(mut self, index: Url) -> Self {
self.index = index; self.index = IndexUrl::from(index);
self self
} }
#[must_use] #[must_use]
pub fn extra_index(mut self, extra_index: Vec<Url>) -> Self { pub fn extra_index(mut self, extra_index: Vec<Url>) -> Self {
self.extra_index = extra_index; self.extra_index = extra_index.into_iter().map(IndexUrl::from).collect();
self self
} }
@ -135,8 +135,8 @@ impl RegistryClientBuilder {
// TODO(konstin): Clean up the clients once we moved everything to common caching // TODO(konstin): Clean up the clients once we moved everything to common caching
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct RegistryClient { pub struct RegistryClient {
pub(crate) index: Url, pub(crate) index: IndexUrl,
pub(crate) extra_index: Vec<Url>, pub(crate) extra_index: Vec<IndexUrl>,
/// Ignore the package index, instead relying on local archives and caches. /// Ignore the package index, instead relying on local archives and caches.
pub(crate) no_index: bool, pub(crate) no_index: bool,
pub(crate) client: ClientWithMiddleware, pub(crate) client: ClientWithMiddleware,
@ -149,14 +149,14 @@ pub struct RegistryClient {
impl RegistryClient { impl RegistryClient {
/// Fetch a package from the `PyPI` simple API. /// Fetch a package from the `PyPI` simple API.
pub async fn simple(&self, package_name: PackageName) -> Result<SimpleJson, Error> { pub async fn simple(&self, package_name: PackageName) -> Result<(IndexUrl, SimpleJson), Error> {
if self.no_index { if self.no_index {
return Err(Error::NoIndex(package_name.as_ref().to_string())); return Err(Error::NoIndex(package_name.as_ref().to_string()));
} }
for index in std::iter::once(&self.index).chain(self.extra_index.iter()) { for index in std::iter::once(&self.index).chain(self.extra_index.iter()) {
// Format the URL for PyPI. // Format the URL for PyPI.
let mut url = index.clone(); let mut url: Url = index.clone().into();
url.path_segments_mut().unwrap().push(package_name.as_ref()); url.path_segments_mut().unwrap().push(package_name.as_ref());
url.path_segments_mut().unwrap().push(""); url.path_segments_mut().unwrap().push("");
url.set_query(Some("format=application/vnd.pypi.simple.v1+json")); url.set_query(Some("format=application/vnd.pypi.simple.v1+json"));
@ -170,8 +170,9 @@ impl RegistryClient {
// Fetch from the index. // Fetch from the index.
match self.simple_impl(&url).await { match self.simple_impl(&url).await {
Ok(text) => { Ok(text) => {
return serde_json::from_str(&text) let data = serde_json::from_str(&text)
.map_err(move |e| Error::from_json_err(e, url)); .map_err(move |e| Error::from_json_err(e, url))?;
return Ok((index.clone(), data));
} }
Err(err) => { Err(err) => {
if err.status() == Some(StatusCode::NOT_FOUND) { if err.status() == Some(StatusCode::NOT_FOUND) {

View file

@ -17,7 +17,7 @@ use platform_tags::{TagPriority, Tags};
use puffin_client::RegistryClient; use puffin_client::RegistryClient;
use puffin_interpreter::InterpreterInfo; use puffin_interpreter::InterpreterInfo;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
use pypi_types::{File, SimpleJson}; use pypi_types::{File, IndexUrl, SimpleJson};
use crate::error::ResolveError; use crate::error::ResolveError;
use crate::resolution::Resolution; use crate::resolution::Resolution;
@ -68,7 +68,9 @@ impl<'a> DistFinder<'a> {
Request::Package(requirement) => self Request::Package(requirement) => self
.client .client
.simple(requirement.name.clone()) .simple(requirement.name.clone())
.map_ok(move |metadata| Response::Package(requirement, metadata)), .map_ok(move |(index, metadata)| {
Response::Package(requirement, index, metadata)
}),
}) })
.buffer_unordered(32) .buffer_unordered(32)
.ready_chunks(32); .ready_chunks(32);
@ -104,9 +106,10 @@ impl<'a> DistFinder<'a> {
for result in chunk { for result in chunk {
let result: Response = result?; let result: Response = result?;
match result { match result {
Response::Package(requirement, metadata) => { Response::Package(requirement, index, metadata) => {
// Pick a version that satisfies the requirement. // Pick a version that satisfies the requirement.
let Some(distribution) = self.select(&requirement, metadata.files) else { let Some(distribution) = self.select(&requirement, &index, metadata.files)
else {
return Err(ResolveError::NotFound(requirement)); return Err(ResolveError::NotFound(requirement));
}; };
@ -134,7 +137,12 @@ impl<'a> DistFinder<'a> {
} }
/// select a version that satisfies the requirement, preferring wheels to source distributions. /// select a version that satisfies the requirement, preferring wheels to source distributions.
fn select(&self, requirement: &Requirement, files: Vec<File>) -> Option<Dist> { fn select(
&self,
requirement: &Requirement,
index: &IndexUrl,
files: Vec<File>,
) -> Option<Dist> {
let mut best_version: Option<Version> = None; let mut best_version: Option<Version> = None;
let mut best_wheel: Option<(Dist, TagPriority)> = None; let mut best_wheel: Option<(Dist, TagPriority)> = None;
let mut best_sdist: Option<Dist> = None; let mut best_sdist: Option<Dist> = None;
@ -173,7 +181,7 @@ impl<'a> DistFinder<'a> {
.map_or(true, |(.., existing)| priority > *existing) .map_or(true, |(.., existing)| priority > *existing)
{ {
best_wheel = Some(( best_wheel = Some((
Dist::from_registry(wheel.name, wheel.version, file), Dist::from_registry(wheel.name, wheel.version, file, index.clone()),
priority, priority,
)); ));
} }
@ -197,7 +205,12 @@ impl<'a> DistFinder<'a> {
if requirement.is_satisfied_by(&sdist.version) { if requirement.is_satisfied_by(&sdist.version) {
best_version = Some(sdist.version.clone()); best_version = Some(sdist.version.clone());
best_sdist = Some(Dist::from_registry(sdist.name, sdist.version, file)); best_sdist = Some(Dist::from_registry(
sdist.name,
sdist.version,
file,
index.clone(),
));
} }
} }
} }
@ -216,7 +229,7 @@ enum Request {
#[derive(Debug)] #[derive(Debug)]
enum Response { enum Response {
/// The returned metadata for a package. /// The returned metadata for a package.
Package(Requirement, SimpleJson), Package(Requirement, IndexUrl, SimpleJson),
} }
pub trait Reporter: Send + Sync { pub trait Reporter: Send + Sync {

View file

@ -14,7 +14,7 @@ use distribution_types::{BuiltDist, Dist, Metadata, SourceDist};
use pep440_rs::{Version, VersionSpecifier, VersionSpecifiers}; use pep440_rs::{Version, VersionSpecifier, VersionSpecifiers};
use pep508_rs::{Requirement, VersionOrUrl}; use pep508_rs::{Requirement, VersionOrUrl};
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
use pypi_types::File; use pypi_types::{File, IndexUrl};
use crate::pubgrub::{PubGrubPackage, PubGrubPriority, PubGrubVersion}; use crate::pubgrub::{PubGrubPackage, PubGrubPriority, PubGrubVersion};
@ -58,7 +58,7 @@ impl Graph {
/// Create a new graph from the resolved `PubGrub` state. /// Create a new graph from the resolved `PubGrub` state.
pub fn from_state( pub fn from_state(
selection: &SelectedDependencies<PubGrubPackage, PubGrubVersion>, selection: &SelectedDependencies<PubGrubPackage, PubGrubVersion>,
pins: &FxHashMap<PackageName, FxHashMap<Version, File>>, pins: &FxHashMap<PackageName, FxHashMap<Version, (IndexUrl, File)>>,
redirects: &WaitMap<Url, Url>, redirects: &WaitMap<Url, Url>,
state: &State<PubGrubPackage, Range<PubGrubVersion>, PubGrubPriority>, state: &State<PubGrubPackage, Range<PubGrubVersion>, PubGrubPriority>,
) -> Self { ) -> Self {
@ -73,12 +73,13 @@ impl Graph {
match package { match package {
PubGrubPackage::Package(package_name, None, None) => { PubGrubPackage::Package(package_name, None, None) => {
let version = Version::from(version.clone()); let version = Version::from(version.clone());
let file = pins let (index, file) = pins
.get(package_name) .get(package_name)
.and_then(|versions| versions.get(&version)) .and_then(|versions| versions.get(&version))
.unwrap() .unwrap()
.clone(); .clone();
let pinned_package = Dist::from_registry(package_name.clone(), version, file); let pinned_package =
Dist::from_registry(package_name.clone(), version, file, index);
let index = graph.add_node(pinned_package); let index = graph.add_node(pinned_package);
inverse.insert(package_name, index); inverse.insert(package_name, index);

View file

@ -27,7 +27,7 @@ use puffin_cache::CanonicalUrl;
use puffin_client::RegistryClient; use puffin_client::RegistryClient;
use puffin_normalize::{ExtraName, PackageName}; use puffin_normalize::{ExtraName, PackageName};
use puffin_traits::BuildContext; use puffin_traits::BuildContext;
use pypi_types::{File, Metadata21, SimpleJson}; use pypi_types::{File, IndexUrl, Metadata21, SimpleJson};
use crate::candidate_selector::CandidateSelector; use crate::candidate_selector::CandidateSelector;
use crate::distribution::{BuiltDistFetcher, SourceDistFetcher, SourceDistributionReporter}; use crate::distribution::{BuiltDistFetcher, SourceDistFetcher, SourceDistributionReporter};
@ -330,7 +330,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
let Some(entry) = self.index.packages.get(package_name) else { let Some(entry) = self.index.packages.get(package_name) else {
continue; continue;
}; };
let version_map = entry.value(); let (index, version_map) = entry.value();
// Try to find a compatible version. If there aren't any compatible versions, // Try to find a compatible version. If there aren't any compatible versions,
// short-circuit and return `None`. // short-circuit and return `None`.
@ -345,6 +345,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
candidate.package_name, candidate.package_name,
candidate.version.into(), candidate.version.into(),
candidate.file.into(), candidate.file.into(),
index.clone(),
); );
request_sink.unbounded_send(Request::Dist(distribution))?; request_sink.unbounded_send(Request::Dist(distribution))?;
} }
@ -358,7 +359,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
&self, &self,
package: &PubGrubPackage, package: &PubGrubPackage,
range: &Range<PubGrubVersion>, range: &Range<PubGrubVersion>,
pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, File>>, pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, (IndexUrl, File)>>,
in_flight: &mut InFlight, in_flight: &mut InFlight,
request_sink: &futures::channel::mpsc::UnboundedSender<Request>, request_sink: &futures::channel::mpsc::UnboundedSender<Request>,
) -> Result<Option<PubGrubVersion>, ResolveError> { ) -> Result<Option<PubGrubVersion>, ResolveError> {
@ -405,7 +406,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
PubGrubPackage::Package(package_name, _extra, None) => { PubGrubPackage::Package(package_name, _extra, None) => {
// Wait for the metadata to be available. // Wait for the metadata to be available.
let entry = self.index.packages.wait(package_name).await.unwrap(); let entry = self.index.packages.wait(package_name).await.unwrap();
let version_map = entry.value(); let (index, version_map) = entry.value();
debug!("Searching for a compatible version of {package_name} ({range})"); debug!("Searching for a compatible version of {package_name} ({range})");
@ -428,7 +429,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
.or_default() .or_default()
.insert( .insert(
candidate.version.clone().into(), candidate.version.clone().into(),
candidate.file.clone().into(), (index.clone(), candidate.file.clone().into()),
); );
let version = candidate.version.clone(); let version = candidate.version.clone();
@ -439,6 +440,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
candidate.package_name, candidate.package_name,
candidate.version.into(), candidate.version.into(),
candidate.file.into(), candidate.file.into(),
index.clone(),
); );
request_sink.unbounded_send(Request::Dist(distribution))?; request_sink.unbounded_send(Request::Dist(distribution))?;
} }
@ -453,7 +455,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
&self, &self,
package: &PubGrubPackage, package: &PubGrubPackage,
version: &PubGrubVersion, version: &PubGrubVersion,
pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, File>>, pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, (IndexUrl, File)>>,
priorities: &mut PubGrubPriorities, priorities: &mut PubGrubPriorities,
in_flight: &mut InFlight, in_flight: &mut InFlight,
request_sink: &futures::channel::mpsc::UnboundedSender<Request>, request_sink: &futures::channel::mpsc::UnboundedSender<Request>,
@ -498,7 +500,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
.unwrap(), .unwrap(),
None => { None => {
let versions = pins.get(package_name).unwrap(); let versions = pins.get(package_name).unwrap();
let file = versions.get(version.into()).unwrap(); let (_index, file) = versions.get(version.into()).unwrap();
self.index self.index
.distributions .distributions
.wait(&file.distribution_id()) .wait(&file.distribution_id())
@ -550,7 +552,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
while let Some(response) = response_stream.next().await { while let Some(response) = response_stream.next().await {
match response? { match response? {
Response::Package(package_name, metadata) => { Response::Package(package_name, index, metadata) => {
trace!("Received package metadata for: {package_name}"); trace!("Received package metadata for: {package_name}");
let version_map = VersionMap::from_metadata( let version_map = VersionMap::from_metadata(
metadata, metadata,
@ -559,7 +561,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
self.build_context.interpreter_info().version(), self.build_context.interpreter_info().version(),
self.exclude_newer.as_ref(), self.exclude_newer.as_ref(),
); );
self.index.packages.insert(package_name, version_map); self.index
.packages
.insert(package_name, (index, version_map));
} }
Response::Dist(Dist::Built(distribution), metadata, ..) => { Response::Dist(Dist::Built(distribution), metadata, ..) => {
trace!("Received built distribution metadata for: {distribution}"); trace!("Received built distribution metadata for: {distribution}");
@ -596,7 +600,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
Request::Package(package_name) => { Request::Package(package_name) => {
self.client self.client
.simple(package_name.clone()) .simple(package_name.clone())
.map_ok(move |metadata| Response::Package(package_name, metadata)) .map_ok(move |(index, metadata)| {
Response::Package(package_name, index, metadata)
})
.map_err(ResolveError::Client) .map_err(ResolveError::Client)
.await .await
} }
@ -803,7 +809,7 @@ enum Request {
#[allow(clippy::large_enum_variant)] #[allow(clippy::large_enum_variant)]
enum Response { enum Response {
/// The returned metadata for a package hosted on a registry. /// The returned metadata for a package hosted on a registry.
Package(PackageName, SimpleJson), Package(PackageName, IndexUrl, SimpleJson),
/// The returned metadata for a distribution. /// The returned metadata for a distribution.
Dist(Dist, Metadata21, Option<Url>), Dist(Dist, Metadata21, Option<Url>),
} }
@ -839,8 +845,9 @@ impl InFlight {
/// In-memory index of package metadata. /// In-memory index of package metadata.
struct Index { struct Index {
/// A map from package name to the metadata for that package. /// A map from package name to the metadata for that package and the index where the metadata
packages: WaitMap<PackageName, VersionMap>, /// came from.
packages: WaitMap<PackageName, (IndexUrl, VersionMap)>,
/// A map from distribution SHA to metadata for that distribution. /// A map from distribution SHA to metadata for that distribution.
distributions: WaitMap<String, Metadata21>, distributions: WaitMap<String, Metadata21>,

View file

@ -22,6 +22,7 @@ rfc2047-decoder = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
thiserror = { workspace = true } thiserror = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }
url = { workspace = true }
[dev-dependencies] [dev-dependencies]
indoc = { version = "2.0.4" } indoc = { version = "2.0.4" }

View file

@ -0,0 +1,17 @@
use url::Url;
/// The URL of a package index.
///
/// Wrapping [`Url`] in a newtype keeps index URLs from being mixed up with file URLs
/// at the type level.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct IndexUrl(Url);
impl From<Url> for IndexUrl {
fn from(url: Url) -> Self {
Self(url)
}
}
impl From<IndexUrl> for Url {
fn from(index: IndexUrl) -> Self {
index.0
}
}

View file

@ -1,9 +1,11 @@
pub use direct_url::{ArchiveInfo, DirectUrl, VcsInfo, VcsKind}; pub use direct_url::{ArchiveInfo, DirectUrl, VcsInfo, VcsKind};
pub use index_url::IndexUrl;
pub use lenient_requirement::LenientVersionSpecifiers; pub use lenient_requirement::LenientVersionSpecifiers;
pub use metadata::{Error, Metadata21}; pub use metadata::{Error, Metadata21};
pub use simple_json::{File, SimpleJson, Yanked}; pub use simple_json::{File, SimpleJson, Yanked};
mod direct_url; mod direct_url;
mod index_url;
mod lenient_requirement; mod lenient_requirement;
mod metadata; mod metadata;
mod simple_json; mod simple_json;