Add dedicated ID types to avoid opaque strings (#642)

This allows us to enforce type safety within the resolver. For example,
in the index, we can remove `String` as a key type and enforce that
callers _must_ present us with a `PackageId`. (This actually caught one
bug, where we were using the SHA rather than the package ID. That bug
shouldn't have had any effect given where it was, since those are 1:1,
but it's still problematic.)
This commit is contained in:
Charlie Marsh 2023-12-13 19:53:33 -05:00 committed by GitHub
parent 3549d9638e
commit 8071a23863
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 115 additions and 65 deletions

View file

@ -0,0 +1,50 @@
/// A unique identifier for a package (e.g., `black==23.10.0`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct PackageId(String);
impl PackageId {
pub fn new(id: impl Into<String>) -> Self {
Self(id.into())
}
}
/// A unique identifier for a distribution (e.g., `black-23.10.0-py3-none-any.whl`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct DistributionId(String);
impl DistributionId {
pub fn new(id: impl Into<String>) -> Self {
Self(id.into())
}
}
/// A unique identifier for a resource, like a URL or a Git repository.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ResourceId(String);
impl ResourceId {
pub fn new(id: impl Into<String>) -> Self {
Self(id.into())
}
}
impl From<&PackageId> for PackageId {
/// Required for `WaitMap::wait`.
fn from(value: &PackageId) -> Self {
value.clone()
}
}
impl From<&DistributionId> for DistributionId {
/// Required for `WaitMap::wait`.
fn from(value: &DistributionId) -> Self {
value.clone()
}
}
impl From<&ResourceId> for ResourceId {
/// Required for `WaitMap::wait`.
fn from(value: &ResourceId) -> Self {
value.clone()
}
}

View file

@ -36,6 +36,7 @@
//! Since we read this information from [`direct_url.json`](https://packaging.python.org/en/latest/specifications/direct-url-data-structure/), it doesn't match the information [`Dist`] exactly.
//!
//! TODO(konstin): Track all kinds from [`Dist`]
//! TODO(konstin): Track all kinds from [`Dist`]
use std::path::{Path, PathBuf};
use std::str::FromStr;
@ -50,6 +51,7 @@ use pypi_types::{File, IndexUrl};
pub use crate::any::*;
pub use crate::cached::*;
pub use crate::error::*;
pub use crate::id::*;
pub use crate::installed::*;
pub use crate::traits::*;
@ -57,6 +59,7 @@ mod any;
mod cached;
pub mod direct_url;
mod error;
mod id;
mod installed;
mod traits;
@ -550,107 +553,109 @@ impl RemoteSource for Dist {
}
impl Identifier for Url {
fn distribution_id(&self) -> String {
puffin_cache::digest(&puffin_cache::CanonicalUrl::new(self))
fn distribution_id(&self) -> DistributionId {
DistributionId::new(puffin_cache::digest(&puffin_cache::CanonicalUrl::new(self)))
}
fn resource_id(&self) -> String {
puffin_cache::digest(&puffin_cache::RepositoryUrl::new(self))
fn resource_id(&self) -> ResourceId {
ResourceId::new(puffin_cache::digest(&puffin_cache::RepositoryUrl::new(
self,
)))
}
}
impl Identifier for File {
fn distribution_id(&self) -> String {
self.hashes.sha256.clone()
fn distribution_id(&self) -> DistributionId {
DistributionId::new(self.hashes.sha256.clone())
}
fn resource_id(&self) -> String {
self.hashes.sha256.clone()
fn resource_id(&self) -> ResourceId {
ResourceId::new(self.hashes.sha256.clone())
}
}
impl Identifier for Path {
fn distribution_id(&self) -> String {
puffin_cache::digest(&self)
fn distribution_id(&self) -> DistributionId {
DistributionId::new(puffin_cache::digest(&self))
}
fn resource_id(&self) -> String {
puffin_cache::digest(&self)
fn resource_id(&self) -> ResourceId {
ResourceId::new(puffin_cache::digest(&self))
}
}
impl Identifier for RegistryBuiltDist {
fn distribution_id(&self) -> String {
fn distribution_id(&self) -> DistributionId {
self.file.distribution_id()
}
fn resource_id(&self) -> String {
fn resource_id(&self) -> ResourceId {
self.file.resource_id()
}
}
impl Identifier for RegistrySourceDist {
fn distribution_id(&self) -> String {
fn distribution_id(&self) -> DistributionId {
self.file.distribution_id()
}
fn resource_id(&self) -> String {
fn resource_id(&self) -> ResourceId {
self.file.resource_id()
}
}
impl Identifier for DirectUrlBuiltDist {
fn distribution_id(&self) -> String {
fn distribution_id(&self) -> DistributionId {
self.url.distribution_id()
}
fn resource_id(&self) -> String {
fn resource_id(&self) -> ResourceId {
self.url.resource_id()
}
}
impl Identifier for DirectUrlSourceDist {
fn distribution_id(&self) -> String {
fn distribution_id(&self) -> DistributionId {
self.url.distribution_id()
}
fn resource_id(&self) -> String {
fn resource_id(&self) -> ResourceId {
self.url.resource_id()
}
}
impl Identifier for PathBuiltDist {
fn distribution_id(&self) -> String {
fn distribution_id(&self) -> DistributionId {
self.url.distribution_id()
}
fn resource_id(&self) -> String {
fn resource_id(&self) -> ResourceId {
self.url.resource_id()
}
}
impl Identifier for PathSourceDist {
fn distribution_id(&self) -> String {
fn distribution_id(&self) -> DistributionId {
self.url.distribution_id()
}
fn resource_id(&self) -> String {
fn resource_id(&self) -> ResourceId {
self.url.resource_id()
}
}
impl Identifier for GitSourceDist {
fn distribution_id(&self) -> String {
fn distribution_id(&self) -> DistributionId {
self.url.distribution_id()
}
fn resource_id(&self) -> String {
fn resource_id(&self) -> ResourceId {
self.url.resource_id()
}
}
impl Identifier for SourceDist {
fn distribution_id(&self) -> String {
fn distribution_id(&self) -> DistributionId {
match self {
Self::Registry(dist) => dist.distribution_id(),
Self::DirectUrl(dist) => dist.distribution_id(),
@ -659,7 +664,7 @@ impl Identifier for SourceDist {
}
}
fn resource_id(&self) -> String {
fn resource_id(&self) -> ResourceId {
match self {
Self::Registry(dist) => dist.resource_id(),
Self::DirectUrl(dist) => dist.resource_id(),
@ -670,7 +675,7 @@ impl Identifier for SourceDist {
}
impl Identifier for BuiltDist {
fn distribution_id(&self) -> String {
fn distribution_id(&self) -> DistributionId {
match self {
Self::Registry(dist) => dist.distribution_id(),
Self::DirectUrl(dist) => dist.distribution_id(),
@ -678,7 +683,7 @@ impl Identifier for BuiltDist {
}
}
fn resource_id(&self) -> String {
fn resource_id(&self) -> ResourceId {
match self {
Self::Registry(dist) => dist.resource_id(),
Self::DirectUrl(dist) => dist.resource_id(),
@ -688,14 +693,14 @@ impl Identifier for BuiltDist {
}
impl Identifier for Dist {
fn distribution_id(&self) -> String {
fn distribution_id(&self) -> DistributionId {
match self {
Self::Built(dist) => dist.distribution_id(),
Self::Source(dist) => dist.distribution_id(),
}
}
fn resource_id(&self) -> String {
fn resource_id(&self) -> ResourceId {
match self {
Self::Built(dist) => dist.resource_id(),
Self::Source(dist) => dist.resource_id(),

View file

@ -5,9 +5,9 @@ use puffin_normalize::PackageName;
use crate::error::Error;
use crate::{
AnyDist, BuiltDist, CachedDirectUrlDist, CachedDist, CachedRegistryDist, DirectUrlBuiltDist,
DirectUrlSourceDist, Dist, GitSourceDist, InstalledDirectUrlDist, InstalledDist,
InstalledRegistryDist, PathBuiltDist, PathSourceDist, RegistryBuiltDist, RegistrySourceDist,
SourceDist, VersionOrUrl,
DirectUrlSourceDist, Dist, DistributionId, GitSourceDist, InstalledDirectUrlDist,
InstalledDist, InstalledRegistryDist, PackageId, PathBuiltDist, PathSourceDist,
RegistryBuiltDist, RegistrySourceDist, ResourceId, SourceDist, VersionOrUrl,
};
pub trait Metadata {
@ -23,15 +23,15 @@ pub trait Metadata {
/// Note that this is not equivalent to a unique identifier for the _distribution_, as multiple
/// registry-based distributions (e.g., different wheels for the same package and version)
/// will return the same package ID, but different distribution IDs.
fn package_id(&self) -> String {
match self.version_or_url() {
fn package_id(&self) -> PackageId {
PackageId::new(match self.version_or_url() {
VersionOrUrl::Version(version) => {
// https://packaging.python.org/en/latest/specifications/recording-installed-packages/#the-dist-info-directory
// `version` is normalized by its `ToString` impl
format!("{}-{}", self.name().as_dist_info_name(), version)
}
VersionOrUrl::Url(url) => puffin_cache::digest(&CanonicalUrl::new(url)),
}
})
}
}
@ -46,7 +46,7 @@ pub trait RemoteSource {
pub trait Identifier {
/// Return a unique resource identifier for the distribution, like a SHA-256 hash of the
/// distribution's contents.
fn distribution_id(&self) -> String;
fn distribution_id(&self) -> DistributionId;
/// Return a unique resource identifier for the underlying resource backing the distribution.
///
@ -54,7 +54,7 @@ pub trait Identifier {
/// if the same Git repository is used for two different distributions, at two different
/// subdirectories or two different commits, then those distributions would share a resource ID,
/// but have different distribution IDs.
fn resource_id(&self) -> String;
fn resource_id(&self) -> ResourceId;
}
// Implement `Display` for all known types that implement `DistributionIdentifier`.

View file

@ -8,11 +8,11 @@ use rustc_hash::FxHashMap;
use tokio::sync::Mutex;
use tracing::error;
use distribution_types::Identifier;
use distribution_types::{Identifier, ResourceId};
/// A set of locks used to prevent concurrent access to the same resource.
#[derive(Debug, Default)]
pub(crate) struct Locks(Mutex<FxHashMap<String, Arc<Mutex<()>>>>);
pub(crate) struct Locks(Mutex<FxHashMap<ResourceId, Arc<Mutex<()>>>>);
impl Locks {
/// Acquire a lock on the given resource.

View file

@ -28,7 +28,7 @@ impl PackageName {
}
impl From<&PackageName> for PackageName {
/// Required for `WaitMap::wait`
/// Required for `WaitMap::wait`.
fn from(package_name: &PackageName) -> Self {
package_name.clone()
}

View file

@ -1,7 +1,7 @@
use pubgrub::range::Range;
use rustc_hash::FxHashMap;
use distribution_types::Dist;
use distribution_types::{Dist, Metadata};
use pep508_rs::{Requirement, VersionOrUrl};
use puffin_normalize::PackageName;
use pypi_types::IndexUrl;
@ -252,3 +252,13 @@ impl<'a> Candidate<'a> {
)
}
}
impl Metadata for Candidate<'_> {
fn name(&self) -> &PackageName {
self.name
}
fn version_or_url(&self) -> distribution_types::VersionOrUrl {
distribution_types::VersionOrUrl::Version(self.version.into())
}
}

View file

@ -62,13 +62,6 @@ impl DistFile {
Self::Sdist(sdist) => sdist.filename.as_str(),
}
}
pub(crate) fn sha256(&self) -> &str {
match self {
Self::Wheel(wheel) => &wheel.hashes.sha256,
Self::Sdist(sdist) => &sdist.hashes.sha256,
}
}
}
impl From<DistFile> for File {

View file

@ -11,7 +11,7 @@ use pubgrub::type_aliases::SelectedDependencies;
use rustc_hash::FxHashMap;
use url::Url;
use distribution_types::{BuiltDist, Dist, Metadata, SourceDist};
use distribution_types::{BuiltDist, Dist, Metadata, PackageId, SourceDist};
use pep440_rs::{Version, VersionSpecifier, VersionSpecifiers};
use pep508_rs::{Requirement, VersionOrUrl};
use puffin_normalize::{ExtraName, PackageName};
@ -82,7 +82,7 @@ impl ResolutionGraph {
pub(crate) fn from_state(
selection: &SelectedDependencies<PubGrubPackage, PubGrubVersion>,
pins: &FilePins,
distributions: &OnceMap<String, Metadata21>,
distributions: &OnceMap<PackageId, Metadata21>,
redirects: &OnceMap<Url, Url>,
state: &State<PubGrubPackage, Range<PubGrubVersion>, PubGrubPriority>,
) -> Result<Self, ResolveError> {

View file

@ -18,7 +18,7 @@ use tracing::{debug, trace};
use url::Url;
use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, Dist, Metadata, SourceDist, VersionOrUrl};
use distribution_types::{BuiltDist, Dist, Metadata, PackageId, SourceDist, VersionOrUrl};
use pep508_rs::{MarkerEnvironment, Requirement};
use platform_tags::Tags;
use puffin_cache::CanonicalUrl;
@ -453,11 +453,7 @@ impl<'a, Provider: ResolverProvider> Resolver<'a, Provider> {
};
// Emit a request to fetch the metadata for this version.
if self
.index
.distributions
.register(candidate.resolve().sha256())
{
if self.index.distributions.register(&candidate.package_id()) {
let distribution = candidate.into_distribution(index.clone());
request_sink.unbounded_send(Request::Dist(distribution))?;
}
@ -561,11 +557,7 @@ impl<'a, Provider: ResolverProvider> Resolver<'a, Provider> {
let version = candidate.version().clone();
// Emit a request to fetch the metadata for this version.
if self
.index
.distributions
.register(candidate.resolve().sha256())
{
if self.index.distributions.register(&candidate.package_id()) {
let distribution = candidate.into_distribution(index.clone());
request_sink.unbounded_send(Request::Dist(distribution))?;
}
@ -822,7 +814,7 @@ pub(crate) struct Index {
pub(crate) packages: OnceMap<PackageName, (IndexUrl, VersionMap)>,
/// A map from distribution SHA to metadata for that distribution.
pub(crate) distributions: OnceMap<String, Metadata21>,
pub(crate) distributions: OnceMap<PackageId, Metadata21>,
/// A map from source URL to precise URL.
pub(crate) redirects: OnceMap<Url, Url>,