Support unnamed requirements in --require-hashes (#2993)

## Summary

This PR enables `--require-hashes` with unnamed requirements. The key
change is that `PackageId` becomes `VersionId` (since it refers to a
package at a specific version), and the new `PackageId` consists of
_either_ a package name _or_ a URL. The hashes are keyed by `PackageId`,
so we can generate the `RequiredHashes` before we have names for all
packages, and enforce them throughout.

Closes #2979.
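As a rough sketch of the keying scheme described above (the actual `PackageId` and `VersionId` definitions live in `distribution_types` and are not shown in this diff, so the shapes below are assumptions for illustration only):

```rust
use rustc_hash::FxHashMap;
use url::Url;
use pypi_types::HashDigest;
use uv_normalize::PackageName;

/// Approximation of the new `PackageId`: a package is identified by
/// _either_ a registry name _or_ a direct URL, so unnamed (URL-only)
/// requirements can be keyed before their names are known.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum PackageId {
    Registry(PackageName),
    Url(String), // stored as a plain string here; the real type may normalize the URL
}

impl PackageId {
    fn from_registry(name: PackageName) -> Self {
        Self::Registry(name)
    }

    fn from_url(url: &Url) -> Self {
        Self::Url(url.to_string())
    }
}

/// Required hashes are then keyed by `PackageId` rather than `PackageName`.
type RequiredHashes = FxHashMap<PackageId, Vec<HashDigest>>;
```

With this keying, a `--require-hashes` entry for an unnamed requirement can be matched by its URL, without waiting for the package name to be resolved.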
Charlie Marsh, 2024-04-11 11:26:50 -04:00 (committed by GitHub)
commit 96c3c2e774, parent d56d142520
25 changed files with 256 additions and 185 deletions

@@ -30,6 +30,7 @@ rustc-hash = { workspace = true }
serde = { workspace = true, optional = true }
serde_json = { workspace = true, optional = true }
thiserror = { workspace = true }
url = { workspace = true }
[features]
default = []

@@ -1,8 +1,10 @@
use distribution_types::HashPolicy;
use rustc_hash::FxHashMap;
use std::str::FromStr;
use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl};
use rustc_hash::FxHashMap;
use url::Url;
use distribution_types::{DistributionMetadata, HashPolicy, PackageId};
use pep508_rs::{MarkerEnvironment, RequirementsTxtRequirement, VersionOrUrl};
use pypi_types::{HashDigest, HashError};
use uv_normalize::PackageName;
@@ -14,74 +16,115 @@ pub enum HashStrategy {
Generate,
/// Hashes should be validated against a pre-defined list of hashes. If necessary, hashes should
/// be generated so as to ensure that the archive is valid.
Validate(FxHashMap<PackageName, Vec<HashDigest>>),
Validate(FxHashMap<PackageId, Vec<HashDigest>>),
}
impl HashStrategy {
/// Return the [`HashPolicy`] for the given package.
pub fn get(&self, package_name: &PackageName) -> HashPolicy {
/// Return the [`HashPolicy`] for the given distribution.
pub fn get<T: DistributionMetadata>(&self, distribution: &T) -> HashPolicy {
match self {
Self::None => HashPolicy::None,
Self::Generate => HashPolicy::Generate,
Self::Validate(hashes) => hashes
.get(package_name)
.get(&distribution.package_id())
.map(Vec::as_slice)
.map_or(HashPolicy::None, HashPolicy::Validate),
}
}
/// Returns `true` if the given package is allowed.
pub fn allows(&self, package_name: &PackageName) -> bool {
/// Return the [`HashPolicy`] for the given registry-based package.
pub fn get_package(&self, name: &PackageName) -> HashPolicy {
match self {
Self::None => true,
Self::Generate => true,
Self::Validate(hashes) => hashes.contains_key(package_name),
Self::None => HashPolicy::None,
Self::Generate => HashPolicy::Generate,
Self::Validate(hashes) => hashes
.get(&PackageId::from_registry(name.clone()))
.map(Vec::as_slice)
.map_or(HashPolicy::None, HashPolicy::Validate),
}
}
/// Generate the required hashes from a set of [`Requirement`] entries.
/// Return the [`HashPolicy`] for the given direct URL package.
pub fn get_url(&self, url: &Url) -> HashPolicy {
match self {
Self::None => HashPolicy::None,
Self::Generate => HashPolicy::Generate,
Self::Validate(hashes) => hashes
.get(&PackageId::from_url(url))
.map(Vec::as_slice)
.map_or(HashPolicy::None, HashPolicy::Validate),
}
}
/// Returns `true` if the given registry-based package is allowed.
pub fn allows_package(&self, name: &PackageName) -> bool {
match self {
Self::None => true,
Self::Generate => true,
Self::Validate(hashes) => hashes.contains_key(&PackageId::from_registry(name.clone())),
}
}
/// Returns `true` if the given direct URL package is allowed.
pub fn allows_url(&self, url: &Url) -> bool {
match self {
Self::None => true,
Self::Generate => true,
Self::Validate(hashes) => hashes.contains_key(&PackageId::from_url(url)),
}
}
/// Generate the required hashes from a set of [`RequirementsTxtRequirement`] entries.
pub fn from_requirements(
requirements: impl Iterator<Item = (Requirement, Vec<String>)>,
requirements: impl Iterator<Item = (RequirementsTxtRequirement, Vec<String>)>,
markers: &MarkerEnvironment,
) -> Result<Self, HashStrategyError> {
let mut hashes = FxHashMap::<PackageName, Vec<HashDigest>>::default();
let mut hashes = FxHashMap::<PackageId, Vec<HashDigest>>::default();
// For each requirement, map from name to allowed hashes. We use the last entry for each
// package.
//
// For now, unnamed requirements are unsupported. This should be fine, since `--require-hashes`
// tends to be used after `pip-compile`, which will always output named requirements.
//
// TODO(charlie): Preserve hashes from `requirements.txt` through to this pass, so that we
// can iterate over requirements directly, rather than iterating over the entries.
for (requirement, digests) in requirements {
if !requirement.evaluate_markers(markers, &[]) {
continue;
}
// Every requirement must be either a pinned version or a direct URL.
match requirement.version_or_url.as_ref() {
Some(VersionOrUrl::Url(_)) => {
// Direct URLs are always allowed.
}
Some(VersionOrUrl::VersionSpecifier(specifiers)) => {
if specifiers
.iter()
.any(|specifier| matches!(specifier.operator(), pep440_rs::Operator::Equal))
{
// Pinned versions are allowed.
} else {
return Err(HashStrategyError::UnpinnedRequirement(
requirement.to_string(),
));
let id = match &requirement {
RequirementsTxtRequirement::Pep508(requirement) => {
match requirement.version_or_url.as_ref() {
Some(VersionOrUrl::Url(url)) => {
// Direct URLs are always allowed.
PackageId::from_url(url)
}
Some(VersionOrUrl::VersionSpecifier(specifiers)) => {
// Must be a single specifier.
let [specifier] = specifiers.as_ref() else {
return Err(HashStrategyError::UnpinnedRequirement(
requirement.to_string(),
));
};
// Must be pinned to a specific version.
if *specifier.operator() != pep440_rs::Operator::Equal {
return Err(HashStrategyError::UnpinnedRequirement(
requirement.to_string(),
));
}
PackageId::from_registry(requirement.name.clone())
}
None => {
return Err(HashStrategyError::UnpinnedRequirement(
requirement.to_string(),
))
}
}
}
None => {
return Err(HashStrategyError::UnpinnedRequirement(
requirement.to_string(),
))
RequirementsTxtRequirement::Unnamed(requirement) => {
// Direct URLs are always allowed.
PackageId::from_url(&requirement.url)
}
}
};
// Every requirement must include a hash.
if digests.is_empty() {
@@ -95,8 +138,7 @@ impl HashStrategy {
.collect::<Result<Vec<_>, _>>()
.unwrap();
// TODO(charlie): Extract hashes from URL fragments.
hashes.insert(requirement.name, digests);
hashes.insert(id, digests);
}
Ok(Self::Validate(hashes))
@@ -107,8 +149,6 @@ impl HashStrategy {
pub enum HashStrategyError {
#[error(transparent)]
Hash(#[from] HashError),
#[error("Unnamed requirements are not supported in `--require-hashes`")]
UnnamedRequirement,
#[error("In `--require-hashes` mode, all requirement must have their versions pinned with `==`, but found: {0}")]
UnpinnedRequirement(String),
#[error("In `--require-hashes` mode, all requirement must have a hash, but none were provided for: {0}")]

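A minimal usage sketch of the lookups added above, assuming only the signatures shown in this hunk (`allows_package`, `get_url`, and the `HashPolicy` variants) and that the code sits alongside `HashStrategy`; the package name, URL, and `check` helper are placeholders:

```rust
use std::str::FromStr;

use distribution_types::HashPolicy;
use url::Url;
use uv_normalize::PackageName;

/// Hypothetical caller: `strategy` is assumed to be the `HashStrategy::Validate(..)`
/// produced by `HashStrategy::from_requirements`.
fn check(strategy: &HashStrategy) -> Result<(), Box<dyn std::error::Error>> {
    // A named, pinned requirement is keyed by its registry name...
    let name = PackageName::from_str("flask")?;
    if !strategy.allows_package(&name) {
        return Err("flask is not listed in the hash set".into());
    }

    // ...while an unnamed requirement is keyed by its direct URL.
    let url = Url::parse("https://example.com/flask-3.0.0-py3-none-any.whl")?;
    match strategy.get_url(&url) {
        HashPolicy::Validate(digests) => {
            // These digests are enforced when the archive is fetched.
            assert!(!digests.is_empty());
        }
        _ => {
            // `Generate` or `None`: no pre-declared hashes for this URL.
        }
    }

    Ok(())
}
```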
@@ -95,13 +95,13 @@ pub trait BuildContext: Sync {
///
/// For PEP 517 builds, this calls `get_requires_for_build_wheel`.
///
/// `package_id` is for error reporting only.
/// `version_id` is for error reporting only.
/// `dist` is for safety checks and may be null for editable builds.
fn setup_build<'a>(
&'a self,
source: &'a Path,
subdirectory: Option<&'a Path>,
package_id: &'a str,
version_id: &'a str,
dist: Option<&'a SourceDist>,
build_kind: BuildKind,
) -> impl Future<Output = Result<Self::SourceDistBuilder>> + Send + 'a;