diff --git a/Cargo.lock b/Cargo.lock index fcbc0755b..a70416778 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2024,6 +2024,16 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.7.2" @@ -4491,6 +4501,7 @@ dependencies = [ "fs-err", "futures", "install-wheel-rs", + "md-5", "nanoid", "once_cell", "pep440_rs", @@ -4502,6 +4513,7 @@ dependencies = [ "rmp-serde", "rustc-hash", "serde", + "sha2", "tempfile", "thiserror", "tokio", @@ -4527,8 +4539,11 @@ dependencies = [ "async_zip", "fs-err", "futures", + "md-5", + "pypi-types", "rayon", "rustc-hash", + "sha2", "thiserror", "tokio", "tokio-tar", @@ -4593,6 +4608,7 @@ dependencies = [ "pypi-types", "rayon", "requirements-txt", + "rmp-serde", "rustc-hash", "serde", "tempfile", @@ -4766,10 +4782,13 @@ dependencies = [ "distribution-types", "itertools 0.12.1", "once-map", + "pep440_rs", "pep508_rs", + "pypi-types", "rustc-hash", "serde", "serde_json", + "thiserror", "uv-cache", "uv-configuration", "uv-interpreter", diff --git a/Cargo.toml b/Cargo.toml index ae1963ad2..b253d8a36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,6 +94,7 @@ indoc = { version = "2.0.4" } itertools = { version = "0.12.1" } junction = { version = "1.0.0" } mailparse = { version = "0.14.0" } +md-5 = { version = "0.10.6" } miette = { version = "7.2.0" } nanoid = { version = "0.4.0" } once_cell = { version = "1.19.0" } diff --git a/PIP_COMPATIBILITY.md b/PIP_COMPATIBILITY.md index 594c60096..df70d3487 100644 --- a/PIP_COMPATIBILITY.md +++ b/PIP_COMPATIBILITY.md @@ -259,14 +259,6 @@ When uv resolutions differ from `pip` in undesirable ways, it's often a sign tha are too loose, and that the user should consider tightening them. For example, in the case of `starlette` and `fastapi`, the user could require `fastapi>=0.110.0`. -## Hash-checking mode - -While uv will include hashes via `uv pip compile --generate-hashes`, it does not support -hash-checking mode, which is a feature of `pip` that allows users to verify the integrity of -downloaded packages by checking their hashes against those provided in the `requirements.txt` file. - -In the future, uv will support hash-checking mode. For more, see [#474](https://github.com/astral-sh/uv/issues/474). - ## `pip check` At present, `uv pip check` will surface the following diagnostics: diff --git a/crates/distribution-types/src/cached.rs b/crates/distribution-types/src/cached.rs index 2a0da761d..9661d3cfe 100644 --- a/crates/distribution-types/src/cached.rs +++ b/crates/distribution-types/src/cached.rs @@ -4,9 +4,11 @@ use anyhow::Result; use distribution_filename::WheelFilename; use pep508_rs::VerbatimUrl; +use pypi_types::HashDigest; use uv_normalize::PackageName; use crate::direct_url::{DirectUrl, LocalFileUrl}; +use crate::hashed::Hashed; use crate::{ BuiltDist, Dist, DistributionMetadata, InstalledMetadata, InstalledVersion, Name, SourceDist, VersionOrUrl, @@ -25,6 +27,7 @@ pub enum CachedDist { pub struct CachedRegistryDist { pub filename: WheelFilename, pub path: PathBuf, + pub hashes: Vec, } #[derive(Debug, Clone)] @@ -33,45 +36,60 @@ pub struct CachedDirectUrlDist { pub url: VerbatimUrl, pub path: PathBuf, pub editable: bool, + pub hashes: Vec, } impl CachedDist { /// Initialize a [`CachedDist`] from a [`Dist`]. 
- pub fn from_remote(remote: Dist, filename: WheelFilename, path: PathBuf) -> Self { + pub fn from_remote( + remote: Dist, + filename: WheelFilename, + hashes: Vec<HashDigest>, + path: PathBuf, + ) -> Self { match remote { - Dist::Built(BuiltDist::Registry(_dist)) => { - Self::Registry(CachedRegistryDist { filename, path }) - } + Dist::Built(BuiltDist::Registry(_dist)) => Self::Registry(CachedRegistryDist { + filename, + path, + hashes, + }), Dist::Built(BuiltDist::DirectUrl(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, + hashes, path, editable: false, }), Dist::Built(BuiltDist::Path(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, + hashes, path, editable: false, }), - Dist::Source(SourceDist::Registry(_dist)) => { - Self::Registry(CachedRegistryDist { filename, path }) - } + Dist::Source(SourceDist::Registry(_dist)) => Self::Registry(CachedRegistryDist { + filename, + path, + hashes, + }), Dist::Source(SourceDist::DirectUrl(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, + hashes, path, editable: false, }), Dist::Source(SourceDist::Git(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, + hashes, path, editable: false, }), Dist::Source(SourceDist::Path(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, + hashes, path, editable: dist.editable, }), @@ -104,6 +122,7 @@ impl CachedDist { } } + /// Returns `true` if the distribution is editable. pub fn editable(&self) -> bool { match self { Self::Registry(_) => false, @@ -111,6 +130,7 @@ impl CachedDist { } } + /// Returns the [`WheelFilename`] of the distribution. pub fn filename(&self) -> &WheelFilename { match self { Self::Registry(dist) => &dist.filename, @@ -119,12 +139,24 @@ impl CachedDist { } } +impl Hashed for CachedRegistryDist { + fn hashes(&self) -> &[HashDigest] { + &self.hashes + } +} + impl CachedDirectUrlDist { /// Initialize a [`CachedDirectUrlDist`] from a [`WheelFilename`], [`url::Url`], and [`Path`]. - pub fn from_url(filename: WheelFilename, url: VerbatimUrl, path: PathBuf) -> Self { + pub fn from_url( + filename: WheelFilename, + url: VerbatimUrl, + hashes: Vec<HashDigest>, + path: PathBuf, + ) -> Self { Self { filename, url, + hashes, path, editable: false, } diff --git a/crates/distribution-types/src/hashed.rs b/crates/distribution-types/src/hashed.rs new file mode 100644 index 000000000..c8185021c --- /dev/null +++ b/crates/distribution-types/src/hashed.rs @@ -0,0 +1,27 @@ +use pypi_types::HashDigest; + +pub trait Hashed { + /// Return the [`HashDigest`]s for the archive. + fn hashes(&self) -> &[HashDigest]; + + /// Returns `true` if the archive satisfies the given hashes. + fn satisfies(&self, hashes: &[HashDigest]) -> bool { + if hashes.is_empty() { + true + } else { + self.hashes().iter().any(|hash| hashes.contains(hash)) + } + } + + /// Returns `true` if the archive includes a hash for at least one of the given algorithms. 
+ fn has_digests(&self, hashes: &[HashDigest]) -> bool { + if hashes.is_empty() { + true + } else { + hashes + .iter() + .map(HashDigest::algorithm) + .any(|algorithm| self.hashes().iter().any(|hash| hash.algorithm == algorithm)) + } + } +} diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index 6c2ad6c81..ab74423ee 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -51,6 +51,7 @@ pub use crate::direct_url::*; pub use crate::editable::*; pub use crate::error::*; pub use crate::file::*; +pub use crate::hashed::*; pub use crate::id::*; pub use crate::index_url::*; pub use crate::installed::*; @@ -66,6 +67,7 @@ mod direct_url; mod editable; mod error; mod file; +mod hashed; mod id; mod index_url; mod installed; diff --git a/crates/distribution-types/src/prioritized_distribution.rs b/crates/distribution-types/src/prioritized_distribution.rs index dc67a0f34..ac793de84 100644 --- a/crates/distribution-types/src/prioritized_distribution.rs +++ b/crates/distribution-types/src/prioritized_distribution.rs @@ -84,6 +84,8 @@ impl Display for IncompatibleDist { IncompatibleWheel::RequiresPython(python) => { write!(f, "it requires at python {python}") } + IncompatibleWheel::MissingHash => f.write_str("it has no hash"), + IncompatibleWheel::MismatchedHash => f.write_str("the hash does not match"), }, Self::Source(incompatibility) => match incompatibility { IncompatibleSource::NoBuild => { @@ -104,6 +106,8 @@ impl Display for IncompatibleDist { IncompatibleSource::RequiresPython(python) => { write!(f, "it requires python {python}") } + IncompatibleSource::MissingHash => f.write_str("it has no hash"), + IncompatibleSource::MismatchedHash => f.write_str("the hash does not match"), }, Self::Unavailable => f.write_str("no distributions are available"), } @@ -122,6 +126,8 @@ pub enum IncompatibleWheel { Tag(IncompatibleTag), RequiresPython(VersionSpecifiers), Yanked(Yanked), + MissingHash, + MismatchedHash, NoBinary, } @@ -136,6 +142,8 @@ pub enum IncompatibleSource { ExcludeNewer(Option), RequiresPython(VersionSpecifiers), Yanked(Yanked), + MissingHash, + MismatchedHash, NoBuild, } @@ -381,20 +389,26 @@ impl IncompatibleSource { Self::ExcludeNewer(timestamp_self) => match other { // Smaller timestamps are closer to the cut-off time Self::ExcludeNewer(timestamp_other) => timestamp_other < timestamp_self, - Self::NoBuild | Self::RequiresPython(_) | Self::Yanked(_) => true, + Self::NoBuild + | Self::RequiresPython(_) + | Self::Yanked(_) + | Self::MissingHash + | Self::MismatchedHash => true, }, Self::RequiresPython(_) => match other { Self::ExcludeNewer(_) => false, // Version specifiers cannot be reasonably compared Self::RequiresPython(_) => false, - Self::NoBuild | Self::Yanked(_) => true, + Self::NoBuild | Self::Yanked(_) | Self::MissingHash | Self::MismatchedHash => true, }, Self::Yanked(_) => match other { Self::ExcludeNewer(_) | Self::RequiresPython(_) => false, // Yanks with a reason are more helpful for errors Self::Yanked(yanked_other) => matches!(yanked_other, Yanked::Reason(_)), - Self::NoBuild => true, + Self::NoBuild | Self::MissingHash | Self::MismatchedHash => true, }, + Self::MissingHash => false, + Self::MismatchedHash => false, Self::NoBuild => false, } } @@ -412,26 +426,37 @@ impl IncompatibleWheel { timestamp_other < timestamp_self } }, - Self::NoBinary | Self::RequiresPython(_) | Self::Tag(_) | Self::Yanked(_) => true, + Self::NoBinary + | Self::RequiresPython(_) + | Self::Tag(_) + | Self::Yanked(_) + | 
Self::MissingHash + | Self::MismatchedHash => true, }, Self::Tag(tag_self) => match other { Self::ExcludeNewer(_) => false, Self::Tag(tag_other) => tag_other > tag_self, - Self::NoBinary | Self::RequiresPython(_) | Self::Yanked(_) => true, + Self::NoBinary + | Self::RequiresPython(_) + | Self::Yanked(_) + | Self::MissingHash + | Self::MismatchedHash => true, }, Self::RequiresPython(_) => match other { Self::ExcludeNewer(_) | Self::Tag(_) => false, // Version specifiers cannot be reasonably compared Self::RequiresPython(_) => false, - Self::NoBinary | Self::Yanked(_) => true, + Self::NoBinary | Self::Yanked(_) | Self::MissingHash | Self::MismatchedHash => true, }, Self::Yanked(_) => match other { Self::ExcludeNewer(_) | Self::Tag(_) | Self::RequiresPython(_) => false, // Yanks with a reason are more helpful for errors Self::Yanked(yanked_other) => matches!(yanked_other, Yanked::Reason(_)), - Self::NoBinary => true, + Self::NoBinary | Self::MissingHash | Self::MismatchedHash => true, }, Self::NoBinary => false, + Self::MismatchedHash => false, + Self::MissingHash => false, } } } diff --git a/crates/pep508-rs/src/lib.rs b/crates/pep508-rs/src/lib.rs index d37eff711..c8091af7d 100644 --- a/crates/pep508-rs/src/lib.rs +++ b/crates/pep508-rs/src/lib.rs @@ -72,7 +72,7 @@ pub enum Pep508ErrorSource { String(String), /// A URL parsing error. #[error(transparent)] - UrlError(#[from] verbatim_url::VerbatimUrlError), + UrlError(#[from] VerbatimUrlError), /// The version requirement is not supported. #[error("{0}")] UnsupportedRequirement(String), diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs index d2e5b3c41..ec054e83c 100644 --- a/crates/uv-cache/src/lib.rs +++ b/crates/uv-cache/src/lib.rs @@ -594,12 +594,12 @@ pub enum CacheBucket { impl CacheBucket { fn to_str(self) -> &'static str { match self { - Self::BuiltWheels => "built-wheels-v2", + Self::BuiltWheels => "built-wheels-v3", Self::FlatIndex => "flat-index-v0", Self::Git => "git-v0", Self::Interpreter => "interpreter-v0", Self::Simple => "simple-v7", - Self::Wheels => "wheels-v0", + Self::Wheels => "wheels-v1", Self::Archive => "archive-v0", } } diff --git a/crates/uv-client/Cargo.toml b/crates/uv-client/Cargo.toml index f7db9eebb..25ec0d8d1 100644 --- a/crates/uv-client/Cargo.toml +++ b/crates/uv-client/Cargo.toml @@ -11,14 +11,14 @@ install-wheel-rs = { workspace = true } pep440_rs = { workspace = true } pep508_rs = { workspace = true } platform-tags = { workspace = true } +pypi-types = { workspace = true } uv-auth = { workspace = true } uv-cache = { workspace = true } +uv-configuration = { workspace = true } uv-fs = { workspace = true, features = ["tokio"] } uv-normalize = { workspace = true } -uv-configuration = { workspace = true } uv-version = { workspace = true } uv-warnings = { workspace = true } -pypi-types = { workspace = true } anyhow = { workspace = true } async-trait = { workspace = true } diff --git a/crates/uv-client/src/cached_client.rs b/crates/uv-client/src/cached_client.rs index a80726e8b..672785328 100644 --- a/crates/uv-client/src/cached_client.rs +++ b/crates/uv-client/src/cached_client.rs @@ -299,6 +299,34 @@ impl CachedClient { } } + /// Make a request without checking whether the cache is fresh. 
+ pub async fn skip_cache< + Payload: Serialize + DeserializeOwned + Send + 'static, + CallBackError, + Callback, + CallbackReturn, + >( + &self, + req: Request, + cache_entry: &CacheEntry, + response_callback: Callback, + ) -> Result<Payload, CachedClientError<CallBackError>> + where + Callback: FnOnce(Response) -> CallbackReturn + Send, + CallbackReturn: Future<Output = Result<Payload, CallBackError>> + Send, + { + let (response, cache_policy) = self.fresh_request(req).await?; + + let payload = self + .run_response_callback(cache_entry, cache_policy, response, move |resp| async { + let payload = response_callback(resp).await?; + Ok(SerdeCacheable { inner: payload }) + }) + .await?; + + Ok(payload) + } + async fn resend_and_heal_cache( &self, req: Request, diff --git a/crates/uv-client/src/flat_index.rs b/crates/uv-client/src/flat_index.rs index facd5dbc0..623e9b26e 100644 --- a/crates/uv-client/src/flat_index.rs +++ b/crates/uv-client/src/flat_index.rs @@ -2,15 +2,12 @@ use std::path::PathBuf; use futures::{FutureExt, StreamExt}; use reqwest::Response; - use tracing::{debug, info_span, warn, Instrument}; use url::Url; use distribution_filename::DistFilename; use distribution_types::{File, FileLocation, FlatIndexLocation, IndexUrl}; - use pep508_rs::VerbatimUrl; - use uv_cache::{Cache, CacheBucket}; use crate::cached_client::{CacheControl, CachedClientError}; diff --git a/crates/uv-dev/src/resolve_cli.rs b/crates/uv-dev/src/resolve_cli.rs index 969ee18c6..df3d62187 100644 --- a/crates/uv-dev/src/resolve_cli.rs +++ b/crates/uv-dev/src/resolve_cli.rs @@ -18,7 +18,7 @@ use uv_dispatch::BuildDispatch; use uv_installer::SitePackages; use uv_interpreter::PythonEnvironment; use uv_resolver::{FlatIndex, InMemoryIndex, Manifest, Options, Resolver}; -use uv_types::{BuildIsolation, InFlight}; +use uv_types::{BuildIsolation, InFlight, RequiredHashes}; #[derive(ValueEnum, Default, Clone)] pub(crate) enum ResolveCliFormat { @@ -58,6 +58,7 @@ pub(crate) async fn resolve_cli(args: ResolveCliArgs) -> Result<()> { let index_locations = IndexLocations::new(args.index_url, args.extra_index_url, args.find_links, false); let index = InMemoryIndex::default(); + let hashes = RequiredHashes::default(); let in_flight = InFlight::default(); let no_build = if args.no_build { NoBuild::All @@ -73,6 +74,7 @@ pub(crate) async fn resolve_cli(args: ResolveCliArgs) -> Result<()> { FlatIndex::from_entries( entries, venv.interpreter().tags()?, + &RequiredHashes::default(), &no_build, &NoBinary::None, ) @@ -107,6 +109,7 @@ pub(crate) async fn resolve_cli(args: ResolveCliArgs) -> Result<()> { &client, &flat_index, &index, + &hashes, &build_dispatch, &site_packages, )?; diff --git a/crates/uv-dispatch/src/lib.rs b/crates/uv-dispatch/src/lib.rs index b35961d54..fa8ea31b4 100644 --- a/crates/uv-dispatch/src/lib.rs +++ b/crates/uv-dispatch/src/lib.rs @@ -21,7 +21,7 @@ use uv_configuration::{BuildKind, ConfigSettings, NoBinary, NoBuild, Reinstall, use uv_installer::{Downloader, Installer, Plan, Planner, SitePackages}; use uv_interpreter::{Interpreter, PythonEnvironment}; use uv_resolver::{FlatIndex, InMemoryIndex, Manifest, Options, Resolver}; -use uv_types::{BuildContext, BuildIsolation, EmptyInstalledPackages, InFlight}; +use uv_types::{BuildContext, BuildIsolation, EmptyInstalledPackages, InFlight, RequiredHashes}; /// The main implementation of [`BuildContext`], used by the CLI, see [`BuildContext`] /// documentation. 
@@ -134,6 +134,7 @@ impl<'a> BuildContext for BuildDispatch<'a> { async fn resolve<'data>(&'data self, requirements: &'data [Requirement]) -> Result<Resolution> { let markers = self.interpreter.markers(); let tags = self.interpreter.tags()?; + let hashes = RequiredHashes::default(); let resolver = Resolver::new( Manifest::simple(requirements.to_vec()), self.options, @@ -143,6 +144,7 @@ impl<'a> BuildContext for BuildDispatch<'a> { self.client, self.flat_index, self.index, + &hashes, self, &EmptyInstalledPackages, )?; @@ -176,6 +178,9 @@ impl<'a> BuildContext for BuildDispatch<'a> { venv.root().display(), ); + // Don't enforce hashes for build dependencies. + let hashes = RequiredHashes::default(); + // Determine the current environment markers. let tags = self.interpreter.tags()?; @@ -192,6 +197,7 @@ impl<'a> BuildContext for BuildDispatch<'a> { site_packages, &Reinstall::None, &NoBinary::None, + &RequiredHashes::default(), self.index_locations, self.cache(), venv, @@ -220,7 +226,7 @@ impl<'a> BuildContext for BuildDispatch<'a> { vec![] } else { // TODO(konstin): Check that there is no endless recursion. - let downloader = Downloader::new(self.cache, tags, self.client, self); + let downloader = Downloader::new(self.cache, tags, &hashes, self.client, self); debug!( "Downloading and building requirement{} for build: {}", if remote.len() == 1 { "" } else { "s" }, diff --git a/crates/uv-distribution/Cargo.toml b/crates/uv-distribution/Cargo.toml index 53d244433..a4814ff6d 100644 --- a/crates/uv-distribution/Cargo.toml +++ b/crates/uv-distribution/Cargo.toml @@ -33,6 +33,7 @@ uv-configuration = { workspace = true } anyhow = { workspace = true } fs-err = { workspace = true } futures = { workspace = true } +md-5 = { workspace = true } nanoid = { workspace = true } once_cell = { workspace = true } reqwest = { workspace = true } @@ -40,6 +41,7 @@ reqwest-middleware = { workspace = true } rmp-serde = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } +sha2 = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } diff --git a/crates/uv-distribution/src/archive.rs b/crates/uv-distribution/src/archive.rs new file mode 100644 index 000000000..a53de619d --- /dev/null +++ b/crates/uv-distribution/src/archive.rs @@ -0,0 +1,36 @@ +use std::path::PathBuf; + +use distribution_types::Hashed; +use pypi_types::HashDigest; + +/// An archive (unzipped wheel) that exists in the local cache. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct Archive { + /// The path to the archive entry in the wheel's archive bucket. + pub path: PathBuf, + /// The computed hashes of the archive. + pub hashes: Vec<HashDigest>, +} + +impl Archive { + /// Create a new [`Archive`] with the given path and hashes. + pub(crate) fn new(path: PathBuf, hashes: Vec<HashDigest>) -> Self { + Self { path, hashes } + } + + /// Return the path to the archive entry in the wheel's archive bucket. + pub fn path(&self) -> &PathBuf { + &self.path + } + + /// Return the computed hashes of the archive. 
+ pub fn hashes(&self) -> &[HashDigest] { + &self.hashes + } +} + +impl Hashed for Archive { + fn hashes(&self) -> &[HashDigest] { + &self.hashes + } +} diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs index 6ece58e51..0db62a92d 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -11,16 +11,19 @@ use url::Url; use distribution_filename::WheelFilename; use distribution_types::{ - BuildableSource, BuiltDist, Dist, FileLocation, IndexLocations, LocalEditable, Name, SourceDist, + BuildableSource, BuiltDist, Dist, FileLocation, Hashed, IndexLocations, LocalEditable, Name, + SourceDist, }; use platform_tags::Tags; -use pypi_types::Metadata23; +use pypi_types::{HashDigest, Metadata23}; use uv_cache::{ArchiveTimestamp, CacheBucket, CacheEntry, CachedByTimestamp, WheelCache}; use uv_client::{CacheControl, CachedClientError, Connectivity, RegistryClient}; use uv_configuration::{NoBinary, NoBuild}; +use uv_extract::hash::Hasher; use uv_fs::write_atomic; use uv_types::BuildContext; +use crate::archive::Archive; use crate::locks::Locks; use crate::{Error, LocalWheel, Reporter, SourceDistributionBuilder}; @@ -79,28 +82,38 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> /// Either fetch the wheel or fetch and build the source distribution /// - /// If `no_remote_wheel` is set, the wheel will be built from a source distribution - /// even if compatible pre-built wheels are available. + /// Returns a wheel that's compliant with the given platform tags. + /// + /// While hashes will be generated in some cases, hash-checking is only enforced for source + /// distributions, and should be enforced by the caller for wheels. #[instrument(skip_all, fields(%dist))] - pub async fn get_or_build_wheel(&self, dist: &Dist, tags: &Tags) -> Result<LocalWheel, Error> { + pub async fn get_or_build_wheel( + &self, + dist: &Dist, + tags: &Tags, + hashes: &[HashDigest], + ) -> Result<LocalWheel, Error> { match dist { - Dist::Built(built) => self.get_wheel(built).await, - Dist::Source(source) => self.build_wheel(source, tags).await, + Dist::Built(built) => self.get_wheel(built, hashes).await, + Dist::Source(source) => self.build_wheel(source, tags, hashes).await, } } /// Either fetch the only wheel metadata (directly from the index or with range requests) or /// fetch and build the source distribution. /// - /// Returns the [`Metadata23`], along with a "precise" URL for the source distribution, if - /// possible. For example, given a Git dependency with a reference to a branch or tag, return a - /// URL with a precise reference to the current commit of that branch or tag. + /// While hashes will be generated in some cases, hash-checking is only enforced for source + /// distributions, and should be enforced by the caller for wheels. 
#[instrument(skip_all, fields(%dist))] - pub async fn get_or_build_wheel_metadata(&self, dist: &Dist) -> Result<Metadata23, Error> { + pub async fn get_or_build_wheel_metadata( + &self, + dist: &Dist, + hashes: &[HashDigest], + ) -> Result<Metadata23, Error> { match dist { - Dist::Built(built) => self.get_wheel_metadata(built).await, + Dist::Built(built) => self.get_wheel_metadata(built, hashes).await, Dist::Source(source) => { - self.build_wheel_metadata(&BuildableSource::Dist(source)) + self.build_wheel_metadata(&BuildableSource::Dist(source), hashes) .await } } @@ -118,7 +131,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> .build_editable(editable, editable_wheel_dir) .await?; - // Unzip. + // Unzip into the editable wheel directory. let path = editable_wheel_dir.join(&disk_filename); let target = editable_wheel_dir.join(cache_key::digest(&editable.path)); let archive = self.unzip_wheel(&path, &target).await?; @@ -126,13 +139,21 @@ dist, filename, archive, + hashes: vec![], }; Ok((wheel, metadata)) } /// Fetch a wheel from the cache or download it from the index. - async fn get_wheel(&self, dist: &BuiltDist) -> Result<LocalWheel, Error> { + /// + /// While hashes will be generated in some cases, hash-checking is _not_ enforced and should + /// instead be enforced by the caller. + async fn get_wheel( + &self, + dist: &BuiltDist, + hashes: &[HashDigest], + ) -> Result<LocalWheel, Error> { let no_binary = match self.build_context.no_binary() { NoBinary::None => false, NoBinary::All => true, @@ -157,8 +178,9 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> WheelCache::Index(&wheel.index).wheel_dir(wheel.name().as_ref()), wheel.filename.stem(), ); + return self - .load_wheel(path, &wheel.filename, cache_entry, dist) + .load_wheel(path, &wheel.filename, cache_entry, dist, hashes) .await; } }; @@ -172,12 +194,13 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Download and unzip. match self - .stream_wheel(url.clone(), &wheel.filename, &wheel_entry, dist) + .stream_wheel(url.clone(), &wheel.filename, &wheel_entry, dist, hashes) .await { Ok(archive) => Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, + hashes: archive.hashes, filename: wheel.filename.clone(), }), Err(Error::Extract(err)) if err.is_http_streaming_unsupported() => { @@ -188,11 +211,12 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // If the request failed because streaming is unsupported, download the // wheel directly. let archive = self - .download_wheel(url, &wheel.filename, &wheel_entry, dist) + .download_wheel(url, &wheel.filename, &wheel_entry, dist, hashes) .await?; Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, + hashes: archive.hashes, filename: wheel.filename.clone(), }) } @@ -210,12 +234,19 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Download and unzip. 
match self - .stream_wheel(wheel.url.raw().clone(), &wheel.filename, &wheel_entry, dist) + .stream_wheel( + wheel.url.raw().clone(), + &wheel.filename, + &wheel_entry, + dist, + hashes, + ) .await { Ok(archive) => Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, + hashes: archive.hashes, filename: wheel.filename.clone(), }), Err(Error::Client(err)) if err.is_http_streaming_unsupported() => { @@ -231,11 +262,13 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> &wheel.filename, &wheel_entry, dist, + hashes, ) .await?; Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, + hashes: archive.hashes, filename: wheel.filename.clone(), }) } @@ -249,7 +282,8 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), wheel.filename.stem(), ); - self.load_wheel(&wheel.path, &wheel.filename, cache_entry, dist) + + self.load_wheel(&wheel.path, &wheel.filename, cache_entry, dist, hashes) .await } } @@ -257,24 +291,33 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> /// Convert a source distribution into a wheel, fetching it from the cache or building it if /// necessary. - async fn build_wheel(&self, dist: &SourceDist, tags: &Tags) -> Result { + /// + /// The returned wheel is guaranteed to come from a distribution with a matching hash, and + /// no build processes will be executed for distributions with mismatched hashes. + async fn build_wheel( + &self, + dist: &SourceDist, + tags: &Tags, + hashes: &[HashDigest], + ) -> Result { let lock = self.locks.acquire(&Dist::Source(dist.clone())).await; let _guard = lock.lock().await; let built_wheel = self .builder - .download_and_build(&BuildableSource::Dist(dist), tags) + .download_and_build(&BuildableSource::Dist(dist), tags, hashes) .boxed() .await?; // If the wheel was unzipped previously, respect it. Source distributions are - // cached under a unique build ID, so unzipped directories are never stale. + // cached under a unique revision ID, so unzipped directories are never stale. match built_wheel.target.canonicalize() { Ok(archive) => { return Ok(LocalWheel { dist: Dist::Source(dist.clone()), archive, filename: built_wheel.filename, + hashes: built_wheel.hashes, }); } Err(err) if err.kind() == io::ErrorKind::NotFound => {} @@ -287,12 +330,20 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> archive: self .unzip_wheel(&built_wheel.path, &built_wheel.target) .await?, + hashes: built_wheel.hashes, filename: built_wheel.filename, }) } /// Fetch the wheel metadata from the index, or from the cache if possible. - pub async fn get_wheel_metadata(&self, dist: &BuiltDist) -> Result { + /// + /// While hashes will be generated in some cases, hash-checking is _not_ enforced and should + /// instead be enforced by the caller. + pub async fn get_wheel_metadata( + &self, + dist: &BuiltDist, + hashes: &[HashDigest], + ) -> Result { match self.client.wheel_metadata(dist).boxed().await { Ok(metadata) => Ok(metadata), Err(err) if err.is_http_streaming_unsupported() => { @@ -300,7 +351,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // If the request failed due to an error that could be resolved by // downloading the wheel directly, try that. - let wheel = self.get_wheel(dist).await?; + let wheel = self.get_wheel(dist, hashes).await?; Ok(wheel.metadata()?) 
} Err(err) => Err(err.into()), @@ -308,9 +359,13 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> } /// Build the wheel metadata for a source distribution, or fetch it from the cache if possible. + /// + /// The returned metadata is guaranteed to come from a distribution with a matching hash, and + /// no build processes will be executed for distributions with mismatched hashes. pub async fn build_wheel_metadata( &self, source: &BuildableSource<'_>, + hashes: &[HashDigest], ) -> Result<Metadata23, Error> { let no_build = match self.build_context.no_build() { NoBuild::All => true, @@ -330,7 +385,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> let metadata = self .builder - .download_and_build_metadata(source) + .download_and_build_metadata(source, hashes) .boxed() .await?; Ok(metadata) @@ -343,7 +398,8 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> filename: &WheelFilename, wheel_entry: &CacheEntry, dist: &BuiltDist, - ) -> Result<PathBuf, Error> { + hashes: &[HashDigest], + ) -> Result<Archive, Error> { // Create an entry for the HTTP cache. let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem())); @@ -354,23 +410,42 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> .map_err(|err| self.handle_response_errors(err)) .into_async_read(); + // Create a hasher for each hash algorithm. + let algorithms = { + let mut hash = hashes.iter().map(HashDigest::algorithm).collect::<Vec<_>>(); + hash.sort(); + hash.dedup(); + hash + }; + let mut hashers = algorithms.into_iter().map(Hasher::from).collect::<Vec<_>>(); + let mut hasher = uv_extract::hash::HashReader::new(reader.compat(), &mut hashers); + // Download and unzip the wheel to a temporary directory. let temp_dir = tempfile::tempdir_in(self.build_context.cache().root()) .map_err(Error::CacheWrite)?; - uv_extract::stream::unzip(reader.compat(), temp_dir.path()).await?; + uv_extract::stream::unzip(&mut hasher, temp_dir.path()).await?; + + // If necessary, exhaust the reader to compute the hash. + if !hashes.is_empty() { + hasher.finish().await.map_err(Error::HashExhaustion)?; + } // Persist the temporary directory to the directory store. - let archive = self + let path = self .build_context .cache() .persist(temp_dir.into_path(), wheel_entry.path()) .await .map_err(Error::CacheRead)?; - Ok(archive) + Ok(Archive::new( + path, + hashers.into_iter().map(HashDigest::from).collect(), + )) } .instrument(info_span!("wheel", wheel = %dist)) }; + // Fetch the archive from the cache, or download it if necessary. let req = self.request(url.clone())?; let cache_control = match self.client.connectivity() { Connectivity::Online => CacheControl::from( self.build_context .cache() .freshness(&http_entry, Some(&wheel.name)) .map_err(Error::CacheRead)?, ), Connectivity::Offline => CacheControl::AllowStale, }; let archive = self .client .cached_client() .get_serde(req, &http_entry, cache_control, download) .await .map_err(|err| match err { CachedClientError::Callback(err) => err, CachedClientError::Client(err) => Error::Client(err), })?; + // If the archive is missing the required hashes, force a refresh. + let archive = if archive.has_digests(hashes) { + archive + } else { + self.client + .cached_client() + .skip_cache(self.request(url)?, &http_entry, download) + .await + .map_err(|err| match err { + CachedClientError::Callback(err) => err, + CachedClientError::Client(err) => Error::Client(err), + })? + }; + Ok(archive) } @@ -401,7 +490,8 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> filename: &WheelFilename, wheel_entry: &CacheEntry, dist: &BuiltDist, - ) -> Result<PathBuf, Error> { + hashes: &[HashDigest], + ) -> Result<Archive, Error> { // Create an entry for the HTTP cache. 
let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem())); @@ -427,16 +517,48 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> file.seek(io::SeekFrom::Start(0)) .await .map_err(Error::CacheWrite)?; - uv_extract::seek::unzip(file, temp_dir.path()).await?; + + // If no hashes are required, parallelize the unzip operation. + let hashes = if hashes.is_empty() { + let file = file.into_std().await; + tokio::task::spawn_blocking({ + let target = temp_dir.path().to_owned(); + move || -> Result<(), uv_extract::Error> { + // Unzip the wheel into a temporary directory. + uv_extract::unzip(file, &target)?; + Ok(()) + } + }) + .await??; + + vec![] + } else { + // Create a hasher for each hash algorithm. + let algorithms = { + let mut hash = hashes.iter().map(HashDigest::algorithm).collect::>(); + hash.sort(); + hash.dedup(); + hash + }; + let mut hashers = algorithms.into_iter().map(Hasher::from).collect::>(); + let mut hasher = uv_extract::hash::HashReader::new(file, &mut hashers); + uv_extract::stream::unzip(&mut hasher, temp_dir.path()).await?; + + // If necessary, exhaust the reader to compute the hash. + hasher.finish().await.map_err(Error::HashExhaustion)?; + + hashers.into_iter().map(HashDigest::from).collect() + }; // Persist the temporary directory to the directory store. - let archive = self + let path = self .build_context .cache() .persist(temp_dir.into_path(), wheel_entry.path()) .await .map_err(Error::CacheRead)?; - Ok(archive) + + Ok(Archive::new(path, hashes)) } .instrument(info_span!("wheel", wheel = %dist)) }; @@ -451,7 +573,6 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> ), Connectivity::Offline => CacheControl::AllowStale, }; - let archive = self .client .cached_client() @@ -462,6 +583,20 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> CachedClientError::Client(err) => Error::Client(err), })?; + // If the archive is missing the required hashes, force a refresh. + let archive = if archive.has_digests(hashes) { + archive + } else { + self.client + .cached_client() + .skip_cache(self.request(url)?, &http_entry, download) + .await + .map_err(|err| match err { + CachedClientError::Callback(err) => err, + CachedClientError::Client(err) => Error::Client(err), + })? + }; + Ok(archive) } @@ -472,6 +607,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> filename: &WheelFilename, wheel_entry: CacheEntry, dist: &BuiltDist, + hashes: &[HashDigest], ) -> Result { // Determine the last-modified time of the wheel. let modified = ArchiveTimestamp::from_file(path).map_err(Error::CacheRead)?; @@ -481,20 +617,66 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> let archive = read_timestamped_archive(&archive_entry, modified)?; // If the file is already unzipped, and the cache is up-to-date, return it. - if let Some(archive) = archive { + if let Some(archive) = archive.filter(|archive| archive.has_digests(hashes)) { Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, + hashes: archive.hashes, filename: filename.clone(), }) - } else { + } else if hashes.is_empty() { // Otherwise, unzip the wheel. 
- let archive = self.unzip_wheel(path, wheel_entry.path()).await?; + let archive = Archive::new(self.unzip_wheel(path, wheel_entry.path()).await?, vec![]); write_timestamped_archive(&archive_entry, archive.clone(), modified).await?; Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, + hashes: archive.hashes, + filename: filename.clone(), + }) + } else { + // If necessary, compute the hashes of the wheel. + let file = fs_err::tokio::File::open(path) + .await + .map_err(Error::CacheRead)?; + let temp_dir = tempfile::tempdir_in(self.build_context.cache().root()) + .map_err(Error::CacheWrite)?; + + // Create a hasher for each hash algorithm. + let algorithms = { + let mut hash = hashes.iter().map(HashDigest::algorithm).collect::<Vec<_>>(); + hash.sort(); + hash.dedup(); + hash + }; + let mut hashers = algorithms.into_iter().map(Hasher::from).collect::<Vec<_>>(); + let mut hasher = uv_extract::hash::HashReader::new(file, &mut hashers); + + // Unzip the wheel to a temporary directory. + uv_extract::stream::unzip(&mut hasher, temp_dir.path()).await?; + + // Exhaust the reader to compute the hash. + hasher.finish().await.map_err(Error::HashExhaustion)?; + + // Persist the temporary directory to the directory store. + let archive = self + .build_context + .cache() + .persist(temp_dir.into_path(), wheel_entry.path()) + .await + .map_err(Error::CacheWrite)?; + + let hashes = hashers.into_iter().map(HashDigest::from).collect(); + + // Write the archive pointer to the cache. + let archive = Archive::new(archive, hashes); + write_timestamped_archive(&archive_entry, archive.clone(), modified).await?; + + Ok(LocalWheel { + dist: Dist::Built(dist.clone()), + archive: archive.path, + hashes: archive.hashes, filename: filename.clone(), }) } @@ -549,7 +731,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> /// Write a timestamped archive path to the cache. async fn write_timestamped_archive( cache_entry: &CacheEntry, - data: PathBuf, + data: Archive, modified: ArchiveTimestamp, ) -> Result<(), Error> { write_atomic( @@ -564,13 +746,13 @@ async fn write_timestamped_archive( } /// Read an existing timestamped archive path, if it exists and is up-to-date. -fn read_timestamped_archive( +pub fn read_timestamped_archive( cache_entry: &CacheEntry, modified: ArchiveTimestamp, -) -> Result<Option<PathBuf>, Error> { +) -> Result<Option<Archive>, Error> { match fs_err::read(cache_entry.path()) { Ok(cached) => { - let cached = rmp_serde::from_slice::<CachedByTimestamp<PathBuf>>(&cached)?; + let cached = rmp_serde::from_slice::<CachedByTimestamp<Archive>>(&cached)?; if cached.timestamp == modified.timestamp() { return Ok(Some(cached.data)); } diff --git a/crates/uv-distribution/src/download.rs b/crates/uv-distribution/src/download.rs index b7123ff7c..68db0b722 100644 --- a/crates/uv-distribution/src/download.rs +++ b/crates/uv-distribution/src/download.rs @@ -1,8 +1,8 @@ use std::path::{Path, PathBuf}; use distribution_filename::WheelFilename; -use distribution_types::{CachedDist, Dist}; -use pypi_types::Metadata23; +use distribution_types::{CachedDist, Dist, Hashed}; +use pypi_types::{HashDigest, Metadata23}; use crate::Error; @@ -16,6 +16,8 @@ pub struct LocalWheel { /// The canonicalized path in the cache directory to which the wheel was downloaded. /// Typically, a directory within the archive bucket. pub(crate) archive: PathBuf, + /// The computed hashes of the wheel. 
+ pub(crate) hashes: Vec<HashDigest>, } impl LocalWheel { @@ -40,10 +42,16 @@ impl LocalWheel { } } +impl Hashed for LocalWheel { + fn hashes(&self) -> &[HashDigest] { + &self.hashes + } +} + /// Convert a [`LocalWheel`] into a [`CachedDist`]. impl From<LocalWheel> for CachedDist { fn from(wheel: LocalWheel) -> CachedDist { - CachedDist::from_remote(wheel.dist, wheel.filename, wheel.archive) + CachedDist::from_remote(wheel.dist, wheel.filename, wheel.hashes, wheel.archive) } } diff --git a/crates/uv-distribution/src/error.rs b/crates/uv-distribution/src/error.rs index 42bf4f48b..d5a77f81a 100644 --- a/crates/uv-distribution/src/error.rs +++ b/crates/uv-distribution/src/error.rs @@ -4,6 +4,7 @@ use zip::result::ZipError; use distribution_filename::WheelFilenameError; use pep440_rs::Version; +use pypi_types::HashDigest; use uv_client::BetterReqwestError; use uv_normalize::PackageName; @@ -81,6 +82,23 @@ pub enum Error { /// Should not occur; only seen when another task panicked. #[error("The task executor is broken, did some other task panic?")] Join(#[from] JoinError), + + /// An I/O error that occurs while exhausting a reader to compute a hash. + #[error("Failed to hash distribution")] + HashExhaustion(#[source] std::io::Error), + + #[error("Hash mismatch for {distribution}\n\nExpected:\n{expected}\n\nComputed:\n{actual}")] + HashMismatch { + distribution: String, + expected: String, + actual: String, + }, + + #[error("Hash-checking is not supported for local directories: {0}")] + HashesNotSupportedSourceTree(String), + + #[error("Hash-checking is not supported for Git repositories: {0}")] + HashesNotSupportedGit(String), } impl From for Error { @@ -99,3 +117,30 @@ impl From for Error { } } } + +impl Error { + /// Construct a hash mismatch error. + pub fn hash_mismatch( + distribution: String, + expected: &[HashDigest], + actual: &[HashDigest], + ) -> Error { + let expected = expected + .iter() + .map(|hash| format!(" {hash}")) + .collect::<Vec<_>>() + .join("\n"); + + let actual = actual + .iter() + .map(|hash| format!(" {hash}")) + .collect::<Vec<_>>() + .join("\n"); + + Self::HashMismatch { + distribution, + expected, + actual, + } + } +} diff --git a/crates/uv-distribution/src/index/built_wheel_index.rs b/crates/uv-distribution/src/index/built_wheel_index.rs index ca4f59b45..ea7fd1df6 100644 --- a/crates/uv-distribution/src/index/built_wheel_index.rs +++ b/crates/uv-distribution/src/index/built_wheel_index.rs @@ -1,7 +1,10 @@ -use distribution_types::{git_reference, DirectUrlSourceDist, GitSourceDist, PathSourceDist}; +use distribution_types::{ + git_reference, DirectUrlSourceDist, GitSourceDist, Hashed, PathSourceDist, +}; use platform_tags::Tags; use uv_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheShard, WheelCache}; use uv_fs::symlinks; +use uv_types::RequiredHashes; use crate::index::cached_wheel::CachedWheel; use crate::source::{read_http_revision, read_timestamped_revision, REVISION}; use crate::Error; @@ -12,12 +15,17 @@ pub struct BuiltWheelIndex<'a> { cache: &'a Cache, tags: &'a Tags, + hashes: &'a RequiredHashes, } impl<'a> BuiltWheelIndex<'a> { /// Initialize an index of built distributions. - pub fn new(cache: &'a Cache, tags: &'a Tags) -> Self { - Self { cache, tags } + pub fn new(cache: &'a Cache, tags: &'a Tags, hashes: &'a RequiredHashes) -> Self { + Self { + cache, + tags, + hashes, + } } /// Return the most compatible [`CachedWheel`] for a given source distribution at a direct URL. 
@@ -31,13 +39,19 @@ impl<'a> BuiltWheelIndex<'a> { WheelCache::Url(source_dist.url.raw()).root(), ); - // Read the revision from the cache. There's no need to enforce freshness, since we - // enforce freshness on the entries. + // Read the revision from the cache. let revision_entry = cache_shard.entry(REVISION); let Some(revision) = read_http_revision(&revision_entry)? else { return Ok(None); }; + // Enforce hash-checking by omitting any wheels that don't satisfy the required hashes. + if let Some(hashes) = self.hashes.get(&source_dist.name) { + if !revision.satisfies(hashes) { + return Ok(None); + } + } + Ok(self.find(&cache_shard.shard(revision.id()))) } @@ -55,18 +69,29 @@ impl<'a> BuiltWheelIndex<'a> { return Err(Error::DirWithoutEntrypoint); }; - // Read the revision from the cache. There's no need to enforce freshness, since we - // enforce freshness on the entries. + // Read the revision from the cache. let revision_entry = cache_shard.entry(REVISION); let Some(revision) = read_timestamped_revision(&revision_entry, modified)? else { return Ok(None); }; + // Enforce hash-checking by omitting any wheels that don't satisfy the required hashes. + if let Some(hashes) = self.hashes.get(&source_dist.name) { + if !revision.satisfies(hashes) { + return Ok(None); + } + } + Ok(self.find(&cache_shard.shard(revision.id()))) } /// Return the most compatible [`CachedWheel`] for a given source distribution at a git URL. pub fn git(&self, source_dist: &GitSourceDist) -> Option { + // Enforce hash-checking, which isn't supported for Git distributions. + if self.hashes.get(&source_dist.name).is_some() { + return None; + } + let Ok(Some(git_sha)) = git_reference(&source_dist.url) else { return None; }; @@ -100,7 +125,7 @@ impl<'a> BuiltWheelIndex<'a> { // Unzipped wheels are stored as symlinks into the archive directory. for subdir in symlinks(shard) { - match CachedWheel::from_path(&subdir) { + match CachedWheel::from_built_source(&subdir) { None => {} Some(dist_info) => { // Pick the wheel with the highest priority diff --git a/crates/uv-distribution/src/index/cached_wheel.rs b/crates/uv-distribution/src/index/cached_wheel.rs index a8e4172aa..157a4cffa 100644 --- a/crates/uv-distribution/src/index/cached_wheel.rs +++ b/crates/uv-distribution/src/index/cached_wheel.rs @@ -1,9 +1,13 @@ use std::path::Path; use distribution_filename::WheelFilename; -use distribution_types::{CachedDirectUrlDist, CachedRegistryDist}; +use distribution_types::{CachedDirectUrlDist, CachedRegistryDist, Hashed}; use pep508_rs::VerbatimUrl; -use uv_cache::CacheEntry; +use pypi_types::HashDigest; +use uv_cache::{CacheEntry, CachedByTimestamp}; +use uv_client::DataWithCachePolicy; + +use crate::archive::Archive; #[derive(Debug, Clone)] pub struct CachedWheel { @@ -11,16 +15,23 @@ pub struct CachedWheel { pub filename: WheelFilename, /// The [`CacheEntry`] for the wheel. pub entry: CacheEntry, + /// The [`HashDigest`]s for the wheel. + pub hashes: Vec, } impl CachedWheel { /// Try to parse a distribution from a cached directory name (like `typing-extensions-4.8.0-py3-none-any`). - pub fn from_path(path: &Path) -> Option { + pub fn from_built_source(path: &Path) -> Option { let filename = path.file_name()?.to_str()?; let filename = WheelFilename::from_stem(filename).ok()?; let archive = path.canonicalize().ok()?; let entry = CacheEntry::from_path(archive); - Some(Self { filename, entry }) + let hashes = Vec::new(); + Some(Self { + filename, + entry, + hashes, + }) } /// Convert a [`CachedWheel`] into a [`CachedRegistryDist`]. 
@@ -28,6 +39,7 @@ impl CachedWheel { CachedRegistryDist { filename: self.filename, path: self.entry.into_path_buf(), + hashes: self.hashes, } } @@ -38,6 +50,56 @@ impl CachedWheel { url, path: self.entry.into_path_buf(), editable: false, + hashes: self.hashes, } } + + /// Read a cached wheel from a `.http` pointer (e.g., `anyio-4.0.0-py3-none-any.http`). + pub fn from_http_pointer(path: &Path) -> Option { + // Determine the wheel filename. + let filename = path.file_name()?.to_str()?; + let filename = WheelFilename::from_stem(filename).ok()?; + + // Read the pointer. + let file = fs_err::File::open(path).ok()?; + let data = DataWithCachePolicy::from_reader(file).ok()?.data; + let archive = rmp_serde::from_slice::(&data).ok()?; + + // Convert to a cached wheel. + let entry = CacheEntry::from_path(archive.path); + let hashes = archive.hashes; + Some(Self { + filename, + entry, + hashes, + }) + } + + /// Read a cached wheel from a `.rev` pointer (e.g., `anyio-4.0.0-py3-none-any.rev`). + pub fn from_revision_pointer(path: &Path) -> Option { + // Determine the wheel filename. + let filename = path.file_name()?.to_str()?; + let filename = WheelFilename::from_stem(filename).ok()?; + + // Read the pointer. + let cached = fs_err::read(path).ok()?; + let archive = rmp_serde::from_slice::>(&cached) + .ok()? + .data; + + // Convert to a cached wheel. + let entry = CacheEntry::from_path(archive.path); + let hashes = archive.hashes; + Some(Self { + filename, + entry, + hashes, + }) + } +} + +impl Hashed for CachedWheel { + fn hashes(&self) -> &[HashDigest] { + &self.hashes + } } diff --git a/crates/uv-distribution/src/index/registry_wheel_index.rs b/crates/uv-distribution/src/index/registry_wheel_index.rs index d1dbf251f..34e5a85c3 100644 --- a/crates/uv-distribution/src/index/registry_wheel_index.rs +++ b/crates/uv-distribution/src/index/registry_wheel_index.rs @@ -1,16 +1,16 @@ use std::collections::hash_map::Entry; use std::collections::BTreeMap; -use std::path::Path; use rustc_hash::FxHashMap; -use distribution_types::{CachedRegistryDist, FlatIndexLocation, IndexLocations, IndexUrl}; +use distribution_types::{CachedRegistryDist, FlatIndexLocation, Hashed, IndexLocations, IndexUrl}; use pep440_rs::Version; use pep508_rs::VerbatimUrl; use platform_tags::Tags; use uv_cache::{Cache, CacheBucket, WheelCache}; -use uv_fs::{directories, symlinks}; +use uv_fs::{directories, files, symlinks}; use uv_normalize::PackageName; +use uv_types::RequiredHashes; use crate::index::cached_wheel::CachedWheel; use crate::source::{read_http_revision, REVISION}; @@ -21,16 +21,23 @@ pub struct RegistryWheelIndex<'a> { cache: &'a Cache, tags: &'a Tags, index_locations: &'a IndexLocations, + hashes: &'a RequiredHashes, index: FxHashMap<&'a PackageName, BTreeMap>, } impl<'a> RegistryWheelIndex<'a> { /// Initialize an index of registry distributions. 
- pub fn new(cache: &'a Cache, tags: &'a Tags, index_locations: &'a IndexLocations) -> Self { + pub fn new( + cache: &'a Cache, + tags: &'a Tags, + index_locations: &'a IndexLocations, + hashes: &'a RequiredHashes, + ) -> Self { Self { cache, tags, index_locations, + hashes, index: FxHashMap::default(), } } @@ -65,6 +72,7 @@ impl<'a> RegistryWheelIndex<'a> { self.cache, self.tags, self.index_locations, + self.hashes, )), }; versions @@ -76,8 +84,10 @@ impl<'a> RegistryWheelIndex<'a> { cache: &Cache, tags: &Tags, index_locations: &IndexLocations, + hashes: &RequiredHashes, ) -> BTreeMap { let mut versions = BTreeMap::new(); + let hashes = hashes.get(package).unwrap_or_default(); // Collect into owned `IndexUrl` let flat_index_urls: Vec = index_locations @@ -100,7 +110,34 @@ impl<'a> RegistryWheelIndex<'a> { WheelCache::Index(index_url).wheel_dir(package.to_string()), ); - Self::add_directory(&wheel_dir, tags, &mut versions); + // For registry wheels, the cache structure is: `//.http` + // or `///.rev`. + for file in files(&wheel_dir) { + if file + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("http")) + { + if let Some(wheel) = CachedWheel::from_http_pointer(&wheel_dir.join(&file)) { + // Enforce hash-checking based on the built distribution. + if wheel.satisfies(hashes) { + Self::add_wheel(wheel, tags, &mut versions); + } + } + } + + if file + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("rev")) + { + if let Some(wheel) = CachedWheel::from_revision_pointer(&wheel_dir.join(&file)) + { + // Enforce hash-checking based on the built distribution. + if wheel.satisfies(hashes) { + Self::add_wheel(wheel, tags, &mut versions); + } + } + } + } // Index all the built wheels, created by downloading and building source distributions // from the registry. @@ -115,7 +152,14 @@ impl<'a> RegistryWheelIndex<'a> { let cache_shard = cache_shard.shard(shard); let revision_entry = cache_shard.entry(REVISION); if let Ok(Some(revision)) = read_http_revision(&revision_entry) { - Self::add_directory(cache_shard.join(revision.id()), tags, &mut versions); + // Enforce hash-checking based on the source distribution. + if revision.satisfies(hashes) { + for wheel_dir in symlinks(cache_shard.join(revision.id())) { + if let Some(wheel) = CachedWheel::from_built_source(&wheel_dir) { + Self::add_wheel(wheel, tags, &mut versions); + } + } + } }; } } @@ -123,33 +167,23 @@ impl<'a> RegistryWheelIndex<'a> { versions } - /// Add the wheels in a given directory to the index. - /// - /// Each subdirectory in the given path is expected to be that of an unzipped wheel. - fn add_directory( - path: impl AsRef, + /// Add the [`CachedWheel`] to the index. + fn add_wheel( + wheel: CachedWheel, tags: &Tags, versions: &mut BTreeMap, ) { - // Unzipped wheels are stored as symlinks into the archive directory. 
- for wheel_dir in symlinks(path.as_ref()) { - match CachedWheel::from_path(&wheel_dir) { - None => {} - Some(dist_info) => { - let dist_info = dist_info.into_registry_dist(); + let dist_info = wheel.into_registry_dist(); - // Pick the wheel with the highest priority - let compatibility = dist_info.filename.compatibility(tags); - if let Some(existing) = versions.get_mut(&dist_info.filename.version) { - // Override if we have better compatibility - if compatibility > existing.filename.compatibility(tags) { - *existing = dist_info; - } - } else if compatibility.is_compatible() { - versions.insert(dist_info.filename.version.clone(), dist_info); - } - } + // Pick the wheel with the highest priority + let compatibility = dist_info.filename.compatibility(tags); + if let Some(existing) = versions.get_mut(&dist_info.filename.version) { + // Override if we have better compatibility + if compatibility > existing.filename.compatibility(tags) { + *existing = dist_info; } + } else if compatibility.is_compatible() { + versions.insert(dist_info.filename.version.clone(), dist_info); } } } diff --git a/crates/uv-distribution/src/lib.rs b/crates/uv-distribution/src/lib.rs index f74b0fc9d..61eeb41a4 100644 --- a/crates/uv-distribution/src/lib.rs +++ b/crates/uv-distribution/src/lib.rs @@ -1,4 +1,5 @@ -pub use distribution_database::DistributionDatabase; +pub use archive::Archive; +pub use distribution_database::{read_timestamped_archive, DistributionDatabase}; pub use download::LocalWheel; pub use error::Error; pub use git::{is_same_reference, to_precise}; @@ -6,6 +7,7 @@ pub use index::{BuiltWheelIndex, RegistryWheelIndex}; pub use reporter::Reporter; pub use source::SourceDistributionBuilder; +mod archive; mod distribution_database; mod download; mod error; diff --git a/crates/uv-distribution/src/source/built_wheel_metadata.rs b/crates/uv-distribution/src/source/built_wheel_metadata.rs index 3115d9882..664e32f8f 100644 --- a/crates/uv-distribution/src/source/built_wheel_metadata.rs +++ b/crates/uv-distribution/src/source/built_wheel_metadata.rs @@ -2,19 +2,23 @@ use std::path::PathBuf; use std::str::FromStr; use distribution_filename::WheelFilename; +use distribution_types::Hashed; use platform_tags::Tags; +use pypi_types::HashDigest; use uv_cache::CacheShard; use uv_fs::files; /// The information about the wheel we either just built or got from the cache. #[derive(Debug, Clone)] -pub struct BuiltWheelMetadata { +pub(crate) struct BuiltWheelMetadata { /// The path to the built wheel. pub(crate) path: PathBuf, /// The expected path to the downloaded wheel's entry in the cache. pub(crate) target: PathBuf, /// The parsed filename. pub(crate) filename: WheelFilename, + /// The computed hashes of the source distribution from which the wheel was built. + pub(crate) hashes: Vec, } impl BuiltWheelMetadata { @@ -39,6 +43,20 @@ impl BuiltWheelMetadata { target: cache_shard.join(filename.stem()), path, filename, + hashes: vec![], }) } + + /// Set the computed hashes of the wheel. 
+ #[must_use] + pub(crate) fn with_hashes(mut self, hashes: Vec<HashDigest>) -> Self { + self.hashes = hashes; + self + } +} + +impl Hashed for BuiltWheelMetadata { + fn hashes(&self) -> &[HashDigest] { + &self.hashes + } } diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index dab44f313..deb37e753 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -16,12 +16,12 @@ use zip::ZipArchive; use distribution_filename::WheelFilename; use distribution_types::{ - BuildableSource, DirectArchiveUrl, Dist, FileLocation, GitSourceUrl, LocalEditable, + BuildableSource, DirectArchiveUrl, Dist, FileLocation, GitSourceUrl, Hashed, LocalEditable, PathSourceDist, PathSourceUrl, RemoteSource, SourceDist, SourceUrl, }; use install_wheel_rs::metadata::read_archive_metadata; use platform_tags::Tags; -use pypi_types::Metadata23; +use pypi_types::{HashDigest, Metadata23}; use uv_cache::{ ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Freshness, WheelCache, }; @@ -29,6 +29,7 @@ use uv_client::{ CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient, }; use uv_configuration::{BuildKind, NoBuild}; +use uv_extract::hash::Hasher; use uv_fs::write_atomic; use uv_types::{BuildContext, SourceBuildTrait}; @@ -49,9 +50,7 @@ pub struct SourceDistributionBuilder<'a, T: BuildContext> { } /// The name of the file that contains the revision ID, encoded via `MsgPack`. -/// -/// TODO(charlie): Update the filename whenever we bump the cache version. -pub(crate) const REVISION: &str = "manifest.msgpack"; +pub(crate) const REVISION: &str = "revision.msgpack"; /// The name of the file that contains the cached distribution metadata, encoded via `MsgPack`. pub(crate) const METADATA: &str = "metadata.msgpack"; @@ -76,10 +75,11 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { } /// Download and build a [`SourceDist`]. - pub async fn download_and_build( + pub(super) async fn download_and_build( &self, source: &BuildableSource<'_>, tags: &Tags, + hashes: &[HashDigest], ) -> Result<BuiltWheelMetadata, Error> { let built_wheel_metadata = match &source { BuildableSource::Dist(SourceDist::Registry(dist)) => { @@ -100,6 +100,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: Cow::Borrowed(path), }, tags, + hashes, ) .boxed() .await; @@ -115,9 +116,17 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .join(dist.filename.version.to_string()), ); - self.url(source, &dist.file.filename, &url, &cache_shard, None, tags) - .boxed() - .await? + self.url( + source, + &dist.file.filename, + &url, + &cache_shard, + None, + tags, + hashes, + ) + .boxed() + .await? } BuildableSource::Dist(SourceDist::DirectUrl(dist)) => { let filename = dist.filename().expect("Distribution must have a filename"); @@ -136,22 +145,23 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &cache_shard, subdirectory.as_deref(), tags, + hashes, ) .boxed() .await? } BuildableSource::Dist(SourceDist::Git(dist)) => { - self.git(source, &GitSourceUrl::from(dist), tags) + self.git(source, &GitSourceUrl::from(dist), tags, hashes) .boxed() .await? } BuildableSource::Dist(SourceDist::Path(dist)) => { if dist.path.is_dir() { - self.source_tree(source, &PathSourceUrl::from(dist), tags) + self.source_tree(source, &PathSourceUrl::from(dist), tags, hashes) .boxed() .await? } else { - self.archive(source, &PathSourceUrl::from(dist), tags) + self.archive(source, &PathSourceUrl::from(dist), tags, hashes) .boxed() .await? 
} @@ -176,18 +186,21 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &cache_shard, subdirectory.as_deref(), tags, + hashes, ) .boxed() .await? } BuildableSource::Url(SourceUrl::Git(resource)) => { - self.git(source, resource, tags).boxed().await? + self.git(source, resource, tags, hashes).boxed().await? } BuildableSource::Url(SourceUrl::Path(resource)) => { if resource.path.is_dir() { - self.source_tree(source, resource, tags).boxed().await? + self.source_tree(source, resource, tags, hashes) + .boxed() + .await? } else { - self.archive(source, resource, tags).boxed().await? + self.archive(source, resource, tags, hashes).boxed().await? } } }; @@ -198,9 +211,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { /// Download a [`SourceDist`] and determine its metadata. This typically involves building the /// source distribution into a wheel; however, some build backends support determining the /// metadata without building the source distribution. - pub async fn download_and_build_metadata( + pub(super) async fn download_and_build_metadata( &self, source: &BuildableSource<'_>, + hashes: &[HashDigest], ) -> Result { let metadata = match &source { BuildableSource::Dist(SourceDist::Registry(dist)) => { @@ -220,6 +234,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { url: &url, path: Cow::Borrowed(path), }, + hashes, ) .boxed() .await; @@ -234,9 +249,16 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .join(dist.filename.version.to_string()), ); - self.url_metadata(source, &dist.file.filename, &url, &cache_shard, None) - .boxed() - .await? + self.url_metadata( + source, + &dist.file.filename, + &url, + &cache_shard, + None, + hashes, + ) + .boxed() + .await? } BuildableSource::Dist(SourceDist::DirectUrl(dist)) => { let filename = dist.filename().expect("Distribution must have a filename"); @@ -254,22 +276,23 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &url, &cache_shard, subdirectory.as_deref(), + hashes, ) .boxed() .await? } BuildableSource::Dist(SourceDist::Git(dist)) => { - self.git_metadata(source, &GitSourceUrl::from(dist)) + self.git_metadata(source, &GitSourceUrl::from(dist), hashes) .boxed() .await? } BuildableSource::Dist(SourceDist::Path(dist)) => { if dist.path.is_dir() { - self.source_tree_metadata(source, &PathSourceUrl::from(dist)) + self.source_tree_metadata(source, &PathSourceUrl::from(dist), hashes) .boxed() .await? } else { - self.archive_metadata(source, &PathSourceUrl::from(dist)) + self.archive_metadata(source, &PathSourceUrl::from(dist), hashes) .boxed() .await? } @@ -293,18 +316,23 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &url, &cache_shard, subdirectory.as_deref(), + hashes, ) .boxed() .await? } BuildableSource::Url(SourceUrl::Git(resource)) => { - self.git_metadata(source, resource).boxed().await? + self.git_metadata(source, resource, hashes).boxed().await? } BuildableSource::Url(SourceUrl::Path(resource)) => { if resource.path.is_dir() { - self.source_tree_metadata(source, resource).boxed().await? + self.source_tree_metadata(source, resource, hashes) + .boxed() + .await? } else { - self.archive_metadata(source, resource).boxed().await? + self.archive_metadata(source, resource, hashes) + .boxed() + .await? } } }; @@ -322,19 +350,29 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { cache_shard: &CacheShard, subdirectory: Option<&'data Path>, tags: &Tags, + hashes: &[HashDigest], ) -> Result { // Fetch the revision for the source distribution. 
let revision = self - .url_revision(source, filename, url, cache_shard) + .url_revision(source, filename, url, cache_shard, hashes) .await?; + // Before running the build, check that the hashes match. + if !revision.satisfies(hashes) { + return Err(Error::hash_mismatch( + source.to_string(), + hashes, + revision.hashes(), + )); + } + // Scope all operations to the revision. Within the revision, there's no need to check for // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); // If the cache contains a compatible wheel, return it. if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) { - return Ok(built_wheel); + return Ok(built_wheel.with_hashes(revision.into_hashes())); } let task = self @@ -364,6 +402,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: cache_shard.join(&disk_filename), target: cache_shard.join(wheel_filename.stem()), filename: wheel_filename, + hashes: revision.into_hashes(), }) } @@ -379,12 +418,22 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { url: &'data Url, cache_shard: &CacheShard, subdirectory: Option<&'data Path>, + hashes: &[HashDigest], ) -> Result { // Fetch the revision for the source distribution. let revision = self - .url_revision(source, filename, url, cache_shard) + .url_revision(source, filename, url, cache_shard, hashes) .await?; + // Before running the build, check that the hashes match. + if !revision.satisfies(hashes) { + return Err(Error::hash_mismatch( + source.to_string(), + hashes, + revision.hashes(), + )); + } + // Scope all operations to the revision. Within the revision, there's no need to check for // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); @@ -449,6 +498,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { filename: &str, url: &Url, cache_shard: &CacheShard, + hashes: &[HashDigest], ) -> Result { let cache_entry = cache_shard.entry(REVISION); let cache_control = match self.client.connectivity() { @@ -469,24 +519,40 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Download the source distribution. debug!("Downloading source distribution: {source}"); - let source_dist_entry = cache_shard.shard(revision.id()).entry(filename); - self.persist_url(response, source, filename, &source_dist_entry) + let entry = cache_shard.shard(revision.id()).entry(filename); + let hashes = self + .download_archive(response, source, filename, entry.path(), hashes) .await?; - Ok(revision) + Ok(revision.with_hashes(hashes)) } .boxed() .instrument(info_span!("download", source_dist = %source)) }; let req = self.request(url.clone())?; - self.client + let revision = self + .client .cached_client() .get_serde(req, &cache_entry, cache_control, download) .await .map_err(|err| match err { CachedClientError::Callback(err) => err, CachedClientError::Client(err) => Error::Client(err), - }) + })?; + + // If the archive is missing the required hashes, force a refresh. + if revision.has_digests(hashes) { + Ok(revision) + } else { + self.client + .cached_client() + .skip_cache(self.request(url.clone())?, &cache_entry, download) + .await + .map_err(|err| match err { + CachedClientError::Callback(err) => err, + CachedClientError::Client(err) => Error::Client(err), + }) + } } /// Build a source distribution from a local archive (e.g., `.tar.gz` or `.zip`). 
@@ -495,6 +561,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { source: &BuildableSource<'_>, resource: &PathSourceUrl<'_>, tags: &Tags, + hashes: &[HashDigest], ) -> Result { let cache_shard = self.build_context.cache().shard( CacheBucket::BuiltWheels, @@ -503,9 +570,18 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Fetch the revision for the source distribution. let revision = self - .archive_revision(source, resource, &cache_shard) + .archive_revision(source, resource, &cache_shard, hashes) .await?; + // Before running the build, check that the hashes match. + if !revision.satisfies(hashes) { + return Err(Error::hash_mismatch( + source.to_string(), + hashes, + revision.hashes(), + )); + } + // Scope all operations to the revision. Within the revision, there's no need to check for // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); @@ -543,6 +619,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: cache_shard.join(&disk_filename), target: cache_shard.join(filename.stem()), filename, + hashes: revision.into_hashes(), }) } @@ -554,6 +631,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &self, source: &BuildableSource<'_>, resource: &PathSourceUrl<'_>, + hashes: &[HashDigest], ) -> Result { let cache_shard = self.build_context.cache().shard( CacheBucket::BuiltWheels, @@ -562,9 +640,18 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Fetch the revision for the source distribution. let revision = self - .archive_revision(source, resource, &cache_shard) + .archive_revision(source, resource, &cache_shard, hashes) .await?; + // Before running the build, check that the hashes match. + if !revision.satisfies(hashes) { + return Err(Error::hash_mismatch( + source.to_string(), + hashes, + revision.hashes(), + )); + } + // Scope all operations to the revision. Within the revision, there's no need to check for // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); @@ -627,6 +714,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { source: &BuildableSource<'_>, resource: &PathSourceUrl<'_>, cache_shard: &CacheShard, + hashes: &[HashDigest], ) -> Result { // Determine the last-modified time of the source distribution. let modified = ArchiveTimestamp::from_file(&resource.path).map_err(Error::CacheRead)?; @@ -637,7 +725,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // If the revision already exists, return it. There's no need to check for freshness, since // we use an exact timestamp. if let Some(revision) = read_timestamped_revision(&revision_entry, modified)? { - return Ok(revision); + if revision.has_digests(hashes) { + return Ok(revision); + } } // Otherwise, we need to create a new revision. @@ -646,7 +736,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Unzip the archive to a temporary directory. debug!("Unpacking source distribution: {source}"); let entry = cache_shard.shard(revision.id()).entry("source"); - self.persist_archive(&resource.path, source, &entry).await?; + let hashes = self + .persist_archive(&resource.path, entry.path(), hashes) + .await?; + let revision = revision.with_hashes(hashes); // Persist the revision. 
write_atomic( @@ -668,7 +761,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { source: &BuildableSource<'_>, resource: &PathSourceUrl<'_>, tags: &Tags, + hashes: &[HashDigest], ) -> Result { + // Hash-checking is not supported for local source trees, so fail early if hashes were provided. + if !hashes.is_empty() { + return Err(Error::HashesNotSupportedSourceTree(source.to_string())); + } + let cache_shard = self.build_context.cache().shard( CacheBucket::BuiltWheels, WheelCache::Path(resource.url).root(), ); @@ -714,6 +813,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: cache_shard.join(&disk_filename), target: cache_shard.join(filename.stem()), filename, + hashes: vec![], }) } @@ -725,7 +825,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &self, source: &BuildableSource<'_>, resource: &PathSourceUrl<'_>, + hashes: &[HashDigest], ) -> Result { + // Hash-checking is not supported for local source trees, so fail early if hashes were provided. + if !hashes.is_empty() { + return Err(Error::HashesNotSupportedSourceTree(source.to_string())); + } + let cache_shard = self.build_context.cache().shard( CacheBucket::BuiltWheels, WheelCache::Path(resource.url).root(), ); @@ -742,16 +848,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // If the cache contains compatible metadata, return it. let metadata_entry = cache_shard.entry(METADATA); - if self - .build_context - .cache() - .freshness(&metadata_entry, source.name()) - .is_ok_and(Freshness::is_fresh) - { - if let Some(metadata) = read_cached_metadata(&metadata_entry).await? { - debug!("Using cached metadata for: {source}"); - return Ok(metadata); - } + if let Some(metadata) = read_cached_metadata(&metadata_entry).await? { + debug!("Using cached metadata for: {source}"); + return Ok(metadata); } // If the backend supports `prepare_metadata_for_build_wheel`, use it. @@ -828,7 +927,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { source: &BuildableSource<'_>, resource: &GitSourceUrl<'_>, tags: &Tags, + hashes: &[HashDigest], ) -> Result { + // Hash-checking is not supported for Git dependencies, so fail early if hashes were provided. + if !hashes.is_empty() { + return Err(Error::HashesNotSupportedGit(source.to_string())); + } + // Resolve to a precise Git SHA. let url = if let Some(url) = resolve_precise( resource.url, @@ -882,6 +987,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: cache_shard.join(&disk_filename), target: cache_shard.join(filename.stem()), filename, + hashes: vec![], }) } @@ -893,7 +999,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &self, source: &BuildableSource<'_>, resource: &GitSourceUrl<'_>, + hashes: &[HashDigest], ) -> Result { + // Hash-checking is not supported for Git dependencies, so fail early if hashes were provided. + if !hashes.is_empty() { + return Err(Error::HashesNotSupportedGit(source.to_string())); + } + // Resolve to a precise Git SHA. let url = if let Some(url) = resolve_precise( resource.url, @@ -975,21 +1087,14 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { } /// Download and unzip a source distribution into the cache from an HTTP response. - async fn persist_url( + async fn download_archive( &self, response: Response, source: &BuildableSource<'_>, filename: &str, - cache_entry: &CacheEntry, - ) -> Result<(), Error> { - let cache_path = cache_entry.path(); - if cache_path.is_dir() { - debug!("Distribution is already cached: {source}"); - return Ok(()); - } - - // Download and unzip the source distribution into a temporary directory.
- let span = info_span!("persist_url", filename = filename, source_dist = %source); + target: &Path, + hashes: &[HashDigest], + ) -> Result, Error> { let temp_dir = tempfile::tempdir_in(self.build_context.cache().bucket(CacheBucket::BuiltWheels)) .map_err(Error::CacheWrite)?; @@ -997,9 +1102,29 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .bytes_stream() .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err)) .into_async_read(); - uv_extract::stream::archive(reader.compat(), filename, temp_dir.path()).await?; + + // Create a hasher for each hash algorithm. + let algorithms = { + let mut hash = hashes.iter().map(HashDigest::algorithm).collect::>(); + hash.sort(); + hash.dedup(); + hash + }; + let mut hashers = algorithms.into_iter().map(Hasher::from).collect::>(); + let mut hasher = uv_extract::hash::HashReader::new(reader.compat(), &mut hashers); + + // Download and unzip the source distribution into a temporary directory. + let span = info_span!("download_source_dist", filename = filename, source_dist = %source); + uv_extract::stream::archive(&mut hasher, filename, temp_dir.path()).await?; drop(span); + // If necessary, exhaust the reader to compute the hash. + if !hashes.is_empty() { + hasher.finish().await.map_err(Error::HashExhaustion)?; + } + + let hashes = hashers.into_iter().map(HashDigest::from).collect(); + // Extract the top-level directory. let extracted = match uv_extract::strip_component(temp_dir.path()) { Ok(top_level) => top_level, @@ -1008,39 +1133,51 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { }; // Persist it to the cache. - fs_err::tokio::create_dir_all(cache_path.parent().expect("Cache entry to have parent")) + fs_err::tokio::create_dir_all(target.parent().expect("Cache entry to have parent")) .await .map_err(Error::CacheWrite)?; - fs_err::tokio::rename(extracted, &cache_path) + fs_err::tokio::rename(extracted, target) .await .map_err(Error::CacheWrite)?; - Ok(()) + Ok(hashes) } /// Extract a local archive, and store it at the given [`CacheEntry`]. async fn persist_archive( &self, path: &Path, - source: &BuildableSource<'_>, - cache_entry: &CacheEntry, - ) -> Result<(), Error> { - let cache_path = cache_entry.path(); - if cache_path.is_dir() { - debug!("Distribution is already cached: {source}"); - return Ok(()); - } - + target: &Path, + hashes: &[HashDigest], + ) -> Result, Error> { debug!("Unpacking for build: {}", path.display()); - // Unzip the archive into a temporary directory. let temp_dir = tempfile::tempdir_in(self.build_context.cache().bucket(CacheBucket::BuiltWheels)) .map_err(Error::CacheWrite)?; let reader = fs_err::tokio::File::open(&path) .await .map_err(Error::CacheRead)?; - uv_extract::seek::archive(reader, path, &temp_dir.path()).await?; + + // Create a hasher for each hash algorithm. + let algorithms = { + let mut hash = hashes.iter().map(HashDigest::algorithm).collect::>(); + hash.sort(); + hash.dedup(); + hash + }; + let mut hashers = algorithms.into_iter().map(Hasher::from).collect::>(); + let mut hasher = uv_extract::hash::HashReader::new(reader, &mut hashers); + + // Unzip the archive into a temporary directory. + uv_extract::stream::archive(&mut hasher, path, &temp_dir.path()).await?; + + // If necessary, exhaust the reader to compute the hash. + if !hashes.is_empty() { + hasher.finish().await.map_err(Error::HashExhaustion)?; + } + + let hashes = hashers.into_iter().map(HashDigest::from).collect(); // Extract the top-level directory from the archive. 
let extracted = match uv_extract::strip_component(temp_dir.path()) { @@ -1050,14 +1187,14 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { }; // Persist it to the cache. - fs_err::tokio::create_dir_all(cache_path.parent().expect("Cache entry to have parent")) + fs_err::tokio::create_dir_all(target.parent().expect("Cache entry to have parent")) .await .map_err(Error::CacheWrite)?; - fs_err::tokio::rename(extracted, &cache_path) + fs_err::tokio::rename(extracted, &target) .await .map_err(Error::CacheWrite)?; - Ok(()) + Ok(hashes) } /// Build a source distribution, storing the built wheel in the cache. diff --git a/crates/uv-distribution/src/source/revision.rs b/crates/uv-distribution/src/source/revision.rs index b2f6d5b9a..aadc2945a 100644 --- a/crates/uv-distribution/src/source/revision.rs +++ b/crates/uv-distribution/src/source/revision.rs @@ -1,5 +1,8 @@ +use distribution_types::Hashed; use serde::{Deserialize, Serialize}; +use pypi_types::HashDigest; + /// The [`Revision`] is a thin wrapper around a unique identifier for the source distribution. /// /// A revision represents a unique version of a source distribution, at a level more granular than @@ -7,16 +10,45 @@ use serde::{Deserialize, Serialize}; /// at a URL or a local file path may have multiple revisions, each representing a unique state of /// the distribution, despite the reported version number remaining the same. #[derive(Debug, Clone, Serialize, Deserialize)] -pub(crate) struct Revision(String); +pub(crate) struct Revision { + id: String, + hashes: Vec<HashDigest>, +} impl Revision { /// Initialize a new [`Revision`] with a random UUID. pub(crate) fn new() -> Self { - Self(nanoid::nanoid!()) + Self { + id: nanoid::nanoid!(), + hashes: vec![], + } } /// Return the unique ID of the revision. pub(crate) fn id(&self) -> &str { - &self.0 + &self.id + } + + /// Return the computed hashes of the archive. + pub(crate) fn hashes(&self) -> &[HashDigest] { + &self.hashes + } + + /// Consume the revision and return the computed hashes of the archive. + pub(crate) fn into_hashes(self) -> Vec<HashDigest> { + self.hashes + } + + /// Set the computed hashes of the archive. 
+ #[must_use] + pub(crate) fn with_hashes(mut self, hashes: Vec) -> Self { + self.hashes = hashes; + self + } +} + +impl Hashed for Revision { + fn hashes(&self) -> &[HashDigest] { + &self.hashes } } diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index f40dee266..0c3dbb8fb 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -13,12 +13,16 @@ license = { workspace = true } workspace = true [dependencies] +pypi-types = { workspace = true } + async-compression = { workspace = true, features = ["gzip", "zstd"] } async_zip = { workspace = true, features = ["tokio"] } fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } +md-5.workspace = true rayon = { workspace = true } rustc-hash = { workspace = true } +sha2 = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true, features = ["io-util"] } tokio-tar = { workspace = true } diff --git a/crates/uv-extract/src/hash.rs b/crates/uv-extract/src/hash.rs new file mode 100644 index 000000000..22072f7c8 --- /dev/null +++ b/crates/uv-extract/src/hash.rs @@ -0,0 +1,146 @@ +use std::pin::Pin; +use std::task::{Context, Poll}; + +use sha2::Digest; +use tokio::io::{AsyncReadExt, ReadBuf}; + +use pypi_types::{HashAlgorithm, HashDigest}; + +pub struct Sha256Reader<'a, R> { + reader: R, + hasher: &'a mut sha2::Sha256, +} + +impl<'a, R> Sha256Reader<'a, R> +where + R: tokio::io::AsyncRead + Unpin, +{ + pub fn new(reader: R, hasher: &'a mut sha2::Sha256) -> Self { + Sha256Reader { reader, hasher } + } +} + +impl<'a, R> tokio::io::AsyncRead for Sha256Reader<'a, R> +where + R: tokio::io::AsyncRead + Unpin, +{ + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + let reader = Pin::new(&mut self.reader); + match reader.poll_read(cx, buf) { + Poll::Ready(Ok(())) => { + self.hasher.update(buf.filled()); + Poll::Ready(Ok(())) + } + other => other, + } + } +} + +#[derive(Debug)] +pub enum Hasher { + Md5(md5::Md5), + Sha256(sha2::Sha256), + Sha384(sha2::Sha384), + Sha512(sha2::Sha512), +} + +impl Hasher { + pub fn update(&mut self, data: &[u8]) { + match self { + Hasher::Md5(hasher) => hasher.update(data), + Hasher::Sha256(hasher) => hasher.update(data), + Hasher::Sha384(hasher) => hasher.update(data), + Hasher::Sha512(hasher) => hasher.update(data), + } + } + + pub fn finalize(self) -> Vec { + match self { + Hasher::Md5(hasher) => hasher.finalize().to_vec(), + Hasher::Sha256(hasher) => hasher.finalize().to_vec(), + Hasher::Sha384(hasher) => hasher.finalize().to_vec(), + Hasher::Sha512(hasher) => hasher.finalize().to_vec(), + } + } +} + +impl From for Hasher { + fn from(algorithm: HashAlgorithm) -> Self { + match algorithm { + HashAlgorithm::Md5 => Hasher::Md5(md5::Md5::new()), + HashAlgorithm::Sha256 => Hasher::Sha256(sha2::Sha256::new()), + HashAlgorithm::Sha384 => Hasher::Sha384(sha2::Sha384::new()), + HashAlgorithm::Sha512 => Hasher::Sha512(sha2::Sha512::new()), + } + } +} + +impl From for HashDigest { + fn from(hasher: Hasher) -> Self { + match hasher { + Hasher::Md5(hasher) => HashDigest { + algorithm: HashAlgorithm::Md5, + digest: format!("{:x}", hasher.finalize()).into_boxed_str(), + }, + Hasher::Sha256(hasher) => HashDigest { + algorithm: HashAlgorithm::Sha256, + digest: format!("{:x}", hasher.finalize()).into_boxed_str(), + }, + Hasher::Sha384(hasher) => HashDigest { + algorithm: HashAlgorithm::Sha384, + digest: format!("{:x}", hasher.finalize()).into_boxed_str(), + }, + Hasher::Sha512(hasher) => 
HashDigest { + algorithm: HashAlgorithm::Sha512, + digest: format!("{:x}", hasher.finalize()).into_boxed_str(), + }, + } + } +} + +pub struct HashReader<'a, R> { + reader: R, + hashers: &'a mut [Hasher], +} + +impl<'a, R> HashReader<'a, R> +where + R: tokio::io::AsyncRead + Unpin, +{ + pub fn new(reader: R, hashers: &'a mut [Hasher]) -> Self { + HashReader { reader, hashers } + } + + /// Exhaust the underlying reader. + pub async fn finish(&mut self) -> Result<(), std::io::Error> { + while self.read(&mut vec![0; 8192]).await? > 0 {} + + Ok(()) + } +} + +impl<'a, R> tokio::io::AsyncRead for HashReader<'a, R> +where + R: tokio::io::AsyncRead + Unpin, +{ + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + let reader = Pin::new(&mut self.reader); + match reader.poll_read(cx, buf) { + Poll::Ready(Ok(())) => { + for hasher in self.hashers.iter_mut() { + hasher.update(buf.filled()); + } + Poll::Ready(Ok(())) + } + other => other, + } + } +} diff --git a/crates/uv-extract/src/lib.rs b/crates/uv-extract/src/lib.rs index 20d433071..192aaa8e0 100644 --- a/crates/uv-extract/src/lib.rs +++ b/crates/uv-extract/src/lib.rs @@ -2,6 +2,7 @@ pub use error::Error; pub use sync::*; mod error; +pub mod hash; pub mod seek; pub mod stream; mod sync; diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index f9ac12148..e73db2ae4 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -161,7 +161,8 @@ pub async fn untar_gz( let mut archive = tokio_tar::ArchiveBuilder::new(decompressed_bytes) .set_preserve_mtime(false) .build(); - Ok(untar_in(&mut archive, target.as_ref()).await?) + untar_in(&mut archive, target.as_ref()).await?; + Ok(()) } /// Unzip a `.tar.zst` archive into the target directory, without requiring `Seek`. 
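Aside (not part of the patch): the `Hasher` and `HashReader` types above exist so that uv can compute several digests in a single pass while an archive streams through `uv-extract`. A minimal, self-contained sketch of the same single-pass idea, using the `md-5` and `sha2` crates that `uv-extract` now depends on (the `digests` helper and its names are illustrative, not part of the codebase):

use md5::Md5;
use sha2::{Digest, Sha256};

/// Compute MD5 and SHA-256 over chunked data in one pass, mirroring how
/// `HashReader::poll_read` forwards every buffer it reads to each `Hasher`.
fn digests(data: &[u8]) -> (String, String) {
    let mut md5 = Md5::new();
    let mut sha256 = Sha256::new();
    for chunk in data.chunks(8192) {
        md5.update(chunk);
        sha256.update(chunk);
    }
    (
        format!("{:x}", md5.finalize()),
        format!("{:x}", sha256.finalize()),
    )
}

fn main() {
    let (md5_hex, sha256_hex) = digests(b"example archive bytes");
    println!("md5={md5_hex} sha256={sha256_hex}");
}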
diff --git a/crates/uv-installer/Cargo.toml b/crates/uv-installer/Cargo.toml index 413664f62..789fba48d 100644 --- a/crates/uv-installer/Cargo.toml +++ b/crates/uv-installer/Cargo.toml @@ -22,6 +22,7 @@ pypi-types = { workspace = true } requirements-txt = { workspace = true } uv-cache = { workspace = true } uv-client = { workspace = true } +uv-configuration = { workspace = true } uv-distribution = { workspace = true } uv-extract = { workspace = true } uv-fs = { workspace = true } @@ -29,13 +30,13 @@ uv-interpreter = { workspace = true } uv-normalize = { workspace = true } uv-types = { workspace = true } uv-warnings = { workspace = true } -uv-configuration = { workspace = true } anyhow = { workspace = true } async-channel = { workspace = true } fs-err = { workspace = true } futures = { workspace = true } rayon = { workspace = true } +rmp-serde = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true } tempfile = { workspace = true } diff --git a/crates/uv-installer/src/downloader.rs b/crates/uv-installer/src/downloader.rs index bba3c2fa7..1c60f46e5 100644 --- a/crates/uv-installer/src/downloader.rs +++ b/crates/uv-installer/src/downloader.rs @@ -8,13 +8,14 @@ use tracing::instrument; use url::Url; use distribution_types::{ - BuildableSource, CachedDist, Dist, Identifier, LocalEditable, LocalEditables, RemoteSource, + BuildableSource, CachedDist, Dist, Hashed, Identifier, LocalEditable, LocalEditables, Name, + RemoteSource, }; use platform_tags::Tags; use uv_cache::Cache; use uv_client::RegistryClient; -use uv_distribution::DistributionDatabase; -use uv_types::{BuildContext, InFlight}; +use uv_distribution::{DistributionDatabase, LocalWheel}; +use uv_types::{BuildContext, InFlight, RequiredHashes}; use crate::editable::BuiltEditable; @@ -39,6 +40,7 @@ pub enum Error { pub struct Downloader<'a, Context: BuildContext + Send + Sync> { tags: &'a Tags, cache: &'a Cache, + hashes: &'a RequiredHashes, database: DistributionDatabase<'a, Context>, reporter: Option>, } @@ -47,12 +49,14 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> { pub fn new( cache: &'a Cache, tags: &'a Tags, + hashes: &'a RequiredHashes, client: &'a RegistryClient, build_context: &'a Context, ) -> Self { Self { tags, cache, + hashes, database: DistributionDatabase::new(client, build_context), reporter: None, } @@ -65,6 +69,7 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> { Self { tags: self.tags, cache: self.cache, + hashes: self.hashes, database: self.database.with_reporter(Facade::from(reporter.clone())), reporter: Some(reporter.clone()), } @@ -165,12 +170,27 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> { pub async fn get_wheel(&self, dist: Dist, in_flight: &InFlight) -> Result { let id = dist.distribution_id(); if in_flight.downloads.register(id.clone()) { + let hashes = self.hashes.get(dist.name()).unwrap_or_default(); let result = self .database - .get_or_build_wheel(&dist, self.tags) + .get_or_build_wheel(&dist, self.tags, hashes) .boxed() .map_err(|err| Error::Fetch(dist.clone(), err)) .await + .and_then(|wheel: LocalWheel| { + if wheel.satisfies(hashes) { + Ok(wheel) + } else { + Err(Error::Fetch( + dist.clone(), + uv_distribution::Error::hash_mismatch( + dist.to_string(), + hashes, + wheel.hashes(), + ), + )) + } + }) .map(CachedDist::from); match result { Ok(cached) => { diff --git a/crates/uv-installer/src/plan.rs b/crates/uv-installer/src/plan.rs index 25f74a48d..2151194ae 100644 --- 
a/crates/uv-installer/src/plan.rs +++ b/crates/uv-installer/src/plan.rs @@ -6,6 +6,7 @@ use anyhow::{bail, Result}; use rustc_hash::FxHashMap; use tracing::{debug, warn}; +use distribution_types::Hashed; use distribution_types::{ BuiltDist, CachedDirectUrlDist, CachedDist, Dist, IndexLocations, InstalledDist, InstalledMetadata, InstalledVersion, Name, SourceDist, @@ -13,10 +14,12 @@ use distribution_types::{ use pep508_rs::{Requirement, VersionOrUrl}; use platform_tags::Tags; use uv_cache::{ArchiveTarget, ArchiveTimestamp, Cache, CacheBucket, WheelCache}; +use uv_client::DataWithCachePolicy; use uv_configuration::{NoBinary, Reinstall}; -use uv_distribution::{BuiltWheelIndex, RegistryWheelIndex}; +use uv_distribution::{read_timestamped_archive, Archive, BuiltWheelIndex, RegistryWheelIndex}; use uv_fs::Simplified; use uv_interpreter::PythonEnvironment; +use uv_types::RequiredHashes; use crate::{ResolvedEditable, SitePackages}; @@ -53,20 +56,25 @@ impl<'a> Planner<'a> { /// plan will respect cache entries created after the current time (as per the [`Refresh`] /// policy). Otherwise, entries will be ignored. The downstream distribution database may still /// read those entries from the cache after revalidating them. + /// + /// The install plan will also respect the required hashes, such that it will never return a + /// cached distribution that does not match the required hash. Like pip, though, it _will_ + /// return an _installed_ distribution that does not match the required hash. #[allow(clippy::too_many_arguments)] pub fn build( self, mut site_packages: SitePackages<'_>, reinstall: &Reinstall, no_binary: &NoBinary, + hashes: &RequiredHashes, index_locations: &IndexLocations, cache: &Cache, venv: &PythonEnvironment, tags: &Tags, ) -> Result { // Index all the already-downloaded wheels in the cache. - let mut registry_index = RegistryWheelIndex::new(cache, tags, index_locations); - let built_index = BuiltWheelIndex::new(cache, tags); + let mut registry_index = RegistryWheelIndex::new(cache, tags, index_locations, hashes); + let built_index = BuiltWheelIndex::new(cache, tags, hashes); let mut cached = vec![]; let mut remote = vec![]; @@ -206,16 +214,9 @@ impl<'a> Planner<'a> { } } Some(VersionOrUrl::VersionSpecifier(specifier)) => { - if let Some(distribution) = - registry_index - .get(&requirement.name) - .find_map(|(version, distribution)| { - if specifier.contains(version) { - Some(distribution) - } else { - None - } - }) + if let Some((_version, distribution)) = registry_index + .get(&requirement.name) + .find(|(version, _)| specifier.contains(version)) { debug!("Requirement already cached: {distribution}"); cached.push(CachedDist::Registry(distribution.clone())); @@ -252,19 +253,30 @@ impl<'a> Planner<'a> { CacheBucket::Wheels, WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), ) - .entry(wheel.filename.stem()); + .entry(format!("{}.http", wheel.filename.stem())); - match cache_entry.path().canonicalize() { - Ok(archive) => { - let cached_dist = CachedDirectUrlDist::from_url( - wheel.filename, - wheel.url, - archive, - ); + // Read the HTTP pointer. + match fs_err::File::open(cache_entry.path()) { + Ok(file) => { + let data = DataWithCachePolicy::from_reader(file)?.data; + let archive = rmp_serde::from_slice::(&data)?; - debug!("URL wheel requirement already cached: {cached_dist}"); - cached.push(CachedDist::Url(cached_dist)); - continue; + // Enforce hash checking. 
+ let hashes = hashes.get(&requirement.name).unwrap_or_default(); + if archive.satisfies(hashes) { + let cached_dist = CachedDirectUrlDist::from_url( + wheel.filename, + wheel.url, + archive.hashes, + archive.path, + ); + + debug!( + "URL wheel requirement already cached: {cached_dist}" + ); + cached.push(CachedDist::Url(cached_dist)); + continue; + } } Err(err) if err.kind() == io::ErrorKind::NotFound => { // The cache entry doesn't exist, so it's not fresh. @@ -294,31 +306,25 @@ impl<'a> Planner<'a> { CacheBucket::Wheels, WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), ) - .entry(wheel.filename.stem()); + .entry(format!("{}.rev", wheel.filename.stem())); - match cache_entry.path().canonicalize() { - Ok(archive) => { - if ArchiveTimestamp::up_to_date_with( - &wheel.path, - ArchiveTarget::Cache(&archive), - )? { - let cached_dist = CachedDirectUrlDist::from_url( - wheel.filename, - wheel.url, - archive, - ); + if let Some(archive) = read_timestamped_archive( + &cache_entry, + ArchiveTimestamp::from_file(&wheel.path)?, + )? { + let hashes = hashes.get(&requirement.name).unwrap_or_default(); + if archive.satisfies(hashes) { + let cached_dist = CachedDirectUrlDist::from_url( + wheel.filename, + wheel.url, + archive.hashes, + archive.path, + ); - debug!( - "URL wheel requirement already cached: {cached_dist}" - ); - cached.push(CachedDist::Url(cached_dist)); - continue; - } + debug!("Path wheel requirement already cached: {cached_dist}"); + cached.push(CachedDist::Url(cached_dist)); + continue; } - Err(err) if err.kind() == io::ErrorKind::NotFound => { - // The cache entry doesn't exist, so it's not fresh. - } - Err(err) => return Err(err.into()), } } Dist::Source(SourceDist::DirectUrl(sdist)) => { diff --git a/crates/uv-requirements/src/lookahead.rs b/crates/uv-requirements/src/lookahead.rs index d2c0131fc..e38521bac 100644 --- a/crates/uv-requirements/src/lookahead.rs +++ b/crates/uv-requirements/src/lookahead.rs @@ -1,19 +1,18 @@ use std::collections::VecDeque; use anyhow::{Context, Result}; - use futures::stream::FuturesUnordered; use futures::StreamExt; use rustc_hash::FxHashSet; -use distribution_types::{Dist, DistributionMetadata, LocalEditable}; +use distribution_types::{Dist, DistributionMetadata, LocalEditable, Name}; use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl}; use pypi_types::Metadata23; use uv_client::RegistryClient; use uv_configuration::{Constraints, Overrides}; use uv_distribution::{DistributionDatabase, Reporter}; use uv_resolver::{InMemoryIndex, MetadataResponse}; -use uv_types::{BuildContext, RequestedRequirements}; +use uv_types::{BuildContext, RequestedRequirements, RequiredHashes}; /// A resolver for resolving lookahead requirements from direct URLs. /// @@ -40,6 +39,8 @@ pub struct LookaheadResolver<'a, Context: BuildContext + Send + Sync> { overrides: &'a Overrides, /// The editable requirements for the project. editables: &'a [(LocalEditable, Metadata23)], + /// The required hashes for the project. + hashes: &'a RequiredHashes, /// The in-memory index for resolving dependencies. index: &'a InMemoryIndex, /// The database for fetching and building distributions. @@ -48,11 +49,13 @@ pub struct LookaheadResolver<'a, Context: BuildContext + Send + Sync> { impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> { /// Instantiate a new [`LookaheadResolver`] for a given set of requirements. 
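Aside (not part of the patch): rather than canonicalizing a cache path, the plan above reads a small MessagePack pointer (the `.http`/`.rev` entries) and decodes it with `rmp_serde` into an `Archive` carrying both the unpacked path and the digests computed at download time, which is what makes the hash check possible without re-reading the wheel. A minimal round-trip sketch with a simplified stand-in struct (the real `uv_distribution::Archive` is defined elsewhere; plain `String`s stand in for `HashDigest`):

use serde::{Deserialize, Serialize};
use std::path::PathBuf;

#[derive(Debug, Serialize, Deserialize)]
struct Archive {
    /// Path to the unpacked wheel in the cache.
    path: PathBuf,
    /// Digests computed while the wheel was downloaded.
    hashes: Vec<String>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let archive = Archive {
        path: PathBuf::from("wheels/example-1.0-py3-none-any.whl"),
        hashes: vec!["sha256:abc123".to_string()],
    };
    // Persist and re-read the pointer, as the planner does for cached wheels.
    let bytes = rmp_serde::to_vec(&archive)?;
    let decoded: Archive = rmp_serde::from_slice(&bytes)?;
    assert_eq!(decoded.hashes, archive.hashes);
    Ok(())
}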
+ #[allow(clippy::too_many_arguments)] pub fn new( requirements: &'a [Requirement], constraints: &'a Constraints, overrides: &'a Overrides, editables: &'a [(LocalEditable, Metadata23)], + hashes: &'a RequiredHashes, context: &'a Context, client: &'a RegistryClient, index: &'a InMemoryIndex, @@ -62,6 +65,7 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> { constraints, overrides, editables, + hashes, index, database: DistributionDatabase::new(client, context), } @@ -151,9 +155,10 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> { metadata.requires_dist.clone() } else { // Run the PEP 517 build process to extract metadata from the source distribution. + let hashes = self.hashes.get(dist.name()).unwrap_or_default(); let metadata = self .database - .get_or_build_wheel_metadata(&dist) + .get_or_build_wheel_metadata(&dist, hashes) .await .with_context(|| match &dist { Dist::Built(built) => format!("Failed to download: {built}"), diff --git a/crates/uv-requirements/src/source_tree.rs b/crates/uv-requirements/src/source_tree.rs index a734db17b..b6cccd01b 100644 --- a/crates/uv-requirements/src/source_tree.rs +++ b/crates/uv-requirements/src/source_tree.rs @@ -1,5 +1,4 @@ use std::borrow::Cow; - use std::path::{Path, PathBuf}; use anyhow::{Context, Result}; @@ -25,6 +24,8 @@ pub struct SourceTreeResolver<'a, Context: BuildContext + Send + Sync> { source_trees: Vec, /// The extras to include when resolving requirements. extras: &'a ExtrasSpecification<'a>, + /// Whether to require hashes for all dependencies. + require_hashes: bool, /// The in-memory index for resolving dependencies. index: &'a InMemoryIndex, /// The database for fetching and building distributions. @@ -36,6 +37,7 @@ impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> { pub fn new( source_trees: Vec, extras: &'a ExtrasSpecification<'a>, + require_hashes: bool, context: &'a Context, client: &'a RegistryClient, index: &'a InMemoryIndex, @@ -43,6 +45,7 @@ impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> { Self { source_trees, extras, + require_hashes, index, database: DistributionDatabase::new(client, context), } @@ -84,6 +87,16 @@ impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> { path: Cow::Owned(path), }); + // TODO(charlie): Should we enforce this earlier? If the metadata can be extracted + // statically, it won't go through this resolver. But we'll fail anyway, since the + // dependencies (when extracted from a `pyproject.toml` or `setup.py`) won't include hashes. + if self.require_hashes { + return Err(anyhow::anyhow!( + "Hash-checking is not supported for local directories: {}", + source_tree.user_display() + )); + } + // Fetch the metadata for the distribution. let metadata = { let id = PackageId::from_url(source.url()); @@ -104,7 +117,7 @@ impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> { } else { // Run the PEP 517 build process to extract metadata from the source distribution. let source = BuildableSource::Url(source); - let metadata = self.database.build_wheel_metadata(&source).await?; + let metadata = self.database.build_wheel_metadata(&source, &[]).await?; // Insert the metadata into the index. 
self.index diff --git a/crates/uv-requirements/src/specification.rs b/crates/uv-requirements/src/specification.rs index a952e5473..9feb8514d 100644 --- a/crates/uv-requirements/src/specification.rs +++ b/crates/uv-requirements/src/specification.rs @@ -7,7 +7,7 @@ use tracing::{instrument, Level}; use cache_key::CanonicalUrl; use distribution_types::{FlatIndexLocation, IndexUrl}; use pep508_rs::{Requirement, RequirementsTxtRequirement}; -use requirements_txt::{EditableRequirement, FindLink, RequirementsTxt}; +use requirements_txt::{EditableRequirement, FindLink, RequirementEntry, RequirementsTxt}; use uv_client::BaseClientBuilder; use uv_configuration::{NoBinary, NoBuild}; use uv_fs::Simplified; @@ -20,6 +20,8 @@ use crate::{ExtrasSpecification, RequirementsSource}; pub struct RequirementsSpecification { /// The name of the project specifying requirements. pub project: Option, + /// The `requirements.txt` entries for the project. + pub entries: Vec, /// The requirements for the project. pub requirements: Vec, /// The constraints for the project. @@ -60,6 +62,7 @@ impl RequirementsSpecification { .with_context(|| format!("Failed to parse `{name}`"))?; Self { project: None, + entries: vec![], requirements: vec![requirement], constraints: vec![], overrides: vec![], @@ -79,6 +82,7 @@ impl RequirementsSpecification { .with_context(|| format!("Failed to parse `{name}`"))?; Self { project: None, + entries: vec![], requirements: vec![], constraints: vec![], overrides: vec![], @@ -98,6 +102,7 @@ impl RequirementsSpecification { RequirementsTxt::parse(path, std::env::current_dir()?, client_builder).await?; Self { project: None, + entries: requirements_txt.requirements.clone(), requirements: requirements_txt .requirements .into_iter() @@ -148,6 +153,7 @@ impl RequirementsSpecification { { Self { project: Some(project.name), + entries: vec![], requirements: project .requirements .into_iter() @@ -175,6 +181,7 @@ impl RequirementsSpecification { })?; Self { project: None, + entries: vec![], requirements: vec![], constraints: vec![], overrides: vec![], @@ -200,6 +207,7 @@ impl RequirementsSpecification { })?; Self { project: None, + entries: vec![], requirements: vec![], constraints: vec![], overrides: vec![], @@ -232,6 +240,7 @@ impl RequirementsSpecification { // a requirements file can also add constraints. for source in requirements { let source = Self::from_source(source, extras, client_builder).await?; + spec.entries.extend(source.entries); spec.requirements.extend(source.requirements); spec.constraints.extend(source.constraints); spec.overrides.extend(source.overrides); @@ -261,7 +270,8 @@ impl RequirementsSpecification { spec.no_build.extend(source.no_build); } - // Read all constraints, treating _everything_ as a constraint. + // Read all constraints, treating _everything_ as a constraint. The raw entries (i.e., + // hashes) are ignored, as they are not relevant for constraints. 
for source in constraints { let source = Self::from_source(source, extras, client_builder).await?; for requirement in source.requirements { @@ -311,6 +321,7 @@ impl RequirementsSpecification { } } } + spec.entries.extend(source.entries); spec.overrides.extend(source.constraints); spec.overrides.extend(source.overrides); diff --git a/crates/uv-requirements/src/unnamed.rs b/crates/uv-requirements/src/unnamed.rs index 96e5cc179..e658852ae 100644 --- a/crates/uv-requirements/src/unnamed.rs +++ b/crates/uv-requirements/src/unnamed.rs @@ -27,6 +27,8 @@ use uv_types::BuildContext; pub struct NamedRequirementsResolver<'a, Context: BuildContext + Send + Sync> { /// The requirements for the project. requirements: Vec, + /// Whether to check hashes for distributions. + require_hashes: bool, /// The in-memory index for resolving dependencies. index: &'a InMemoryIndex, /// The database for fetching and building distributions. @@ -37,12 +39,14 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont /// Instantiate a new [`NamedRequirementsResolver`] for a given set of requirements. pub fn new( requirements: Vec, + require_hashes: bool, context: &'a Context, client: &'a RegistryClient, index: &'a InMemoryIndex, ) -> Self { Self { requirements, + require_hashes, index, database: DistributionDatabase::new(client, context), } @@ -61,6 +65,7 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont pub async fn resolve(self) -> Result> { let Self { requirements, + require_hashes, index, database, } = self; @@ -69,7 +74,8 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont match requirement { RequirementsTxtRequirement::Pep508(requirement) => Ok(requirement), RequirementsTxtRequirement::Unnamed(requirement) => { - Self::resolve_requirement(requirement, index, &database).await + Self::resolve_requirement(requirement, require_hashes, index, &database) + .await } } }) @@ -81,6 +87,7 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont /// Infer the package name for a given "unnamed" requirement. async fn resolve_requirement( requirement: UnnamedRequirement, + require_hashes: bool, index: &InMemoryIndex, database: &DistributionDatabase<'a, Context>, ) -> Result { @@ -233,6 +240,13 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont } }; + // TODO(charlie): Support `--require-hashes` for unnamed requirements. + if require_hashes { + return Err(anyhow::anyhow!( + "Unnamed requirements are not supported with `--require-hashes`" + )); + } + // Fetch the metadata for the distribution. let name = { let id = PackageId::from_url(source.url()); @@ -248,7 +262,7 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont } else { // Run the PEP 517 build process to extract metadata from the source distribution. 
let source = BuildableSource::Url(source); - let metadata = database.build_wheel_metadata(&source).await?; + let metadata = database.build_wheel_metadata(&source, &[]).await?; let name = metadata.name.clone(); diff --git a/crates/uv-resolver/src/error.rs b/crates/uv-resolver/src/error.rs index 623136dff..8391ad6aa 100644 --- a/crates/uv-resolver/src/error.rs +++ b/crates/uv-resolver/src/error.rs @@ -93,6 +93,9 @@ pub enum ResolveError { #[error("Attempted to construct an invalid version specifier")] InvalidVersion(#[from] pep440_rs::VersionSpecifierBuildError), + #[error("In `--require-hashes` mode, all requirements must be pinned upfront with `==`, but found: {0}")] + UnhashedPackage(PackageName), + /// Something unexpected happened. #[error("{0}")] Failure(String), diff --git a/crates/uv-resolver/src/flat_index.rs b/crates/uv-resolver/src/flat_index.rs index 820ad310a..3e09689bc 100644 --- a/crates/uv-resolver/src/flat_index.rs +++ b/crates/uv-resolver/src/flat_index.rs @@ -11,9 +11,11 @@ use distribution_types::{ }; use pep440_rs::Version; use platform_tags::Tags; +use pypi_types::HashDigest; use uv_client::FlatIndexEntries; use uv_configuration::{NoBinary, NoBuild}; use uv_normalize::PackageName; +use uv_types::RequiredHashes; /// A set of [`PrioritizedDist`] from a `--find-links` entry, indexed by [`PackageName`] /// and [`Version`]. @@ -32,6 +34,7 @@ impl FlatIndex { pub fn from_entries( entries: FlatIndexEntries, tags: &Tags, + required_hashes: &RequiredHashes, no_build: &NoBuild, no_binary: &NoBinary, ) -> Self { @@ -44,6 +47,7 @@ impl FlatIndex { file, filename, tags, + required_hashes, no_build, no_binary, url, @@ -56,11 +60,13 @@ impl FlatIndex { Self { index, offline } } + #[allow(clippy::too_many_arguments)] fn add_file( distributions: &mut FlatDistributions, file: File, filename: DistFilename, tags: &Tags, + required_hashes: &RequiredHashes, no_build: &NoBuild, no_binary: &NoBinary, index: IndexUrl, @@ -71,7 +77,13 @@ impl FlatIndex { DistFilename::WheelFilename(filename) => { let version = filename.version.clone(); - let compatibility = Self::wheel_compatibility(&filename, tags, no_binary); + let compatibility = Self::wheel_compatibility( + &filename, + &file.hashes, + tags, + required_hashes, + no_binary, + ); let dist = Dist::Built(BuiltDist::Registry(RegistryBuiltDist { filename, file: Box::new(file), @@ -87,7 +99,12 @@ impl FlatIndex { } } DistFilename::SourceDistFilename(filename) => { - let compatibility = Self::source_dist_compatibility(&filename, no_build); + let compatibility = Self::source_dist_compatibility( + &filename, + &file.hashes, + required_hashes, + no_build, + ); let dist = Dist::Source(SourceDist::Registry(RegistrySourceDist { filename: filename.clone(), file: Box::new(file), @@ -107,6 +124,8 @@ impl FlatIndex { fn source_dist_compatibility( filename: &SourceDistFilename, + hashes: &[HashDigest], + required_hashes: &RequiredHashes, no_build: &NoBuild, ) -> SourceDistCompatibility { // Check if source distributions are allowed for this package. 
@@ -120,12 +139,28 @@ impl FlatIndex { return SourceDistCompatibility::Incompatible(IncompatibleSource::NoBuild); } + // Check if hashes line up + if let Some(required_hashes) = required_hashes.get(&filename.name) { + if !required_hashes.is_empty() { + if hashes.is_empty() { + return SourceDistCompatibility::Incompatible(IncompatibleSource::MissingHash); + } + if !hashes.iter().any(|hash| required_hashes.contains(hash)) { + return SourceDistCompatibility::Incompatible( + IncompatibleSource::MismatchedHash, + ); + } + } + } + SourceDistCompatibility::Compatible } fn wheel_compatibility( filename: &WheelFilename, + hashes: &[HashDigest], tags: &Tags, + required_hashes: &RequiredHashes, no_binary: &NoBinary, ) -> WheelCompatibility { // Check if binaries are allowed for this package. @@ -139,6 +174,18 @@ impl FlatIndex { return WheelCompatibility::Incompatible(IncompatibleWheel::NoBinary); } + // Check if hashes line up + if let Some(required_hashes) = required_hashes.get(&filename.name) { + if !required_hashes.is_empty() { + if hashes.is_empty() { + return WheelCompatibility::Incompatible(IncompatibleWheel::MissingHash); + } + if !hashes.iter().any(|hash| required_hashes.contains(hash)) { + return WheelCompatibility::Incompatible(IncompatibleWheel::MismatchedHash); + } + } + } + // Determine a compatibility for the wheel based on tags. WheelCompatibility::from(filename.compatibility(tags)) } diff --git a/crates/uv-resolver/src/hash_checking_mode.rs b/crates/uv-resolver/src/hash_checking_mode.rs new file mode 100644 index 000000000..080939339 --- /dev/null +++ b/crates/uv-resolver/src/hash_checking_mode.rs @@ -0,0 +1,15 @@ +#[derive(Debug, Default, Clone, Copy)] +pub enum HashCheckingMode { + /// Hash-checking mode is disabled. + #[default] + Disabled, + /// Hash-checking mode is enabled. + Enabled, +} + +impl HashCheckingMode { + /// Returns `true` if hash-checking is enabled. + pub fn is_enabled(self) -> bool { + matches!(self, Self::Enabled) + } +} diff --git a/crates/uv-resolver/src/lib.rs b/crates/uv-resolver/src/lib.rs index c7fd1aea1..4a1dad07f 100644 --- a/crates/uv-resolver/src/lib.rs +++ b/crates/uv-resolver/src/lib.rs @@ -2,6 +2,7 @@ pub use dependency_mode::DependencyMode; pub use error::ResolveError; pub use exclusions::Exclusions; pub use flat_index::FlatIndex; +pub use hash_checking_mode::HashCheckingMode; pub use manifest::Manifest; pub use options::{Options, OptionsBuilder}; pub use preferences::{Preference, PreferenceError}; @@ -26,6 +27,7 @@ mod editables; mod error; mod exclusions; mod flat_index; +mod hash_checking_mode; mod manifest; mod options; mod pins; diff --git a/crates/uv-resolver/src/options.rs b/crates/uv-resolver/src/options.rs index 764728391..bc9481fed 100644 --- a/crates/uv-resolver/src/options.rs +++ b/crates/uv-resolver/src/options.rs @@ -1,5 +1,6 @@ use chrono::{DateTime, Utc}; +use crate::hash_checking_mode::HashCheckingMode; use crate::{DependencyMode, PreReleaseMode, ResolutionMode}; /// Options for resolving a manifest. @@ -8,8 +9,8 @@ pub struct Options { pub resolution_mode: ResolutionMode, pub prerelease_mode: PreReleaseMode, pub dependency_mode: DependencyMode, + pub hash_checking_mode: HashCheckingMode, pub exclude_newer: Option>, - pub require_hashes: bool, } /// Builder for [`Options`]. 
@@ -18,8 +19,8 @@ pub struct OptionsBuilder { resolution_mode: ResolutionMode, prerelease_mode: PreReleaseMode, dependency_mode: DependencyMode, + hash_checking_mode: HashCheckingMode, exclude_newer: Option>, - require_hashes: bool, } impl OptionsBuilder { @@ -49,6 +50,13 @@ impl OptionsBuilder { self } + /// Sets the hash-checking mode. + #[must_use] + pub fn hash_checking_mode(mut self, hash_checking_mode: HashCheckingMode) -> Self { + self.hash_checking_mode = hash_checking_mode; + self + } + /// Sets the exclusion date. #[must_use] pub fn exclude_newer(mut self, exclude_newer: Option>) -> Self { @@ -56,21 +64,14 @@ impl OptionsBuilder { self } - /// Sets the `--requires-hash` flag. - #[must_use] - pub fn require_hashes(mut self, require_hashes: bool) -> Self { - self.require_hashes = require_hashes; - self - } - /// Builds the options. pub fn build(self) -> Options { Options { resolution_mode: self.resolution_mode, prerelease_mode: self.prerelease_mode, dependency_mode: self.dependency_mode, + hash_checking_mode: self.hash_checking_mode, exclude_newer: self.exclude_newer, - require_hashes: self.require_hashes, } } } diff --git a/crates/uv-resolver/src/resolver/mod.rs b/crates/uv-resolver/src/resolver/mod.rs index 1a4d85354..732cdf867 100644 --- a/crates/uv-resolver/src/resolver/mod.rs +++ b/crates/uv-resolver/src/resolver/mod.rs @@ -31,12 +31,12 @@ use uv_configuration::{Constraints, Overrides}; use uv_distribution::DistributionDatabase; use uv_interpreter::Interpreter; use uv_normalize::PackageName; -use uv_types::{BuildContext, InstalledPackagesProvider}; +use uv_types::{BuildContext, InstalledPackagesProvider, RequiredHashes}; use crate::candidate_selector::{CandidateDist, CandidateSelector}; use crate::editables::Editables; use crate::error::ResolveError; -use crate::flat_index::FlatIndex; +use crate::hash_checking_mode::HashCheckingMode; use crate::manifest::Manifest; use crate::pins::FilePins; use crate::preferences::Preferences; @@ -55,7 +55,7 @@ pub use crate::resolver::provider::{ use crate::resolver::reporter::Facade; pub use crate::resolver::reporter::{BuildId, Reporter}; use crate::yanks::AllowedYanks; -use crate::{DependencyMode, Exclusions, Options}; +use crate::{DependencyMode, Exclusions, FlatIndex, Options}; mod batch_prefetch; mod index; @@ -122,6 +122,8 @@ pub struct Resolver< urls: Urls, locals: Locals, dependency_mode: DependencyMode, + hash_checking_mode: HashCheckingMode, + hashes: &'a RequiredHashes, markers: &'a MarkerEnvironment, python_requirement: PythonRequirement, selector: CandidateSelector, @@ -156,6 +158,7 @@ impl< client: &'a RegistryClient, flat_index: &'a FlatIndex, index: &'a InMemoryIndex, + hashes: &'a RequiredHashes, build_context: &'a Context, installed_packages: &'a InstalledPackages, ) -> Result { @@ -166,6 +169,7 @@ impl< tags, PythonRequirement::new(interpreter, markers), AllowedYanks::from_manifest(&manifest, markers), + hashes, options.exclude_newer, build_context.no_binary(), build_context.no_build(), @@ -173,6 +177,7 @@ impl< Self::new_custom_io( manifest, options, + hashes, markers, PythonRequirement::new(interpreter, markers), index, @@ -189,9 +194,11 @@ impl< > Resolver<'a, Provider, InstalledPackages> { /// Initialize a new resolver using a user provided backend. 
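Aside (not part of the patch): with the boolean `require_hashes` option replaced by `HashCheckingMode`, a resolver caller would opt in through the builder roughly as below. This is a hedged sketch: it assumes `OptionsBuilder` exposes a `Default` constructor, which is not shown in this hunk.

use uv_resolver::{HashCheckingMode, Options, OptionsBuilder};

fn resolver_options() -> Options {
    OptionsBuilder::default()
        .hash_checking_mode(HashCheckingMode::Enabled)
        .build()
}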
+ #[allow(clippy::too_many_arguments)] pub fn new_custom_io( manifest: Manifest, options: Options, + hashes: &'a RequiredHashes, markers: &'a MarkerEnvironment, python_requirement: PythonRequirement, index: &'a InMemoryIndex, @@ -205,6 +212,7 @@ impl< visited: DashSet::default(), selector: CandidateSelector::for_resolution(options, &manifest, markers), dependency_mode: options.dependency_mode, + hash_checking_mode: options.hash_checking_mode, urls: Urls::from_manifest(&manifest, markers)?, locals: Locals::from_manifest(&manifest, markers), project: manifest.project, @@ -214,6 +222,7 @@ impl< preferences: Preferences::from_iter(manifest.preferences, markers), exclusions: manifest.exclusions, editables: Editables::from_requirements(manifest.editables), + hashes, markers, python_requirement, reporter: None, @@ -518,6 +527,13 @@ impl< PubGrubPackage::Root(_) => {} PubGrubPackage::Python(_) => {} PubGrubPackage::Package(package_name, _extra, None) => { + // Validate that the package is permitted under hash-checking mode. + if self.hash_checking_mode.is_enabled() { + if !self.hashes.contains(package_name) { + return Err(ResolveError::UnhashedPackage(package_name.clone())); + } + } + // Emit a request to fetch the metadata for this package. if self.index.packages.register(package_name.clone()) { priorities.add(package_name.clone()); @@ -527,6 +543,13 @@ impl< } } PubGrubPackage::Package(package_name, _extra, Some(url)) => { + // Validate that the package is permitted under hash-checking mode. + if self.hash_checking_mode.is_enabled() { + if !self.hashes.contains(package_name) { + return Err(ResolveError::UnhashedPackage(package_name.clone())); + } + } + // Emit a request to fetch the metadata for this distribution. let dist = Dist::from_url(package_name.clone(), url.clone())?; if self.index.distributions.register(dist.package_id()) { diff --git a/crates/uv-resolver/src/resolver/provider.rs b/crates/uv-resolver/src/resolver/provider.rs index 77078fd50..eb36edb03 100644 --- a/crates/uv-resolver/src/resolver/provider.rs +++ b/crates/uv-resolver/src/resolver/provider.rs @@ -3,14 +3,14 @@ use std::future::Future; use anyhow::Result; use chrono::{DateTime, Utc}; -use distribution_types::{Dist, IndexLocations}; +use distribution_types::{Dist, IndexLocations, Name}; use platform_tags::Tags; use pypi_types::Metadata23; use uv_client::RegistryClient; use uv_configuration::{NoBinary, NoBuild}; use uv_distribution::DistributionDatabase; use uv_normalize::PackageName; -use uv_types::BuildContext; +use uv_types::{BuildContext, RequiredHashes}; use crate::flat_index::FlatIndex; use crate::python_requirement::PythonRequirement; @@ -83,6 +83,7 @@ pub struct DefaultResolverProvider<'a, Context: BuildContext + Send + Sync> { tags: Tags, python_requirement: PythonRequirement, allowed_yanks: AllowedYanks, + required_hashes: RequiredHashes, exclude_newer: Option>, no_binary: NoBinary, no_build: NoBuild, @@ -98,6 +99,7 @@ impl<'a, Context: BuildContext + Send + Sync> DefaultResolverProvider<'a, Contex tags: &'a Tags, python_requirement: PythonRequirement, allowed_yanks: AllowedYanks, + required_hashes: &'a RequiredHashes, exclude_newer: Option>, no_binary: &'a NoBinary, no_build: &'a NoBuild, @@ -109,6 +111,7 @@ impl<'a, Context: BuildContext + Send + Sync> DefaultResolverProvider<'a, Contex tags: tags.clone(), python_requirement, allowed_yanks, + required_hashes: required_hashes.clone(), exclude_newer, no_binary: no_binary.clone(), no_build: no_build.clone(), @@ -136,6 +139,7 @@ impl<'a, Context: BuildContext + Send + 
Sync> ResolverProvider &self.tags, &self.python_requirement, &self.allowed_yanks, + &self.required_hashes, self.exclude_newer.as_ref(), self.flat_index.get(package_name).cloned(), &self.no_binary, @@ -175,7 +179,8 @@ impl<'a, Context: BuildContext + Send + Sync> ResolverProvider /// Fetch the metadata for a distribution, building it if necessary. async fn get_or_build_wheel_metadata<'io>(&'io self, dist: &'io Dist) -> WheelMetadataResult { - match self.fetcher.get_or_build_wheel_metadata(dist).await { + let hashes = self.required_hashes.get(dist.name()).unwrap_or_default(); + match self.fetcher.get_or_build_wheel_metadata(dist, hashes).await { Ok(metadata) => Ok(MetadataResponse::Found(metadata)), Err(err) => match err { uv_distribution::Error::Client(client) => match client.into_kind() { diff --git a/crates/uv-resolver/src/version_map.rs b/crates/uv-resolver/src/version_map.rs index f2608425e..2d714ca38 100644 --- a/crates/uv-resolver/src/version_map.rs +++ b/crates/uv-resolver/src/version_map.rs @@ -2,8 +2,9 @@ use std::collections::btree_map::{BTreeMap, Entry}; use std::sync::OnceLock; use chrono::{DateTime, Utc}; +use rkyv::{de::deserializers::SharedDeserializeMap, Deserialize}; use rustc_hash::FxHashSet; -use tracing::{instrument, warn}; +use tracing::instrument; use distribution_filename::{DistFilename, WheelFilename}; use distribution_types::{ @@ -13,10 +14,10 @@ use distribution_types::{ use pep440_rs::{Version, VersionSpecifiers}; use platform_tags::Tags; use pypi_types::{HashDigest, Yanked}; -use rkyv::{de::deserializers::SharedDeserializeMap, Deserialize}; use uv_client::{OwnedArchive, SimpleMetadata, VersionFiles}; use uv_configuration::{NoBinary, NoBuild}; use uv_normalize::PackageName; +use uv_types::RequiredHashes; use uv_warnings::warn_user_once; use crate::flat_index::FlatDistributions; @@ -47,6 +48,7 @@ impl VersionMap { tags: &Tags, python_requirement: &PythonRequirement, allowed_yanks: &AllowedYanks, + required_hashes: &RequiredHashes, exclude_newer: Option<&DateTime>, flat_index: Option, no_binary: &NoBinary, @@ -110,6 +112,10 @@ impl VersionMap { .allowed_versions(package_name) .cloned() .unwrap_or_default(); + let required_hashes = required_hashes + .get(package_name) + .unwrap_or_default() + .to_vec(); Self { inner: VersionMapInner::Lazy(VersionMapLazy { map, @@ -121,6 +127,7 @@ impl VersionMap { python_requirement: python_requirement.clone(), exclude_newer: exclude_newer.copied(), allowed_yanks, + required_hashes, }), } } @@ -303,6 +310,8 @@ struct VersionMapLazy { exclude_newer: Option>, /// Which yanked versions are allowed allowed_yanks: FxHashSet, + /// The hashes of allowed distributions. 
+ required_hashes: Vec, } impl VersionMapLazy { @@ -386,6 +395,7 @@ impl VersionMapLazy { &filename, &version, requires_python, + &hashes, yanked, excluded, upload_time, @@ -401,6 +411,7 @@ impl VersionMapLazy { let compatibility = self.source_dist_compatibility( &version, requires_python, + &hashes, yanked, excluded, upload_time, @@ -423,10 +434,12 @@ impl VersionMapLazy { simple.dist.get_or_init(get_or_init).as_ref() } + #[allow(clippy::too_many_arguments)] fn source_dist_compatibility( &self, version: &Version, requires_python: Option, + hashes: &[HashDigest], yanked: Option, excluded: bool, upload_time: Option, @@ -443,6 +456,19 @@ impl VersionMapLazy { )); } + // Check if hashes line up + if !self.required_hashes.is_empty() { + if hashes.is_empty() { + return SourceDistCompatibility::Incompatible(IncompatibleSource::MissingHash); + } + if !hashes + .iter() + .any(|hash| self.required_hashes.contains(hash)) + { + return SourceDistCompatibility::Incompatible(IncompatibleSource::MismatchedHash); + } + } + // Check if yanked if let Some(yanked) = yanked { if yanked.is_yanked() && !self.allowed_yanks.contains(version) { @@ -466,11 +492,13 @@ impl VersionMapLazy { SourceDistCompatibility::Compatible } + #[allow(clippy::too_many_arguments)] fn wheel_compatibility( &self, filename: &WheelFilename, version: &Version, requires_python: Option, + hashes: &[HashDigest], yanked: Option, excluded: bool, upload_time: Option, @@ -485,6 +513,19 @@ impl VersionMapLazy { return WheelCompatibility::Incompatible(IncompatibleWheel::ExcludeNewer(upload_time)); } + // Check if hashes line up + if !self.required_hashes.is_empty() { + if hashes.is_empty() { + return WheelCompatibility::Incompatible(IncompatibleWheel::MissingHash); + } + if !hashes + .iter() + .any(|hash| self.required_hashes.contains(hash)) + { + return WheelCompatibility::Incompatible(IncompatibleWheel::MismatchedHash); + } + } + // Check if yanked if let Some(yanked) = yanked { if yanked.is_yanked() && !self.allowed_yanks.contains(version) { diff --git a/crates/uv-resolver/tests/resolver.rs b/crates/uv-resolver/tests/resolver.rs index 16cb9e195..93ca66436 100644 --- a/crates/uv-resolver/tests/resolver.rs +++ b/crates/uv-resolver/tests/resolver.rs @@ -21,7 +21,9 @@ use uv_resolver::{ DisplayResolutionGraph, Exclusions, FlatIndex, InMemoryIndex, Manifest, Options, OptionsBuilder, PreReleaseMode, Preference, ResolutionGraph, ResolutionMode, Resolver, }; -use uv_types::{BuildContext, BuildIsolation, EmptyInstalledPackages, SourceBuildTrait}; +use uv_types::{ + BuildContext, BuildIsolation, EmptyInstalledPackages, RequiredHashes, SourceBuildTrait, +}; // Exclude any packages uploaded after this date. 
static EXCLUDE_NEWER: Lazy<DateTime<Utc>> = Lazy::new(|| { @@ -123,6 +125,7 @@ async fn resolve( find_default_python(&Cache::temp().unwrap()).expect("Expected a python to be installed"); let interpreter = Interpreter::artificial(real_interpreter.platform().clone(), markers.clone()); let build_context = DummyContext::new(Cache::temp()?, interpreter.clone()); + let hashes = RequiredHashes::default(); let installed_packages = EmptyInstalledPackages; let resolver = Resolver::new( manifest, @@ -133,6 +136,7 @@ async fn resolve( &client, &flat_index, &index, + &hashes, &build_context, &installed_packages, )?; diff --git a/crates/uv-types/Cargo.toml b/crates/uv-types/Cargo.toml index a4f22dd51..2ee16ce44 100644 --- a/crates/uv-types/Cargo.toml +++ b/crates/uv-types/Cargo.toml @@ -15,7 +15,9 @@ workspace = true [dependencies] distribution-types = { workspace = true } once-map = { workspace = true } +pep440_rs = { workspace = true } pep508_rs = { workspace = true } +pypi-types = { workspace = true } uv-cache = { workspace = true } uv-interpreter = { workspace = true } uv-normalize = { workspace = true } @@ -27,6 +29,7 @@ itertools = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, optional = true } serde_json = { workspace = true, optional = true } +thiserror = { workspace = true } [features] default = [] diff --git a/crates/uv-types/src/hashes.rs b/crates/uv-types/src/hashes.rs new file mode 100644 index 000000000..4f12c18c7 --- /dev/null +++ b/crates/uv-types/src/hashes.rs @@ -0,0 +1,99 @@ +use rustc_hash::FxHashMap; +use std::str::FromStr; + +use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl}; +use pypi_types::{HashDigest, HashError}; +use uv_normalize::PackageName; + +/// A map from package name to the hashes that any distribution selected for that +/// package must match. +#[derive(Debug, Default, Clone)] +pub struct RequiredHashes(FxHashMap<PackageName, Vec<HashDigest>>); + +impl RequiredHashes { + /// Generate the [`RequiredHashes`] from a set of requirement entries. + pub fn from_requirements( + requirements: impl Iterator<Item = (Requirement, Vec<String>)>, + markers: &MarkerEnvironment, + ) -> Result<Self, RequiredHashesError> { + let mut allowed_hashes = FxHashMap::<PackageName, Vec<HashDigest>>::default(); + + // For each requirement, map from name to allowed hashes. We use the last entry for each + // package. + // + // For now, unnamed requirements are unsupported. This should be fine, since `--require-hashes` + // tends to be used after `pip-compile`, which will always output named requirements. + // + // TODO(charlie): Preserve hashes from `requirements.txt` through to this pass, so that we + // can iterate over requirements directly, rather than iterating over the entries. + for (requirement, hashes) in requirements { + if !requirement.evaluate_markers(markers, &[]) { + continue; + } + + // Every requirement must be either a pinned version or a direct URL. + match requirement.version_or_url.as_ref() { + Some(VersionOrUrl::Url(_)) => { + // Direct URLs are always allowed. + } + Some(VersionOrUrl::VersionSpecifier(specifiers)) => { + if specifiers + .iter() + .any(|specifier| matches!(specifier.operator(), pep440_rs::Operator::Equal)) + { + // Pinned versions are allowed. + } else { + return Err(RequiredHashesError::UnpinnedRequirement( + requirement.to_string(), + )); + } + } + None => { + return Err(RequiredHashesError::UnpinnedRequirement( + requirement.to_string(), + )) + } + } + + // Every requirement must include a hash. + if hashes.is_empty() { + return Err(RequiredHashesError::MissingHashes(requirement.to_string())); + } + + // Parse the hashes.
+ let hashes = hashes + .iter() + .map(|hash| HashDigest::from_str(hash)) + .collect::<Result<Vec<_>, _>>()?; + + // TODO(charlie): Extract hashes from URL fragments. + allowed_hashes.insert(requirement.name, hashes); + } + + Ok(Self(allowed_hashes)) + } + + /// Returns the hashes required for the given package, if any. + pub fn get(&self, package_name: &PackageName) -> Option<&[HashDigest]> { + self.0.get(package_name).map(Vec::as_slice) + } + + /// Returns `true` if the given package has any required hashes. + pub fn contains(&self, package_name: &PackageName) -> bool { + self.0.contains_key(package_name) + } +} + +#[derive(thiserror::Error, Debug)] +pub enum RequiredHashesError { + #[error(transparent)] + Hash(#[from] HashError), + #[error("Unnamed requirements are not supported in `--require-hashes`")] + UnnamedRequirement, + #[error("In `--require-hashes` mode, all requirement must have their versions pinned with `==`, but found: {0}")] + UnpinnedRequirement(String), + #[error("In `--require-hashes` mode, all requirement must have a hash, but none were provided for: {0}")] + MissingHashes(String), +} diff --git a/crates/uv-types/src/lib.rs b/crates/uv-types/src/lib.rs index cf28981fd..281e685a5 100644 --- a/crates/uv-types/src/lib.rs +++ b/crates/uv-types/src/lib.rs @@ -1,10 +1,12 @@ //! Fundamental types shared across `uv` crates. pub use builds::*; pub use downloads::*; +pub use hashes::*; pub use requirements::*; pub use traits::*; mod builds; mod downloads; +mod hashes; mod requirements; mod traits; diff --git a/crates/uv/src/commands/pip_compile.rs b/crates/uv/src/commands/pip_compile.rs index 4b2235ec7..c1388e6b2 100644 --- a/crates/uv/src/commands/pip_compile.rs +++ b/crates/uv/src/commands/pip_compile.rs @@ -38,7 +38,7 @@ use uv_resolver::{ Manifest, OptionsBuilder, PreReleaseMode, PythonRequirement, ResolutionMode, Resolver, }; use uv_toolchain::PythonVersion; -use uv_types::{BuildIsolation, EmptyInstalledPackages, InFlight}; +use uv_types::{BuildIsolation, EmptyInstalledPackages, InFlight, RequiredHashes}; use uv_warnings::warn_user; use crate::commands::reporters::{DownloadReporter, ResolverReporter}; @@ -101,6 +101,7 @@ pub(crate) async fn pip_compile( // Read all requirements from the provided sources. let RequirementsSpecification { project, + entries: _, requirements, constraints, overrides, @@ -198,6 +199,9 @@ pub(crate) async fn pip_compile( |python_version| Cow::Owned(python_version.markers(interpreter.markers())), ); + // Don't enforce hashes during resolution. + let hashes = RequiredHashes::default(); + // Incorporate any index locations from the provided sources. let index_locations = index_locations.combine(index_url, extra_index_urls, find_links, no_index); @@ -229,7 +233,7 @@ pub(crate) async fn pip_compile( let flat_index = { let client = FlatIndexClient::new(&client, &cache); let entries = client.fetch(index_locations.flat_index()).await?; - FlatIndex::from_entries(entries, &tags, &no_build, &NoBinary::None) + FlatIndex::from_entries(entries, &tags, &hashes, &no_build, &NoBinary::None) }; // Track in-flight downloads, builds, etc., across resolutions. @@ -268,6 +272,7 @@ pub(crate) async fn pip_compile( // Convert from unnamed to named requirements.
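The `from_requirements` pass above boils down to two preconditions on every named entry: the requirement must either be a direct URL or be pinned with `==`, and it must carry at least one `--hash`. A rough standalone sketch of those two checks, using plain strings rather than the `pep508_rs` types (the function name and error strings here are illustrative, not the crate's API):

// Hypothetical sketch of the two `--require-hashes` preconditions; plain strings
// stand in for parsed requirements.
fn validate_entry(spec: &str, hashes: &[&str]) -> Result<(), String> {
    // Every requirement must be either a direct URL or pinned with `==`.
    let is_url = spec.contains(" @ ");
    let is_pinned = spec.contains("==");
    if !is_url && !is_pinned {
        return Err(format!("version not pinned with `==`: {spec}"));
    }
    // Every requirement must include at least one hash.
    if hashes.is_empty() {
        return Err(format!("no `--hash` provided for: {spec}"));
    }
    Ok(())
}

fn main() {
    assert!(validate_entry("anyio==4.0.0", &["sha256:abc"]).is_ok());
    assert!(validate_entry("anyio", &["sha256:abc"]).is_err()); // unpinned
    assert!(validate_entry("anyio==4.0.0", &[]).is_err()); // missing hash
}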
let mut requirements = NamedRequirementsResolver::new( requirements, + false, &build_dispatch, &client, &top_level_index, @@ -282,6 +287,7 @@ pub(crate) async fn pip_compile( SourceTreeResolver::new( source_trees, &extras, + false, &build_dispatch, &client, &top_level_index, @@ -306,7 +312,7 @@ pub(crate) async fn pip_compile( LocalEditable { url, path, extras } })); - let downloader = Downloader::new(&cache, &tags, &client, &build_dispatch) + let downloader = Downloader::new(&cache, &tags, &hashes, &client, &build_dispatch) .with_reporter(DownloadReporter::from(printer).with_length(editables.len() as u64)); // Build all editables. @@ -354,6 +360,7 @@ pub(crate) async fn pip_compile( &constraints, &overrides, &editables, + &hashes, &build_dispatch, &client, &top_level_index, @@ -370,7 +377,7 @@ pub(crate) async fn pip_compile( preferences, project, editables, - // Do not consider any installed packages during compilation + // Do not consider any installed packages during resolution. Exclusions::All, lookaheads, ); @@ -392,6 +399,7 @@ pub(crate) async fn pip_compile( &client, &flat_index, &top_level_index, + &hashes, &build_dispatch, &EmptyInstalledPackages, )? diff --git a/crates/uv/src/commands/pip_install.rs b/crates/uv/src/commands/pip_install.rs index 2135634a5..040f2ba87 100644 --- a/crates/uv/src/commands/pip_install.rs +++ b/crates/uv/src/commands/pip_install.rs @@ -14,7 +14,7 @@ use distribution_types::{ LocalEditables, Name, Resolution, }; use install_wheel_rs::linker::LinkMode; -use pep508_rs::{MarkerEnvironment, Requirement}; +use pep508_rs::{MarkerEnvironment, Requirement, RequirementsTxtRequirement}; use platform_tags::Tags; use pypi_types::{Metadata23, Yanked}; use requirements_txt::EditableRequirement; @@ -37,10 +37,10 @@ use uv_requirements::{ RequirementsSpecification, SourceTreeResolver, }; use uv_resolver::{ - DependencyMode, Exclusions, FlatIndex, InMemoryIndex, Manifest, Options, OptionsBuilder, - PreReleaseMode, Preference, ResolutionGraph, ResolutionMode, Resolver, + DependencyMode, Exclusions, FlatIndex, HashCheckingMode, InMemoryIndex, Manifest, Options, + OptionsBuilder, PreReleaseMode, Preference, ResolutionGraph, ResolutionMode, Resolver, }; -use uv_types::{BuildIsolation, InFlight}; +use uv_types::{BuildIsolation, InFlight, RequiredHashes}; use uv_warnings::warn_user; use crate::commands::reporters::{DownloadReporter, InstallReporter, ResolverReporter}; @@ -85,10 +85,6 @@ pub(crate) async fn pip_install( ) -> Result { let start = std::time::Instant::now(); - if require_hashes { - warn_user!("Hash-checking mode (via `--require-hashes`) is not yet supported."); - } - let client_builder = BaseClientBuilder::new() .connectivity(connectivity) .native_tls(native_tls) @@ -97,6 +93,7 @@ pub(crate) async fn pip_install( // Read all requirements from the provided sources. let RequirementsSpecification { project, + entries, requirements, constraints, overrides, @@ -188,6 +185,21 @@ pub(crate) async fn pip_install( let tags = venv.interpreter().tags()?; let markers = venv.interpreter().markers(); + // Collect the set of required hashes. + let hashes = if require_hashes { + RequiredHashes::from_requirements( + entries + .into_iter() + .filter_map(|requirement| match requirement.requirement { + RequirementsTxtRequirement::Pep508(req) => Some((req, requirement.hashes)), + RequirementsTxtRequirement::Unnamed(_) => None, + }), + markers, + )? + } else { + RequiredHashes::default() + }; + // Incorporate any index locations from the provided sources. 
let index_locations = index_locations.combine(index_url, extra_index_urls, find_links, no_index); @@ -212,7 +224,7 @@ pub(crate) async fn pip_install( let flat_index = { let client = FlatIndexClient::new(&client, &cache); let entries = client.fetch(index_locations.flat_index()).await?; - FlatIndex::from_entries(entries, tags, &no_build, &no_binary) + FlatIndex::from_entries(entries, tags, &hashes, &no_build, &no_binary) }; // Determine whether to enable build isolation. @@ -252,19 +264,31 @@ pub(crate) async fn pip_install( // Resolve the requirements from the provided sources. let requirements = { // Convert from unnamed to named requirements. - let mut requirements = - NamedRequirementsResolver::new(requirements, &resolve_dispatch, &client, &index) - .with_reporter(ResolverReporter::from(printer)) - .resolve() - .await?; + let mut requirements = NamedRequirementsResolver::new( + requirements, + require_hashes, + &resolve_dispatch, + &client, + &index, + ) + .with_reporter(ResolverReporter::from(printer)) + .resolve() + .await?; // Resolve any source trees into requirements. if !source_trees.is_empty() { requirements.extend( - SourceTreeResolver::new(source_trees, extras, &resolve_dispatch, &client, &index) - .with_reporter(ResolverReporter::from(printer)) - .resolve() - .await?, + SourceTreeResolver::new( + source_trees, + extras, + require_hashes, + &resolve_dispatch, + &client, + &index, + ) + .with_reporter(ResolverReporter::from(printer)) + .resolve() + .await?, ); } @@ -282,6 +306,7 @@ pub(crate) async fn pip_install( build_editables( &editables, editable_wheel_dir.path(), + &hashes, &cache, &interpreter, tags, @@ -296,8 +321,12 @@ pub(crate) async fn pip_install( .resolution_mode(resolution_mode) .prerelease_mode(prerelease_mode) .dependency_mode(dependency_mode) + .hash_checking_mode(if require_hashes { + HashCheckingMode::Enabled + } else { + HashCheckingMode::Disabled + }) .exclude_newer(exclude_newer) - .require_hashes(require_hashes) .build(); // Resolve the requirements. @@ -307,6 +336,7 @@ pub(crate) async fn pip_install( overrides, project, &editables, + &hashes, &site_packages, &reinstall, &upgrade, @@ -367,6 +397,7 @@ pub(crate) async fn pip_install( link_mode, compile, &index_locations, + &hashes, tags, &client, &in_flight, @@ -442,6 +473,7 @@ async fn read_requirements( async fn build_editables( editables: &[EditableRequirement], editable_wheel_dir: &Path, + hashes: &RequiredHashes, cache: &Cache, interpreter: &Interpreter, tags: &Tags, @@ -451,7 +483,7 @@ async fn build_editables( ) -> Result, Error> { let start = std::time::Instant::now(); - let downloader = Downloader::new(cache, tags, client, build_dispatch) + let downloader = Downloader::new(cache, tags, hashes, client, build_dispatch) .with_reporter(DownloadReporter::from(printer).with_length(editables.len() as u64)); let editables = LocalEditables::from_editables(editables.iter().map(|editable| { @@ -508,6 +540,7 @@ async fn resolve( overrides: Vec, project: Option, editables: &[BuiltEditable], + hashes: &RequiredHashes, site_packages: &SitePackages<'_>, reinstall: &Reinstall, upgrade: &Upgrade, @@ -554,6 +587,7 @@ async fn resolve( &constraints, &overrides, &editables, + hashes, build_dispatch, client, index, @@ -584,6 +618,7 @@ async fn resolve( client, flat_index, index, + hashes, build_dispatch, site_packages, )? 
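With the required hashes threaded into the `Downloader`, each fetched archive can be digested and compared against the expected values before installation, which is what surfaces as the `Expected:`/`Computed:` mismatch errors in the tests further down. A minimal sketch of that comparison for a single `sha256:<hex>` entry, assuming the `sha2` and `hex` crates (illustrative only, not the downloader's actual code):

use sha2::{Digest, Sha256};

// Compare downloaded bytes against an expected `sha256:<hex>` digest.
// Real code would support multiple algorithms and hash the stream incrementally.
fn matches_expected(data: &[u8], expected: &str) -> bool {
    let Some(hex_digest) = expected.strip_prefix("sha256:") else {
        return false; // only sha256 is handled in this sketch
    };
    hex::encode(Sha256::digest(data)) == hex_digest
}

fn main() {
    let data = b"example archive bytes";
    let expected = format!("sha256:{}", hex::encode(Sha256::digest(data)));
    assert!(matches_expected(data, &expected));
    assert!(!matches_expected(b"tampered bytes", &expected));
}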
@@ -627,6 +662,7 @@ async fn install( link_mode: LinkMode, compile: bool, index_urls: &IndexLocations, + hashes: &RequiredHashes, tags: &Tags, client: &RegistryClient, in_flight: &InFlight, @@ -654,6 +690,7 @@ async fn install( site_packages, reinstall, no_binary, + hashes, index_urls, cache, venv, @@ -706,7 +743,7 @@ async fn install( } else { let start = std::time::Instant::now(); - let downloader = Downloader::new(cache, tags, client, build_dispatch) + let downloader = Downloader::new(cache, tags, hashes, client, build_dispatch) .with_reporter(DownloadReporter::from(printer).with_length(remote.len() as u64)); let wheels = downloader @@ -1022,6 +1059,9 @@ enum Error { #[error(transparent)] Platform(#[from] platform_tags::PlatformError), + #[error(transparent)] + RequiredHashes(#[from] uv_types::RequiredHashesError), + #[error(transparent)] Io(#[from] std::io::Error), diff --git a/crates/uv/src/commands/pip_sync.rs b/crates/uv/src/commands/pip_sync.rs index 70a9f4998..093954682 100644 --- a/crates/uv/src/commands/pip_sync.rs +++ b/crates/uv/src/commands/pip_sync.rs @@ -10,6 +10,7 @@ use distribution_types::{ IndexLocations, InstalledMetadata, LocalDist, LocalEditable, LocalEditables, Name, ResolvedDist, }; use install_wheel_rs::linker::LinkMode; +use pep508_rs::RequirementsTxtRequirement; use platform_tags::Tags; use pypi_types::Yanked; use requirements_txt::EditableRequirement; @@ -29,8 +30,10 @@ use uv_requirements::{ ExtrasSpecification, NamedRequirementsResolver, RequirementsSource, RequirementsSpecification, SourceTreeResolver, }; -use uv_resolver::{DependencyMode, FlatIndex, InMemoryIndex, Manifest, OptionsBuilder, Resolver}; -use uv_types::{BuildIsolation, EmptyInstalledPackages, InFlight}; +use uv_resolver::{ + DependencyMode, FlatIndex, HashCheckingMode, InMemoryIndex, Manifest, OptionsBuilder, Resolver, +}; +use uv_types::{BuildIsolation, EmptyInstalledPackages, InFlight, RequiredHashes}; use uv_warnings::warn_user; use crate::commands::reporters::{DownloadReporter, InstallReporter, ResolverReporter}; @@ -64,10 +67,6 @@ pub(crate) async fn pip_sync( ) -> Result { let start = std::time::Instant::now(); - if require_hashes { - warn_user!("Hash-checking mode (via `--require-hashes`) is not yet supported."); - } - let client_builder = BaseClientBuilder::new() .connectivity(connectivity) .native_tls(native_tls) @@ -76,6 +75,7 @@ pub(crate) async fn pip_sync( // Read all requirements from the provided sources. let RequirementsSpecification { project: _, + entries, requirements, constraints: _, overrides: _, @@ -135,6 +135,22 @@ pub(crate) async fn pip_sync( // Determine the current environment markers. let tags = venv.interpreter().tags()?; + let markers = venv.interpreter().markers(); + + // Collect the set of required hashes. + let hashes = if require_hashes { + RequiredHashes::from_requirements( + entries + .into_iter() + .filter_map(|requirement| match requirement.requirement { + RequirementsTxtRequirement::Pep508(req) => Some((req, requirement.hashes)), + RequirementsTxtRequirement::Unnamed(_) => None, + }), + markers, + )? + } else { + RequiredHashes::default() + }; // Incorporate any index locations from the provided sources. 
let index_locations = @@ -160,7 +176,7 @@ pub(crate) async fn pip_sync( let flat_index = { let client = FlatIndexClient::new(&client, &cache); let entries = client.fetch(index_locations.flat_index()).await?; - FlatIndex::from_entries(entries, tags, &no_build, &no_binary) + FlatIndex::from_entries(entries, tags, &hashes, &no_build, &no_binary) }; // Create a shared in-memory index. @@ -202,11 +218,16 @@ pub(crate) async fn pip_sync( // Convert from unnamed to named requirements. let requirements = { // Convert from unnamed to named requirements. - let mut requirements = - NamedRequirementsResolver::new(requirements, &build_dispatch, &client, &index) - .with_reporter(ResolverReporter::from(printer)) - .resolve() - .await?; + let mut requirements = NamedRequirementsResolver::new( + requirements, + require_hashes, + &build_dispatch, + &client, + &index, + ) + .with_reporter(ResolverReporter::from(printer)) + .resolve() + .await?; // Resolve any source trees into requirements. if !source_trees.is_empty() { @@ -214,6 +235,7 @@ pub(crate) async fn pip_sync( SourceTreeResolver::new( source_trees, &ExtrasSpecification::None, + require_hashes, &build_dispatch, &client, &index, @@ -232,6 +254,7 @@ pub(crate) async fn pip_sync( editables, &site_packages, reinstall, + &hashes, venv.interpreter(), tags, &cache, @@ -255,6 +278,7 @@ pub(crate) async fn pip_sync( site_packages, reinstall, &no_binary, + &hashes, &index_locations, &cache, &venv, @@ -293,7 +317,11 @@ pub(crate) async fn pip_sync( // Resolve with `--no-deps`. let options = OptionsBuilder::new() .dependency_mode(DependencyMode::Direct) - .require_hashes(require_hashes) + .hash_checking_mode(if require_hashes { + HashCheckingMode::Enabled + } else { + HashCheckingMode::Disabled + }) .build(); // Create a bound on the progress bar, since we know the number of packages upfront. 
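The `MissingHash` and `MismatchedHash` incompatibilities introduced in the version map and flat index reduce to one rule: once hashes are required for a package, a candidate file is usable only if at least one of its published digests appears in the required set. A small self-contained sketch of that rule (the enum and function names are illustrative, not the resolver's types):

#[derive(Debug, PartialEq)]
enum HashCompatibility {
    Compatible,
    MissingHash,    // the index published no digest for this file
    MismatchedHash, // none of the published digests are in the required set
}

fn check_hashes(required: &[&str], available: &[&str]) -> HashCompatibility {
    if required.is_empty() {
        return HashCompatibility::Compatible; // hashes not required for this package
    }
    if available.is_empty() {
        return HashCompatibility::MissingHash;
    }
    if available.iter().any(|hash| required.contains(hash)) {
        HashCompatibility::Compatible
    } else {
        HashCompatibility::MismatchedHash
    }
}

fn main() {
    assert_eq!(check_hashes(&[], &["sha256:abc"]), HashCompatibility::Compatible);
    assert_eq!(check_hashes(&["sha256:abc"], &[]), HashCompatibility::MissingHash);
    assert_eq!(check_hashes(&["sha256:abc"], &["sha256:def"]), HashCompatibility::MismatchedHash);
    assert_eq!(check_hashes(&["sha256:abc", "sha256:def"], &["sha256:def"]), HashCompatibility::Compatible);
}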
@@ -309,6 +337,7 @@ pub(crate) async fn pip_sync( &client, &flat_index, &index, + &hashes, &build_dispatch, // TODO(zanieb): We should consider support for installed packages in pip sync &EmptyInstalledPackages, @@ -352,7 +381,7 @@ pub(crate) async fn pip_sync( } else { let start = std::time::Instant::now(); - let downloader = Downloader::new(&cache, tags, &client, &build_dispatch) + let downloader = Downloader::new(&cache, tags, &hashes, &client, &build_dispatch) .with_reporter(DownloadReporter::from(printer).with_length(remote.len() as u64)); let wheels = downloader @@ -548,6 +577,7 @@ async fn resolve_editables( editables: Vec, site_packages: &SitePackages<'_>, reinstall: &Reinstall, + hashes: &RequiredHashes, interpreter: &Interpreter, tags: &Tags, cache: &Cache, @@ -614,7 +644,7 @@ async fn resolve_editables( } else { let start = std::time::Instant::now(); - let downloader = Downloader::new(cache, tags, client, build_dispatch) + let downloader = Downloader::new(cache, tags, hashes, client, build_dispatch) .with_reporter(DownloadReporter::from(printer).with_length(uninstalled.len() as u64)); let editables = LocalEditables::from_editables(uninstalled.iter().map(|editable| { diff --git a/crates/uv/src/commands/venv.rs b/crates/uv/src/commands/venv.rs index 8c1f943fd..d90bcb414 100644 --- a/crates/uv/src/commands/venv.rs +++ b/crates/uv/src/commands/venv.rs @@ -21,7 +21,7 @@ use uv_dispatch::BuildDispatch; use uv_fs::Simplified; use uv_interpreter::{find_default_python, find_requested_python, Error}; use uv_resolver::{FlatIndex, InMemoryIndex, OptionsBuilder}; -use uv_types::{BuildContext, BuildIsolation, InFlight}; +use uv_types::{BuildContext, BuildIsolation, InFlight, RequiredHashes}; use crate::commands::ExitStatus; use crate::printer::Printer; @@ -167,7 +167,13 @@ async fn venv_impl( .fetch(index_locations.flat_index()) .await .map_err(VenvError::FlatIndex)?; - FlatIndex::from_entries(entries, tags, &NoBuild::All, &NoBinary::None) + FlatIndex::from_entries( + entries, + tags, + &RequiredHashes::default(), + &NoBuild::All, + &NoBinary::None, + ) }; // Create a shared in-memory index. diff --git a/crates/uv/tests/cache_prune.rs b/crates/uv/tests/cache_prune.rs index ee8f28c3a..4d96e6979 100644 --- a/crates/uv/tests/cache_prune.rs +++ b/crates/uv/tests/cache_prune.rs @@ -128,7 +128,7 @@ fn prune_stale_symlink() -> Result<()> { .success(); // Remove the wheels directory, causing the symlink to become stale. - let wheels = context.cache_dir.child("wheels-v0"); + let wheels = context.cache_dir.child("wheels-v1"); fs_err::remove_dir_all(wheels)?; let filters: Vec<_> = context diff --git a/crates/uv/tests/pip_install.rs b/crates/uv/tests/pip_install.rs index 85a890dc1..dec0a147f 100644 --- a/crates/uv/tests/pip_install.rs +++ b/crates/uv/tests/pip_install.rs @@ -3756,3 +3756,204 @@ fn find_links_no_binary() -> Result<()> { Ok(()) } + +/// Provide the wrong hash with `--require-hashes`. +#[test] +fn require_hashes_mismatch() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str( + "anyio==4.0.0 --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + )?; + + // Raise an error. 
+ uv_snapshot!(context.install() + .arg("-r") + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because the hash does not match and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Omit a transitive dependency in `--require-hashes`. +#[test] +fn require_hashes_missing_dependency() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str( + "anyio==4.0.0 --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + )?; + + // Install without error when `--require-hashes` is omitted. + uv_snapshot!(context.install() + .arg("-r") + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: In `--require-hashes` mode, all requirements must be pinned upfront with `==`, but found: idna + "### + ); + + Ok(()) +} + +/// We disallow `--require-hashes` for editables' dependencies. +#[test] +fn require_hashes_editable() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&indoc::formatdoc! {r" + -e file://{workspace_root}/scripts/packages/black_editable[d] + ", + workspace_root = context.workspace_root.simplified_display(), + })?; + + // Install the editable packages. + uv_snapshot!(context.filters(), context.install() + .arg("-r") + .arg(requirements_txt.path()) + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + Built 1 editable in [TIME] + error: In `--require-hashes` mode, all requirements must be pinned upfront with `==`, but found: aiohttp + "### + ); + + Ok(()) +} + +/// If a hash is only included as a constraint, that's not good enough for `--require-hashes`. +#[test] +fn require_hashes_constraint() -> Result<()> { + let context = TestContext::new("3.12"); + + // Include the hash in the constraint file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("anyio==4.0.0")?; + + let constraints_txt = context.temp_dir.child("constraints.txt"); + constraints_txt.write_str("anyio==4.0.0 --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + // Install the editable packages. + uv_snapshot!(context.install() + .arg("-r") + .arg(requirements_txt.path()) + .arg("--require-hashes") + .arg("-c") + .arg(constraints_txt.path()), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: In `--require-hashes` mode, all requirement must have a hash, but none were provided for: anyio==4.0.0 + "### + ); + + // Include the hash in the requirements file, but pin the version in the constraint file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str( + "anyio --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + )?; + + let constraints_txt = context.temp_dir.child("constraints.txt"); + constraints_txt.write_str("anyio==4.0.0")?; + + // Install the editable packages. 
+ uv_snapshot!(context.install() + .arg("-r") + .arg(requirements_txt.path()) + .arg("--require-hashes") + .arg("-c") + .arg(constraints_txt.path()), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: In `--require-hashes` mode, all requirement must have their versions pinned with `==`, but found: anyio + "### + ); + + Ok(()) +} + +/// If a hash is only included as a override, that's not good enough for `--require-hashes`. +/// +/// TODO(charlie): This _should_ be allowed. It's a bug. +#[test] +fn require_hashes_override() -> Result<()> { + let context = TestContext::new("3.12"); + + // Include the hash in the override file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("anyio==4.0.0")?; + + let overrides_txt = context.temp_dir.child("overrides.txt"); + overrides_txt.write_str("anyio==4.0.0 --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + // Install the editable packages. + uv_snapshot!(context.install() + .arg("-r") + .arg(requirements_txt.path()) + .arg("--require-hashes") + .arg("--override") + .arg(overrides_txt.path()), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: In `--require-hashes` mode, all requirement must have a hash, but none were provided for: anyio==4.0.0 + "### + ); + + // Include the hash in the requirements file, but pin the version in the override file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str( + "anyio --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + )?; + + let overrides_txt = context.temp_dir.child("overrides.txt"); + overrides_txt.write_str("anyio==4.0.0")?; + + // Install the editable packages. + uv_snapshot!(context.install() + .arg("-r") + .arg(requirements_txt.path()) + .arg("--require-hashes") + .arg("--override") + .arg(overrides_txt.path()), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: In `--require-hashes` mode, all requirement must have their versions pinned with `==`, but found: anyio + "### + ); + + Ok(()) +} diff --git a/crates/uv/tests/pip_sync.rs b/crates/uv/tests/pip_sync.rs index 74ba1bf3a..387fc25a5 100644 --- a/crates/uv/tests/pip_sync.rs +++ b/crates/uv/tests/pip_sync.rs @@ -3081,3 +3081,1389 @@ requires-python = "<=3.5" Ok(()) } + +/// Omit the hash with `--require-hashes`. +#[test] +fn require_hashes_missing_hash() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("anyio==4.0.0")?; + + // Install without error when `--require-hashes` is omitted. + uv_snapshot!(command(&context) + .arg("requirements.txt"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 + "### + ); + + // Error when `--require-hashes` is provided. + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: In `--require-hashes` mode, all requirement must have a hash, but none were provided for: anyio==4.0.0 + "### + ); + + Ok(()) +} + +/// Omit the version with `--require-hashes`. 
+#[test] +fn require_hashes_missing_version() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str( + "anyio --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + )?; + + // Install without error when `--require-hashes` is omitted. + uv_snapshot!(command(&context) + .arg("requirements.txt"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.3.0 + "### + ); + + // Error when `--require-hashes` is provided. + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: In `--require-hashes` mode, all requirement must have their versions pinned with `==`, but found: anyio + "### + ); + + Ok(()) +} + +/// Include the hash for _just_ the wheel with `--no-binary`. +#[test] +fn require_hashes_wheel_no_binary() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--no-binary") + .arg(":all:") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because the hash does not match and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Include the hash for _just_ the wheel with `--only-binary`. +#[test] +fn require_hashes_wheel_only_binary() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--only-binary") + .arg(":all:") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 + "### + ); + + Ok(()) +} + +/// Include the hash for _just_ the source distribution with `--no-binary`. +#[test] +fn require_hashes_source_no_binary() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--no-binary") + .arg(":all:") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 + "### + ); + + Ok(()) +} + +/// Include the hash for _just_ the source distribution, with `--binary-only`. 
+#[test] +fn require_hashes_source_only_binary() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--only-binary") + .arg(":all:") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because no wheels are usable and building from source is disabled and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Include the correct hash algorithm, but the wrong digest. +#[test] +fn require_hashes_wrong_digest() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because the hash does not match and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Include the correct hash, but the wrong algorithm. +#[test] +fn require_hashes_wrong_algorithm() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha512:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because the hash does not match and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Include the hash for a source distribution specified as a direct URL dependency. +#[test] +fn require_hashes_source_url() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz) + "### + ); + + // Reinstall with the right hash, and verify that it's reused. 
+ uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - anyio==4.0.0 (from https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz) + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz) + "### + ); + + // Reinstall with the wrong hash, and verify that it's rejected despite being cached. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz --hash=sha256:a7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to download and build: anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz + Caused by: Hash mismatch for anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz + + Expected: + sha256:a7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a + + Computed: + sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a + "### + ); + + Ok(()) +} + +/// Include the _wrong_ hash for a source distribution specified as a direct URL dependency. +#[test] +fn require_hashes_source_url_mismatch() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz --hash=sha256:a7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to download and build: anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz + Caused by: Hash mismatch for anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz + + Expected: + sha256:a7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a + + Computed: + sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a + "### + ); + + Ok(()) +} + +/// Include the hash for a built distribution specified as a direct URL dependency. 
+#[test] +fn require_hashes_wheel_url() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + "### + ); + + // Reinstall with the right hash, and verify that it's reused. + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + "### + ); + + // Reinstall with the wrong hash, and verify that it's rejected despite being cached. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl + Caused by: Hash mismatch for anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl + + Expected: + sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + + Computed: + sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + "### + ); + + // Sync a new dependency and include the wrong hash for anyio. Verify that we reuse anyio + // despite the wrong hash, like pip, since we don't validate hashes for already-installed + // distributions. 
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f\niniconfig==2.0.0 --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + iniconfig==2.0.0 + "### + ); + + Ok(()) +} + +/// Include the _wrong_ hash for a built distribution specified as a direct URL dependency. +#[test] +fn require_hashes_wheel_url_mismatch() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl + Caused by: Hash mismatch for anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl + + Expected: + sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + + Computed: + sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + "### + ); + + Ok(()) +} + +/// Reject Git dependencies when `--require-hashes` is provided. +#[test] +fn require_hashes_git() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ git+https://github.com/agronholm/anyio@4a23745badf5bf5ef7928f1e346e9986bd696d82 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to download and build: anyio @ git+https://github.com/agronholm/anyio@4a23745badf5bf5ef7928f1e346e9986bd696d82 + Caused by: Hash-checking is not supported for Git repositories: anyio @ git+https://github.com/agronholm/anyio@4a23745badf5bf5ef7928f1e346e9986bd696d82 + "### + ); + + Ok(()) +} + +/// Reject local directory dependencies when `--require-hashes` is provided. 
+#[test] +fn require_hashes_source_tree() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&format!( + "black @ {} --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a", + context + .workspace_root + .join("scripts/packages/black_editable") + .display() + ))?; + + uv_snapshot!(context.filters(), command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to build: black @ file://[WORKSPACE]/scripts/packages/black_editable + Caused by: Hash-checking is not supported for local directories: black @ file://[WORKSPACE]/scripts/packages/black_editable + "### + ); + + Ok(()) +} + +/// Include the hash for _just_ the wheel with `--only-binary`. +#[test] +fn require_hashes_re_download() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("anyio==4.0.0")?; + + // Install without `--require-hashes`. + uv_snapshot!(command(&context) + .arg("requirements.txt"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 + "### + ); + + // Reinstall with `--require-hashes`, and the wrong hash. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because the hash does not match and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + // Reinstall with `--require-hashes`, and the right hash. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - anyio==4.0.0 + + anyio==4.0.0 + "### + ); + + Ok(()) +} + +/// Include the hash for a built distribution specified as a local path dependency. 
+#[test] +fn require_hashes_wheel_path() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&format!( + "tqdm @ {} --hash=sha256:a34996d4bd5abb2336e14ff0a2d22b92cfd0f0ed344e6883041ce01953276a13", + context + .workspace_root + .join("scripts/links/tqdm-1000.0.0-py3-none-any.whl") + .display() + ))?; + + uv_snapshot!(context.filters(), command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + tqdm==1000.0.0 (from file://[WORKSPACE]/scripts/links/tqdm-1000.0.0-py3-none-any.whl) + "### + ); + + Ok(()) +} + +/// Include the _wrong_ hash for a built distribution specified as a local path dependency. +#[test] +fn require_hashes_wheel_path_mismatch() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&format!( + "tqdm @ {} --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + context + .workspace_root + .join("scripts/links/tqdm-1000.0.0-py3-none-any.whl") + .display() + ))?; + + uv_snapshot!(context.filters(), command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: tqdm @ file://[WORKSPACE]/scripts/links/tqdm-1000.0.0-py3-none-any.whl + Caused by: Hash mismatch for tqdm @ file://[WORKSPACE]/scripts/links/tqdm-1000.0.0-py3-none-any.whl + + Expected: + sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + + Computed: + sha256:a34996d4bd5abb2336e14ff0a2d22b92cfd0f0ed344e6883041ce01953276a13 + "### + ); + + Ok(()) +} + +/// Include the hash for a source distribution specified as a local path dependency. +#[test] +fn require_hashes_source_path() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&format!( + "tqdm @ {} --hash=sha256:89fa05cffa7f457658373b85de302d24d0c205ceda2819a8739e324b75e9430b", + context + .workspace_root + .join("scripts/links/tqdm-999.0.0.tar.gz") + .display() + ))?; + + uv_snapshot!(context.filters(), command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + tqdm==999.0.0 (from file://[WORKSPACE]/scripts/links/tqdm-999.0.0.tar.gz) + "### + ); + + Ok(()) +} + +/// Include the _wrong_ hash for a source distribution specified as a local path dependency. 
+#[test] +fn require_hashes_source_path_mismatch() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&format!( + "tqdm @ {} --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + context + .workspace_root + .join("scripts/links/tqdm-999.0.0.tar.gz") + .display() + ))?; + + uv_snapshot!(context.filters(), command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to build: tqdm @ file://[WORKSPACE]/scripts/links/tqdm-999.0.0.tar.gz + Caused by: Hash mismatch for tqdm @ file://[WORKSPACE]/scripts/links/tqdm-999.0.0.tar.gz + + Expected: + sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + + Computed: + sha256:89fa05cffa7f457658373b85de302d24d0c205ceda2819a8739e324b75e9430b + "### + ); + + Ok(()) +} + +/// `--require-hashes` isn't supported for unnamed requirements (yet). +#[test] +fn require_hashes_unnamed() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("https://foo.com --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Unnamed requirements are not supported with `--require-hashes` + "### + ); + + Ok(()) +} + +/// We allow `--require-hashes` for editables, as long as no dependencies are included. +#[test] +fn require_hashes_editable() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&indoc::formatdoc! {r" + -e file://{workspace_root}/scripts/packages/black_editable[d] + ", + workspace_root = context.workspace_root.simplified_display(), + })?; + + // Install the editable packages. + uv_snapshot!(context.filters(), command(&context) + .arg(requirements_txt.path()) + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Built 1 editable in [TIME] + Installed 1 package in [TIME] + + black==0.1.0 (from file://[WORKSPACE]/scripts/packages/black_editable) + "### + ); + + Ok(()) +} + +/// If a dependency is repeated, the hash should be required for both instances. +#[test] +fn require_hashes_repeated_dependency() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a\nanyio")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: In `--require-hashes` mode, all requirement must have their versions pinned with `==`, but found: anyio + "### + ); + + // Reverse the order. 
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio\nanyio==4.0.0 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: In `--require-hashes` mode, all requirement must have their versions pinned with `==`, but found: anyio + "### + ); + + Ok(()) +} + +/// If a dependency is repeated, use the last hash provided. pip seems to use the _first_ hash. +#[test] +fn require_hashes_repeated_hash() -> Result<()> { + let context = TestContext::new("3.12"); + + // Use the same hash in both cases. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str(indoc::indoc! { r" + anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + " })?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + "### + ); + + // Use a different hash, but both are correct. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str(indoc::indoc! { r" + anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha512:f30761c1e8725b49c498273b90dba4b05c0fd157811994c806183062cb6647e773364ce45f0e1ff0b10e32fe6d0232ea5ad39476ccf37109d6b49603a09c11c2 + " })?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes") + .arg("--reinstall"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + "### + ); + + // Use a different hash. The first hash is wrong, but that's fine, since we use the last hash. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str(indoc::indoc! 
{ r" + anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:a7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a + anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=md5:420d85e19168705cdf0223621b18831a + " })?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes") + .arg("--reinstall"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + "### + ); + + // Use a different hash. The second hash is wrong. This should fail, since we use the last hash. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str(indoc::indoc! { r" + anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a + anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=md5:520d85e19168705cdf0223621b18831a + " })?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes") + .arg("--reinstall"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl + Caused by: Hash mismatch for anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl + + Expected: + md5:520d85e19168705cdf0223621b18831a + + Computed: + md5:420d85e19168705cdf0223621b18831a + "### + ); + + Ok(()) +} + +/// If a dependency is repeated, the hash should be required for both instances. +#[test] +fn require_hashes_at_least_one() -> Result<()> { + let context = TestContext::new("3.12"); + + // Request `anyio` with a `sha256` hash. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 + "### + ); + + // Reinstall, requesting both `sha256` and `sha512`. We should reinstall from the cache, since + // at least one hash matches. 
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a --hash=md5:420d85e19168705cdf0223621b18831a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - anyio==4.0.0 + + anyio==4.0.0 + "### + ); + + // This should be true even if the second hash is wrong. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a --hash=md5:1234")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - anyio==4.0.0 + + anyio==4.0.0 + "### + ); + + Ok(()) +} + +/// Using `--find-links`, but the registry doesn't provide us with a hash. +#[test] +fn require_hashes_find_links_no_hash() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes") + .arg("--find-links") + .arg("https://raw.githubusercontent.com/astral-test/astral-test-hash/main/no-hash/simple-html/example-a-961b4c22/index.html"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because example-a-961b4c22==1.0.0 is unusable because it has no hash and you require example-a-961b4c22==1.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Using `--find-links`, and the registry serves us a correct hash. +#[test] +fn require_hashes_find_links_valid_hash() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes") + .arg("--find-links") + .arg("https://raw.githubusercontent.com/astral-test/astral-test-hash/main/valid-hash/simple-html/example-a-961b4c22/index.html"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + example-a-961b4c22==1.0.0 + "### + ); + + Ok(()) +} + +/// Using `--find-links`, and the registry serves us an incorrect hash. +#[test] +fn require_hashes_find_links_invalid_hash() -> Result<()> { + let context = TestContext::new("3.12"); + + // First, request some other hash. 
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("example-a-961b4c22==1.0.0 --hash=sha256:123")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes") + .arg("--find-links") + .arg("https://raw.githubusercontent.com/astral-test/astral-test-hash/main/invalid-hash/simple-html/example-a-961b4c22/index.html"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because example-a-961b4c22==1.0.0 is unusable because the hash does not match and you require example-a-961b4c22==1.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + // Second, request the invalid hash, that the registry _thinks_ is correct. We should reject it. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:8838f9d005ff0432b258ba648d9cabb1cbdf06ac29d14f788b02edae544032ea")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes") + .arg("--find-links") + .arg("https://raw.githubusercontent.com/astral-test/astral-test-hash/main/invalid-hash/simple-html/example-a-961b4c22/index.html"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: example-a-961b4c22==1.0.0 + Caused by: Hash mismatch for example-a-961b4c22==1.0.0 + + Expected: + sha256:8838f9d005ff0432b258ba648d9cabb1cbdf06ac29d14f788b02edae544032ea + + Computed: + sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e + "### + ); + + // Third, request the correct hash, that the registry _thinks_ is incorrect. We should accept + // it, since it's already cached under this hash. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes") + .arg("--find-links") + .arg("https://raw.githubusercontent.com/astral-test/astral-test-hash/main/invalid-hash/simple-html/example-a-961b4c22/index.html"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Installed 1 package in [TIME] + + example-a-961b4c22==1.0.0 + "### + ); + + // Fourth, request the correct hash, that the registry _thinks_ is incorrect, but without the + // cache. We _should_ accept it, but we currently don't. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--refresh") + .arg("--reinstall") + .arg("--require-hashes") + .arg("--find-links") + .arg("https://raw.githubusercontent.com/astral-test/astral-test-hash/main/invalid-hash/simple-html/example-a-961b4c22/index.html"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because example-a-961b4c22==1.0.0 is unusable because the hash does not match and you require example-a-961b4c22==1.0.0, we can conclude that the requirements are unsatisfiable.
+ "### + ); + + // Finally, request the correct hash, along with the incorrect hash for the source distribution. + // Resolution will fail, since the incorrect hash matches the registry's hash. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e --hash=sha256:a3cf07a05aac526131a2e8b6e4375ee6c6eaac8add05b88035e960ac6cd999ee")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--refresh") + .arg("--reinstall") + .arg("--require-hashes") + .arg("--find-links") + .arg("https://raw.githubusercontent.com/astral-test/astral-test-hash/main/invalid-hash/simple-html/example-a-961b4c22/index.html"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: example-a-961b4c22==1.0.0 + Caused by: Hash mismatch for example-a-961b4c22==1.0.0 + + Expected: + sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e + sha256:a3cf07a05aac526131a2e8b6e4375ee6c6eaac8add05b88035e960ac6cd999ee + + Computed: + sha256:294e788dbe500fdc39e8b88e82652ab67409a1dc9dd06543d0fe0ae31b713eb3 + "### + ); + + Ok(()) +} + +/// Using `--index-url`, but the registry doesn't provide us with a hash. +#[test] +fn require_hashes_registry_no_hash() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes") + .arg("--index-url") + .arg("https://astral-test.github.io/astral-test-hash/no-hash/simple-html/"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because example-a-961b4c22==1.0.0 is unusable because it has no hash and you require example-a-961b4c22==1.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Using `--index-url`, and the registry serves us a correct hash. +#[test] +fn require_hashes_registry_valid_hash() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes") + .arg("--find-links") + .arg("https://astral-test.github.io/astral-test-hash/valid-hash/simple-html/"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because example-a-961b4c22==1.0.0 was not found in the package registry and you require example-a-961b4c22==1.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Using `--index-url`, and the registry serves us an incorrect hash. +#[test] +fn require_hashes_registry_invalid_hash() -> Result<()> { + let context = TestContext::new("3.12"); + + // First, request some other hash. 
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("example-a-961b4c22==1.0.0 --hash=sha256:123")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes") + .arg("--index-url") + .arg("https://astral-test.github.io/astral-test-hash/invalid-hash/simple-html/"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because example-a-961b4c22==1.0.0 is unusable because the hash does not match and you require example-a-961b4c22==1.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + // Second, request the invalid hash, that the registry _thinks_ is correct. We should reject it. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:8838f9d005ff0432b258ba648d9cabb1cbdf06ac29d14f788b02edae544032ea")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes") + .arg("--index-url") + .arg("https://astral-test.github.io/astral-test-hash/invalid-hash/simple-html/"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: example-a-961b4c22==1.0.0 + Caused by: Hash mismatch for example-a-961b4c22==1.0.0 + + Expected: + sha256:8838f9d005ff0432b258ba648d9cabb1cbdf06ac29d14f788b02edae544032ea + + Computed: + sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e + "### + ); + + // Third, request the correct hash, that the registry _thinks_ is incorrect. We should accept + // it, since it's already cached under this hash. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes") + .arg("--index-url") + .arg("https://astral-test.github.io/astral-test-hash/invalid-hash/simple-html/"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Installed 1 package in [TIME] + + example-a-961b4c22==1.0.0 + "### + ); + + // Fourth, request the correct hash, that the registry _thinks_ is incorrect, but without the + // cache. We _should_ accept it, but we currently don't. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--refresh") + .arg("--reinstall") + .arg("--require-hashes") + .arg("--index-url") + .arg("https://astral-test.github.io/astral-test-hash/invalid-hash/simple-html/"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ Because example-a-961b4c22==1.0.0 is unusable because the hash does not match and you require example-a-961b4c22==1.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + // Finally, request the correct hash, along with the incorrect hash for the source distribution. + // Resolution will fail, since the incorrect hash matches the registry's hash.
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("example-a-961b4c22==1.0.0 --hash=sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e --hash=sha256:a3cf07a05aac526131a2e8b6e4375ee6c6eaac8add05b88035e960ac6cd999ee")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--refresh") + .arg("--reinstall") + .arg("--require-hashes") + .arg("--index-url") + .arg("https://astral-test.github.io/astral-test-hash/invalid-hash/simple-html/"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: example-a-961b4c22==1.0.0 + Caused by: Hash mismatch for example-a-961b4c22==1.0.0 + + Expected: + sha256:5d69f0b590514103234f0c3526563856f04d044d8d0ea1073a843ae429b3187e + sha256:a3cf07a05aac526131a2e8b6e4375ee6c6eaac8add05b88035e960ac6cd999ee + + Computed: + sha256:294e788dbe500fdc39e8b88e82652ab67409a1dc9dd06543d0fe0ae31b713eb3 + "### + ); + + Ok(()) +}