From 43181f19336fbe869777d1a0361a68d9bb8a29bf Mon Sep 17 00:00:00 2001 From: Yorick Date: Sat, 27 Apr 2024 03:24:54 +0200 Subject: [PATCH] Implement `--index-strategy unsafe-best-match` (#3138) ## Summary This index strategy resolves every package to the latest possible version across indexes. If a version is in multiple indexes, the first available index is selected. Implements #3137 This closely matches pip. ## Test Plan Good question. I'm hesitant to use my certifi example here, since that would inevitably break when torch removes this package. Please comment! --- PIP_COMPATIBILITY.md | 19 +++-- crates/uv-client/src/registry_client.rs | 2 +- crates/uv-configuration/src/build_options.rs | 22 +++++- crates/uv-resolver/src/candidate_selector.rs | 71 +++++++++++++---- crates/uv-resolver/src/options.rs | 12 +++ crates/uv/src/commands/pip_compile.rs | 1 + crates/uv/src/commands/pip_install.rs | 1 + crates/uv/tests/pip_compile.rs | 83 +++++++++++++++++++- uv.schema.json | 11 ++- 9 files changed, 194 insertions(+), 28 deletions(-) diff --git a/PIP_COMPATIBILITY.md b/PIP_COMPATIBILITY.md index df70d3487..1f3a536de 100644 --- a/PIP_COMPATIBILITY.md +++ b/PIP_COMPATIBILITY.md @@ -128,11 +128,20 @@ internal package, thus causing the malicious package to be installed instead of package. See, for example, [the `torchtriton` attack](https://pytorch.org/blog/compromised-nightly-dependency/) from December 2022. -As of v0.1.29, users can opt in to `pip`-style behavior for multiple indexes via the -`--index-strategy unsafe-any-match` command-line option, or the `UV_INDEX_STRATEGY` environment -variable. When enabled, uv will search for each package across all indexes, and consider all -available versions when resolving dependencies, prioritizing the `--extra-index-url` indexes over -the default index URL. (Versions that are duplicated _across_ indexes will be ignored.) +As of v0.1.39, users can opt in to `pip`-style behavior for multiple indexes via the +`--index-strategy` command-line option, or the `UV_INDEX_STRATEGY` environment +variable, which supports the following values: + +- `--first-match` (default): Search for each package across all indexes, limiting the candidate + versions to those present in the first index that contains the package, prioritizing the + `--extra-index-url` indexes over the default index URL. +- `--unsafe-first-match`: Search for each package across all indexes, but prefer the first index + with a compatible version, even if newer versions are available on other indexes. +- `--unsafe-best-match`: Search for each package across all indexes, and select the best version + from the combined set of candidate versions. + +While `--unsafe-best-match` is the closest to `pip`'s behavior, it exposes users to the risk of +"dependency confusion" attacks. In the future, uv will support pinning packages to dedicated indexes (see: [#171](https://github.com/astral-sh/uv/issues/171)). Additionally, [PEP 708](https://peps.python.org/pep-0708/) is a provisional standard that aims to diff --git a/crates/uv-client/src/registry_client.rs b/crates/uv-client/src/registry_client.rs index c28782e57..85861b773 100644 --- a/crates/uv-client/src/registry_client.rs +++ b/crates/uv-client/src/registry_client.rs @@ -231,7 +231,7 @@ impl RegistryClient { results.push((index.clone(), metadata)); // If we're only using the first match, we can stop here. - if self.index_strategy == IndexStrategy::FirstMatch { + if self.index_strategy == IndexStrategy::FirstIndex { break; } } diff --git a/crates/uv-configuration/src/build_options.rs b/crates/uv-configuration/src/build_options.rs index d3cf8dcaf..2bb05a2b5 100644 --- a/crates/uv-configuration/src/build_options.rs +++ b/crates/uv-configuration/src/build_options.rs @@ -196,7 +196,7 @@ impl NoBuild { } } -#[derive(Debug, Default, Clone, Hash, Eq, PartialEq)] +#[derive(Debug, Default, Clone, Copy, Hash, Eq, PartialEq)] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[cfg_attr( @@ -210,7 +210,8 @@ pub enum IndexStrategy { /// While this differs from pip's behavior, it's the default index strategy as it's the most /// secure. #[default] - FirstMatch, + #[cfg_attr(feature = "clap", clap(alias = "first-match"))] + FirstIndex, /// Search for every package name across all indexes, exhausting the versions from the first /// index before moving on to the next. /// @@ -222,7 +223,22 @@ pub enum IndexStrategy { /// versions with different ABI tags or Python version constraints). /// /// See: https://peps.python.org/pep-0708/ - UnsafeAnyMatch, + #[cfg_attr(feature = "clap", clap(alias = "unsafe-any-match"))] + UnsafeFirstMatch, + /// Search for every package name across all indexes, preferring the "best" version found. If a + /// package version is in multiple indexes, only look at the entry for the first index. + /// + /// In this strategy, we look for every package across all indexes. When resolving, we consider + /// all versions from all indexes, choosing the "best" version found (typically, the highest + /// compatible version). + /// + /// This most closely matches pip's behavior, but exposes the resolver to "dependency confusion" + /// attacks whereby malicious actors can publish packages to public indexes with the same name + /// as internal packages, causing the resolver to install the malicious package in lieu of + /// the intended internal package. + /// + /// See: https://peps.python.org/pep-0708/ + UnsafeBestMatch, } #[cfg(test)] diff --git a/crates/uv-resolver/src/candidate_selector.rs b/crates/uv-resolver/src/candidate_selector.rs index f95558d16..b9a46eb80 100644 --- a/crates/uv-resolver/src/candidate_selector.rs +++ b/crates/uv-resolver/src/candidate_selector.rs @@ -1,3 +1,4 @@ +use itertools::Itertools; use pubgrub::range::Range; use tracing::debug; @@ -5,6 +6,7 @@ use distribution_types::{CompatibleDist, IncompatibleDist, IncompatibleSource}; use distribution_types::{DistributionMetadata, IncompatibleWheel, Name, PrioritizedDist}; use pep440_rs::Version; use pep508_rs::MarkerEnvironment; +use uv_configuration::IndexStrategy; use uv_normalize::PackageName; use uv_types::InstalledPackagesProvider; @@ -15,9 +17,11 @@ use crate::version_map::{VersionMap, VersionMapDistHandle}; use crate::{Exclusions, Manifest, Options}; #[derive(Debug, Clone)] +#[allow(clippy::struct_field_names)] pub(crate) struct CandidateSelector { resolution_strategy: ResolutionStrategy, prerelease_strategy: PreReleaseStrategy, + index_strategy: IndexStrategy, } impl CandidateSelector { @@ -40,6 +44,7 @@ impl CandidateSelector { markers, options.dependency_mode, ), + index_strategy: options.index_strategy, } } @@ -54,6 +59,12 @@ impl CandidateSelector { pub(crate) fn prerelease_strategy(&self) -> &PreReleaseStrategy { &self.prerelease_strategy } + + #[inline] + #[allow(dead_code)] + pub(crate) fn index_strategy(&self) -> &IndexStrategy { + &self.index_strategy + } } #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] @@ -202,27 +213,54 @@ impl CandidateSelector { version_maps: &'a [VersionMap], ) -> Option { tracing::trace!( - "selecting candidate for package {} with range {:?} with {} remote versions", - package_name, - range, + "selecting candidate for package {package_name} with range {range:?} with {} remote versions", version_maps.iter().map(VersionMap::len).sum::(), ); let highest = self.use_highest_version(package_name); let allow_prerelease = self.allow_prereleases(package_name); - if highest { - version_maps.iter().find_map(|version_map| { + if self.index_strategy == IndexStrategy::UnsafeBestMatch { + if highest { Self::select_candidate( - version_map.iter().rev(), + version_maps + .iter() + .map(|version_map| version_map.iter().rev()) + .kmerge_by(|(version1, _), (version2, _)| version1 > version2), package_name, range, allow_prerelease, ) - }) + } else { + Self::select_candidate( + version_maps + .iter() + .map(VersionMap::iter) + .kmerge_by(|(version1, _), (version2, _)| version1 < version2), + package_name, + range, + allow_prerelease, + ) + } } else { - version_maps.iter().find_map(|version_map| { - Self::select_candidate(version_map.iter(), package_name, range, allow_prerelease) - }) + if highest { + version_maps.iter().find_map(|version_map| { + Self::select_candidate( + version_map.iter().rev(), + package_name, + range, + allow_prerelease, + ) + }) + } else { + version_maps.iter().find_map(|version_map| { + Self::select_candidate( + version_map.iter(), + package_name, + range, + allow_prerelease, + ) + }) + } } } @@ -241,7 +279,7 @@ impl CandidateSelector { /// Select the first-matching [`Candidate`] from a set of candidate versions and files, /// preferring wheels over source distributions. fn select_candidate<'a>( - versions: impl Iterator)> + ExactSizeIterator, + versions: impl Iterator)>, package_name: &'a PackageName, range: &Range, allow_prerelease: AllowPreRelease, @@ -253,8 +291,9 @@ impl CandidateSelector { } let mut prerelease = None; - let versions_len = versions.len(); - for (step, (version, maybe_dist)) in versions.enumerate() { + let mut steps = 0usize; + for (version, maybe_dist) in versions { + steps += 1; let candidate = if version.any_prerelease() { if range.contains(version) { match allow_prerelease { @@ -267,7 +306,7 @@ impl CandidateSelector { after {} steps: {:?} version", package_name, range, - step, + steps, version, ); // If pre-releases are allowed, treat them equivalently @@ -308,7 +347,7 @@ impl CandidateSelector { after {} steps: {:?} version", package_name, range, - step, + steps, version, ); Candidate::new(package_name, version, dist) @@ -340,7 +379,7 @@ impl CandidateSelector { after {} steps", package_name, range, - versions_len, + steps, ); match prerelease { None => None, diff --git a/crates/uv-resolver/src/options.rs b/crates/uv-resolver/src/options.rs index f6f4a4287..4b7e9244b 100644 --- a/crates/uv-resolver/src/options.rs +++ b/crates/uv-resolver/src/options.rs @@ -1,3 +1,5 @@ +use uv_configuration::IndexStrategy; + use crate::{DependencyMode, ExcludeNewer, PreReleaseMode, ResolutionMode}; /// Options for resolving a manifest. @@ -7,6 +9,7 @@ pub struct Options { pub prerelease_mode: PreReleaseMode, pub dependency_mode: DependencyMode, pub exclude_newer: Option, + pub index_strategy: IndexStrategy, } /// Builder for [`Options`]. @@ -16,6 +19,7 @@ pub struct OptionsBuilder { prerelease_mode: PreReleaseMode, dependency_mode: DependencyMode, exclude_newer: Option, + index_strategy: IndexStrategy, } impl OptionsBuilder { @@ -52,6 +56,13 @@ impl OptionsBuilder { self } + /// Sets the index strategy. + #[must_use] + pub fn index_strategy(mut self, index_strategy: IndexStrategy) -> Self { + self.index_strategy = index_strategy; + self + } + /// Builds the options. pub fn build(self) -> Options { Options { @@ -59,6 +70,7 @@ impl OptionsBuilder { prerelease_mode: self.prerelease_mode, dependency_mode: self.dependency_mode, exclude_newer: self.exclude_newer, + index_strategy: self.index_strategy, } } } diff --git a/crates/uv/src/commands/pip_compile.rs b/crates/uv/src/commands/pip_compile.rs index bd0f0435b..434fb7164 100644 --- a/crates/uv/src/commands/pip_compile.rs +++ b/crates/uv/src/commands/pip_compile.rs @@ -439,6 +439,7 @@ pub(crate) async fn pip_compile( .prerelease_mode(prerelease_mode) .dependency_mode(dependency_mode) .exclude_newer(exclude_newer) + .index_strategy(index_strategy) .build(); // Resolve the dependencies. diff --git a/crates/uv/src/commands/pip_install.rs b/crates/uv/src/commands/pip_install.rs index 0a11ec701..f5e7b487e 100644 --- a/crates/uv/src/commands/pip_install.rs +++ b/crates/uv/src/commands/pip_install.rs @@ -378,6 +378,7 @@ pub(crate) async fn pip_install( .prerelease_mode(prerelease_mode) .dependency_mode(dependency_mode) .exclude_newer(exclude_newer) + .index_strategy(index_strategy) .build(); // Resolve the requirements. diff --git a/crates/uv/tests/pip_compile.rs b/crates/uv/tests/pip_compile.rs index 874285327..2bcc70c17 100644 --- a/crates/uv/tests/pip_compile.rs +++ b/crates/uv/tests/pip_compile.rs @@ -7812,7 +7812,8 @@ fn compile_index_url_fallback() -> Result<()> { /// prefer it, even if a newer versions exists on the "primary" index. /// /// In this case, anyio 3.5.0 is hosted on the "extra" index, but newer versions are available on -/// the "primary" index. +/// the "primary" index. We should prefer the older version from the "extra" index, since it's the +/// preferred index. #[test] fn compile_index_url_fallback_prefer_primary() -> Result<()> { let context = TestContext::new("3.12"); @@ -7844,6 +7845,86 @@ fn compile_index_url_fallback_prefer_primary() -> Result<()> { Ok(()) } +/// Install a package via `--extra-index-url`. +/// +/// With `unsafe-best-match`, the resolver should prefer the highest compatible version, +/// regardless of which index it comes from. +/// +/// In this case, anyio 3.5.0 is hosted on the "extra" index, but newer versions are available on +/// the "primary" index. We should prefer the newer version from the "primary" index, despite the +/// "extra" index being the preferred index. +#[test] +fn compile_index_url_unsafe_highest() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_in = context.temp_dir.child("requirements.in"); + requirements_in.write_str("anyio")?; + + uv_snapshot!(context.compile() + .arg("--index-strategy") + .arg("unsafe-best-match") + .arg("--index-url") + .arg("https://pypi.org/simple") + .arg("--extra-index-url") + .arg("https://test.pypi.org/simple") + .arg("requirements.in") + .arg("--no-deps"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + # This file was autogenerated by uv via the following command: + # uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2024-03-25T00:00:00Z --index-strategy unsafe-best-match requirements.in --no-deps + anyio==4.3.0 + + ----- stderr ----- + Resolved 1 package in [TIME] + "### + ); + + Ok(()) +} + +/// Install a package via `--extra-index-url`. +/// +/// With `unsafe-best-match`, the resolver should prefer the lowest compatible version, +/// regardless of which index it comes from. +/// +/// In this case, anyio 3.5.0 is hosted on the "extra" index, but older versions are available on +/// the "primary" index. We should prefer the older version from the "primary" index, despite the +/// "extra" index being the preferred index. +#[test] +fn compile_index_url_unsafe_lowest() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_in = context.temp_dir.child("requirements.in"); + requirements_in.write_str("anyio")?; + + uv_snapshot!(context.compile() + .arg("--resolution") + .arg("lowest") + .arg("--index-strategy") + .arg("unsafe-best-match") + .arg("--index-url") + .arg("https://pypi.org/simple") + .arg("--extra-index-url") + .arg("https://test.pypi.org/simple") + .arg("requirements.in") + .arg("--no-deps"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + # This file was autogenerated by uv via the following command: + # uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2024-03-25T00:00:00Z --resolution lowest --index-strategy unsafe-best-match requirements.in --no-deps + anyio==1.0.0 + + ----- stderr ----- + Resolved 1 package in [TIME] + "### + ); + + Ok(()) +} + /// Ensure that the username and the password are omitted when /// index annotations are displayed via `--emit-index-annotation`. #[test] diff --git a/uv.schema.json b/uv.schema.json index 143e6164f..89c529943 100644 --- a/uv.schema.json +++ b/uv.schema.json @@ -117,14 +117,21 @@ "description": "Only use results from the first index that returns a match for a given package name.\n\nWhile this differs from pip's behavior, it's the default index strategy as it's the most secure.", "type": "string", "enum": [ - "first-match" + "first-index" ] }, { "description": "Search for every package name across all indexes, exhausting the versions from the first index before moving on to the next.\n\nIn this strategy, we look for every package across all indexes. When resolving, we attempt to use versions from the indexes in order, such that we exhaust all available versions from the first index before moving on to the next. Further, if a version is found to be incompatible in the first index, we do not reconsider that version in subsequent indexes, even if the secondary index might contain compatible versions (e.g., variants of the same versions with different ABI tags or Python version constraints).\n\nSee: https://peps.python.org/pep-0708/", "type": "string", "enum": [ - "unsafe-any-match" + "unsafe-first-match" + ] + }, + { + "description": "Search for every package name across all indexes, preferring the \"best\" version found. If a package version is in multiple indexes, only look at the entry for the first index.\n\nIn this strategy, we look for every package across all indexes. When resolving, we consider all versions from all indexes, choosing the \"best\" version found (typically, the highest compatible version).\n\nThis most closely matches pip's behavior, but exposes the resolver to \"dependency confusion\" attacks whereby malicious actors can publish packages to public indexes with the same name as internal packages, causing the resolver to install the malicious package in lieu of the intended internal package.\n\nSee: https://peps.python.org/pep-0708/", + "type": "string", + "enum": [ + "unsafe-best-match" ] } ]