diff --git a/crates/puffin-cache/src/canonical_url.rs b/crates/puffin-cache/src/canonical_url.rs index 714833b90..db88672bd 100644 --- a/crates/puffin-cache/src/canonical_url.rs +++ b/crates/puffin-cache/src/canonical_url.rs @@ -1,3 +1,4 @@ +use std::hash::{Hash, Hasher}; use url::Url; use crate::cache_key::{CacheKey, CacheKeyHasher}; @@ -74,6 +75,14 @@ impl CacheKey for CanonicalUrl { } } +impl Hash for CanonicalUrl { + fn hash(&self, state: &mut H) { + // `as_str` gives the serialisation of a url (which has a spec) and so insulates against + // possible changes in how the URL crate does hashing. + self.0.as_str().hash(state); + } +} + /// Like [`CanonicalUrl`], but attempts to represent an underlying source repository, abstracting /// away details like the specific commit or branch, or the subdirectory to build within the /// repository. @@ -116,6 +125,14 @@ impl CacheKey for RepositoryUrl { } } +impl Hash for RepositoryUrl { + fn hash(&self, state: &mut H) { + // `as_str` gives the serialisation of a url (which has a spec) and so insulates against + // possible changes in how the URL crate does hashing. + self.0.as_str().hash(state); + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/puffin-cli/tests/pip_compile.rs b/crates/puffin-cli/tests/pip_compile.rs index d791d7ab0..feb6424d8 100644 --- a/crates/puffin-cli/tests/pip_compile.rs +++ b/crates/puffin-cli/tests/pip_compile.rs @@ -996,12 +996,9 @@ fn mixed_url_dependency() -> Result<()> { Ok(()) } -/// Request Flask, but include a URL dependency for a conflicting version of Werkzeug. -/// -/// TODO(charlie): This test _should_ fail, but sometimes passes due to inadequacies in our -/// URL dependency model. +/// Request Werkzeug via both a version and a URL dependency at a _different_ version, which +/// should result in a conflict. #[test] -#[ignore] fn conflicting_direct_url_dependency() -> Result<()> { let temp_dir = assert_fs::TempDir::new()?; let cache_dir = assert_fs::TempDir::new()?; @@ -1036,6 +1033,116 @@ fn conflicting_direct_url_dependency() -> Result<()> { Ok(()) } +/// Request Werkzeug via both a version and a URL dependency at _the same_ version, which +/// should prefer the direct URL dependency. +#[test] +fn compatible_direct_url_dependency() -> Result<()> { + let temp_dir = assert_fs::TempDir::new()?; + let cache_dir = assert_fs::TempDir::new()?; + let venv = temp_dir.child(".venv"); + + Command::new(get_cargo_bin(BIN_NAME)) + .arg("venv") + .arg(venv.as_os_str()) + .arg("--cache-dir") + .arg(cache_dir.path()) + .current_dir(&temp_dir) + .assert() + .success(); + venv.assert(predicates::path::is_dir()); + + let requirements_in = temp_dir.child("requirements.in"); + requirements_in.touch()?; + requirements_in.write_str("werkzeug==2.0.0\nwerkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl")?; + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-compile") + .arg("requirements.in") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir)); + }); + + Ok(()) +} + +/// Request Werkzeug via two different URLs at different versions, which should result in a conflict. +#[test] +fn conflicting_repeated_url_dependency_version_mismatch() -> Result<()> { + let temp_dir = assert_fs::TempDir::new()?; + let cache_dir = assert_fs::TempDir::new()?; + let venv = temp_dir.child(".venv"); + + Command::new(get_cargo_bin(BIN_NAME)) + .arg("venv") + .arg(venv.as_os_str()) + .arg("--cache-dir") + .arg(cache_dir.path()) + .current_dir(&temp_dir) + .assert() + .success(); + venv.assert(predicates::path::is_dir()); + + let requirements_in = temp_dir.child("requirements.in"); + requirements_in.touch()?; + requirements_in.write_str("werkzeug @ https://files.pythonhosted.org/packages/bd/24/11c3ea5a7e866bf2d97f0501d0b4b1c9bbeade102bb4b588f0d2919a5212/Werkzeug-2.0.1-py3-none-any.whl\nwerkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl")?; + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-compile") + .arg("requirements.in") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir)); + }); + + Ok(()) +} + +/// Request Werkzeug via two different URLs at the same version. Despite mapping to the same +/// version, it should still result in a conflict. +#[test] +fn conflicting_repeated_url_dependency_version_match() -> Result<()> { + let temp_dir = assert_fs::TempDir::new()?; + let cache_dir = assert_fs::TempDir::new()?; + let venv = temp_dir.child(".venv"); + + Command::new(get_cargo_bin(BIN_NAME)) + .arg("venv") + .arg(venv.as_os_str()) + .arg("--cache-dir") + .arg(cache_dir.path()) + .current_dir(&temp_dir) + .assert() + .success(); + venv.assert(predicates::path::is_dir()); + + let requirements_in = temp_dir.child("requirements.in"); + requirements_in.touch()?; + requirements_in.write_str("werkzeug @ git+git+https://github.com/pallets/flask.git@2.0.0 \nwerkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl")?; + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-compile") + .arg("requirements.in") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir)); + }); + + Ok(()) +} + /// Request Flask, but include a URL dependency for a conflicting version of Werkzeug. #[test] fn conflicting_transitive_url_dependency() -> Result<()> { @@ -1072,6 +1179,130 @@ fn conflicting_transitive_url_dependency() -> Result<()> { Ok(()) } +/// Request `transitive_url_dependency`, which depends on `git+https://github.com/pallets/werkzeug@2.0.0`. +/// Since this URL isn't declared upfront, we should reject it. +#[test] +fn disallowed_transitive_url_dependency() -> Result<()> { + let temp_dir = assert_fs::TempDir::new()?; + let cache_dir = assert_fs::TempDir::new()?; + let venv = temp_dir.child(".venv"); + + Command::new(get_cargo_bin(BIN_NAME)) + .arg("venv") + .arg(venv.as_os_str()) + .arg("--cache-dir") + .arg(cache_dir.path()) + .current_dir(&temp_dir) + .assert() + .success(); + venv.assert(predicates::path::is_dir()); + + let requirements_in = temp_dir.child("requirements.in"); + requirements_in.touch()?; + requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip")?; + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-compile") + .arg("requirements.in") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir)); + }); + + Ok(()) +} + +/// Request `transitive_url_dependency`, which depends on `git+https://github.com/pallets/werkzeug@2.0.0`. +/// Since this URL is declared as a constraint, we should accept it. +#[test] +fn allowed_transitive_url_dependency() -> Result<()> { + let temp_dir = assert_fs::TempDir::new()?; + let cache_dir = assert_fs::TempDir::new()?; + let venv = temp_dir.child(".venv"); + + Command::new(get_cargo_bin(BIN_NAME)) + .arg("venv") + .arg(venv.as_os_str()) + .arg("--cache-dir") + .arg(cache_dir.path()) + .current_dir(&temp_dir) + .assert() + .success(); + venv.assert(predicates::path::is_dir()); + + let requirements_in = temp_dir.child("requirements.in"); + requirements_in.touch()?; + requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip")?; + + let constraints_txt = temp_dir.child("constraints.txt"); + constraints_txt.touch()?; + constraints_txt.write_str("werkzeug @ git+https://github.com/pallets/werkzeug@2.0.0")?; + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-compile") + .arg("requirements.in") + .arg("--constraint") + .arg("constraints.txt") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir)); + }); + + Ok(()) +} + +/// Request `transitive_url_dependency`, which depends on `git+https://github.com/pallets/werkzeug@2.0.0`. +/// Since this `git+https://github.com/pallets/werkzeug@2.0.0.git` is declared as a constraint, and +/// those map to the same canonical URL, we should accept it. +#[test] +fn allowed_transitive_canonical_url_dependency() -> Result<()> { + let temp_dir = assert_fs::TempDir::new()?; + let cache_dir = assert_fs::TempDir::new()?; + let venv = temp_dir.child(".venv"); + + Command::new(get_cargo_bin(BIN_NAME)) + .arg("venv") + .arg(venv.as_os_str()) + .arg("--cache-dir") + .arg(cache_dir.path()) + .current_dir(&temp_dir) + .assert() + .success(); + venv.assert(predicates::path::is_dir()); + + let requirements_in = temp_dir.child("requirements.in"); + requirements_in.touch()?; + requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip")?; + + let constraints_txt = temp_dir.child("constraints.txt"); + constraints_txt.touch()?; + constraints_txt.write_str("werkzeug @ git+https://github.com/pallets/werkzeug.git@2.0.0")?; + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-compile") + .arg("requirements.in") + .arg("--constraint") + .arg("constraints.txt") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir)); + }); + + Ok(()) +} + /// Resolve packages from all optional dependency groups in a `pyproject.toml` file. #[test] fn compile_pyproject_toml_all_extras() -> Result<()> { diff --git a/crates/puffin-cli/tests/snapshots/pip_compile__allowed_transitive_canonical_url_dependency.snap b/crates/puffin-cli/tests/snapshots/pip_compile__allowed_transitive_canonical_url_dependency.snap new file mode 100644 index 000000000..072fc56f6 --- /dev/null +++ b/crates/puffin-cli/tests/snapshots/pip_compile__allowed_transitive_canonical_url_dependency.snap @@ -0,0 +1,26 @@ +--- +source: crates/puffin-cli/tests/pip_compile.rs +info: + program: puffin + args: + - pip-compile + - requirements.in + - "--constraint" + - constraints.txt + - "--cache-dir" + - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpb2n1p9 + env: + VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpAnFqUK/.venv +--- +success: true +exit_code: 0 +----- stdout ----- +# This file was autogenerated by Puffin v0.0.1 via the following command: +# [BIN_PATH] pip-compile requirements.in --constraint constraints.txt --cache-dir [CACHE_DIR] +transitive-url-dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip +werkzeug @ git+https://github.com/pallets/werkzeug@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74 + # via transitive-url-dependency + +----- stderr ----- +Resolved 2 packages in [TIME] + diff --git a/crates/puffin-cli/tests/snapshots/pip_compile__allowed_transitive_url_dependency.snap b/crates/puffin-cli/tests/snapshots/pip_compile__allowed_transitive_url_dependency.snap new file mode 100644 index 000000000..48652813d --- /dev/null +++ b/crates/puffin-cli/tests/snapshots/pip_compile__allowed_transitive_url_dependency.snap @@ -0,0 +1,26 @@ +--- +source: crates/puffin-cli/tests/pip_compile.rs +info: + program: puffin + args: + - pip-compile + - requirements.in + - "--constraint" + - constraints.txt + - "--cache-dir" + - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpUeMnec + env: + VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpaClkf0/.venv +--- +success: true +exit_code: 0 +----- stdout ----- +# This file was autogenerated by Puffin v0.0.1 via the following command: +# [BIN_PATH] pip-compile requirements.in --constraint constraints.txt --cache-dir [CACHE_DIR] +transitive-url-dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip +werkzeug @ git+https://github.com/pallets/werkzeug@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74 + # via transitive-url-dependency + +----- stderr ----- +Resolved 2 packages in [TIME] + diff --git a/crates/puffin-cli/tests/snapshots/pip_compile__compatible_direct_url_dependency.snap b/crates/puffin-cli/tests/snapshots/pip_compile__compatible_direct_url_dependency.snap new file mode 100644 index 000000000..0678ca722 --- /dev/null +++ b/crates/puffin-cli/tests/snapshots/pip_compile__compatible_direct_url_dependency.snap @@ -0,0 +1,22 @@ +--- +source: crates/puffin-cli/tests/pip_compile.rs +info: + program: puffin + args: + - pip-compile + - requirements.in + - "--cache-dir" + - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp5pOmCe + env: + VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpMF1dXI/.venv +--- +success: true +exit_code: 0 +----- stdout ----- +# This file was autogenerated by Puffin v0.0.1 via the following command: +# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR] +werkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl + +----- stderr ----- +Resolved 1 package in [TIME] + diff --git a/crates/puffin-cli/tests/snapshots/pip_compile__conflicting_direct_url_dependency.snap b/crates/puffin-cli/tests/snapshots/pip_compile__conflicting_direct_url_dependency.snap new file mode 100644 index 000000000..efadf2a19 --- /dev/null +++ b/crates/puffin-cli/tests/snapshots/pip_compile__conflicting_direct_url_dependency.snap @@ -0,0 +1,20 @@ +--- +source: crates/puffin-cli/tests/pip_compile.rs +info: + program: puffin + args: + - pip-compile + - requirements.in + - "--cache-dir" + - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpVyrxMG + env: + VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpF8a4y3/.venv +--- +success: false +exit_code: 1 +----- stdout ----- + +----- stderr ----- + × No solution found when resolving dependencies: + ╰─▶ root depends on werkzeug 3.0.0 + diff --git a/crates/puffin-cli/tests/snapshots/pip_compile__conflicting_repeated_url_dependency_version_match.snap b/crates/puffin-cli/tests/snapshots/pip_compile__conflicting_repeated_url_dependency_version_match.snap new file mode 100644 index 000000000..bdddf7c41 --- /dev/null +++ b/crates/puffin-cli/tests/snapshots/pip_compile__conflicting_repeated_url_dependency_version_match.snap @@ -0,0 +1,19 @@ +--- +source: crates/puffin-cli/tests/pip_compile.rs +info: + program: puffin + args: + - pip-compile + - requirements.in + - "--cache-dir" + - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpI5Wp9g + env: + VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpEvaFaF/.venv +--- +success: false +exit_code: 2 +----- stdout ----- + +----- stderr ----- +error: Conflicting URLs for package `werkzeug`: git+git+https://github.com/pallets/flask.git@2.0.0 and https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl + diff --git a/crates/puffin-cli/tests/snapshots/pip_compile__conflicting_repeated_url_dependency_version_mismatch.snap b/crates/puffin-cli/tests/snapshots/pip_compile__conflicting_repeated_url_dependency_version_mismatch.snap new file mode 100644 index 000000000..7550ea5ec --- /dev/null +++ b/crates/puffin-cli/tests/snapshots/pip_compile__conflicting_repeated_url_dependency_version_mismatch.snap @@ -0,0 +1,19 @@ +--- +source: crates/puffin-cli/tests/pip_compile.rs +info: + program: puffin + args: + - pip-compile + - requirements.in + - "--cache-dir" + - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpPGmK15 + env: + VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpgXUIr9/.venv +--- +success: false +exit_code: 2 +----- stdout ----- + +----- stderr ----- +error: Conflicting URLs for package `werkzeug`: https://files.pythonhosted.org/packages/bd/24/11c3ea5a7e866bf2d97f0501d0b4b1c9bbeade102bb4b588f0d2919a5212/Werkzeug-2.0.1-py3-none-any.whl and https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl + diff --git a/crates/puffin-cli/tests/snapshots/pip_compile__disallowed_transitive_url_dependency.snap b/crates/puffin-cli/tests/snapshots/pip_compile__disallowed_transitive_url_dependency.snap new file mode 100644 index 000000000..d77c5ddf0 --- /dev/null +++ b/crates/puffin-cli/tests/snapshots/pip_compile__disallowed_transitive_url_dependency.snap @@ -0,0 +1,19 @@ +--- +source: crates/puffin-cli/tests/pip_compile.rs +info: + program: puffin + args: + - pip-compile + - requirements.in + - "--cache-dir" + - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpf5MfA7 + env: + VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpFZtNQj/.venv +--- +success: false +exit_code: 2 +----- stdout ----- + +----- stderr ----- +error: Package `werkzeug` attempted to resolve via URL: git+https://github.com/pallets/werkzeug@2.0.0. URL dependencies must be expressed as direct requirements or constraints. Consider adding `werkzeug @ git+https://github.com/pallets/werkzeug@2.0.0` to your dependencies or constraints file. + diff --git a/crates/puffin-resolver/src/error.rs b/crates/puffin-resolver/src/error.rs index 57195fdb2..85eae54c5 100644 --- a/crates/puffin-resolver/src/error.rs +++ b/crates/puffin-resolver/src/error.rs @@ -33,6 +33,15 @@ pub enum ResolveError { metadata: PackageName, }, + #[error("~= operator requires at least two release segments: {0}")] + InvalidTildeEquals(pep440_rs::VersionSpecifier), + + #[error("Conflicting URLs for package `{0}`: {1} and {2}")] + ConflictingUrls(PackageName, String, String), + + #[error("Package `{0}` attempted to resolve via URL: {1}. URL dependencies must be expressed as direct requirements or constraints. Consider adding `{0} @ {1}` to your dependencies or constraints file.")] + DisallowedUrl(PackageName, Url), + #[error("Failed to build distribution: {filename}")] RegistryDistribution { filename: String, diff --git a/crates/puffin-resolver/src/pubgrub/dependencies.rs b/crates/puffin-resolver/src/pubgrub/dependencies.rs new file mode 100644 index 000000000..25dda164b --- /dev/null +++ b/crates/puffin-resolver/src/pubgrub/dependencies.rs @@ -0,0 +1,235 @@ +use itertools::Itertools; +use pubgrub::range::Range; +use pubgrub::type_aliases::DependencyConstraints; +use tracing::warn; + +use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl}; +use puffin_cache::CanonicalUrl; +use puffin_normalize::{ExtraName, PackageName}; + +use crate::pubgrub::specifier::PubGrubSpecifier; +use crate::pubgrub::{PubGrubPackage, PubGrubVersion}; +use crate::ResolveError; + +#[derive(Debug)] +pub struct PubGrubDependencies(DependencyConstraints>); + +impl PubGrubDependencies { + /// Generate a set of `PubGrub` dependencies from a set of requirements. + pub(crate) fn try_from_requirements<'a>( + requirements: &[Requirement], + constraints: &[Requirement], + extra: Option<&'a ExtraName>, + source: Option<&'a PackageName>, + env: &'a MarkerEnvironment, + ) -> Result { + let mut dependencies = + DependencyConstraints::>::default(); + + // Iterate over all declared requirements. + for requirement in requirements { + // Avoid self-dependencies. + if source.is_some_and(|source| source == &requirement.name) { + // TODO(konstin): Warn only once here + warn!("{} depends on itself", requirement.name); + continue; + } + + // If the requirement isn't relevant for the current platform, skip it. + if let Some(extra) = extra { + if !requirement.evaluate_markers(env, &[extra.as_ref()]) { + continue; + } + } else { + if !requirement.evaluate_markers(env, &[]) { + continue; + } + } + + // Add the package, plus any extra variants. + for result in std::iter::once(to_pubgrub(requirement, None)).chain( + requirement + .extras + .clone() + .into_iter() + .flatten() + .map(|extra| to_pubgrub(requirement, Some(ExtraName::new(extra)))), + ) { + let (package, version) = result?; + + if let Some(entry) = dependencies.get_key_value(&package) { + // Merge the versions. + let version = merge_versions(entry.1, &version); + + // Merge the package. + if let Some(package) = merge_package(entry.0, &package)? { + dependencies.remove(&package); + dependencies.insert(package, version); + } else { + dependencies.insert(package, version); + } + } else { + dependencies.insert(package.clone(), version.clone()); + } + } + } + + // If any requirements were further constrained by the user, add those constraints. + for constraint in constraints { + // Avoid self-dependencies. + if source.is_some_and(|source| source == &constraint.name) { + // TODO(konstin): Warn only once here + warn!("{} depends on itself", constraint.name); + continue; + } + + // If the requirement isn't relevant for the current platform, skip it. + if let Some(extra) = extra { + if !constraint.evaluate_markers(env, &[extra.as_ref()]) { + continue; + } + } else { + if !constraint.evaluate_markers(env, &[]) { + continue; + } + } + + // Add the package, plus any extra variants. + for result in std::iter::once(to_pubgrub(constraint, None)).chain( + constraint + .extras + .clone() + .into_iter() + .flatten() + .map(|extra| to_pubgrub(constraint, Some(ExtraName::new(extra)))), + ) { + let (package, version) = result?; + + if let Some(entry) = dependencies.get_key_value(&package) { + // Merge the versions. + let version = merge_versions(entry.1, &version); + + // Merge the package. + if let Some(package) = merge_package(entry.0, &package)? { + dependencies.insert(package, version); + } else { + dependencies.insert(package, version); + } + } + } + } + + Ok(Self(dependencies)) + } + + /// Insert a [`PubGrubPackage`] and [`PubGrubVersion`] range into the set of dependencies. + pub(crate) fn insert( + &mut self, + package: PubGrubPackage, + version: Range, + ) -> Option> { + self.0.insert(package, version) + } + + /// Iterate over the dependencies. + pub(crate) fn iter(&self) -> impl Iterator)> { + self.0.iter() + } +} + +/// Convert a [`PubGrubDependencies`] to a [`DependencyConstraints`]. +impl From for DependencyConstraints> { + fn from(dependencies: PubGrubDependencies) -> Self { + dependencies.0 + } +} + +/// Convert a [`Requirement`] to a `PubGrub`-compatible package and range. +fn to_pubgrub( + requirement: &Requirement, + extra: Option, +) -> Result<(PubGrubPackage, Range), ResolveError> { + match requirement.version_or_url.as_ref() { + // The requirement has no specifier (e.g., `flask`). + None => Ok(( + PubGrubPackage::Package(requirement.name.clone(), extra, None), + Range::full(), + )), + // The requirement has a URL (e.g., `flask @ file:///path/to/flask`). + Some(VersionOrUrl::Url(url)) => Ok(( + PubGrubPackage::Package(requirement.name.clone(), extra, Some(url.clone())), + Range::full(), + )), + // The requirement has a specifier (e.g., `flask>=1.0`). + Some(VersionOrUrl::VersionSpecifier(specifiers)) => { + let version = specifiers + .iter() + .map(PubGrubSpecifier::try_from) + .fold_ok(Range::full(), |range, specifier| { + range.intersection(&specifier.into()) + })?; + Ok(( + PubGrubPackage::Package(requirement.name.clone(), extra, None), + version, + )) + } + } +} + +/// Merge two [`PubGrubVersion`] ranges. +fn merge_versions( + left: &Range, + right: &Range, +) -> Range { + left.intersection(right) +} + +/// Merge two [`PubGrubPackage`] instances. +fn merge_package( + left: &PubGrubPackage, + right: &PubGrubPackage, +) -> Result, ResolveError> { + match (left, right) { + // Either package is `root`. + (PubGrubPackage::Root(_), _) | (_, PubGrubPackage::Root(_)) => Ok(None), + + // Left package has a URL. Propagate the URL. + (PubGrubPackage::Package(name, extra, Some(url)), PubGrubPackage::Package(.., None)) => { + Ok(Some(PubGrubPackage::Package( + name.clone(), + extra.clone(), + Some(url.clone()), + ))) + } + + // Right package has a URL. + (PubGrubPackage::Package(.., None), PubGrubPackage::Package(name, extra, Some(url))) => { + Ok(Some(PubGrubPackage::Package( + name.clone(), + extra.clone(), + Some(url.clone()), + ))) + } + + // Neither package has a URL. + (PubGrubPackage::Package(_name, _extra, None), PubGrubPackage::Package(.., None)) => { + Ok(None) + } + + // Both packages have a URL. + ( + PubGrubPackage::Package(name, _extra, Some(left)), + PubGrubPackage::Package(.., Some(right)), + ) => { + if CanonicalUrl::new(left) == CanonicalUrl::new(right) { + Ok(None) + } else { + Err(ResolveError::ConflictingUrls( + name.clone(), + left.to_string(), + right.to_string(), + )) + } + } + } +} diff --git a/crates/puffin-resolver/src/pubgrub/mod.rs b/crates/puffin-resolver/src/pubgrub/mod.rs index 8ff01d823..1f52e13ed 100644 --- a/crates/puffin-resolver/src/pubgrub/mod.rs +++ b/crates/puffin-resolver/src/pubgrub/mod.rs @@ -1,109 +1,14 @@ -use anyhow::Result; -use itertools::Itertools; -use pubgrub::range::Range; -use tracing::warn; - -use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl}; -use puffin_normalize::{ExtraName, PackageName}; - pub(crate) use crate::pubgrub::package::PubGrubPackage; pub(crate) use crate::pubgrub::priority::{PubGrubPriorities, PubGrubPriority}; pub use crate::pubgrub::report::ResolutionFailureReporter; -pub(crate) use crate::pubgrub::specifier::PubGrubSpecifier; + pub(crate) use crate::pubgrub::version::{PubGrubVersion, MIN_VERSION}; +pub(crate) use crate::pubgrub::dependencies::PubGrubDependencies; + +mod dependencies; mod package; mod priority; mod report; mod specifier; mod version; - -/// Convert a set of requirements to a set of `PubGrub` packages and ranges. -pub(crate) fn iter_requirements<'a>( - requirements: impl Iterator + 'a, - extra: Option<&'a ExtraName>, - source: Option<&'a PackageName>, - env: &'a MarkerEnvironment, -) -> impl Iterator)> + 'a { - requirements - .filter(move |requirement| { - if source.is_some_and(|source| source == &requirement.name) { - // TODO(konstin): Warn only once here - warn!("{} depends on itself", requirement.name); - false - } else { - true - } - }) - .filter(move |requirement| { - // TODO(charlie): We shouldn't need a vector here. - let extra = if let Some(extra) = extra { - vec![extra.as_ref()] - } else { - vec![] - }; - requirement.evaluate_markers(env, &extra) - }) - .flat_map(|requirement| { - std::iter::once(pubgrub_package(requirement, None).unwrap()).chain( - requirement - .extras - .clone() - .into_iter() - .flatten() - .map(|extra| { - pubgrub_package(requirement, Some(ExtraName::new(extra))).unwrap() - }), - ) - }) -} - -/// Convert a PEP 508 specifier to a `PubGrub` range. -pub(crate) fn version_range(specifiers: Option<&VersionOrUrl>) -> Result> { - let Some(specifiers) = specifiers else { - return Ok(Range::full()); - }; - - let VersionOrUrl::VersionSpecifier(specifiers) = specifiers else { - return Ok(Range::full()); - }; - - specifiers - .iter() - .map(PubGrubSpecifier::try_from) - .fold_ok(Range::full(), |range, specifier| { - range.intersection(&specifier.into()) - }) -} - -/// Convert a [`Requirement`] to a `PubGrub`-compatible package and range. -fn pubgrub_package( - requirement: &Requirement, - extra: Option, -) -> Result<(PubGrubPackage, Range)> { - match requirement.version_or_url.as_ref() { - // The requirement has no specifier (e.g., `flask`). - None => Ok(( - PubGrubPackage::Package(requirement.name.clone(), extra, None), - Range::full(), - )), - // The requirement has a URL (e.g., `flask @ file:///path/to/flask`). - Some(VersionOrUrl::Url(url)) => Ok(( - PubGrubPackage::Package(requirement.name.clone(), extra, Some(url.clone())), - Range::full(), - )), - // The requirement has a specifier (e.g., `flask>=1.0`). - Some(VersionOrUrl::VersionSpecifier(specifiers)) => { - let version = specifiers - .iter() - .map(PubGrubSpecifier::try_from) - .fold_ok(Range::full(), |range, specifier| { - range.intersection(&specifier.into()) - })?; - Ok(( - PubGrubPackage::Package(requirement.name.clone(), extra, None), - version, - )) - } - } -} diff --git a/crates/puffin-resolver/src/pubgrub/package.rs b/crates/puffin-resolver/src/pubgrub/package.rs index 28885d90d..0e08936fa 100644 --- a/crates/puffin-resolver/src/pubgrub/package.rs +++ b/crates/puffin-resolver/src/pubgrub/package.rs @@ -17,6 +17,52 @@ pub enum PubGrubPackage { Package( PackageName, Option, + /// The URL of the package, if it was specified in the requirement. + /// + /// There are a few challenges that come with URL-based packages, and how they map to + /// PubGrub. + /// + /// If the user declares a direct URL dependency, and then a transitive dependency + /// appears for the same package, we need to ensure that the direct URL dependency can + /// "satisfy" that requirement. So if the user declares a URL dependency on Werkzeug, and a + /// registry dependency on Flask, we need to ensure that Flask's dependency on Werkzeug + /// is resolved by the URL dependency. This means: (1) we need to avoid adding a second + /// Werkzeug variant from PyPI; and (2) we need to error if the Werkzeug version requested + /// by Flask doesn't match that of the URL dependency. + /// + /// Additionally, we need to ensure that we disallow multiple versions of the same package, + /// even if requested from different URLs. + /// + /// Removing the URL from the hash and equality operators is a hack to enable this behavior. + /// When we visit a URL dependency, we include the URL here. But that dependency "looks" + /// equal to the registry version from the solver's perspective, since they hash to the + /// same value. + /// + /// However, this creates the unfortunate requirement that we _must_ visit URL dependencies + /// before their registry variant, so that the URL-based version is used as the "canonical" + /// version. This is because the solver will always prefer the first version it sees, and + /// we need to ensure that the first version is the URL-based version. + /// + /// To enforce this requirement, we in turn require that all possible URL dependencies are + /// defined upfront, as `requirements.txt` or `constraints.txt` or similar. Otherwise, + /// solving these graphs becomes far more complicated -- and the "right" behavior isn't + /// even clear. For example, imagine that you define a direct dependency on Werkzeug, and + /// then one of your other direct dependencies declares a dependency on Werkzeug at some + /// URL. Which is correct? By requiring direct dependencies, the semantics are at least + /// clear. + /// + /// With the list of known URLs available upfront, we then only need to do two things: + /// + /// 1. When iterating over the dependencies for a single package, ensure that we respect + /// URL variants over registry variants, if the package declares a dependency on both + /// `Werkzeug==2.0.0` _and_ `Werkzeug @ https://...` , which is strange but possible. + /// This is enforced by [`crate::pubgrub::dependencies::PubGrubDependencies`]. + /// 2. Reject any URL dependencies that aren't known ahead-of-time. + /// + /// Eventually, we could relax this constraint, in favor of something more lenient, e.g., if + /// we're going to have a dependency that's provided as a URL, we _need_ to visit the URL + /// version before the registry version. So we could just error if we visit a URL variant + /// _after_ a registry variant. #[derivative(PartialEq = "ignore")] #[derivative(PartialOrd = "ignore")] #[derivative(Hash = "ignore")] diff --git a/crates/puffin-resolver/src/pubgrub/specifier.rs b/crates/puffin-resolver/src/pubgrub/specifier.rs index 4099c45cc..cd8588964 100644 --- a/crates/puffin-resolver/src/pubgrub/specifier.rs +++ b/crates/puffin-resolver/src/pubgrub/specifier.rs @@ -4,6 +4,7 @@ use pubgrub::range::Range; use pep440_rs::{Operator, VersionSpecifier}; use crate::pubgrub::version::PubGrubVersion; +use crate::ResolveError; /// A range of versions that can be used to satisfy a requirement. #[derive(Debug)] @@ -17,10 +18,10 @@ impl From for Range { } impl TryFrom<&VersionSpecifier> for PubGrubSpecifier { - type Error = anyhow::Error; + type Error = ResolveError; /// Convert a PEP 508 specifier to a `PubGrub`-compatible version range. - fn try_from(specifier: &VersionSpecifier) -> Result { + fn try_from(specifier: &VersionSpecifier) -> Result { let ranges = match specifier.operator() { Operator::Equal => { let version = PubGrubVersion::from(specifier.version().clone()); @@ -36,9 +37,7 @@ impl TryFrom<&VersionSpecifier> for PubGrubSpecifier { } Operator::TildeEqual => { let [rest @ .., last, _] = specifier.version().release.as_slice() else { - return Err(anyhow::anyhow!( - "~= operator requires at least two release segments" - )); + return Err(ResolveError::InvalidTildeEquals(specifier.clone())); }; let upper = PubGrubVersion::from(pep440_rs::Version { dev: Some(0), diff --git a/crates/puffin-resolver/src/resolver.rs b/crates/puffin-resolver/src/resolver.rs index 079589f20..33b7a2931 100644 --- a/crates/puffin-resolver/src/resolver.rs +++ b/crates/puffin-resolver/src/resolver.rs @@ -1,6 +1,5 @@ //! Given a set of requirements, find a set of compatible packages. -use std::collections::hash_map::Entry; use std::collections::BTreeMap; use std::str::FromStr; use std::sync::Arc; @@ -22,7 +21,7 @@ use waitmap::WaitMap; use distribution_filename::{SourceDistributionFilename, WheelFilename}; use pep508_rs::{MarkerEnvironment, Requirement}; use platform_tags::Tags; -use puffin_cache::RepositoryUrl; +use puffin_cache::{CanonicalUrl, RepositoryUrl}; use puffin_client::RegistryClient; use puffin_distribution::{RemoteDistributionRef, VersionOrUrl}; use puffin_normalize::{ExtraName, PackageName}; @@ -34,14 +33,16 @@ use crate::distribution::{SourceDistributionFetcher, WheelFetcher}; use crate::error::ResolveError; use crate::file::{DistributionFile, SdistFile, WheelFile}; use crate::manifest::Manifest; -use crate::pubgrub::{iter_requirements, version_range}; -use crate::pubgrub::{PubGrubPackage, PubGrubPriorities, PubGrubVersion, MIN_VERSION}; +use crate::pubgrub::{ + PubGrubDependencies, PubGrubPackage, PubGrubPriorities, PubGrubVersion, MIN_VERSION, +}; use crate::resolution::Graph; pub struct Resolver<'a, Context: BuildContext + Sync> { project: Option, requirements: Vec, constraints: Vec, + allowed_urls: AllowedUrls, markers: &'a MarkerEnvironment, tags: &'a Tags, client: &'a RegistryClient, @@ -65,6 +66,18 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { index: Arc::new(Index::default()), locks: Arc::new(Locks::default()), selector: CandidateSelector::from(&manifest), + allowed_urls: manifest + .requirements + .iter() + .chain(manifest.constraints.iter()) + .filter_map(|req| { + if let Some(pep508_rs::VersionOrUrl::Url(url)) = &req.version_or_url { + Some(url) + } else { + None + } + }) + .collect(), project: manifest.project, requirements: manifest.requirements, constraints: manifest.constraints, @@ -128,14 +141,6 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { let mut pins = FxHashMap::default(); let mut priorities = PubGrubPriorities::default(); - // Push all the requirements into the package sink. - for (package, version) in - iter_requirements(self.requirements.iter(), None, None, self.markers) - { - debug!("Adding root dependency: {package} {version}"); - Self::visit_package(&package, &mut priorities, &mut in_flight, request_sink)?; - } - // Start the solve. let mut state = State::init(root.clone(), MIN_VERSION.clone()); let mut added_dependencies: FxHashMap> = @@ -366,6 +371,14 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { PubGrubPackage::Package(package_name, _extra, Some(url)) => { debug!("Searching for a compatible version of {package_name} @ {url} ({range})",); + // If the URL wasn't declared in the direct dependencies or constraints, reject it. + if !self.allowed_urls.contains(url) { + return Err(ResolveError::DisallowedUrl( + package_name.clone(), + url.clone(), + )); + } + if let Ok(wheel_filename) = WheelFilename::try_from(url) { // If the URL is that of a wheel, extract the version. let version = PubGrubVersion::from(wheel_filename.version); @@ -455,37 +468,23 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { ) -> Result { match package { PubGrubPackage::Root(_) => { - let mut constraints = - DependencyConstraints::>::default(); - // Add the root requirements. - for (package, version) in - iter_requirements(self.requirements.iter(), None, None, self.markers) - { + let constraints = PubGrubDependencies::try_from_requirements( + &self.requirements, + &self.constraints, + None, + None, + self.markers, + )?; + + for (package, version) in constraints.iter() { + debug!("Adding direct dependency: {package:?} {version:?}"); + // Emit a request to fetch the metadata for this package. - Self::visit_package(&package, priorities, in_flight, request_sink)?; - - // Add it to the constraints. - match constraints.entry(package) { - Entry::Occupied(mut entry) => { - entry.insert(entry.get().intersection(&version)); - } - Entry::Vacant(entry) => { - entry.insert(version); - } - } + Self::visit_package(package, priorities, in_flight, request_sink)?; } - // If any requirements were further constrained by the user, add those constraints. - for (package, version) in - iter_requirements(self.constraints.iter(), None, None, self.markers) - { - if let Some(range) = constraints.get_mut(&package) { - *range = range.intersection(&version); - } - } - - Ok(Dependencies::Known(constraints)) + Ok(Dependencies::Known(constraints.into())) } PubGrubPackage::Package(package_name, extra, url) => { @@ -500,39 +499,19 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { }; let metadata = entry.value(); - let mut constraints = - DependencyConstraints::>::default(); - - for (package, version) in iter_requirements( - metadata.requires_dist.iter(), + let mut constraints = PubGrubDependencies::try_from_requirements( + &metadata.requires_dist, + &self.constraints, extra.as_ref(), Some(package_name), self.markers, - ) { + )?; + + for (package, version) in constraints.iter() { debug!("Adding transitive dependency: {package} {version}"); // Emit a request to fetch the metadata for this package. - Self::visit_package(&package, priorities, in_flight, request_sink)?; - - // Add it to the constraints. - match constraints.entry(package) { - Entry::Occupied(mut entry) => { - entry.insert(entry.get().intersection(&version)); - } - Entry::Vacant(entry) => { - entry.insert(version); - } - } - } - - // If any packages were further constrained by the user, add those constraints. - for constraint in &self.constraints { - let package = PubGrubPackage::Package(constraint.name.clone(), None, None); - if let Some(range) = constraints.get_mut(&package) { - *range = range.intersection( - &version_range(constraint.version_or_url.as_ref()).unwrap(), - ); - } + Self::visit_package(package, priorities, in_flight, request_sink)?; } if let Some(extra) = extra { @@ -549,7 +528,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { ); } - Ok(Dependencies::Known(constraints)) + Ok(Dependencies::Known(constraints.into())) } } } @@ -927,6 +906,21 @@ impl Default for Index { } } +#[derive(Debug, Default)] +struct AllowedUrls(FxHashSet); + +impl AllowedUrls { + fn contains(&self, url: &Url) -> bool { + self.0.contains(&CanonicalUrl::new(url)) + } +} + +impl<'a> FromIterator<&'a Url> for AllowedUrls { + fn from_iter>(iter: T) -> Self { + Self(iter.into_iter().map(CanonicalUrl::new).collect()) + } +} + /// An enum used by [`DependencyProvider`] that holds information about package dependencies. /// For each [Package] there is a set of versions allowed as a dependency. #[derive(Clone)]