Remove usages of verbatim URL in URL resolver (#4221)

## Summary

Should be no behavior changes, but one piece of technical debt I noticed
left over in the URL resolver. We already have structured paths, so we
shouldn't need to compare verbatim URLs.
This commit is contained in:
Charlie Marsh 2024-06-10 14:55:48 -07:00 committed by GitHub
parent fd52fe74ce
commit 0c1dcb797a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 130 additions and 232 deletions

View file

@ -4,13 +4,14 @@ use either::Either;
use itertools::Itertools;
use pubgrub::range::Range;
use rustc_hash::FxHashSet;
use same_file::is_same_file;
use tracing::warn;
use distribution_types::Verbatim;
use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use pypi_types::{ParsedUrl, Requirement, RequirementSource, VerbatimParsedUrl};
use pypi_types::{
ParsedArchiveUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, Requirement, RequirementSource,
};
use uv_configuration::{Constraints, Overrides};
use uv_git::GitResolver;
use uv_normalize::{ExtraName, GroupName, PackageName};
@ -258,7 +259,11 @@ impl PubGrubRequirement {
version,
})
}
RequirementSource::Url { url, .. } => {
RequirementSource::Url {
subdirectory,
location,
url,
} => {
let Some(expected) = urls.get(&requirement.name) else {
return Err(ResolveError::DisallowedUrl(
requirement.name.clone(),
@ -266,7 +271,11 @@ impl PubGrubRequirement {
));
};
if !Urls::is_allowed(&expected.verbatim, url, git) {
let parsed_url = ParsedUrl::Archive(ParsedArchiveUrl::from_source(
location.clone(),
subdirectory.clone(),
));
if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim.verbatim().to_string(),
@ -284,7 +293,13 @@ impl PubGrubRequirement {
version: Range::full(),
})
}
RequirementSource::Git { url, .. } => {
RequirementSource::Git {
repository,
reference,
precise,
url,
subdirectory,
} => {
let Some(expected) = urls.get(&requirement.name) else {
return Err(ResolveError::DisallowedUrl(
requirement.name.clone(),
@ -292,7 +307,13 @@ impl PubGrubRequirement {
));
};
if !Urls::is_allowed(&expected.verbatim, url, git) {
let parsed_url = ParsedUrl::Git(ParsedGitUrl::from_source(
repository.clone(),
reference.clone(),
*precise,
subdirectory.clone(),
));
if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim.verbatim().to_string(),
@ -310,7 +331,11 @@ impl PubGrubRequirement {
version: Range::full(),
})
}
RequirementSource::Path { url, path, .. } => {
RequirementSource::Path {
editable,
url,
path,
} => {
let Some(expected) = urls.get(&requirement.name) else {
return Err(ResolveError::DisallowedUrl(
requirement.name.clone(),
@ -318,22 +343,12 @@ impl PubGrubRequirement {
));
};
let mut is_allowed = Urls::is_allowed(&expected.verbatim, url, git);
if !is_allowed {
if let VerbatimParsedUrl {
parsed_url: ParsedUrl::Path(previous_path),
..
} = &expected
{
// On Windows, we can have two versions of the same path, e.g.
// `C:\Users\KONSTA~1` and `C:\Users\Konstantin`.
if is_same_file(path, &previous_path.path).unwrap_or(false) {
is_allowed = true;
}
}
}
if !is_allowed {
let parsed_url = ParsedUrl::Path(ParsedPathUrl::from_source(
path.clone(),
*editable,
url.to_url(),
));
if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim.verbatim().to_string(),

View file

@ -1,15 +1,14 @@
use rustc_hash::FxHashMap;
use same_file::is_same_file;
use tracing::debug;
use url::Url;
use cache_key::CanonicalUrl;
use distribution_types::Verbatim;
use pep508_rs::{MarkerEnvironment, VerbatimUrl};
use pep508_rs::MarkerEnvironment;
use pypi_types::{
ParsedArchiveUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, RequirementSource, VerbatimParsedUrl,
};
use uv_git::{GitResolver, GitUrl};
use uv_git::GitResolver;
use uv_normalize::PackageName;
use crate::{DependencyMode, Manifest, ResolveError};
@ -37,14 +36,14 @@ impl Urls {
url,
} => {
let url = VerbatimParsedUrl {
parsed_url: ParsedUrl::Archive(ParsedArchiveUrl {
url: location.clone(),
subdirectory: subdirectory.clone(),
}),
parsed_url: ParsedUrl::Archive(ParsedArchiveUrl::from_source(
location.clone(),
subdirectory.clone(),
)),
verbatim: url.clone(),
};
if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) {
if !is_equal(&previous.verbatim, &url.verbatim) {
if !Self::same_resource(&previous.parsed_url, &url.parsed_url, git) {
return Err(ResolveError::ConflictingUrlsDirect(
requirement.name.clone(),
previous.verbatim.verbatim().to_string(),
@ -59,43 +58,34 @@ impl Urls {
url,
} => {
let url = VerbatimParsedUrl {
parsed_url: ParsedUrl::Path(ParsedPathUrl {
url: url.to_url(),
path: path.clone(),
editable: *editable,
}),
parsed_url: ParsedUrl::Path(ParsedPathUrl::from_source(
path.clone(),
*editable,
url.to_url(),
)),
verbatim: url.clone(),
};
match urls.entry(requirement.name.clone()) {
std::collections::hash_map::Entry::Occupied(mut entry) => {
let previous = entry.get();
if let VerbatimParsedUrl {
parsed_url: ParsedUrl::Path(previous_path),
..
} = &previous
{
// On Windows, we can have two versions of the same path, e.g.
// `C:\Users\KONSTA~1` and `C:\Users\Konstantin`.
if is_same_file(path, &previous_path.path).unwrap_or(false) {
// Allow editables to override non-editables.
if previous_path.editable && !editable {
debug!(
"Allowing {} as an editable variant of {}",
&previous.verbatim, url.verbatim
);
} else {
entry.insert(url.clone());
}
continue;
if Self::same_resource(&previous.parsed_url, &url.parsed_url, git) {
// Allow editables to override non-editables.
if previous.parsed_url.is_editable() && !editable {
debug!(
"Allowing {} as an editable variant of {}",
&previous.verbatim, url.verbatim
);
} else {
entry.insert(url.clone());
}
continue;
}
if !is_equal(&previous.verbatim, &url.verbatim) {
return Err(ResolveError::ConflictingUrlsDirect(
requirement.name.clone(),
previous.verbatim.verbatim().to_string(),
url.verbatim.verbatim().to_string(),
));
}
return Err(ResolveError::ConflictingUrlsDirect(
requirement.name.clone(),
previous.verbatim.verbatim().to_string(),
url.verbatim.verbatim().to_string(),
));
}
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(url.clone());
@ -109,31 +99,22 @@ impl Urls {
subdirectory,
url,
} => {
let mut git_url = GitUrl::new(repository.clone(), reference.clone());
if let Some(precise) = precise {
git_url = git_url.with_precise(*precise);
}
let url = VerbatimParsedUrl {
parsed_url: ParsedUrl::Git(ParsedGitUrl {
url: git_url,
subdirectory: subdirectory.clone(),
}),
parsed_url: ParsedUrl::Git(ParsedGitUrl::from_source(
repository.clone(),
reference.clone(),
*precise,
subdirectory.clone(),
)),
verbatim: url.clone(),
};
if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) {
if !is_equal(&previous.verbatim, &url.verbatim) {
if is_same_reference(&previous.verbatim, &url.verbatim, git) {
debug!(
"Allowing {} as a variant of {}",
&url.verbatim, previous.verbatim
);
} else {
return Err(ResolveError::ConflictingUrlsDirect(
requirement.name.clone(),
previous.verbatim.verbatim().to_string(),
url.verbatim.verbatim().to_string(),
));
}
if !Self::same_resource(&previous.parsed_url, &url.parsed_url, git) {
return Err(ResolveError::ConflictingUrlsDirect(
requirement.name.clone(),
previous.verbatim.verbatim().to_string(),
url.verbatim.verbatim().to_string(),
));
}
}
}
@ -148,155 +129,20 @@ impl Urls {
self.0.get(package)
}
/// Returns `true` if the provided URL is compatible with the given "allowed" URL.
pub(crate) fn is_allowed(
expected: &VerbatimUrl,
provided: &VerbatimUrl,
git: &GitResolver,
) -> bool {
#[allow(clippy::if_same_then_else)]
if is_equal(expected, provided) {
// If the URLs are canonically equivalent, they're compatible.
true
} else if is_same_reference(expected, provided, git) {
// If the URLs refer to the same commit, they're compatible.
true
} else {
// Otherwise, they're incompatible.
false
/// Returns `true` if the [`ParsedUrl`] instances point to the same resource.
pub(crate) fn same_resource(a: &ParsedUrl, b: &ParsedUrl, git: &GitResolver) -> bool {
match (a, b) {
(ParsedUrl::Archive(a), ParsedUrl::Archive(b)) => {
a.subdirectory == b.subdirectory
&& CanonicalUrl::new(&a.url) == CanonicalUrl::new(&b.url)
}
(ParsedUrl::Git(a), ParsedUrl::Git(b)) => {
a.subdirectory == b.subdirectory && git.same_ref(&a.url, &b.url)
}
(ParsedUrl::Path(a), ParsedUrl::Path(b)) => {
a.path == b.path || is_same_file(&a.path, &b.path).unwrap_or(false)
}
_ => false,
}
}
}
/// Returns `true` if the [`VerbatimUrl`] is compatible with the previous [`VerbatimUrl`].
///
/// Accepts URLs that map to the same [`CanonicalUrl`].
fn is_equal(previous: &VerbatimUrl, url: &VerbatimUrl) -> bool {
CanonicalUrl::new(previous.raw()) == CanonicalUrl::new(url.raw())
}
/// Returns `true` if the URLs refer to the same Git commit.
///
/// For example, the previous URL could be a branch or tag, while the current URL would be a
/// precise commit hash.
fn is_same_reference<'a>(a: &'a Url, b: &'a Url, git: &'a GitResolver) -> bool {
// Convert `a` to a Git URL, if possible.
let Ok(a_git) = ParsedGitUrl::try_from(Url::from(CanonicalUrl::new(a))) else {
return false;
};
// Convert `b` to a Git URL, if possible.
let Ok(b_git) = ParsedGitUrl::try_from(Url::from(CanonicalUrl::new(b))) else {
return false;
};
// The URLs must refer to the same subdirectory, if any.
if a_git.subdirectory != b_git.subdirectory {
return false;
}
// The Git URLs must refer to the same commit.
git.same_ref(&a_git.url, &b_git.url)
}
#[cfg(test)]
mod tests {
use std::str::FromStr;
use url::Url;
use pep508_rs::VerbatimUrl;
use uv_git::{GitResolver, GitSha, GitUrl, RepositoryReference};
use crate::resolver::urls::{is_equal, is_same_reference};
#[test]
fn url_compatibility() -> Result<(), url::ParseError> {
// Same repository, same tag.
let previous = VerbatimUrl::parse_url("git+https://example.com/MyProject.git@v1.0")?;
let url = VerbatimUrl::parse_url("git+https://example.com/MyProject.git@v1.0")?;
assert!(is_equal(&previous, &url));
// Same repository, different tags.
let previous = VerbatimUrl::parse_url("git+https://example.com/MyProject.git@v1.0")?;
let url = VerbatimUrl::parse_url("git+https://example.com/MyProject.git@v1.1")?;
assert!(!is_equal(&previous, &url));
// Same repository (with and without `.git`), same tag.
let previous = VerbatimUrl::parse_url("git+https://example.com/MyProject@v1.0")?;
let url = VerbatimUrl::parse_url("git+https://example.com/MyProject.git@v1.0")?;
assert!(is_equal(&previous, &url));
// Same repository, no tag on the previous URL.
let previous = VerbatimUrl::parse_url("git+https://example.com/MyProject.git")?;
let url = VerbatimUrl::parse_url("git+https://example.com/MyProject.git@v1.0")?;
assert!(!is_equal(&previous, &url));
// Same repository, tag on the previous URL, no tag on the overriding URL.
let previous = VerbatimUrl::parse_url("git+https://example.com/MyProject.git@v1.0")?;
let url = VerbatimUrl::parse_url("git+https://example.com/MyProject.git")?;
assert!(!is_equal(&previous, &url));
Ok(())
}
#[test]
fn same_reference() -> anyhow::Result<()> {
let empty = GitResolver::default();
// Same repository, same tag.
let a = Url::parse("git+https://example.com/MyProject.git@main")?;
let b = Url::parse("git+https://example.com/MyProject.git@main")?;
assert!(is_same_reference(&a, &b, &empty));
// Same repository, same tag, same subdirectory.
let a = Url::parse("git+https://example.com/MyProject.git@main#subdirectory=pkg_dir")?;
let b = Url::parse("git+https://example.com/MyProject.git@main#subdirectory=pkg_dir")?;
assert!(is_same_reference(&a, &b, &empty));
// Different repositories, same tag.
let a = Url::parse("git+https://example.com/MyProject.git@main")?;
let b = Url::parse("git+https://example.com/MyOtherProject.git@main")?;
assert!(!is_same_reference(&a, &b, &empty));
// Same repository, different tags.
let a = Url::parse("git+https://example.com/MyProject.git@main")?;
let b = Url::parse("git+https://example.com/MyProject.git@v1.0")?;
assert!(!is_same_reference(&a, &b, &empty));
// Same repository, same tag, different subdirectory.
let a = Url::parse("git+https://example.com/MyProject.git@main#subdirectory=pkg_dir")?;
let b = Url::parse("git+https://example.com/MyProject.git@main#subdirectory=other_dir")?;
assert!(!is_same_reference(&a, &b, &empty));
// Same repository, different tags, but same precise commit.
let a = Url::parse("git+https://example.com/MyProject.git@main")?;
let b = Url::parse(
"git+https://example.com/MyProject.git@164a8735b081663fede48c5041667b194da15d25",
)?;
let resolved_refs = GitResolver::default();
resolved_refs.insert(
RepositoryReference::from(&GitUrl::try_from(Url::parse(
"https://example.com/MyProject@main",
)?)?),
GitSha::from_str("164a8735b081663fede48c5041667b194da15d25")?,
);
assert!(is_same_reference(&a, &b, &resolved_refs));
// Same repository, different tags, different precise commit.
let a = Url::parse("git+https://example.com/MyProject.git@main")?;
let b = Url::parse(
"git+https://example.com/MyProject.git@164a8735b081663fede48c5041667b194da15d25",
)?;
let resolved_refs = GitResolver::default();
resolved_refs.insert(
RepositoryReference::from(&GitUrl::try_from(Url::parse(
"https://example.com/MyProject@main",
)?)?),
GitSha::from_str("f2c9e88f3ec9526bbcec68d150b176d96a750aba")?,
);
assert!(!is_same_reference(&a, &b, &resolved_refs));
Ok(())
}
}