Add Git resolver in lieu of static hash map (#3954)

## Summary

This PR removes the static resolver map:

```rust
static RESOLVED_GIT_REFS: Lazy<Mutex<FxHashMap<RepositoryReference, GitSha>>> =
    Lazy::new(Mutex::default);
```

With a `GitResolver` struct that we now pass around on the
`BuildContext`. There should be no behavior changes here; it's purely an
internal refactor with an eye towards making it cleaner for us to
"pre-populate" the list of resolved SHAs.
This commit is contained in:
Charlie Marsh 2024-05-31 22:44:42 -04:00 committed by GitHub
parent a0652921fc
commit b7d77c04cc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 475 additions and 384 deletions

View file

@ -8,6 +8,7 @@ use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use pypi_types::{Requirement, RequirementSource};
use uv_configuration::{Constraints, Overrides};
use uv_git::GitResolver;
use uv_normalize::{ExtraName, PackageName};
use crate::pubgrub::specifier::PubGrubSpecifier;
@ -29,6 +30,7 @@ impl PubGrubDependencies {
source_extra: Option<&ExtraName>,
urls: &Urls,
locals: &Locals,
git: &GitResolver,
env: Option<&MarkerEnvironment>,
) -> Result<Self, ResolveError> {
let mut dependencies = Vec::default();
@ -42,6 +44,7 @@ impl PubGrubDependencies {
source_extra,
urls,
locals,
git,
env,
&mut dependencies,
&mut seen,
@ -71,6 +74,7 @@ fn add_requirements(
source_extra: Option<&ExtraName>,
urls: &Urls,
locals: &Locals,
git: &GitResolver,
env: Option<&MarkerEnvironment>,
dependencies: &mut Vec<(PubGrubPackage, Range<Version>)>,
seen: &mut FxHashSet<ExtraName>,
@ -97,9 +101,10 @@ fn add_requirements(
None,
urls,
locals,
git,
))
.chain(requirement.extras.clone().into_iter().map(|extra| {
PubGrubRequirement::from_requirement(requirement, Some(extra), urls, locals)
PubGrubRequirement::from_requirement(requirement, Some(extra), urls, locals, git)
})) {
let PubGrubRequirement { package, version } = result?;
@ -126,6 +131,7 @@ fn add_requirements(
Some(extra),
urls,
locals,
git,
env,
dependencies,
seen,
@ -156,7 +162,7 @@ fn add_requirements(
// Add the package.
let PubGrubRequirement { package, version } =
PubGrubRequirement::from_constraint(constraint, urls, locals)?;
PubGrubRequirement::from_constraint(constraint, urls, locals, git)?;
// Ignore self-dependencies.
if let PubGrubPackageInner::Package { name, .. } = &*package {
@ -196,6 +202,7 @@ impl PubGrubRequirement {
extra: Option<ExtraName>,
urls: &Urls,
locals: &Locals,
git: &GitResolver,
) -> Result<Self, ResolveError> {
match &requirement.source {
RequirementSource::Registry { specifier, .. } => {
@ -241,7 +248,7 @@ impl PubGrubRequirement {
));
};
if !Urls::is_allowed(&expected.verbatim, url) {
if !Urls::is_allowed(&expected.verbatim, url, git) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim.verbatim().to_string(),
@ -267,7 +274,7 @@ impl PubGrubRequirement {
));
};
if !Urls::is_allowed(&expected.verbatim, url) {
if !Urls::is_allowed(&expected.verbatim, url, git) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim.verbatim().to_string(),
@ -293,7 +300,7 @@ impl PubGrubRequirement {
));
};
if !Urls::is_allowed(&expected.verbatim, url) {
if !Urls::is_allowed(&expected.verbatim, url, git) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim.verbatim().to_string(),
@ -319,7 +326,8 @@ impl PubGrubRequirement {
constraint: &Requirement,
urls: &Urls,
locals: &Locals,
git: &GitResolver,
) -> Result<Self, ResolveError> {
Self::from_requirement(constraint, None, urls, locals)
Self::from_requirement(constraint, None, urls, locals, git)
}
}

View file

@ -2,8 +2,39 @@ use url::Url;
use pep508_rs::VerbatimUrl;
use pypi_types::{ParsedGitUrl, ParsedUrl, VerbatimParsedUrl};
use uv_distribution::git_url_to_precise;
use uv_git::GitReference;
use uv_git::{GitReference, GitResolver};
/// Map a URL to a precise URL, if possible.
pub(crate) fn url_to_precise(url: VerbatimParsedUrl, git: &GitResolver) -> VerbatimParsedUrl {
let ParsedUrl::Git(ParsedGitUrl {
url: git_url,
subdirectory,
}) = &url.parsed_url
else {
return url;
};
let Some(new_git_url) = git.precise(git_url.clone()) else {
debug_assert!(
matches!(git_url.reference(), GitReference::FullCommit(_)),
"Unseen Git URL: {}, {:?}",
url.verbatim,
git_url
);
return url;
};
let new_parsed_url = ParsedGitUrl {
url: new_git_url,
subdirectory: subdirectory.clone(),
};
let new_url = Url::from(new_parsed_url.clone());
let new_verbatim_url = apply_redirect(&url.verbatim, new_url);
VerbatimParsedUrl {
parsed_url: ParsedUrl::Git(new_parsed_url),
verbatim: new_verbatim_url,
}
}
/// Given a [`VerbatimUrl`] and a redirect, apply the redirect to the URL while preserving as much
/// of the verbatim representation as possible.
@ -39,37 +70,6 @@ fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
redirect
}
pub(crate) fn url_to_precise(url: VerbatimParsedUrl) -> VerbatimParsedUrl {
let ParsedUrl::Git(ParsedGitUrl {
url: git_url,
subdirectory,
}) = url.parsed_url.clone()
else {
return url;
};
let Some(new_git_url) = git_url_to_precise(git_url.clone()) else {
debug_assert!(
matches!(git_url.reference(), GitReference::FullCommit(_)),
"Unseen Git URL: {}, {:?}",
url.verbatim,
git_url
);
return url;
};
let new_parsed_url = ParsedGitUrl {
url: new_git_url,
subdirectory,
};
let new_url = Url::from(new_parsed_url.clone());
let new_verbatim_url = apply_redirect(&url.verbatim, new_url);
VerbatimParsedUrl {
parsed_url: ParsedUrl::Git(new_parsed_url),
verbatim: new_verbatim_url,
}
}
#[cfg(test)]
mod tests {
use url::Url;

View file

@ -12,6 +12,7 @@ use distribution_types::{
use pep440_rs::{Version, VersionSpecifier};
use pep508_rs::{MarkerEnvironment, MarkerTree};
use pypi_types::{ParsedUrlError, Requirement, Yanked};
use uv_git::GitResolver;
use uv_normalize::{ExtraName, PackageName};
use crate::preferences::Preferences;
@ -40,6 +41,7 @@ impl ResolutionGraph {
pub(crate) fn from_state(
index: &InMemoryIndex,
preferences: &Preferences,
git: &GitResolver,
resolution: Resolution,
) -> anyhow::Result<Self, ResolveError> {
// Collect all marker expressions from relevant pubgrub packages.
@ -183,7 +185,7 @@ impl ResolutionGraph {
url: Some(url),
} => {
// Create the distribution.
let dist = Dist::from_url(name.clone(), url_to_precise(url.clone()))?;
let dist = Dist::from_url(name.clone(), url_to_precise(url.clone(), git))?;
// Extract the hashes, preserving those that were already present in the
// lockfile if necessary.

View file

@ -31,6 +31,7 @@ use pypi_types::{Metadata23, Requirement};
pub(crate) use urls::Urls;
use uv_configuration::{Constraints, Overrides};
use uv_distribution::{ArchiveMetadata, DistributionDatabase};
use uv_git::GitResolver;
use uv_normalize::{ExtraName, PackageName};
use uv_types::{BuildContext, HashStrategy, InstalledPackagesProvider};
@ -82,6 +83,7 @@ struct ResolverState<InstalledPackages: InstalledPackagesProvider> {
constraints: Constraints,
overrides: Overrides,
preferences: Preferences,
git: GitResolver,
exclusions: Exclusions,
urls: Urls,
locals: Locals,
@ -154,6 +156,7 @@ impl<'a, Context: BuildContext, InstalledPackages: InstalledPackagesProvider>
markers,
python_requirement,
index,
build_context.git(),
provider,
installed_packages,
)
@ -172,16 +175,18 @@ impl<Provider: ResolverProvider, InstalledPackages: InstalledPackagesProvider>
markers: Option<&MarkerEnvironment>,
python_requirement: &PythonRequirement,
index: &InMemoryIndex,
git: &GitResolver,
provider: Provider,
installed_packages: InstalledPackages,
) -> Result<Self, ResolveError> {
let state = ResolverState {
index: index.clone(),
git: git.clone(),
unavailable_packages: DashMap::default(),
incomplete_packages: DashMap::default(),
selector: CandidateSelector::for_resolution(options, &manifest, markers),
dependency_mode: options.dependency_mode,
urls: Urls::from_manifest(&manifest, markers, options.dependency_mode)?,
urls: Urls::from_manifest(&manifest, markers, git, options.dependency_mode)?,
locals: Locals::from_manifest(&manifest, markers, options.dependency_mode),
project: manifest.project,
requirements: manifest.requirements,
@ -549,7 +554,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
for resolution in resolutions {
combined.union(resolution);
}
ResolutionGraph::from_state(&self.index, &self.preferences, combined)
ResolutionGraph::from_state(&self.index, &self.preferences, &self.git, combined)
}
/// Visit a [`PubGrubPackage`] prior to selection. This should be called on a [`PubGrubPackage`]
@ -907,6 +912,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
None,
&self.urls,
&self.locals,
&self.git,
self.markers.as_ref(),
);
@ -1050,6 +1056,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
extra.as_ref(),
&self.urls,
&self.locals,
&self.git,
self.markers.as_ref(),
)?;

View file

@ -1,13 +1,14 @@
use distribution_types::Verbatim;
use rustc_hash::FxHashMap;
use tracing::debug;
use url::Url;
use cache_key::CanonicalUrl;
use distribution_types::Verbatim;
use pep508_rs::{MarkerEnvironment, VerbatimUrl};
use pypi_types::{
ParsedArchiveUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, RequirementSource, VerbatimParsedUrl,
};
use uv_distribution::is_same_reference;
use uv_git::GitUrl;
use uv_git::{GitResolver, GitUrl, RepositoryReference};
use uv_normalize::PackageName;
use crate::{DependencyMode, Manifest, ResolveError};
@ -20,6 +21,7 @@ impl Urls {
pub(crate) fn from_manifest(
manifest: &Manifest,
markers: Option<&MarkerEnvironment>,
git: &GitResolver,
dependencies: DependencyMode,
) -> Result<Self, ResolveError> {
let mut urls: FxHashMap<PackageName, VerbatimParsedUrl> = FxHashMap::default();
@ -93,7 +95,7 @@ impl Urls {
};
if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) {
if !is_equal(&previous.verbatim, &url.verbatim) {
if is_same_reference(&previous.verbatim, &url.verbatim) {
if is_same_reference(&previous.verbatim, &url.verbatim, git) {
debug!(
"Allowing {} as a variant of {}",
&url.verbatim, previous.verbatim
@ -120,12 +122,16 @@ impl Urls {
}
/// Returns `true` if the provided URL is compatible with the given "allowed" URL.
pub(crate) fn is_allowed(expected: &VerbatimUrl, provided: &VerbatimUrl) -> bool {
pub(crate) fn is_allowed(
expected: &VerbatimUrl,
provided: &VerbatimUrl,
git: &GitResolver,
) -> bool {
#[allow(clippy::if_same_then_else)]
if is_equal(expected, provided) {
// If the URLs are canonically equivalent, they're compatible.
true
} else if is_same_reference(expected, provided) {
} else if is_same_reference(expected, provided, git) {
// If the URLs refer to the same commit, they're compatible.
true
} else {
@ -139,12 +145,75 @@ impl Urls {
///
/// Accepts URLs that map to the same [`CanonicalUrl`].
fn is_equal(previous: &VerbatimUrl, url: &VerbatimUrl) -> bool {
cache_key::CanonicalUrl::new(previous.raw()) == cache_key::CanonicalUrl::new(url.raw())
CanonicalUrl::new(previous.raw()) == CanonicalUrl::new(url.raw())
}
/// Returns `true` if the URLs refer to the same Git commit.
///
/// For example, the previous URL could be a branch or tag, while the current URL would be a
/// precise commit hash.
fn is_same_reference<'a>(a: &'a Url, b: &'a Url, git: &'a GitResolver) -> bool {
// Convert `a` to a Git URL, if possible.
let Ok(a_git) = ParsedGitUrl::try_from(Url::from(CanonicalUrl::new(a))) else {
return false;
};
// Convert `b` to a Git URL, if possible.
let Ok(b_git) = ParsedGitUrl::try_from(Url::from(CanonicalUrl::new(b))) else {
return false;
};
// The URLs must refer to the same subdirectory, if any.
if a_git.subdirectory != b_git.subdirectory {
return false;
}
// Convert `a` to a repository URL.
let a_ref = RepositoryReference::from(&a_git.url);
// Convert `b` to a repository URL.
let b_ref = RepositoryReference::from(&b_git.url);
// The URLs must refer to the same repository.
if a_ref.url != b_ref.url {
return false;
}
// If the URLs have the same tag, they refer to the same commit.
if a_ref.reference == b_ref.reference {
return true;
}
// Otherwise, the URLs must resolve to the same precise commit.
let Some(a_precise) = a_git
.url
.precise()
.or_else(|| git.get(&a_ref).map(|sha| *sha))
else {
return false;
};
let Some(b_precise) = b_git
.url
.precise()
.or_else(|| git.get(&b_ref).map(|sha| *sha))
else {
return false;
};
a_precise == b_precise
}
#[cfg(test)]
mod tests {
use super::*;
use std::str::FromStr;
use url::Url;
use pep508_rs::VerbatimUrl;
use uv_git::{GitResolver, GitSha, GitUrl, RepositoryReference};
use crate::resolver::urls::{is_equal, is_same_reference};
#[test]
fn url_compatibility() -> Result<(), url::ParseError> {
@ -175,4 +244,64 @@ mod tests {
Ok(())
}
#[test]
fn same_reference() -> anyhow::Result<()> {
let empty = GitResolver::default();
// Same repository, same tag.
let a = Url::parse("git+https://example.com/MyProject.git@main")?;
let b = Url::parse("git+https://example.com/MyProject.git@main")?;
assert!(is_same_reference(&a, &b, &empty));
// Same repository, same tag, same subdirectory.
let a = Url::parse("git+https://example.com/MyProject.git@main#subdirectory=pkg_dir")?;
let b = Url::parse("git+https://example.com/MyProject.git@main#subdirectory=pkg_dir")?;
assert!(is_same_reference(&a, &b, &empty));
// Different repositories, same tag.
let a = Url::parse("git+https://example.com/MyProject.git@main")?;
let b = Url::parse("git+https://example.com/MyOtherProject.git@main")?;
assert!(!is_same_reference(&a, &b, &empty));
// Same repository, different tags.
let a = Url::parse("git+https://example.com/MyProject.git@main")?;
let b = Url::parse("git+https://example.com/MyProject.git@v1.0")?;
assert!(!is_same_reference(&a, &b, &empty));
// Same repository, same tag, different subdirectory.
let a = Url::parse("git+https://example.com/MyProject.git@main#subdirectory=pkg_dir")?;
let b = Url::parse("git+https://example.com/MyProject.git@main#subdirectory=other_dir")?;
assert!(!is_same_reference(&a, &b, &empty));
// Same repository, different tags, but same precise commit.
let a = Url::parse("git+https://example.com/MyProject.git@main")?;
let b = Url::parse(
"git+https://example.com/MyProject.git@164a8735b081663fede48c5041667b194da15d25",
)?;
let resolved_refs = GitResolver::default();
resolved_refs.insert(
RepositoryReference::from(&GitUrl::try_from(Url::parse(
"https://example.com/MyProject@main",
)?)?),
GitSha::from_str("164a8735b081663fede48c5041667b194da15d25")?,
);
assert!(is_same_reference(&a, &b, &resolved_refs));
// Same repository, different tags, different precise commit.
let a = Url::parse("git+https://example.com/MyProject.git@main")?;
let b = Url::parse(
"git+https://example.com/MyProject.git@164a8735b081663fede48c5041667b194da15d25",
)?;
let resolved_refs = GitResolver::default();
resolved_refs.insert(
RepositoryReference::from(&GitUrl::try_from(Url::parse(
"https://example.com/MyProject@main",
)?)?),
GitSha::from_str("f2c9e88f3ec9526bbcec68d150b176d96a750aba")?,
);
assert!(!is_same_reference(&a, &b, &resolved_refs));
Ok(())
}
}