Track parsed Git URL components in GitSourceUrl (#3656)

## Summary

Closes https://github.com/astral-sh/uv/issues/3571.
This commit is contained in:
Charlie Marsh 2024-05-19 20:43:30 -04:00 committed by GitHub
parent 00f8e8cd1b
commit 0718705c21
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 45 additions and 51 deletions

View file

@ -2,7 +2,9 @@ use std::borrow::Cow;
use std::path::Path;
use pep440_rs::Version;
use pep508_rs::VerbatimUrl;
use url::Url;
use uv_git::GitUrl;
use uv_normalize::PackageName;
@ -101,7 +103,11 @@ impl std::fmt::Display for DirectSourceUrl<'_> {
#[derive(Debug, Clone)]
pub struct GitSourceUrl<'a> {
pub url: &'a Url,
/// The URL with the revision and subdirectory fragment.
pub url: &'a VerbatimUrl,
/// The URL without the revision and subdirectory fragment.
pub git: Cow<'a, GitUrl>,
pub subdirectory: Option<Cow<'a, Path>>,
}
impl std::fmt::Display for GitSourceUrl<'_> {
@ -112,7 +118,11 @@ impl std::fmt::Display for GitSourceUrl<'_> {
impl<'a> From<&'a GitSourceDist> for GitSourceUrl<'a> {
fn from(dist: &'a GitSourceDist) -> Self {
Self { url: &dist.url }
Self {
url: &dist.url,
git: Cow::Borrowed(&dist.git),
subdirectory: dist.subdirectory.as_deref().map(Cow::Borrowed),
}
}
}

View file

@ -1,4 +1,3 @@
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use anyhow::Result;
@ -48,10 +47,10 @@ impl RepositoryReference {
///
/// Assumes that the URL is a precise Git URL, with a full commit hash.
pub(crate) async fn fetch_git_archive(
url: &Url,
url: &GitUrl,
cache: &Cache,
reporter: Option<&Arc<dyn Reporter>>,
) -> Result<(Fetch, Option<PathBuf>), Error> {
) -> Result<Fetch, Error> {
debug!("Fetching source distribution from Git: {url}");
let git_dir = cache.bucket(CacheBucket::Git);
@ -60,16 +59,13 @@ pub(crate) async fn fetch_git_archive(
fs::create_dir_all(&lock_dir)
.await
.map_err(Error::CacheWrite)?;
let repository_url = RepositoryUrl::new(url);
let repository_url = RepositoryUrl::new(url.repository());
let _lock = LockedFile::acquire(
lock_dir.join(cache_key::digest(&repository_url)),
&repository_url,
)
.map_err(Error::CacheWrite)?;
let ParsedGitUrl { url, subdirectory } =
ParsedGitUrl::try_from(url.clone()).map_err(Box::new)?;
// Fetch the Git repository.
let source = if let Some(reporter) = reporter {
GitSource::new(url.clone(), git_dir).with_reporter(Facade::from(reporter.clone()))
@ -80,7 +76,7 @@ pub(crate) async fn fetch_git_archive(
.await?
.map_err(Error::Git)?;
Ok((fetch, subdirectory))
Ok(fetch)
}
/// Given a remote source distribution, return a precise variant, if possible.
@ -92,13 +88,10 @@ pub(crate) async fn fetch_git_archive(
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
/// prefix kinds.
pub(crate) async fn resolve_precise(
url: &Url,
url: &GitUrl,
cache: &Cache,
reporter: Option<&Arc<dyn Reporter>>,
) -> Result<Option<Url>, Error> {
let ParsedGitUrl { url, subdirectory } =
ParsedGitUrl::try_from(url.clone()).map_err(Box::new)?;
) -> Result<Option<GitUrl>, Error> {
// If the Git reference already contains a complete SHA, short-circuit.
if url.precise().is_some() {
return Ok(None);
@ -107,12 +100,9 @@ pub(crate) async fn resolve_precise(
// If the Git reference is in the in-memory cache, return it.
{
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let reference = RepositoryReference::new(&url);
let reference = RepositoryReference::new(url);
if let Some(precise) = resolved_git_refs.get(&reference) {
return Ok(Some(Url::from(ParsedGitUrl {
url: url.with_precise(*precise),
subdirectory,
})));
return Ok(Some(url.clone().with_precise(*precise)));
}
}
@ -133,15 +123,11 @@ pub(crate) async fn resolve_precise(
// Insert the resolved URL into the in-memory cache.
if let Some(precise) = git.precise() {
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let reference = RepositoryReference::new(&url);
let reference = RepositoryReference::new(url);
resolved_git_refs.insert(reference, precise);
}
// Re-encode as a URL.
Ok(Some(Url::from(ParsedGitUrl {
url: git,
subdirectory,
})))
Ok(Some(git))
}
/// Given a remote source distribution, return a precise variant, if possible.

View file

@ -1024,7 +1024,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
// Resolve to a precise Git SHA.
let url = if let Some(url) = resolve_precise(
resource.url,
&resource.git,
self.build_context.cache(),
self.reporter.as_ref(),
)
@ -1032,17 +1032,19 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
{
Cow::Owned(url)
} else {
Cow::Borrowed(resource.url)
Cow::Borrowed(resource.git.as_ref())
};
let subdirectory = resource.subdirectory.as_deref();
// Fetch the Git repository.
let (fetch, subdirectory) =
let fetch =
fetch_git_archive(&url, self.build_context.cache(), self.reporter.as_ref()).await?;
let git_sha = fetch.git().precise().expect("Exact commit after checkout");
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
WheelCache::Git(&url, &git_sha.to_short_string()).root(),
WheelCache::Git(resource.url, &git_sha.to_short_string()).root(),
);
let _lock = lock_shard(&cache_shard).await?;
@ -1058,7 +1060,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.map(|reporter| reporter.on_build_start(source));
let (disk_filename, filename, metadata) = self
.build_distribution(source, fetch.path(), subdirectory.as_deref(), &cache_shard)
.build_distribution(source, fetch.path(), subdirectory, &cache_shard)
.await?;
if let Some(task) = task {
@ -1098,7 +1100,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
// Resolve to a precise Git SHA.
let url = if let Some(url) = resolve_precise(
resource.url,
&resource.git,
self.build_context.cache(),
self.reporter.as_ref(),
)
@ -1106,17 +1108,19 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
{
Cow::Owned(url)
} else {
Cow::Borrowed(resource.url)
Cow::Borrowed(resource.git.as_ref())
};
let subdirectory = resource.subdirectory.as_deref();
// Fetch the Git repository.
let (fetch, subdirectory) =
let fetch =
fetch_git_archive(&url, self.build_context.cache(), self.reporter.as_ref()).await?;
let git_sha = fetch.git().precise().expect("Exact commit after checkout");
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
WheelCache::Git(&url, &git_sha.to_short_string()).root(),
WheelCache::Git(resource.url, &git_sha.to_short_string()).root(),
);
let _lock = lock_shard(&cache_shard).await?;
@ -1137,7 +1141,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
// If the backend supports `prepare_metadata_for_build_wheel`, use it.
if let Some(metadata) = self
.build_metadata(source, fetch.path(), subdirectory.as_deref())
.build_metadata(source, fetch.path(), subdirectory)
.boxed_local()
.await?
{
@ -1159,7 +1163,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.map(|reporter| reporter.on_build_start(source));
let (_disk_filename, _filename, metadata) = self
.build_distribution(source, fetch.path(), subdirectory.as_deref(), &cache_shard)
.build_distribution(source, fetch.path(), subdirectory, &cache_shard)
.await?;
if let Some(task) = task {

View file

@ -11,8 +11,8 @@ use tracing::debug;
use distribution_filename::{SourceDistFilename, WheelFilename};
use distribution_types::{
BuildableSource, DirectSourceUrl, DirectorySourceUrl, GitSourceUrl, PathSourceUrl,
RemoteSource, Requirement, SourceUrl, UnresolvedRequirement,
BuildableSource, DirectSourceUrl, DirectorySourceUrl, GitSourceUrl, ParsedGitUrl,
PathSourceUrl, RemoteSource, Requirement, SourceUrl, UnresolvedRequirement,
UnresolvedRequirementSpecification, VersionId,
};
use pep508_rs::{Scheme, UnnamedRequirement, VersionOrUrl};
@ -240,7 +240,10 @@ impl<'a, Context: BuildContext> NamedRequirementsResolver<'a, Context> {
url: &requirement.url,
}),
Some(Scheme::GitSsh | Scheme::GitHttps | Scheme::GitHttp) => {
let git = ParsedGitUrl::try_from(requirement.url.to_url())?;
SourceUrl::Git(GitSourceUrl {
git: Cow::Owned(git.url),
subdirectory: git.subdirectory.map(Cow::Owned),
url: &requirement.url,
})
}

View file

@ -3,7 +3,7 @@ use url::Url;
use distribution_types::{ParsedGitUrl, ParsedUrl, VerbatimParsedUrl};
use pep508_rs::VerbatimUrl;
use uv_distribution::git_url_to_precise;
use uv_git::{GitReference, GitUrl};
use uv_git::GitReference;
/// Given a [`VerbatimUrl`] and a redirect, apply the redirect to the URL while preserving as much
/// of the verbatim representation as possible.
@ -48,19 +48,10 @@ pub(crate) fn url_to_precise(url: VerbatimParsedUrl) -> VerbatimParsedUrl {
return url;
};
// TODO(konsti): Remove once we carry more context on the `Dist` (e.g., `BranchOrTag` vs. `Tag`).
let lowered_git_ref = git_url
.reference()
.as_str()
.map_or(GitReference::DefaultBranch, |rev| {
GitReference::from_rev(rev.to_string())
});
let git_url = GitUrl::new(git_url.repository().clone(), lowered_git_ref);
let Some(new_git_url) = git_url_to_precise(git_url.clone()) else {
debug_assert!(
matches!(git_url.reference(), GitReference::FullCommit(_)),
"Unseen git url: {}, {:?}",
"Unseen Git URL: {}, {:?}",
url.verbatim,
git_url
);

View file

@ -8982,7 +8982,7 @@ fn git_source_missing_tag() -> Result<()> {
error: Failed to download and build: `uv-public-pypackage @ git+https://github.com/astral-test/uv-public-pypackage@missing`
Caused by: Git operation failed
Caused by: failed to clone into: [CACHE_DIR]/git-v0/db/8dab139913c4b566
Caused by: failed to fetch branch or tag `missing`
Caused by: failed to fetch tag `missing`
Caused by: process didn't exit successfully: `git fetch --force --update-head-ok 'https://github.com/astral-test/uv-public-pypackage' '+refs/tags/missing:refs/remotes/origin/tags/missing'` (exit status: 128)
--- stderr
fatal: couldn't find remote ref refs/tags/missing