Fetch pyproject.toml from GitHub API (#10765)

## Summary

When resolving Git metadata, we may be able to fetch the metadata from
GitHub directly in some cases. This is _way_ faster, since we don't need
to perform many Git operations and, in particular, don't need to clone
the repo.

This only works in the following cases:

- The Git repository is public. Otherwise, I believe you need an access
token, which we don't have.
- The `pyproject.toml` has static metadata.
- The `pyproject.toml` has no `tool.uv.sources`. Otherwise, we need to
lower them... And, if there are any paths or workspace sources, that
requires an install path (i.e., we need the content on-disk).
- The project is in the repo root. If it's in a subdirectory, it could
be a workspace member. And if it's a workspace member, there could be
sources defined in the workspace root. But we can't know without
fetching the workspace root -- and we need the workspace in order to
find the root...

Closes #10568.
This commit is contained in:
Charlie Marsh 2025-01-20 12:50:39 -05:00 committed by GitHub
parent b2d06f01cc
commit 5ee4cf6ff5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 254 additions and 30 deletions

View file

@ -102,7 +102,7 @@ impl GitReference {
}
/// Converts the [`GitReference`] to a `str` that can be used as a revision.
pub(crate) fn as_rev(&self) -> &str {
pub fn as_rev(&self) -> &str {
match self {
Self::Tag(rev) => rev,
Self::Branch(rev) => rev,

View file

@ -1,5 +1,6 @@
use std::borrow::Cow;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use tracing::debug;
@ -11,7 +12,7 @@ use reqwest_middleware::ClientWithMiddleware;
use uv_cache_key::{cache_digest, RepositoryUrl};
use uv_fs::LockedFile;
use crate::{Fetch, GitReference, GitSha, GitSource, GitUrl, Reporter};
use crate::{Fetch, GitHubRepository, GitReference, GitSha, GitSource, GitUrl, Reporter};
#[derive(Debug, thiserror::Error)]
pub enum GitResolverError {
@ -21,6 +22,10 @@ pub enum GitResolverError {
Join(#[from] tokio::task::JoinError),
#[error("Git operation failed")]
Git(#[source] anyhow::Error),
#[error(transparent)]
Reqwest(#[from] reqwest::Error),
#[error(transparent)]
ReqwestMiddleware(#[from] reqwest_middleware::Error),
}
/// A resolver for Git repositories.
@ -38,6 +43,61 @@ impl GitResolver {
self.0.get(reference)
}
/// Try to pin a Git URL to an exact commit without running any Git commands.
///
/// The in-memory cache of resolved references is consulted first. On a miss, and only when
/// the repository URL parses as a [`GitHubRepository`], the commit is looked up through the
/// GitHub REST API and recorded in the cache.
///
/// Returns `Ok(Some(sha))` when the commit is known or GitHub resolved it, and `Ok(None)`
/// when the URL is not a GitHub repository or GitHub answered with a non-success status.
///
/// # Errors
///
/// Returns a [`GitResolverError`] if sending the request or reading the response body fails,
/// or if the response body cannot be parsed as a [`GitSha`].
pub async fn github_fast_path(
    &self,
    url: &GitUrl,
    client: ClientWithMiddleware,
) -> Result<Option<GitSha>, GitResolverError> {
    let reference = RepositoryReference::from(url);

    // Cache hit: the precise commit was recorded earlier, so no network access is needed.
    let cached = self.get(&reference).map(|hit| *hit);
    if cached.is_some() {
        return Ok(cached);
    }

    // The fast path only applies to repositories that parse as GitHub repositories.
    let (owner, repo) = match GitHubRepository::parse(url.repository()) {
        Some(GitHubRepository { owner, repo }) => (owner, repo),
        None => return Ok(None),
    };

    // Build the commits endpoint for the requested revision.
    let rev = url.reference().as_rev();
    let url = format!("https://api.github.com/repos/{owner}/{repo}/commits/{rev}");
    debug!("Attempting GitHub fast path for: {url}");

    let response = client
        .get(&url)
        .header("Accept", "application/vnd.github.3.sha")
        .header("User-Agent", "uv")
        .send()
        .await?;

    // GitHub answers 404 for a missing repository and 422 for a rev it cannot resolve; any
    // non-success status is reported as "no fast path" rather than as an error.
    let status = response.status();
    if !status.is_success() {
        debug!("GitHub API request failed for: {url} ({})", status);
        return Ok(None);
    }

    // Parse the response body as a Git SHA.
    let body = response.text().await?;
    let sha = GitSha::from_str(&body).map_err(|err| GitResolverError::Git(err.into()))?;

    // Record the result so that later lookups of this reference resolve to the same commit.
    self.insert(reference, sha);
    Ok(Some(sha))
}
/// Fetch a remote Git repository.
pub async fn fetch(
&self,