From 9e6e1e56fdee637f40af87e45aead78cc1376ab7 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 20 Jan 2025 09:39:05 -0500 Subject: [PATCH] Add a GitHub repository struct to `uv-git` (#10768) ## Summary This is useful for https://github.com/astral-sh/uv/pull/10765, but we already have one usage today, so carving it out into a standalone PR. --- crates/uv-git/src/git.rs | 56 +++++++++--------------- crates/uv-git/src/github.rs | 85 +++++++++++++++++++++++++++++++++++++ crates/uv-git/src/lib.rs | 2 + 3 files changed, 106 insertions(+), 37 deletions(-) create mode 100644 crates/uv-git/src/github.rs diff --git a/crates/uv-git/src/git.rs b/crates/uv-git/src/git.rs index 418c56745..0a539dc4b 100644 --- a/crates/uv-git/src/git.rs +++ b/crates/uv-git/src/git.rs @@ -7,18 +7,19 @@ use std::path::{Path, PathBuf}; use std::str::{self, FromStr}; use std::sync::LazyLock; -use crate::sha::GitOid; -use crate::GitSha; -use anyhow::{anyhow, Context, Result}; +use anyhow::{Context, Result}; use cargo_util::{paths, ProcessBuilder}; use reqwest::StatusCode; use reqwest_middleware::ClientWithMiddleware; - use tracing::{debug, warn}; use url::Url; + use uv_fs::Simplified; use uv_static::EnvVars; +use crate::sha::GitOid; +use crate::{GitHubRepository, GitSha}; + /// A file indicates that if present, `git reset` has been done and a repo /// checkout is ready to go. See [`GitCheckout::reset`] for why we need this. const CHECKOUT_READY_LOCK: &str = ".ok"; @@ -720,16 +721,17 @@ enum FastPathRev { /// /// [^1]: fn github_fast_path( - repo: &mut GitRepository, + git: &mut GitRepository, url: &Url, reference: &GitReference, client: &ClientWithMiddleware, ) -> Result { - if !is_github(url) { + let Some(GitHubRepository { owner, repo }) = GitHubRepository::parse(url) else { return Ok(FastPathRev::Indeterminate); - } + }; + + let local_object = reference.resolve(git).ok(); - let local_object = reference.resolve(repo).ok(); let github_branch_name = match reference { GitReference::Branch(branch) => branch, GitReference::Tag(tag) => tag, @@ -765,28 +767,12 @@ fn github_fast_path( } }; - // This expects GitHub urls in the form `github.com/user/repo` and nothing - // else - let mut pieces = url - .path_segments() - .ok_or_else(|| anyhow!("no path segments on url"))?; - let username = pieces - .next() - .ok_or_else(|| anyhow!("couldn't find username"))?; - let repository = pieces - .next() - .ok_or_else(|| anyhow!("couldn't find repository name"))?; - if pieces.next().is_some() { - anyhow::bail!("too many segments on URL"); - } + // TODO(charlie): If we _know_ that we have a full commit SHA, there's no need to perform this + // request. We can just return `FastPathRev::NeedsFetch`. However, we need to audit all uses of + // `GitReference::FullCommit` to ensure that we _know_ it's a SHA, as opposed to (e.g.) a Git + // tag that just "looks like" a commit (i.e., a tag composed of 40 hex characters). - // Trim off the `.git` from the repository, if present, since that's - // optional for GitHub and won't work when we try to use the API as well. - let repository = repository.strip_suffix(".git").unwrap_or(repository); - - let url = format!( - "https://api.github.com/repos/{username}/{repository}/commits/{github_branch_name}" - ); + let url = format!("https://api.github.com/repos/{owner}/{repo}/commits/{github_branch_name}"); let runtime = tokio::runtime::Builder::new_current_thread() .enable_all() @@ -802,7 +788,11 @@ fn github_fast_path( } let response = request.send().await?; + + // GitHub returns a 404 if the repository does not exist, and a 422 if it exists but GitHub + // is unable to resolve the requested revision. response.error_for_status_ref()?; + let response_code = response.status(); if response_code == StatusCode::NOT_MODIFIED { Ok(FastPathRev::UpToDate) @@ -810,19 +800,11 @@ fn github_fast_path( let oid_to_fetch = response.text().await?.parse()?; Ok(FastPathRev::NeedsFetch(oid_to_fetch)) } else { - // Usually response_code == 404 if the repository does not exist, and - // response_code == 422 if exists but GitHub is unable to resolve the - // requested rev. Ok(FastPathRev::Indeterminate) } }) } -/// Whether a `url` is one from GitHub. -fn is_github(url: &Url) -> bool { - url.host_str() == Some("github.com") -} - /// Whether a `rev` looks like a commit hash (ASCII hex digits). fn looks_like_commit_hash(rev: &str) -> bool { rev.len() >= 7 && rev.chars().all(|ch| ch.is_ascii_hexdigit()) diff --git a/crates/uv-git/src/github.rs b/crates/uv-git/src/github.rs new file mode 100644 index 000000000..006178a21 --- /dev/null +++ b/crates/uv-git/src/github.rs @@ -0,0 +1,85 @@ +use tracing::debug; +use url::Url; + +/// A reference to a repository on GitHub. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct GitHubRepository<'a> { + /// The `owner` field for the repository, i.e., the user or organization that owns the + /// repository, like `astral-sh`. + pub owner: &'a str, + /// The `repo` field for the repository, i.e., the name of the repository, like `uv`. + pub repo: &'a str, +} + +impl<'a> GitHubRepository<'a> { + /// Parse a GitHub repository from a URL. + /// + /// Expects to receive a URL of the form: `https://github.com/{user}/{repo}[.git]`, e.g., + /// `https://github.com/astral-sh/uv`. Otherwise, returns `None`. + pub fn parse(url: &'a Url) -> Option { + // The fast path is only available for GitHub repositories. + if url.host_str() != Some("github.com") { + return None; + }; + + // The GitHub URL must take the form: `https://github.com/{user}/{repo}`, e.g., + // `https://github.com/astral-sh/uv`. + let Some(mut segments) = url.path_segments() else { + debug!("GitHub URL is missing path segments: {url}"); + return None; + }; + let Some(owner) = segments.next() else { + debug!("GitHub URL is missing owner: {url}"); + return None; + }; + let Some(repo) = segments.next() else { + debug!("GitHub URL is missing repo: {url}"); + return None; + }; + if segments.next().is_some() { + debug!("GitHub URL has too many path segments: {url}"); + return None; + } + + // Trim off the `.git` from the repository, if present. + let repo = repo.strip_suffix(".git").unwrap_or(repo); + + Some(Self { owner, repo }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_valid_url() { + let url = Url::parse("https://github.com/astral-sh/uv").unwrap(); + let repo = GitHubRepository::parse(&url).unwrap(); + assert_eq!(repo.owner, "astral-sh"); + assert_eq!(repo.repo, "uv"); + } + + #[test] + fn test_parse_with_git_suffix() { + let url = Url::parse("https://github.com/astral-sh/uv.git").unwrap(); + let repo = GitHubRepository::parse(&url).unwrap(); + assert_eq!(repo.owner, "astral-sh"); + assert_eq!(repo.repo, "uv"); + } + + #[test] + fn test_parse_invalid_host() { + let url = Url::parse("https://gitlab.com/astral-sh/uv").unwrap(); + assert!(GitHubRepository::parse(&url).is_none()); + } + + #[test] + fn test_parse_invalid_path() { + let url = Url::parse("https://github.com/astral-sh").unwrap(); + assert!(GitHubRepository::parse(&url).is_none()); + + let url = Url::parse("https://github.com/astral-sh/uv/extra").unwrap(); + assert!(GitHubRepository::parse(&url).is_none()); + } +} diff --git a/crates/uv-git/src/lib.rs b/crates/uv-git/src/lib.rs index 77b85ce99..ddeb9ca47 100644 --- a/crates/uv-git/src/lib.rs +++ b/crates/uv-git/src/lib.rs @@ -2,6 +2,7 @@ use url::Url; pub use crate::credentials::{store_credentials_from_url, GIT_STORE}; pub use crate::git::{GitReference, GIT}; +pub use crate::github::GitHubRepository; pub use crate::resolver::{ GitResolver, GitResolverError, RepositoryReference, ResolvedRepositoryReference, }; @@ -10,6 +11,7 @@ pub use crate::source::{Fetch, GitSource, Reporter}; mod credentials; mod git; +mod github; mod resolver; mod sha; mod source;