Use shared client in Git fetch implementation (#4487)

## Summary

It turns out that the Git fetch implementation is initializing its own
client, which can be really expensive on macOS (due to loading native
certificates) _and_ bypasses any of our middleware. This PR modifies the
Git implementation to accept a shared client.
This commit is contained in:
Charlie Marsh 2024-06-25 00:09:29 +03:00 committed by GitHub
parent 7221514136
commit 9905521957
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 39 additions and 21 deletions

1
Cargo.lock generated
View file

@ -4829,6 +4829,7 @@ dependencies = [
"dashmap",
"fs-err",
"reqwest",
"reqwest-middleware",
"thiserror",
"tokio",
"tracing",

View file

@ -182,7 +182,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await?
}
BuildableSource::Dist(SourceDist::Git(dist)) => {
self.git(source, &GitSourceUrl::from(dist), tags, hashes)
self.git(source, &GitSourceUrl::from(dist), tags, hashes, client)
.boxed_local()
.await?
}
@ -234,7 +234,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await?
}
BuildableSource::Url(SourceUrl::Git(resource)) => {
self.git(source, resource, tags, hashes)
self.git(source, resource, tags, hashes, client)
.boxed_local()
.await?
}
@ -356,7 +356,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await?
}
BuildableSource::Dist(SourceDist::Git(dist)) => {
self.git_metadata(source, &GitSourceUrl::from(dist), hashes)
self.git_metadata(source, &GitSourceUrl::from(dist), hashes, client)
.boxed_local()
.await?
}
@ -401,7 +401,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await?
}
BuildableSource::Url(SourceUrl::Git(resource)) => {
self.git_metadata(source, resource, hashes)
self.git_metadata(source, resource, hashes, client)
.boxed_local()
.await?
}
@ -1089,6 +1089,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
resource: &GitSourceUrl<'_>,
tags: &Tags,
hashes: HashPolicy<'_>,
client: &ManagedClient<'_>,
) -> Result<BuiltWheelMetadata, Error> {
// Before running the build, check that the hashes match.
if hashes.is_validate() {
@ -1101,6 +1102,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.git()
.resolve(
resource.git,
client.unmanaged.uncached_client().client(),
self.build_context.cache().bucket(CacheBucket::Git),
self.reporter.clone().map(Facade::from),
)
@ -1117,6 +1119,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.git()
.fetch(
&url,
client.unmanaged.uncached_client().client(),
self.build_context.cache().bucket(CacheBucket::Git),
self.reporter.clone().map(Facade::from),
)
@ -1173,6 +1176,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
source: &BuildableSource<'_>,
resource: &GitSourceUrl<'_>,
hashes: HashPolicy<'_>,
client: &ManagedClient<'_>,
) -> Result<ArchiveMetadata, Error> {
// Before running the build, check that the hashes match.
if hashes.is_validate() {
@ -1185,6 +1189,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.git()
.resolve(
resource.git,
client.unmanaged.uncached_client().client(),
self.build_context.cache().bucket(CacheBucket::Git),
self.reporter.clone().map(Facade::from),
)
@ -1201,6 +1206,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.git()
.fetch(
&url,
client.unmanaged.uncached_client().client(),
self.build_context.cache().bucket(CacheBucket::Git),
self.reporter.clone().map(Facade::from),
)

View file

@ -21,6 +21,7 @@ cargo-util = { workspace = true }
dashmap = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
reqwest = { workspace = true, features = ["blocking"] }
reqwest-middleware = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }

View file

@ -7,8 +7,8 @@ use std::str::{self};
use anyhow::{anyhow, Context, Result};
use cargo_util::{paths, ProcessBuilder};
use reqwest::Client;
use reqwest::StatusCode;
use reqwest_middleware::ClientWithMiddleware;
use tracing::debug;
use url::Url;
use uv_fs::Simplified;
@ -218,7 +218,7 @@ impl GitRemote {
db: Option<GitDatabase>,
reference: &GitReference,
locked_rev: Option<GitOid>,
client: &Client,
client: &ClientWithMiddleware,
) -> Result<(GitDatabase, GitOid)> {
let locked_ref = locked_rev.map(|oid| GitReference::FullCommit(oid.to_string()));
let reference = locked_ref.as_ref().unwrap_or(reference);
@ -344,13 +344,13 @@ impl GitCheckout {
/// is done. Use [`GitCheckout::is_fresh`] to check.
///
/// * The `repo` will be the checked out Git repository.
fn new(revision: GitOid, repo: GitRepository) -> GitCheckout {
GitCheckout { revision, repo }
fn new(revision: GitOid, repo: GitRepository) -> Self {
Self { revision, repo }
}
/// Clone a repo for a `revision` into a local path from a `database`.
/// This is a filesystem-to-filesystem clone.
fn clone_into(into: &Path, database: &GitDatabase, revision: GitOid) -> Result<GitCheckout> {
fn clone_into(into: &Path, database: &GitDatabase, revision: GitOid) -> Result<Self> {
let dirname = into.parent().unwrap();
paths::create_dir_all(dirname)?;
if into.exists() {
@ -440,7 +440,7 @@ pub(crate) fn fetch(
repo: &mut GitRepository,
remote_url: &str,
reference: &GitReference,
client: &Client,
client: &ClientWithMiddleware,
) -> Result<()> {
let oid_to_fetch = match github_fast_path(repo, remote_url, reference, client) {
Ok(FastPathRev::UpToDate) => return Ok(()),
@ -639,7 +639,7 @@ fn github_fast_path(
repo: &mut GitRepository,
url: &str,
reference: &GitReference,
client: &Client,
client: &ClientWithMiddleware,
) -> Result<FastPathRev> {
let url = Url::parse(url)?;
if !is_github(&url) {

View file

@ -7,6 +7,7 @@ use cache_key::RepositoryUrl;
use dashmap::mapref::one::Ref;
use dashmap::DashMap;
use fs_err::tokio as fs;
use reqwest_middleware::ClientWithMiddleware;
use uv_fs::LockedFile;
use crate::{Fetch, GitReference, GitSha, GitSource, GitUrl, Reporter};
@ -51,6 +52,7 @@ impl GitResolver {
pub async fn fetch(
&self,
url: &GitUrl,
client: ClientWithMiddleware,
cache: PathBuf,
reporter: Option<impl Reporter + 'static>,
) -> Result<Fetch, GitResolverError> {
@ -67,9 +69,9 @@ impl GitResolver {
// Fetch the Git repository.
let source = if let Some(reporter) = reporter {
GitSource::new(url.clone(), cache).with_reporter(reporter)
GitSource::new(url.clone(), client, cache).with_reporter(reporter)
} else {
GitSource::new(url.clone(), cache)
GitSource::new(url.clone(), client, cache)
};
let fetch = tokio::task::spawn_blocking(move || source.fetch())
.await?
@ -89,7 +91,8 @@ impl GitResolver {
pub async fn resolve(
&self,
url: &GitUrl,
cache: impl Into<PathBuf>,
client: ClientWithMiddleware,
cache: PathBuf,
reporter: Option<impl Reporter + 'static>,
) -> Result<Option<GitUrl>, GitResolverError> {
// If the Git reference already contains a complete SHA, short-circuit.
@ -108,9 +111,9 @@ impl GitResolver {
// Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
// commit, etc.).
let source = if let Some(reporter) = reporter {
GitSource::new(url.clone(), cache).with_reporter(reporter)
GitSource::new(url.clone(), client, cache).with_reporter(reporter)
} else {
GitSource::new(url.clone(), cache)
GitSource::new(url.clone(), client, cache)
};
let fetch = tokio::task::spawn_blocking(move || source.fetch())
.await?

View file

@ -4,7 +4,7 @@
use std::path::{Path, PathBuf};
use anyhow::Result;
use reqwest::Client;
use reqwest_middleware::ClientWithMiddleware;
use tracing::{debug, instrument};
use url::Url;
@ -18,7 +18,7 @@ pub struct GitSource {
/// The Git reference from the manifest file.
git: GitUrl,
/// The HTTP client to use for fetching.
client: Client,
client: ClientWithMiddleware,
/// The path to the Git source database.
cache: PathBuf,
/// The reporter to use for this source.
@ -27,10 +27,14 @@ pub struct GitSource {
impl GitSource {
/// Initialize a new Git source.
pub fn new(git: GitUrl, cache: impl Into<PathBuf>) -> Self {
pub fn new(
git: GitUrl,
client: impl Into<ClientWithMiddleware>,
cache: impl Into<PathBuf>,
) -> Self {
Self {
git,
client: Client::new(),
client: client.into(),
cache: cache.into(),
reporter: None,
}
@ -56,7 +60,10 @@ impl GitSource {
let (db, actual_rev, task) = match (self.git.precise, remote.db_at(&db_path).ok()) {
// If we have a locked revision, and we have a preexisting database
// which has that revision, then no update needs to happen.
(Some(rev), Some(db)) if db.contains(rev.into()) => (db, rev, None),
(Some(rev), Some(db)) if db.contains(rev.into()) => {
debug!("Using existing git source `{:?}`", self.git.repository);
(db, rev, None)
}
// ... otherwise we use this state to update the git database. Note
// that we still check for being offline here, for example in the