From aa9882eee84b2707daa9a985faa25980c024ae46 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 3 Nov 2023 09:33:14 -0700 Subject: [PATCH] Use locks to prevent concurrent accesses to the same Git repo (#304) Ensures that if we need to access the same Git repo twice in a resolution, we only have one handler to that repo at a time. (Otherwise, `git2` panics.) --- crates/puffin-cache/src/canonical_url.rs | 10 +++++ crates/puffin-cli/tests/pip_compile.rs | 37 ++++++++++++++++ ...ompile__compile_git_concurrent_access.snap | 43 +++++++++++++++++++ crates/puffin-resolver/src/resolver.rs | 26 ++++++++++- 4 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 crates/puffin-cli/tests/snapshots/pip_compile__compile_git_concurrent_access.snap diff --git a/crates/puffin-cache/src/canonical_url.rs b/crates/puffin-cache/src/canonical_url.rs index 45dbc9a45..1abd2f28c 100644 --- a/crates/puffin-cache/src/canonical_url.rs +++ b/crates/puffin-cache/src/canonical_url.rs @@ -23,6 +23,16 @@ impl CanonicalUrl { url.path_segments_mut().unwrap().pop_if_empty(); } + // If a URL starts with a kind (like `git+`), remove it. + if let Some(suffix) = url.as_str().strip_prefix("git+") { + // If a Git URL ends in a reference (like a branch, tag, or commit), remove it. + if let Some((prefix, _)) = suffix.rsplit_once('@') { + url = prefix.parse().unwrap(); + } else { + url = suffix.parse().unwrap(); + } + } + // For GitHub URLs specifically, just lower-case everything. GitHub // treats both the same, but they hash differently, and we're gonna be // hashing them. This wants a more general solution, and also we're diff --git a/crates/puffin-cli/tests/pip_compile.rs b/crates/puffin-cli/tests/pip_compile.rs index 43bb4af62..102ed5d26 100644 --- a/crates/puffin-cli/tests/pip_compile.rs +++ b/crates/puffin-cli/tests/pip_compile.rs @@ -804,6 +804,43 @@ fn compile_git_refs_https_dependency() -> Result<()> { Ok(()) } +/// Resolve two packages from a `requirements.in` file with the same Git HTTPS dependency. +#[test] +fn compile_git_concurrent_access() -> Result<()> { + let temp_dir = assert_fs::TempDir::new()?; + let cache_dir = assert_fs::TempDir::new()?; + let venv = temp_dir.child(".venv"); + + Command::new(get_cargo_bin(BIN_NAME)) + .arg("venv") + .arg(venv.as_os_str()) + .arg("--cache-dir") + .arg(cache_dir.path()) + .current_dir(&temp_dir) + .assert() + .success(); + venv.assert(predicates::path::is_dir()); + + let requirements_in = temp_dir.child("requirements.in"); + requirements_in.touch()?; + requirements_in + .write_str("flask @ git+https://github.com/pallets/flask.git@2.0.0\ndask @ git+https://github.com/pallets/flask.git@3.0.0")?; + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-compile") + .arg("requirements.in") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir)); + }); + + Ok(()) +} + /// Request Flask, but include a URL dependency for Werkzeug, which should avoid adding a /// duplicate dependency from `PyPI`. #[test] diff --git a/crates/puffin-cli/tests/snapshots/pip_compile__compile_git_concurrent_access.snap b/crates/puffin-cli/tests/snapshots/pip_compile__compile_git_concurrent_access.snap new file mode 100644 index 000000000..296c4bb34 --- /dev/null +++ b/crates/puffin-cli/tests/snapshots/pip_compile__compile_git_concurrent_access.snap @@ -0,0 +1,43 @@ +--- +source: crates/puffin-cli/tests/pip_compile.rs +info: + program: puffin + args: + - pip-compile + - requirements.in + - "--cache-dir" + - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp6j5dBk + env: + VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpYyHlPp/.venv +--- +success: true +exit_code: 0 +----- stdout ----- +# This file was autogenerated by Puffin v0.0.1 via the following command: +# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR] +click==8.1.7 + # via + # dask + # flask +dask @ git+https://github.com/pallets/flask.git@735a4701d6d5e848241e7d7535db898efb62d400 +flask @ git+https://github.com/pallets/flask.git@2f0c62f5e6e290843f03c1fa70817c7a3c7fd661 +itsdangerous==2.1.2 + # via + # dask + # flask +jinja2==3.1.2 + # via + # dask + # flask +markupsafe==2.1.3 + # via + # jinja2 + # werkzeug +werkzeug==3.0.1 + # via + # dask + # flask + +----- stderr ----- +Resolved 7 packages in [TIME] + diff --git a/crates/puffin-resolver/src/resolver.rs b/crates/puffin-resolver/src/resolver.rs index 3f5f44b30..de1dbec44 100644 --- a/crates/puffin-resolver/src/resolver.rs +++ b/crates/puffin-resolver/src/resolver.rs @@ -14,6 +14,7 @@ use pubgrub::range::Range; use pubgrub::solver::{Incompatibility, State}; use pubgrub::type_aliases::DependencyConstraints; use tokio::select; +use tokio::sync::Mutex; use tracing::{debug, error, trace}; use url::Url; use waitmap::WaitMap; @@ -21,6 +22,7 @@ use waitmap::WaitMap; use distribution_filename::{SourceDistributionFilename, WheelFilename}; use pep508_rs::{MarkerEnvironment, Requirement}; use platform_tags::Tags; +use puffin_cache::CanonicalUrl; use puffin_client::RegistryClient; use puffin_distribution::{RemoteDistributionRef, VersionOrUrl}; use puffin_normalize::{ExtraName, PackageName}; @@ -45,6 +47,7 @@ pub struct Resolver<'a, Context: BuildContext + Sync> { client: &'a RegistryClient, selector: CandidateSelector, index: Arc, + locks: Arc, build_context: &'a Context, reporter: Option>, } @@ -59,8 +62,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { build_context: &'a Context, ) -> Self { Self { - selector: CandidateSelector::from(&manifest), index: Arc::new(Index::default()), + locks: Arc::new(Locks::default()), + selector: CandidateSelector::from(&manifest), project: manifest.project, requirements: manifest.requirements, constraints: manifest.constraints, @@ -697,6 +701,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { } // Build a source distribution from a remote URL, returning its metadata. Request::SdistUrl(package_name, url) => { + let lock = self.locks.acquire(&url).await; + let _guard = lock.lock().await; + let fetcher = SourceDistributionFetcher::new(self.build_context); let precise = fetcher @@ -744,6 +751,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { } // Fetch wheel metadata from a remote URL. Request::WheelUrl(package_name, url) => { + let lock = self.locks.acquire(&url).await; + let _guard = lock.lock().await; + let fetcher = WheelFetcher::new(self.build_context.cache()); let distribution = RemoteDistributionRef::from_url(&package_name, &url); let metadata = match fetcher.find_dist_info(&distribution, self.tags) { @@ -867,6 +877,20 @@ impl InFlight { } } +/// A set of locks used to prevent concurrent access to the same resource. +#[derive(Debug, Default)] +struct Locks(Mutex>>>); + +impl Locks { + /// Acquire a lock on the given resource. + async fn acquire(&self, url: &Url) -> Arc> { + let mut map = self.0.lock().await; + map.entry(puffin_cache::digest(&CanonicalUrl::new(url))) + .or_insert_with(|| Arc::new(Mutex::new(()))) + .clone() + } +} + /// In-memory index of package metadata. struct Index { /// A map from package name to the metadata for that package.