mirror of
https://github.com/astral-sh/uv.git
synced 2025-07-07 21:35:00 +00:00
Use separate representations for canonical repository vs. commit (#317)
Given `https://github.com/pypa/package.git#subdirectory=pkg_a` and `https://github.com/pypa/package.git#subdirectory=pkg_b`, we want these to map to the same shared _resource_ (for locking and cloning), but different _packages_ (for determining whether the wheel already exists in the cache). As such, we need two distinct concepts for "canonical equality". Closes #316.
This commit is contained in:
parent
b589813e59
commit
051188dce0
6 changed files with 192 additions and 25 deletions
|
@ -2,15 +2,14 @@ use url::Url;
|
||||||
|
|
||||||
use crate::cache_key::{CacheKey, CacheKeyHasher};
|
use crate::cache_key::{CacheKey, CacheKeyHasher};
|
||||||
|
|
||||||
/// A wrapper around `Url` which represents a "canonical" version of an
|
/// A wrapper around `Url` which represents a "canonical" version of an original URL.
|
||||||
/// original URL.
|
|
||||||
///
|
///
|
||||||
/// A "canonical" url is only intended for internal comparison purposes. It's
|
/// A "canonical" url is only intended for internal comparison purposes. It's to help paper over
|
||||||
/// to help paper over mistakes such as depending on `github.com/foo/bar` vs.
|
/// mistakes such as depending on `github.com/foo/bar` vs. `github.com/foo/bar.git`.
|
||||||
/// `github.com/foo/bar.git`. This is **only** for internal purposes and
|
///
|
||||||
/// provides no means to actually read the underlying string value of the `Url`
|
/// This is **only** for internal purposes and provides no means to actually read the underlying
|
||||||
/// it contains. This is intentional, because all fetching should still happen
|
/// string value of the `Url` it contains. This is intentional, because all fetching should still
|
||||||
/// within the context of the original URL.
|
/// happen within the context of the original URL.
|
||||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||||
pub struct CanonicalUrl(Url);
|
pub struct CanonicalUrl(Url);
|
||||||
|
|
||||||
|
@ -23,24 +22,13 @@ impl CanonicalUrl {
|
||||||
url.path_segments_mut().unwrap().pop_if_empty();
|
url.path_segments_mut().unwrap().pop_if_empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
// If a URL starts with a kind (like `git+`), remove it.
|
|
||||||
if let Some(suffix) = url.as_str().strip_prefix("git+") {
|
|
||||||
// If a Git URL ends in a reference (like a branch, tag, or commit), remove it.
|
|
||||||
if let Some((prefix, _)) = suffix.rsplit_once('@') {
|
|
||||||
url = prefix.parse().unwrap();
|
|
||||||
} else {
|
|
||||||
url = suffix.parse().unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// For GitHub URLs specifically, just lower-case everything. GitHub
|
// For GitHub URLs specifically, just lower-case everything. GitHub
|
||||||
// treats both the same, but they hash differently, and we're gonna be
|
// treats both the same, but they hash differently, and we're gonna be
|
||||||
// hashing them. This wants a more general solution, and also we're
|
// hashing them. This wants a more general solution, and also we're
|
||||||
// almost certainly not using the same case conversion rules that GitHub
|
// almost certainly not using the same case conversion rules that GitHub
|
||||||
// does. (See issue #84)
|
// does. (See issue #84)
|
||||||
if url.host_str() == Some("github.com") {
|
if url.host_str() == Some("github.com") {
|
||||||
url = format!("https{}", &url[url::Position::AfterScheme..])
|
url.set_scheme(url.scheme().to_lowercase().as_str())
|
||||||
.parse()
|
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let path = url.path().to_lowercase();
|
let path = url.path().to_lowercase();
|
||||||
url.set_path(&path);
|
url.set_path(&path);
|
||||||
|
@ -60,6 +48,10 @@ impl CanonicalUrl {
|
||||||
|
|
||||||
CanonicalUrl(url)
|
CanonicalUrl(url)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn parse(url: &str) -> Result<Self, url::ParseError> {
|
||||||
|
Ok(Self::new(&Url::parse(url)?))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CacheKey for CanonicalUrl {
|
impl CacheKey for CanonicalUrl {
|
||||||
|
@ -69,3 +61,118 @@ impl CacheKey for CanonicalUrl {
|
||||||
self.0.as_str().cache_key(state);
|
self.0.as_str().cache_key(state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Like [`CanonicalUrl`], but attempts to represent an underlying source repository, abstracting
|
||||||
|
/// away details like the specific commit or branch, or the subdirectory to build within the
|
||||||
|
/// repository.
|
||||||
|
///
|
||||||
|
/// For example, `https://github.com/pypa/package.git#subdirectory=pkg_a` and
|
||||||
|
/// `https://github.com/pypa/package.git#subdirectory=pkg_b` would map to different
|
||||||
|
/// [`CanonicalUrl`] values, but the same [`RepositoryUrl`], since they map to the same
|
||||||
|
/// resource.
|
||||||
|
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||||
|
pub struct RepositoryUrl(Url);
|
||||||
|
|
||||||
|
impl RepositoryUrl {
|
||||||
|
pub fn new(url: &Url) -> RepositoryUrl {
|
||||||
|
let mut url = CanonicalUrl::new(url).0;
|
||||||
|
|
||||||
|
// If a Git URL ends in a reference (like a branch, tag, or commit), remove it.
|
||||||
|
if url.scheme().starts_with("git+") {
|
||||||
|
if let Some((prefix, _)) = url.as_str().rsplit_once('@') {
|
||||||
|
url = prefix.parse().unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drop any fragments and query parameters.
|
||||||
|
url.set_fragment(None);
|
||||||
|
url.set_query(None);
|
||||||
|
|
||||||
|
RepositoryUrl(url)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse(url: &str) -> Result<Self, url::ParseError> {
|
||||||
|
Ok(Self::new(&Url::parse(url)?))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CacheKey for RepositoryUrl {
|
||||||
|
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||||
|
// `as_str` gives the serialisation of a url (which has a spec) and so insulates against
|
||||||
|
// possible changes in how the URL crate does hashing.
|
||||||
|
self.0.as_str().cache_key(state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// `CanonicalUrl` ignores the `.git` suffix, but keeps repositories, subdirectories, and
    /// Git references distinct.
    #[test]
    fn canonical_url() -> Result<(), url::ParseError> {
        // Two URLs should be considered equal regardless of the `.git` suffix.
        assert_eq!(
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages")?,
        );

        // Two URLs should be _not_ considered equal if they point to different repositories.
        assert_ne!(
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
            CanonicalUrl::parse("git+https://github.com/pypa/sample-packages.git")?,
        );

        // Two URLs should _not_ be considered equal if they request different subdirectories.
        assert_ne!(
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_a")?,
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_b")?,
        );

        // Two URLs should _not_ be considered equal if they request different commit tags.
        assert_ne!(
            CanonicalUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git@v1.0.0"
            )?,
            CanonicalUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git@v2.0.0"
            )?,
        );

        Ok(())
    }

    /// `RepositoryUrl` ignores the `.git` suffix, subdirectories, and Git references, but keeps
    /// distinct repositories distinct.
    #[test]
    fn repository_url() -> Result<(), url::ParseError> {
        // Two URLs should be considered equal regardless of the `.git` suffix.
        assert_eq!(
            RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
            RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages")?,
        );

        // Two URLs should be _not_ considered equal if they point to different repositories.
        assert_ne!(
            RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
            RepositoryUrl::parse("git+https://github.com/pypa/sample-packages.git")?,
        );

        // Two URLs should be considered equal if they map to the same repository, even if they
        // request different subdirectories.
        assert_eq!(
            RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_a")?,
            RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_b")?,
        );

        // Two URLs should be considered equal if they map to the same repository, even if they
        // request different commit tags.
        assert_eq!(
            RepositoryUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git@v1.0.0"
            )?,
            RepositoryUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git@v2.0.0"
            )?,
        );

        Ok(())
    }
}
|
||||||
|
|
|
@ -2,7 +2,7 @@ use std::hash::Hasher;
|
||||||
|
|
||||||
use seahash::SeaHasher;
|
use seahash::SeaHasher;
|
||||||
|
|
||||||
pub use canonical_url::CanonicalUrl;
|
pub use canonical_url::{CanonicalUrl, RepositoryUrl};
|
||||||
pub use digest::digest;
|
pub use digest::digest;
|
||||||
|
|
||||||
mod cache_key;
|
mod cache_key;
|
||||||
|
|
|
@ -885,6 +885,43 @@ fn compile_git_subdirectory_dependency() -> Result<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolve two packages from a `requirements.in` file that both come from the same Git HTTPS
/// repository (at the same commit), but from different subdirectories.
#[test]
fn compile_git_concurrent_access() -> Result<()> {
    let workspace = assert_fs::TempDir::new()?;
    let cache = assert_fs::TempDir::new()?;
    let venv_dir = workspace.child(".venv");

    // Create the virtual environment that the resolution below runs against.
    Command::new(get_cargo_bin(BIN_NAME))
        .arg("venv")
        .arg(venv_dir.as_os_str())
        .arg("--cache-dir")
        .arg(cache.path())
        .current_dir(&workspace)
        .assert()
        .success();
    venv_dir.assert(predicates::path::is_dir());

    // Both requirements point at the same repository and commit; only the subdirectory differs.
    let requirements = "example-pkg-a @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_a\nexample-pkg-b @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_b";
    let requirements_file = workspace.child("requirements.in");
    requirements_file.touch()?;
    requirements_file.write_str(requirements)?;

    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("pip-compile")
            .arg("requirements.in")
            .arg("--cache-dir")
            .arg(cache.path())
            .env("VIRTUAL_ENV", venv_dir.as_os_str())
            .current_dir(&workspace));
    });

    Ok(())
}
|
||||||
|
|
||||||
/// Resolve a Git dependency with a declared name that differs from the true name of the package.
|
/// Resolve a Git dependency with a declared name that differs from the true name of the package.
|
||||||
#[test]
|
#[test]
|
||||||
fn compile_git_mismatched_name() -> Result<()> {
|
fn compile_git_mismatched_name() -> Result<()> {
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
---
|
||||||
|
source: crates/puffin-cli/tests/pip_compile.rs
|
||||||
|
info:
|
||||||
|
program: puffin
|
||||||
|
args:
|
||||||
|
- pip-compile
|
||||||
|
- requirements.in
|
||||||
|
- "--cache-dir"
|
||||||
|
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp41ygCm
|
||||||
|
env:
|
||||||
|
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpBfaYxl/.venv
|
||||||
|
---
|
||||||
|
success: true
|
||||||
|
exit_code: 0
|
||||||
|
----- stdout -----
|
||||||
|
# This file was autogenerated by Puffin v0.0.1 via the following command:
|
||||||
|
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
|
||||||
|
example-pkg-a @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_a
|
||||||
|
example-pkg-b @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_b
|
||||||
|
|
||||||
|
----- stderr -----
|
||||||
|
Resolved 2 packages in [TIME]
|
||||||
|
|
|
@ -7,7 +7,7 @@ use anyhow::Result;
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use puffin_cache::{digest, CanonicalUrl};
|
use puffin_cache::{digest, RepositoryUrl};
|
||||||
|
|
||||||
use crate::git::GitRemote;
|
use crate::git::GitRemote;
|
||||||
use crate::{FetchStrategy, Git};
|
use crate::{FetchStrategy, Git};
|
||||||
|
@ -36,7 +36,7 @@ impl GitSource {
|
||||||
|
|
||||||
pub fn fetch(self) -> Result<Fetch> {
|
pub fn fetch(self) -> Result<Fetch> {
|
||||||
// The path to the repo, within the Git database.
|
// The path to the repo, within the Git database.
|
||||||
let ident = digest(&CanonicalUrl::new(&self.git.url));
|
let ident = digest(&RepositoryUrl::new(&self.git.url));
|
||||||
let db_path = self.cache.join("db").join(&ident);
|
let db_path = self.cache.join("db").join(&ident);
|
||||||
|
|
||||||
let remote = GitRemote::new(&self.git.url);
|
let remote = GitRemote::new(&self.git.url);
|
||||||
|
|
|
@ -22,7 +22,7 @@ use waitmap::WaitMap;
|
||||||
use distribution_filename::{SourceDistributionFilename, WheelFilename};
|
use distribution_filename::{SourceDistributionFilename, WheelFilename};
|
||||||
use pep508_rs::{MarkerEnvironment, Requirement};
|
use pep508_rs::{MarkerEnvironment, Requirement};
|
||||||
use platform_tags::Tags;
|
use platform_tags::Tags;
|
||||||
use puffin_cache::CanonicalUrl;
|
use puffin_cache::RepositoryUrl;
|
||||||
use puffin_client::RegistryClient;
|
use puffin_client::RegistryClient;
|
||||||
use puffin_distribution::{RemoteDistributionRef, VersionOrUrl};
|
use puffin_distribution::{RemoteDistributionRef, VersionOrUrl};
|
||||||
use puffin_normalize::{ExtraName, PackageName};
|
use puffin_normalize::{ExtraName, PackageName};
|
||||||
|
@ -923,7 +923,7 @@ impl Locks {
|
||||||
/// Acquire a lock on the given resource.
|
/// Acquire a lock on the given resource.
|
||||||
async fn acquire(&self, url: &Url) -> Arc<Mutex<()>> {
|
async fn acquire(&self, url: &Url) -> Arc<Mutex<()>> {
|
||||||
let mut map = self.0.lock().await;
|
let mut map = self.0.lock().await;
|
||||||
map.entry(puffin_cache::digest(&CanonicalUrl::new(url)))
|
map.entry(puffin_cache::digest(&RepositoryUrl::new(url)))
|
||||||
.or_insert_with(|| Arc::new(Mutex::new(())))
|
.or_insert_with(|| Arc::new(Mutex::new(())))
|
||||||
.clone()
|
.clone()
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue