mirror of
https://github.com/astral-sh/uv.git
synced 2025-07-07 13:25:00 +00:00
Use separate representations for canonical repository vs. commit (#317)
Given `https://github.com/pypa/package.git#subdirectory=pkg_a` and `https://github.com/pypa/package.git#subdirectory=pkg_b`, we want these to map to the same shared _resource_ (for locking and cloning), but different _packages_ (for determining whether the wheel already exists in the cache). As such, we need two distinct concepts for "canonical equality". Closes #316.
This commit is contained in:
parent
b589813e59
commit
051188dce0
6 changed files with 192 additions and 25 deletions
|
@ -2,15 +2,14 @@ use url::Url;
|
|||
|
||||
use crate::cache_key::{CacheKey, CacheKeyHasher};
|
||||
|
||||
/// A wrapper around `Url` which represents a "canonical" version of an
|
||||
/// original URL.
|
||||
/// A wrapper around `Url` which represents a "canonical" version of an original URL.
|
||||
///
|
||||
/// A "canonical" url is only intended for internal comparison purposes. It's
|
||||
/// to help paper over mistakes such as depending on `github.com/foo/bar` vs.
|
||||
/// `github.com/foo/bar.git`. This is **only** for internal purposes and
|
||||
/// provides no means to actually read the underlying string value of the `Url`
|
||||
/// it contains. This is intentional, because all fetching should still happen
|
||||
/// within the context of the original URL.
|
||||
/// A "canonical" url is only intended for internal comparison purposes. It's to help paper over
|
||||
/// mistakes such as depending on `github.com/foo/bar` vs. `github.com/foo/bar.git`.
|
||||
///
|
||||
/// This is **only** for internal purposes and provides no means to actually read the underlying
|
||||
/// string value of the `Url` it contains. This is intentional, because all fetching should still
|
||||
/// happen within the context of the original URL.
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub struct CanonicalUrl(Url);
|
||||
|
||||
|
@ -23,24 +22,13 @@ impl CanonicalUrl {
|
|||
url.path_segments_mut().unwrap().pop_if_empty();
|
||||
}
|
||||
|
||||
// If a URL starts with a kind (like `git+`), remove it.
|
||||
if let Some(suffix) = url.as_str().strip_prefix("git+") {
|
||||
// If a Git URL ends in a reference (like a branch, tag, or commit), remove it.
|
||||
if let Some((prefix, _)) = suffix.rsplit_once('@') {
|
||||
url = prefix.parse().unwrap();
|
||||
} else {
|
||||
url = suffix.parse().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
// For GitHub URLs specifically, just lower-case everything. GitHub
|
||||
// treats both the same, but they hash differently, and we're gonna be
|
||||
// hashing them. This wants a more general solution, and also we're
|
||||
// almost certainly not using the same case conversion rules that GitHub
|
||||
// does. (See issue #84)
|
||||
if url.host_str() == Some("github.com") {
|
||||
url = format!("https{}", &url[url::Position::AfterScheme..])
|
||||
.parse()
|
||||
url.set_scheme(url.scheme().to_lowercase().as_str())
|
||||
.unwrap();
|
||||
let path = url.path().to_lowercase();
|
||||
url.set_path(&path);
|
||||
|
@ -60,6 +48,10 @@ impl CanonicalUrl {
|
|||
|
||||
CanonicalUrl(url)
|
||||
}
|
||||
|
||||
pub fn parse(url: &str) -> Result<Self, url::ParseError> {
|
||||
Ok(Self::new(&Url::parse(url)?))
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for CanonicalUrl {
|
||||
|
@ -69,3 +61,118 @@ impl CacheKey for CanonicalUrl {
|
|||
self.0.as_str().cache_key(state);
|
||||
}
|
||||
}
|
||||
|
||||
/// Like [`CanonicalUrl`], but attempts to represent an underlying source repository, abstracting
|
||||
/// away details like the specific commit or branch, or the subdirectory to build within the
|
||||
/// repository.
|
||||
///
|
||||
/// For example, `https://github.com/pypa/package.git#subdirectory=pkg_a` and
|
||||
/// `https://github.com/pypa/package.git#subdirectory=pkg_b` would map to different
|
||||
/// [`CanonicalUrl`] values, but the same [`RepositoryUrl`], since they map to the same
|
||||
/// resource.
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub struct RepositoryUrl(Url);
|
||||
|
||||
impl RepositoryUrl {
|
||||
pub fn new(url: &Url) -> RepositoryUrl {
|
||||
let mut url = CanonicalUrl::new(url).0;
|
||||
|
||||
// If a Git URL ends in a reference (like a branch, tag, or commit), remove it.
|
||||
if url.scheme().starts_with("git+") {
|
||||
if let Some((prefix, _)) = url.as_str().rsplit_once('@') {
|
||||
url = prefix.parse().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
// Drop any fragments and query parameters.
|
||||
url.set_fragment(None);
|
||||
url.set_query(None);
|
||||
|
||||
RepositoryUrl(url)
|
||||
}
|
||||
|
||||
pub fn parse(url: &str) -> Result<Self, url::ParseError> {
|
||||
Ok(Self::new(&Url::parse(url)?))
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for RepositoryUrl {
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
// `as_str` gives the serialisation of a url (which has a spec) and so insulates against
|
||||
// possible changes in how the URL crate does hashing.
|
||||
self.0.as_str().cache_key(state);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn canonical_url() -> Result<(), url::ParseError> {
|
||||
// Two URLs should be considered equal regardless of the `.git` suffix.
|
||||
assert_eq!(
|
||||
CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
|
||||
CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages")?,
|
||||
);
|
||||
|
||||
// Two URLs should be _not_ considered equal if they point to different repositories.
|
||||
assert_ne!(
|
||||
CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
|
||||
CanonicalUrl::parse("git+https://github.com/pypa/sample-packages.git")?,
|
||||
);
|
||||
|
||||
// Two URLs should _not_ be considered equal if they request different subdirectories.
|
||||
assert_ne!(
|
||||
CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_a")?,
|
||||
CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_b")?,
|
||||
);
|
||||
|
||||
// Two URLs should _not_ be considered equal if they request different commit tags.
|
||||
assert_ne!(
|
||||
CanonicalUrl::parse(
|
||||
"git+https://github.com/pypa/sample-namespace-packages.git@v1.0.0"
|
||||
)?,
|
||||
CanonicalUrl::parse(
|
||||
"git+https://github.com/pypa/sample-namespace-packages.git@v2.0.0"
|
||||
)?,
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn repository_url() -> Result<(), url::ParseError> {
|
||||
// Two URLs should be considered equal regardless of the `.git` suffix.
|
||||
assert_eq!(
|
||||
CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
|
||||
CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages")?,
|
||||
);
|
||||
|
||||
// Two URLs should be _not_ considered equal if they point to different repositories.
|
||||
assert_ne!(
|
||||
CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
|
||||
CanonicalUrl::parse("git+https://github.com/pypa/sample-packages.git")?,
|
||||
);
|
||||
|
||||
// Two URLs should be considered equal if they map to the same repository, even if they
|
||||
// request different subdirectories.
|
||||
assert_eq!(
|
||||
RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_a")?,
|
||||
RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_b")?,
|
||||
);
|
||||
|
||||
// Two URLs should be considered equal if they map to the same repository, even if they
|
||||
// request different commit tags.
|
||||
assert_eq!(
|
||||
RepositoryUrl::parse(
|
||||
"git+https://github.com/pypa/sample-namespace-packages.git@v1.0.0"
|
||||
)?,
|
||||
RepositoryUrl::parse(
|
||||
"git+https://github.com/pypa/sample-namespace-packages.git@v2.0.0"
|
||||
)?,
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@ use std::hash::Hasher;
|
|||
|
||||
use seahash::SeaHasher;
|
||||
|
||||
pub use canonical_url::CanonicalUrl;
|
||||
pub use canonical_url::{CanonicalUrl, RepositoryUrl};
|
||||
pub use digest::digest;
|
||||
|
||||
mod cache_key;
|
||||
|
|
|
@ -885,6 +885,43 @@ fn compile_git_subdirectory_dependency() -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Resolve two packages from a `requirements.in` file with the same Git HTTPS dependency.
|
||||
#[test]
|
||||
fn compile_git_concurrent_access() -> Result<()> {
|
||||
let temp_dir = assert_fs::TempDir::new()?;
|
||||
let cache_dir = assert_fs::TempDir::new()?;
|
||||
let venv = temp_dir.child(".venv");
|
||||
|
||||
Command::new(get_cargo_bin(BIN_NAME))
|
||||
.arg("venv")
|
||||
.arg(venv.as_os_str())
|
||||
.arg("--cache-dir")
|
||||
.arg(cache_dir.path())
|
||||
.current_dir(&temp_dir)
|
||||
.assert()
|
||||
.success();
|
||||
venv.assert(predicates::path::is_dir());
|
||||
|
||||
let requirements_in = temp_dir.child("requirements.in");
|
||||
requirements_in.touch()?;
|
||||
requirements_in
|
||||
.write_str("example-pkg-a @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_a\nexample-pkg-b @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_b")?;
|
||||
|
||||
insta::with_settings!({
|
||||
filters => INSTA_FILTERS.to_vec()
|
||||
}, {
|
||||
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
|
||||
.arg("pip-compile")
|
||||
.arg("requirements.in")
|
||||
.arg("--cache-dir")
|
||||
.arg(cache_dir.path())
|
||||
.env("VIRTUAL_ENV", venv.as_os_str())
|
||||
.current_dir(&temp_dir));
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Resolve a Git dependency with a declared name that differs from the true name of the package.
|
||||
#[test]
|
||||
fn compile_git_mismatched_name() -> Result<()> {
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
---
|
||||
source: crates/puffin-cli/tests/pip_compile.rs
|
||||
info:
|
||||
program: puffin
|
||||
args:
|
||||
- pip-compile
|
||||
- requirements.in
|
||||
- "--cache-dir"
|
||||
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp41ygCm
|
||||
env:
|
||||
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpBfaYxl/.venv
|
||||
---
|
||||
success: true
|
||||
exit_code: 0
|
||||
----- stdout -----
|
||||
# This file was autogenerated by Puffin v0.0.1 via the following command:
|
||||
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
|
||||
example-pkg-a @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_a
|
||||
example-pkg-b @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_b
|
||||
|
||||
----- stderr -----
|
||||
Resolved 2 packages in [TIME]
|
||||
|
|
@ -7,7 +7,7 @@ use anyhow::Result;
|
|||
use reqwest::Client;
|
||||
use tracing::debug;
|
||||
|
||||
use puffin_cache::{digest, CanonicalUrl};
|
||||
use puffin_cache::{digest, RepositoryUrl};
|
||||
|
||||
use crate::git::GitRemote;
|
||||
use crate::{FetchStrategy, Git};
|
||||
|
@ -36,7 +36,7 @@ impl GitSource {
|
|||
|
||||
pub fn fetch(self) -> Result<Fetch> {
|
||||
// The path to the repo, within the Git database.
|
||||
let ident = digest(&CanonicalUrl::new(&self.git.url));
|
||||
let ident = digest(&RepositoryUrl::new(&self.git.url));
|
||||
let db_path = self.cache.join("db").join(&ident);
|
||||
|
||||
let remote = GitRemote::new(&self.git.url);
|
||||
|
|
|
@ -22,7 +22,7 @@ use waitmap::WaitMap;
|
|||
use distribution_filename::{SourceDistributionFilename, WheelFilename};
|
||||
use pep508_rs::{MarkerEnvironment, Requirement};
|
||||
use platform_tags::Tags;
|
||||
use puffin_cache::CanonicalUrl;
|
||||
use puffin_cache::RepositoryUrl;
|
||||
use puffin_client::RegistryClient;
|
||||
use puffin_distribution::{RemoteDistributionRef, VersionOrUrl};
|
||||
use puffin_normalize::{ExtraName, PackageName};
|
||||
|
@ -923,7 +923,7 @@ impl Locks {
|
|||
/// Acquire a lock on the given resource.
|
||||
async fn acquire(&self, url: &Url) -> Arc<Mutex<()>> {
|
||||
let mut map = self.0.lock().await;
|
||||
map.entry(puffin_cache::digest(&CanonicalUrl::new(url)))
|
||||
map.entry(puffin_cache::digest(&RepositoryUrl::new(url)))
|
||||
.or_insert_with(|| Arc::new(Mutex::new(())))
|
||||
.clone()
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue