Respect pip-like Git branch, tag, and commit references (#297)

We need to parse revisions out from URLs like `MyProject @
git+https://git.example.com/MyProject.git@v1.0`, per [VCS
Support](https://pip.pypa.io/en/stable/topics/vcs-support/). Cargo has
the advantage that it uses a TOML table in its configuration, so the
user has to specify whether they're fetching a commit, a tag, a branch,
etc. We have to instead assume that anything that isn't clearly a commit
is _either_ a branch or a tag.

Closes https://github.com/astral-sh/puffin/issues/296.
This commit is contained in:
Charlie Marsh 2023-11-02 12:10:02 -07:00 committed by GitHub
parent a4002fe132
commit e47d3f1f66
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 370 additions and 29 deletions

View file

@ -637,6 +637,168 @@ fn compile_git_https_dependency() -> Result<()> {
Ok(())
}
/// Resolve a specific Flask branch via a Git HTTPS dependency.
#[test]
fn compile_git_branch_https_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in.write_str("flask @ git+https://github.com/pallets/flask.git@1.0.x")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Resolve a specific Flask tag via a Git HTTPS dependency.
#[test]
fn compile_git_tag_https_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in.write_str("flask @ git+https://github.com/pallets/flask.git@3.0.0")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Resolve a specific Flask commit via a Git HTTPS dependency.
#[test]
fn compile_git_long_commit_https_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in.write_str(
"flask @ git+https://github.com/pallets/flask.git@d92b64aa275841b0c9aea3903aba72fbc4275d91",
)?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Resolve a specific Flask commit via a Git HTTPS dependency.
#[test]
fn compile_git_short_commit_https_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in.write_str("flask @ git+https://github.com/pallets/flask.git@d92b64a")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Request Flask, but include a URL dependency for Werkzeug, which should avoid adding a
/// duplicate dependency from `PyPI`.
#[test]

View file

@ -0,0 +1,34 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
program: puffin
args:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpbXamls
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp5ZJExV/.venv
---
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
click==8.1.7
# via flask
flask @ git+https://github.com/pallets/flask.git@1.0.x
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
markupsafe==2.1.3
# via
# jinja2
# werkzeug
werkzeug==3.0.1
# via flask
----- stderr -----
Resolved 6 packages in [TIME]

View file

@ -0,0 +1,34 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
program: puffin
args:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp6sEYVi
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpsyl0w0/.venv
---
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
click==8.1.7
# via flask
flask @ git+https://github.com/pallets/flask.git@d92b64aa275841b0c9aea3903aba72fbc4275d91
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
markupsafe==2.1.3
# via
# jinja2
# werkzeug
werkzeug==3.0.1
# via flask
----- stderr -----
Resolved 6 packages in [TIME]

View file

@ -0,0 +1,34 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
program: puffin
args:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmponnpWe
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp7PNND2/.venv
---
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
click==8.1.7
# via flask
flask @ git+https://github.com/pallets/flask.git@d92b64a
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
markupsafe==2.1.3
# via
# jinja2
# werkzeug
werkzeug==3.0.1
# via flask
----- stderr -----
Resolved 6 packages in [TIME]

View file

@ -0,0 +1,36 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
program: puffin
args:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpSeBcqs
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp3ahlnG/.venv
---
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
blinker==1.7.0
# via flask
click==8.1.7
# via flask
flask @ git+https://github.com/pallets/flask.git@3.0.0
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
markupsafe==2.1.3
# via
# jinja2
# werkzeug
werkzeug==3.0.1
# via flask
----- stderr -----
Resolved 7 packages in [TIME]

View file

@ -15,12 +15,40 @@ use tracing::{debug, info, warn};
use url::Url;
use crate::util::retry;
use crate::{FetchStrategy, GitReference};
use crate::FetchStrategy;
/// A file indicates that if present, `git reset` has been done and a repo
/// checkout is ready to go. See [`GitCheckout::reset`] for why we need this.
const CHECKOUT_READY_LOCK: &str = ".ok";
/// A reference to commit or commit-ish.
#[derive(Debug, Clone)]
pub(crate) enum GitReference {
/// From a branch.
#[allow(unused)]
Branch(String),
/// From a tag.
#[allow(unused)]
Tag(String),
/// From a reference that's ambiguously a branch or tag.
BranchOrTag(String),
/// From a specific revision. Can be a commit hash (either short or full),
/// or a named reference like `refs/pull/493/head`.
Rev(String),
/// The default branch of the repository, the reference named `HEAD`.
DefaultBranch,
}
impl GitReference {
pub(crate) fn from_rev(rev: &str) -> Self {
if looks_like_commit_hash(rev) || rev.starts_with("refs/") {
Self::Rev(rev.to_owned())
} else {
Self::BranchOrTag(rev.to_owned())
}
}
}
/// A short abbreviated OID.
///
/// Exists for avoiding extra allocations in [`GitDatabase::to_short_id`].
@ -210,6 +238,23 @@ impl GitReference {
.ok_or_else(|| anyhow::format_err!("branch `{s}` did not have a target"))?
}
// Attempt to resolve the branch, then the tag.
GitReference::BranchOrTag(s) => {
let name = format!("origin/{s}");
repo.find_branch(&name, git2::BranchType::Remote)
.ok()
.and_then(|b| b.get().target())
.or_else(|| {
let refname = format!("refs/remotes/origin/tags/{s}");
let id = repo.refname_to_id(&refname).ok()?;
let obj = repo.find_object(id, None).ok()?;
let obj = obj.peel(ObjectType::Commit).ok()?;
Some(obj.id())
})
.ok_or_else(|| anyhow::format_err!("failed to find branch or tag `{s}`"))?
}
// We'll be using the HEAD commit
GitReference::DefaultBranch => {
let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
@ -859,6 +904,15 @@ pub(crate) fn fetch(
refspecs.push(format!("+refs/tags/{tag}:refs/remotes/origin/tags/{tag}"));
}
GitReference::BranchOrTag(branch_or_tag) => {
refspecs.push(format!(
"+refs/heads/{branch_or_tag}:refs/remotes/origin/{branch_or_tag}"
));
refspecs.push(format!(
"+refs/tags/{branch_or_tag}:refs/remotes/origin/tags/{branch_or_tag}"
));
}
GitReference::DefaultBranch => {
refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
}
@ -1158,6 +1212,7 @@ fn github_fast_path(
let github_branch_name = match reference {
GitReference::Branch(branch) => branch,
GitReference::Tag(tag) => tag,
GitReference::BranchOrTag(branch_or_tag) => branch_or_tag,
GitReference::DefaultBranch => "HEAD",
GitReference::Rev(rev) => {
if rev.starts_with("refs/") {

View file

@ -1,5 +1,7 @@
use url::Url;
use crate::git::GitReference;
pub use self::source::GitSource;
mod git;
@ -22,24 +24,22 @@ impl TryFrom<Url> for Git {
/// Initialize a [`Git`] source from a URL.
fn try_from(mut url: Url) -> Result<Self, Self::Error> {
let mut reference = GitReference::DefaultBranch;
for (k, v) in url.query_pairs() {
match &k[..] {
// Map older 'ref' to branch.
"branch" | "ref" => reference = GitReference::Branch(v.into_owned()),
"rev" => reference = GitReference::Rev(v.into_owned()),
"tag" => reference = GitReference::Tag(v.into_owned()),
_ => {}
}
}
let precise = url.fragment().map(git2::Oid::from_str).transpose()?;
// Remove any query parameters and fragments.
url.set_fragment(None);
url.set_query(None);
// If the URL ends with a reference, like `https://git.example.com/MyProject.git@v1.0`,
// extract it.
let mut reference = GitReference::DefaultBranch;
if let Some((prefix, rev)) = url.as_str().rsplit_once('@') {
reference = GitReference::from_rev(rev);
url = Url::parse(prefix)?;
}
Ok(Self {
url,
reference,
precise,
precise: None,
})
}
}
@ -50,20 +50,6 @@ impl std::fmt::Display for Git {
}
}
/// Information to find a specific commit in a Git repository.
#[derive(Debug, Clone)]
pub enum GitReference {
/// From a tag.
Tag(String),
/// From a branch.
Branch(String),
/// From a specific revision. Can be a commit hash (either short or full),
/// or a named reference like `refs/pull/493/head`.
Rev(String),
/// The default branch of the repository, the reference named `HEAD`.
DefaultBranch,
}
#[derive(Debug, Clone, Copy)]
pub enum FetchStrategy {
/// Fetch Git repositories using libgit2.

View file

@ -9,8 +9,8 @@ use tracing::debug;
use puffin_cache::{digest, CanonicalUrl};
use crate::git::GitRemote;
use crate::{FetchStrategy, Git, GitReference};
use crate::git::{GitReference, GitRemote};
use crate::{FetchStrategy, Git};
/// A remote Git source that can be checked out locally.
pub struct GitSource {