Write fully-precise Git SHAs to pip-compile output (#299)

This PR adds a mechanism by which we can ensure that we _always_ try to
refresh Git dependencies when resolving; further, we now write the fully
resolved SHA to the "lockfile". However, nothing in the code _assumes_
we do this, so the installer will remain agnostic to this behavior.
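
Concretely (taken from the updated test snapshots below), a branch reference in
`requirements.in` such as:

```
flask @ git+https://github.com/pallets/flask.git@1.0.x
```

...previously round-tripped into the compiled output unchanged, but now resolves
to the branch's current commit:

```
flask @ git+https://github.com/pallets/flask.git@d92b64aa275841b0c9aea3903aba72fbc4275d91
```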

The approach taken here is minimally invasive: when we try to fetch a source
distribution, we check whether it's a Git dependency; if it is, we fetch it and
return the exact SHA, which we then map back to a new URL. In the resolver, we
keep track of URL "redirects", and then use the redirect (1) for the actual
source distribution build, and (2) when writing back out to the lockfile. As
such, none of the types outside of the resolver change at all, since we're just
mapping `RemoteDistribution` to `RemoteDistribution`, but swapping out the
internal URLs.
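
As a rough sketch of the redirect bookkeeping (simplified: a plain `HashMap`
stands in for the `WaitMap` the resolver actually uses, and `resolve_url` is an
illustrative helper, not a function from this PR):

```rust
use std::collections::HashMap;
use url::Url;

/// Look a requested URL up in the redirect table, falling back to the
/// original URL when no precise variant was recorded for it.
fn resolve_url(redirects: &HashMap<Url, Url>, url: &Url) -> Url {
    redirects.get(url).cloned().unwrap_or_else(|| url.clone())
}

fn main() -> Result<(), url::ParseError> {
    let mut redirects: HashMap<Url, Url> = HashMap::new();

    // After fetching the Git dependency, the resolver records a redirect from
    // the as-requested URL to the fully-resolved, SHA-pinned URL.
    let requested = Url::parse("git+https://github.com/pallets/flask.git@1.0.x")?;
    let precise = Url::parse(
        "git+https://github.com/pallets/flask.git@d92b64aa275841b0c9aea3903aba72fbc4275d91",
    )?;
    redirects.insert(requested.clone(), precise.clone());

    // Both the source distribution build and the lockfile output consult the
    // redirect; URLs without a recorded redirect pass through unchanged.
    assert_eq!(resolve_url(&redirects, &requested), precise);
    Ok(())
}
```

In the actual change, `Graph::from_state` performs the same lookup when emitting
pinned distributions, which is why no types outside of the resolver had to
change.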

There are some inefficiencies here: e.g., we do the Git fetch, send back the
"precise" URL, and then, a moment later, do a Git checkout of that URL (which
will be _mostly_ a no-op: since we have a full SHA, we don't have to fetch
anything, but we _do_ check back on disk to see whether that SHA is still
checked out). A more efficient approach would be to return the path to the
checked-out revision when we do this conversion to a "precise" URL, since we'd
then interact with the Git repo exactly once. But that runs the risk that the
checked-out SHA changes between the time we create the "precise" URL and the
time we build the source distribution.
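
One way to see why that second interaction is cheap: once the reference is a
full 40-character SHA, the fetch path can decide locally whether anything needs
to be downloaded at all. A tiny standalone illustration of the distinction (a
sketch only; the real check lives in `puffin-git`'s fetch logic, shown in the
diff below):

```rust
/// Returns true if `rev` is a full, unambiguous commit SHA, i.e. something we
/// can look up directly in the local object database without consulting the
/// remote. Branches, tags, and abbreviated commits still need to be resolved
/// against the remote.
fn is_full_sha(rev: &str) -> bool {
    rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit())
}

fn main() {
    assert!(is_full_sha("d92b64aa275841b0c9aea3903aba72fbc4275d91")); // precise pin
    assert!(!is_full_sha("d92b64a")); // short commit
    assert!(!is_full_sha("1.0.x")); // branch or tag
}
```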

Closes #286.
Charlie Marsh 2023-11-03 09:26:57 -07:00 committed by GitHub
parent addcfe533a
commit fa1bbbbe08
14 changed files with 254 additions and 124 deletions

Cargo.lock (generated)

@ -2371,6 +2371,7 @@ dependencies = [
"platform-host",
"platform-tags",
"pubgrub",
"puffin-cache",
"puffin-client",
"puffin-distribution",
"puffin-git",


@ -244,11 +244,7 @@ fn compile_constraints_markers() -> Result<()> {
constraints_txt.write_str("sniffio==1.3.0;python_version>'3.7'")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -497,11 +493,7 @@ optional-dependencies.foo = [
)?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -539,11 +531,7 @@ fn compile_wheel_url_dependency() -> Result<()> {
requirements_in.write_str("flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -579,11 +567,7 @@ fn compile_sdist_url_dependency() -> Result<()> {
requirements_in.write_str("flask @ https://files.pythonhosted.org/packages/d8/09/c1a7354d3925a3c6c8cfdebf4245bae67d633ffda1ba415add06ffc839c5/flask-3.0.0.tar.gz")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -618,12 +602,12 @@ fn compile_git_https_dependency() -> Result<()> {
requirements_in.touch()?;
requirements_in.write_str("flask @ git+https://github.com/pallets/flask.git")?;
// In addition to the standard filters, remove the `main` commit, which will change frequently.
let mut filters = INSTA_FILTERS.to_vec();
filters.push((r"@(\d|\w){40}", "@[COMMIT]"));
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => filters
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -659,11 +643,7 @@ fn compile_git_branch_https_dependency() -> Result<()> {
requirements_in.write_str("flask @ git+https://github.com/pallets/flask.git@1.0.x")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -699,11 +679,7 @@ fn compile_git_tag_https_dependency() -> Result<()> {
requirements_in.write_str("flask @ git+https://github.com/pallets/flask.git@3.0.0")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -741,11 +717,7 @@ fn compile_git_long_commit_https_dependency() -> Result<()> {
)?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -781,11 +753,44 @@ fn compile_git_short_commit_https_dependency() -> Result<()> {
requirements_in.write_str("flask @ git+https://github.com/pallets/flask.git@d92b64a")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Resolve a specific Flask ref via a Git HTTPS dependency.
#[test]
fn compile_git_refs_https_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in
.write_str("flask @ git+https://github.com/pallets/flask.git@refs/pull/5313/head")?;
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -822,11 +827,7 @@ fn mixed_url_dependency() -> Result<()> {
requirements_in.write_str("flask==3.0.0\nwerkzeug @ https://files.pythonhosted.org/packages/c3/fc/254c3e9b5feb89ff5b9076a23218dafbc99c96ac5941e900b71206e6313b/werkzeug-3.0.1-py3-none-any.whl")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -866,11 +867,7 @@ fn conflicting_direct_url_dependency() -> Result<()> {
requirements_in.write_str("werkzeug==3.0.0\nwerkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -906,11 +903,7 @@ fn conflicting_transitive_url_dependency() -> Result<()> {
requirements_in.write_str("flask==3.0.0\nwerkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -960,11 +953,7 @@ optional-dependencies.bar = [
)?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -1015,11 +1004,7 @@ optional-dependencies.bar = [
)?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")


@ -6,9 +6,9 @@ info:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpbXamls
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpefZfz5
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp5ZJExV/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpYoAohH/.venv
---
success: true
exit_code: 0
@ -17,7 +17,7 @@ exit_code: 0
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
click==8.1.7
# via flask
flask @ git+https://github.com/pallets/flask.git@1.0.x
flask @ git+https://github.com/pallets/flask.git@d92b64aa275841b0c9aea3903aba72fbc4275d91
itsdangerous==2.1.2
# via flask
jinja2==3.1.2


@ -6,9 +6,9 @@ info:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpbvYz3u
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpXoXvPC
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpHYkK5F/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpavaydK/.venv
---
success: true
exit_code: 0
@ -19,7 +19,7 @@ blinker==1.7.0
# via flask
click==8.1.7
# via flask
flask @ git+https://github.com/pallets/flask.git
flask @ git+https://github.com/pallets/flask.git@[COMMIT]
itsdangerous==2.1.2
# via flask
jinja2==3.1.2


@ -0,0 +1,36 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
program: puffin
args:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmppFSZms
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpNxY9HX/.venv
---
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
blinker==1.7.0
# via flask
click==8.1.7
# via flask
flask @ git+https://github.com/pallets/flask.git@7af0271f4703a71beef8e26d1f5f6f8da04100e6
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
markupsafe==2.1.3
# via
# jinja2
# werkzeug
werkzeug==3.0.1
# via flask
----- stderr -----
Resolved 7 packages in [TIME]


@ -6,9 +6,9 @@ info:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmponnpWe
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpWTmXCs
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp7PNND2/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpptZ1v5/.venv
---
success: true
exit_code: 0
@ -17,7 +17,7 @@ exit_code: 0
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
click==8.1.7
# via flask
flask @ git+https://github.com/pallets/flask.git@d92b64a
flask @ git+https://github.com/pallets/flask.git@d92b64aa275841b0c9aea3903aba72fbc4275d91
itsdangerous==2.1.2
# via flask
jinja2==3.1.2


@ -6,9 +6,9 @@ info:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpSeBcqs
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpb1bm7b
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp3ahlnG/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp5Bcufx/.venv
---
success: true
exit_code: 0
@ -19,7 +19,7 @@ blinker==1.7.0
# via flask
click==8.1.7
# via flask
flask @ git+https://github.com/pallets/flask.git@3.0.0
flask @ git+https://github.com/pallets/flask.git@735a4701d6d5e848241e7d7535db898efb62d400
itsdangerous==2.1.2
# via flask
jinja2==3.1.2


@ -922,7 +922,9 @@ pub(crate) fn fetch(
refspecs.push(format!("+{rev}:{rev}"));
} else if let Some(oid_to_fetch) = oid_to_fetch {
refspecs.push(format!("+{oid_to_fetch}:refs/commit/{oid_to_fetch}"));
} else if rev.parse::<git2::Oid>().is_ok() {
} else if rev.parse::<git2::Oid>().is_ok()
&& (rev.len() == 40 || rev.starts_with("refs/"))
{
// There is a specific commit to fetch and we will do so in shallow-mode only
// to not disturb the previous logic.
// Note that with typical settings for shallowing, we will just fetch a single `rev`


@ -1,8 +1,7 @@
use url::Url;
use crate::git::GitReference;
pub use self::source::GitSource;
pub use crate::source::GitSource;
mod git;
mod source;
@ -19,6 +18,14 @@ pub struct Git {
precise: Option<git2::Oid>,
}
impl Git {
#[must_use]
pub(crate) fn with_precise(mut self, precise: git2::Oid) -> Self {
self.precise = Some(precise);
self
}
}
impl TryFrom<Url> for Git {
type Error = anyhow::Error;
@ -44,6 +51,30 @@ impl TryFrom<Url> for Git {
}
}
impl From<Git> for Url {
fn from(git: Git) -> Self {
let mut url = git.url;
// If we have a precise commit, add `@` and the commit hash to the URL.
if let Some(precise) = git.precise {
url.set_path(&format!("{}@{}", url.path(), precise));
} else {
// Otherwise, add the branch or tag name.
match git.reference {
GitReference::Branch(rev)
| GitReference::Tag(rev)
| GitReference::BranchOrTag(rev)
| GitReference::Rev(rev) => {
url.set_path(&format!("{}@{}", url.path(), rev));
}
GitReference::DefaultBranch => {}
}
}
url
}
}
impl std::fmt::Display for Git {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.url)


@ -9,47 +9,38 @@ use tracing::debug;
use puffin_cache::{digest, CanonicalUrl};
use crate::git::{GitReference, GitRemote};
use crate::git::GitRemote;
use crate::{FetchStrategy, Git};
/// A remote Git source that can be checked out locally.
pub struct GitSource {
/// The git remote which we're going to fetch from.
remote: GitRemote,
/// The Git reference from the manifest file.
manifest_reference: GitReference,
/// The revision which a git source is locked to.
/// This is expected to be set after the Git repository is fetched.
locked_rev: Option<git2::Oid>,
/// The identifier of this source for Cargo's Git cache directory.
/// See [`ident`] for more.
ident: String,
git: Git,
/// The HTTP client to use for fetching.
client: Client,
/// The fetch strategy to use when cloning.
strategy: FetchStrategy,
/// The path to the Git source database.
git: PathBuf,
cache: PathBuf,
}
impl GitSource {
pub fn new(reference: Git, git: PathBuf) -> Self {
pub fn new(git: Git, cache: impl Into<PathBuf>) -> Self {
Self {
remote: GitRemote::new(&reference.url),
manifest_reference: reference.reference,
locked_rev: reference.precise,
ident: digest(&CanonicalUrl::new(&reference.url)),
git,
client: Client::new(),
strategy: FetchStrategy::Libgit2,
git,
cache: cache.into(),
}
}
pub fn fetch(self) -> Result<PathBuf> {
pub fn fetch(self) -> Result<Fetch> {
// The path to the repo, within the Git database.
let db_path = self.git.join("db").join(&self.ident);
let ident = digest(&CanonicalUrl::new(&self.git.url));
let db_path = self.cache.join("db").join(&ident);
let (db, actual_rev) = match (self.locked_rev, self.remote.db_at(&db_path).ok()) {
let remote = GitRemote::new(&self.git.url);
let (db, actual_rev) = match (self.git.precise, remote.db_at(&db_path).ok()) {
// If we have a locked revision, and we have a preexisting database
// which has that revision, then no update needs to happen.
(Some(rev), Some(db)) if db.contains(rev) => (db, rev),
@ -59,12 +50,12 @@ impl GitSource {
// situation that we have a locked revision but the database
// doesn't have it.
(locked_rev, db) => {
debug!("Updating Git source: `{:?}`", self.remote);
debug!("Updating Git source: `{:?}`", remote);
self.remote.checkout(
remote.checkout(
&db_path,
db,
&self.manifest_reference,
&self.git.reference,
locked_rev,
self.strategy,
&self.client,
@ -80,12 +71,34 @@ impl GitSource {
// filesystem. This will use hard links and such to ideally make the
// checkout operation here pretty fast.
let checkout_path = self
.git
.cache
.join("checkouts")
.join(&self.ident)
.join(&ident)
.join(short_id.as_str());
db.copy_to(actual_rev, &checkout_path, self.strategy, &self.client)?;
Ok(checkout_path)
Ok(Fetch {
git: self.git.with_precise(actual_rev),
path: checkout_path,
})
}
}
pub struct Fetch {
/// The [`Git`] reference that was fetched.
git: Git,
/// The path to the checked out repository.
path: PathBuf,
}
impl From<Fetch> for Git {
fn from(fetch: Fetch) -> Self {
fetch.git
}
}
impl From<Fetch> for PathBuf {
fn from(fetch: Fetch) -> Self {
fetch.path
}
}


@ -16,6 +16,7 @@ pep508_rs = { path = "../pep508-rs" }
platform-host = { path = "../platform-host" }
platform-tags = { path = "../platform-tags" }
pubgrub = { path = "../../vendor/pubgrub" }
puffin-cache = { path = "../puffin-cache" }
puffin-client = { path = "../puffin-client" }
puffin-distribution = { path = "../puffin-distribution" }
puffin-normalize = { path = "../puffin-normalize" }


@ -5,12 +5,13 @@ use fs_err::tokio as fs;
use tempfile::tempdir_in;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::debug;
use url::Url;
use distribution_filename::WheelFilename;
use platform_tags::Tags;
use puffin_client::RegistryClient;
use puffin_distribution::RemoteDistributionRef;
use puffin_git::GitSource;
use puffin_git::{Git, GitSource};
use puffin_package::pypi_types::Metadata21;
use puffin_traits::BuildContext;
@ -72,7 +73,9 @@ impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> {
let git_dir = self.0.cache().join(GIT_CACHE);
let source = GitSource::new(git, git_dir);
tokio::task::spawn_blocking(move || source.fetch()).await??
tokio::task::spawn_blocking(move || source.fetch())
.await??
.into()
}
};
@ -100,4 +103,28 @@ impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> {
debug!("Finished building: {distribution}");
Ok(metadata21)
}
/// Given a URL dependency for a source distribution, return a precise variant, if possible.
///
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
/// with a precise reference to the current commit of that branch or tag.
pub(crate) async fn precise(&self, url: &Url) -> Result<Option<Url>> {
let Some(url) = url.as_str().strip_prefix("git+") else {
return Ok(None);
};
// Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
// commit, etc.).
let url = Url::parse(url)?;
let git = Git::try_from(url)?;
let git_dir = self.0.cache().join(GIT_CACHE);
let source = GitSource::new(git, git_dir);
let precise = tokio::task::spawn_blocking(move || source.fetch()).await??;
// TODO(charlie): Avoid this double-parse by encoding the source kind separately from the
// URL.
let url = Url::from(Git::from(precise));
let url = Url::parse(&format!("{}{}", "git+", url.as_str()))?;
Ok(Some(url))
}
}


@ -6,6 +6,8 @@ use petgraph::visit::EdgeRef;
use pubgrub::range::Range;
use pubgrub::solver::{Kind, State};
use pubgrub::type_aliases::SelectedDependencies;
use url::Url;
use waitmap::WaitMap;
use pep440_rs::{Version, VersionSpecifier, VersionSpecifiers};
use pep508_rs::{Requirement, VersionOrUrl};
@ -56,6 +58,7 @@ impl Graph {
pub fn from_state(
selection: &SelectedDependencies<PubGrubPackage, PubGrubVersion>,
pins: &FxHashMap<PackageName, FxHashMap<Version, File>>,
redirects: &WaitMap<Url, Url>,
state: &State<PubGrubPackage, Range<PubGrubVersion>, PubGrubPriority>,
) -> Self {
// TODO(charlie): petgraph is a really heavy and unnecessary dependency here. We should
@ -81,6 +84,9 @@ impl Graph {
inverse.insert(package_name, index);
}
PubGrubPackage::Package(package_name, None, Some(url)) => {
let url = redirects
.get(url)
.map_or_else(|| url.clone(), |url| url.value().clone());
let pinned_package =
RemoteDistribution::from_url(package_name.clone(), url.clone());


@ -182,7 +182,12 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
})
else {
let selection = state.partial_solution.extract_solution();
return Ok(Graph::from_state(&selection, &pins, &state));
return Ok(Graph::from_state(
&selection,
&pins,
&self.index.redirects,
&state,
));
};
next = highest_priority_pkg;
@ -572,7 +577,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
while let Some(response) = response_stream.next().await {
match response? {
Response::Package(package_name, metadata) => {
trace!("Received package metadata for: {}", package_name);
trace!("Received package metadata for: {package_name}");
// Group the distributions by version and kind, discarding any incompatible
// distributions.
@ -623,13 +628,19 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
.versions
.insert(file.hashes.sha256.clone(), metadata);
}
Response::WheelUrl(url, metadata) => {
trace!("Received remote wheel metadata for: {}", url);
Response::WheelUrl(url, precise, metadata) => {
trace!("Received remote wheel metadata for: {url}");
self.index.versions.insert(url.to_string(), metadata);
if let Some(precise) = precise {
self.index.redirects.insert(url, precise);
}
}
Response::SdistUrl(url, metadata) => {
trace!("Received remote source distribution metadata for: {}", url);
Response::SdistUrl(url, precise, metadata) => {
trace!("Received remote source distribution metadata for: {url}");
self.index.versions.insert(url.to_string(), metadata);
if let Some(precise) = precise {
self.index.redirects.insert(url, precise);
}
}
}
}
@ -687,7 +698,20 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
// Build a source distribution from a remote URL, returning its metadata.
Request::SdistUrl(package_name, url) => {
let fetcher = SourceDistributionFetcher::new(self.build_context);
let distribution = RemoteDistributionRef::from_url(&package_name, &url);
let precise =
fetcher
.precise(&url)
.await
.map_err(|err| ResolveError::UrlDistribution {
url: url.clone(),
err,
})?;
let distribution = RemoteDistributionRef::from_url(
&package_name,
precise.as_ref().unwrap_or(&url),
);
let metadata = match fetcher.find_dist_info(&distribution, self.tags) {
Ok(Some(metadata)) => {
debug!("Found source distribution metadata in cache: {url}");
@ -716,7 +740,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
})?
}
};
Ok(Response::SdistUrl(url, metadata))
Ok(Response::SdistUrl(url, precise, metadata))
}
// Fetch wheel metadata from a remote URL.
Request::WheelUrl(package_name, url) => {
@ -748,7 +772,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
})?
}
};
Ok(Response::WheelUrl(url, metadata))
Ok(Response::WheelUrl(url, None, metadata))
}
}
}
@ -810,9 +834,9 @@ enum Response {
/// The returned metadata for a source distribution hosted on a registry.
Sdist(SdistFile, Metadata21),
/// The returned metadata for a wheel hosted on a remote URL.
WheelUrl(Url, Metadata21),
WheelUrl(Url, Option<Url>, Metadata21),
/// The returned metadata for a source distribution hosted on a remote URL.
SdistUrl(Url, Metadata21),
SdistUrl(Url, Option<Url>, Metadata21),
}
pub(crate) type VersionMap = BTreeMap<PubGrubVersion, DistributionFile>;
@ -850,6 +874,9 @@ struct Index {
/// A map from wheel SHA or URL to the metadata for that wheel.
versions: WaitMap<String, Metadata21>,
/// A map from source URL to precise URL.
redirects: WaitMap<Url, Url>,
}
impl Default for Index {
@ -857,6 +884,7 @@ impl Default for Index {
Self {
packages: WaitMap::new(),
versions: WaitMap::new(),
redirects: WaitMap::new(),
}
}
}