mirror of
https://github.com/astral-sh/uv.git
synced 2025-07-07 13:25:00 +00:00
Add support for Git dependencies (#283)
## Summary
This PR adds support for Git dependencies, like:
```
flask @ git+https://github.com/pallets/flask.git
```
Right now, they're only supported in the resolver (and not the
installer), since the installer doesn't yet support source distributions
at all.
The general approach here is based on Cargo's Git implementation.
Specifically, I adapted Cargo's
[`git`](https://github.com/rust-lang/cargo/blob/23eb492cf920ce051abfc56bbaf838514dc8365c/src/cargo/sources/git/mod.rs)
module to perform the cloning, which is based on `libgit2`.
As compared to Cargo's implementation, I made the following changes:
- Removed any unnecessary code.
- Fixed any Clippy errors for our stricter ruleset.
- Removed the dependency on `curl`, in favor of `reqwest` which we use
elsewhere.
- Removed the ability to use `gix`. Cargo allows the use of `gix` as an
experimental flag, but it only supports a small subset of the
operations. When Cargo fully adopts `gix`, we should plan to do the
same.
- Removed Cargo's host key checking. We need to re-add this! I'll do it
shortly.
- Removed Cargo's progress bars. We should re-add these too, but we use
`indicatif`, whereas Cargo has its own implementation.
There are a few follow-ups to consider:
- Adding support in the installer.
- When we lock, we should write out the Git URL that includes the exact
SHA. This lets us cache in perpetuity and avoids dependencies changing
without re-locking.
- When we resolve, we should _always_ try to refresh Git dependencies.
(Right now, we skip if the wheel was already built.)
I'll work on the latter two in follow-up PRs.
Closes #202.
This commit is contained in:
parent
4adaa9a700
commit
62c474d880
15 changed files with 2162 additions and 32 deletions
168
Cargo.lock
generated
168
Cargo.lock
generated
|
@ -362,12 +362,35 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cargo-util"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77042b5b585f701f1cfb4b6b12ebc02b9b0cefbc8dcce235906b6bf376d4245d"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"core-foundation",
|
||||
"filetime",
|
||||
"hex",
|
||||
"jobserver",
|
||||
"libc",
|
||||
"miow",
|
||||
"same-file",
|
||||
"sha2",
|
||||
"shell-escape",
|
||||
"tempfile",
|
||||
"tracing",
|
||||
"walkdir",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.83"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
"libc",
|
||||
]
|
||||
|
||||
|
@ -956,6 +979,27 @@ version = "0.28.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
|
||||
|
||||
[[package]]
|
||||
name = "git2"
|
||||
version = "0.18.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"libc",
|
||||
"libgit2-sys",
|
||||
"log",
|
||||
"openssl-probe",
|
||||
"openssl-sys",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.13"
|
||||
|
@ -1412,6 +1456,15 @@ version = "1.0.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.64"
|
||||
|
@ -1433,6 +1486,46 @@ version = "0.2.149"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
|
||||
|
||||
[[package]]
|
||||
name = "libgit2-sys"
|
||||
version = "0.16.1+1.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2a2bb3680b094add03bb3732ec520ece34da31a8cd2d633d1389d0f0fb60d0c"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"libssh2-sys",
|
||||
"libz-sys",
|
||||
"openssl-sys",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libssh2-sys"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dc8a030b787e2119a731f1951d6a773e2280c660f8ec4b0f5e1505a386e71ee"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"libz-sys",
|
||||
"openssl-sys",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libz-sys"
|
||||
version = "1.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "line-wrap"
|
||||
version = "0.1.1"
|
||||
|
@ -1582,6 +1675,15 @@ dependencies = [
|
|||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miow"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "359f76430b20a79f9e20e115b3428614e654f04fab314482fc0fda0ebd3c6044"
|
||||
dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "normalize-line-endings"
|
||||
version = "0.3.0"
|
||||
|
@ -1638,6 +1740,34 @@ version = "1.18.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
|
||||
[[package]]
|
||||
name = "openssl-probe"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
|
||||
|
||||
[[package]]
|
||||
name = "openssl-src"
|
||||
version = "300.1.6+3.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "439fac53e092cd7442a3660c85dde4643ab3b5bd39040912388dcdabf6b88085"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-sys"
|
||||
version = "0.9.93"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"openssl-src",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "option-ext"
|
||||
version = "0.2.0"
|
||||
|
@ -1778,6 +1908,12 @@ version = "0.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
|
||||
|
||||
[[package]]
|
||||
name = "plain"
|
||||
version = "0.2.3"
|
||||
|
@ -2110,6 +2246,25 @@ dependencies = [
|
|||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "puffin-git"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cargo-util",
|
||||
"git2",
|
||||
"glob",
|
||||
"hex",
|
||||
"once_cell",
|
||||
"puffin-cache",
|
||||
"rand",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "puffin-installer"
|
||||
version = "0.0.1"
|
||||
|
@ -2218,6 +2373,7 @@ dependencies = [
|
|||
"pubgrub",
|
||||
"puffin-client",
|
||||
"puffin-distribution",
|
||||
"puffin-git",
|
||||
"puffin-interpreter",
|
||||
"puffin-normalize",
|
||||
"puffin-package",
|
||||
|
@ -2841,6 +2997,12 @@ dependencies = [
|
|||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shell-escape"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "45bb67a18fa91266cc7807181f62f9178a6873bfad7dc788c42e6430db40184f"
|
||||
|
||||
[[package]]
|
||||
name = "similar"
|
||||
version = "2.3.0"
|
||||
|
@ -3505,6 +3667,12 @@ version = "0.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
|
|
|
@ -29,6 +29,7 @@ fs-err = { version = "2.9.0" }
|
|||
fs2 = { version = "0.4.3" }
|
||||
futures = { version = "0.3.28" }
|
||||
fxhash = { version = "0.2.1" }
|
||||
glob = { version = "0.3.1" }
|
||||
goblin = { version = "0.7.1" }
|
||||
hex = { version = "0.4.3" }
|
||||
http-cache-reqwest = { version = "0.11.3" }
|
||||
|
@ -42,6 +43,7 @@ petgraph = { version = "0.6.4" }
|
|||
platform-info = { version = "2.0.2" }
|
||||
plist = { version = "1.5.0" }
|
||||
pyproject-toml = { version = "0.7.0" }
|
||||
rand = { version = "0.8.5" }
|
||||
rayon = { version = "1.8.0" }
|
||||
reflink-copy = { version = "0.1.10" }
|
||||
regex = { version = "1.9.6" }
|
||||
|
|
|
@ -117,7 +117,8 @@ pub struct SourceDistributionBuilder {
|
|||
}
|
||||
|
||||
impl SourceDistributionBuilder {
|
||||
/// Extract the source distribution and create a venv with the required packages
|
||||
/// Create a virtual environment in which to build a source distribution, extracting the
|
||||
/// contents from an archive if necessary.
|
||||
pub async fn setup(
|
||||
sdist: &Path,
|
||||
interpreter_info: &InterpreterInfo,
|
||||
|
@ -126,9 +127,13 @@ impl SourceDistributionBuilder {
|
|||
let temp_dir = tempdir()?;
|
||||
|
||||
// TODO(konstin): Parse and verify filenames
|
||||
debug!("Unpacking for build {}", sdist.display());
|
||||
let extracted = temp_dir.path().join("extracted");
|
||||
let source_tree = extract_archive(sdist, &extracted)?;
|
||||
let source_tree = if fs::metadata(sdist)?.is_dir() {
|
||||
sdist.to_path_buf()
|
||||
} else {
|
||||
debug!("Unpacking for build: {}", sdist.display());
|
||||
let extracted = temp_dir.path().join("extracted");
|
||||
extract_archive(sdist, &extracted)?
|
||||
};
|
||||
|
||||
// Check if we have a PEP 517 build, a legacy setup.py, or an edge case
|
||||
let build_system = if source_tree.join("pyproject.toml").is_file() {
|
||||
|
|
|
@ -627,6 +627,46 @@ fn compile_sdist_url_dependency() -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Resolve Flask from a Git HTTPS URL (`git+https://`) that names no branch, tag, or
/// revision, and snapshot the resulting `pip-compile` output.
#[test]
fn compile_git_https_dependency() -> Result<()> {
    let project_dir = assert_fs::TempDir::new()?;
    let cache = assert_fs::TempDir::new()?;
    let venv_dir = project_dir.child(".venv");

    // Create the virtual environment that `pip-compile` is pointed at below.
    Command::new(get_cargo_bin(BIN_NAME))
        .arg("venv")
        .arg(venv_dir.as_os_str())
        .arg("--cache-dir")
        .arg(cache.path())
        .current_dir(&project_dir)
        .assert()
        .success();
    venv_dir.assert(predicates::path::is_dir());

    // The only direct requirement is Flask, taken straight from its Git repository.
    let requirements = project_dir.child("requirements.in");
    requirements.touch()?;
    requirements.write_str("flask @ git+https://github.com/pallets/flask.git")?;

    // The filters replace timings, the binary path, and the cache directory with fixed
    // placeholders before the output is compared against the stored snapshot.
    insta::with_settings!({
        filters => vec![
            (r"(\d|\.)+(ms|s)", "[TIME]"),
            (r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
            (r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
        ]
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("pip-compile")
            .arg("requirements.in")
            .arg("--cache-dir")
            .arg(cache.path())
            .env("VIRTUAL_ENV", venv_dir.as_os_str())
            .current_dir(&project_dir));
    });

    Ok(())
}
|
||||
|
||||
/// Request Flask, but include a URL dependency for Werkzeug, which should avoid adding a
|
||||
/// duplicate dependency from `PyPI`.
|
||||
#[test]
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
source: crates/puffin-cli/tests/pip_compile.rs
|
||||
info:
|
||||
program: puffin
|
||||
args:
|
||||
- pip-compile
|
||||
- requirements.in
|
||||
- "--cache-dir"
|
||||
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpbvYz3u
|
||||
env:
|
||||
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpHYkK5F/.venv
|
||||
---
|
||||
success: true
|
||||
exit_code: 0
|
||||
----- stdout -----
|
||||
# This file was autogenerated by Puffin v0.0.1 via the following command:
|
||||
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
|
||||
blinker==1.7.0
|
||||
# via flask
|
||||
click==8.1.7
|
||||
# via flask
|
||||
flask @ git+https://github.com/pallets/flask.git
|
||||
itsdangerous==2.1.2
|
||||
# via flask
|
||||
jinja2==3.1.2
|
||||
# via flask
|
||||
markupsafe==2.1.3
|
||||
# via
|
||||
# jinja2
|
||||
# werkzeug
|
||||
werkzeug==3.0.1
|
||||
# via flask
|
||||
|
||||
----- stderr -----
|
||||
Resolved 7 packages in [TIME]
|
||||
|
30
crates/puffin-git/Cargo.toml
Normal file
30
crates/puffin-git/Cargo.toml
Normal file
|
@ -0,0 +1,30 @@
|
|||
[package]
|
||||
name = "puffin-git"
|
||||
version = "0.0.1"
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
homepage = { workspace = true }
|
||||
documentation = { workspace = true }
|
||||
repository = { workspace = true }
|
||||
authors = { workspace = true }
|
||||
license = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
puffin-cache = { path = "../puffin-cache" }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
cargo-util = { version = "0.2.6" }
|
||||
git2 = { version = "0.18.1" }
|
||||
glob = { workspace = true }
|
||||
hex = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
rand = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
url = { workspace = true }
|
||||
reqwest = { workspace = true, features = ["blocking"] }
|
||||
tokio.workspace = true
|
||||
|
||||
[features]
|
||||
vendored-libgit2 = ["git2/vendored-libgit2"]
|
||||
vendored-openssl = ["git2/vendored-openssl"]
|
1365
crates/puffin-git/src/git.rs
Normal file
1365
crates/puffin-git/src/git.rs
Normal file
File diff suppressed because it is too large
Load diff
73
crates/puffin-git/src/lib.rs
Normal file
73
crates/puffin-git/src/lib.rs
Normal file
|
@ -0,0 +1,73 @@
|
|||
use url::Url;
|
||||
|
||||
pub use self::source::GitSource;
|
||||
|
||||
mod git;
|
||||
mod source;
|
||||
mod util;
|
||||
|
||||
/// A reference to a Git repository.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Git {
|
||||
/// The URL of the Git repository, with any query parameters and fragments removed.
|
||||
url: Url,
|
||||
/// The reference to the commit to use, which could be a branch, tag or revision.
|
||||
reference: GitReference,
|
||||
/// The precise commit to use, if known.
|
||||
precise: Option<git2::Oid>,
|
||||
}
|
||||
|
||||
impl TryFrom<Url> for Git {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
/// Initialize a [`Git`] source from a URL.
|
||||
fn try_from(mut url: Url) -> Result<Self, Self::Error> {
|
||||
let mut reference = GitReference::DefaultBranch;
|
||||
for (k, v) in url.query_pairs() {
|
||||
match &k[..] {
|
||||
// Map older 'ref' to branch.
|
||||
"branch" | "ref" => reference = GitReference::Branch(v.into_owned()),
|
||||
"rev" => reference = GitReference::Rev(v.into_owned()),
|
||||
"tag" => reference = GitReference::Tag(v.into_owned()),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let precise = url.fragment().map(git2::Oid::from_str).transpose()?;
|
||||
url.set_fragment(None);
|
||||
url.set_query(None);
|
||||
|
||||
Ok(Self {
|
||||
url,
|
||||
reference,
|
||||
precise,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Git {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.url)
|
||||
}
|
||||
}
|
||||
|
||||
/// Identifies which commit of a Git repository should be used.
#[derive(Debug, Clone)]
pub enum GitReference {
    /// The commit that the named tag points to.
    Tag(String),
    /// The tip of the named branch.
    Branch(String),
    /// A specific revision: either a commit hash (short or full) or a named
    /// reference such as `refs/pull/493/head`.
    Rev(String),
    /// The repository's default branch, i.e. the reference named `HEAD`.
    DefaultBranch,
}
|
||||
|
||||
/// The mechanism used to fetch Git repositories.
#[derive(Debug, Clone, Copy)]
pub enum FetchStrategy {
    /// Fetch through the libgit2 library.
    Libgit2,
    /// Fetch by invoking the `git` command-line tool.
    Cli,
}
|
91
crates/puffin-git/src/source.rs
Normal file
91
crates/puffin-git/src/source.rs
Normal file
|
@ -0,0 +1,91 @@
|
|||
//! Git support is derived from Cargo's implementation.
|
||||
//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice.
|
||||
//! Source: <https://github.com/rust-lang/cargo/blob/23eb492cf920ce051abfc56bbaf838514dc8365c/src/cargo/sources/git/source.rs>
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Result;
|
||||
use reqwest::Client;
|
||||
use tracing::debug;
|
||||
|
||||
use puffin_cache::{digest, CanonicalUrl};
|
||||
|
||||
use crate::git::GitRemote;
|
||||
use crate::{FetchStrategy, Git, GitReference};
|
||||
|
||||
/// A remote Git source that can be checked out locally.
|
||||
pub struct GitSource {
|
||||
/// The git remote which we're going to fetch from.
|
||||
remote: GitRemote,
|
||||
/// The Git reference from the manifest file.
|
||||
manifest_reference: GitReference,
|
||||
/// The revision which a git source is locked to.
|
||||
/// This is expected to be set after the Git repository is fetched.
|
||||
locked_rev: Option<git2::Oid>,
|
||||
/// The identifier of this source for Cargo's Git cache directory.
|
||||
/// See [`ident`] for more.
|
||||
ident: String,
|
||||
/// The HTTP client to use for fetching.
|
||||
client: Client,
|
||||
/// The fetch strategy to use when cloning.
|
||||
strategy: FetchStrategy,
|
||||
/// The path to the Git source database.
|
||||
git: PathBuf,
|
||||
}
|
||||
|
||||
impl GitSource {
|
||||
pub fn new(reference: Git, git: PathBuf) -> Self {
|
||||
Self {
|
||||
remote: GitRemote::new(&reference.url),
|
||||
manifest_reference: reference.reference,
|
||||
locked_rev: reference.precise,
|
||||
ident: digest(&CanonicalUrl::new(&reference.url)),
|
||||
client: Client::new(),
|
||||
strategy: FetchStrategy::Libgit2,
|
||||
git,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fetch(self) -> Result<PathBuf> {
|
||||
// The path to the repo, within the Git database.
|
||||
let db_path = self.git.join("db").join(&self.ident);
|
||||
|
||||
let (db, actual_rev) = match (self.locked_rev, self.remote.db_at(&db_path).ok()) {
|
||||
// If we have a locked revision, and we have a preexisting database
|
||||
// which has that revision, then no update needs to happen.
|
||||
(Some(rev), Some(db)) if db.contains(rev) => (db, rev),
|
||||
|
||||
// ... otherwise we use this state to update the git database. Note
|
||||
// that we still check for being offline here, for example in the
|
||||
// situation that we have a locked revision but the database
|
||||
// doesn't have it.
|
||||
(locked_rev, db) => {
|
||||
debug!("Updating Git source: `{:?}`", self.remote);
|
||||
|
||||
self.remote.checkout(
|
||||
&db_path,
|
||||
db,
|
||||
&self.manifest_reference,
|
||||
locked_rev,
|
||||
self.strategy,
|
||||
&self.client,
|
||||
)?
|
||||
}
|
||||
};
|
||||
|
||||
// Don’t use the full hash, in order to contribute less to reaching the
|
||||
// path length limit on Windows.
|
||||
let short_id = db.to_short_id(actual_rev)?;
|
||||
|
||||
// Check out `actual_rev` from the database to a scoped location on the
|
||||
// filesystem. This will use hard links and such to ideally make the
|
||||
// checkout operation here pretty fast.
|
||||
let checkout_path = self
|
||||
.git
|
||||
.join("checkouts")
|
||||
.join(&self.ident)
|
||||
.join(short_id.as_str());
|
||||
db.copy_to(actual_rev, &checkout_path, self.strategy, &self.client)?;
|
||||
|
||||
Ok(checkout_path)
|
||||
}
|
||||
}
|
45
crates/puffin-git/src/util/errors.rs
Normal file
45
crates/puffin-git/src/util/errors.rs
Normal file
|
@ -0,0 +1,45 @@
|
|||
//! Git support is derived from Cargo's implementation.
|
||||
//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice.
|
||||
//! Source: <https://github.com/rust-lang/cargo/blob/23eb492cf920ce051abfc56bbaf838514dc8365c/src/cargo/util/errors.rs>
|
||||
use std::fmt::{self, Write};
|
||||
|
||||
use super::truncate_with_ellipsis;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct HttpNotSuccessful {
|
||||
pub(crate) code: u32,
|
||||
pub(crate) url: String,
|
||||
pub(crate) ip: Option<String>,
|
||||
pub(crate) body: Vec<u8>,
|
||||
}
|
||||
|
||||
impl HttpNotSuccessful {
|
||||
fn render(&self) -> String {
|
||||
let mut result = String::new();
|
||||
let body = std::str::from_utf8(&self.body).map_or_else(
|
||||
|_| format!("[{} non-utf8 bytes]", self.body.len()),
|
||||
|s| truncate_with_ellipsis(s, 512),
|
||||
);
|
||||
|
||||
write!(
|
||||
result,
|
||||
"failed to get successful HTTP response from `{}`",
|
||||
self.url
|
||||
)
|
||||
.unwrap();
|
||||
if let Some(ip) = &self.ip {
|
||||
write!(result, " ({ip})").unwrap();
|
||||
}
|
||||
writeln!(result, ", got {}", self.code).unwrap();
|
||||
write!(result, "body:\n{body}").unwrap();
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for HttpNotSuccessful {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(&self.render())
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for HttpNotSuccessful {}
|
17
crates/puffin-git/src/util/mod.rs
Normal file
17
crates/puffin-git/src/util/mod.rs
Normal file
|
@ -0,0 +1,17 @@
|
|||
//! Git support is derived from Cargo's implementation.
|
||||
//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice.
|
||||
//! Source: <https://github.com/rust-lang/cargo/blob/23eb492cf920ce051abfc56bbaf838514dc8365c/src/cargo/util/mod.rs>
|
||||
pub(crate) mod errors;
|
||||
pub(crate) mod retry;
|
||||
|
||||
/// Shorten `s` to at most `max_width` characters, marking any truncation with `…`.
///
/// A string that already fits within `max_width` characters is returned unchanged.
/// Otherwise the first `max_width - 1` characters are kept and followed by a single `…`,
/// so the result is exactly `max_width` characters long. A `max_width` of 0 yields an
/// empty string.
///
/// Width is counted in `char`s, not grapheme clusters or display columns.
pub(crate) fn truncate_with_ellipsis(s: &str, max_width: usize) -> String {
    // We should truncate at grapheme-boundary and compute character-widths,
    // yet the dependencies on unicode-segmentation and unicode-width are
    // not worth it.

    // Nothing fits in zero columns; bail out before `max_width - 1` can underflow.
    if max_width == 0 {
        return String::new();
    }

    let mut chars = s.chars();
    let mut prefix = chars.by_ref().take(max_width - 1).collect::<String>();

    // Look one character past the one that would fill the last column: only when a
    // further character exists does the string overflow and need an ellipsis.
    match (chars.next(), chars.next()) {
        (Some(last), None) => prefix.push(last),
        (Some(_), Some(_)) => prefix.push('…'),
        (None, _) => {}
    }
    prefix
}
|
187
crates/puffin-git/src/util/retry.rs
Normal file
187
crates/puffin-git/src/util/retry.rs
Normal file
|
@ -0,0 +1,187 @@
|
|||
//! Utilities for retrying a network operation.
|
||||
//!
|
||||
//! Some network errors are considered "spurious", meaning it is not a real
|
||||
//! error (such as a 404 not found) and is likely a transient error (like a
|
||||
//! bad network connection) that we can hope will resolve itself shortly. The
|
||||
//! [`Retry`] type offers a way to repeatedly perform some kind of network
|
||||
//! operation with a delay if it detects one of these possibly transient
|
||||
//! errors.
|
||||
//!
|
||||
//! This supports errors from [`git2`], [`reqwest`], and [`HttpNotSuccessful`]
|
||||
//! 5xx HTTP errors.
|
||||
//!
|
||||
//! The number of retries is currently fixed at 3 (see [`Retry::new`]), for a
|
||||
//! total of 4 attempts. Unlike Cargo, there is no `net.retry` config option
|
||||
//! to change it.
|
||||
//!
|
||||
//! There are hard-coded constants that indicate how long to sleep between
|
||||
//! retries. The constants are tuned to balance a few factors, such as the
|
||||
//! responsiveness to the user (we don't want cargo to hang for too long
|
||||
//! retrying things), and accommodating things like Cloudfront's default
|
||||
//! negative TTL of 10 seconds (if Cloudfront gets a 5xx error for whatever
|
||||
//! reason it won't try to fetch again for 10 seconds).
|
||||
//!
|
||||
//! The timeout also implements a primitive form of random jitter. This is so
|
||||
//! that if multiple requests fail at the same time that they don't all flood
|
||||
//! the server at the same time when they are retried. This jitter still has
|
||||
//! some clumping behavior, but should be good enough.
|
||||
//!
|
||||
//! [`Retry`] is the core type for implementing retry logic. The
|
||||
//! [`Retry::try`] method can be called with a callback, and it will
|
||||
//! indicate if it needs to be called again sometime in the future if there
|
||||
//! was a possibly transient error. The caller is responsible for sleeping the
|
||||
//! appropriate amount of time and then calling [`Retry::try`] again.
|
||||
//!
|
||||
//! [`with_retry`] is a convenience function that will create a [`Retry`] and
|
||||
//! handle repeatedly running a callback until it succeeds, or it runs out of
|
||||
//! retries.
|
||||
//!
|
||||
//! Some interesting resources about retries:
|
||||
//! - <https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/>
|
||||
//! - <https://en.wikipedia.org/wiki/Exponential_backoff>
|
||||
//! - <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After>
|
||||
|
||||
//! Git support is derived from Cargo's implementation.
|
||||
//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice.
|
||||
//! Source: <https://github.com/rust-lang/cargo/blob/23eb492cf920ce051abfc56bbaf838514dc8365c/src/cargo/util/network/retry.rs>
|
||||
use std::cmp::min;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Error, Result};
|
||||
use rand::Rng;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::util::errors::HttpNotSuccessful;
|
||||
|
||||
/// Bookkeeping for retrying a network operation.
pub(crate) struct Retry {
    /// How many attempts have failed so far.
    ///
    /// Begins at 0 and grows by one with every failed attempt.
    retries: u64,
    /// Upper bound on how many times the operation may be retried.
    ///
    /// A value of 0 disables retrying altogether.
    max_retries: u64,
}
|
||||
|
||||
/// The result of attempting some operation via [`Retry::try`].
|
||||
pub(crate) enum RetryResult<T> {
|
||||
/// The operation was successful.
|
||||
///
|
||||
/// The wrapped value is the return value of the callback function.
|
||||
Success(T),
|
||||
/// The operation was an error, and it should not be tried again.
|
||||
Err(Error),
|
||||
/// The operation failed, and should be tried again in the future.
|
||||
///
|
||||
/// The wrapped value is the number of milliseconds to wait before trying
|
||||
/// again. The caller is responsible for waiting this long and then
|
||||
/// calling [`Retry::try`] again.
|
||||
Retry(u64),
|
||||
}
|
||||
|
||||
/// Upper bound on the delay before any single retry (milliseconds).
const MAX_RETRY_SLEEP_MS: u64 = 10_000;
/// Lower bound on the delay before the first retry (milliseconds).
///
/// The actual delay is this value plus a random amount of jitter.
const INITIAL_RETRY_SLEEP_BASE_MS: u64 = 500;
/// Exclusive upper bound on the random jitter added to the first retry (milliseconds).
///
/// The first delay is [`INITIAL_RETRY_SLEEP_BASE_MS`] plus a random value drawn from
/// zero up to this value.
const INITIAL_RETRY_JITTER_MS: u64 = 1_000;
|
||||
|
||||
impl Retry {
|
||||
pub(crate) fn new() -> Retry {
|
||||
Retry {
|
||||
retries: 0,
|
||||
max_retries: 3,
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls the given callback, and returns a [`RetryResult`] which
|
||||
/// indicates whether or not this needs to be called again at some point
|
||||
/// in the future to retry the operation if it failed.
|
||||
pub(crate) fn r#try<T>(&mut self, f: impl FnOnce() -> Result<T>) -> RetryResult<T> {
|
||||
match f() {
|
||||
Err(ref err) if maybe_spurious(err) && self.retries < self.max_retries => {
|
||||
let err_msg = err.downcast_ref::<HttpNotSuccessful>().map_or_else(
|
||||
|| err.root_cause().to_string(),
|
||||
HttpNotSuccessful::to_string,
|
||||
);
|
||||
warn!(
|
||||
"Spurious network error ({} tries remaining): {err_msg}",
|
||||
self.max_retries - self.retries,
|
||||
);
|
||||
self.retries += 1;
|
||||
RetryResult::Retry(self.next_sleep_ms())
|
||||
}
|
||||
Err(e) => RetryResult::Err(e),
|
||||
Ok(r) => RetryResult::Success(r),
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the next sleep duration in milliseconds.
|
||||
fn next_sleep_ms(&self) -> u64 {
|
||||
if self.retries == 1 {
|
||||
let mut rng = rand::thread_rng();
|
||||
INITIAL_RETRY_SLEEP_BASE_MS + rng.gen_range(0..INITIAL_RETRY_JITTER_MS)
|
||||
} else {
|
||||
min(
|
||||
((self.retries - 1) * 3) * 1000 + INITIAL_RETRY_SLEEP_BASE_MS,
|
||||
MAX_RETRY_SLEEP_MS,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_spurious(err: &Error) -> bool {
|
||||
if let Some(git_err) = err.downcast_ref::<git2::Error>() {
|
||||
match git_err.class() {
|
||||
git2::ErrorClass::Net
|
||||
| git2::ErrorClass::Os
|
||||
| git2::ErrorClass::Zlib
|
||||
| git2::ErrorClass::Http => return git_err.code() != git2::ErrorCode::Certificate,
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
|
||||
if reqwest_err.is_timeout()
|
||||
|| reqwest_err.is_connect()
|
||||
|| reqwest_err
|
||||
.status()
|
||||
.map_or(false, |status| status.is_server_error())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if let Some(not_200) = err.downcast_ref::<HttpNotSuccessful>() {
|
||||
if 500 <= not_200.code && not_200.code < 600 {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Wrapper method for network call retry logic.
|
||||
///
|
||||
/// Retry counts provided by Config object `net.retry`. Config shell outputs
|
||||
/// a warning on per retry.
|
||||
///
|
||||
/// Closure must return a `Result`.
|
||||
pub(crate) fn with_retry<T, F>(mut callback: F) -> Result<T>
|
||||
where
|
||||
F: FnMut() -> Result<T>,
|
||||
{
|
||||
let mut retry = Retry::new();
|
||||
loop {
|
||||
match retry.r#try(&mut callback) {
|
||||
RetryResult::Success(r) => return Ok(r),
|
||||
RetryResult::Err(e) => return Err(e),
|
||||
RetryResult::Retry(sleep) => std::thread::sleep(Duration::from_millis(sleep)),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -21,6 +21,7 @@ puffin-distribution = { path = "../puffin-distribution" }
|
|||
puffin-normalize = { path = "../puffin-normalize" }
|
||||
puffin-package = { path = "../puffin-package" }
|
||||
puffin-traits = { path = "../puffin-traits" }
|
||||
puffin-git = { path = "../puffin-git" }
|
||||
distribution-filename = { path = "../distribution-filename" }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
|
|
|
@ -687,14 +687,20 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
|
|||
let build_tree = SourceDistributionBuildTree::new(self.build_context);
|
||||
let distribution = RemoteDistributionRef::from_url(&package_name, &url);
|
||||
let metadata = match build_tree.find_dist_info(&distribution, self.tags) {
|
||||
Ok(Some(metadata)) => metadata,
|
||||
Ok(None) => build_tree
|
||||
.download_and_build_sdist(&distribution, self.client)
|
||||
.await
|
||||
.map_err(|err| ResolveError::UrlDistribution {
|
||||
url: url.clone(),
|
||||
err,
|
||||
})?,
|
||||
Ok(Some(metadata)) => {
|
||||
debug!("Found source distribution metadata in cache: {url}");
|
||||
metadata
|
||||
}
|
||||
Ok(None) => {
|
||||
debug!("Downloading source distribution from: {url}");
|
||||
build_tree
|
||||
.download_and_build_sdist(&distribution, self.client)
|
||||
.await
|
||||
.map_err(|err| ResolveError::UrlDistribution {
|
||||
url: url.clone(),
|
||||
err,
|
||||
})?
|
||||
}
|
||||
Err(err) => {
|
||||
error!(
|
||||
"Failed to read source distribution {distribution} from cache: {err}",
|
||||
|
@ -715,18 +721,22 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
|
|||
let build_tree = SourceDistributionBuildTree::new(self.build_context);
|
||||
let distribution = RemoteDistributionRef::from_url(&package_name, &url);
|
||||
let metadata = match build_tree.find_dist_info(&distribution, self.tags) {
|
||||
Ok(Some(metadata)) => metadata,
|
||||
Ok(None) => build_tree
|
||||
.download_wheel(&distribution, self.client)
|
||||
.await
|
||||
.map_err(|err| ResolveError::UrlDistribution {
|
||||
url: url.clone(),
|
||||
err,
|
||||
})?,
|
||||
Ok(Some(metadata)) => {
|
||||
debug!("Found wheel metadata in cache: {url}");
|
||||
metadata
|
||||
}
|
||||
Ok(None) => {
|
||||
debug!("Downloading wheel from: {url}");
|
||||
build_tree
|
||||
.download_wheel(&distribution, self.client)
|
||||
.await
|
||||
.map_err(|err| ResolveError::UrlDistribution {
|
||||
url: url.clone(),
|
||||
err,
|
||||
})?
|
||||
}
|
||||
Err(err) => {
|
||||
error!(
|
||||
"Failed to read built distribution {distribution} from cache: {err}",
|
||||
);
|
||||
error!("Failed to read wheel {distribution} from cache: {err}",);
|
||||
build_tree
|
||||
.download_wheel(&distribution, self.client)
|
||||
.await
|
||||
|
|
|
@ -1,17 +1,20 @@
|
|||
use std::borrow::Cow;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::{Error, Result};
|
||||
use fs_err::tokio as fs;
|
||||
use tempfile::tempdir;
|
||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||
use tracing::debug;
|
||||
use url::Url;
|
||||
use zip::ZipArchive;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
use platform_tags::Tags;
|
||||
use puffin_client::RegistryClient;
|
||||
use puffin_distribution::RemoteDistributionRef;
|
||||
use puffin_git::{Git, GitSource};
|
||||
use puffin_package::pypi_types::Metadata21;
|
||||
use puffin_traits::BuildContext;
|
||||
|
||||
|
@ -19,6 +22,8 @@ const BUILT_WHEELS_CACHE: &str = "built-wheels-v0";
|
|||
|
||||
const REMOTE_WHEELS_CACHE: &str = "remote-wheels-v0";
|
||||
|
||||
const GIT_CACHE: &str = "git-v0";
|
||||
|
||||
/// Stores wheels built from source distributions. We need to keep those separate from the regular
|
||||
/// wheel cache since a wheel with the same name may be uploaded after we made our build and in that
|
||||
/// case the hashes would clash.
|
||||
|
@ -49,16 +54,36 @@ impl<'a, T: BuildContext> SourceDistributionBuildTree<'a, T> {
|
|||
client: &RegistryClient,
|
||||
) -> Result<Metadata21> {
|
||||
debug!("Building: {distribution}");
|
||||
let url = distribution.url()?;
|
||||
let reader = client.stream_external(&url).await?;
|
||||
let mut reader = tokio::io::BufReader::new(reader.compat());
|
||||
|
||||
let temp_dir = tempdir()?;
|
||||
|
||||
// Download the source distribution.
|
||||
let sdist_filename = distribution.filename()?;
|
||||
let sdist_file = temp_dir.path().join(sdist_filename.as_ref());
|
||||
let mut writer = tokio::fs::File::create(&sdist_file).await?;
|
||||
tokio::io::copy(&mut reader, &mut writer).await?;
|
||||
let source = DistributionSource::try_from(distribution)?;
|
||||
let sdist_file = match source {
|
||||
DistributionSource::Url(url) => {
|
||||
debug!("Fetching source distribution from: {url}");
|
||||
|
||||
let reader = client.stream_external(&url).await?;
|
||||
let mut reader = tokio::io::BufReader::new(reader.compat());
|
||||
|
||||
// Download the source distribution.
|
||||
let sdist_filename = distribution.filename()?;
|
||||
let sdist_file = temp_dir.path().join(sdist_filename.as_ref());
|
||||
let mut writer = tokio::fs::File::create(&sdist_file).await?;
|
||||
tokio::io::copy(&mut reader, &mut writer).await?;
|
||||
|
||||
sdist_file
|
||||
}
|
||||
DistributionSource::Git(git) => {
|
||||
debug!("Fetching source distribution from: {git}");
|
||||
|
||||
let git_dir = self.0.cache().map_or_else(
|
||||
|| temp_dir.path().join(GIT_CACHE),
|
||||
|cache| cache.join(GIT_CACHE),
|
||||
);
|
||||
let source = GitSource::new(git, git_dir);
|
||||
tokio::task::spawn_blocking(move || source.fetch()).await??
|
||||
}
|
||||
};
|
||||
|
||||
// Create a directory for the wheel.
|
||||
let wheel_dir = self.0.cache().map_or_else(
|
||||
|
@ -166,3 +191,38 @@ fn read_dist_info(wheel: &CachedWheel) -> Result<Metadata21> {
|
|||
)?;
|
||||
Ok(Metadata21::parse(dist_info.as_bytes())?)
|
||||
}
|
||||
|
||||
/// The host source for a distribution.
|
||||
#[derive(Debug)]
|
||||
enum DistributionSource<'a> {
|
||||
/// The distribution is available at a remote URL. This could be a dedicated URL, or a URL
|
||||
/// served by a registry, like PyPI.
|
||||
Url(Cow<'a, Url>),
|
||||
/// The distribution is available in a remote Git repository.
|
||||
Git(Git),
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<&'a RemoteDistributionRef<'_>> for DistributionSource<'a> {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: &'a RemoteDistributionRef<'_>) -> Result<Self, Self::Error> {
|
||||
match value {
|
||||
// If a distribution is hosted on a registry, it must be available at a URL.
|
||||
RemoteDistributionRef::Registry(_, _, file) => {
|
||||
let url = Url::parse(&file.url)?;
|
||||
Ok(Self::Url(Cow::Owned(url)))
|
||||
}
|
||||
// If a distribution is specified via a direct URL, it could be a URL to a hosted file,
|
||||
// or a URL to a Git repository.
|
||||
RemoteDistributionRef::Url(_, url) => {
|
||||
if let Some(url) = url.as_str().strip_prefix("git+") {
|
||||
let url = Url::parse(url)?;
|
||||
let git = Git::try_from(url)?;
|
||||
Ok(Self::Git(git))
|
||||
} else {
|
||||
Ok(Self::Url(Cow::Borrowed(url)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue