Add support for URL dependencies (#251)

## Summary

This PR adds support for resolving and installing dependencies via
direct URLs, like:

```
werkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl
```

These are fairly common (e.g., with `torch`), though in the wild you most
often see them as Git dependencies.

Broadly, structs like `RemoteDistribution` and friends are now enums
that can represent either registry-based dependencies or URL-based
dependencies:

```rust
/// A built distribution (wheel) that exists as a remote file (e.g., on `PyPI`).
#[derive(Debug, Clone)]
#[allow(clippy::large_enum_variant)]
pub enum RemoteDistribution {
    /// The distribution exists in a registry, like `PyPI`.
    Registry(PackageName, Version, File),
    /// The distribution exists at an arbitrary URL.
    Url(PackageName, Url),
}
```
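
For context, here's a minimal sketch of how the enum gets consumed downstream (a hedged illustration, not the PR's own code: it assumes `PackageName::normalize` accepts a string slice, and the registry `File` payload is only needed for the `Registry` variant):

```rust
use anyhow::Result;
use url::Url;

use puffin_distribution::RemoteDistribution;
use puffin_package::package_name::PackageName;

fn main() -> Result<()> {
    // Construct a URL-based distribution; no registry `File` metadata is needed.
    let name = PackageName::normalize("werkzeug");
    let url = Url::parse("https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl")?;
    let dist = RemoteDistribution::from_url(name, url);

    // `Display` renders URL-based distributions as `name @ url`, vs.
    // `name==version` for registry-based distributions.
    println!("{dist}");

    // `id()` yields a `.dist-info`-style `name-version` identifier for
    // registry distributions, and a SHA-256 hash of the URL for URL-based
    // distributions (there's no version to key on).
    println!("{}", dist.id());

    Ok(())
}
```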

In the resolver, we now allow packages to take on an extra, optional
`Url` field:

```rust
#[derive(Debug, Clone, Eq, Derivative)]
#[derivative(PartialEq, Hash)]
pub enum PubGrubPackage {
    Root,
    Package(
        PackageName,
        Option<DistInfoName>,
        #[derivative(PartialEq = "ignore")]
        #[derivative(PartialOrd = "ignore")]
        #[derivative(Hash = "ignore")]
        Option<Url>,
    ),
}
```

However, for the purposes of version satisfaction, we ignore the URL. This
allows a URL dependency to satisfy a transitive request in cases like:

```
flask==3.0.0
werkzeug @ https://files.pythonhosted.org/packages/c3/fc/254c3e9b5feb89ff5b9076a23218dafbc99c96ac5941e900b71206e6313b/werkzeug-3.0.1-py3-none-any.whl
```
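
To make that concrete: since the derived `PartialEq` and `Hash` skip the `Option<Url>` field, a package pinned to a URL compares equal to the same package requested by version range. A minimal sketch (assuming the `PubGrubPackage` definition above is in scope; its exact module path is internal to the resolver crate):

```rust
use url::Url;

use puffin_package::package_name::PackageName;

fn main() {
    // The same package, once pinned to a URL and once without.
    let with_url = PubGrubPackage::Package(
        PackageName::normalize("werkzeug"),
        None,
        Some(
            Url::parse("https://files.pythonhosted.org/packages/c3/fc/254c3e9b5feb89ff5b9076a23218dafbc99c96ac5941e900b71206e6313b/werkzeug-3.0.1-py3-none-any.whl")
                .unwrap(),
        ),
    );
    let without_url =
        PubGrubPackage::Package(PackageName::normalize("werkzeug"), None, None);

    // Equal, despite the differing URL field: the URL-pinned package can
    // stand in for Flask's transitive `werkzeug` requirement.
    assert_eq!(with_url, without_url);
}
```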

There are a couple of limitations in the current approach:

- The caching for remote URLs is done separately in the resolver vs. the
installer. I decided not to sweat this too much... We need to figure out
caching holistically.
- We don't support any sort of time-based cache for remote URLs -- they
just exist forever. This will be a problem for URL dependencies, where
we need some way to evict and refresh them. But I've deferred it for
now.
- I think I need to redo how this is modeled in the resolver, because
right now, we don't detect a variety of invalid cases, e.g., providing
two different URLs for a dependency, asking for a URL dependency and a
_different version_ of the same dependency in the list of first-party
dependencies, etc.
- (We don't yet support VCS dependencies.)

Cargo.lock

@ -641,6 +641,17 @@ dependencies = [
"powerfmt",
]
[[package]]
name = "derivative"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "difflib"
version = "0.4.0"
@ -695,6 +706,7 @@ dependencies = [
"platform-tags",
"puffin-package",
"thiserror",
"url",
]
[[package]]
@ -2080,8 +2092,11 @@ version = "0.1.0"
dependencies = [
"anyhow",
"distribution-filename",
"hex",
"pep440_rs 0.3.12",
"puffin-package",
"sha2",
"url",
]
[[package]]
@ -2092,6 +2107,7 @@ dependencies = [
"cacache",
"distribution-filename",
"fs-err",
"fxhash",
"install-wheel-rs",
"pep440_rs 0.3.12",
"pep508_rs",
@ -2157,6 +2173,7 @@ dependencies = [
"bitflags 2.4.1",
"clap",
"colored",
"derivative",
"distribution-filename",
"fs-err",
"futures",
@ -2177,6 +2194,7 @@ dependencies = [
"puffin-interpreter",
"puffin-package",
"puffin-traits",
"sha2",
"tempfile",
"thiserror",
"tokio",


@ -30,6 +30,7 @@ fs2 = { version = "0.4.3" }
futures = { version = "0.3.28" }
fxhash = { version = "0.2.1" }
goblin = { version = "0.7.1" }
hex = { version = "0.4.3" }
http-cache-reqwest = { version = "0.11.3" }
indicatif = { version = "0.17.7" }
indoc = { version = "2.0.4" }


@ -15,3 +15,4 @@ platform-tags = { path = "../platform-tags" }
puffin-package = { path = "../puffin-package" }
thiserror = { workspace = true }
url = { workspace = true }


@ -3,6 +3,7 @@ use std::str::FromStr;
use pep440_rs::Version;
use thiserror::Error;
use url::Url;
use platform_tags::Tags;
@ -134,6 +135,29 @@ impl WheelFilename {
}
}
impl TryFrom<&Url> for WheelFilename {
type Error = WheelFilenameError;
fn try_from(url: &Url) -> Result<Self, Self::Error> {
let filename = url
.path_segments()
.ok_or_else(|| {
WheelFilenameError::InvalidWheelFileName(
url.to_string(),
"URL must have a path".to_string(),
)
})?
.last()
.ok_or_else(|| {
WheelFilenameError::InvalidWheelFileName(
url.to_string(),
"URL must contain a filename".to_string(),
)
})?;
Self::from_str(filename)
}
}
#[derive(Error, Debug)]
pub enum WheelFilenameError {
#[error("The wheel filename \"{0}\" is invalid: {1}")]


@ -22,10 +22,10 @@ pub(crate) fn freeze(cache: Option<&Path>, _printer: Printer) -> Result<ExitStat
// Build the installed index.
let site_packages = SitePackages::try_from_executable(&python)?;
for (name, dist_info) in site_packages.iter() {
for distribution in site_packages.distributions() {
#[allow(clippy::print_stdout)]
{
println!("{}=={}", name, dist_info.version());
println!("{distribution}");
}
}


@ -111,7 +111,7 @@ pub(crate) async fn sync_requirements(
builder.build()
};
// Resolve the dependencies.
// Resolve any registry-based requirements.
let remote = if remote.is_empty() {
Vec::new()
} else {
@ -262,7 +262,7 @@ pub(crate) async fn sync_requirements(
" {} {}{}",
"+".green(),
event.distribution.name().as_ref().white().bold(),
format!("@{}", event.distribution.version()).dimmed()
event.distribution.version_or_url().to_string().dimmed()
)?;
}
ChangeEventKind::Remove => {
@ -271,7 +271,7 @@ pub(crate) async fn sync_requirements(
" {} {}{}",
"-".red(),
event.distribution.name().as_ref().white().bold(),
format!("@{}", event.distribution.version()).dimmed()
event.distribution.version_or_url().to_string().dimmed()
)?;
}
}


@ -114,7 +114,7 @@ pub(crate) async fn pip_uninstall(
" {} {}{}",
"-".red(),
distribution.name().as_ref().white().bold(),
format!("@{}", distribution.version()).dimmed()
distribution.version_or_url().to_string().dimmed()
)?;
}


@ -1,8 +1,8 @@
use indicatif::{ProgressBar, ProgressStyle};
use pep440_rs::Version;
use std::time::Duration;
use puffin_distribution::{CachedDistribution, RemoteDistribution};
use indicatif::{ProgressBar, ProgressStyle};
use puffin_distribution::{CachedDistribution, RemoteDistribution, VersionOrUrl};
use puffin_package::dist_info_name::DistInfoName;
use puffin_package::package_name::PackageName;
@ -168,12 +168,27 @@ impl From<Printer> for ResolverReporter {
}
impl puffin_resolver::ResolverReporter for ResolverReporter {
fn on_progress(&self, name: &PackageName, extra: Option<&DistInfoName>, version: &Version) {
if let Some(extra) = extra {
self.progress
.set_message(format!("{name}[{extra}]=={version}"));
} else {
self.progress.set_message(format!("{name}=={version}"));
fn on_progress(
&self,
name: &PackageName,
extra: Option<&DistInfoName>,
version_or_url: VersionOrUrl,
) {
match (extra, version_or_url) {
(None, VersionOrUrl::Version(version)) => {
self.progress.set_message(format!("{name}=={version}"));
}
(None, VersionOrUrl::Url(url)) => {
self.progress.set_message(format!("{name} @ {url}"));
}
(Some(extra), VersionOrUrl::Version(version)) => {
self.progress
.set_message(format!("{name}[{extra}]=={version}"));
}
(Some(extra), VersionOrUrl::Url(url)) => {
self.progress
.set_message(format!("{name}[{extra}] @ {url}"));
}
}
}


@ -387,7 +387,7 @@ optional-dependencies.foo = [
Ok(())
}
/// Request multple extras that do not exist as a dependency group in a `pyproject.toml` file.
/// Request multiple extras that do not exist as a dependency group in a `pyproject.toml` file.
#[test]
fn compile_pyproject_toml_extras_missing() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
@ -443,3 +443,208 @@ optional-dependencies.foo = [
Ok(())
}
/// Resolve a specific Flask wheel via a URL dependency.
#[test]
fn compile_wheel_url_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in.write_str("flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Resolve a specific Flask source distribution via a URL dependency.
#[test]
fn compile_sdist_url_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in.write_str("flask @ https://files.pythonhosted.org/packages/d8/09/c1a7354d3925a3c6c8cfdebf4245bae67d633ffda1ba415add06ffc839c5/flask-3.0.0.tar.gz")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Request Flask, but include a URL dependency for Werkzeug, which should avoid adding a
/// duplicate dependency from `PyPI`.
#[test]
fn mixed_url_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in.write_str("flask==3.0.0\nwerkzeug @ https://files.pythonhosted.org/packages/c3/fc/254c3e9b5feb89ff5b9076a23218dafbc99c96ac5941e900b71206e6313b/werkzeug-3.0.1-py3-none-any.whl")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Request Flask, but include a URL dependency for a conflicting version of Werkzeug.
///
/// TODO(charlie): This test _should_ fail, but sometimes passes due to inadequacies in our
/// URL dependency model.
#[test]
#[ignore]
fn conflicting_direct_url_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in.write_str("werkzeug==3.0.0\nwerkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Request Flask, but include a URL dependency for a conflicting version of Werkzeug.
#[test]
fn conflicting_transitive_url_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in.write_str("flask==3.0.0\nwerkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}


@ -540,3 +540,48 @@ fn upgrade() -> Result<()> {
Ok(())
}
/// Install a package into a virtual environment from a URL.
#[test]
fn install_url() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("werkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl")?;
insta::with_settings!({
filters => vec![
(r"(\d|\.)+(ms|s)", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-sync")
.arg("requirements.txt")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Command::new(venv.join("bin").join("python"))
.arg("-c")
.arg("import werkzeug")
.current_dir(&temp_dir)
.assert()
.success();
Ok(())
}


@ -0,0 +1,36 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
program: puffin
args:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpqLat7L
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpgjHydf/.venv
---
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
blinker==1.6.3
# via flask
click==8.1.7
# via flask
flask @ https://files.pythonhosted.org/packages/d8/09/c1a7354d3925a3c6c8cfdebf4245bae67d633ffda1ba415add06ffc839c5/flask-3.0.0.tar.gz
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
markupsafe==2.1.3
# via
# jinja2
# werkzeug
werkzeug==3.0.1
# via flask
----- stderr -----
Resolved 7 packages in [TIME]


@ -0,0 +1,36 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
program: puffin
args:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmppB5CDv
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpYYP3a4/.venv
---
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
blinker==1.6.3
# via flask
click==8.1.7
# via flask
flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
markupsafe==2.1.3
# via
# jinja2
# werkzeug
werkzeug==3.0.1
# via flask
----- stderr -----
Resolved 7 packages in [TIME]


@ -0,0 +1,21 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
program: puffin
args:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpb7ldgy
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp6FMPGr/.venv
---
success: false
exit_code: 1
----- stdout -----
----- stderr -----
× No solution found when resolving dependencies:
╰─▶ Because flask 3.0.0 depends on werkzeug >=3.0.0 and root 0a0.dev0
depends on flask 3.0.0, root 0a0.dev0 is forbidden.


@ -0,0 +1,36 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
program: puffin
args:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpax2HqL
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpRwQW5s/.venv
---
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
blinker==1.6.3
# via flask
click==8.1.7
# via flask
flask==3.0.0
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
markupsafe==2.1.3
# via
# jinja2
# werkzeug
werkzeug @ https://files.pythonhosted.org/packages/c3/fc/254c3e9b5feb89ff5b9076a23218dafbc99c96ac5941e900b71206e6313b/werkzeug-3.0.1-py3-none-any.whl
# via flask
----- stderr -----
Resolved 7 packages in [TIME]


@ -6,9 +6,9 @@ info:
- pip-sync
- requirements.txt
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpdWoKpL
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp2PSOBG
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpVhsDyT/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpA5I2GI/.venv
---
success: true
exit_code: 0
@ -20,6 +20,6 @@ Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
- markupsafe@2.1.3
+ tomli@2.0.1
- markupsafe==2.1.3
+ tomli==2.0.1


@ -6,9 +6,9 @@ info:
- pip-sync
- requirements.txt
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpkNx6zh
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp4zUIvb
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpvozar2/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpm4MSW5/.venv
---
success: true
exit_code: 0
@ -19,5 +19,5 @@ Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Installed 1 package in [TIME]
+ markupsafe@2.1.3
+ markupsafe==2.1.3


@ -8,9 +8,9 @@ info:
- "--link-mode"
- copy
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpqNznsn
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpKTa9Px
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp7lgVHK/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp3Lb98Q/.venv
---
success: true
exit_code: 0
@ -21,5 +21,5 @@ Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Installed 1 package in [TIME]
+ markupsafe@2.1.3
+ markupsafe==2.1.3


@ -8,9 +8,9 @@ info:
- "--link-mode"
- hardlink
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpGHO8A6
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpAoHlVb
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpBPXcHk/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpmzbaq2/.venv
---
success: true
exit_code: 0
@ -21,5 +21,5 @@ Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Installed 1 package in [TIME]
+ markupsafe@2.1.3
+ markupsafe==2.1.3


@ -6,9 +6,9 @@ info:
- pip-sync
- requirements.txt
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpoqh2hP
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp5ZOhrc
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpZCdve2/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpTMqlR7/.venv
---
success: true
exit_code: 0
@ -19,6 +19,6 @@ Resolved 2 packages in [TIME]
Downloaded 2 packages in [TIME]
Unzipped 2 packages in [TIME]
Installed 2 packages in [TIME]
+ markupsafe@2.1.3
+ tomli@2.0.1
+ markupsafe==2.1.3
+ tomli==2.0.1


@ -6,9 +6,9 @@ info:
- pip-sync
- requirements.txt
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpWxh1hG
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpZLe8WR
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpUUUwuX/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpRRpvhw/.venv
---
success: true
exit_code: 0
@ -19,5 +19,5 @@ Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Installed 1 package in [TIME]
+ tomli@2.0.1
+ tomli==2.0.1


@ -0,0 +1,23 @@
---
source: crates/puffin-cli/tests/pip_sync.rs
info:
program: puffin
args:
- pip-sync
- requirements.txt
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpbYKeok
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpGrw3HR/.venv
---
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Installed 1 package in [TIME]
+ werkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl


@ -6,9 +6,9 @@ info:
- pip-sync
- requirements.txt
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmprConY4
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp2caFOB
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpxukjZu/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpl6AyjB/.venv
---
success: true
exit_code: 0
@ -16,5 +16,5 @@ exit_code: 0
----- stderr -----
Installed 1 package in [TIME]
+ markupsafe@2.1.3
+ markupsafe==2.1.3


@ -6,9 +6,9 @@ info:
- pip-sync
- requirements.txt
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpVgm17b
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpwvC4H8
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpDIXrmg/.venv
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp582sCg/.venv
---
success: true
exit_code: 0
@ -20,6 +20,6 @@ Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
- tomli@2.0.0
+ tomli@2.0.1
- tomli==2.0.0
+ tomli==2.0.1


@ -15,3 +15,6 @@ pep440_rs = { path = "../pep440-rs" }
puffin-package = { path = "../puffin-package" }
anyhow = { workspace = true }
hex = { workspace = true }
sha2 = { workspace = true }
url = { workspace = true }


@ -1,9 +1,11 @@
use std::borrow::Cow;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use anyhow::{anyhow, Result};
use sha2::{Digest, Sha256};
use url::Url;
use distribution_filename::WheelFilename;
use pep440_rs::Version;
use puffin_package::dist_info_name::DistInfoName;
use puffin_package::package_name::PackageName;
@ -27,23 +29,13 @@ impl Distribution {
}
}
/// Return the [`Version`] of the distribution.
pub fn version(&self) -> &Version {
/// Return a [`Version`], for registry-based distributions, or a [`Url`], for URL-based
/// distributions.
pub fn version_or_url(&self) -> VersionOrUrl {
match self {
Self::Remote(dist) => dist.version(),
Self::Cached(dist) => dist.version(),
Self::Installed(dist) => dist.version(),
}
}
/// Return an identifier for a built distribution (wheel). The ID should be equivalent to the
/// `.dist-info` directory name, i.e., `<distribution>-<version>.dist-info`, where
/// `distribution` is the normalized package name with hyphens replaced by underscores.
pub fn id(&self) -> String {
match self {
Self::Remote(dist) => dist.id(),
Self::Cached(dist) => dist.id(),
Self::Installed(dist) => dist.id(),
Self::Remote(dist) => dist.version_or_url(),
Self::Cached(dist) => dist.version_or_url(),
Self::Installed(dist) => dist.version_or_url(),
}
}
}
@ -66,73 +58,107 @@ impl From<InstalledDistribution> for Distribution {
}
}
#[derive(Debug, Clone)]
pub enum VersionOrUrl<'a> {
/// A PEP 440 version specifier, used to identify a distribution in a registry.
Version(&'a Version),
/// A URL, used to identify a distribution at an arbitrary location.
Url(&'a Url),
}
impl std::fmt::Display for VersionOrUrl<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
VersionOrUrl::Version(version) => write!(f, "=={version}"),
VersionOrUrl::Url(url) => write!(f, " @ {url}"),
}
}
}
/// A built distribution (wheel) that exists as a remote file (e.g., on `PyPI`).
#[derive(Debug, Clone)]
pub struct RemoteDistribution {
name: PackageName,
version: Version,
file: File,
#[allow(clippy::large_enum_variant)]
pub enum RemoteDistribution {
/// The distribution exists in a registry, like `PyPI`.
Registry(PackageName, Version, File),
/// The distribution exists at an arbitrary URL.
Url(PackageName, Url),
}
impl RemoteDistribution {
/// Initialize a new [`RemoteDistribution`].
pub fn new(name: PackageName, version: Version, file: File) -> Self {
Self {
name,
version,
file,
/// Create a [`RemoteDistribution`] for a registry-based distribution.
pub fn from_registry(name: PackageName, version: Version, file: File) -> Self {
Self::Registry(name, version, file)
}
/// Create a [`RemoteDistribution`] for a URL-based distribution.
pub fn from_url(name: PackageName, url: Url) -> Self {
Self::Url(name, url)
}
/// Return the normalized [`PackageName`] of the distribution.
pub fn name(&self) -> &PackageName {
match self {
Self::Registry(name, _, _) => name,
Self::Url(name, _) => name,
}
}
/// Try to parse a remote distribution from a remote file (like `django-5.0a1-py3-none-any.whl`).
pub fn from_file(file: File) -> Result<Self> {
let filename = WheelFilename::from_str(&file.filename)?;
let name = PackageName::normalize(&filename.distribution);
Ok(Self {
name,
version: filename.version.clone(),
file,
})
}
pub fn name(&self) -> &PackageName {
&self.name
}
pub fn version(&self) -> &Version {
&self.version
}
pub fn file(&self) -> &File {
&self.file
/// Return a [`Version`], for registry-based distributions, or a [`Url`], for URL-based
/// distributions.
pub fn version_or_url(&self) -> VersionOrUrl {
match self {
Self::Registry(_, version, _) => VersionOrUrl::Version(version),
Self::Url(_, url) => VersionOrUrl::Url(url),
}
}
/// Returns a unique identifier for this distribution.
pub fn id(&self) -> String {
format!("{}-{}", DistInfoName::from(self.name()), self.version())
match self {
Self::Registry(name, version, _) => {
format!("{}-{}", DistInfoName::from(name), version)
}
Self::Url(_name, url) => {
let mut hasher = Sha256::new();
hasher.update(url.as_str().as_bytes());
let result = hasher.finalize();
hex::encode(result)
}
}
}
}
impl std::fmt::Display for RemoteDistribution {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}@{}", self.name(), self.version())
match self {
Self::Registry(name, version, _file) => {
write!(f, "{name}=={version}")
}
Self::Url(name, url) => {
write!(f, "{name} @ {url}")
}
}
}
}
/// A built distribution (wheel) that exists in a local cache.
#[derive(Debug, Clone)]
pub struct CachedDistribution {
name: PackageName,
version: Version,
path: PathBuf,
pub enum CachedDistribution {
/// The distribution exists in a registry, like `PyPI`.
Registry(PackageName, Version, PathBuf),
/// The distribution exists at an arbitrary URL.
Url(PackageName, Url, PathBuf),
}
impl CachedDistribution {
/// Initialize a new cached distribution.
pub fn new(name: PackageName, version: Version, path: PathBuf) -> Self {
Self {
name,
version,
path,
/// Initialize a [`CachedDistribution`] from a [`RemoteDistribution`].
pub fn from_remote(remote: RemoteDistribution, path: PathBuf) -> Self {
match remote {
RemoteDistribution::Registry(name, version, _file) => {
Self::Registry(name, version, path)
}
RemoteDistribution::Url(name, url) => Self::Url(name, url, path),
}
}
@ -152,33 +178,45 @@ impl CachedDistribution {
let version = Version::from_str(version).map_err(|err| anyhow!(err))?;
let path = path.to_path_buf();
Ok(Some(CachedDistribution {
name,
version,
path,
}))
Ok(Some(Self::Registry(name, version, path)))
}
/// Return the normalized [`PackageName`] of the distribution.
pub fn name(&self) -> &PackageName {
&self.name
}
pub fn version(&self) -> &Version {
&self.version
match self {
Self::Registry(name, _, _) => name,
Self::Url(name, _, _) => name,
}
}
/// Return the [`Path`] at which the distribution is stored on-disk.
pub fn path(&self) -> &Path {
&self.path
match self {
Self::Registry(_, _, path) => path,
Self::Url(_, _, path) => path,
}
}
pub fn id(&self) -> String {
format!("{}-{}", DistInfoName::from(self.name()), self.version())
/// Return a [`Version`], for registry-based distributions, or a [`Url`], for URL-based
/// distributions.
pub fn version_or_url(&self) -> VersionOrUrl {
match self {
Self::Registry(_, version, _) => VersionOrUrl::Version(version),
Self::Url(_, url, _) => VersionOrUrl::Url(url),
}
}
}
impl std::fmt::Display for CachedDistribution {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}@{}", self.name(), self.version())
match self {
Self::Registry(name, version, _file) => {
write!(f, "{name}=={version}")
}
Self::Url(name, url, _path) => {
write!(f, "{name} @ {url}")
}
}
}
}
@ -229,6 +267,7 @@ impl InstalledDistribution {
Ok(None)
}
/// Return the normalized [`PackageName`] of the distribution.
pub fn name(&self) -> &PackageName {
&self.name
}
@ -237,57 +276,111 @@ impl InstalledDistribution {
&self.version
}
/// Return the [`Path`] at which the distribution is stored on-disk.
pub fn path(&self) -> &Path {
&self.path
}
pub fn id(&self) -> String {
format!("{}-{}", DistInfoName::from(self.name()), self.version())
/// Return a [`Version`], for registry-based distributions, or a [`Url`], for URL-based
/// distributions.
pub fn version_or_url(&self) -> VersionOrUrl {
VersionOrUrl::Version(&self.version)
}
}
impl std::fmt::Display for InstalledDistribution {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}@{}", self.name(), self.version())
write!(f, "{}=={}", self.name(), self.version())
}
}
/// Unowned reference to a [`RemoteDistribution`].
#[derive(Debug, Clone)]
pub struct RemoteDistributionRef<'a> {
name: &'a PackageName,
version: &'a Version,
file: &'a File,
pub enum RemoteDistributionRef<'a> {
/// The distribution exists in a registry, like `PyPI`.
Registry(&'a PackageName, &'a Version, &'a File),
/// The distribution exists at an arbitrary URL.
Url(&'a PackageName, &'a Url),
}
impl<'a> RemoteDistributionRef<'a> {
pub fn new(name: &'a PackageName, version: &'a Version, file: &'a File) -> Self {
Self {
name,
version,
file,
/// Create a [`RemoteDistribution`] for a registry-based distribution.
pub fn from_registry(name: &'a PackageName, version: &'a Version, file: &'a File) -> Self {
Self::Registry(name, version, file)
}
/// Create a [`RemoteDistribution`] for a URL-based distribution.
pub fn from_url(name: &'a PackageName, url: &'a Url) -> Self {
Self::Url(name, url)
}
/// Return the URL of the distribution.
pub fn url(&self) -> Result<Cow<'_, Url>> {
match self {
Self::Registry(_, _, file) => {
let url = Url::parse(&file.url)?;
Ok(Cow::Owned(url))
}
Self::Url(_, url) => Ok(Cow::Borrowed(url)),
}
}
/// Return the filename of the distribution.
pub fn filename(&self) -> Result<Cow<'_, str>> {
match self {
Self::Registry(_, _, file) => Ok(Cow::Borrowed(&file.filename)),
Self::Url(_, url) => {
let filename = url
.path_segments()
.and_then(std::iter::Iterator::last)
.ok_or_else(|| anyhow!("Could not parse filename from URL: {}", url))?;
Ok(Cow::Owned(filename.to_owned()))
}
}
}
/// Return the normalized [`PackageName`] of the distribution.
pub fn name(&self) -> &PackageName {
self.name
match self {
Self::Registry(name, _, _) => name,
Self::Url(name, _) => name,
}
}
pub fn version(&self) -> &Version {
self.version
}
pub fn file(&self) -> &File {
self.file
/// Return a [`Version`], for registry-based distributions, or a [`Url`], for URL-based
/// distributions.
pub fn version_or_url(&self) -> VersionOrUrl {
match self {
Self::Registry(_, version, _) => VersionOrUrl::Version(version),
Self::Url(_, url) => VersionOrUrl::Url(url),
}
}
/// Returns a unique identifier for this distribution.
pub fn id(&self) -> String {
format!("{}-{}", DistInfoName::from(self.name()), self.version())
match self {
Self::Registry(name, version, _) => {
format!("{}-{}", DistInfoName::from(*name), version)
}
Self::Url(_name, url) => {
let mut hasher = Sha256::new();
hasher.update(url.as_str().as_bytes());
let result = hasher.finalize();
hex::encode(result)
}
}
}
}
impl std::fmt::Display for RemoteDistributionRef<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}@{}", self.name(), self.version())
match self {
Self::Registry(name, version, _file) => {
write!(f, "{name}=={version}")
}
Self::Url(name, url) => {
write!(f, "{name} @ {url}")
}
}
}
}


@ -22,6 +22,7 @@ distribution-filename = { path = "../distribution-filename" }
anyhow = { workspace = true }
cacache = { workspace = true }
fs-err = { workspace = true }
fxhash = { workspace = true }
rayon = { workspace = true }
tempfile = { workspace = true }
tokio = { workspace = true }


@ -1,6 +1,7 @@
use std::path::{Path, PathBuf};
use fs_err as fs;
use puffin_distribution::RemoteDistribution;
static WHEEL_CACHE: &str = "wheels-v0";
@ -17,19 +18,21 @@ impl WheelCache {
}
}
/// Return the path at which a given wheel would be stored.
pub(crate) fn entry(&self, id: &str) -> PathBuf {
self.root.join(id)
}
/// Initialize the wheel cache.
pub(crate) fn init(&self) -> std::io::Result<()> {
fs::create_dir_all(&self.root)
}
/// Return the path at which a given [`RemoteDistribution`] would be stored.
pub(crate) fn entry(&self, distribution: &RemoteDistribution) -> PathBuf {
self.root
.join(CacheShard::from(distribution).segment())
.join(distribution.id())
}
/// Returns a handle to the wheel cache directory.
pub(crate) fn read_dir(&self) -> std::io::Result<fs::ReadDir> {
fs::read_dir(&self.root)
pub(crate) fn read_dir(&self, shard: CacheShard) -> std::io::Result<fs::ReadDir> {
fs::read_dir(self.root.join(shard.segment()))
}
/// Returns the cache root.
@ -37,3 +40,28 @@ impl WheelCache {
&self.root
}
}
/// A shard of the wheel cache.
#[derive(Debug, Copy, Clone)]
pub(crate) enum CacheShard {
Registry,
Url,
}
impl CacheShard {
fn segment(&self) -> impl AsRef<Path> + '_ {
match self {
Self::Registry => "registry",
Self::Url => "url",
}
}
}
impl From<&RemoteDistribution> for CacheShard {
fn from(distribution: &RemoteDistribution) -> Self {
match distribution {
RemoteDistribution::Registry(_, _, _) => Self::Registry,
RemoteDistribution::Url(_, _) => Self::Url,
}
}
}


@ -43,13 +43,16 @@ impl<'a> Downloader<'a> {
) -> Result<Vec<InMemoryDistribution>> {
// Sort the wheels by size.
let mut wheels = wheels;
wheels.sort_unstable_by_key(|wheel| Reverse(wheel.file().size));
wheels.sort_unstable_by_key(|wheel| match wheel {
RemoteDistribution::Registry(_package, _version, file) => Reverse(file.size),
RemoteDistribution::Url(_, _) => Reverse(usize::MIN),
});
// Phase 1: Fetch the wheels in parallel.
let mut fetches = JoinSet::new();
let mut downloads = Vec::with_capacity(wheels.len());
for remote in wheels {
debug!("Downloading wheel: {}", remote.file().filename);
debug!("Downloading wheel: {remote}");
fetches.spawn(fetch_wheel(
remote.clone(),
@ -96,31 +99,47 @@ async fn fetch_wheel(
client: RegistryClient,
cache: Option<impl AsRef<Path>>,
) -> Result<InMemoryDistribution> {
// Parse the wheel's SRI.
let sri = Integrity::from_hex(&remote.file().hashes.sha256, Algorithm::Sha256)?;
match &remote {
RemoteDistribution::Registry(_package, _version, file) => {
// Parse the wheel's SRI.
let sri = Integrity::from_hex(&file.hashes.sha256, Algorithm::Sha256)?;
// Read from the cache, if possible.
if let Some(cache) = cache.as_ref() {
if let Ok(buffer) = cacache::read_hash(&cache, &sri).await {
debug!("Extracted wheel from cache: {}", remote.file().filename);
return Ok(InMemoryDistribution { remote, buffer });
// Read from the cache, if possible.
if let Some(cache) = cache.as_ref() {
if let Ok(buffer) = cacache::read_hash(&cache, &sri).await {
debug!("Extracted wheel from cache: {remote}");
return Ok(InMemoryDistribution { remote, buffer });
}
}
// Fetch the wheel.
let url = Url::parse(&file.url)?;
let reader = client.stream_external(&url).await?;
// Read into a buffer.
let mut buffer = Vec::with_capacity(file.size);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
// Write the buffer to the cache, if possible.
if let Some(cache) = cache.as_ref() {
cacache::write_hash(&cache, &buffer).await?;
}
Ok(InMemoryDistribution { remote, buffer })
}
RemoteDistribution::Url(_package, url) => {
// Fetch the wheel.
let reader = client.stream_external(url).await?;
// Read into a buffer.
let mut buffer = Vec::with_capacity(1024 * 1024);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
Ok(InMemoryDistribution { remote, buffer })
}
}
let url = Url::parse(&remote.file().url)?;
let reader = client.stream_external(&url).await?;
// Read into a buffer.
let mut buffer = Vec::with_capacity(remote.file().size);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
// Write the buffer to the cache, if possible.
if let Some(cache) = cache.as_ref() {
cacache::write_hash(&cache, &buffer).await?;
}
Ok(InMemoryDistribution { remote, buffer })
}
pub trait Reporter: Send + Sync {


@ -45,9 +45,7 @@ impl<'a> Installer<'a> {
);
install_wheel_rs::linker::install_wheel(&location, wheel.path(), self.link_mode)
.with_context(|| {
format!("Failed to install {} {}", wheel.name(), wheel.version())
})?;
.with_context(|| format!("Failed to install: {wheel}"))?;
if let Some(reporter) = self.reporter.as_ref() {
reporter.on_install_progress(wheel);


@ -1,7 +1,7 @@
pub use downloader::{Downloader, Reporter as DownloadReporter};
pub use installer::{Installer, Reporter as InstallReporter};
pub use local_index::LocalIndex;
pub use plan::PartitionedRequirements;
pub use registry_index::RegistryIndex;
pub use site_packages::SitePackages;
pub use uninstall::uninstall;
pub use unzipper::{Reporter as UnzipReporter, Unzipper};
@ -9,9 +9,10 @@ pub use unzipper::{Reporter as UnzipReporter, Unzipper};
mod cache;
mod downloader;
mod installer;
mod local_index;
mod plan;
mod registry_index;
mod site_packages;
mod uninstall;
mod unzipper;
mod url_index;
mod vendor;


@ -3,12 +3,13 @@ use std::path::Path;
use anyhow::Result;
use tracing::debug;
use pep508_rs::Requirement;
use pep508_rs::{Requirement, VersionOrUrl};
use puffin_distribution::{CachedDistribution, InstalledDistribution};
use puffin_interpreter::Virtualenv;
use puffin_package::package_name::PackageName;
use crate::{LocalIndex, SitePackages};
use crate::url_index::UrlIndex;
use crate::{RegistryIndex, SitePackages};
#[derive(Debug, Default)]
pub struct PartitionedRequirements {
@ -37,10 +38,16 @@ impl PartitionedRequirements {
let mut site_packages = SitePackages::try_from_executable(venv)?;
// Index all the already-downloaded wheels in the cache.
let local_index = if let Some(cache) = cache {
LocalIndex::try_from_directory(cache)?
let registry_index = if let Some(cache) = cache {
RegistryIndex::try_from_directory(cache)?
} else {
LocalIndex::default()
RegistryIndex::default()
};
let url_index = if let Some(cache) = cache {
UrlIndex::try_from_directory(cache)?
} else {
UrlIndex::default()
};
let mut local = vec![];
@ -51,38 +58,44 @@ impl PartitionedRequirements {
let package = PackageName::normalize(&requirement.name);
// Filter out already-installed packages.
if let Some(dist) = site_packages.remove(&package) {
if requirement.is_satisfied_by(dist.version()) {
debug!(
"Requirement already satisfied: {} ({})",
package,
dist.version()
);
if let Some(distribution) = site_packages.remove(&package) {
if requirement.is_satisfied_by(distribution.version()) {
debug!("Requirement already satisfied: {distribution}",);
continue;
}
extraneous.push(dist);
extraneous.push(distribution);
}
// Identify any locally-available distributions that satisfy the requirement.
if let Some(distribution) = local_index
.get(&package)
.filter(|dist| requirement.is_satisfied_by(dist.version()))
{
debug!(
"Requirement already cached: {} ({})",
distribution.name(),
distribution.version()
);
local.push(distribution.clone());
} else {
debug!("Identified uncached requirement: {}", requirement);
remote.push(requirement.clone());
match requirement.version_or_url.as_ref() {
None | Some(VersionOrUrl::VersionSpecifier(_)) => {
if let Some(distribution) = registry_index.get(&package).filter(|dist| {
let CachedDistribution::Registry(_name, version, _path) = dist else {
return false;
};
requirement.is_satisfied_by(version)
}) {
debug!("Requirement already cached: {distribution}");
local.push(distribution.clone());
continue;
}
}
Some(VersionOrUrl::Url(url)) => {
if let Some(distribution) = url_index.get(&package, url) {
debug!("Requirement already cached: {distribution}");
local.push(distribution.clone());
continue;
}
}
}
debug!("Identified uncached requirement: {requirement}");
remote.push(requirement.clone());
}
// Remove any unnecessary packages.
for (package, dist_info) in site_packages {
debug!("Unnecessary package: {} ({})", package, dist_info.version());
for (_package, dist_info) in site_packages {
debug!("Unnecessary package: {dist_info}");
extraneous.push(dist_info);
}


@ -6,19 +6,19 @@ use anyhow::Result;
use puffin_distribution::CachedDistribution;
use puffin_package::package_name::PackageName;
use crate::cache::WheelCache;
use crate::cache::{CacheShard, WheelCache};
/// A local index of cached distributions.
/// A local index of distributions that originate from a registry, like `PyPI`.
#[derive(Debug, Default)]
pub struct LocalIndex(HashMap<PackageName, CachedDistribution>);
pub struct RegistryIndex(HashMap<PackageName, CachedDistribution>);
impl LocalIndex {
impl RegistryIndex {
/// Build an index of cached distributions from a directory.
pub fn try_from_directory(path: &Path) -> Result<Self> {
let mut index = HashMap::new();
let cache = WheelCache::new(path);
let Ok(dir) = cache.read_dir() else {
let Ok(dir) = cache.read_dir(CacheShard::Registry) else {
return Ok(Self(index));
};


@ -27,9 +27,9 @@ impl SitePackages {
Ok(Self(index))
}
/// Returns an iterator over the installed packages.
pub fn iter(&self) -> impl Iterator<Item = (&PackageName, &InstalledDistribution)> {
self.0.iter()
/// Returns an iterator over the installed distributions.
pub fn distributions(&self) -> impl Iterator<Item = &InstalledDistribution> {
self.0.values()
}
/// Returns the version of the given package, if it is installed.


@ -48,7 +48,7 @@ impl Unzipper {
for download in downloads {
let remote = download.remote.clone();
debug!("Unpacking wheel: {}", remote.file().filename);
debug!("Unpacking wheel: {remote}");
// Unzip the wheel.
tokio::task::spawn_blocking({
@ -58,29 +58,26 @@ impl Unzipper {
.await??;
// Write the unzipped wheel to the target directory.
let result = fs_err::tokio::rename(
staging.path().join(remote.id()),
wheel_cache.entry(&remote.id()),
)
.await;
let target = wheel_cache.entry(&remote);
if let Some(parent) = target.parent() {
fs_err::create_dir_all(parent)?;
}
let result = fs_err::tokio::rename(staging.path().join(remote.id()), target).await;
if let Err(err) = result {
// If the renaming failed because another instance was faster, that's fine
// (`DirectoryNotEmpty` is not stable so we can't match on it)
if !wheel_cache.entry(&remote.id()).is_dir() {
if !wheel_cache.entry(&remote).is_dir() {
return Err(err.into());
}
}
wheels.push(CachedDistribution::new(
remote.name().clone(),
remote.version().clone(),
wheel_cache.entry(&remote.id()),
));
if let Some(reporter) = self.reporter.as_ref() {
reporter.on_unzip_progress(&remote);
}
let path = wheel_cache.entry(&remote);
wheels.push(CachedDistribution::from_remote(remote, path));
}
if let Some(reporter) = self.reporter.as_ref() {


@ -0,0 +1,50 @@
use std::path::{Path, PathBuf};
use anyhow::Result;
use fxhash::FxHashMap;
use url::Url;
use crate::cache::{CacheShard, WheelCache};
use puffin_distribution::{CachedDistribution, RemoteDistributionRef};
use puffin_package::package_name::PackageName;
/// A local index of distributions that originate from arbitrary URLs (as opposed to being
/// downloaded from a registry, like `PyPI`).
#[derive(Debug, Default)]
pub(crate) struct UrlIndex(FxHashMap<String, PathBuf>);
impl UrlIndex {
/// Build an index of cached distributions from a directory.
pub(crate) fn try_from_directory(path: &Path) -> Result<Self> {
let mut index = FxHashMap::default();
let cache = WheelCache::new(path);
let Ok(dir) = cache.read_dir(CacheShard::Url) else {
return Ok(Self(index));
};
for entry in dir {
let entry = entry?;
if entry.file_type()?.is_dir() {
let file_name = entry.file_name();
let Some(filename) = file_name.to_str() else {
continue;
};
index.insert(filename.to_string(), entry.path());
}
}
Ok(Self(index))
}
/// Returns a distribution from the index, if it exists.
pub(crate) fn get(&self, name: &PackageName, url: &Url) -> Option<CachedDistribution> {
let distribution = RemoteDistributionRef::from_url(name, url);
let path = self.0.get(&distribution.id())?;
Some(CachedDistribution::Url(
name.clone(),
url.clone(),
path.clone(),
))
}
}


@ -40,6 +40,8 @@ tracing = { workspace = true }
url = { workspace = true }
waitmap = { workspace = true }
zip = { workspace = true }
derivative = { version = "2.2.0" }
sha2 = { workspace = true }
[dev-dependencies]
gourgeist = { path = "../gourgeist" }


@ -1,5 +1,6 @@
use pubgrub::range::Range;
use thiserror::Error;
use url::Url;
use pep508_rs::Requirement;
@ -25,13 +26,21 @@ pub enum ResolveError {
#[error(transparent)]
PubGrub(#[from] pubgrub::error::PubGrubError<PubGrubPackage, Range<PubGrubVersion>>),
#[error("Failed to build source distribution {filename}")]
SourceDistribution {
#[error("Failed to build distribution: {filename}")]
RegistryDistribution {
filename: String,
// TODO(konstin): Give this a proper error type
#[source]
err: anyhow::Error,
},
#[error("Failed to build distribution: {url}")]
UrlDistribution {
url: Url,
// TODO(konstin): Give this a proper error type
#[source]
err: anyhow::Error,
},
}
impl<T> From<futures::channel::mpsc::TrySendError<T>> for ResolveError {


@ -3,7 +3,7 @@ use itertools::Itertools;
use pubgrub::range::Range;
use tracing::warn;
use pep508_rs::{MarkerEnvironment, Requirement};
use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl};
use puffin_package::dist_info_name::DistInfoName;
use puffin_package::package_name::PackageName;
@ -28,8 +28,8 @@ pub(crate) fn iter_requirements<'a>(
.filter(move |requirement| {
let normalized = PackageName::normalize(&requirement.name);
if source.is_some_and(|source| source == &normalized) {
// TODO: Warn only once here
warn!("{} depends on itself", normalized);
// TODO(konstin): Warn only once here
warn!("{normalized} depends on itself");
false
} else {
true
@ -45,38 +45,26 @@ pub(crate) fn iter_requirements<'a>(
requirement.evaluate_markers(env, &extra)
})
.flat_map(|requirement| {
let normalized_name = PackageName::normalize(&requirement.name);
let package = PubGrubPackage::Package(normalized_name.clone(), None);
let versions = version_range(requirement.version_or_url.as_ref()).unwrap();
std::iter::once((package, versions)).chain(
std::iter::once(pubgrub_package(requirement, None).unwrap()).chain(
requirement
.extras
.clone()
.into_iter()
.flatten()
.map(move |extra| {
let package = PubGrubPackage::Package(
normalized_name.clone(),
Some(DistInfoName::normalize(extra)),
);
let versions = version_range(requirement.version_or_url.as_ref()).unwrap();
(package, versions)
.map(|extra| {
pubgrub_package(requirement, Some(DistInfoName::normalize(extra))).unwrap()
}),
)
})
}
/// Convert a PEP 508 specifier to a `PubGrub` range.
pub(crate) fn version_range(
specifiers: Option<&pep508_rs::VersionOrUrl>,
) -> Result<Range<PubGrubVersion>> {
pub(crate) fn version_range(specifiers: Option<&VersionOrUrl>) -> Result<Range<PubGrubVersion>> {
let Some(specifiers) = specifiers else {
return Ok(Range::full());
};
let pep508_rs::VersionOrUrl::VersionSpecifier(specifiers) = specifiers else {
let VersionOrUrl::VersionSpecifier(specifiers) = specifiers else {
return Ok(Range::full());
};
@ -87,3 +75,39 @@ pub(crate) fn version_range(
range.intersection(&specifier.into())
})
}
/// Convert a [`Requirement`] to a `PubGrub`-compatible package and range.
fn pubgrub_package(
requirement: &Requirement,
extra: Option<DistInfoName>,
) -> Result<(PubGrubPackage, Range<PubGrubVersion>)> {
match requirement.version_or_url.as_ref() {
// The requirement has no specifier (e.g., `flask`).
None => Ok((
PubGrubPackage::Package(PackageName::normalize(&requirement.name), extra, None),
Range::full(),
)),
// The requirement has a URL (e.g., `flask @ file:///path/to/flask`).
Some(VersionOrUrl::Url(url)) => Ok((
PubGrubPackage::Package(
PackageName::normalize(&requirement.name),
extra,
Some(url.clone()),
),
Range::full(),
)),
// The requirement has a specifier (e.g., `flask>=1.0`).
Some(VersionOrUrl::VersionSpecifier(specifiers)) => {
let version = specifiers
.iter()
.map(PubGrubSpecifier::try_from)
.fold_ok(Range::full(), |range, specifier| {
range.intersection(&specifier.into())
})?;
Ok((
PubGrubPackage::Package(PackageName::normalize(&requirement.name), extra, None),
version,
))
}
}
}


@ -1,3 +1,6 @@
use derivative::Derivative;
use url::Url;
use puffin_package::dist_info_name::DistInfoName;
use puffin_package::package_name::PackageName;
@ -8,18 +11,26 @@ use puffin_package::package_name::PackageName;
/// 2. Uses the same strategy as pip and posy to handle extras: for each extra, we create a virtual
/// package (e.g., `black[colorama]`), and mark it as a dependency of the real package (e.g.,
/// `black`). We then discard the virtual packages at the end of the resolution process.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Debug, Clone, Eq, Derivative)]
#[derivative(PartialEq, Hash)]
pub enum PubGrubPackage {
Root,
Package(PackageName, Option<DistInfoName>),
Package(
PackageName,
Option<DistInfoName>,
#[derivative(PartialEq = "ignore")]
#[derivative(PartialOrd = "ignore")]
#[derivative(Hash = "ignore")]
Option<Url>,
),
}
impl std::fmt::Display for PubGrubPackage {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
PubGrubPackage::Root => write!(f, "root"),
PubGrubPackage::Package(name, None) => write!(f, "{name}"),
PubGrubPackage::Package(name, Some(extra)) => {
PubGrubPackage::Package(name, None, ..) => write!(f, "{name}"),
PubGrubPackage::Package(name, Some(extra), ..) => {
write!(f, "{name}[{extra}]")
}
}


@ -17,7 +17,7 @@ impl PubGrubPriorities {
pub(crate) fn get(&self, package: &PubGrubPackage) -> Option<PubGrubPriority> {
match package {
PubGrubPackage::Root => Some(Reverse(0)),
PubGrubPackage::Package(name, _) => self
PubGrubPackage::Package(name, _, _) => self
.0
.get(name)
.copied()


@ -66,19 +66,29 @@ impl Graph {
let mut inverse =
FxHashMap::with_capacity_and_hasher(selection.len(), BuildHasherDefault::default());
for (package, version) in selection {
let PubGrubPackage::Package(package_name, None) = package else {
continue;
};
let version = Version::from(version.clone());
let file = pins
.get(package_name)
.and_then(|versions| versions.get(&version))
.unwrap()
.clone();
let pinned_package = RemoteDistribution::new(package_name.clone(), version, file);
let index = graph.add_node(pinned_package);
match package {
PubGrubPackage::Package(package_name, None, None) => {
let version = Version::from(version.clone());
let file = pins
.get(package_name)
.and_then(|versions| versions.get(&version))
.unwrap()
.clone();
let pinned_package =
RemoteDistribution::from_registry(package_name.clone(), version, file);
inverse.insert(package_name, index);
let index = graph.add_node(pinned_package);
inverse.insert(package_name, index);
}
PubGrubPackage::Package(package_name, None, Some(url)) => {
let pinned_package =
RemoteDistribution::from_url(package_name.clone(), url.clone());
let index = graph.add_node(pinned_package);
inverse.insert(package_name, index);
}
_ => {}
};
}
// Add every edge to the graph.
@ -87,17 +97,18 @@ impl Graph {
if let Kind::FromDependencyOf(self_package, self_version, dependency_package, _) =
&state.incompatibility_store[*id].kind
{
let PubGrubPackage::Package(self_package, None) = self_package else {
let PubGrubPackage::Package(self_package, None, _) = self_package else {
continue;
};
let PubGrubPackage::Package(dependency_package, None) = dependency_package
let PubGrubPackage::Package(dependency_package, None, _) = dependency_package
else {
continue;
};
if self_version.contains(version) {
let self_index = &inverse[self_package];
let dependency_index = &inverse[dependency_package];
graph.add_edge(*dependency_index, *self_index, ());
graph.update_edge(*dependency_index, *self_index, ());
}
}
}
@ -126,13 +137,21 @@ impl Graph {
nodes.sort_unstable_by_key(|(_, package)| package.name());
self.0
.node_indices()
.map(|node| Requirement {
name: self.0[node].name().to_string(),
extras: None,
version_or_url: Some(VersionOrUrl::VersionSpecifier(VersionSpecifiers::from(
VersionSpecifier::equals_version(self.0[node].version().clone()),
))),
marker: None,
.map(|node| match &self.0[node] {
RemoteDistribution::Registry(name, version, _file) => Requirement {
name: name.to_string(),
extras: None,
version_or_url: Some(VersionOrUrl::VersionSpecifier(VersionSpecifiers::from(
VersionSpecifier::equals_version(version.clone()),
))),
marker: None,
},
RemoteDistribution::Url(name, url) => Requirement {
name: name.to_string(),
extras: None,
version_or_url: Some(VersionOrUrl::Url(url.clone())),
marker: None,
},
})
.collect()
}
@ -151,7 +170,7 @@ impl std::fmt::Display for Graph {
// Print out the dependency graph.
for (index, package) in nodes {
writeln!(f, "{}=={}", package.name(), package.version())?;
writeln!(f, "{package}")?;
let mut edges = self
.0


@ -15,13 +15,14 @@ use pubgrub::solver::{Incompatibility, State};
use pubgrub::type_aliases::DependencyConstraints;
use tokio::select;
use tracing::{debug, error, trace};
use url::Url;
use waitmap::WaitMap;
use distribution_filename::{SourceDistributionFilename, WheelFilename};
use pep508_rs::{MarkerEnvironment, Requirement};
use platform_tags::Tags;
use puffin_client::RegistryClient;
use puffin_distribution::RemoteDistributionRef;
use puffin_distribution::{RemoteDistributionRef, VersionOrUrl};
use puffin_package::dist_info_name::DistInfoName;
use puffin_package::package_name::PackageName;
use puffin_package::pypi_types::{File, Metadata21, SimpleJson};
@ -118,18 +119,39 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
let root = PubGrubPackage::Root;
// Keep track of the packages for which we've requested metadata.
let mut requested_packages = FxHashSet::default();
let mut requested_versions = FxHashSet::default();
let mut in_flight = InFlight::default();
let mut pins = FxHashMap::default();
let mut priorities = PubGrubPriorities::default();
// Push all the requirements into the package sink.
for requirement in &self.requirements {
debug!("Adding root dependency: {}", requirement);
debug!("Adding root dependency: {requirement}");
let package_name = PackageName::normalize(&requirement.name);
if requested_packages.insert(package_name.clone()) {
priorities.add(package_name.clone());
request_sink.unbounded_send(Request::Package(package_name))?;
match &requirement.version_or_url {
// If this is a registry-based package, fetch the package metadata.
None | Some(pep508_rs::VersionOrUrl::VersionSpecifier(_)) => {
if in_flight.insert_package(&package_name) {
priorities.add(package_name.clone());
request_sink.unbounded_send(Request::Package(package_name))?;
}
}
// If this is a URL-based package, fetch the source.
Some(pep508_rs::VersionOrUrl::Url(url)) => {
if in_flight.insert_url(url) {
priorities.add(package_name.clone());
if WheelFilename::try_from(url).is_ok() {
request_sink.unbounded_send(Request::WheelUrl(
package_name.clone(),
url.clone(),
))?;
} else {
request_sink.unbounded_send(Request::SdistUrl(
package_name.clone(),
url.clone(),
))?;
}
}
}
}
}
@@ -146,7 +168,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
// Pre-visit all candidate packages, to allow metadata to be fetched in parallel.
self.pre_visit(
state.partial_solution.prioritized_packages(),
&mut requested_versions,
&mut in_flight,
request_sink,
)?;
@@ -174,7 +196,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
&next,
term_intersection.unwrap_positive(),
&mut pins,
&mut requested_versions,
&mut in_flight,
request_sink,
)
.await?;
@@ -211,7 +233,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
&version,
&mut pins,
&mut priorities,
&mut requested_packages,
&mut in_flight,
request_sink,
)
.await?
@@ -254,18 +276,54 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
}
}
/// Visit the set of candidate packages prior to selection. This allows us to fetch metadata for
/// all of the packages in parallel.
/// Visit a [`PubGrubPackage`] prior to selection. This should be called on a [`PubGrubPackage`]
/// before it is selected, to allow metadata to be fetched in parallel.
fn visit_package(
package: &PubGrubPackage,
priorities: &mut PubGrubPriorities,
in_flight: &mut InFlight,
request_sink: &futures::channel::mpsc::UnboundedSender<Request>,
) -> Result<(), ResolveError> {
match package {
PubGrubPackage::Root => {}
PubGrubPackage::Package(package_name, _extra, None) => {
// Emit a request to fetch the metadata for this package.
if in_flight.insert_package(package_name) {
priorities.add(package_name.clone());
request_sink.unbounded_send(Request::Package(package_name.clone()))?;
}
}
PubGrubPackage::Package(package_name, _extra, Some(url)) => {
// Emit a request to fetch the metadata for this package.
if in_flight.insert_url(url) {
priorities.add(package_name.clone());
if WheelFilename::try_from(url).is_ok() {
// Kick off a request to download the wheel.
request_sink
.unbounded_send(Request::WheelUrl(package_name.clone(), url.clone()))?;
} else {
// Otherwise, assume this is a source distribution.
request_sink
.unbounded_send(Request::SdistUrl(package_name.clone(), url.clone()))?;
}
}
}
}
Ok(())
}
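`visit_package` routes a URL to the wheel or sdist path purely by whether its filename parses as a wheel name. A simplified standalone sketch of that dispatch, with a `.whl` suffix check standing in for the real `WheelFilename` parser:

```rust
use url::Url;

#[derive(Debug, PartialEq)]
enum UrlRequest {
    Wheel(Url),
    Sdist(Url),
}

// Simplified stand-in for `WheelFilename::try_from(&Url)`: treat any URL
// whose final path segment ends in `.whl` as a wheel, and assume everything
// else is a source distribution.
fn classify(url: Url) -> UrlRequest {
    let is_wheel = url
        .path_segments()
        .and_then(|mut segments| segments.next_back())
        .is_some_and(|name| name.ends_with(".whl"));
    if is_wheel {
        UrlRequest::Wheel(url)
    } else {
        UrlRequest::Sdist(url)
    }
}

fn main() {
    let wheel = Url::parse("https://example.com/werkzeug-3.0.1-py3-none-any.whl").unwrap();
    assert_eq!(classify(wheel.clone()), UrlRequest::Wheel(wheel));

    let sdist = Url::parse("https://example.com/werkzeug-3.0.1.tar.gz").unwrap();
    assert_eq!(classify(sdist.clone()), UrlRequest::Sdist(sdist));
}
```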
/// Visit the set of [`PubGrubPackage`] candidates prior to selection. This allows us to fetch
/// metadata for all of the packages in parallel.
fn pre_visit(
&self,
packages: impl Iterator<Item = (&'a PubGrubPackage, &'a Range<PubGrubVersion>)>,
in_flight: &mut FxHashSet<String>,
in_flight: &mut InFlight,
request_sink: &futures::channel::mpsc::UnboundedSender<Request>,
) -> Result<(), ResolveError> {
// Iterate over the potential packages, and fetch file metadata for any of them. These
// represent our current best guesses for the versions that we _might_ select.
for (package, range) in packages {
let PubGrubPackage::Package(package_name, _) = package else {
let PubGrubPackage::Package(package_name, _extra, None) = package else {
continue;
};
@@ -285,16 +343,16 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
// Emit a request to fetch the metadata for this version.
match candidate.file {
DistributionFile::Wheel(file) => {
if in_flight.insert(file.hashes.sha256.clone()) {
if in_flight.insert_file(&file) {
request_sink.unbounded_send(Request::Wheel(file.clone()))?;
}
}
DistributionFile::Sdist(file) => {
if in_flight.insert(file.hashes.sha256.clone()) {
if in_flight.insert_file(&file) {
request_sink.unbounded_send(Request::Sdist(
file.clone(),
candidate.package_name.clone(),
candidate.version.clone().into(),
file.clone(),
))?;
}
}
@@ -310,20 +368,42 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
package: &PubGrubPackage,
range: &Range<PubGrubVersion>,
pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, File>>,
in_flight: &mut FxHashSet<String>,
in_flight: &mut InFlight,
request_sink: &futures::channel::mpsc::UnboundedSender<Request>,
) -> Result<Option<PubGrubVersion>, ResolveError> {
return match package {
PubGrubPackage::Root => Ok(Some(MIN_VERSION.clone())),
PubGrubPackage::Package(package_name, _) => {
PubGrubPackage::Package(package_name, _extra, Some(url)) => {
debug!("Searching for a compatible version of {package_name} @ {url} ({range})",);
if let Ok(wheel_filename) = WheelFilename::try_from(url) {
// If the URL is that of a wheel, extract the version.
let version = PubGrubVersion::from(wheel_filename.version);
if range.contains(&version) {
Ok(Some(version))
} else {
Ok(None)
}
} else {
// Otherwise, assume this is a source distribution.
let entry = self.index.versions.wait(url.as_str()).await.unwrap();
let metadata = entry.value();
let version = PubGrubVersion::from(metadata.version.clone());
if range.contains(&version) {
Ok(Some(version))
} else {
Ok(None)
}
}
}
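For a wheel URL, then, version selection is pure filename inspection plus a range check (no download required), whereas an sdist URL has to wait for built metadata. A minimal sketch of the wheel half, with naive string splitting standing in for `WheelFilename`:

```rust
// Naive stand-in for `WheelFilename`: wheel names follow
// `{name}-{version}-{python tag}-{abi tag}-{platform tag}.whl`, so the second
// `-`-separated field is the version. (Real parsing must handle more cases.)
fn wheel_version(filename: &str) -> Option<&str> {
    filename.strip_suffix(".whl")?.split('-').nth(1)
}

fn main() {
    assert_eq!(
        wheel_version("werkzeug-3.0.1-py3-none-any.whl"),
        Some("3.0.1")
    );
    // The resolver then checks this version against the requested range,
    // answering `Ok(None)` ("no compatible version") if it falls outside.
}
```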
PubGrubPackage::Package(package_name, _extra, None) => {
// Wait for the metadata to be available.
let entry = self.index.packages.wait(package_name).await.unwrap();
let version_map = entry.value();
debug!(
"Searching for a compatible version of {} ({})",
package_name, range,
);
debug!("Searching for a compatible version of {package_name} ({range})");
// Find a compatible version.
let Some(candidate) = self.selector.select(package_name, range, version_map) else {
@@ -350,16 +430,16 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
// Emit a request to fetch the metadata for this version.
match candidate.file {
DistributionFile::Wheel(file) => {
if in_flight.insert(file.hashes.sha256.clone()) {
if in_flight.insert_file(&file) {
request_sink.unbounded_send(Request::Wheel(file.clone()))?;
}
}
DistributionFile::Sdist(file) => {
if in_flight.insert(file.hashes.sha256.clone()) {
if in_flight.insert_file(&file) {
request_sink.unbounded_send(Request::Sdist(
file.clone(),
candidate.package_name.clone(),
candidate.version.clone().into(),
file.clone(),
))?;
}
}
@@ -378,7 +458,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
version: &PubGrubVersion,
pins: &mut FxHashMap<PackageName, FxHashMap<pep440_rs::Version, File>>,
priorities: &mut PubGrubPriorities,
requested_packages: &mut FxHashSet<PackageName>,
in_flight: &mut InFlight,
request_sink: &futures::channel::mpsc::UnboundedSender<Request>,
) -> Result<Dependencies, ResolveError> {
match package {
@@ -391,12 +471,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
iter_requirements(self.requirements.iter(), None, None, self.markers)
{
// Emit a request to fetch the metadata for this package.
if let PubGrubPackage::Package(package_name, None) = &package {
if requested_packages.insert(package_name.clone()) {
priorities.add(package_name.clone());
request_sink.unbounded_send(Request::Package(package_name.clone()))?;
}
}
Self::visit_package(&package, priorities, in_flight, request_sink)?;
// Add it to the constraints.
match constraints.entry(package) {
@@ -409,10 +484,15 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
}
}
debug!("Got constraints: {:#?}", constraints);
// If any requirements were further constrained by the user, add those constraints.
for constraint in &self.constraints {
let package =
PubGrubPackage::Package(PackageName::normalize(&constraint.name), None);
let package = PubGrubPackage::Package(
PackageName::normalize(&constraint.name),
None,
None,
);
if let Some(range) = constraints.get_mut(&package) {
*range = range.intersection(
&version_range(constraint.version_or_url.as_ref()).unwrap(),
@@ -422,20 +502,17 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
Ok(Dependencies::Known(constraints))
}
PubGrubPackage::Package(package_name, extra) => {
if let Some(extra) = extra.as_ref() {
debug!(
"Fetching dependencies for {}[{}]@{}",
package_name, extra, version
);
} else {
debug!("Fetching dependencies for {}@{}", package_name, version);
}
PubGrubPackage::Package(package_name, extra, url) => {
// Wait for the metadata to be available.
let versions = pins.get(package_name).unwrap();
let file = versions.get(version.into()).unwrap();
let entry = self.index.versions.wait(&file.hashes.sha256).await.unwrap();
let entry = match url {
Some(url) => self.index.versions.wait(url.as_str()).await.unwrap(),
None => {
let versions = pins.get(package_name).unwrap();
let file = versions.get(version.into()).unwrap();
self.index.versions.wait(&file.hashes.sha256).await.unwrap()
}
};
let metadata = entry.value();
let mut constraints =
@@ -450,12 +527,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
debug!("Adding transitive dependency: {package} {version}");
// Emit a request to fetch the metadata for this package.
if let PubGrubPackage::Package(package_name, None) = &package {
if requested_packages.insert(package_name.clone()) {
priorities.add(package_name.clone());
request_sink.unbounded_send(Request::Package(package_name.clone()))?;
}
}
Self::visit_package(&package, priorities, in_flight, request_sink)?;
// Add it to the constraints.
match constraints.entry(package) {
@@ -470,8 +542,11 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
// If any packages were further constrained by the user, add those constraints.
for constraint in &self.constraints {
let package =
PubGrubPackage::Package(PackageName::normalize(&constraint.name), None);
let package = PubGrubPackage::Package(
PackageName::normalize(&constraint.name),
None,
None,
);
if let Some(range) = constraints.get_mut(&package) {
*range = range.intersection(
&version_range(constraint.version_or_url.as_ref()).unwrap(),
@@ -488,7 +563,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
return Ok(Dependencies::Unknown);
}
constraints.insert(
PubGrubPackage::Package(package_name.clone(), None),
PubGrubPackage::Package(package_name.clone(), None, None),
Range::singleton(version.clone()),
);
}
@@ -507,7 +582,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
while let Some(response) = response_stream.next().await {
match response? {
Response::Package(package_name, metadata) => {
trace!("Received package metadata for {}", package_name);
trace!("Received package metadata for: {}", package_name);
// Group the distributions by version and kind, discarding any incompatible
// distributions.
@@ -547,17 +622,25 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
.insert(package_name.clone(), version_map);
}
Response::Wheel(file, metadata) => {
trace!("Received file metadata for {}", file.filename);
trace!("Received wheel metadata for: {}", file.filename);
self.index
.versions
.insert(file.hashes.sha256.clone(), metadata);
}
Response::Sdist(file, metadata) => {
trace!("Received sdist build metadata for {}", file.filename);
trace!("Received sdist metadata for: {}", file.filename);
self.index
.versions
.insert(file.hashes.sha256.clone(), metadata);
}
Response::WheelUrl(url, metadata) => {
trace!("Received remote wheel metadata for: {}", url);
self.index.versions.insert(url.to_string(), metadata);
}
Response::SdistUrl(url, metadata) => {
trace!("Received remote source distribution metadata for: {}", url);
self.index.versions.insert(url.to_string(), metadata);
}
}
}
@@ -566,6 +649,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
async fn process_request(&'a self, request: Request) -> Result<Response, ResolveError> {
match request {
// Fetch package metadata from the registry.
Request::Package(package_name) => {
self.client
.simple(package_name.clone())
@@ -573,6 +657,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
.map_err(ResolveError::Client)
.await
}
// Fetch wheel metadata from the registry.
Request::Wheel(file) => {
self.client
.file(file.clone().into())
@@ -580,27 +665,28 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
.map_err(ResolveError::Client)
.await
}
Request::Sdist(file, package_name, version) => {
// Build a source distribution from the registry, returning its metadata.
Request::Sdist(package_name, version, file) => {
let build_tree = SourceDistributionBuildTree::new(self.build_context);
let distribution = RemoteDistributionRef::new(&package_name, &version, &file);
let distribution =
RemoteDistributionRef::from_registry(&package_name, &version, &file);
let metadata = match build_tree.find_dist_info(&distribution, self.tags) {
Ok(Some(metadata)) => metadata,
Ok(None) => build_tree
.download_and_build_sdist(&distribution, self.client)
.await
.map_err(|err| ResolveError::SourceDistribution {
.map_err(|err| ResolveError::RegistryDistribution {
filename: file.filename.clone(),
err,
})?,
Err(err) => {
error!(
"Failed to read source distribution {} from cache: {}",
file.filename, err
"Failed to read source distribution {distribution} from cache: {err}",
);
build_tree
.download_and_build_sdist(&distribution, self.client)
.await
.map_err(|err| ResolveError::SourceDistribution {
.map_err(|err| ResolveError::RegistryDistribution {
filename: file.filename.clone(),
err,
})?
@@ -608,13 +694,79 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
};
Ok(Response::Sdist(file, metadata))
}
// Build a source distribution from a remote URL, returning its metadata.
Request::SdistUrl(package_name, url) => {
let build_tree = SourceDistributionBuildTree::new(self.build_context);
let distribution = RemoteDistributionRef::from_url(&package_name, &url);
let metadata = match build_tree.find_dist_info(&distribution, self.tags) {
Ok(Some(metadata)) => metadata,
Ok(None) => build_tree
.download_and_build_sdist(&distribution, self.client)
.await
.map_err(|err| ResolveError::UrlDistribution {
url: url.clone(),
err,
})?,
Err(err) => {
error!(
"Failed to read source distribution {distribution} from cache: {err}",
);
build_tree
.download_and_build_sdist(&distribution, self.client)
.await
.map_err(|err| ResolveError::UrlDistribution {
url: url.clone(),
err,
})?
}
};
Ok(Response::SdistUrl(url, metadata))
}
// Fetch wheel metadata from a remote URL.
Request::WheelUrl(package_name, url) => {
let build_tree = SourceDistributionBuildTree::new(self.build_context);
let distribution = RemoteDistributionRef::from_url(&package_name, &url);
let metadata = match build_tree.find_dist_info(&distribution, self.tags) {
Ok(Some(metadata)) => metadata,
Ok(None) => build_tree
.download_wheel(&distribution, self.client)
.await
.map_err(|err| ResolveError::UrlDistribution {
url: url.clone(),
err,
})?,
Err(err) => {
error!(
"Failed to read built distribution {distribution} from cache: {err}",
);
build_tree
.download_wheel(&distribution, self.client)
.await
.map_err(|err| ResolveError::UrlDistribution {
url: url.clone(),
err,
})?
}
};
Ok(Response::WheelUrl(url, metadata))
}
}
}
fn on_progress(&self, package: &PubGrubPackage, version: &PubGrubVersion) {
if let Some(reporter) = self.reporter.as_ref() {
if let PubGrubPackage::Package(package_name, extra) = package {
reporter.on_progress(package_name, extra.as_ref(), version.into());
match package {
PubGrubPackage::Root => {}
PubGrubPackage::Package(package_name, extra, Some(url)) => {
reporter.on_progress(package_name, extra.as_ref(), VersionOrUrl::Url(url));
}
PubGrubPackage::Package(package_name, extra, None) => {
reporter.on_progress(
package_name,
extra.as_ref(),
VersionOrUrl::Version(version.into()),
);
}
}
}
}
@@ -628,12 +780,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
pub trait Reporter: Send + Sync {
/// Callback to invoke when a dependency is resolved.
fn on_progress(
&self,
name: &PackageName,
extra: Option<&DistInfoName>,
version: &pep440_rs::Version,
);
fn on_progress(&self, name: &PackageName, extra: Option<&DistInfoName>, version: VersionOrUrl);
/// Callback to invoke when the resolution is complete.
fn on_complete(&self);
@@ -644,30 +791,64 @@ pub trait Reporter: Send + Sync {
enum Request {
/// A request to fetch the metadata for a package.
Package(PackageName),
/// A request to fetch and build the source distribution for a specific package version
Sdist(SdistFile, PackageName, pep440_rs::Version),
/// A request to fetch the metadata for a specific version of a package.
/// A request to fetch wheel metadata from a registry.
Wheel(WheelFile),
/// A request to fetch source distribution metadata from a registry.
Sdist(PackageName, pep440_rs::Version, SdistFile),
/// A request to fetch wheel metadata from a remote URL.
WheelUrl(PackageName, Url),
/// A request to fetch source distribution metadata from a remote URL.
SdistUrl(PackageName, Url),
}
#[derive(Debug)]
enum Response {
/// The returned metadata for a package.
/// The returned metadata for a package hosted on a registry.
Package(PackageName, SimpleJson),
/// The returned metadata for a specific version of a package.
/// The returned metadata for a wheel hosted on a registry.
Wheel(WheelFile, Metadata21),
/// The returned metadata for an sdist build.
/// The returned metadata for a source distribution hosted on a registry.
Sdist(SdistFile, Metadata21),
/// The returned metadata for a wheel hosted on a remote URL.
WheelUrl(Url, Metadata21),
/// The returned metadata for a source distribution hosted on a remote URL.
SdistUrl(Url, Metadata21),
}
pub(crate) type VersionMap = BTreeMap<PubGrubVersion, DistributionFile>;
/// In-memory index of in-flight network requests. Any request in an [`InFlight`] state will
/// eventually be inserted into an [`Index`].
#[derive(Debug, Default)]
struct InFlight {
/// The set of requested [`PackageName`]s.
packages: FxHashSet<PackageName>,
/// The set of requested registry-based files, represented by their SHAs.
files: FxHashSet<String>,
/// The set of requested URLs.
urls: FxHashSet<Url>,
}
impl InFlight {
fn insert_package(&mut self, package_name: &PackageName) -> bool {
self.packages.insert(package_name.clone())
}
fn insert_file(&mut self, file: &File) -> bool {
self.files.insert(file.hashes.sha256.clone())
}
fn insert_url(&mut self, url: &Url) -> bool {
self.urls.insert(url.clone())
}
}
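`InFlight` is just three dedupe sets: each `insert_*` returns `true` only the first time a key is seen, so callers gate the request send on that boolean. The same pattern in miniature (URL set only, with plain strings):

```rust
use std::collections::HashSet;

// Miniature version of the dedupe pattern above: `HashSet::insert` returns
// `true` only the first time a key is seen, so the send is naturally gated.
#[derive(Default)]
struct InFlight {
    urls: HashSet<String>,
}

impl InFlight {
    fn insert_url(&mut self, url: &str) -> bool {
        self.urls.insert(url.to_string())
    }
}

fn main() {
    let mut in_flight = InFlight::default();
    // First sighting: the request goes out.
    assert!(in_flight.insert_url("https://example.com/werkzeug-3.0.1-py3-none-any.whl"));
    // Every later sighting is a no-op, so the URL is fetched at most once.
    assert!(!in_flight.insert_url("https://example.com/werkzeug-3.0.1-py3-none-any.whl"));
}
```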
/// In-memory index of package metadata.
struct Index {
/// A map from package name to the metadata for that package.
packages: WaitMap<PackageName, VersionMap>,
/// A map from wheel SHA to the metadata for that wheel.
/// A map from file SHA or URL to the metadata for that distribution.
versions: WaitMap<String, Metadata21>,
}
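Because `versions` is keyed by a plain `String`, registry files and direct URLs share one key space: registry wheels and sdists key by their SHA-256, URL distributions by the URL itself, matching the `Response` handling earlier. A sketch of that convention with a hypothetical key type (not part of this PR):

```rust
// Hypothetical key type making the convention explicit: both variants
// collapse into the same `String` key space of the `versions` map.
enum VersionKey {
    /// Registry-based files, keyed by their SHA-256 digest.
    Sha256(String),
    /// URL-based distributions, keyed by the URL itself.
    Url(String),
}

impl VersionKey {
    fn into_key(self) -> String {
        match self {
            VersionKey::Sha256(sha) => sha,
            VersionKey::Url(url) => url,
        }
    }
}

fn main() {
    let registry = VersionKey::Sha256("<sha256-of-file>".to_string());
    let direct =
        VersionKey::Url("https://example.com/werkzeug-3.0.1-py3-none-any.whl".to_string());
    // Both land, undistinguished, in the `WaitMap<String, Metadata21>`.
    assert_ne!(registry.into_key(), direct.into_key());
}
```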


@@ -6,7 +6,6 @@ use fs_err::tokio as fs;
use tempfile::tempdir;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::debug;
use url::Url;
use zip::ZipArchive;
use distribution_filename::WheelFilename;
@@ -18,6 +17,8 @@ use puffin_traits::BuildContext;
const BUILT_WHEELS_CACHE: &str = "built-wheels-v0";
const REMOTE_WHEELS_CACHE: &str = "remote-wheels-v0";
/// Stores wheels built from source distributions. We need to keep these separate from the
/// regular wheel cache, since a wheel with the same name may be uploaded after we make our
/// build, in which case the hashes would clash.
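The two namespaces keep locally built wheels apart from wheels downloaded verbatim. A tiny sketch of the resulting directory layout, using a hypothetical `cache_dir` helper (the real code computes these paths inline, as the next hunks show):

```rust
use std::path::{Path, PathBuf};

const BUILT_WHEELS_CACHE: &str = "built-wheels-v0";
const REMOTE_WHEELS_CACHE: &str = "remote-wheels-v0";

// Hypothetical helper illustrating the layout: each distribution gets its own
// subdirectory, namespaced by how the wheel was obtained.
fn cache_dir(cache: &Path, built_locally: bool, distribution_id: &str) -> PathBuf {
    let namespace = if built_locally {
        BUILT_WHEELS_CACHE
    } else {
        REMOTE_WHEELS_CACHE
    };
    cache.join(namespace).join(distribution_id)
}

fn main() {
    // (Unix-style paths assumed.)
    let cache = Path::new("/tmp/puffin-cache");
    assert_eq!(
        cache_dir(cache, true, "werkzeug-3.0.1"),
        PathBuf::from("/tmp/puffin-cache/built-wheels-v0/werkzeug-3.0.1")
    );
}
```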
@@ -47,36 +48,74 @@ impl<'a, T: BuildContext> SourceDistributionBuildTree<'a, T> {
distribution: &RemoteDistributionRef<'_>,
client: &RegistryClient,
) -> Result<Metadata21> {
debug!("Building: {}", distribution.file().filename);
let url = Url::parse(&distribution.file().url)?;
debug!("Building: {distribution}");
let url = distribution.url()?;
let reader = client.stream_external(&url).await?;
let mut reader = tokio::io::BufReader::new(reader.compat());
let temp_dir = tempdir()?;
let sdist_dir = temp_dir.path().join("sdist");
tokio::fs::create_dir(&sdist_dir).await?;
let sdist_file = sdist_dir.join(&distribution.file().filename);
// Download the source distribution.
let sdist_filename = distribution.filename()?;
let sdist_file = temp_dir.path().join(sdist_filename.as_ref());
let mut writer = tokio::fs::File::create(&sdist_file).await?;
tokio::io::copy(&mut reader, &mut writer).await?;
// Create a directory for the wheel.
let wheel_dir = self.0.cache().map_or_else(
|| temp_dir.path().join(BUILT_WHEELS_CACHE),
|cache| cache.join(BUILT_WHEELS_CACHE).join(distribution.id()),
);
fs::create_dir_all(&wheel_dir).await?;
// Build the wheel.
let disk_filename = self
.0
.build_source_distribution(&sdist_file, &wheel_dir)
.await?;
// Read the metadata from the wheel.
let wheel = CachedWheel {
path: wheel_dir.join(&disk_filename),
filename: WheelFilename::from_str(&disk_filename)?,
};
let metadata21 = read_dist_info(&wheel)?;
debug!("Finished building: {}", distribution.file().filename);
debug!("Finished building: {distribution}");
Ok(metadata21)
}
pub(crate) async fn download_wheel(
&self,
distribution: &RemoteDistributionRef<'_>,
client: &RegistryClient,
) -> Result<Metadata21> {
debug!("Downloading: {distribution}");
let url = distribution.url()?;
let reader = client.stream_external(&url).await?;
let mut reader = tokio::io::BufReader::new(reader.compat());
let temp_dir = tempdir()?;
// Create a directory for the wheel.
let wheel_dir = self.0.cache().map_or_else(
|| temp_dir.path().join(REMOTE_WHEELS_CACHE),
|cache| cache.join(REMOTE_WHEELS_CACHE).join(distribution.id()),
);
fs::create_dir_all(&wheel_dir).await?;
// Download the wheel.
let wheel_filename = distribution.filename()?;
let wheel_file = wheel_dir.join(wheel_filename.as_ref());
let mut writer = tokio::fs::File::create(&wheel_file).await?;
tokio::io::copy(&mut reader, &mut writer).await?;
// Read the metadata from the wheel.
let wheel = CachedWheel {
path: wheel_file,
filename: WheelFilename::from_str(&wheel_filename)?,
};
let metadata21 = read_dist_info(&wheel)?;
debug!("Finished downloading: {distribution}");
Ok(metadata21)
}
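`download_wheel` skips the build step entirely: a wheel is just a zip archive, so metadata can be read straight from `{name}-{version}.dist-info/METADATA`. A hedged sketch of that read using the `zip` crate directly (hypothetical helper and paths; the PR itself goes through `CachedWheel` and `read_dist_info`):

```rust
use std::fs::File;
use std::io::Read;

use zip::ZipArchive;

// Sketch (assumed file layout, hypothetical paths) of reading wheel metadata
// without building: wheels are zip archives containing a
// `{name}-{version}.dist-info/METADATA` file in RFC 822 format.
fn read_metadata(wheel_path: &str, dist_info: &str) -> anyhow::Result<String> {
    let mut archive = ZipArchive::new(File::open(wheel_path)?)?;
    let mut file = archive.by_name(&format!("{dist_info}/METADATA"))?;
    let mut contents = String::new();
    file.read_to_string(&mut contents)?;
    Ok(contents)
}

fn main() -> anyhow::Result<()> {
    let metadata = read_metadata(
        "werkzeug-3.0.1-py3-none-any.whl",
        "werkzeug-3.0.1.dist-info",
    )?;
    println!("{metadata}");
    Ok(())
}
```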


@@ -12,7 +12,7 @@ use fxhash::FxHashMap;
use tracing::debug;
use distribution_filename::WheelFilename;
use pep508_rs::Requirement;
use pep508_rs::{Requirement, VersionOrUrl};
use platform_tags::Tags;
use puffin_client::RegistryClient;
use puffin_distribution::RemoteDistribution;
@@ -62,13 +62,11 @@ impl<'a> WheelFinder<'a> {
.map(|request: Request| match request {
Request::Package(requirement) => Either::Left(
self.client
// TODO(charlie): Remove this clone.
.simple(requirement.name.clone())
.map_ok(move |metadata| Response::Package(requirement, metadata)),
),
Request::Version(requirement, file) => Either::Right(
self.client
// TODO(charlie): Remove this clone.
.file(file.clone())
.map_ok(move |metadata| Response::Version(requirement, file, metadata)),
),
@@ -76,15 +74,33 @@ impl<'a> WheelFinder<'a> {
.buffer_unordered(32)
.ready_chunks(32);
// Push all the requirements into the package sink.
for requirement in requirements {
package_sink.unbounded_send(Request::Package(requirement.clone()))?;
}
// Resolve the requirements.
let mut resolution: FxHashMap<PackageName, RemoteDistribution> =
FxHashMap::with_capacity_and_hasher(requirements.len(), BuildHasherDefault::default());
// Push all the requirements into the package sink.
for requirement in requirements {
match requirement.version_or_url.as_ref() {
None | Some(VersionOrUrl::VersionSpecifier(_)) => {
package_sink.unbounded_send(Request::Package(requirement.clone()))?;
}
Some(VersionOrUrl::Url(url)) => {
let package_name = PackageName::normalize(&requirement.name);
let package = RemoteDistribution::from_url(package_name.clone(), url.clone());
resolution.insert(package_name, package);
}
}
}
// If all the dependencies were already resolved, we're done.
if resolution.len() == requirements.len() {
if let Some(reporter) = self.reporter.as_ref() {
reporter.on_complete();
}
return Ok(Resolution::new(resolution));
}
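URL requirements never touch the network here: they resolve on the spot, and the early return fires when every requirement was URL-based. A standalone sketch of that partition, with hypothetical types in place of `Requirement` and `RemoteDistribution`:

```rust
// Hypothetical types illustrating the partition above: URL requirements
// resolve immediately, everything else becomes a registry request.
enum Spec {
    Registry(String),    // package name, to be looked up
    Url(String, String), // package name + direct URL, already resolved
}

fn partition(specs: Vec<Spec>) -> (Vec<String>, Vec<(String, String)>) {
    let mut requests = Vec::new();
    let mut resolved = Vec::new();
    for spec in specs {
        match spec {
            Spec::Registry(name) => requests.push(name),
            Spec::Url(name, url) => resolved.push((name, url)),
        }
    }
    (requests, resolved)
}

fn main() {
    let (requests, resolved) = partition(vec![Spec::Url(
        "werkzeug".into(),
        "https://example.com/werkzeug-3.0.1-py3-none-any.whl".into(),
    )]);
    // All requirements were URL-based, so no registry requests are needed
    // and the finder can return early.
    assert!(requests.is_empty());
    assert_eq!(resolved.len(), 1);
}
```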
// Otherwise, wait for the package stream to complete.
while let Some(chunk) = package_stream.next().await {
for result in chunk {
let result: Response = result?;
@@ -114,7 +130,7 @@ impl<'a> WheelFinder<'a> {
metadata.name, metadata.version, file.filename
);
let package = RemoteDistribution::new(
let package = RemoteDistribution::from_registry(
PackageName::normalize(&metadata.name),
metadata.version,
file,