Add support for subdirectories in URL dependencies (#312)

Closes https://github.com/astral-sh/puffin/issues/307.
This commit is contained in:
Charlie Marsh 2023-11-03 12:28:38 -07:00 committed by GitHub
parent cbfd6af125
commit edce4ccb24
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 155 additions and 29 deletions

View file

@ -121,19 +121,25 @@ impl SourceDistributionBuilder {
/// contents from an archive if necessary.
pub async fn setup(
sdist: &Path,
subdirectory: Option<&Path>,
interpreter_info: &InterpreterInfo,
build_context: &impl BuildContext,
) -> Result<SourceDistributionBuilder, Error> {
let temp_dir = tempdir()?;
// TODO(konstin): Parse and verify filenames
let source_tree = if fs::metadata(sdist)?.is_dir() {
let source_root = if fs::metadata(sdist)?.is_dir() {
sdist.to_path_buf()
} else {
debug!("Unpacking for build: {}", sdist.display());
let extracted = temp_dir.path().join("extracted");
extract_archive(sdist, &extracted)?
};
let source_tree = if let Some(subdir) = subdirectory {
source_root.join(subdir)
} else {
source_root
};
// Check if we have a PEP 517 build, a legacy setup.py, or an edge case
let build_system = if source_tree.join("pyproject.toml").is_file() {

View file

@ -849,6 +849,42 @@ fn compile_git_refs_https_dependency() -> Result<()> {
Ok(())
}
/// Resolve a specific Git dependency with a subdirectory.
#[test]
fn compile_git_subdirectory_dependency() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
let requirements_in = temp_dir.child("requirements.in");
requirements_in.touch()?;
requirements_in.write_str("hypothesis @ git+https://github.com/Zac-HD/hypothesis.git@ablation-mutation#subdirectory=hypothesis-python")?;
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Resolve two packages from a `requirements.in` file with the same Git HTTPS dependency.
#[test]
fn compile_git_concurrent_access() -> Result<()> {

View file

@ -0,0 +1,26 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
program: puffin
args:
- pip-compile
- requirements.in
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpfUuY8I
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpIJnuRB/.venv
---
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
attrs==23.1.0
# via hypothesis
hypothesis @ git+https://github.com/Zac-HD/hypothesis.git@3812dd1e1d5bc7a4e76399556988b7adaea7751d#subdirectory=hypothesis-python
sortedcontainers==2.4.0
# via hypothesis
----- stderr -----
Resolved 3 packages in [TIME]

View file

@ -21,7 +21,10 @@ pub(crate) struct BuildArgs {
/// Directory to story the built wheel in
#[clap(short, long)]
wheels: Option<PathBuf>,
/// The source distribution to build, either a directory or a source archive.
sdist: PathBuf,
/// The subdirectory to build within the source distribution.
subdirectory: Option<PathBuf>,
}
/// Build a source distribution to a wheel
@ -49,9 +52,13 @@ pub(crate) async fn build(args: BuildArgs) -> Result<PathBuf> {
venv.interpreter_info().clone(),
fs::canonicalize(venv.python_executable())?,
);
let builder =
SourceDistributionBuilder::setup(&args.sdist, venv.interpreter_info(), &build_dispatch)
.await?;
let builder = SourceDistributionBuilder::setup(
&args.sdist,
args.subdirectory.as_deref(),
venv.interpreter_info(),
&build_dispatch,
)
.await?;
let wheel = builder.build(&wheel_dir)?;
Ok(wheel_dir.join(wheel))
}

View file

@ -197,11 +197,13 @@ impl BuildContext for BuildDispatch {
fn build_source_distribution<'a>(
&'a self,
sdist: &'a Path,
subdirectory: Option<&'a Path>,
wheel_dir: &'a Path,
) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'a>> {
Box::pin(async move {
let builder =
SourceDistributionBuilder::setup(sdist, &self.interpreter_info, self).await?;
SourceDistributionBuilder::setup(sdist, subdirectory, &self.interpreter_info, self)
.await?;
Ok(builder.build(wheel_dir)?)
})
}

View file

@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::path::PathBuf;
use anyhow::{Error, Result};
use url::Url;
@ -9,11 +9,12 @@ use puffin_git::Git;
/// The source of a distribution.
#[derive(Debug)]
pub(crate) enum Source<'a> {
/// The distribution is available at a remote URL. This could be a dedicated URL, or a URL
/// served by a registry, like PyPI.
Url(Cow<'a, Url>),
/// The distribution is available at a URL in a registry, like PyPI.
RegistryUrl(Url),
/// The distribution is available at an arbitrary remote URL, like a GitHub Release.
RemoteUrl(&'a Url, Option<PathBuf>),
/// The distribution is available in a remote Git repository.
Git(Git),
Git(Git, Option<PathBuf>),
}
impl<'a> TryFrom<&'a RemoteDistributionRef<'_>> for Source<'a> {
@ -23,18 +24,27 @@ impl<'a> TryFrom<&'a RemoteDistributionRef<'_>> for Source<'a> {
match value {
// If a distribution is hosted on a registry, it must be available at a URL.
RemoteDistributionRef::Registry(_, _, file) => {
let url = Url::parse(&file.url)?;
Ok(Self::Url(Cow::Owned(url)))
Ok(Self::RegistryUrl(Url::parse(&file.url)?))
}
// If a distribution is specified via a direct URL, it could be a URL to a hosted file,
// or a URL to a Git repository.
RemoteDistributionRef::Url(_, url) => {
// If the URL points to a subdirectory, extract it, as in:
// `https://git.example.com/MyProject.git@v1.0#subdirectory=pkg_dir`
// `https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir`
let subdirectory = url.fragment().and_then(|fragment| {
fragment.split('&').find_map(|fragment| {
fragment.strip_prefix("subdirectory=").map(PathBuf::from)
})
});
if let Some(url) = url.as_str().strip_prefix("git+") {
let url = Url::parse(url)?;
let git = Git::try_from(url)?;
Ok(Self::Git(git))
Ok(Self::Git(git, subdirectory))
} else {
Ok(Self::Url(Cow::Borrowed(url)))
Ok(Self::RemoteUrl(url, subdirectory))
}
}
}

View file

@ -51,10 +51,11 @@ impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> {
) -> Result<Metadata21> {
debug!("Building: {distribution}");
// This could extract the subdirectory.
let source = Source::try_from(distribution)?;
let sdist_file = match source {
Source::Url(url) => {
debug!("Fetching source distribution from: {url}");
let (sdist_file, subdirectory) = match source {
Source::RegistryUrl(url) => {
debug!("Fetching source distribution from registry: {url}");
let reader = client.stream_external(&url).await?;
let mut reader = tokio::io::BufReader::new(reader.compat());
@ -66,16 +67,36 @@ impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> {
let mut writer = tokio::fs::File::create(&sdist_file).await?;
tokio::io::copy(&mut reader, &mut writer).await?;
sdist_file
// Registry dependencies can't specify a subdirectory.
let subdirectory = None;
(sdist_file, subdirectory)
}
Source::Git(git) => {
debug!("Fetching source distribution from: {git}");
Source::RemoteUrl(url, subdirectory) => {
debug!("Fetching source distribution from URL: {url}");
let reader = client.stream_external(url).await?;
let mut reader = tokio::io::BufReader::new(reader.compat());
// Download the source distribution.
let temp_dir = tempdir_in(self.0.cache())?.into_path();
let sdist_filename = distribution.filename()?;
let sdist_file = temp_dir.join(sdist_filename.as_ref());
let mut writer = tokio::fs::File::create(&sdist_file).await?;
tokio::io::copy(&mut reader, &mut writer).await?;
(sdist_file, subdirectory)
}
Source::Git(git, subdirectory) => {
debug!("Fetching source distribution from Git: {git}");
let git_dir = self.0.cache().join(GIT_CACHE);
let source = GitSource::new(git, git_dir);
tokio::task::spawn_blocking(move || source.fetch())
let sdist_file = tokio::task::spawn_blocking(move || source.fetch())
.await??
.into()
.into();
(sdist_file, subdirectory)
}
};
@ -90,7 +111,7 @@ impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> {
// Build the wheel.
let disk_filename = self
.0
.build_source_distribution(&sdist_file, &wheel_dir)
.build_source_distribution(&sdist_file, subdirectory.as_deref(), &wheel_dir)
.await?;
// Read the metadata from the wheel.
@ -108,23 +129,39 @@ impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> {
///
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
/// with a precise reference to the current commit of that branch or tag.
///
/// This method takes into account various normalizations that are independent from the Git
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
/// prefix kinds.
pub(crate) async fn precise(&self, url: &Url) -> Result<Option<Url>> {
// Extract the subdirectory.
let subdirectory = url.fragment().and_then(|fragment| {
fragment
.split('&')
.find_map(|fragment| fragment.strip_prefix("subdirectory="))
});
let Some(url) = url.as_str().strip_prefix("git+") else {
return Ok(None);
};
// Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
// commit, etc.).
let url = Url::parse(url)?;
let git = Git::try_from(url)?;
let git_dir = self.0.cache().join(GIT_CACHE);
let source = GitSource::new(git, git_dir);
let git = Git::try_from(Url::parse(url)?)?;
let dir = self.0.cache().join(GIT_CACHE);
let source = GitSource::new(git, dir);
let precise = tokio::task::spawn_blocking(move || source.fetch()).await??;
let git = Git::from(precise);
// TODO(charlie): Avoid this double-parse by encoding the source kind separately from the
// URL.
let url = Url::from(Git::from(precise));
let url = Url::parse(&format!("{}{}", "git+", url.as_str()))?;
let mut url = Url::parse(&format!("{}{}", "git+", Url::from(git).as_str()))?;
// Re-add the subdirectory fragment.
if let Some(subdirectory) = subdirectory {
url.set_fragment(Some(&format!("subdirectory={subdirectory}")));
}
Ok(Some(url))
}
}

View file

@ -52,6 +52,7 @@ impl BuildContext for DummyContext {
fn build_source_distribution<'a>(
&'a self,
_sdist: &'a Path,
_subdirectory: Option<&'a Path>,
_wheel_dir: &'a Path,
) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'a>> {
panic!("The test should not need to build source distributions")

View file

@ -74,6 +74,7 @@ pub trait BuildContext {
fn build_source_distribution<'a>(
&'a self,
sdist: &'a Path,
subdirectory: Option<&'a Path>,
wheel_dir: &'a Path,
) -> Pin<Box<dyn Future<Output = anyhow::Result<String>> + Send + 'a>>;
}