diff --git a/crates/puffin-build/src/lib.rs b/crates/puffin-build/src/lib.rs index fe0954cc7..bee55e261 100644 --- a/crates/puffin-build/src/lib.rs +++ b/crates/puffin-build/src/lib.rs @@ -121,19 +121,25 @@ impl SourceDistributionBuilder { /// contents from an archive if necessary. pub async fn setup( sdist: &Path, + subdirectory: Option<&Path>, interpreter_info: &InterpreterInfo, build_context: &impl BuildContext, ) -> Result { let temp_dir = tempdir()?; // TODO(konstin): Parse and verify filenames - let source_tree = if fs::metadata(sdist)?.is_dir() { + let source_root = if fs::metadata(sdist)?.is_dir() { sdist.to_path_buf() } else { debug!("Unpacking for build: {}", sdist.display()); let extracted = temp_dir.path().join("extracted"); extract_archive(sdist, &extracted)? }; + let source_tree = if let Some(subdir) = subdirectory { + source_root.join(subdir) + } else { + source_root + }; // Check if we have a PEP 517 build, a legacy setup.py, or an edge case let build_system = if source_tree.join("pyproject.toml").is_file() { diff --git a/crates/puffin-cli/tests/pip_compile.rs b/crates/puffin-cli/tests/pip_compile.rs index c54e773c5..f722bb1d9 100644 --- a/crates/puffin-cli/tests/pip_compile.rs +++ b/crates/puffin-cli/tests/pip_compile.rs @@ -849,6 +849,42 @@ fn compile_git_refs_https_dependency() -> Result<()> { Ok(()) } +/// Resolve a specific Git dependency with a subdirectory. +#[test] +fn compile_git_subdirectory_dependency() -> Result<()> { + let temp_dir = assert_fs::TempDir::new()?; + let cache_dir = assert_fs::TempDir::new()?; + let venv = temp_dir.child(".venv"); + + Command::new(get_cargo_bin(BIN_NAME)) + .arg("venv") + .arg(venv.as_os_str()) + .arg("--cache-dir") + .arg(cache_dir.path()) + .current_dir(&temp_dir) + .assert() + .success(); + venv.assert(predicates::path::is_dir()); + + let requirements_in = temp_dir.child("requirements.in"); + requirements_in.touch()?; + requirements_in.write_str("hypothesis @ git+https://github.com/Zac-HD/hypothesis.git@ablation-mutation#subdirectory=hypothesis-python")?; + + insta::with_settings!({ + filters => INSTA_FILTERS.to_vec() + }, { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .arg("pip-compile") + .arg("requirements.in") + .arg("--cache-dir") + .arg(cache_dir.path()) + .env("VIRTUAL_ENV", venv.as_os_str()) + .current_dir(&temp_dir)); + }); + + Ok(()) +} + /// Resolve two packages from a `requirements.in` file with the same Git HTTPS dependency. #[test] fn compile_git_concurrent_access() -> Result<()> { diff --git a/crates/puffin-cli/tests/snapshots/pip_compile__compile_git_subdirectory_dependency.snap b/crates/puffin-cli/tests/snapshots/pip_compile__compile_git_subdirectory_dependency.snap new file mode 100644 index 000000000..3e4e6520e --- /dev/null +++ b/crates/puffin-cli/tests/snapshots/pip_compile__compile_git_subdirectory_dependency.snap @@ -0,0 +1,26 @@ +--- +source: crates/puffin-cli/tests/pip_compile.rs +info: + program: puffin + args: + - pip-compile + - requirements.in + - "--cache-dir" + - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpfUuY8I + env: + VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpIJnuRB/.venv +--- +success: true +exit_code: 0 +----- stdout ----- +# This file was autogenerated by Puffin v0.0.1 via the following command: +# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR] +attrs==23.1.0 + # via hypothesis +hypothesis @ git+https://github.com/Zac-HD/hypothesis.git@3812dd1e1d5bc7a4e76399556988b7adaea7751d#subdirectory=hypothesis-python +sortedcontainers==2.4.0 + # via hypothesis + +----- stderr ----- +Resolved 3 packages in [TIME] + diff --git a/crates/puffin-dev/src/build.rs b/crates/puffin-dev/src/build.rs index 7bc0aa0b0..1ca643dee 100644 --- a/crates/puffin-dev/src/build.rs +++ b/crates/puffin-dev/src/build.rs @@ -21,7 +21,10 @@ pub(crate) struct BuildArgs { /// Directory to story the built wheel in #[clap(short, long)] wheels: Option, + /// The source distribution to build, either a directory or a source archive. sdist: PathBuf, + /// The subdirectory to build within the source distribution. + subdirectory: Option, } /// Build a source distribution to a wheel @@ -49,9 +52,13 @@ pub(crate) async fn build(args: BuildArgs) -> Result { venv.interpreter_info().clone(), fs::canonicalize(venv.python_executable())?, ); - let builder = - SourceDistributionBuilder::setup(&args.sdist, venv.interpreter_info(), &build_dispatch) - .await?; + let builder = SourceDistributionBuilder::setup( + &args.sdist, + args.subdirectory.as_deref(), + venv.interpreter_info(), + &build_dispatch, + ) + .await?; let wheel = builder.build(&wheel_dir)?; Ok(wheel_dir.join(wheel)) } diff --git a/crates/puffin-dispatch/src/lib.rs b/crates/puffin-dispatch/src/lib.rs index b4dd21f3d..86cd10e32 100644 --- a/crates/puffin-dispatch/src/lib.rs +++ b/crates/puffin-dispatch/src/lib.rs @@ -197,11 +197,13 @@ impl BuildContext for BuildDispatch { fn build_source_distribution<'a>( &'a self, sdist: &'a Path, + subdirectory: Option<&'a Path>, wheel_dir: &'a Path, ) -> Pin> + Send + 'a>> { Box::pin(async move { let builder = - SourceDistributionBuilder::setup(sdist, &self.interpreter_info, self).await?; + SourceDistributionBuilder::setup(sdist, subdirectory, &self.interpreter_info, self) + .await?; Ok(builder.build(wheel_dir)?) }) } diff --git a/crates/puffin-resolver/src/distribution/source.rs b/crates/puffin-resolver/src/distribution/source.rs index e8a235381..3f602395c 100644 --- a/crates/puffin-resolver/src/distribution/source.rs +++ b/crates/puffin-resolver/src/distribution/source.rs @@ -1,4 +1,4 @@ -use std::borrow::Cow; +use std::path::PathBuf; use anyhow::{Error, Result}; use url::Url; @@ -9,11 +9,12 @@ use puffin_git::Git; /// The source of a distribution. #[derive(Debug)] pub(crate) enum Source<'a> { - /// The distribution is available at a remote URL. This could be a dedicated URL, or a URL - /// served by a registry, like PyPI. - Url(Cow<'a, Url>), + /// The distribution is available at a URL in a registry, like PyPI. + RegistryUrl(Url), + /// The distribution is available at an arbitrary remote URL, like a GitHub Release. + RemoteUrl(&'a Url, Option), /// The distribution is available in a remote Git repository. - Git(Git), + Git(Git, Option), } impl<'a> TryFrom<&'a RemoteDistributionRef<'_>> for Source<'a> { @@ -23,18 +24,27 @@ impl<'a> TryFrom<&'a RemoteDistributionRef<'_>> for Source<'a> { match value { // If a distribution is hosted on a registry, it must be available at a URL. RemoteDistributionRef::Registry(_, _, file) => { - let url = Url::parse(&file.url)?; - Ok(Self::Url(Cow::Owned(url))) + Ok(Self::RegistryUrl(Url::parse(&file.url)?)) } + // If a distribution is specified via a direct URL, it could be a URL to a hosted file, // or a URL to a Git repository. RemoteDistributionRef::Url(_, url) => { + // If the URL points to a subdirectory, extract it, as in: + // `https://git.example.com/MyProject.git@v1.0#subdirectory=pkg_dir` + // `https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir` + let subdirectory = url.fragment().and_then(|fragment| { + fragment.split('&').find_map(|fragment| { + fragment.strip_prefix("subdirectory=").map(PathBuf::from) + }) + }); + if let Some(url) = url.as_str().strip_prefix("git+") { let url = Url::parse(url)?; let git = Git::try_from(url)?; - Ok(Self::Git(git)) + Ok(Self::Git(git, subdirectory)) } else { - Ok(Self::Url(Cow::Borrowed(url))) + Ok(Self::RemoteUrl(url, subdirectory)) } } } diff --git a/crates/puffin-resolver/src/distribution/source_distribution.rs b/crates/puffin-resolver/src/distribution/source_distribution.rs index 09be0a765..12643ca3c 100644 --- a/crates/puffin-resolver/src/distribution/source_distribution.rs +++ b/crates/puffin-resolver/src/distribution/source_distribution.rs @@ -51,10 +51,11 @@ impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> { ) -> Result { debug!("Building: {distribution}"); + // This could extract the subdirectory. let source = Source::try_from(distribution)?; - let sdist_file = match source { - Source::Url(url) => { - debug!("Fetching source distribution from: {url}"); + let (sdist_file, subdirectory) = match source { + Source::RegistryUrl(url) => { + debug!("Fetching source distribution from registry: {url}"); let reader = client.stream_external(&url).await?; let mut reader = tokio::io::BufReader::new(reader.compat()); @@ -66,16 +67,36 @@ impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> { let mut writer = tokio::fs::File::create(&sdist_file).await?; tokio::io::copy(&mut reader, &mut writer).await?; - sdist_file + // Registry dependencies can't specify a subdirectory. + let subdirectory = None; + + (sdist_file, subdirectory) } - Source::Git(git) => { - debug!("Fetching source distribution from: {git}"); + Source::RemoteUrl(url, subdirectory) => { + debug!("Fetching source distribution from URL: {url}"); + + let reader = client.stream_external(url).await?; + let mut reader = tokio::io::BufReader::new(reader.compat()); + + // Download the source distribution. + let temp_dir = tempdir_in(self.0.cache())?.into_path(); + let sdist_filename = distribution.filename()?; + let sdist_file = temp_dir.join(sdist_filename.as_ref()); + let mut writer = tokio::fs::File::create(&sdist_file).await?; + tokio::io::copy(&mut reader, &mut writer).await?; + + (sdist_file, subdirectory) + } + Source::Git(git, subdirectory) => { + debug!("Fetching source distribution from Git: {git}"); let git_dir = self.0.cache().join(GIT_CACHE); let source = GitSource::new(git, git_dir); - tokio::task::spawn_blocking(move || source.fetch()) + let sdist_file = tokio::task::spawn_blocking(move || source.fetch()) .await?? - .into() + .into(); + + (sdist_file, subdirectory) } }; @@ -90,7 +111,7 @@ impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> { // Build the wheel. let disk_filename = self .0 - .build_source_distribution(&sdist_file, &wheel_dir) + .build_source_distribution(&sdist_file, subdirectory.as_deref(), &wheel_dir) .await?; // Read the metadata from the wheel. @@ -108,23 +129,39 @@ impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> { /// /// For example, given a Git dependency with a reference to a branch or tag, return a URL /// with a precise reference to the current commit of that branch or tag. + /// + /// This method takes into account various normalizations that are independent from the Git + /// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+` + /// prefix kinds. pub(crate) async fn precise(&self, url: &Url) -> Result> { + // Extract the subdirectory. + let subdirectory = url.fragment().and_then(|fragment| { + fragment + .split('&') + .find_map(|fragment| fragment.strip_prefix("subdirectory=")) + }); + let Some(url) = url.as_str().strip_prefix("git+") else { return Ok(None); }; // Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial // commit, etc.). - let url = Url::parse(url)?; - let git = Git::try_from(url)?; - let git_dir = self.0.cache().join(GIT_CACHE); - let source = GitSource::new(git, git_dir); + let git = Git::try_from(Url::parse(url)?)?; + let dir = self.0.cache().join(GIT_CACHE); + let source = GitSource::new(git, dir); let precise = tokio::task::spawn_blocking(move || source.fetch()).await??; + let git = Git::from(precise); // TODO(charlie): Avoid this double-parse by encoding the source kind separately from the // URL. - let url = Url::from(Git::from(precise)); - let url = Url::parse(&format!("{}{}", "git+", url.as_str()))?; + let mut url = Url::parse(&format!("{}{}", "git+", Url::from(git).as_str()))?; + + // Re-add the subdirectory fragment. + if let Some(subdirectory) = subdirectory { + url.set_fragment(Some(&format!("subdirectory={subdirectory}"))); + } + Ok(Some(url)) } } diff --git a/crates/puffin-resolver/tests/resolver.rs b/crates/puffin-resolver/tests/resolver.rs index 282868e75..de3ebbc01 100644 --- a/crates/puffin-resolver/tests/resolver.rs +++ b/crates/puffin-resolver/tests/resolver.rs @@ -52,6 +52,7 @@ impl BuildContext for DummyContext { fn build_source_distribution<'a>( &'a self, _sdist: &'a Path, + _subdirectory: Option<&'a Path>, _wheel_dir: &'a Path, ) -> Pin> + Send + 'a>> { panic!("The test should not need to build source distributions") diff --git a/crates/puffin-traits/src/lib.rs b/crates/puffin-traits/src/lib.rs index a53e7a3fb..8d7fe66c8 100644 --- a/crates/puffin-traits/src/lib.rs +++ b/crates/puffin-traits/src/lib.rs @@ -74,6 +74,7 @@ pub trait BuildContext { fn build_source_distribution<'a>( &'a self, sdist: &'a Path, + subdirectory: Option<&'a Path>, wheel_dir: &'a Path, ) -> Pin> + Send + 'a>>; }