diff --git a/crates/puffin-resolver/src/distribution/cached_wheel.rs b/crates/puffin-resolver/src/distribution/cached_wheel.rs new file mode 100644 index 000000000..496e39007 --- /dev/null +++ b/crates/puffin-resolver/src/distribution/cached_wheel.rs @@ -0,0 +1,60 @@ +use std::path::{Path, PathBuf}; +use std::str::FromStr; + +use anyhow::Result; +use zip::ZipArchive; + +use distribution_filename::WheelFilename; +use platform_tags::Tags; +use puffin_distribution::RemoteDistributionRef; +use puffin_package::pypi_types::Metadata21; + +/// A cached wheel, either downloaded or built from a remote source. +#[derive(Debug)] +pub(super) struct CachedWheel { + path: PathBuf, + filename: WheelFilename, +} + +impl CachedWheel { + pub(super) fn new(path: PathBuf, filename: WheelFilename) -> Self { + Self { path, filename } + } + + /// Find a cached wheel matching the tags, downloaded or built from the given distribution. + pub(super) fn find_in_cache( + distribution: &RemoteDistributionRef<'_>, + tags: &Tags, + cache: &Path, + ) -> Option { + let wheel_dir = cache.join(distribution.id()); + let Ok(read_dir) = fs_err::read_dir(wheel_dir) else { + return None; + }; + for entry in read_dir { + let Ok(entry) = entry else { + continue; + }; + let Ok(filename) = + WheelFilename::from_str(entry.file_name().to_string_lossy().as_ref()) + else { + continue; + }; + if filename.is_compatible(tags) { + let path = entry.path().clone(); + return Some(CachedWheel { path, filename }); + } + } + None + } + + /// Read the [`Metadata21`] from a wheel. + pub(super) fn read_dist_info(&self) -> Result { + let mut archive = ZipArchive::new(fs_err::File::open(&self.path)?)?; + let dist_info_prefix = install_wheel_rs::find_dist_info(&self.filename, &mut archive)?; + let dist_info = std::io::read_to_string( + archive.by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))?, + )?; + Ok(Metadata21::parse(dist_info.as_bytes())?) 
+ } +} diff --git a/crates/puffin-resolver/src/distribution/mod.rs b/crates/puffin-resolver/src/distribution/mod.rs new file mode 100644 index 000000000..4432018b4 --- /dev/null +++ b/crates/puffin-resolver/src/distribution/mod.rs @@ -0,0 +1,7 @@ +pub(crate) use source_distribution::SourceDistributionFetcher; +pub(crate) use wheel::WheelFetcher; + +mod cached_wheel; +mod source; +mod source_distribution; +mod wheel; diff --git a/crates/puffin-resolver/src/distribution/source.rs b/crates/puffin-resolver/src/distribution/source.rs new file mode 100644 index 000000000..e8a235381 --- /dev/null +++ b/crates/puffin-resolver/src/distribution/source.rs @@ -0,0 +1,42 @@ +use std::borrow::Cow; + +use anyhow::{Error, Result}; +use url::Url; + +use puffin_distribution::RemoteDistributionRef; +use puffin_git::Git; + +/// The source of a distribution. +#[derive(Debug)] +pub(crate) enum Source<'a> { + /// The distribution is available at a remote URL. This could be a dedicated URL, or a URL + /// served by a registry, like PyPI. + Url(Cow<'a, Url>), + /// The distribution is available in a remote Git repository. + Git(Git), +} + +impl<'a> TryFrom<&'a RemoteDistributionRef<'_>> for Source<'a> { + type Error = Error; + + fn try_from(value: &'a RemoteDistributionRef<'_>) -> Result { + match value { + // If a distribution is hosted on a registry, it must be available at a URL. + RemoteDistributionRef::Registry(_, _, file) => { + let url = Url::parse(&file.url)?; + Ok(Self::Url(Cow::Owned(url))) + } + // If a distribution is specified via a direct URL, it could be a URL to a hosted file, + // or a URL to a Git repository. 
+ RemoteDistributionRef::Url(_, url) => { + if let Some(url) = url.as_str().strip_prefix("git+") { + let url = Url::parse(url)?; + let git = Git::try_from(url)?; + Ok(Self::Git(git)) + } else { + Ok(Self::Url(Cow::Borrowed(url))) + } + } + } + } +} diff --git a/crates/puffin-resolver/src/distribution/source_distribution.rs b/crates/puffin-resolver/src/distribution/source_distribution.rs new file mode 100644 index 000000000..f8fcf5bfd --- /dev/null +++ b/crates/puffin-resolver/src/distribution/source_distribution.rs @@ -0,0 +1,109 @@ +use std::str::FromStr; + +use anyhow::Result; +use fs_err::tokio as fs; +use tempfile::tempdir; +use tokio_util::compat::FuturesAsyncReadCompatExt; +use tracing::debug; + +use distribution_filename::WheelFilename; +use platform_tags::Tags; +use puffin_client::RegistryClient; +use puffin_distribution::RemoteDistributionRef; +use puffin_git::GitSource; +use puffin_package::pypi_types::Metadata21; +use puffin_traits::BuildContext; + +use crate::distribution::cached_wheel::CachedWheel; +use crate::distribution::source::Source; + +const BUILT_WHEELS_CACHE: &str = "built-wheels-v0"; + +const GIT_CACHE: &str = "git-v0"; + +/// Fetch and build a source distribution from a remote source, or from a local cache. +pub(crate) struct SourceDistributionFetcher<'a, T: BuildContext>(&'a T); + +impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> { + /// Initialize a [`SourceDistributionFetcher`] from a [`BuildContext`]. + pub(crate) fn new(build_context: &'a T) -> Self { + Self(build_context) + } + + /// Read the [`Metadata21`] from a built source distribution, if it exists in the cache. 
+ pub(crate) fn find_dist_info( + &self, + distribution: &RemoteDistributionRef<'_>, + tags: &Tags, + ) -> Result> { + let Some(cache) = self.0.cache() else { + return Ok(None); + }; + CachedWheel::find_in_cache(distribution, tags, &cache.join(BUILT_WHEELS_CACHE)) + .as_ref() + .map(CachedWheel::read_dist_info) + .transpose() + } + + /// Download and build a source distribution, storing the built wheel in the cache. + pub(crate) async fn download_and_build_sdist( + &self, + distribution: &RemoteDistributionRef<'_>, + client: &RegistryClient, + ) -> Result { + debug!("Building: {distribution}"); + + let temp_dir = tempdir()?; + + let source = Source::try_from(distribution)?; + let sdist_file = match source { + Source::Url(url) => { + debug!("Fetching source distribution from: {url}"); + + let reader = client.stream_external(&url).await?; + let mut reader = tokio::io::BufReader::new(reader.compat()); + + // Download the source distribution. + let sdist_filename = distribution.filename()?; + let sdist_file = temp_dir.path().join(sdist_filename.as_ref()); + let mut writer = tokio::fs::File::create(&sdist_file).await?; + tokio::io::copy(&mut reader, &mut writer).await?; + + sdist_file + } + Source::Git(git) => { + debug!("Fetching source distribution from: {git}"); + + let git_dir = self.0.cache().map_or_else( + || temp_dir.path().join(GIT_CACHE), + |cache| cache.join(GIT_CACHE), + ); + let source = GitSource::new(git, git_dir); + tokio::task::spawn_blocking(move || source.fetch()).await?? + } + }; + + // Create a directory for the wheel. + let wheel_dir = self.0.cache().map_or_else( + || temp_dir.path().join(BUILT_WHEELS_CACHE), + |cache| cache.join(BUILT_WHEELS_CACHE).join(distribution.id()), + ); + fs::create_dir_all(&wheel_dir).await?; + + // Build the wheel. + let disk_filename = self + .0 + .build_source_distribution(&sdist_file, &wheel_dir) + .await?; + + // Read the metadata from the wheel. 
+ let wheel = CachedWheel::new( + wheel_dir.join(&disk_filename), + WheelFilename::from_str(&disk_filename)?, + ); + let metadata21 = wheel.read_dist_info()?; + + debug!("Finished building: {distribution}"); + Ok(metadata21) + } +} diff --git a/crates/puffin-resolver/src/distribution/wheel.rs b/crates/puffin-resolver/src/distribution/wheel.rs new file mode 100644 index 000000000..337c19cb7 --- /dev/null +++ b/crates/puffin-resolver/src/distribution/wheel.rs @@ -0,0 +1,76 @@ +use std::path::Path; +use std::str::FromStr; + +use anyhow::Result; +use fs_err::tokio as fs; +use tempfile::tempdir; +use tokio_util::compat::FuturesAsyncReadCompatExt; +use tracing::debug; + +use distribution_filename::WheelFilename; +use platform_tags::Tags; +use puffin_client::RegistryClient; +use puffin_distribution::RemoteDistributionRef; +use puffin_package::pypi_types::Metadata21; + +use crate::distribution::cached_wheel::CachedWheel; + +const REMOTE_WHEELS_CACHE: &str = "remote-wheels-v0"; + +/// Fetch a built distribution from a remote source, or from a local cache. +pub(crate) struct WheelFetcher<'a>(Option<&'a Path>); + +impl<'a> WheelFetcher<'a> { + /// Initialize a [`WheelFetcher`] from an optional cache directory. + pub(crate) fn new(cache: Option<&'a Path>) -> Self { + Self(cache) + } + + /// Read the [`Metadata21`] from a wheel, if it exists in the cache. + pub(crate) fn find_dist_info( + &self, + distribution: &RemoteDistributionRef<'_>, + tags: &Tags, + ) -> Result> { + let Some(cache) = self.0 else { + return Ok(None); + }; + CachedWheel::find_in_cache(distribution, tags, &cache.join(REMOTE_WHEELS_CACHE)) + .as_ref() + .map(CachedWheel::read_dist_info) + .transpose() + } + + /// Download a wheel, storing it in the cache. 
+ pub(crate) async fn download_wheel( + &self, + distribution: &RemoteDistributionRef<'_>, + client: &RegistryClient, + ) -> Result { + debug!("Downloading: {distribution}"); + let url = distribution.url()?; + let reader = client.stream_external(&url).await?; + let mut reader = tokio::io::BufReader::new(reader.compat()); + let temp_dir = tempdir()?; + + // Create a directory for the wheel. + let wheel_dir = self.0.map_or_else( + || temp_dir.path().join(REMOTE_WHEELS_CACHE), + |cache| cache.join(REMOTE_WHEELS_CACHE).join(distribution.id()), + ); + fs::create_dir_all(&wheel_dir).await?; + + // Download the wheel. + let wheel_filename = distribution.filename()?; + let wheel_file = wheel_dir.join(wheel_filename.as_ref()); + let mut writer = tokio::fs::File::create(&wheel_file).await?; + tokio::io::copy(&mut reader, &mut writer).await?; + + // Read the metadata from the wheel. + let wheel = CachedWheel::new(wheel_file, WheelFilename::from_str(&wheel_filename)?); + let metadata21 = wheel.read_dist_info()?; + + debug!("Finished downloading: {distribution}"); + Ok(metadata21) + } +} diff --git a/crates/puffin-resolver/src/lib.rs b/crates/puffin-resolver/src/lib.rs index e0dcb9e2f..8277f5441 100644 --- a/crates/puffin-resolver/src/lib.rs +++ b/crates/puffin-resolver/src/lib.rs @@ -7,6 +7,7 @@ pub use resolver::{Reporter as ResolverReporter, Resolver}; pub use wheel_finder::{Reporter as WheelFinderReporter, WheelFinder}; mod candidate_selector; +mod distribution; mod error; mod file; mod manifest; @@ -15,5 +16,4 @@ mod pubgrub; mod resolution; mod resolution_mode; mod resolver; -mod source_distribution; mod wheel_finder; diff --git a/crates/puffin-resolver/src/resolver.rs b/crates/puffin-resolver/src/resolver.rs index cbf688426..2ebd03f51 100644 --- a/crates/puffin-resolver/src/resolver.rs +++ b/crates/puffin-resolver/src/resolver.rs @@ -28,13 +28,13 @@ use puffin_package::pypi_types::{File, Metadata21, SimpleJson}; use puffin_traits::BuildContext; use 
crate::candidate_selector::CandidateSelector; +use crate::distribution::{SourceDistributionFetcher, WheelFetcher}; use crate::error::ResolveError; use crate::file::{DistributionFile, SdistFile, WheelFile}; use crate::manifest::Manifest; use crate::pubgrub::{iter_requirements, version_range}; use crate::pubgrub::{PubGrubPackage, PubGrubPriorities, PubGrubVersion, MIN_VERSION}; use crate::resolution::Graph; -use crate::source_distribution::SourceDistributionBuildTree; pub struct Resolver<'a, Context: BuildContext + Sync> { requirements: Vec, @@ -655,12 +655,12 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { } // Build a source distribution from the registry, returning its metadata. Request::Sdist(package_name, version, file) => { - let build_tree = SourceDistributionBuildTree::new(self.build_context); + let builder = SourceDistributionFetcher::new(self.build_context); let distribution = RemoteDistributionRef::from_registry(&package_name, &version, &file); - let metadata = match build_tree.find_dist_info(&distribution, self.tags) { + let metadata = match builder.find_dist_info(&distribution, self.tags) { Ok(Some(metadata)) => metadata, - Ok(None) => build_tree + Ok(None) => builder .download_and_build_sdist(&distribution, self.client) .await .map_err(|err| ResolveError::RegistryDistribution { @@ -671,7 +671,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { error!( "Failed to read source distribution {distribution} from cache: {err}", ); - build_tree + builder .download_and_build_sdist(&distribution, self.client) .await .map_err(|err| ResolveError::RegistryDistribution { @@ -684,16 +684,16 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { } // Build a source distribution from a remote URL, returning its metadata. 
Request::SdistUrl(package_name, url) => { - let build_tree = SourceDistributionBuildTree::new(self.build_context); + let fetcher = SourceDistributionFetcher::new(self.build_context); let distribution = RemoteDistributionRef::from_url(&package_name, &url); - let metadata = match build_tree.find_dist_info(&distribution, self.tags) { + let metadata = match fetcher.find_dist_info(&distribution, self.tags) { Ok(Some(metadata)) => { debug!("Found source distribution metadata in cache: {url}"); metadata } Ok(None) => { debug!("Downloading source distribution from: {url}"); - build_tree + fetcher .download_and_build_sdist(&distribution, self.client) .await .map_err(|err| ResolveError::UrlDistribution { @@ -705,7 +705,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { error!( "Failed to read source distribution {distribution} from cache: {err}", ); - build_tree + fetcher .download_and_build_sdist(&distribution, self.client) .await .map_err(|err| ResolveError::UrlDistribution { @@ -718,16 +718,16 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { } // Fetch wheel metadata from a remote URL. 
Request::WheelUrl(package_name, url) => { - let build_tree = SourceDistributionBuildTree::new(self.build_context); + let fetcher = WheelFetcher::new(self.build_context.cache()); let distribution = RemoteDistributionRef::from_url(&package_name, &url); - let metadata = match build_tree.find_dist_info(&distribution, self.tags) { + let metadata = match fetcher.find_dist_info(&distribution, self.tags) { Ok(Some(metadata)) => { debug!("Found wheel metadata in cache: {url}"); metadata } Ok(None) => { debug!("Downloading wheel from: {url}"); - build_tree + fetcher .download_wheel(&distribution, self.client) .await .map_err(|err| ResolveError::UrlDistribution { @@ -737,7 +737,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { } Err(err) => { error!("Failed to read wheel {distribution} from cache: {err}",); - build_tree + fetcher .download_wheel(&distribution, self.client) .await .map_err(|err| ResolveError::UrlDistribution { diff --git a/crates/puffin-resolver/src/source_distribution.rs b/crates/puffin-resolver/src/source_distribution.rs deleted file mode 100644 index 6d3de9b07..000000000 --- a/crates/puffin-resolver/src/source_distribution.rs +++ /dev/null @@ -1,228 +0,0 @@ -use std::borrow::Cow; -use std::path::PathBuf; -use std::str::FromStr; - -use anyhow::{Error, Result}; -use fs_err::tokio as fs; -use tempfile::tempdir; -use tokio_util::compat::FuturesAsyncReadCompatExt; -use tracing::debug; -use url::Url; -use zip::ZipArchive; - -use distribution_filename::WheelFilename; -use platform_tags::Tags; -use puffin_client::RegistryClient; -use puffin_distribution::RemoteDistributionRef; -use puffin_git::{Git, GitSource}; -use puffin_package::pypi_types::Metadata21; -use puffin_traits::BuildContext; - -const BUILT_WHEELS_CACHE: &str = "built-wheels-v0"; - -const REMOTE_WHEELS_CACHE: &str = "remote-wheels-v0"; - -const GIT_CACHE: &str = "git-v0"; - -/// Stores wheels built from source distributions. 
We need to keep those separate from the regular -/// wheel cache since a wheel with the same name may be uploaded after we made our build and in that -/// case the hashes would clash. -pub(crate) struct SourceDistributionBuildTree<'a, T: BuildContext>(&'a T); - -impl<'a, T: BuildContext> SourceDistributionBuildTree<'a, T> { - /// Initialize a [`SourceDistributionBuildTree`] from a [`BuildContext`]. - pub(crate) fn new(build_context: &'a T) -> Self { - Self(build_context) - } - - /// Read the [`Metadata21`] from a built source distribution, if it exists in the cache. - pub(crate) fn find_dist_info( - &self, - distribution: &RemoteDistributionRef<'_>, - tags: &Tags, - ) -> Result> { - self.find_wheel(distribution, tags) - .as_ref() - .map(read_dist_info) - .transpose() - } - - /// Download and build a source distribution, storing the built wheel in the cache. - pub(crate) async fn download_and_build_sdist( - &self, - distribution: &RemoteDistributionRef<'_>, - client: &RegistryClient, - ) -> Result { - debug!("Building: {distribution}"); - - let temp_dir = tempdir()?; - - let source = DistributionSource::try_from(distribution)?; - let sdist_file = match source { - DistributionSource::Url(url) => { - debug!("Fetching source distribution from: {url}"); - - let reader = client.stream_external(&url).await?; - let mut reader = tokio::io::BufReader::new(reader.compat()); - - // Download the source distribution. - let sdist_filename = distribution.filename()?; - let sdist_file = temp_dir.path().join(sdist_filename.as_ref()); - let mut writer = tokio::fs::File::create(&sdist_file).await?; - tokio::io::copy(&mut reader, &mut writer).await?; - - sdist_file - } - DistributionSource::Git(git) => { - debug!("Fetching source distribution from: {git}"); - - let git_dir = self.0.cache().map_or_else( - || temp_dir.path().join(GIT_CACHE), - |cache| cache.join(GIT_CACHE), - ); - let source = GitSource::new(git, git_dir); - tokio::task::spawn_blocking(move || source.fetch()).await?? 
- } - }; - - // Create a directory for the wheel. - let wheel_dir = self.0.cache().map_or_else( - || temp_dir.path().join(BUILT_WHEELS_CACHE), - |cache| cache.join(BUILT_WHEELS_CACHE).join(distribution.id()), - ); - fs::create_dir_all(&wheel_dir).await?; - - // Build the wheel. - let disk_filename = self - .0 - .build_source_distribution(&sdist_file, &wheel_dir) - .await?; - - // Read the metadata from the wheel. - let wheel = CachedWheel { - path: wheel_dir.join(&disk_filename), - filename: WheelFilename::from_str(&disk_filename)?, - }; - let metadata21 = read_dist_info(&wheel)?; - - debug!("Finished building: {distribution}"); - Ok(metadata21) - } - - pub(crate) async fn download_wheel( - &self, - distribution: &RemoteDistributionRef<'_>, - client: &RegistryClient, - ) -> Result { - debug!("Downloading: {distribution}"); - let url = distribution.url()?; - let reader = client.stream_external(&url).await?; - let mut reader = tokio::io::BufReader::new(reader.compat()); - let temp_dir = tempdir()?; - - // Create a directory for the wheel. - let wheel_dir = self.0.cache().map_or_else( - || temp_dir.path().join(REMOTE_WHEELS_CACHE), - |cache| cache.join(REMOTE_WHEELS_CACHE).join(distribution.id()), - ); - fs::create_dir_all(&wheel_dir).await?; - - // Download the wheel. - let wheel_filename = distribution.filename()?; - let wheel_file = wheel_dir.join(wheel_filename.as_ref()); - let mut writer = tokio::fs::File::create(&wheel_file).await?; - tokio::io::copy(&mut reader, &mut writer).await?; - - // Read the metadata from the wheel. - let wheel = CachedWheel { - path: wheel_file, - filename: WheelFilename::from_str(&wheel_filename)?, - }; - let metadata21 = read_dist_info(&wheel)?; - - debug!("Finished downloading: {distribution}"); - Ok(metadata21) - } - - /// Search for a wheel matching the tags that was built from the given source distribution. 
- fn find_wheel( - &self, - distribution: &RemoteDistributionRef<'_>, - tags: &Tags, - ) -> Option { - let wheel_dir = self - .0 - .cache()? - .join(BUILT_WHEELS_CACHE) - .join(distribution.id()); - let Ok(read_dir) = fs_err::read_dir(wheel_dir) else { - return None; - }; - for entry in read_dir { - let Ok(entry) = entry else { - continue; - }; - let Ok(filename) = - WheelFilename::from_str(entry.file_name().to_string_lossy().as_ref()) - else { - continue; - }; - if filename.is_compatible(tags) { - let path = entry.path().clone(); - return Some(CachedWheel { path, filename }); - } - } - None - } -} - -#[derive(Debug)] -struct CachedWheel { - path: PathBuf, - filename: WheelFilename, -} - -/// Read the [`Metadata21`] from a wheel. -fn read_dist_info(wheel: &CachedWheel) -> Result { - let mut archive = ZipArchive::new(fs_err::File::open(&wheel.path)?)?; - let dist_info_prefix = install_wheel_rs::find_dist_info(&wheel.filename, &mut archive)?; - let dist_info = std::io::read_to_string( - archive.by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))?, - )?; - Ok(Metadata21::parse(dist_info.as_bytes())?) -} - -/// The host source for a distribution. -#[derive(Debug)] -enum DistributionSource<'a> { - /// The distribution is available at a remote URL. This could be a dedicated URL, or a URL - /// served by a registry, like PyPI. - Url(Cow<'a, Url>), - /// The distribution is available in a remote Git repository. - Git(Git), -} - -impl<'a> TryFrom<&'a RemoteDistributionRef<'_>> for DistributionSource<'a> { - type Error = Error; - - fn try_from(value: &'a RemoteDistributionRef<'_>) -> Result { - match value { - // If a distribution is hosted on a registry, it must be available at a URL. - RemoteDistributionRef::Registry(_, _, file) => { - let url = Url::parse(&file.url)?; - Ok(Self::Url(Cow::Owned(url))) - } - // If a distribution is specified via a direct URL, it could be a URL to a hosted file, - // or a URL to a Git repository. 
- RemoteDistributionRef::Url(_, url) => { - if let Some(url) = url.as_str().strip_prefix("git+") { - let url = Url::parse(url)?; - let git = Git::try_from(url)?; - Ok(Self::Git(git)) - } else { - Ok(Self::Url(Cow::Borrowed(url))) - } - } - } - } -}