Split source_distribution.rs into separate wheel and sdist fetchers (#291)

This commit is contained in:
Charlie Marsh 2023-11-02 09:04:51 -07:00 committed by GitHub
parent c6f2dfd727
commit a02bf2e415
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 308 additions and 242 deletions

View file

@ -0,0 +1,60 @@
use std::path::{Path, PathBuf};
use std::str::FromStr;
use anyhow::Result;
use zip::ZipArchive;
use distribution_filename::WheelFilename;
use platform_tags::Tags;
use puffin_distribution::RemoteDistributionRef;
use puffin_package::pypi_types::Metadata21;
/// A wheel archive stored in a local cache directory, paired with its parsed filename.
///
/// The wheel may have been built from a source distribution or downloaded as-is; this type
/// only knows where the archive lives and what it is called.
#[derive(Debug)]
pub(super) struct CachedWheel {
    /// Location of the wheel archive on disk.
    path: PathBuf,
    /// Parsed wheel filename, used for tag compatibility checks and to locate the
    /// `.dist-info` directory inside the archive.
    filename: WheelFilename,
}

impl CachedWheel {
    /// Create a [`CachedWheel`] from the on-disk path of a wheel and its parsed filename.
    pub(super) fn new(path: PathBuf, filename: WheelFilename) -> Self {
        Self { path, filename }
    }

    /// Search for a wheel matching the tags that was stored for the given distribution.
    ///
    /// Scans `cache/<distribution id>` and returns the first entry whose name parses as a
    /// wheel filename compatible with `tags`. Returns `None` if the directory cannot be read
    /// or no entry matches. Unreadable entries and names that are not valid wheel filenames
    /// are skipped rather than reported.
    pub(super) fn find_in_cache(
        distribution: &RemoteDistributionRef<'_>,
        tags: &Tags,
        cache: &Path,
    ) -> Option<Self> {
        let wheel_dir = cache.join(distribution.id());
        fs_err::read_dir(wheel_dir)
            .ok()?
            // Drop entries that failed to be read; this is a best-effort cache lookup.
            .flatten()
            .find_map(|entry| {
                let filename =
                    WheelFilename::from_str(entry.file_name().to_string_lossy().as_ref()).ok()?;
                // `path()` already yields an owned `PathBuf`, so no extra clone is needed.
                filename.is_compatible(tags).then(|| Self {
                    path: entry.path(),
                    filename,
                })
            })
    }

    /// Read the [`Metadata21`] from a wheel.
    ///
    /// # Errors
    ///
    /// Fails if the wheel cannot be opened as a zip archive, if the `.dist-info` directory
    /// cannot be located, or if the `METADATA` entry cannot be read or parsed.
    pub(super) fn read_dist_info(&self) -> Result<Metadata21> {
        let mut archive = ZipArchive::new(fs_err::File::open(&self.path)?)?;
        let dist_info_prefix = install_wheel_rs::find_dist_info(&self.filename, &mut archive)?;
        let dist_info = std::io::read_to_string(
            archive.by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))?,
        )?;
        Ok(Metadata21::parse(dist_info.as_bytes())?)
    }
}

View file

@ -0,0 +1,7 @@
pub(crate) use source_distribution::SourceDistributionFetcher;
pub(crate) use wheel::WheelFetcher;
mod cached_wheel;
mod source;
mod source_distribution;
mod wheel;

View file

@ -0,0 +1,42 @@
use std::borrow::Cow;
use anyhow::{Error, Result};
use url::Url;
use puffin_distribution::RemoteDistributionRef;
use puffin_git::Git;
/// Where the contents of a distribution are fetched from.
#[derive(Debug)]
pub(crate) enum Source<'a> {
    /// A plain remote URL: either a dedicated URL, or one served by a registry such as PyPI.
    Url(Cow<'a, Url>),
    /// A remote Git repository.
    Git(Git),
}

impl<'a> TryFrom<&'a RemoteDistributionRef<'_>> for Source<'a> {
    type Error = Error;

    /// Classify a distribution reference as either a URL download or a Git checkout.
    ///
    /// Fails if the registry file URL, or the URL following a `git+` prefix, does not parse,
    /// or if the parsed URL cannot be converted into a [`Git`] reference.
    fn try_from(value: &'a RemoteDistributionRef<'_>) -> Result<Self, Self::Error> {
        match value {
            // Registry-hosted files carry their URL as a string, so it is parsed into an
            // owned `Url`.
            RemoteDistributionRef::Registry(_, _, file) => {
                Ok(Self::Url(Cow::Owned(Url::parse(&file.url)?)))
            }
            // Direct URLs are Git repositories when prefixed with `git+`; anything else is
            // treated as a hosted file and borrowed as-is.
            RemoteDistributionRef::Url(_, url) => match url.as_str().strip_prefix("git+") {
                Some(repository) => {
                    let repository = Url::parse(repository)?;
                    Ok(Self::Git(Git::try_from(repository)?))
                }
                None => Ok(Self::Url(Cow::Borrowed(url))),
            },
        }
    }
}

View file

@ -0,0 +1,109 @@
use std::str::FromStr;
use anyhow::Result;
use fs_err::tokio as fs;
use tempfile::tempdir;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::debug;
use distribution_filename::WheelFilename;
use platform_tags::Tags;
use puffin_client::RegistryClient;
use puffin_distribution::RemoteDistributionRef;
use puffin_git::GitSource;
use puffin_package::pypi_types::Metadata21;
use puffin_traits::BuildContext;
use crate::distribution::cached_wheel::CachedWheel;
use crate::distribution::source::Source;
/// Name of the cache subdirectory that holds wheels built from source distributions.
const BUILT_WHEELS_CACHE: &str = "built-wheels-v0";
/// Name of the cache subdirectory handed to [`GitSource`] for Git checkouts.
const GIT_CACHE: &str = "git-v0";

/// Builds wheels from remote source distributions, reusing earlier builds from a local cache.
pub(crate) struct SourceDistributionFetcher<'a, T: BuildContext>(&'a T);

impl<'a, T: BuildContext> SourceDistributionFetcher<'a, T> {
    /// Wrap a [`BuildContext`] in a [`SourceDistributionFetcher`].
    pub(crate) fn new(build_context: &'a T) -> Self {
        Self(build_context)
    }

    /// Look up a previously built wheel for `distribution` and parse its [`Metadata21`].
    ///
    /// Returns `Ok(None)` when the build context has no cache or no compatible wheel is
    /// cached, and an error when a cached wheel is found but its metadata cannot be read.
    pub(crate) fn find_dist_info(
        &self,
        distribution: &RemoteDistributionRef<'_>,
        tags: &Tags,
    ) -> Result<Option<Metadata21>> {
        let built_wheels = match self.0.cache() {
            Some(cache) => cache.join(BUILT_WHEELS_CACHE),
            None => return Ok(None),
        };

        match CachedWheel::find_in_cache(distribution, tags, &built_wheels) {
            Some(wheel) => wheel.read_dist_info().map(Some),
            None => Ok(None),
        }
    }

    /// Fetch a source distribution, build it into a wheel, and return the wheel's metadata.
    ///
    /// The built wheel is written under the cache when the build context has one, and under
    /// a temporary directory otherwise.
    pub(crate) async fn download_and_build_sdist(
        &self,
        distribution: &RemoteDistributionRef<'_>,
        client: &RegistryClient,
    ) -> Result<Metadata21> {
        debug!("Building: {distribution}");

        // Scratch space for the downloaded archive, and for outputs when there is no cache.
        let temp_dir = tempdir()?;

        let origin = Source::try_from(distribution)?;
        let sdist_file = match origin {
            Source::Url(url) => {
                debug!("Fetching source distribution from: {url}");

                let response = client.stream_external(&url).await?;
                let mut reader = tokio::io::BufReader::new(response.compat());

                // Stream the archive into the scratch directory under its own filename.
                let sdist_filename = distribution.filename()?;
                let target = temp_dir.path().join(sdist_filename.as_ref());
                let mut writer = tokio::fs::File::create(&target).await?;
                tokio::io::copy(&mut reader, &mut writer).await?;

                target
            }
            Source::Git(git) => {
                debug!("Fetching source distribution from: {git}");

                let git_dir = match self.0.cache() {
                    Some(cache) => cache.join(GIT_CACHE),
                    None => temp_dir.path().join(GIT_CACHE),
                };

                // The fetch is synchronous, so it runs on the blocking thread pool.
                let git_source = GitSource::new(git, git_dir);
                tokio::task::spawn_blocking(move || git_source.fetch()).await??
            }
        };

        // Pick the output directory for the wheel and make sure it exists.
        let wheel_dir = match self.0.cache() {
            Some(cache) => cache.join(BUILT_WHEELS_CACHE).join(distribution.id()),
            None => temp_dir.path().join(BUILT_WHEELS_CACHE),
        };
        fs::create_dir_all(&wheel_dir).await?;

        // Run the build; the build context reports the filename of the wheel it produced.
        let disk_filename = self
            .0
            .build_source_distribution(&sdist_file, &wheel_dir)
            .await?;

        // Open the freshly built wheel and extract its metadata.
        let wheel_path = wheel_dir.join(&disk_filename);
        let wheel_filename = WheelFilename::from_str(&disk_filename)?;
        let metadata21 = CachedWheel::new(wheel_path, wheel_filename).read_dist_info()?;

        debug!("Finished building: {distribution}");
        Ok(metadata21)
    }
}

View file

@ -0,0 +1,76 @@
use std::path::Path;
use std::str::FromStr;
use anyhow::Result;
use fs_err::tokio as fs;
use tempfile::tempdir;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::debug;
use distribution_filename::WheelFilename;
use platform_tags::Tags;
use puffin_client::RegistryClient;
use puffin_distribution::RemoteDistributionRef;
use puffin_package::pypi_types::Metadata21;
use crate::distribution::cached_wheel::CachedWheel;
/// Name of the cache subdirectory that holds wheels downloaded from remote sources.
const REMOTE_WHEELS_CACHE: &str = "remote-wheels-v0";

/// Fetch a built distribution from a remote source, or from a local cache.
///
/// Wraps the optional root of the cache directory; `None` disables cache lookups and causes
/// downloads to land in a temporary directory instead.
pub(crate) struct WheelFetcher<'a>(Option<&'a Path>);

impl<'a> WheelFetcher<'a> {
    /// Initialize a [`WheelFetcher`] from an optional cache directory.
    pub(crate) fn new(cache: Option<&'a Path>) -> Self {
        Self(cache)
    }

    /// Read the [`Metadata21`] from a wheel, if it exists in the cache.
    ///
    /// Returns `Ok(None)` when no cache directory is configured or no compatible wheel is
    /// cached for `distribution`, and an error when a cached wheel is found but its metadata
    /// cannot be read.
    pub(crate) fn find_dist_info(
        &self,
        distribution: &RemoteDistributionRef<'_>,
        tags: &Tags,
    ) -> Result<Option<Metadata21>> {
        let Some(cache) = self.0 else {
            return Ok(None);
        };
        CachedWheel::find_in_cache(distribution, tags, &cache.join(REMOTE_WHEELS_CACHE))
            .as_ref()
            .map(CachedWheel::read_dist_info)
            .transpose()
    }

    /// Download a wheel, storing it in the cache, and return its [`Metadata21`].
    ///
    /// With a cache directory the wheel is written to
    /// `<cache>/remote-wheels-v0/<distribution id>/<filename>`; without one it is written
    /// under a directory created by `tempdir()`.
    pub(crate) async fn download_wheel(
        &self,
        distribution: &RemoteDistributionRef<'_>,
        client: &RegistryClient,
    ) -> Result<Metadata21> {
        debug!("Downloading: {distribution}");
        let url = distribution.url()?;
        let reader = client.stream_external(&url).await?;
        let mut reader = tokio::io::BufReader::new(reader.compat());
        let temp_dir = tempdir()?;

        // Create a directory for the wheel.
        let wheel_dir = self.0.map_or_else(
            || temp_dir.path().join(REMOTE_WHEELS_CACHE),
            |cache| cache.join(REMOTE_WHEELS_CACHE).join(distribution.id()),
        );
        fs::create_dir_all(&wheel_dir).await?;

        // Download the wheel.
        let wheel_filename = distribution.filename()?;
        let wheel_file = wheel_dir.join(wheel_filename.as_ref());
        let mut writer = tokio::fs::File::create(&wheel_file).await?;
        tokio::io::copy(&mut reader, &mut writer).await?;

        // Read the metadata from the wheel.
        let wheel = CachedWheel::new(wheel_file, WheelFilename::from_str(&wheel_filename)?);
        let metadata21 = wheel.read_dist_info()?;

        debug!("Finished downloading: {distribution}");
        Ok(metadata21)
    }
}

View file

@ -7,6 +7,7 @@ pub use resolver::{Reporter as ResolverReporter, Resolver};
pub use wheel_finder::{Reporter as WheelFinderReporter, WheelFinder};
mod candidate_selector;
mod distribution;
mod error;
mod file;
mod manifest;
@ -15,5 +16,4 @@ mod pubgrub;
mod resolution;
mod resolution_mode;
mod resolver;
mod source_distribution;
mod wheel_finder;

View file

@ -28,13 +28,13 @@ use puffin_package::pypi_types::{File, Metadata21, SimpleJson};
use puffin_traits::BuildContext;
use crate::candidate_selector::CandidateSelector;
use crate::distribution::{SourceDistributionFetcher, WheelFetcher};
use crate::error::ResolveError;
use crate::file::{DistributionFile, SdistFile, WheelFile};
use crate::manifest::Manifest;
use crate::pubgrub::{iter_requirements, version_range};
use crate::pubgrub::{PubGrubPackage, PubGrubPriorities, PubGrubVersion, MIN_VERSION};
use crate::resolution::Graph;
use crate::source_distribution::SourceDistributionBuildTree;
pub struct Resolver<'a, Context: BuildContext + Sync> {
requirements: Vec<Requirement>,
@ -655,12 +655,12 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
}
// Build a source distribution from the registry, returning its metadata.
Request::Sdist(package_name, version, file) => {
let build_tree = SourceDistributionBuildTree::new(self.build_context);
let builder = SourceDistributionFetcher::new(self.build_context);
let distribution =
RemoteDistributionRef::from_registry(&package_name, &version, &file);
let metadata = match build_tree.find_dist_info(&distribution, self.tags) {
let metadata = match builder.find_dist_info(&distribution, self.tags) {
Ok(Some(metadata)) => metadata,
Ok(None) => build_tree
Ok(None) => builder
.download_and_build_sdist(&distribution, self.client)
.await
.map_err(|err| ResolveError::RegistryDistribution {
@ -671,7 +671,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
error!(
"Failed to read source distribution {distribution} from cache: {err}",
);
build_tree
builder
.download_and_build_sdist(&distribution, self.client)
.await
.map_err(|err| ResolveError::RegistryDistribution {
@ -684,16 +684,16 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
}
// Build a source distribution from a remote URL, returning its metadata.
Request::SdistUrl(package_name, url) => {
let build_tree = SourceDistributionBuildTree::new(self.build_context);
let fetcher = SourceDistributionFetcher::new(self.build_context);
let distribution = RemoteDistributionRef::from_url(&package_name, &url);
let metadata = match build_tree.find_dist_info(&distribution, self.tags) {
let metadata = match fetcher.find_dist_info(&distribution, self.tags) {
Ok(Some(metadata)) => {
debug!("Found source distribution metadata in cache: {url}");
metadata
}
Ok(None) => {
debug!("Downloading source distribution from: {url}");
build_tree
fetcher
.download_and_build_sdist(&distribution, self.client)
.await
.map_err(|err| ResolveError::UrlDistribution {
@ -705,7 +705,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
error!(
"Failed to read source distribution {distribution} from cache: {err}",
);
build_tree
fetcher
.download_and_build_sdist(&distribution, self.client)
.await
.map_err(|err| ResolveError::UrlDistribution {
@ -718,16 +718,16 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
}
// Fetch wheel metadata from a remote URL.
Request::WheelUrl(package_name, url) => {
let build_tree = SourceDistributionBuildTree::new(self.build_context);
let fetcher = WheelFetcher::new(self.build_context.cache());
let distribution = RemoteDistributionRef::from_url(&package_name, &url);
let metadata = match build_tree.find_dist_info(&distribution, self.tags) {
let metadata = match fetcher.find_dist_info(&distribution, self.tags) {
Ok(Some(metadata)) => {
debug!("Found wheel metadata in cache: {url}");
metadata
}
Ok(None) => {
debug!("Downloading wheel from: {url}");
build_tree
fetcher
.download_wheel(&distribution, self.client)
.await
.map_err(|err| ResolveError::UrlDistribution {
@ -737,7 +737,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
}
Err(err) => {
error!("Failed to read wheel {distribution} from cache: {err}",);
build_tree
fetcher
.download_wheel(&distribution, self.client)
.await
.map_err(|err| ResolveError::UrlDistribution {

View file

@ -1,228 +0,0 @@
use std::borrow::Cow;
use std::path::PathBuf;
use std::str::FromStr;
use anyhow::{Error, Result};
use fs_err::tokio as fs;
use tempfile::tempdir;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::debug;
use url::Url;
use zip::ZipArchive;
use distribution_filename::WheelFilename;
use platform_tags::Tags;
use puffin_client::RegistryClient;
use puffin_distribution::RemoteDistributionRef;
use puffin_git::{Git, GitSource};
use puffin_package::pypi_types::Metadata21;
use puffin_traits::BuildContext;
/// Name of the cache subdirectory that holds wheels built from source distributions.
const BUILT_WHEELS_CACHE: &str = "built-wheels-v0";
/// Name of the cache subdirectory that holds wheels downloaded from remote sources.
const REMOTE_WHEELS_CACHE: &str = "remote-wheels-v0";
/// Name of the cache subdirectory handed to [`GitSource`] for Git checkouts.
const GIT_CACHE: &str = "git-v0";

/// Stores wheels built from source distributions. We need to keep those separate from the regular
/// wheel cache since a wheel with the same name may be uploaded after we made our build and in that
/// case the hashes would clash.
pub(crate) struct SourceDistributionBuildTree<'a, T: BuildContext>(&'a T);

impl<'a, T: BuildContext> SourceDistributionBuildTree<'a, T> {
    /// Initialize a [`SourceDistributionBuildTree`] from a [`BuildContext`].
    pub(crate) fn new(build_context: &'a T) -> Self {
        Self(build_context)
    }

    /// Read the [`Metadata21`] from a cached wheel for the distribution, if one exists.
    ///
    /// The wheel may have been built by [`Self::download_and_build_sdist`] or fetched by
    /// [`Self::download_wheel`]. Returns `Ok(None)` when nothing compatible is cached, and an
    /// error when a cached wheel is found but its metadata cannot be read.
    pub(crate) fn find_dist_info(
        &self,
        distribution: &RemoteDistributionRef<'_>,
        tags: &Tags,
    ) -> Result<Option<Metadata21>> {
        self.find_wheel(distribution, tags)
            .as_ref()
            .map(read_dist_info)
            .transpose()
    }

    /// Download and build a source distribution, storing the built wheel in the cache.
    pub(crate) async fn download_and_build_sdist(
        &self,
        distribution: &RemoteDistributionRef<'_>,
        client: &RegistryClient,
    ) -> Result<Metadata21> {
        debug!("Building: {distribution}");

        let temp_dir = tempdir()?;

        let source = DistributionSource::try_from(distribution)?;
        let sdist_file = match source {
            DistributionSource::Url(url) => {
                debug!("Fetching source distribution from: {url}");

                let reader = client.stream_external(&url).await?;
                let mut reader = tokio::io::BufReader::new(reader.compat());

                // Download the source distribution.
                let sdist_filename = distribution.filename()?;
                let sdist_file = temp_dir.path().join(sdist_filename.as_ref());
                let mut writer = tokio::fs::File::create(&sdist_file).await?;
                tokio::io::copy(&mut reader, &mut writer).await?;

                sdist_file
            }
            DistributionSource::Git(git) => {
                debug!("Fetching source distribution from: {git}");

                let git_dir = self.0.cache().map_or_else(
                    || temp_dir.path().join(GIT_CACHE),
                    |cache| cache.join(GIT_CACHE),
                );
                let source = GitSource::new(git, git_dir);
                tokio::task::spawn_blocking(move || source.fetch()).await??
            }
        };

        // Create a directory for the wheel.
        let wheel_dir = self.0.cache().map_or_else(
            || temp_dir.path().join(BUILT_WHEELS_CACHE),
            |cache| cache.join(BUILT_WHEELS_CACHE).join(distribution.id()),
        );
        fs::create_dir_all(&wheel_dir).await?;

        // Build the wheel.
        let disk_filename = self
            .0
            .build_source_distribution(&sdist_file, &wheel_dir)
            .await?;

        // Read the metadata from the wheel.
        let wheel = CachedWheel {
            path: wheel_dir.join(&disk_filename),
            filename: WheelFilename::from_str(&disk_filename)?,
        };
        let metadata21 = read_dist_info(&wheel)?;

        debug!("Finished building: {distribution}");
        Ok(metadata21)
    }

    /// Download a wheel, storing it in the remote-wheels cache, and return its metadata.
    pub(crate) async fn download_wheel(
        &self,
        distribution: &RemoteDistributionRef<'_>,
        client: &RegistryClient,
    ) -> Result<Metadata21> {
        debug!("Downloading: {distribution}");

        let url = distribution.url()?;
        let reader = client.stream_external(&url).await?;
        let mut reader = tokio::io::BufReader::new(reader.compat());
        let temp_dir = tempdir()?;

        // Create a directory for the wheel.
        let wheel_dir = self.0.cache().map_or_else(
            || temp_dir.path().join(REMOTE_WHEELS_CACHE),
            |cache| cache.join(REMOTE_WHEELS_CACHE).join(distribution.id()),
        );
        fs::create_dir_all(&wheel_dir).await?;

        // Download the wheel.
        let wheel_filename = distribution.filename()?;
        let wheel_file = wheel_dir.join(wheel_filename.as_ref());
        let mut writer = tokio::fs::File::create(&wheel_file).await?;
        tokio::io::copy(&mut reader, &mut writer).await?;

        // Read the metadata from the wheel.
        let wheel = CachedWheel {
            path: wheel_file,
            filename: WheelFilename::from_str(&wheel_filename)?,
        };
        let metadata21 = read_dist_info(&wheel)?;

        debug!("Finished downloading: {distribution}");
        Ok(metadata21)
    }

    /// Search for a cached wheel matching the tags that belongs to the given distribution.
    ///
    /// Both cache roots are scanned, built wheels first: `download_and_build_sdist` writes
    /// under `BUILT_WHEELS_CACHE` while `download_wheel` writes under `REMOTE_WHEELS_CACHE`,
    /// so looking only at the former would never find a previously downloaded wheel.
    fn find_wheel(
        &self,
        distribution: &RemoteDistributionRef<'_>,
        tags: &Tags,
    ) -> Option<CachedWheel> {
        let cache = self.0.cache()?;

        for bucket in [BUILT_WHEELS_CACHE, REMOTE_WHEELS_CACHE] {
            let wheel_dir = cache.join(bucket).join(distribution.id());
            // A missing or unreadable directory just means nothing is cached in this bucket.
            let Ok(read_dir) = fs_err::read_dir(wheel_dir) else {
                continue;
            };
            for entry in read_dir {
                let Ok(entry) = entry else {
                    continue;
                };
                let Ok(filename) =
                    WheelFilename::from_str(entry.file_name().to_string_lossy().as_ref())
                else {
                    continue;
                };
                if filename.is_compatible(tags) {
                    // `path()` already yields an owned `PathBuf`, so no extra clone is needed.
                    return Some(CachedWheel {
                        path: entry.path(),
                        filename,
                    });
                }
            }
        }

        None
    }
}
/// A wheel archive on disk, paired with its parsed filename.
#[derive(Debug)]
struct CachedWheel {
/// Location of the wheel archive; this is the file opened by `read_dist_info`.
path: PathBuf,
/// Parsed wheel filename; used for tag compatibility checks and passed to
/// `install_wheel_rs::find_dist_info` when reading metadata.
filename: WheelFilename,
}
/// Parse the [`Metadata21`] stored in the `METADATA` entry of a wheel's `.dist-info` directory.
///
/// Fails if the wheel cannot be opened as a zip archive, if the `.dist-info` directory cannot
/// be located, or if the `METADATA` entry cannot be read or parsed.
fn read_dist_info(wheel: &CachedWheel) -> Result<Metadata21> {
    let file = fs_err::File::open(&wheel.path)?;
    let mut archive = ZipArchive::new(file)?;

    // Resolve the name of the `.dist-info` directory for this wheel inside the archive.
    let dist_info_prefix = install_wheel_rs::find_dist_info(&wheel.filename, &mut archive)?;
    let metadata_entry = format!("{dist_info_prefix}.dist-info/METADATA");

    let contents = std::io::read_to_string(archive.by_name(&metadata_entry)?)?;
    let metadata = Metadata21::parse(contents.as_bytes())?;
    Ok(metadata)
}
/// Where the contents of a distribution are hosted.
#[derive(Debug)]
enum DistributionSource<'a> {
    /// A plain remote URL: either a dedicated URL, or one served by a registry such as PyPI.
    Url(Cow<'a, Url>),
    /// A remote Git repository.
    Git(Git),
}

impl<'a> TryFrom<&'a RemoteDistributionRef<'_>> for DistributionSource<'a> {
    type Error = Error;

    /// Classify a distribution reference as either a URL download or a Git checkout.
    ///
    /// Fails if the registry file URL, or the URL following a `git+` prefix, does not parse,
    /// or if the parsed URL cannot be converted into a [`Git`] reference.
    fn try_from(value: &'a RemoteDistributionRef<'_>) -> Result<Self, Self::Error> {
        match value {
            // Registry-hosted files carry their URL as a string, so it is parsed into an
            // owned `Url`.
            RemoteDistributionRef::Registry(_, _, file) => {
                Ok(Self::Url(Cow::Owned(Url::parse(&file.url)?)))
            }
            // Direct URLs are Git repositories when prefixed with `git+`; anything else is
            // treated as a hosted file and borrowed as-is.
            RemoteDistributionRef::Url(_, url) => match url.as_str().strip_prefix("git+") {
                Some(repository) => {
                    let repository = Url::parse(repository)?;
                    Ok(Self::Git(Git::try_from(repository)?))
                }
                None => Ok(Self::Url(Cow::Borrowed(url))),
            },
        }
    }
}