puffin-client: rejigger error type (#1102)

This PR changes the error type to be boxed internally so that it uses
less size on the stack. This makes functions returning `Result<T,
Error>`, in particular, return something much smaller.

The specific thing that motivated this was Clippy lints firing when I
tried to refactor code in this crate.

I chose to achieve boxing by splitting the enum out into a separate
type, and then wiring up the necessary `From` impl to make error
conversions easy, and then making `Error` itself opaque. We could expose
the `Box`, but there isn't a ton of benefit in doing so because one
cannot pattern match through a `Box`.

This required using more explicit error conversions in several places.
And as a result, I was able to remove all `#[from]` attributes on
non-transparent error variants.
This commit is contained in:
Andrew Gallant 2024-01-25 13:13:21 -05:00 committed by GitHub
parent 3e86c80874
commit 067acfe79e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 194 additions and 109 deletions

View file

@ -12,26 +12,32 @@ use tracing::{debug, info_span, instrument, trace, warn, Instrument};
use puffin_cache::{CacheEntry, Freshness};
use puffin_fs::write_atomic;
use crate::cache_headers::CacheHeaders;
use crate::{cache_headers::CacheHeaders, Error, ErrorKind};
/// Either a cached client error or a (user specified) error from the callback
#[derive(Debug)]
pub enum CachedClientError<CallbackError> {
Client(crate::Error),
Client(Error),
Callback(CallbackError),
}
impl<CallbackError> From<crate::Error> for CachedClientError<CallbackError> {
fn from(error: crate::Error) -> Self {
impl<CallbackError> From<Error> for CachedClientError<CallbackError> {
fn from(error: Error) -> Self {
CachedClientError::Client(error)
}
}
impl From<CachedClientError<crate::Error>> for crate::Error {
fn from(error: CachedClientError<crate::Error>) -> crate::Error {
impl<CallbackError> From<ErrorKind> for CachedClientError<CallbackError> {
fn from(error: ErrorKind) -> Self {
CachedClientError::Client(error.into())
}
}
impl<E: Into<Error>> From<CachedClientError<E>> for Error {
fn from(error: CachedClientError<E>) -> Error {
match error {
CachedClientError::Client(error) => error,
CachedClientError::Callback(error) => error,
CachedClientError::Callback(error) => error.into(),
}
}
}
@ -145,10 +151,10 @@ impl CachedClient {
CachedResponse::NotModified(data_with_cache_policy) => {
async {
let data =
rmp_serde::to_vec(&data_with_cache_policy).map_err(crate::Error::from)?;
rmp_serde::to_vec(&data_with_cache_policy).map_err(ErrorKind::Encode)?;
write_atomic(cache_entry.path(), data)
.await
.map_err(crate::Error::CacheWrite)?;
.map_err(ErrorKind::CacheWrite)?;
Ok(data_with_cache_policy.data)
}
.instrument(write_cache)
@ -170,12 +176,12 @@ impl CachedClient {
async {
fs_err::tokio::create_dir_all(cache_entry.dir())
.await
.map_err(crate::Error::CacheWrite)?;
.map_err(ErrorKind::CacheWrite)?;
let data = rmp_serde::to_vec(&data_with_cache_policy)
.map_err(crate::Error::from)?;
.map_err(ErrorKind::Encode)?;
write_atomic(cache_entry.path(), data)
.await
.map_err(crate::Error::CacheWrite)?;
.map_err(ErrorKind::CacheWrite)?;
Ok(data_with_cache_policy.data)
}
.instrument(write_cache)
@ -193,13 +199,14 @@ impl CachedClient {
mut req: Request,
cache_control: CacheControl,
cached: Option<DataWithCachePolicy<T>>,
) -> Result<CachedResponse<T>, crate::Error> {
) -> Result<CachedResponse<T>, Error> {
// The converted types are from the specific `reqwest` types to the more generic `http`
// types.
let mut converted_req = http::Request::try_from(
req.try_clone()
.expect("You can't use streaming request bodies with this function"),
)?;
)
.map_err(ErrorKind::RequestError)?;
let url = req.url().clone();
let cached_response = if let Some(cached) = cached {
@ -246,8 +253,10 @@ impl CachedClient {
.0
.execute(req)
.instrument(info_span!("revalidation_request", url = url.as_str()))
.await?
.error_for_status()?;
.await
.map_err(ErrorKind::RequestMiddlewareError)?
.error_for_status()
.map_err(ErrorKind::RequestError)?;
let mut converted_res = http::Response::new(());
*converted_res.status_mut() = res.status();
for header in res.headers() {
@ -295,9 +304,15 @@ impl CachedClient {
&self,
req: Request,
converted_req: http::Request<reqwest::Body>,
) -> Result<CachedResponse<T>, crate::Error> {
) -> Result<CachedResponse<T>, Error> {
trace!("{} {}", req.method(), req.url());
let res = self.0.execute(req).await?.error_for_status()?;
let res = self
.0
.execute(req)
.await
.map_err(ErrorKind::RequestMiddlewareError)?
.error_for_status()
.map_err(ErrorKind::RequestError)?;
let mut converted_res = http::Response::new(());
*converted_res.status_mut() = res.status();
for header in res.headers() {

View file

@ -8,7 +8,35 @@ use puffin_normalize::PackageName;
use crate::html;
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error(transparent)]
pub struct Error {
kind: Box<ErrorKind>,
}
impl Error {
pub fn into_kind(self) -> ErrorKind {
*self.kind
}
pub(crate) fn from_json_err(err: serde_json::Error, url: Url) -> Self {
ErrorKind::BadJson { source: err, url }.into()
}
pub(crate) fn from_html_err(err: html::Error, url: Url) -> Self {
ErrorKind::BadHtml { source: err, url }.into()
}
}
impl From<ErrorKind> for Error {
fn from(kind: ErrorKind) -> Error {
Error {
kind: Box::new(kind),
}
}
}
#[derive(Debug, thiserror::Error)]
pub enum ErrorKind {
/// An invalid URL was provided.
#[error(transparent)]
UrlParseError(#[from] url::ParseError),
@ -58,7 +86,7 @@ pub enum Error {
InvalidDistInfo(WheelFilename, String),
#[error("{0} is not a valid wheel filename")]
WheelFilename(#[from] WheelFilenameError),
WheelFilename(#[source] WheelFilenameError),
#[error("Package metadata name `{metadata}` does not match given name `{given}`")]
NameMismatch {
@ -76,10 +104,10 @@ pub enum Error {
Io(#[from] std::io::Error),
#[error("Cache deserialization failed")]
Decode(#[from] rmp_serde::decode::Error),
Decode(#[source] rmp_serde::decode::Error),
#[error("Cache serialization failed")]
Encode(#[from] rmp_serde::encode::Error),
Encode(#[source] rmp_serde::encode::Error),
/// An [`io::Error`] with a filename attached
#[error(transparent)]
@ -94,13 +122,3 @@ pub enum Error {
#[error("Unsupported `Content-Type` \"{1}\" for {0}. Expected JSON or HTML.")]
UnsupportedMediaType(Url, String),
}
impl Error {
pub(crate) fn from_json_err(err: serde_json::Error, url: Url) -> Self {
Self::BadJson { source: err, url }
}
pub(crate) fn from_html_err(err: html::Error, url: Url) -> Self {
Self::BadHtml { source: err, url }
}
}

View file

@ -19,9 +19,9 @@ use puffin_cache::{Cache, CacheBucket};
use puffin_normalize::PackageName;
use pypi_types::Hashes;
use crate::cached_client::CacheControl;
use crate::cached_client::{CacheControl, CachedClientError};
use crate::html::SimpleHtml;
use crate::{Error, RegistryClient};
use crate::{Error, ErrorKind, RegistryClient};
#[derive(Debug, thiserror::Error)]
pub enum FlatIndexError {
@ -92,7 +92,11 @@ impl<'a> FlatIndexClient<'a> {
"html",
format!("{}.msgpack", cache_key::digest(&url.to_string())),
);
let cache_control = CacheControl::from(self.cache.freshness(&cache_entry, None)?);
let cache_control = CacheControl::from(
self.cache
.freshness(&cache_entry, None)
.map_err(ErrorKind::Io)?,
);
let cached_client = self.client.cached_client();
@ -101,10 +105,11 @@ impl<'a> FlatIndexClient<'a> {
.get(url.clone())
.header("Accept-Encoding", "gzip")
.header("Accept", "text/html")
.build()?;
.build()
.map_err(ErrorKind::RequestError)?;
let parse_simple_response = |response: Response| {
async {
let text = response.text().await?;
let text = response.text().await.map_err(ErrorKind::RequestError)?;
let SimpleHtml { base, files } = SimpleHtml::parse(&text, url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;
@ -121,7 +126,7 @@ impl<'a> FlatIndexClient<'a> {
}
})
.collect();
Ok(files)
Ok::<Vec<File>, CachedClientError<Error>>(files)
}
.boxed()
.instrument(info_span!("parse_flat_index_html", url = % url))

View file

@ -1,5 +1,5 @@
pub use cached_client::{CacheControl, CachedClient, CachedClientError, DataWithCachePolicy};
pub use error::Error;
pub use error::{Error, ErrorKind};
pub use flat_index::{FlatDistributions, FlatIndex, FlatIndexClient, FlatIndexError};
pub use registry_client::{
read_metadata_async, RegistryClient, RegistryClientBuilder, SimpleMetadata, VersionFiles,

View file

@ -27,7 +27,7 @@ use pypi_types::{BaseUrl, Metadata21, SimpleJson};
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::{CachedClient, CachedClientError, Error};
use crate::{CachedClient, CachedClientError, Error, ErrorKind};
/// A builder for an [`RegistryClient`].
#[derive(Debug, Clone)]
@ -124,7 +124,7 @@ impl RegistryClient {
package_name: &PackageName,
) -> Result<(IndexUrl, SimpleMetadata), Error> {
if self.index_urls.no_index() {
return Err(Error::NoIndex(package_name.as_ref().to_string()));
return Err(ErrorKind::NoIndex(package_name.as_ref().to_string()).into());
}
for index in self.index_urls.indexes() {
@ -132,17 +132,20 @@ impl RegistryClient {
return match result {
Ok(metadata) => Ok((index.clone(), metadata)),
Err(CachedClientError::Client(Error::RequestError(err))) => {
Err(CachedClientError::Client(err)) => match err.into_kind() {
ErrorKind::RequestError(err) => {
if err.status() == Some(StatusCode::NOT_FOUND) {
continue;
}
Err(err.into())
Err(ErrorKind::RequestError(err).into())
}
Err(err) => Err(err.into()),
other => Err(other.into()),
},
Err(CachedClientError::Callback(err)) => Err(err),
};
}
Err(Error::PackageNotFound(package_name.to_string()))
Err(ErrorKind::PackageNotFound(package_name.to_string()).into())
}
async fn simple_single_index(
@ -167,8 +170,11 @@ impl RegistryClient {
}),
format!("{package_name}.msgpack"),
);
let cache_control =
CacheControl::from(self.cache.freshness(&cache_entry, Some(package_name))?);
let cache_control = CacheControl::from(
self.cache
.freshness(&cache_entry, Some(package_name))
.map_err(ErrorKind::Io)?,
);
let simple_request = self
.client
@ -176,24 +182,28 @@ impl RegistryClient {
.get(url.clone())
.header("Accept-Encoding", "gzip")
.header("Accept", MediaType::accepts())
.build()?;
.build()
.map_err(ErrorKind::RequestError)?;
let parse_simple_response = |response: Response| {
async {
let content_type = response
.headers()
.get("content-type")
.ok_or_else(|| Error::MissingContentType(url.clone()))?;
let content_type = content_type
.to_str()
.map_err(|err| Error::InvalidContentTypeHeader(url.clone(), err))?;
.ok_or_else(|| Error::from(ErrorKind::MissingContentType(url.clone())))?;
let content_type = content_type.to_str().map_err(|err| {
Error::from(ErrorKind::InvalidContentTypeHeader(url.clone(), err))
})?;
let media_type = content_type.split(';').next().unwrap_or(content_type);
let media_type = MediaType::from_str(media_type).ok_or_else(|| {
Error::UnsupportedMediaType(url.clone(), media_type.to_string())
Error::from(ErrorKind::UnsupportedMediaType(
url.clone(),
media_type.to_string(),
))
})?;
match media_type {
MediaType::Json => {
let bytes = response.bytes().await?;
let bytes = response.bytes().await.map_err(ErrorKind::RequestError)?;
let data: SimpleJson = serde_json::from_slice(bytes.as_ref())
.map_err(|err| Error::from_json_err(err, url.clone()))?;
let base = BaseUrl::from(url.clone());
@ -201,7 +211,7 @@ impl RegistryClient {
Ok(metadata)
}
MediaType::Html => {
let text = response.text().await?;
let text = response.text().await.map_err(ErrorKind::RequestError)?;
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;
let metadata = SimpleMetadata::from_files(files, package_name, &base);
@ -235,17 +245,19 @@ impl RegistryClient {
let metadata = match &built_dist {
BuiltDist::Registry(wheel) => match &wheel.file.url {
FileLocation::RelativeUrl(base, url) => {
let url = base.join_relative(url)?;
let url = base.join_relative(url).map_err(ErrorKind::UrlParseError)?;
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url)
.await?
}
FileLocation::AbsoluteUrl(url) => {
let url = Url::parse(url)?;
let url = Url::parse(url).map_err(ErrorKind::UrlParseError)?;
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url)
.await?
}
FileLocation::Path(path) => {
let file = fs_err::tokio::File::open(&path).await?;
let file = fs_err::tokio::File::open(&path)
.await
.map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async(&wheel.filename, built_dist.to_string(), reader).await?
}
@ -259,17 +271,19 @@ impl RegistryClient {
.await?
}
BuiltDist::Path(wheel) => {
let file = fs_err::tokio::File::open(&wheel.path).await?;
let file = fs_err::tokio::File::open(&wheel.path)
.await
.map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async(&wheel.filename, built_dist.to_string(), reader).await?
}
};
if metadata.name != *built_dist.name() {
return Err(Error::NameMismatch {
return Err(Error::from(ErrorKind::NameMismatch {
metadata: metadata.name,
given: built_dist.name().clone(),
});
}));
}
Ok(metadata)
@ -283,32 +297,44 @@ impl RegistryClient {
url: &Url,
) -> Result<Metadata21, Error> {
// If the metadata file is available at its own url (PEP 658), download it from there.
let filename = WheelFilename::from_str(&file.filename)?;
let filename = WheelFilename::from_str(&file.filename).map_err(ErrorKind::WheelFilename)?;
if file
.dist_info_metadata
.as_ref()
.is_some_and(pypi_types::DistInfoMetadata::is_available)
{
let url = Url::parse(&format!("{}.metadata", url))?;
let url = Url::parse(&format!("{}.metadata", url)).map_err(ErrorKind::UrlParseError)?;
let cache_entry = self.cache.entry(
CacheBucket::Wheels,
WheelCache::Index(index).remote_wheel_dir(filename.name.as_ref()),
format!("{}.msgpack", filename.stem()),
);
let cache_control =
CacheControl::from(self.cache.freshness(&cache_entry, Some(&filename.name))?);
let cache_control = CacheControl::from(
self.cache
.freshness(&cache_entry, Some(&filename.name))
.map_err(ErrorKind::Io)?,
);
let response_callback = |response: Response| async {
let bytes = response.bytes().await?;
let bytes = response.bytes().await.map_err(ErrorKind::RequestError)?;
info_span!("parse_metadata21")
.in_scope(|| Metadata21::parse(bytes.as_ref()))
.map_err(|err| {
Error::MetadataParseError(filename, url.to_string(), Box::new(err))
Error::from(ErrorKind::MetadataParseError(
filename,
url.to_string(),
Box::new(err),
))
})
};
let req = self.client.uncached().get(url.clone()).build()?;
let req = self
.client
.uncached()
.get(url.clone())
.build()
.map_err(ErrorKind::RequestError)?;
Ok(self
.client
.get_cached_with_callback(req, &cache_entry, cache_control, response_callback)
@ -334,27 +360,41 @@ impl RegistryClient {
cache_shard.remote_wheel_dir(filename.name.as_ref()),
format!("{}.msgpack", filename.stem()),
);
let cache_control =
CacheControl::from(self.cache.freshness(&cache_entry, Some(&filename.name))?);
let cache_control = CacheControl::from(
self.cache
.freshness(&cache_entry, Some(&filename.name))
.map_err(ErrorKind::Io)?,
);
// This response callback is special, we actually make a number of subsequent requests to
// fetch the file from the remote zip.
let client = self.client_raw.clone();
let read_metadata_range_request = |response: Response| {
async {
let mut reader = AsyncHttpRangeReader::from_head_response(client, response).await?;
let mut reader = AsyncHttpRangeReader::from_head_response(client, response)
.await
.map_err(ErrorKind::AsyncHttpRangeReader)?;
trace!("Getting metadata for {filename} by range request");
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
let metadata = Metadata21::parse(text.as_bytes()).map_err(|err| {
Error::MetadataParseError(filename.clone(), url.to_string(), Box::new(err))
Error::from(ErrorKind::MetadataParseError(
filename.clone(),
url.to_string(),
Box::new(err),
))
})?;
Ok(metadata)
Ok::<Metadata21, CachedClientError<Error>>(metadata)
}
.boxed()
.instrument(info_span!("read_metadata_range_request", wheel = %filename))
};
let req = self.client.uncached().head(url.clone()).build()?;
let req = self
.client
.uncached()
.head(url.clone())
.build()
.map_err(ErrorKind::RequestError)?;
let result = self
.client
.get_cached_with_callback(
@ -368,10 +408,12 @@ impl RegistryClient {
match result {
Ok(metadata) => return Ok(metadata),
Err(Error::AsyncHttpRangeReader(
Err(err) => match err.into_kind() {
ErrorKind::AsyncHttpRangeReader(
AsyncHttpRangeReaderError::HttpRangeRequestUnsupported,
)) => {}
Err(err) => return Err(err),
) => {}
kind => return Err(kind.into()),
},
}
// The range request version failed (this is bad, the webserver should support this), fall
@ -383,12 +425,12 @@ impl RegistryClient {
// you host your wheels for some reasons doesn't support range requests
// (tbh we should probably warn here and tell users to get a better registry because
// their current one makes resolution unnecessary slow).
let temp_download = tempfile_in(self.cache.root()).map_err(Error::CacheWrite)?;
let temp_download = tempfile_in(self.cache.root()).map_err(ErrorKind::CacheWrite)?;
let mut writer = BufWriter::new(tokio::fs::File::from_std(temp_download));
let mut reader = self.stream_external(url).await?.compat();
tokio::io::copy(&mut reader, &mut writer)
.await
.map_err(Error::CacheWrite)?;
.map_err(ErrorKind::CacheWrite)?;
let reader = writer.into_inner();
read_metadata_async(filename, url.to_string(), reader).await
@ -404,8 +446,10 @@ impl RegistryClient {
.uncached()
.get(url.to_string())
.send()
.await?
.error_for_status()?
.await
.map_err(ErrorKind::RequestMiddlewareError)?
.error_for_status()
.map_err(ErrorKind::RequestError)?
.bytes_stream()
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))
.into_async_read(),
@ -421,7 +465,7 @@ pub async fn read_metadata_async(
) -> Result<Metadata21, Error> {
let mut zip_reader = ZipFileReader::with_tokio(reader)
.await
.map_err(|err| Error::Zip(filename.clone(), err))?;
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
let (metadata_idx, _dist_info_prefix) = find_dist_info(
filename,
@ -431,20 +475,22 @@ pub async fn read_metadata_async(
.iter()
.enumerate()
.filter_map(|(idx, e)| Some((idx, e.filename().as_str().ok()?))),
)?;
)
.map_err(ErrorKind::InstallWheel)?;
// Read the contents of the METADATA file
let mut contents = Vec::new();
zip_reader
.reader_with_entry(metadata_idx)
.await
.map_err(|err| Error::Zip(filename.clone(), err))?
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?
.read_to_end_checked(&mut contents)
.await
.map_err(|err| Error::Zip(filename.clone(), err))?;
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
let metadata = Metadata21::parse(&contents)
.map_err(|err| Error::MetadataParseError(filename.clone(), debug_source, Box::new(err)))?;
let metadata = Metadata21::parse(&contents).map_err(|err| {
ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err))
})?;
Ok(metadata)
}

View file

@ -5,7 +5,7 @@ use tokio_util::compat::TokioAsyncReadCompatExt;
use distribution_filename::WheelFilename;
use install_wheel_rs::find_dist_info;
use crate::Error;
use crate::{Error, ErrorKind};
/// Read the `.dist-info/METADATA` file from a async remote zip reader, so we avoid downloading the
/// entire wheel just for the one file.
@ -63,7 +63,7 @@ pub(crate) async fn wheel_metadata_from_remote_zip(
// Construct a zip reader to uses the stream.
let mut reader = ZipFileReader::new(reader.compat())
.await
.map_err(|err| Error::Zip(filename.clone(), err))?;
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
let ((metadata_idx, metadata_entry), _dist_info_prefix) = find_dist_info(
filename,
@ -73,7 +73,8 @@ pub(crate) async fn wheel_metadata_from_remote_zip(
.iter()
.enumerate()
.filter_map(|(idx, e)| Some(((idx, e), e.filename().as_str().ok()?))),
)?;
)
.map_err(ErrorKind::InstallWheel)?;
let offset = metadata_entry.header_offset();
let size = metadata_entry.compressed_size()
@ -97,10 +98,10 @@ pub(crate) async fn wheel_metadata_from_remote_zip(
reader
.reader_with_entry(metadata_idx)
.await
.map_err(|err| Error::Zip(filename.clone(), err))?
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?
.read_to_string_checked(&mut contents)
.await
.map_err(|err| Error::Zip(filename.clone(), err))?;
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
Ok(contents)
}

View file

@ -754,17 +754,17 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
// Create a temporary directory.
let temp_dir = tempfile::tempdir_in(self.build_context.cache().root())
.map_err(puffin_client::Error::CacheWrite)?;
.map_err(puffin_client::ErrorKind::CacheWrite)?;
// Download the source distribution to a temporary file.
let mut writer = tokio::io::BufWriter::new(
fs_err::tokio::File::create(temp_dir.path().join(source_dist_filename))
.await
.map_err(puffin_client::Error::CacheWrite)?,
.map_err(puffin_client::ErrorKind::CacheWrite)?,
);
tokio::io::copy(&mut reader, &mut writer)
.await
.map_err(puffin_client::Error::CacheWrite)?;
.map_err(puffin_client::ErrorKind::CacheWrite)?;
Ok(temp_dir)
}

View file

@ -105,17 +105,17 @@ impl<'a, Context: BuildContext + Send + Sync> ResolverProvider
self.flat_index.get(package_name).cloned(),
self.no_binary,
)),
Err(
err @ (puffin_client::Error::PackageNotFound(_)
| puffin_client::Error::NoIndex(_)),
) => {
Err(err) => match err.into_kind() {
kind @ (puffin_client::ErrorKind::PackageNotFound(_)
| puffin_client::ErrorKind::NoIndex(_)) => {
if let Some(flat_index) = self.flat_index.get(package_name).cloned() {
Ok(VersionMap::from(flat_index))
} else {
Err(err)
Err(kind.into())
}
}
Err(err) => Err(err),
kind => Err(kind.into()),
},
})
}