From cedd2e0b3f8c84b8d81b47710c1f69d3afd84d68 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 24 Jan 2024 12:22:55 -0800 Subject: [PATCH] Use a buffered reader for wheel metadata (#1082) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary It turns out this is significantly faster when reading (e.g.) _just_ the `METADATA` file from a zipped wheel. I audited other `File::open` usages, and everything else seems to be using a buffered reader already (directly, or in whatever third-party crate it's passed to) _or_ is read immediately in full. See the criterion benchmark: ``` file_reader/numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl time: [6.9618 ms 6.9664 ms 6.9713 ms] Found 4 outliers among 100 measurements (4.00%) 4 (4.00%) high mild file_reader/flask-3.0.1-py3-none-any.whl time: [237.50 µs 238.25 µs 239.13 µs] Found 7 outliers among 100 measurements (7.00%) 3 (3.00%) high mild 4 (4.00%) high severe buffered_reader/numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl time: [648.92 µs 653.85 µs 660.09 µs] Found 4 outliers among 100 measurements (4.00%) 3 (3.00%) high mild 1 (1.00%) high severe buffered_reader/flask-3.0.1-py3-none-any.whl time: [39.578 µs 39.712 µs 39.869 µs] Found 8 outliers among 100 measurements (8.00%) 3 (3.00%) high mild 5 (5.00%) high severe ``` --- crates/puffin-client/src/registry_client.rs | 6 ++++-- crates/puffin-distribution/src/download.rs | 18 ------------------ crates/puffin-distribution/src/error.rs | 12 +----------- crates/puffin-distribution/src/source/mod.rs | 4 +++- 4 files changed, 8 insertions(+), 32 deletions(-) diff --git a/crates/puffin-client/src/registry_client.rs b/crates/puffin-client/src/registry_client.rs index 5410004f2..d2d3acba9 100644 --- a/crates/puffin-client/src/registry_client.rs +++ b/crates/puffin-client/src/registry_client.rs @@ -245,7 +245,8 @@ impl RegistryClient { .await? } FileLocation::Path(path) => { - let reader = fs_err::tokio::File::open(&path).await?; + let file = fs_err::tokio::File::open(&path).await?; + let reader = tokio::io::BufReader::new(file); read_metadata_async(&wheel.filename, built_dist.to_string(), reader).await? } }, @@ -258,7 +259,8 @@ impl RegistryClient { .await? } BuiltDist::Path(wheel) => { - let reader = fs_err::tokio::File::open(&wheel.path).await?; + let file = fs_err::tokio::File::open(&wheel.path).await?; + let reader = tokio::io::BufReader::new(file); read_metadata_async(&wheel.filename, built_dist.to_string(), reader).await? } }; diff --git a/crates/puffin-distribution/src/download.rs b/crates/puffin-distribution/src/download.rs index 1ac8f67a5..5ca29962e 100644 --- a/crates/puffin-distribution/src/download.rs +++ b/crates/puffin-distribution/src/download.rs @@ -1,14 +1,7 @@ use std::path::{Path, PathBuf}; -use anyhow::Result; -use zip::ZipArchive; - use distribution_filename::WheelFilename; use distribution_types::{CachedDist, Dist}; -use install_wheel_rs::read_dist_info; -use pypi_types::Metadata21; - -use crate::error::Error; /// A wheel that's been unzipped while downloading #[derive(Debug, Clone)] @@ -126,14 +119,3 @@ impl std::fmt::Display for LocalWheel { write!(f, "{}", self.remote()) } } - -impl BuiltWheel { - /// Read the [`Metadata21`] from a wheel. - pub fn read_dist_info(&self) -> Result { - let mut archive = ZipArchive::new(fs_err::File::open(&self.path)?)?; - let dist_info = read_dist_info(&self.filename, &mut archive).map_err(|err| { - Error::DistInfo(Box::new(self.filename.clone()), self.dist.to_string(), err) - })?; - Ok(Metadata21::parse(&dist_info)?) - } -} diff --git a/crates/puffin-distribution/src/error.rs b/crates/puffin-distribution/src/error.rs index f5e9a8a8a..f77e16e84 100644 --- a/crates/puffin-distribution/src/error.rs +++ b/crates/puffin-distribution/src/error.rs @@ -1,19 +1,9 @@ -use distribution_filename::WheelFilename; - #[derive(thiserror::Error, Debug)] -pub enum Error { +pub(crate) enum Error { #[error(transparent)] IO(#[from] std::io::Error), #[error(transparent)] PypiTypes(#[from] pypi_types::Error), #[error(transparent)] Zip(#[from] zip::result::ZipError), - #[error("Unable to read .dist-info directory in {0} from {1}")] - DistInfo( - Box, - String, - #[source] install_wheel_rs::Error, - ), - #[error("Unable to parse wheel filename for: {0}")] - FilenameParse(String, #[source] anyhow::Error), } diff --git a/crates/puffin-distribution/src/source/mod.rs b/crates/puffin-distribution/src/source/mod.rs index ae48c8d85..6a1e34d4d 100644 --- a/crates/puffin-distribution/src/source/mod.rs +++ b/crates/puffin-distribution/src/source/mod.rs @@ -1005,7 +1005,9 @@ fn read_wheel_metadata( filename: &WheelFilename, wheel: impl Into, ) -> Result { - let mut archive = ZipArchive::new(fs_err::File::open(wheel)?)?; + let file = fs_err::File::open(wheel)?; + let reader = std::io::BufReader::new(file); + let mut archive = ZipArchive::new(reader)?; let dist_info = read_dist_info(filename, &mut archive)?; Ok(Metadata21::parse(&dist_info)?) }