Use a buffered reader for wheel metadata (#1082)

## Summary

It turns out this is significantly faster when reading (e.g.) _just_ the
`METADATA` file from a zipped wheel.

I audited other `File::open` usages, and everything else seems to be
using a buffered reader already (directly, or in whatever third-party
crate it's passed to) _or_ is read immediately in full.

See the criterion benchmark:

```
file_reader/numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
                        time:   [6.9618 ms 6.9664 ms 6.9713 ms]
Found 4 outliers among 100 measurements (4.00%)
  4 (4.00%) high mild
file_reader/flask-3.0.1-py3-none-any.whl
                        time:   [237.50 µs 238.25 µs 239.13 µs]
Found 7 outliers among 100 measurements (7.00%)
  3 (3.00%) high mild
  4 (4.00%) high severe

buffered_reader/numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
                        time:   [648.92 µs 653.85 µs 660.09 µs]
Found 4 outliers among 100 measurements (4.00%)
  3 (3.00%) high mild
  1 (1.00%) high severe
buffered_reader/flask-3.0.1-py3-none-any.whl
                        time:   [39.578 µs 39.712 µs 39.869 µs]
Found 8 outliers among 100 measurements (8.00%)
  3 (3.00%) high mild
  5 (5.00%) high severe
```
This commit is contained in:
Charlie Marsh 2024-01-24 12:22:55 -08:00 committed by GitHub
parent 0019fe71f6
commit cedd2e0b3f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 8 additions and 32 deletions

View file

@ -245,7 +245,8 @@ impl RegistryClient {
.await? .await?
} }
FileLocation::Path(path) => { FileLocation::Path(path) => {
let reader = fs_err::tokio::File::open(&path).await?; let file = fs_err::tokio::File::open(&path).await?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async(&wheel.filename, built_dist.to_string(), reader).await? read_metadata_async(&wheel.filename, built_dist.to_string(), reader).await?
} }
}, },
@ -258,7 +259,8 @@ impl RegistryClient {
.await? .await?
} }
BuiltDist::Path(wheel) => { BuiltDist::Path(wheel) => {
let reader = fs_err::tokio::File::open(&wheel.path).await?; let file = fs_err::tokio::File::open(&wheel.path).await?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async(&wheel.filename, built_dist.to_string(), reader).await? read_metadata_async(&wheel.filename, built_dist.to_string(), reader).await?
} }
}; };

View file

@ -1,14 +1,7 @@
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use anyhow::Result;
use zip::ZipArchive;
use distribution_filename::WheelFilename; use distribution_filename::WheelFilename;
use distribution_types::{CachedDist, Dist}; use distribution_types::{CachedDist, Dist};
use install_wheel_rs::read_dist_info;
use pypi_types::Metadata21;
use crate::error::Error;
/// A wheel that's been unzipped while downloading /// A wheel that's been unzipped while downloading
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -126,14 +119,3 @@ impl std::fmt::Display for LocalWheel {
write!(f, "{}", self.remote()) write!(f, "{}", self.remote())
} }
} }
impl BuiltWheel {
/// Read the [`Metadata21`] from a wheel.
pub fn read_dist_info(&self) -> Result<Metadata21, Error> {
let mut archive = ZipArchive::new(fs_err::File::open(&self.path)?)?;
let dist_info = read_dist_info(&self.filename, &mut archive).map_err(|err| {
Error::DistInfo(Box::new(self.filename.clone()), self.dist.to_string(), err)
})?;
Ok(Metadata21::parse(&dist_info)?)
}
}

View file

@ -1,19 +1,9 @@
use distribution_filename::WheelFilename;
#[derive(thiserror::Error, Debug)] #[derive(thiserror::Error, Debug)]
pub enum Error { pub(crate) enum Error {
#[error(transparent)] #[error(transparent)]
IO(#[from] std::io::Error), IO(#[from] std::io::Error),
#[error(transparent)] #[error(transparent)]
PypiTypes(#[from] pypi_types::Error), PypiTypes(#[from] pypi_types::Error),
#[error(transparent)] #[error(transparent)]
Zip(#[from] zip::result::ZipError), Zip(#[from] zip::result::ZipError),
#[error("Unable to read .dist-info directory in {0} from {1}")]
DistInfo(
Box<WheelFilename>,
String,
#[source] install_wheel_rs::Error,
),
#[error("Unable to parse wheel filename for: {0}")]
FilenameParse(String, #[source] anyhow::Error),
} }

View file

@ -1005,7 +1005,9 @@ fn read_wheel_metadata(
filename: &WheelFilename, filename: &WheelFilename,
wheel: impl Into<PathBuf>, wheel: impl Into<PathBuf>,
) -> Result<Metadata21, SourceDistError> { ) -> Result<Metadata21, SourceDistError> {
let mut archive = ZipArchive::new(fs_err::File::open(wheel)?)?; let file = fs_err::File::open(wheel)?;
let reader = std::io::BufReader::new(file);
let mut archive = ZipArchive::new(reader)?;
let dist_info = read_dist_info(filename, &mut archive)?; let dist_info = read_dist_info(filename, &mut archive)?;
Ok(Metadata21::parse(&dist_info)?) Ok(Metadata21::parse(&dist_info)?)
} }