Add Seek fallback for zip files (#2320)

## Summary

Some zip files can't be streamed; in particular, `rs-async-zip` doesn't
support data descriptors right now (though it may in the future). This
PR adds a fallback path for such zips that downloads the entire zip file
to disk, then unzips it from disk (which gives us `Seek`).

Closes https://github.com/astral-sh/uv/issues/2216.

## Test Plan

`cargo run pip install --extra-index-url https://buf.build/gen/python
hashb_foxglove_protocolbuffers_python==25.3.0.1.20240226043130+465630478360
--force-reinstall -n`
This commit is contained in:
Charlie Marsh 2024-03-10 08:39:28 -07:00 committed by GitHub
parent 67fb023f10
commit a267a501b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 591 additions and 160 deletions

View file

@ -1,16 +1,13 @@
//! Takes a wheel and installs it into a venv.
use std::io;
use std::io::{Read, Seek};
use std::path::PathBuf;
use std::str::FromStr;
use platform_info::PlatformInfoError;
use thiserror::Error;
use zip::result::ZipError;
use zip::ZipArchive;
use distribution_filename::WheelFilename;
use pep440_rs::Version;
use platform_host::{Arch, Os};
use pypi_types::Scheme;
@ -19,6 +16,7 @@ use uv_fs::Simplified;
use uv_normalize::PackageName;
pub mod linker;
pub mod metadata;
mod record;
mod script;
mod uninstall;
@ -99,131 +97,3 @@ pub enum Error {
#[error("Wheel version does not match filename: {0} != {1}")]
MismatchedVersion(Version, Version),
}
/// Returns `true` if the file is a `METADATA` file in a `dist-info` directory that matches the
/// wheel filename.
pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> bool {
    // The entry must be exactly one level deep: `<dir>/METADATA`.
    let Some((dist_info_dir, file)) = path.split_once('/') else {
        return false;
    };
    if file != "METADATA" {
        return false;
    }
    // The directory must be `<name>-<version>.dist-info`, where both the name and the version
    // parse successfully and match the wheel filename.
    dist_info_dir
        .strip_suffix(".dist-info")
        .and_then(|dir_stem| dir_stem.rsplit_once('-'))
        .map_or(false, |(name, version)| {
            PackageName::from_str(name).map_or(false, |name| name == filename.name)
                && Version::from_str(version).map_or(false, |version| version == filename.version)
        })
}
/// Find the `dist-info` directory from a list of files.
///
/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or
/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase.
/// Either way, we just search the wheel for the name.
///
/// Returns the dist info dir prefix without the `.dist-info` extension.
///
/// Reference implementation: <https://github.com/pypa/packaging/blob/2f83540272e79e3fe1f5d42abae8df0c14ddf4c2/src/packaging/utils.py#L146-L172>
pub fn find_dist_info<'a, T: Copy>(
    filename: &WheelFilename,
    files: impl Iterator<Item = (T, &'a str)>,
) -> Result<(T, &'a str), Error> {
    // Collect every `<name>-<version>.dist-info/METADATA` entry whose name and version parse
    // and match the wheel filename. The caller-supplied payload `T` is carried through so the
    // caller can map the match back to (e.g.) a zip entry index.
    let metadatas: Vec<_> = files
        .filter_map(|(payload, path)| {
            let (dist_info_dir, file) = path.split_once('/')?;
            if file != "METADATA" {
                return None;
            }
            let dir_stem = dist_info_dir.strip_suffix(".dist-info")?;
            let (name, version) = dir_stem.rsplit_once('-')?;
            if PackageName::from_str(name).ok()? != filename.name {
                return None;
            }
            if Version::from_str(version).ok()? != filename.version {
                return None;
            }
            Some((payload, dir_stem))
        })
        .collect();
    // A valid wheel contains exactly one matching `.dist-info` directory; zero or multiple
    // matches are reported as distinct errors.
    let (payload, dist_info_prefix) = match metadatas[..] {
        [] => {
            return Err(Error::MissingDistInfo);
        }
        [(payload, path)] => (payload, path),
        _ => {
            return Err(Error::MultipleDistInfo(
                metadatas
                    .into_iter()
                    .map(|(_, dist_info_dir)| dist_info_dir.to_string())
                    .collect::<Vec<_>>()
                    .join(", "),
            ));
        }
    };
    Ok((payload, dist_info_prefix))
}
/// Given an archive, read the `dist-info` metadata into a buffer.
pub fn read_dist_info(
    filename: &WheelFilename,
    archive: &mut ZipArchive<impl Read + Seek + Sized>,
) -> Result<Vec<u8>, Error> {
    // Locate the `.dist-info` prefix by scanning the archive's file names.
    let dist_info_prefix =
        find_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1;
    let mut file = archive
        .by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))
        .map_err(|err| Error::Zip(filename.to_string(), err))?;

    // Pre-size the buffer from the entry's uncompressed size; the cast may truncate on 32-bit
    // targets, but `with_capacity` is only a hint, so correctness is unaffected.
    #[allow(clippy::cast_possible_truncation)]
    let mut buffer = Vec::with_capacity(file.size() as usize);
    file.read_to_end(&mut buffer)?;
    Ok(buffer)
}
#[cfg(test)]
mod test {
    use std::str::FromStr;

    use distribution_filename::WheelFilename;

    use crate::find_dist_info;

    /// A package name containing a `.` (e.g., `Mastodon.py`) must not confuse the
    /// `<name>-<version>` parsing of the `.dist-info` directory name.
    #[test]
    fn test_dot_in_name() {
        let files = [
            "mastodon/Mastodon.py",
            "mastodon/__init__.py",
            "mastodon/streaming.py",
            "Mastodon.py-1.5.1.dist-info/DESCRIPTION.rst",
            "Mastodon.py-1.5.1.dist-info/metadata.json",
            "Mastodon.py-1.5.1.dist-info/top_level.txt",
            "Mastodon.py-1.5.1.dist-info/WHEEL",
            "Mastodon.py-1.5.1.dist-info/METADATA",
            "Mastodon.py-1.5.1.dist-info/RECORD",
        ];
        let filename = WheelFilename::from_str("Mastodon.py-1.5.1-py2.py3-none-any.whl").unwrap();
        let (_, dist_info_prefix) =
            find_dist_info(&filename, files.into_iter().map(|file| (file, file))).unwrap();
        assert_eq!(dist_info_prefix, "Mastodon.py-1.5.1");
    }
}

View file

@ -0,0 +1,197 @@
use std::io::{Read, Seek};
use std::path::Path;
use std::str::FromStr;
use zip::ZipArchive;
use distribution_filename::WheelFilename;
use pep440_rs::Version;
use uv_normalize::PackageName;
use crate::Error;
/// Returns `true` if the file is a `METADATA` file in a `.dist-info` directory that matches the
/// wheel filename.
pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> bool {
    // The entry must be exactly one level deep: `<dir>/METADATA`.
    let Some((dist_info_dir, file)) = path.split_once('/') else {
        return false;
    };
    if file != "METADATA" {
        return false;
    }
    // The directory must be `<name>-<version>.dist-info`, where both the name and the version
    // parse successfully and match the wheel filename.
    dist_info_dir
        .strip_suffix(".dist-info")
        .and_then(|dir_stem| dir_stem.rsplit_once('-'))
        .map_or(false, |(name, version)| {
            PackageName::from_str(name).map_or(false, |name| name == filename.name)
                && Version::from_str(version).map_or(false, |version| version == filename.version)
        })
}
/// Find the `.dist-info` directory in a zipped wheel.
///
/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or
/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase.
/// Either way, we just search the wheel for the name.
///
/// Returns the dist info dir prefix without the `.dist-info` extension.
///
/// Reference implementation: <https://github.com/pypa/packaging/blob/2f83540272e79e3fe1f5d42abae8df0c14ddf4c2/src/packaging/utils.py#L146-L172>
pub fn find_archive_dist_info<'a, T: Copy>(
    filename: &WheelFilename,
    files: impl Iterator<Item = (T, &'a str)>,
) -> Result<(T, &'a str), Error> {
    // Collect every `<name>-<version>.dist-info/METADATA` entry whose name and version parse
    // and match the wheel filename. The caller-supplied payload `T` is carried through so the
    // caller can map the match back to (e.g.) a zip entry index.
    let metadatas: Vec<_> = files
        .filter_map(|(payload, path)| {
            let (dist_info_dir, file) = path.split_once('/')?;
            if file != "METADATA" {
                return None;
            }
            let dir_stem = dist_info_dir.strip_suffix(".dist-info")?;
            let (name, version) = dir_stem.rsplit_once('-')?;
            if PackageName::from_str(name).ok()? != filename.name {
                return None;
            }
            if Version::from_str(version).ok()? != filename.version {
                return None;
            }
            Some((payload, dir_stem))
        })
        .collect();
    // A valid wheel contains exactly one matching `.dist-info` directory; zero or multiple
    // matches are reported as distinct errors.
    let (payload, dist_info_prefix) = match metadatas[..] {
        [] => {
            return Err(Error::MissingDistInfo);
        }
        [(payload, path)] => (payload, path),
        _ => {
            return Err(Error::MultipleDistInfo(
                metadatas
                    .into_iter()
                    .map(|(_, dist_info_dir)| dist_info_dir.to_string())
                    .collect::<Vec<_>>()
                    .join(", "),
            ));
        }
    };
    Ok((payload, dist_info_prefix))
}
/// Given an archive, read the `METADATA` from the `.dist-info` directory.
pub fn read_archive_metadata(
    filename: &WheelFilename,
    archive: &mut ZipArchive<impl Read + Seek + Sized>,
) -> Result<Vec<u8>, Error> {
    // Locate the `.dist-info` prefix by scanning the archive's file names.
    let dist_info_prefix =
        find_archive_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1;
    let mut file = archive
        .by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))
        .map_err(|err| Error::Zip(filename.to_string(), err))?;

    // Pre-size the buffer from the entry's uncompressed size; the cast may truncate on 32-bit
    // targets, but `with_capacity` is only a hint, so correctness is unaffected.
    #[allow(clippy::cast_possible_truncation)]
    let mut buffer = Vec::with_capacity(file.size() as usize);
    file.read_to_end(&mut buffer)?;
    Ok(buffer)
}
/// Find the `.dist-info` directory in an unzipped wheel.
///
/// See: <https://github.com/PyO3/python-pkginfo-rs>
pub fn find_flat_dist_info(
    filename: &WheelFilename,
    path: impl AsRef<Path>,
) -> Result<String, Error> {
    // Iterate over `path` to find the `.dist-info` directory. It should be at the top-level.
    let Some(dist_info) = fs_err::read_dir(path.as_ref())?.find_map(|entry| {
        // Entries that fail to read (or whose file type is unavailable) are skipped rather
        // than surfaced as errors.
        let entry = entry.ok()?;
        let file_type = entry.file_type().ok()?;
        if file_type.is_dir() {
            let path = entry.path();

            // Only consider directories named `<name>-<version>.dist-info`.
            let extension = path.extension()?;
            if extension != "dist-info" {
                return None;
            }

            // The name and version must parse successfully and match the wheel filename.
            let stem = path.file_stem()?;
            let (name, version) = stem.to_str()?.rsplit_once('-')?;
            if PackageName::from_str(name).ok()? != filename.name {
                return None;
            }
            if Version::from_str(version).ok()? != filename.version {
                return None;
            }
            Some(path)
        } else {
            None
        }
    }) else {
        return Err(Error::InvalidWheel(
            "Missing .dist-info directory".to_string(),
        ));
    };

    // Return the directory name without the `.dist-info` extension.
    let Some(dist_info_prefix) = dist_info.file_stem() else {
        return Err(Error::InvalidWheel(
            "Missing .dist-info directory".to_string(),
        ));
    };
    Ok(dist_info_prefix.to_string_lossy().to_string())
}
/// Read the wheel `METADATA` metadata from a `.dist-info` directory.
pub fn read_dist_info_metadata(
    dist_info_prefix: &str,
    wheel: impl AsRef<Path>,
) -> Result<Vec<u8>, Error> {
    // The metadata lives at `<prefix>.dist-info/METADATA` within the unzipped wheel.
    let relative = format!("{dist_info_prefix}.dist-info/METADATA");
    let metadata_file = wheel.as_ref().join(relative);
    let contents = fs_err::read(metadata_file)?;
    Ok(contents)
}
#[cfg(test)]
mod test {
    use std::str::FromStr;

    use distribution_filename::WheelFilename;

    use crate::metadata::find_archive_dist_info;

    /// A package name containing a `.` (e.g., `Mastodon.py`) must not confuse the
    /// `<name>-<version>` parsing of the `.dist-info` directory name.
    #[test]
    fn test_dot_in_name() {
        let files = [
            "mastodon/Mastodon.py",
            "mastodon/__init__.py",
            "mastodon/streaming.py",
            "Mastodon.py-1.5.1.dist-info/DESCRIPTION.rst",
            "Mastodon.py-1.5.1.dist-info/metadata.json",
            "Mastodon.py-1.5.1.dist-info/top_level.txt",
            "Mastodon.py-1.5.1.dist-info/WHEEL",
            "Mastodon.py-1.5.1.dist-info/METADATA",
            "Mastodon.py-1.5.1.dist-info/RECORD",
        ];
        let filename = WheelFilename::from_str("Mastodon.py-1.5.1-py2.py3-none-any.whl").unwrap();
        let (_, dist_info_prefix) =
            find_archive_dist_info(&filename, files.into_iter().map(|file| (file, file))).unwrap();
        assert_eq!(dist_info_prefix, "Mastodon.py-1.5.1");
    }
}

View file

@ -182,7 +182,7 @@ pub enum ErrorKind {
metadata: PackageName,
},
#[error("The wheel {0} is not a valid zip file")]
#[error("Failed to unzip wheel: {0}")]
Zip(WheelFilename, #[source] ZipError),
#[error("Failed to write to the client cache")]

View file

@ -18,7 +18,7 @@ use url::Url;
use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Name};
use install_wheel_rs::{find_dist_info, is_metadata_entry};
use install_wheel_rs::metadata::{find_archive_dist_info, is_metadata_entry};
use pep440_rs::Version;
use pypi_types::{Metadata23, SimpleJson};
use uv_auth::safe_copy_url_auth;
@ -602,7 +602,7 @@ async fn read_metadata_async_seek(
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
let (metadata_idx, _dist_info_prefix) = find_dist_info(
let (metadata_idx, _dist_info_prefix) = find_archive_dist_info(
filename,
zip_reader
.file()

View file

@ -3,7 +3,7 @@ use async_zip::tokio::read::seek::ZipFileReader;
use tokio_util::compat::TokioAsyncReadCompatExt;
use distribution_filename::WheelFilename;
use install_wheel_rs::find_dist_info;
use install_wheel_rs::metadata::find_archive_dist_info;
use crate::{Error, ErrorKind};
@ -65,7 +65,7 @@ pub(crate) async fn wheel_metadata_from_remote_zip(
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
let ((metadata_idx, metadata_entry), _dist_info_prefix) = find_dist_info(
let ((metadata_idx, metadata_entry), _dist_info_prefix) = find_archive_dist_info(
filename,
reader
.file()

View file

@ -4,8 +4,9 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use futures::{FutureExt, TryStreamExt};
use tokio::io::AsyncSeekExt;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::{info_span, instrument, Instrument};
use tracing::{info_span, instrument, warn, Instrument};
use url::Url;
use distribution_filename::WheelFilename;
@ -158,14 +159,33 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
);
// Download and unzip.
let archive = self
match self
.stream_wheel(url.clone(), &wheel.filename, &wheel_entry, &dist)
.await?;
Ok(LocalWheel::Unzipped(UnzippedWheel {
dist: dist.clone(),
archive,
filename: wheel.filename.clone(),
}))
.await
{
Ok(archive) => Ok(LocalWheel::Unzipped(UnzippedWheel {
dist: dist.clone(),
archive,
filename: wheel.filename.clone(),
})),
Err(Error::Extract(err)) if err.is_http_streaming_unsupported() => {
warn!(
"Streaming unsupported for {dist}; downloading wheel to disk ({err})"
);
// If the request failed because streaming is unsupported, download the
// wheel directly.
let archive = self
.download_wheel(url, &wheel.filename, &wheel_entry, &dist)
.await?;
Ok(LocalWheel::Unzipped(UnzippedWheel {
dist: dist.clone(),
archive,
filename: wheel.filename.clone(),
}))
}
Err(err) => Err(err),
}
}
Dist::Built(BuiltDist::DirectUrl(wheel)) => {
@ -181,19 +201,43 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
);
// Download and unzip.
let archive = self
match self
.stream_wheel(
wheel.url.raw().clone(),
&wheel.filename,
&wheel_entry,
&dist,
)
.await?;
Ok(LocalWheel::Unzipped(UnzippedWheel {
dist: dist.clone(),
archive,
filename: wheel.filename.clone(),
}))
.await
{
Ok(archive) => Ok(LocalWheel::Unzipped(UnzippedWheel {
dist: dist.clone(),
archive,
filename: wheel.filename.clone(),
})),
Err(Error::Client(err)) if err.is_http_streaming_unsupported() => {
warn!(
"Streaming unsupported for {dist}; downloading wheel to disk ({err})"
);
// If the request failed because streaming is unsupported, download the
// wheel directly.
let archive = self
.download_wheel(
wheel.url.raw().clone(),
&wheel.filename,
&wheel_entry,
&dist,
)
.await?;
Ok(LocalWheel::Unzipped(UnzippedWheel {
dist: dist.clone(),
archive,
filename: wheel.filename.clone(),
}))
}
Err(err) => Err(err),
}
}
Dist::Built(BuiltDist::Path(wheel)) => {
@ -277,7 +321,18 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
) -> Result<(Metadata23, Option<Url>), Error> {
match dist {
Dist::Built(built_dist) => {
Ok((self.client.wheel_metadata(built_dist).boxed().await?, None))
match self.client.wheel_metadata(built_dist).boxed().await {
Ok(metadata) => Ok((metadata, None)),
Err(err) if err.is_http_streaming_unsupported() => {
warn!("Streaming unsupported when fetching metadata for {dist}; downloading wheel directly ({err})");
// If the request failed due to an error that could be resolved by
// downloading the wheel directly, try that.
let wheel = self.get_or_build_wheel(dist.clone()).await?;
Ok((wheel.metadata()?, None))
}
Err(err) => Err(err.into()),
}
}
Dist::Source(source_dist) => {
let no_build = match self.build_context.no_build() {
@ -437,6 +492,87 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
Ok(archive)
}
/// Download a wheel from a URL, then unzip it into the cache.
async fn download_wheel(
&self,
url: Url,
filename: &WheelFilename,
wheel_entry: &CacheEntry,
dist: &Dist,
) -> Result<PathBuf, Error> {
// Create an entry for the HTTP cache.
let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem()));
let download = |response: reqwest::Response| {
async {
let reader = response
.bytes_stream()
.map_err(|err| self.handle_response_errors(err))
.into_async_read();
// Download the wheel to a temporary file.
let temp_file =
tempfile::tempfile_in(self.cache.root()).map_err(Error::CacheWrite)?;
let mut writer = tokio::io::BufWriter::new(tokio::fs::File::from_std(temp_file));
tokio::io::copy(&mut reader.compat(), &mut writer)
.await
.map_err(Error::CacheWrite)?;
// Unzip the wheel to a temporary directory.
let temp_dir =
tempfile::tempdir_in(self.cache.root()).map_err(Error::CacheWrite)?;
let mut file = writer.into_inner();
file.seek(io::SeekFrom::Start(0))
.await
.map_err(Error::CacheWrite)?;
let reader = tokio::io::BufReader::new(file);
uv_extract::seek::unzip(reader, temp_dir.path()).await?;
// Persist the temporary directory to the directory store.
let archive = self
.cache
.persist(temp_dir.into_path(), wheel_entry.path())
.map_err(Error::CacheRead)?;
Ok(archive)
}
.instrument(info_span!("wheel", wheel = %dist))
};
let req = self
.client
.cached_client()
.uncached()
.get(url)
.header(
// `reqwest` defaults to accepting compressed responses.
// Specify identity encoding to get consistent .whl downloading
// behavior from servers. ref: https://github.com/pypa/pip/pull/1688
"accept-encoding",
reqwest::header::HeaderValue::from_static("identity"),
)
.build()?;
let cache_control = match self.client.connectivity() {
Connectivity::Online => CacheControl::from(
self.cache
.freshness(&http_entry, Some(&filename.name))
.map_err(Error::CacheRead)?,
),
Connectivity::Offline => CacheControl::AllowStale,
};
let archive = self
.client
.cached_client()
.get_serde(req, &http_entry, cache_control, download)
.await
.map_err(|err| match err {
CachedClientError::Callback(err) => err,
CachedClientError::Client(err) => Error::Client(err),
})?;
Ok(archive)
}
/// Return the [`IndexLocations`] used by this resolver.
pub fn index_locations(&self) -> &IndexLocations {
self.build_context.index_locations()

View file

@ -2,6 +2,9 @@ use std::path::{Path, PathBuf};
use distribution_filename::WheelFilename;
use distribution_types::{CachedDist, Dist};
use pypi_types::Metadata23;
use crate::Error;
/// A wheel that's been unzipped while downloading
#[derive(Debug, Clone)]
@ -87,6 +90,15 @@ impl LocalWheel {
Self::Built(wheel) => CachedDist::from_remote(wheel.dist, wheel.filename, archive),
}
}
/// Read the [`Metadata23`] from a wheel.
pub fn metadata(&self) -> Result<Metadata23, Error> {
match self {
Self::Unzipped(wheel) => read_flat_wheel_metadata(&wheel.filename, &wheel.archive),
Self::Disk(wheel) => read_built_wheel_metadata(&wheel.filename, &wheel.path),
Self::Built(wheel) => read_built_wheel_metadata(&wheel.filename, &wheel.path),
}
}
}
impl UnzippedWheel {
@ -121,3 +133,25 @@ impl std::fmt::Display for LocalWheel {
write!(f, "{}", self.remote())
}
}
/// Read the [`Metadata23`] from a built wheel.
fn read_built_wheel_metadata(
filename: &WheelFilename,
wheel: impl AsRef<Path>,
) -> Result<Metadata23, Error> {
let file = fs_err::File::open(wheel.as_ref()).map_err(Error::CacheRead)?;
let reader = std::io::BufReader::new(file);
let mut archive = zip::ZipArchive::new(reader)?;
let metadata = install_wheel_rs::metadata::read_archive_metadata(filename, &mut archive)?;
Ok(Metadata23::parse_metadata(&metadata)?)
}
/// Read the [`Metadata23`] from an unzipped wheel.
fn read_flat_wheel_metadata(
filename: &WheelFilename,
wheel: impl AsRef<Path>,
) -> Result<Metadata23, Error> {
let dist_info = install_wheel_rs::metadata::find_flat_dist_info(filename, &wheel)?;
let metadata = install_wheel_rs::metadata::read_dist_info_metadata(&dist_info, &wheel)?;
Ok(Metadata23::parse_metadata(&metadata)?)
}

View file

@ -55,7 +55,7 @@ pub enum Error {
Zip(#[from] ZipError),
#[error("Source distribution directory contains neither readable pyproject.toml nor setup.py")]
DirWithoutEntrypoint,
#[error("Failed to extract source distribution")]
#[error("Failed to extract archive")]
Extract(#[from] uv_extract::Error),
#[error("Source distribution not found at: {0}")]
NotFound(PathBuf),

View file

@ -19,7 +19,7 @@ use distribution_types::{
DirectArchiveUrl, DirectGitUrl, Dist, FileLocation, GitSourceDist, LocalEditable, Name,
PathSourceDist, RemoteSource, SourceDist,
};
use install_wheel_rs::read_dist_info;
use install_wheel_rs::metadata::read_archive_metadata;
use pep508_rs::VerbatimUrl;
use platform_tags::Tags;
use pypi_types::Metadata23;
@ -903,7 +903,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
let reader = fs_err::tokio::File::open(&path)
.await
.map_err(Error::CacheRead)?;
uv_extract::stream::archive(tokio::io::BufReader::new(reader), path, &temp_dir.path())
uv_extract::seek::archive(tokio::io::BufReader::new(reader), path, &temp_dir.path())
.await?;
// Extract the top-level directory from the archive.
@ -1212,6 +1212,6 @@ fn read_wheel_metadata(
let file = fs_err::File::open(wheel).map_err(Error::CacheRead)?;
let reader = std::io::BufReader::new(file);
let mut archive = ZipArchive::new(reader)?;
let dist_info = read_dist_info(filename, &mut archive)?;
let dist_info = read_archive_metadata(filename, &mut archive)?;
Ok(Metadata23::parse_metadata(&dist_info)?)
}

View file

@ -1,11 +1,9 @@
use std::{ffi::OsString, path::PathBuf};
use zip::result::ZipError;
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error(transparent)]
Zip(#[from] ZipError),
Zip(#[from] zip::result::ZipError),
#[error(transparent)]
AsyncZip(#[from] async_zip::error::ZipError),
#[error(transparent)]
@ -19,3 +17,15 @@ pub enum Error {
#[error("The top-level of the archive must only contain a list directory, but it's empty")]
EmptyArchive,
}
impl Error {
    /// Returns `true` if the error is due to the server not supporting HTTP streaming. Most
    /// commonly, this is due to serving ZIP files with features that are incompatible with
    /// streaming, like data descriptors.
    pub fn is_http_streaming_unsupported(&self) -> bool {
        // `async_zip` reports zip features it cannot stream (e.g., data descriptors) via
        // `ZipError::FeatureNotSupported`; callers use this to fall back to a full download.
        matches!(
            self,
            Self::AsyncZip(async_zip::error::ZipError::FeatureNotSupported(_))
        )
    }
}

View file

@ -2,6 +2,7 @@ pub use error::Error;
pub use sync::*;
mod error;
pub mod seek;
pub mod stream;
mod sync;
mod tar;

View file

@ -0,0 +1,115 @@
use std::path::Path;
use rustc_hash::FxHashSet;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tokio_util::compat::TokioAsyncReadCompatExt;
use crate::Error;
/// Unzip a `.zip` archive into the target directory, requiring `Seek`.
///
/// This is useful for unzipping files asynchronously that already exist on disk.
pub async fn unzip<R: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin>(
    reader: R,
    target: impl AsRef<Path>,
) -> Result<(), Error> {
    let target = target.as_ref();
    // `async_zip` expects a `futures`-style reader, so adapt the Tokio reader.
    let mut reader = reader.compat();
    let mut zip = async_zip::base::read::seek::ZipFileReader::new(&mut reader).await?;

    // Track directories already created, to avoid redundant `create_dir_all` calls.
    let mut directories = FxHashSet::default();

    for index in 0..zip.file().entries().len() {
        let reader = zip.reader_with_entry(index).await?;

        // Construct the (expected) path to the file on-disk.
        let path = reader.entry().filename().as_str()?;
        let path = target.join(path);
        let is_dir = reader.entry().dir()?;

        // Either create the directory or write the file to disk.
        if is_dir {
            if directories.insert(path.clone()) {
                fs_err::tokio::create_dir_all(path).await?;
            }
        } else {
            // Ensure the parent directory exists before creating the file.
            if let Some(parent) = path.parent() {
                if directories.insert(parent.to_path_buf()) {
                    fs_err::tokio::create_dir_all(parent).await?;
                }
            }

            // Copy the mode.
            #[cfg(unix)]
            let mode = reader.entry().unix_permissions();

            // Copy the file contents, pre-sizing the write buffer from the entry's
            // uncompressed size when it fits in `usize`.
            let file = fs_err::tokio::File::create(&path).await?;
            let mut writer = if let Ok(size) = usize::try_from(reader.entry().uncompressed_size()) {
                tokio::io::BufWriter::with_capacity(size, file)
            } else {
                tokio::io::BufWriter::new(file)
            };
            tokio::io::copy(&mut reader.compat(), &mut writer).await?;

            // See `uv_extract::stream::unzip`.
            #[cfg(unix)]
            {
                use std::fs::Permissions;
                use std::os::unix::fs::PermissionsExt;

                let Some(mode) = mode else {
                    continue;
                };

                // The executable bit is the only permission we preserve, otherwise we use the OS defaults.
                // https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100
                let has_any_executable_bit = mode & 0o111;
                if has_any_executable_bit != 0 {
                    let permissions = fs_err::tokio::metadata(&path).await?.permissions();
                    fs_err::tokio::set_permissions(
                        &path,
                        Permissions::from_mode(permissions.mode() | 0o111),
                    )
                    .await?;
                }
            }
        }
    }
    Ok(())
}
/// Unzip a `.zip` or `.tar.gz` archive into the target directory, requiring `Seek`.
pub async fn archive<R: tokio::io::AsyncBufRead + tokio::io::AsyncSeek + Unpin>(
reader: R,
source: impl AsRef<Path>,
target: impl AsRef<Path>,
) -> Result<(), Error> {
// `.zip`
if source
.as_ref()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
{
unzip(reader, target).await?;
return Ok(());
}
// `.tar.gz`
if source
.as_ref()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("gz"))
&& source.as_ref().file_stem().is_some_and(|stem| {
Path::new(stem)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
})
{
crate::stream::untar(reader, target).await?;
return Ok(());
}
Err(Error::UnsupportedArchive(source.as_ref().to_path_buf()))
}

View file

@ -4949,3 +4949,41 @@ fn metadata_2_2() -> Result<()> {
Ok(())
}
/// Resolve packages from an index that "doesn't support" zip file streaming (by way of using
/// data descriptors).
#[test]
fn no_stream() -> Result<()> {
    let context = TestContext::new("3.12");

    // Write to a requirements file.
    let requirements_in = context.temp_dir.child("requirements.in");
    requirements_in
        .write_str("hashb_foxglove_protocolbuffers_python==25.3.0.1.20240226043130+465630478360")?;

    // Resolution should succeed by falling back to downloading the wheel to disk.
    uv_snapshot!(Command::new(get_bin())
        .arg("pip")
        .arg("compile")
        .arg("requirements.in")
        .arg("--extra-index-url")
        .arg("https://buf.build/gen/python")
        .arg("--cache-dir")
        .arg(context.cache_dir.path())
        .env("VIRTUAL_ENV", context.venv.as_os_str())
        .current_dir(&context.temp_dir), @r###"
    success: true
    exit_code: 0
    ----- stdout -----
    # This file was autogenerated by uv via the following command:
    # uv pip compile requirements.in --cache-dir [CACHE_DIR]
    hashb-foxglove-protocolbuffers-python==25.3.0.1.20240226043130+465630478360
    protobuf==4.25.3
    # via hashb-foxglove-protocolbuffers-python

    ----- stderr -----
    Resolved 2 packages in [TIME]
    "###
    );

    Ok(())
}

View file

@ -2994,3 +2994,33 @@ requires-python = "<=3.5"
Ok(())
}
/// Install packages from an index that "doesn't support" zip file streaming (by way of using
/// data descriptors).
#[test]
fn no_stream() -> Result<()> {
    let context = TestContext::new("3.12");

    // Write to a requirements file.
    let requirements_txt = context.temp_dir.child("requirements.txt");
    requirements_txt
        .write_str("hashb_foxglove_protocolbuffers_python==25.3.0.1.20240226043130+465630478360")?;

    // Installation should succeed by falling back to downloading the wheel to disk.
    uv_snapshot!(command(&context)
        .arg("requirements.txt")
        .arg("--index-url")
        .arg("https://buf.build/gen/python"), @r###"
    success: true
    exit_code: 0
    ----- stdout -----

    ----- stderr -----
    Resolved 1 package in [TIME]
    Downloaded 1 package in [TIME]
    Installed 1 package in [TIME]
    + hashb-foxglove-protocolbuffers-python==25.3.0.1.20240226043130+465630478360
    "###
    );

    Ok(())
}