mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 19:08:04 +00:00
Add Seek
fallback for zip files (#2320)
## Summary Some zip files can't be streamed; in particular, `rs-async-zip` doesn't support data descriptors right now (though it may in the future). This PR adds a fallback path for such zips that downloads the entire zip file to disk, then unzips it from disk (which gives us `Seek`). Closes https://github.com/astral-sh/uv/issues/2216. ## Test Plan `cargo run pip install --extra-index-url https://buf.build/gen/python hashb_foxglove_protocolbuffers_python==25.3.0.1.20240226043130+465630478360 --force-reinstall -n`
This commit is contained in:
parent
67fb023f10
commit
a267a501b6
14 changed files with 591 additions and 160 deletions
|
@ -1,11 +1,9 @@
|
|||
use std::{ffi::OsString, path::PathBuf};
|
||||
|
||||
use zip::result::ZipError;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
#[error(transparent)]
|
||||
Zip(#[from] ZipError),
|
||||
Zip(#[from] zip::result::ZipError),
|
||||
#[error(transparent)]
|
||||
AsyncZip(#[from] async_zip::error::ZipError),
|
||||
#[error(transparent)]
|
||||
|
@ -19,3 +17,15 @@ pub enum Error {
|
|||
#[error("The top-level of the archive must only contain a list directory, but it's empty")]
|
||||
EmptyArchive,
|
||||
}
|
||||
|
||||
impl Error {
|
||||
/// Returns `true` if the error is due to the server not supporting HTTP streaming. Most
|
||||
/// commonly, this is due to serving ZIP files with features that are incompatible with
|
||||
/// streaming, like data descriptors.
|
||||
pub fn is_http_streaming_unsupported(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::AsyncZip(async_zip::error::ZipError::FeatureNotSupported(_))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ pub use error::Error;
|
|||
pub use sync::*;
|
||||
|
||||
mod error;
|
||||
pub mod seek;
|
||||
pub mod stream;
|
||||
mod sync;
|
||||
mod tar;
|
||||
|
|
115
crates/uv-extract/src/seek.rs
Normal file
115
crates/uv-extract/src/seek.rs
Normal file
|
@ -0,0 +1,115 @@
|
|||
use std::path::Path;
|
||||
|
||||
use rustc_hash::FxHashSet;
|
||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||
use tokio_util::compat::TokioAsyncReadCompatExt;
|
||||
|
||||
use crate::Error;
|
||||
|
||||
/// Unzip a `.zip` archive into the target directory, requiring `Seek`.
|
||||
///
|
||||
/// This is useful for unzipping files asynchronously that already exist on disk.
|
||||
pub async fn unzip<R: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin>(
|
||||
reader: R,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
let target = target.as_ref();
|
||||
let mut reader = reader.compat();
|
||||
let mut zip = async_zip::base::read::seek::ZipFileReader::new(&mut reader).await?;
|
||||
|
||||
let mut directories = FxHashSet::default();
|
||||
|
||||
for index in 0..zip.file().entries().len() {
|
||||
let reader = zip.reader_with_entry(index).await?;
|
||||
|
||||
// Construct the (expected) path to the file on-disk.
|
||||
let path = reader.entry().filename().as_str()?;
|
||||
let path = target.join(path);
|
||||
let is_dir = reader.entry().dir()?;
|
||||
|
||||
// Either create the directory or write the file to disk.
|
||||
if is_dir {
|
||||
if directories.insert(path.clone()) {
|
||||
fs_err::tokio::create_dir_all(path).await?;
|
||||
}
|
||||
} else {
|
||||
if let Some(parent) = path.parent() {
|
||||
if directories.insert(parent.to_path_buf()) {
|
||||
fs_err::tokio::create_dir_all(parent).await?;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the mode.
|
||||
#[cfg(unix)]
|
||||
let mode = reader.entry().unix_permissions();
|
||||
|
||||
// Copy the file contents.
|
||||
let file = fs_err::tokio::File::create(&path).await?;
|
||||
let mut writer = if let Ok(size) = usize::try_from(reader.entry().uncompressed_size()) {
|
||||
tokio::io::BufWriter::with_capacity(size, file)
|
||||
} else {
|
||||
tokio::io::BufWriter::new(file)
|
||||
};
|
||||
tokio::io::copy(&mut reader.compat(), &mut writer).await?;
|
||||
|
||||
// See `uv_extract::stream::unzip`.
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::fs::Permissions;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
let Some(mode) = mode else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// The executable bit is the only permission we preserve, otherwise we use the OS defaults.
|
||||
// https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100
|
||||
let has_any_executable_bit = mode & 0o111;
|
||||
if has_any_executable_bit != 0 {
|
||||
let permissions = fs_err::tokio::metadata(&path).await?.permissions();
|
||||
fs_err::tokio::set_permissions(
|
||||
&path,
|
||||
Permissions::from_mode(permissions.mode() | 0o111),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unzip a `.zip` or `.tar.gz` archive into the target directory, requiring `Seek`.
|
||||
pub async fn archive<R: tokio::io::AsyncBufRead + tokio::io::AsyncSeek + Unpin>(
|
||||
reader: R,
|
||||
source: impl AsRef<Path>,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
// `.zip`
|
||||
if source
|
||||
.as_ref()
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
|
||||
{
|
||||
unzip(reader, target).await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// `.tar.gz`
|
||||
if source
|
||||
.as_ref()
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("gz"))
|
||||
&& source.as_ref().file_stem().is_some_and(|stem| {
|
||||
Path::new(stem)
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
|
||||
})
|
||||
{
|
||||
crate::stream::untar(reader, target).await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
Err(Error::UnsupportedArchive(source.as_ref().to_path_buf()))
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue