mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 19:08:04 +00:00
Add `Seek` fallback for zip files (#2320)
## Summary

Some zip files can't be streamed; in particular, `rs-async-zip` doesn't support data descriptors right now (though it may in the future). This PR adds a fallback path for such zips that downloads the entire zip file to disk, then unzips it from disk (which gives us `Seek`).

Closes https://github.com/astral-sh/uv/issues/2216.

## Test Plan

`cargo run pip install --extra-index-url https://buf.build/gen/python hashb_foxglove_protocolbuffers_python==25.3.0.1.20240226043130+465630478360 --force-reinstall -n`
This commit is contained in:
parent
67fb023f10
commit
a267a501b6
14 changed files with 591 additions and 160 deletions
|
@ -1,16 +1,13 @@
|
|||
//! Takes a wheel and installs it into a venv.
|
||||
|
||||
use std::io;
|
||||
use std::io::{Read, Seek};
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
|
||||
use platform_info::PlatformInfoError;
|
||||
use thiserror::Error;
|
||||
use zip::result::ZipError;
|
||||
use zip::ZipArchive;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
use pep440_rs::Version;
|
||||
use platform_host::{Arch, Os};
|
||||
use pypi_types::Scheme;
|
||||
|
@ -19,6 +16,7 @@ use uv_fs::Simplified;
|
|||
use uv_normalize::PackageName;
|
||||
|
||||
pub mod linker;
|
||||
pub mod metadata;
|
||||
mod record;
|
||||
mod script;
|
||||
mod uninstall;
|
||||
|
@ -99,131 +97,3 @@ pub enum Error {
|
|||
#[error("Wheel version does not match filename: {0} != {1}")]
|
||||
MismatchedVersion(Version, Version),
|
||||
}
|
||||
|
||||
/// Returns `true` if the file is a `METADATA` file in a `dist-info` directory that matches the
|
||||
/// wheel filename.
|
||||
pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> bool {
|
||||
let Some((dist_info_dir, file)) = path.split_once('/') else {
|
||||
return false;
|
||||
};
|
||||
if file != "METADATA" {
|
||||
return false;
|
||||
}
|
||||
let Some(dir_stem) = dist_info_dir.strip_suffix(".dist-info") else {
|
||||
return false;
|
||||
};
|
||||
let Some((name, version)) = dir_stem.rsplit_once('-') else {
|
||||
return false;
|
||||
};
|
||||
let Ok(name) = PackageName::from_str(name) else {
|
||||
return false;
|
||||
};
|
||||
if name != filename.name {
|
||||
return false;
|
||||
}
|
||||
let Ok(version) = Version::from_str(version) else {
|
||||
return false;
|
||||
};
|
||||
if version != filename.version {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Find the `dist-info` directory from a list of files.
|
||||
///
|
||||
/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or
|
||||
/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase.
|
||||
/// Either way, we just search the wheel for the name.
|
||||
///
|
||||
/// Returns the dist info dir prefix without the `.dist-info` extension.
|
||||
///
|
||||
/// Reference implementation: <https://github.com/pypa/packaging/blob/2f83540272e79e3fe1f5d42abae8df0c14ddf4c2/src/packaging/utils.py#L146-L172>
|
||||
pub fn find_dist_info<'a, T: Copy>(
|
||||
filename: &WheelFilename,
|
||||
files: impl Iterator<Item = (T, &'a str)>,
|
||||
) -> Result<(T, &'a str), Error> {
|
||||
let metadatas: Vec<_> = files
|
||||
.filter_map(|(payload, path)| {
|
||||
let (dist_info_dir, file) = path.split_once('/')?;
|
||||
if file != "METADATA" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let dir_stem = dist_info_dir.strip_suffix(".dist-info")?;
|
||||
let (name, version) = dir_stem.rsplit_once('-')?;
|
||||
if PackageName::from_str(name).ok()? != filename.name {
|
||||
return None;
|
||||
}
|
||||
|
||||
if Version::from_str(version).ok()? != filename.version {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some((payload, dir_stem))
|
||||
})
|
||||
.collect();
|
||||
let (payload, dist_info_prefix) = match metadatas[..] {
|
||||
[] => {
|
||||
return Err(Error::MissingDistInfo);
|
||||
}
|
||||
[(payload, path)] => (payload, path),
|
||||
_ => {
|
||||
return Err(Error::MultipleDistInfo(
|
||||
metadatas
|
||||
.into_iter()
|
||||
.map(|(_, dist_info_dir)| dist_info_dir.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", "),
|
||||
));
|
||||
}
|
||||
};
|
||||
Ok((payload, dist_info_prefix))
|
||||
}
|
||||
|
||||
/// Given an archive, read the `dist-info` metadata into a buffer.
|
||||
pub fn read_dist_info(
|
||||
filename: &WheelFilename,
|
||||
archive: &mut ZipArchive<impl Read + Seek + Sized>,
|
||||
) -> Result<Vec<u8>, Error> {
|
||||
let dist_info_prefix =
|
||||
find_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1;
|
||||
|
||||
let mut file = archive
|
||||
.by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))
|
||||
.map_err(|err| Error::Zip(filename.to_string(), err))?;
|
||||
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
let mut buffer = Vec::with_capacity(file.size() as usize);
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use std::str::FromStr;

    use distribution_filename::WheelFilename;

    use crate::find_dist_info;

    /// A dot inside the package name must not confuse the search for the `dist-info`
    /// directory.
    #[test]
    fn test_dot_in_name() {
        let wheel = WheelFilename::from_str("Mastodon.py-1.5.1-py2.py3-none-any.whl").unwrap();
        let entries = [
            "mastodon/Mastodon.py",
            "mastodon/__init__.py",
            "mastodon/streaming.py",
            "Mastodon.py-1.5.1.dist-info/DESCRIPTION.rst",
            "Mastodon.py-1.5.1.dist-info/metadata.json",
            "Mastodon.py-1.5.1.dist-info/top_level.txt",
            "Mastodon.py-1.5.1.dist-info/WHEEL",
            "Mastodon.py-1.5.1.dist-info/METADATA",
            "Mastodon.py-1.5.1.dist-info/RECORD",
        ];

        // Use each entry's own name as its payload.
        let found = find_dist_info(&wheel, entries.iter().map(|&entry| (entry, entry)));
        let (_, prefix) = found.unwrap();

        assert_eq!(prefix, "Mastodon.py-1.5.1");
    }
}
|
||||
|
|
197
crates/install-wheel-rs/src/metadata.rs
Normal file
197
crates/install-wheel-rs/src/metadata.rs
Normal file
|
@ -0,0 +1,197 @@
|
|||
use std::io::{Read, Seek};
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
|
||||
use zip::ZipArchive;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
use pep440_rs::Version;
|
||||
use uv_normalize::PackageName;
|
||||
|
||||
use crate::Error;
|
||||
|
||||
/// Returns `true` if the file is a `METADATA` file in a `.dist-info` directory that matches the
|
||||
/// wheel filename.
|
||||
pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> bool {
|
||||
let Some((dist_info_dir, file)) = path.split_once('/') else {
|
||||
return false;
|
||||
};
|
||||
if file != "METADATA" {
|
||||
return false;
|
||||
}
|
||||
let Some(dir_stem) = dist_info_dir.strip_suffix(".dist-info") else {
|
||||
return false;
|
||||
};
|
||||
let Some((name, version)) = dir_stem.rsplit_once('-') else {
|
||||
return false;
|
||||
};
|
||||
let Ok(name) = PackageName::from_str(name) else {
|
||||
return false;
|
||||
};
|
||||
if name != filename.name {
|
||||
return false;
|
||||
}
|
||||
let Ok(version) = Version::from_str(version) else {
|
||||
return false;
|
||||
};
|
||||
if version != filename.version {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Find the `.dist-info` directory in a zipped wheel.
|
||||
///
|
||||
/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or
|
||||
/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase.
|
||||
/// Either way, we just search the wheel for the name.
|
||||
///
|
||||
/// Returns the dist info dir prefix without the `.dist-info` extension.
|
||||
///
|
||||
/// Reference implementation: <https://github.com/pypa/packaging/blob/2f83540272e79e3fe1f5d42abae8df0c14ddf4c2/src/packaging/utils.py#L146-L172>
|
||||
pub fn find_archive_dist_info<'a, T: Copy>(
|
||||
filename: &WheelFilename,
|
||||
files: impl Iterator<Item = (T, &'a str)>,
|
||||
) -> Result<(T, &'a str), Error> {
|
||||
let metadatas: Vec<_> = files
|
||||
.filter_map(|(payload, path)| {
|
||||
let (dist_info_dir, file) = path.split_once('/')?;
|
||||
if file != "METADATA" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let dir_stem = dist_info_dir.strip_suffix(".dist-info")?;
|
||||
let (name, version) = dir_stem.rsplit_once('-')?;
|
||||
if PackageName::from_str(name).ok()? != filename.name {
|
||||
return None;
|
||||
}
|
||||
|
||||
if Version::from_str(version).ok()? != filename.version {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some((payload, dir_stem))
|
||||
})
|
||||
.collect();
|
||||
let (payload, dist_info_prefix) = match metadatas[..] {
|
||||
[] => {
|
||||
return Err(Error::MissingDistInfo);
|
||||
}
|
||||
[(payload, path)] => (payload, path),
|
||||
_ => {
|
||||
return Err(Error::MultipleDistInfo(
|
||||
metadatas
|
||||
.into_iter()
|
||||
.map(|(_, dist_info_dir)| dist_info_dir.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", "),
|
||||
));
|
||||
}
|
||||
};
|
||||
Ok((payload, dist_info_prefix))
|
||||
}
|
||||
|
||||
/// Given an archive, read the `METADATA` from the `.dist-info` directory.
|
||||
pub fn read_archive_metadata(
|
||||
filename: &WheelFilename,
|
||||
archive: &mut ZipArchive<impl Read + Seek + Sized>,
|
||||
) -> Result<Vec<u8>, Error> {
|
||||
let dist_info_prefix =
|
||||
find_archive_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1;
|
||||
|
||||
let mut file = archive
|
||||
.by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))
|
||||
.map_err(|err| Error::Zip(filename.to_string(), err))?;
|
||||
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
let mut buffer = Vec::with_capacity(file.size() as usize);
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
/// Find the `.dist-info` directory in an unzipped wheel.
|
||||
///
|
||||
/// See: <https://github.com/PyO3/python-pkginfo-rs>
|
||||
pub fn find_flat_dist_info(
|
||||
filename: &WheelFilename,
|
||||
path: impl AsRef<Path>,
|
||||
) -> Result<String, Error> {
|
||||
// Iterate over `path` to find the `.dist-info` directory. It should be at the top-level.
|
||||
let Some(dist_info) = fs_err::read_dir(path.as_ref())?.find_map(|entry| {
|
||||
let entry = entry.ok()?;
|
||||
let file_type = entry.file_type().ok()?;
|
||||
if file_type.is_dir() {
|
||||
let path = entry.path();
|
||||
|
||||
let extension = path.extension()?;
|
||||
if extension != "dist-info" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let stem = path.file_stem()?;
|
||||
let (name, version) = stem.to_str()?.rsplit_once('-')?;
|
||||
if PackageName::from_str(name).ok()? != filename.name {
|
||||
return None;
|
||||
}
|
||||
if Version::from_str(version).ok()? != filename.version {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(path)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}) else {
|
||||
return Err(Error::InvalidWheel(
|
||||
"Missing .dist-info directory".to_string(),
|
||||
));
|
||||
};
|
||||
|
||||
let Some(dist_info_prefix) = dist_info.file_stem() else {
|
||||
return Err(Error::InvalidWheel(
|
||||
"Missing .dist-info directory".to_string(),
|
||||
));
|
||||
};
|
||||
|
||||
Ok(dist_info_prefix.to_string_lossy().to_string())
|
||||
}
|
||||
|
||||
/// Read the wheel `METADATA` metadata from a `.dist-info` directory.
|
||||
pub fn read_dist_info_metadata(
|
||||
dist_info_prefix: &str,
|
||||
wheel: impl AsRef<Path>,
|
||||
) -> Result<Vec<u8>, Error> {
|
||||
let metadata_file = wheel
|
||||
.as_ref()
|
||||
.join(format!("{dist_info_prefix}.dist-info/METADATA"));
|
||||
Ok(fs_err::read(metadata_file)?)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use std::str::FromStr;

    use distribution_filename::WheelFilename;

    use crate::metadata::find_archive_dist_info;

    /// The `.dist-info` directory is still found when the package name contains a dot.
    #[test]
    fn test_dot_in_name() {
        let archive_entries = [
            "mastodon/Mastodon.py",
            "mastodon/__init__.py",
            "mastodon/streaming.py",
            "Mastodon.py-1.5.1.dist-info/DESCRIPTION.rst",
            "Mastodon.py-1.5.1.dist-info/metadata.json",
            "Mastodon.py-1.5.1.dist-info/top_level.txt",
            "Mastodon.py-1.5.1.dist-info/WHEEL",
            "Mastodon.py-1.5.1.dist-info/METADATA",
            "Mastodon.py-1.5.1.dist-info/RECORD",
        ];
        let wheel_filename =
            WheelFilename::from_str("Mastodon.py-1.5.1-py2.py3-none-any.whl").unwrap();

        // The payload is irrelevant here, so each entry simply carries its own name.
        let lookup = find_archive_dist_info(
            &wheel_filename,
            archive_entries.iter().copied().map(|name| (name, name)),
        );

        let dist_info_prefix = lookup.unwrap().1;
        assert_eq!(dist_info_prefix, "Mastodon.py-1.5.1");
    }
}
|
|
@ -182,7 +182,7 @@ pub enum ErrorKind {
|
|||
metadata: PackageName,
|
||||
},
|
||||
|
||||
#[error("The wheel {0} is not a valid zip file")]
|
||||
#[error("Failed to unzip wheel: {0}")]
|
||||
Zip(WheelFilename, #[source] ZipError),
|
||||
|
||||
#[error("Failed to write to the client cache")]
|
||||
|
|
|
@ -18,7 +18,7 @@ use url::Url;
|
|||
|
||||
use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
|
||||
use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Name};
|
||||
use install_wheel_rs::{find_dist_info, is_metadata_entry};
|
||||
use install_wheel_rs::metadata::{find_archive_dist_info, is_metadata_entry};
|
||||
use pep440_rs::Version;
|
||||
use pypi_types::{Metadata23, SimpleJson};
|
||||
use uv_auth::safe_copy_url_auth;
|
||||
|
@ -602,7 +602,7 @@ async fn read_metadata_async_seek(
|
|||
.await
|
||||
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
|
||||
|
||||
let (metadata_idx, _dist_info_prefix) = find_dist_info(
|
||||
let (metadata_idx, _dist_info_prefix) = find_archive_dist_info(
|
||||
filename,
|
||||
zip_reader
|
||||
.file()
|
||||
|
|
|
@ -3,7 +3,7 @@ use async_zip::tokio::read::seek::ZipFileReader;
|
|||
use tokio_util::compat::TokioAsyncReadCompatExt;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
use install_wheel_rs::find_dist_info;
|
||||
use install_wheel_rs::metadata::find_archive_dist_info;
|
||||
|
||||
use crate::{Error, ErrorKind};
|
||||
|
||||
|
@ -65,7 +65,7 @@ pub(crate) async fn wheel_metadata_from_remote_zip(
|
|||
.await
|
||||
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
|
||||
|
||||
let ((metadata_idx, metadata_entry), _dist_info_prefix) = find_dist_info(
|
||||
let ((metadata_idx, metadata_entry), _dist_info_prefix) = find_archive_dist_info(
|
||||
filename,
|
||||
reader
|
||||
.file()
|
||||
|
|
|
@ -4,8 +4,9 @@ use std::path::{Path, PathBuf};
|
|||
use std::sync::Arc;
|
||||
|
||||
use futures::{FutureExt, TryStreamExt};
|
||||
use tokio::io::AsyncSeekExt;
|
||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||
use tracing::{info_span, instrument, Instrument};
|
||||
use tracing::{info_span, instrument, warn, Instrument};
|
||||
use url::Url;
|
||||
|
||||
use distribution_filename::WheelFilename;
|
||||
|
@ -158,14 +159,33 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
);
|
||||
|
||||
// Download and unzip.
|
||||
let archive = self
|
||||
match self
|
||||
.stream_wheel(url.clone(), &wheel.filename, &wheel_entry, &dist)
|
||||
.await?;
|
||||
Ok(LocalWheel::Unzipped(UnzippedWheel {
|
||||
dist: dist.clone(),
|
||||
archive,
|
||||
filename: wheel.filename.clone(),
|
||||
}))
|
||||
.await
|
||||
{
|
||||
Ok(archive) => Ok(LocalWheel::Unzipped(UnzippedWheel {
|
||||
dist: dist.clone(),
|
||||
archive,
|
||||
filename: wheel.filename.clone(),
|
||||
})),
|
||||
Err(Error::Extract(err)) if err.is_http_streaming_unsupported() => {
|
||||
warn!(
|
||||
"Streaming unsupported for {dist}; downloading wheel to disk ({err})"
|
||||
);
|
||||
|
||||
// If the request failed because streaming is unsupported, download the
|
||||
// wheel directly.
|
||||
let archive = self
|
||||
.download_wheel(url, &wheel.filename, &wheel_entry, &dist)
|
||||
.await?;
|
||||
Ok(LocalWheel::Unzipped(UnzippedWheel {
|
||||
dist: dist.clone(),
|
||||
archive,
|
||||
filename: wheel.filename.clone(),
|
||||
}))
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
Dist::Built(BuiltDist::DirectUrl(wheel)) => {
|
||||
|
@ -181,19 +201,43 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
);
|
||||
|
||||
// Download and unzip.
|
||||
let archive = self
|
||||
match self
|
||||
.stream_wheel(
|
||||
wheel.url.raw().clone(),
|
||||
&wheel.filename,
|
||||
&wheel_entry,
|
||||
&dist,
|
||||
)
|
||||
.await?;
|
||||
Ok(LocalWheel::Unzipped(UnzippedWheel {
|
||||
dist: dist.clone(),
|
||||
archive,
|
||||
filename: wheel.filename.clone(),
|
||||
}))
|
||||
.await
|
||||
{
|
||||
Ok(archive) => Ok(LocalWheel::Unzipped(UnzippedWheel {
|
||||
dist: dist.clone(),
|
||||
archive,
|
||||
filename: wheel.filename.clone(),
|
||||
})),
|
||||
Err(Error::Client(err)) if err.is_http_streaming_unsupported() => {
|
||||
warn!(
|
||||
"Streaming unsupported for {dist}; downloading wheel to disk ({err})"
|
||||
);
|
||||
|
||||
// If the request failed because streaming is unsupported, download the
|
||||
// wheel directly.
|
||||
let archive = self
|
||||
.download_wheel(
|
||||
wheel.url.raw().clone(),
|
||||
&wheel.filename,
|
||||
&wheel_entry,
|
||||
&dist,
|
||||
)
|
||||
.await?;
|
||||
Ok(LocalWheel::Unzipped(UnzippedWheel {
|
||||
dist: dist.clone(),
|
||||
archive,
|
||||
filename: wheel.filename.clone(),
|
||||
}))
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
Dist::Built(BuiltDist::Path(wheel)) => {
|
||||
|
@ -277,7 +321,18 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
) -> Result<(Metadata23, Option<Url>), Error> {
|
||||
match dist {
|
||||
Dist::Built(built_dist) => {
|
||||
Ok((self.client.wheel_metadata(built_dist).boxed().await?, None))
|
||||
match self.client.wheel_metadata(built_dist).boxed().await {
|
||||
Ok(metadata) => Ok((metadata, None)),
|
||||
Err(err) if err.is_http_streaming_unsupported() => {
|
||||
warn!("Streaming unsupported when fetching metadata for {dist}; downloading wheel directly ({err})");
|
||||
|
||||
// If the request failed due to an error that could be resolved by
|
||||
// downloading the wheel directly, try that.
|
||||
let wheel = self.get_or_build_wheel(dist.clone()).await?;
|
||||
Ok((wheel.metadata()?, None))
|
||||
}
|
||||
Err(err) => Err(err.into()),
|
||||
}
|
||||
}
|
||||
Dist::Source(source_dist) => {
|
||||
let no_build = match self.build_context.no_build() {
|
||||
|
@ -437,6 +492,87 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
Ok(archive)
|
||||
}
|
||||
|
||||
/// Download a wheel from a URL, then unzip it into the cache.
|
||||
async fn download_wheel(
|
||||
&self,
|
||||
url: Url,
|
||||
filename: &WheelFilename,
|
||||
wheel_entry: &CacheEntry,
|
||||
dist: &Dist,
|
||||
) -> Result<PathBuf, Error> {
|
||||
// Create an entry for the HTTP cache.
|
||||
let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem()));
|
||||
|
||||
let download = |response: reqwest::Response| {
|
||||
async {
|
||||
let reader = response
|
||||
.bytes_stream()
|
||||
.map_err(|err| self.handle_response_errors(err))
|
||||
.into_async_read();
|
||||
|
||||
// Download the wheel to a temporary file.
|
||||
let temp_file =
|
||||
tempfile::tempfile_in(self.cache.root()).map_err(Error::CacheWrite)?;
|
||||
let mut writer = tokio::io::BufWriter::new(tokio::fs::File::from_std(temp_file));
|
||||
tokio::io::copy(&mut reader.compat(), &mut writer)
|
||||
.await
|
||||
.map_err(Error::CacheWrite)?;
|
||||
|
||||
// Unzip the wheel to a temporary directory.
|
||||
let temp_dir =
|
||||
tempfile::tempdir_in(self.cache.root()).map_err(Error::CacheWrite)?;
|
||||
let mut file = writer.into_inner();
|
||||
file.seek(io::SeekFrom::Start(0))
|
||||
.await
|
||||
.map_err(Error::CacheWrite)?;
|
||||
let reader = tokio::io::BufReader::new(file);
|
||||
uv_extract::seek::unzip(reader, temp_dir.path()).await?;
|
||||
|
||||
// Persist the temporary directory to the directory store.
|
||||
let archive = self
|
||||
.cache
|
||||
.persist(temp_dir.into_path(), wheel_entry.path())
|
||||
.map_err(Error::CacheRead)?;
|
||||
Ok(archive)
|
||||
}
|
||||
.instrument(info_span!("wheel", wheel = %dist))
|
||||
};
|
||||
|
||||
let req = self
|
||||
.client
|
||||
.cached_client()
|
||||
.uncached()
|
||||
.get(url)
|
||||
.header(
|
||||
// `reqwest` defaults to accepting compressed responses.
|
||||
// Specify identity encoding to get consistent .whl downloading
|
||||
// behavior from servers. ref: https://github.com/pypa/pip/pull/1688
|
||||
"accept-encoding",
|
||||
reqwest::header::HeaderValue::from_static("identity"),
|
||||
)
|
||||
.build()?;
|
||||
let cache_control = match self.client.connectivity() {
|
||||
Connectivity::Online => CacheControl::from(
|
||||
self.cache
|
||||
.freshness(&http_entry, Some(&filename.name))
|
||||
.map_err(Error::CacheRead)?,
|
||||
),
|
||||
Connectivity::Offline => CacheControl::AllowStale,
|
||||
};
|
||||
|
||||
let archive = self
|
||||
.client
|
||||
.cached_client()
|
||||
.get_serde(req, &http_entry, cache_control, download)
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
CachedClientError::Callback(err) => err,
|
||||
CachedClientError::Client(err) => Error::Client(err),
|
||||
})?;
|
||||
|
||||
Ok(archive)
|
||||
}
|
||||
|
||||
/// Return the [`IndexLocations`] used by this resolver.
|
||||
pub fn index_locations(&self) -> &IndexLocations {
|
||||
self.build_context.index_locations()
|
||||
|
|
|
@ -2,6 +2,9 @@ use std::path::{Path, PathBuf};
|
|||
|
||||
use distribution_filename::WheelFilename;
|
||||
use distribution_types::{CachedDist, Dist};
|
||||
use pypi_types::Metadata23;
|
||||
|
||||
use crate::Error;
|
||||
|
||||
/// A wheel that's been unzipped while downloading
|
||||
#[derive(Debug, Clone)]
|
||||
|
@ -87,6 +90,15 @@ impl LocalWheel {
|
|||
Self::Built(wheel) => CachedDist::from_remote(wheel.dist, wheel.filename, archive),
|
||||
}
|
||||
}
|
||||
|
||||
/// Read the [`Metadata23`] from a wheel.
|
||||
pub fn metadata(&self) -> Result<Metadata23, Error> {
|
||||
match self {
|
||||
Self::Unzipped(wheel) => read_flat_wheel_metadata(&wheel.filename, &wheel.archive),
|
||||
Self::Disk(wheel) => read_built_wheel_metadata(&wheel.filename, &wheel.path),
|
||||
Self::Built(wheel) => read_built_wheel_metadata(&wheel.filename, &wheel.path),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl UnzippedWheel {
|
||||
|
@ -121,3 +133,25 @@ impl std::fmt::Display for LocalWheel {
|
|||
write!(f, "{}", self.remote())
|
||||
}
|
||||
}
|
||||
|
||||
/// Read the [`Metadata23`] from a built wheel.
|
||||
fn read_built_wheel_metadata(
|
||||
filename: &WheelFilename,
|
||||
wheel: impl AsRef<Path>,
|
||||
) -> Result<Metadata23, Error> {
|
||||
let file = fs_err::File::open(wheel.as_ref()).map_err(Error::CacheRead)?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
let mut archive = zip::ZipArchive::new(reader)?;
|
||||
let metadata = install_wheel_rs::metadata::read_archive_metadata(filename, &mut archive)?;
|
||||
Ok(Metadata23::parse_metadata(&metadata)?)
|
||||
}
|
||||
|
||||
/// Read the [`Metadata23`] from an unzipped wheel.
|
||||
fn read_flat_wheel_metadata(
|
||||
filename: &WheelFilename,
|
||||
wheel: impl AsRef<Path>,
|
||||
) -> Result<Metadata23, Error> {
|
||||
let dist_info = install_wheel_rs::metadata::find_flat_dist_info(filename, &wheel)?;
|
||||
let metadata = install_wheel_rs::metadata::read_dist_info_metadata(&dist_info, &wheel)?;
|
||||
Ok(Metadata23::parse_metadata(&metadata)?)
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ pub enum Error {
|
|||
Zip(#[from] ZipError),
|
||||
#[error("Source distribution directory contains neither readable pyproject.toml nor setup.py")]
|
||||
DirWithoutEntrypoint,
|
||||
#[error("Failed to extract source distribution")]
|
||||
#[error("Failed to extract archive")]
|
||||
Extract(#[from] uv_extract::Error),
|
||||
#[error("Source distribution not found at: {0}")]
|
||||
NotFound(PathBuf),
|
||||
|
|
|
@ -19,7 +19,7 @@ use distribution_types::{
|
|||
DirectArchiveUrl, DirectGitUrl, Dist, FileLocation, GitSourceDist, LocalEditable, Name,
|
||||
PathSourceDist, RemoteSource, SourceDist,
|
||||
};
|
||||
use install_wheel_rs::read_dist_info;
|
||||
use install_wheel_rs::metadata::read_archive_metadata;
|
||||
use pep508_rs::VerbatimUrl;
|
||||
use platform_tags::Tags;
|
||||
use pypi_types::Metadata23;
|
||||
|
@ -903,7 +903,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
let reader = fs_err::tokio::File::open(&path)
|
||||
.await
|
||||
.map_err(Error::CacheRead)?;
|
||||
uv_extract::stream::archive(tokio::io::BufReader::new(reader), path, &temp_dir.path())
|
||||
uv_extract::seek::archive(tokio::io::BufReader::new(reader), path, &temp_dir.path())
|
||||
.await?;
|
||||
|
||||
// Extract the top-level directory from the archive.
|
||||
|
@ -1212,6 +1212,6 @@ fn read_wheel_metadata(
|
|||
let file = fs_err::File::open(wheel).map_err(Error::CacheRead)?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
let mut archive = ZipArchive::new(reader)?;
|
||||
let dist_info = read_dist_info(filename, &mut archive)?;
|
||||
let dist_info = read_archive_metadata(filename, &mut archive)?;
|
||||
Ok(Metadata23::parse_metadata(&dist_info)?)
|
||||
}
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
use std::{ffi::OsString, path::PathBuf};
|
||||
|
||||
use zip::result::ZipError;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
#[error(transparent)]
|
||||
Zip(#[from] ZipError),
|
||||
Zip(#[from] zip::result::ZipError),
|
||||
#[error(transparent)]
|
||||
AsyncZip(#[from] async_zip::error::ZipError),
|
||||
#[error(transparent)]
|
||||
|
@ -19,3 +17,15 @@ pub enum Error {
|
|||
#[error("The top-level of the archive must only contain a list directory, but it's empty")]
|
||||
EmptyArchive,
|
||||
}
|
||||
|
||||
impl Error {
|
||||
/// Returns `true` if the error is due to the server not supporting HTTP streaming. Most
|
||||
/// commonly, this is due to serving ZIP files with features that are incompatible with
|
||||
/// streaming, like data descriptors.
|
||||
pub fn is_http_streaming_unsupported(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::AsyncZip(async_zip::error::ZipError::FeatureNotSupported(_))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ pub use error::Error;
|
|||
pub use sync::*;
|
||||
|
||||
mod error;
|
||||
pub mod seek;
|
||||
pub mod stream;
|
||||
mod sync;
|
||||
mod tar;
|
||||
|
|
115
crates/uv-extract/src/seek.rs
Normal file
115
crates/uv-extract/src/seek.rs
Normal file
|
@ -0,0 +1,115 @@
|
|||
use std::path::Path;
|
||||
|
||||
use rustc_hash::FxHashSet;
|
||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||
use tokio_util::compat::TokioAsyncReadCompatExt;
|
||||
|
||||
use crate::Error;
|
||||
|
||||
/// Unzip a `.zip` archive into the target directory, requiring `Seek`.
|
||||
///
|
||||
/// This is useful for unzipping files asynchronously that already exist on disk.
|
||||
pub async fn unzip<R: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin>(
|
||||
reader: R,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
let target = target.as_ref();
|
||||
let mut reader = reader.compat();
|
||||
let mut zip = async_zip::base::read::seek::ZipFileReader::new(&mut reader).await?;
|
||||
|
||||
let mut directories = FxHashSet::default();
|
||||
|
||||
for index in 0..zip.file().entries().len() {
|
||||
let reader = zip.reader_with_entry(index).await?;
|
||||
|
||||
// Construct the (expected) path to the file on-disk.
|
||||
let path = reader.entry().filename().as_str()?;
|
||||
let path = target.join(path);
|
||||
let is_dir = reader.entry().dir()?;
|
||||
|
||||
// Either create the directory or write the file to disk.
|
||||
if is_dir {
|
||||
if directories.insert(path.clone()) {
|
||||
fs_err::tokio::create_dir_all(path).await?;
|
||||
}
|
||||
} else {
|
||||
if let Some(parent) = path.parent() {
|
||||
if directories.insert(parent.to_path_buf()) {
|
||||
fs_err::tokio::create_dir_all(parent).await?;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the mode.
|
||||
#[cfg(unix)]
|
||||
let mode = reader.entry().unix_permissions();
|
||||
|
||||
// Copy the file contents.
|
||||
let file = fs_err::tokio::File::create(&path).await?;
|
||||
let mut writer = if let Ok(size) = usize::try_from(reader.entry().uncompressed_size()) {
|
||||
tokio::io::BufWriter::with_capacity(size, file)
|
||||
} else {
|
||||
tokio::io::BufWriter::new(file)
|
||||
};
|
||||
tokio::io::copy(&mut reader.compat(), &mut writer).await?;
|
||||
|
||||
// See `uv_extract::stream::unzip`.
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::fs::Permissions;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
let Some(mode) = mode else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// The executable bit is the only permission we preserve, otherwise we use the OS defaults.
|
||||
// https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100
|
||||
let has_any_executable_bit = mode & 0o111;
|
||||
if has_any_executable_bit != 0 {
|
||||
let permissions = fs_err::tokio::metadata(&path).await?.permissions();
|
||||
fs_err::tokio::set_permissions(
|
||||
&path,
|
||||
Permissions::from_mode(permissions.mode() | 0o111),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unzip a `.zip` or `.tar.gz` archive into the target directory, requiring `Seek`.
|
||||
pub async fn archive<R: tokio::io::AsyncBufRead + tokio::io::AsyncSeek + Unpin>(
|
||||
reader: R,
|
||||
source: impl AsRef<Path>,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
// `.zip`
|
||||
if source
|
||||
.as_ref()
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
|
||||
{
|
||||
unzip(reader, target).await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// `.tar.gz`
|
||||
if source
|
||||
.as_ref()
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("gz"))
|
||||
&& source.as_ref().file_stem().is_some_and(|stem| {
|
||||
Path::new(stem)
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
|
||||
})
|
||||
{
|
||||
crate::stream::untar(reader, target).await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
Err(Error::UnsupportedArchive(source.as_ref().to_path_buf()))
|
||||
}
|
|
@ -4949,3 +4949,41 @@ fn metadata_2_2() -> Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Resolve packages from an index that "doesn't support" zip file streaming (by way of using
|
||||
/// data descriptors).
|
||||
#[test]
|
||||
fn no_stream() -> Result<()> {
|
||||
let context = TestContext::new("3.12");
|
||||
|
||||
// Write to a requirements file.
|
||||
let requirements_in = context.temp_dir.child("requirements.in");
|
||||
requirements_in
|
||||
.write_str("hashb_foxglove_protocolbuffers_python==25.3.0.1.20240226043130+465630478360")?;
|
||||
|
||||
uv_snapshot!(Command::new(get_bin())
|
||||
.arg("pip")
|
||||
.arg("compile")
|
||||
.arg("requirements.in")
|
||||
.arg("--extra-index-url")
|
||||
.arg("https://buf.build/gen/python")
|
||||
.arg("--cache-dir")
|
||||
.arg(context.cache_dir.path())
|
||||
.env("VIRTUAL_ENV", context.venv.as_os_str())
|
||||
.current_dir(&context.temp_dir), @r###"
|
||||
success: true
|
||||
exit_code: 0
|
||||
----- stdout -----
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile requirements.in --cache-dir [CACHE_DIR]
|
||||
hashb-foxglove-protocolbuffers-python==25.3.0.1.20240226043130+465630478360
|
||||
protobuf==4.25.3
|
||||
# via hashb-foxglove-protocolbuffers-python
|
||||
|
||||
----- stderr -----
|
||||
Resolved 2 packages in [TIME]
|
||||
"###
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -2994,3 +2994,33 @@ requires-python = "<=3.5"
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Install packages from an index that "doesn't support" zip file streaming (by way of using
|
||||
/// data descriptors).
|
||||
#[test]
|
||||
fn no_stream() -> Result<()> {
|
||||
let context = TestContext::new("3.12");
|
||||
|
||||
// Write to a requirements file.
|
||||
let requirements_txt = context.temp_dir.child("requirements.txt");
|
||||
requirements_txt
|
||||
.write_str("hashb_foxglove_protocolbuffers_python==25.3.0.1.20240226043130+465630478360")?;
|
||||
|
||||
uv_snapshot!(command(&context)
|
||||
.arg("requirements.txt")
|
||||
.arg("--index-url")
|
||||
.arg("https://buf.build/gen/python"), @r###"
|
||||
success: true
|
||||
exit_code: 0
|
||||
----- stdout -----
|
||||
|
||||
----- stderr -----
|
||||
Resolved 1 package in [TIME]
|
||||
Downloaded 1 package in [TIME]
|
||||
Installed 1 package in [TIME]
|
||||
+ hashb-foxglove-protocolbuffers-python==25.3.0.1.20240226043130+465630478360
|
||||
"###
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue