Mirror of https://github.com/astral-sh/uv.git, synced 2025-08-04 10:58:28 +00:00
Move archive extraction into its own crate (#647)
We have some shared utilities between `puffin-build` and `puffin-distribution`, and further, I want to be able to access the sdist archive extraction logic from `puffin-distribution`. This is really generic, so it's moving into its own crate.
This commit is contained in:
parent 388641643d
commit db7e2dedbb

14 changed files with 174 additions and 134 deletions
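The new crate itself isn't captured in the hunks below, but the call sites that are (notably `use puffin_extract::{unzip_archive, Error};`) pin down its rough shape. Here is a minimal sketch of what `puffin-extract` plausibly exposes; the error variants and the sequential body are assumptions, standing in for the rayon-parallel extraction shown further down in the `unzip.rs` diff:

```rust
// Sketch only: inferred from `use puffin_extract::{unzip_archive, Error};` and
// the call sites in the diff below, not taken from the new crate's source.
use std::io::{Read, Seek};
use std::path::Path;

use thiserror::Error;
use zip::result::ZipError;
use zip::ZipArchive;

#[derive(Debug, Error)]
pub enum Error {
    // Assumed variants: the moved code can fail with zip and I/O errors,
    // and the call sites use `?` on `fs_err::File::open`.
    #[error(transparent)]
    Zip(#[from] ZipError),
    #[error(transparent)]
    Io(#[from] std::io::Error),
}

/// Unzip a zip archive into the target directory.
pub fn unzip_archive<R: Read + Seek>(reader: R, target: &Path) -> Result<(), Error> {
    // Simplified sequential stand-in; the real code (see unzip.rs below) walks
    // the entries in parallel with rayon and a cloneable reader, which is why
    // the original signature also required `Send` and `HasLength`.
    let mut archive = ZipArchive::new(reader)?;
    archive.extract(target)?;
    Ok(())
}
```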
crates/puffin-distribution/Cargo.toml

@@ -20,6 +20,7 @@ pep440_rs = { path = "../pep440-rs" }
 platform-tags = { path = "../platform-tags" }
 puffin-cache = { path = "../puffin-cache" }
 puffin-client = { path = "../puffin-client" }
+puffin-extract = { path = "../puffin-extract" }
 puffin-fs = { path = "../puffin-fs" }
 puffin-git = { path = "../puffin-git" }
 puffin-normalize = { path = "../puffin-normalize" }

@@ -31,12 +32,10 @@ bytesize = { workspace = true }
 fs-err = { workspace = true }
 fs2 = { workspace = true }
 futures = { workspace = true }
-rayon = { workspace = true }
 reqwest = { workspace = true }
 rustc-hash = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
 sha2 = { workspace = true }
 tempfile = { workspace = true }
 thiserror = { workspace = true }
 tokio = { workspace = true }
crates/puffin-distribution/src/lib.rs

@@ -3,7 +3,7 @@ pub use download::{DiskWheel, InMemoryWheel, LocalWheel};
 pub use index::{BuiltWheelIndex, RegistryWheelIndex};
 pub use reporter::Reporter;
 pub use source_dist::{SourceDistCachedBuilder, SourceDistError};
-pub use unzip::{Unzip, UnzipError};
+pub use unzip::Unzip;
 
 mod distribution_database;
 mod download;

@@ -13,4 +13,3 @@ mod locks;
 mod reporter;
 mod source_dist;
 mod unzip;
-mod vendor;
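For context, a hypothetical downstream call site after this change: `UnzipError` is no longer re-exported, so unzip failures now surface as `puffin_extract::Error`. The function name here is illustrative, not from the diff:

```rust
use std::path::Path;

use puffin_distribution::{LocalWheel, Unzip};
use puffin_extract::Error;

// Hypothetical caller: `LocalWheel` and `Unzip` are re-exported above, and
// `Unzip::unzip` now returns `Result<(), puffin_extract::Error>`.
fn stage_wheel(wheel: &LocalWheel, target: &Path) -> Result<(), Error> {
    wheel.unzip(target)
}
```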
crates/puffin-distribution/src/unzip.rs

@@ -1,49 +1,35 @@
-use std::io;
-use std::io::{Read, Seek};
 use std::path::Path;
 
-use rayon::prelude::*;
-use thiserror::Error;
-use zip::result::ZipError;
-use zip::ZipArchive;
+use puffin_extract::{unzip_archive, Error};
 
 use crate::download::BuiltWheel;
-use crate::vendor::{CloneableSeekableReader, HasLength};
 use crate::{DiskWheel, InMemoryWheel, LocalWheel};
 
-#[derive(Debug, Error)]
-pub enum UnzipError {
-    #[error(transparent)]
-    Zip(#[from] ZipError),
-    #[error(transparent)]
-    Io(#[from] io::Error),
-}
-
 pub trait Unzip {
     /// Unzip a wheel into the target directory.
-    fn unzip(&self, target: &Path) -> Result<(), UnzipError>;
+    fn unzip(&self, target: &Path) -> Result<(), Error>;
 }
 
 impl Unzip for InMemoryWheel {
-    fn unzip(&self, target: &Path) -> Result<(), UnzipError> {
+    fn unzip(&self, target: &Path) -> Result<(), Error> {
         unzip_archive(std::io::Cursor::new(&self.buffer), target)
     }
 }
 
 impl Unzip for DiskWheel {
-    fn unzip(&self, target: &Path) -> Result<(), UnzipError> {
+    fn unzip(&self, target: &Path) -> Result<(), Error> {
         unzip_archive(fs_err::File::open(&self.path)?, target)
     }
 }
 
 impl Unzip for BuiltWheel {
-    fn unzip(&self, target: &Path) -> Result<(), UnzipError> {
+    fn unzip(&self, target: &Path) -> Result<(), Error> {
         unzip_archive(fs_err::File::open(&self.path)?, target)
     }
 }
 
 impl Unzip for LocalWheel {
-    fn unzip(&self, target: &Path) -> Result<(), UnzipError> {
+    fn unzip(&self, target: &Path) -> Result<(), Error> {
         match self {
             LocalWheel::InMemory(wheel) => wheel.unzip(target),
             LocalWheel::Disk(wheel) => wheel.unzip(target),

@@ -51,52 +37,3 @@ impl Unzip for LocalWheel {
         }
     }
 }
-
-/// Unzip a zip archive into the target directory.
-fn unzip_archive<R: Send + Read + Seek + HasLength>(
-    reader: R,
-    target: &Path,
-) -> Result<(), UnzipError> {
-    // Unzip in parallel.
-    let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
-    (0..archive.len())
-        .par_bridge()
-        .map(|file_number| {
-            let mut archive = archive.clone();
-            let mut file = archive.by_index(file_number)?;
-
-            // Determine the path of the file within the wheel.
-            let file_path = match file.enclosed_name() {
-                Some(path) => path.to_owned(),
-                None => return Ok(()),
-            };
-
-            // Create necessary parent directories.
-            let path = target.join(file_path);
-            if file.is_dir() {
-                fs_err::create_dir_all(path)?;
-                return Ok(());
-            }
-            if let Some(parent) = path.parent() {
-                fs_err::create_dir_all(parent)?;
-            }
-
-            // Write the file.
-            let mut outfile = fs_err::File::create(&path)?;
-            std::io::copy(&mut file, &mut outfile)?;
-
-            // Set permissions.
-            #[cfg(unix)]
-            {
-                use std::fs::Permissions;
-                use std::os::unix::fs::PermissionsExt;
-
-                if let Some(mode) = file.unix_mode() {
-                    std::fs::set_permissions(&path, Permissions::from_mode(mode))?;
-                }
-            }
-
-            Ok(())
-        })
-        .collect::<Result<_, UnzipError>>()
-}
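A hedged sketch of calling the shared helper directly, outside the `Unzip` trait, assuming `unzip_archive` keeps the shape used by the impls above: any readable, seekable source plus a target directory. The old local version additionally required `Send + HasLength` for the parallel walk, which `Cursor<Vec<u8>>` also satisfies; the function name below is illustrative, not from the diff:

```rust
use std::io::Cursor;
use std::path::Path;

use puffin_extract::{unzip_archive, Error};

// Hypothetical direct use: unzip a wheel that was downloaded into memory.
fn unzip_downloaded_wheel(bytes: Vec<u8>, target: &Path) -> Result<(), Error> {
    unzip_archive(Cursor::new(bytes), target)
}
```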
crates/puffin-distribution/src/vendor/cloneable_seekable_reader.rs (deleted)

@@ -1,172 +0,0 @@
-// Copyright 2022 Google LLC
-
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-#![allow(clippy::cast_sign_loss)]
-
-use std::{
-    io::{BufReader, Cursor, Read, Seek, SeekFrom},
-    sync::{Arc, Mutex},
-};
-
-/// A trait to represent some reader which has a total length known in
-/// advance. This is roughly equivalent to the nightly
-/// [`Seek::stream_len`] API.
-pub(crate) trait HasLength {
-    /// Return the current total length of this stream.
-    fn len(&self) -> u64;
-}
-
-/// A [`Read`] which refers to its underlying stream by reference count,
-/// and thus can be cloned cheaply. It supports seeking; each cloned instance
-/// maintains its own pointer into the file, and the underlying instance
-/// is seeked prior to each read.
-pub(crate) struct CloneableSeekableReader<R: Read + Seek + HasLength> {
-    file: Arc<Mutex<R>>,
-    pos: u64,
-    // TODO determine and store this once instead of per cloneable file
-    file_length: Option<u64>,
-}
-
-impl<R: Read + Seek + HasLength> Clone for CloneableSeekableReader<R> {
-    fn clone(&self) -> Self {
-        Self {
-            file: self.file.clone(),
-            pos: self.pos,
-            file_length: self.file_length,
-        }
-    }
-}
-
-impl<R: Read + Seek + HasLength> CloneableSeekableReader<R> {
-    /// Constructor. Takes ownership of the underlying `Read`.
-    /// You should pass in only streams whose total length you expect
-    /// to be fixed and unchanging. Odd behavior may occur if the length
-    /// of the stream changes; any subsequent seeks will not take account
-    /// of the changed stream length.
-    pub(crate) fn new(file: R) -> Self {
-        Self {
-            file: Arc::new(Mutex::new(file)),
-            pos: 0u64,
-            file_length: None,
-        }
-    }
-
-    /// Determine the length of the underlying stream.
-    fn ascertain_file_length(&mut self) -> u64 {
-        self.file_length.unwrap_or_else(|| {
-            let len = self.file.lock().unwrap().len();
-            self.file_length = Some(len);
-            len
-        })
-    }
-}
-
-impl<R: Read + Seek + HasLength> Read for CloneableSeekableReader<R> {
-    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
-        let mut underlying_file = self.file.lock().expect("Unable to get underlying file");
-        // TODO share an object which knows current position to avoid unnecessary
-        // seeks
-        underlying_file.seek(SeekFrom::Start(self.pos))?;
-        let read_result = underlying_file.read(buf);
-        if let Ok(bytes_read) = read_result {
-            // TODO, once stabilised, use checked_add_signed
-            self.pos += bytes_read as u64;
-        }
-        read_result
-    }
-}
-
-impl<R: Read + Seek + HasLength> Seek for CloneableSeekableReader<R> {
-    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
-        let new_pos = match pos {
-            SeekFrom::Start(pos) => pos,
-            SeekFrom::End(offset_from_end) => {
-                let file_len = self.ascertain_file_length();
-                if -offset_from_end as u64 > file_len {
-                    return Err(std::io::Error::new(
-                        std::io::ErrorKind::InvalidInput,
-                        "Seek too far backwards",
-                    ));
-                }
-                // TODO, once stabilised, use checked_add_signed
-                file_len - (-offset_from_end as u64)
-            }
-            // TODO, once stabilised, use checked_add_signed
-            SeekFrom::Current(offset_from_pos) => {
-                if offset_from_pos > 0 {
-                    self.pos + (offset_from_pos as u64)
-                } else {
-                    self.pos - ((-offset_from_pos) as u64)
-                }
-            }
-        };
-        self.pos = new_pos;
-        Ok(new_pos)
-    }
-}
-
-impl<R: HasLength> HasLength for BufReader<R> {
-    fn len(&self) -> u64 {
-        self.get_ref().len()
-    }
-}
-
-impl HasLength for std::fs::File {
-    fn len(&self) -> u64 {
-        self.metadata().unwrap().len()
-    }
-}
-
-impl HasLength for fs_err::File {
-    fn len(&self) -> u64 {
-        self.metadata().unwrap().len()
-    }
-}
-
-impl HasLength for Cursor<Vec<u8>> {
-    fn len(&self) -> u64 {
-        self.get_ref().len() as u64
-    }
-}
-
-impl HasLength for Cursor<&Vec<u8>> {
-    fn len(&self) -> u64 {
-        self.get_ref().len() as u64
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use std::io::{Cursor, Read, Seek, SeekFrom};
-
-    use super::CloneableSeekableReader;
-
-    #[test]
-    fn test_cloneable_seekable_reader() {
-        let buf: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
-        let buf = Cursor::new(buf);
-        let mut reader = CloneableSeekableReader::new(buf);
-        let mut out = vec![0; 2];
-        assert!(reader.read_exact(&mut out).is_ok());
-        assert_eq!(out[0], 0);
-        assert_eq!(out[1], 1);
-        assert!(reader.seek(SeekFrom::Start(0)).is_ok());
-        assert!(reader.read_exact(&mut out).is_ok());
-        assert_eq!(out[0], 0);
-        assert_eq!(out[1], 1);
-        assert!(reader.stream_position().is_ok());
-        assert!(reader.read_exact(&mut out).is_ok());
-        assert_eq!(out[0], 2);
-        assert_eq!(out[1], 3);
-        assert!(reader.seek(SeekFrom::End(-2)).is_ok());
-        assert!(reader.read_exact(&mut out).is_ok());
-        assert_eq!(out[0], 8);
-        assert_eq!(out[1], 9);
-        assert!(reader.read_exact(&mut out).is_err());
-    }
-}
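A standalone sketch (not part of the diff) of the property the vendored reader provides: each clone tracks its own `pos` over a single shared underlying stream, which is what lets the parallel extraction above hand one clone to each worker. It assumes in-module access to `CloneableSeekableReader`, which is `pub(crate)`:

```rust
use std::io::{Cursor, Read, Seek, SeekFrom};

// Assumes `CloneableSeekableReader` as defined above is in scope.
fn independent_clones() -> std::io::Result<()> {
    let data = Cursor::new(vec![0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
    let reader = CloneableSeekableReader::new(data);

    let mut head_reader = reader.clone();
    let mut tail_reader = reader.clone();
    tail_reader.seek(SeekFrom::Start(8))?;

    let mut head = [0u8; 2];
    let mut tail = [0u8; 2];
    head_reader.read_exact(&mut head)?; // reads bytes 0..2
    tail_reader.read_exact(&mut tail)?; // reads bytes 8..10, unaffected by head_reader

    assert_eq!(head, [0, 1]);
    assert_eq!(tail, [8, 9]);
    Ok(())
}
```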
crates/puffin-distribution/src/vendor/mod.rs (vendored, deleted)

@@ -1,3 +0,0 @@
-pub(crate) use cloneable_seekable_reader::{CloneableSeekableReader, HasLength};
-
-mod cloneable_seekable_reader;