mirror of
https://github.com/astral-sh/uv.git
synced 2025-12-10 11:59:07 +00:00
Move archive extraction into its own crate (#647)
We have some shared utilities beyond `puffin-build` and `puffin-distribution`, and I also want to be able to access the sdist archive extraction logic from `puffin-distribution`. The extraction logic is generic, so this change moves it into its own crate.
This commit is contained in:
parent
388641643d
commit
db7e2dedbb
14 changed files with 174 additions and 134 deletions
22
Cargo.lock
generated
22
Cargo.lock
generated
|
|
@ -2243,7 +2243,6 @@ name = "puffin-build"
|
||||||
version = "0.0.1"
|
version = "0.0.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"flate2",
|
|
||||||
"fs-err",
|
"fs-err",
|
||||||
"gourgeist",
|
"gourgeist",
|
||||||
"indoc",
|
"indoc",
|
||||||
|
|
@ -2252,6 +2251,7 @@ dependencies = [
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pep508_rs",
|
"pep508_rs",
|
||||||
"platform-host",
|
"platform-host",
|
||||||
|
"puffin-extract",
|
||||||
"puffin-fs",
|
"puffin-fs",
|
||||||
"puffin-interpreter",
|
"puffin-interpreter",
|
||||||
"puffin-traits",
|
"puffin-traits",
|
||||||
|
|
@ -2260,14 +2260,11 @@ dependencies = [
|
||||||
"regex",
|
"regex",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tar",
|
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"tokio",
|
"tokio",
|
||||||
"toml 0.8.8",
|
"toml 0.8.8",
|
||||||
"tracing",
|
"tracing",
|
||||||
"which",
|
|
||||||
"zip",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -2463,17 +2460,16 @@ dependencies = [
|
||||||
"platform-tags",
|
"platform-tags",
|
||||||
"puffin-cache",
|
"puffin-cache",
|
||||||
"puffin-client",
|
"puffin-client",
|
||||||
|
"puffin-extract",
|
||||||
"puffin-fs",
|
"puffin-fs",
|
||||||
"puffin-git",
|
"puffin-git",
|
||||||
"puffin-normalize",
|
"puffin-normalize",
|
||||||
"puffin-traits",
|
"puffin-traits",
|
||||||
"pypi-types",
|
"pypi-types",
|
||||||
"rayon",
|
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"rustc-hash",
|
"rustc-hash",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"sha2",
|
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
|
@ -2483,6 +2479,19 @@ dependencies = [
|
||||||
"zip",
|
"zip",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "puffin-extract"
|
||||||
|
version = "0.0.1"
|
||||||
|
dependencies = [
|
||||||
|
"flate2",
|
||||||
|
"fs-err",
|
||||||
|
"rayon",
|
||||||
|
"tar",
|
||||||
|
"thiserror",
|
||||||
|
"tokio",
|
||||||
|
"zip",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "puffin-fs"
|
name = "puffin-fs"
|
||||||
version = "0.0.1"
|
version = "0.0.1"
|
||||||
|
|
@ -2527,6 +2536,7 @@ dependencies = [
|
||||||
"puffin-cache",
|
"puffin-cache",
|
||||||
"puffin-client",
|
"puffin-client",
|
||||||
"puffin-distribution",
|
"puffin-distribution",
|
||||||
|
"puffin-extract",
|
||||||
"puffin-fs",
|
"puffin-fs",
|
||||||
"puffin-git",
|
"puffin-git",
|
||||||
"puffin-interpreter",
|
"puffin-interpreter",
|
||||||
|
|
|
||||||
|
|
@ -68,6 +68,10 @@ Implements the traits defined in `puffin-traits`.
|
||||||
Client for interacting with built distributions (wheels) and source distributions (sdists).
|
Client for interacting with built distributions (wheels) and source distributions (sdists).
|
||||||
Capable of fetching metadata, distribution contents, etc.
|
Capable of fetching metadata, distribution contents, etc.
|
||||||
|
|
||||||
|
## [puffin-extract](./puffin-extract)
|
||||||
|
|
||||||
|
Utilities for extracting files from archives.
|
||||||
|
|
||||||
## [puffin-fs](./puffin-fs)
|
## [puffin-fs](./puffin-fs)
|
||||||
|
|
||||||
Utilities for interacting with the filesystem.
|
Utilities for interacting with the filesystem.
|
||||||
|
|
|
||||||
|
|
@ -17,13 +17,13 @@ workspace = true
|
||||||
gourgeist = { path = "../gourgeist" }
|
gourgeist = { path = "../gourgeist" }
|
||||||
pep508_rs = { path = "../pep508-rs" }
|
pep508_rs = { path = "../pep508-rs" }
|
||||||
platform-host = { path = "../platform-host" }
|
platform-host = { path = "../platform-host" }
|
||||||
|
puffin-extract = { path = "../puffin-extract" }
|
||||||
puffin-fs = { path = "../puffin-fs" }
|
puffin-fs = { path = "../puffin-fs" }
|
||||||
puffin-interpreter = { path = "../puffin-interpreter" }
|
puffin-interpreter = { path = "../puffin-interpreter" }
|
||||||
puffin-traits = { path = "../puffin-traits" }
|
puffin-traits = { path = "../puffin-traits" }
|
||||||
pypi-types = { path = "../pypi-types" }
|
pypi-types = { path = "../pypi-types" }
|
||||||
|
|
||||||
anyhow = { workspace = true }
|
anyhow = { workspace = true }
|
||||||
flate2 = { workspace = true }
|
|
||||||
fs-err = { workspace = true }
|
fs-err = { workspace = true }
|
||||||
indoc = { workspace = true }
|
indoc = { workspace = true }
|
||||||
itertools = { workspace = true }
|
itertools = { workspace = true }
|
||||||
|
|
@ -32,14 +32,11 @@ pyproject-toml = { workspace = true }
|
||||||
regex = { workspace = true }
|
regex = { workspace = true }
|
||||||
serde = { workspace = true }
|
serde = { workspace = true }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
tar = { workspace = true }
|
|
||||||
tempfile = { workspace = true }
|
tempfile = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
tokio = { workspace = true, features = ["sync", "process"] }
|
tokio = { workspace = true, features = ["sync", "process"] }
|
||||||
toml = { workspace = true }
|
toml = { workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
which = { workspace = true}
|
|
||||||
zip = { workspace = true }
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
insta = { version = "1.34.0" }
|
insta = { version = "1.34.0" }
|
||||||
|
|
|
||||||
|
|
@ -13,22 +13,19 @@ use std::process::Output;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use flate2::read::GzDecoder;
|
|
||||||
use fs_err as fs;
|
use fs_err as fs;
|
||||||
use fs_err::{DirEntry, File};
|
use fs_err::DirEntry;
|
||||||
use indoc::formatdoc;
|
use indoc::formatdoc;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use pyproject_toml::{BuildSystem, Project};
|
use pyproject_toml::{BuildSystem, Project};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tar::Archive;
|
|
||||||
use tempfile::{tempdir, tempdir_in, TempDir};
|
use tempfile::{tempdir, tempdir_in, TempDir};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tokio::process::Command;
|
use tokio::process::Command;
|
||||||
use tokio::sync::Mutex;
|
use tokio::sync::Mutex;
|
||||||
use tracing::{debug, info_span, instrument};
|
use tracing::{debug, info_span, instrument};
|
||||||
use zip::ZipArchive;
|
|
||||||
|
|
||||||
use pep508_rs::Requirement;
|
use pep508_rs::Requirement;
|
||||||
use puffin_interpreter::{Interpreter, Virtualenv};
|
use puffin_interpreter::{Interpreter, Virtualenv};
|
||||||
|
|
@ -46,8 +43,8 @@ static MISSING_HEADER_RE: Lazy<Regex> = Lazy::new(|| {
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
IO(#[from] io::Error),
|
IO(#[from] io::Error),
|
||||||
#[error("Failed to read zip file")]
|
#[error("Failed to extract archive: {0}")]
|
||||||
Zip(#[from] zip::result::ZipError),
|
Extraction(PathBuf, #[source] puffin_extract::Error),
|
||||||
#[error("Unsupported archive format (extension not recognized): {0}")]
|
#[error("Unsupported archive format (extension not recognized): {0}")]
|
||||||
UnsupportedArchiveType(String),
|
UnsupportedArchiveType(String),
|
||||||
#[error("Invalid source distribution: {0}")]
|
#[error("Invalid source distribution: {0}")]
|
||||||
|
|
@ -671,40 +668,12 @@ async fn create_pep517_build_environment(
|
||||||
/// Returns the directory with the `pyproject.toml`/`setup.py`
|
/// Returns the directory with the `pyproject.toml`/`setup.py`
|
||||||
#[instrument(skip_all, fields(sdist = ? sdist.file_name().unwrap_or(sdist.as_os_str())))]
|
#[instrument(skip_all, fields(sdist = ? sdist.file_name().unwrap_or(sdist.as_os_str())))]
|
||||||
fn extract_archive(sdist: &Path, extracted: &PathBuf) -> Result<PathBuf, Error> {
|
fn extract_archive(sdist: &Path, extracted: &PathBuf) -> Result<PathBuf, Error> {
|
||||||
if sdist
|
puffin_extract::extract_archive(sdist, extracted)
|
||||||
.extension()
|
.map_err(|err| Error::Extraction(sdist.to_path_buf(), err))?;
|
||||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
|
|
||||||
{
|
|
||||||
// .zip
|
|
||||||
let mut archive = ZipArchive::new(File::open(sdist)?)?;
|
|
||||||
archive.extract(extracted)?;
|
|
||||||
} else if sdist
|
|
||||||
.extension()
|
|
||||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("gz"))
|
|
||||||
&& sdist.file_stem().is_some_and(|stem| {
|
|
||||||
Path::new(stem)
|
|
||||||
.extension()
|
|
||||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
|
|
||||||
})
|
|
||||||
{
|
|
||||||
// .tar.gz
|
|
||||||
let mut archive = Archive::new(GzDecoder::new(File::open(sdist)?));
|
|
||||||
// https://github.com/alexcrichton/tar-rs/issues/349
|
|
||||||
archive.set_preserve_mtime(false);
|
|
||||||
archive.unpack(extracted)?;
|
|
||||||
} else {
|
|
||||||
return Err(Error::UnsupportedArchiveType(
|
|
||||||
sdist
|
|
||||||
.file_name()
|
|
||||||
.unwrap_or(sdist.as_os_str())
|
|
||||||
.to_string_lossy()
|
|
||||||
.to_string(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
// > A .tar.gz source distribution (sdist) contains a single top-level directory called
|
// > A .tar.gz source distribution (sdist) contains a single top-level directory called
|
||||||
// > `{name}-{version}` (e.g. foo-1.0), containing the source files of the package.
|
// > `{name}-{version}` (e.g. foo-1.0), containing the source files of the package.
|
||||||
// TODO(konstin): Verify the name of the directory
|
// TODO(konstin): Verify the name of the directory.
|
||||||
let top_level = fs::read_dir(extracted)?.collect::<io::Result<Vec<DirEntry>>>()?;
|
let top_level = fs::read_dir(extracted)?.collect::<io::Result<Vec<DirEntry>>>()?;
|
||||||
let [root] = top_level.as_slice() else {
|
let [root] = top_level.as_slice() else {
|
||||||
return Err(Error::InvalidSourceDist(format!(
|
return Err(Error::InvalidSourceDist(format!(
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ pep440_rs = { path = "../pep440-rs" }
|
||||||
platform-tags = { path = "../platform-tags" }
|
platform-tags = { path = "../platform-tags" }
|
||||||
puffin-cache = { path = "../puffin-cache" }
|
puffin-cache = { path = "../puffin-cache" }
|
||||||
puffin-client = { path = "../puffin-client" }
|
puffin-client = { path = "../puffin-client" }
|
||||||
|
puffin-extract = { path = "../puffin-extract" }
|
||||||
puffin-fs = { path = "../puffin-fs" }
|
puffin-fs = { path = "../puffin-fs" }
|
||||||
puffin-git = { path = "../puffin-git" }
|
puffin-git = { path = "../puffin-git" }
|
||||||
puffin-normalize = { path = "../puffin-normalize" }
|
puffin-normalize = { path = "../puffin-normalize" }
|
||||||
|
|
@ -31,12 +32,10 @@ bytesize = { workspace = true }
|
||||||
fs-err = { workspace = true }
|
fs-err = { workspace = true }
|
||||||
fs2 = { workspace = true }
|
fs2 = { workspace = true }
|
||||||
futures = { workspace = true }
|
futures = { workspace = true }
|
||||||
rayon = { workspace = true }
|
|
||||||
reqwest = { workspace = true }
|
reqwest = { workspace = true }
|
||||||
rustc-hash = { workspace = true }
|
rustc-hash = { workspace = true }
|
||||||
serde = { workspace = true , features = ["derive"] }
|
serde = { workspace = true , features = ["derive"] }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
sha2 = { workspace = true }
|
|
||||||
tempfile = { workspace = true }
|
tempfile = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
tokio = { workspace = true }
|
tokio = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ pub use download::{DiskWheel, InMemoryWheel, LocalWheel};
|
||||||
pub use index::{BuiltWheelIndex, RegistryWheelIndex};
|
pub use index::{BuiltWheelIndex, RegistryWheelIndex};
|
||||||
pub use reporter::Reporter;
|
pub use reporter::Reporter;
|
||||||
pub use source_dist::{SourceDistCachedBuilder, SourceDistError};
|
pub use source_dist::{SourceDistCachedBuilder, SourceDistError};
|
||||||
pub use unzip::{Unzip, UnzipError};
|
pub use unzip::Unzip;
|
||||||
|
|
||||||
mod distribution_database;
|
mod distribution_database;
|
||||||
mod download;
|
mod download;
|
||||||
|
|
@ -13,4 +13,3 @@ mod locks;
|
||||||
mod reporter;
|
mod reporter;
|
||||||
mod source_dist;
|
mod source_dist;
|
||||||
mod unzip;
|
mod unzip;
|
||||||
mod vendor;
|
|
||||||
|
|
|
||||||
|
|
@ -1,49 +1,35 @@
|
||||||
use std::io;
|
|
||||||
use std::io::{Read, Seek};
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use rayon::prelude::*;
|
use puffin_extract::{unzip_archive, Error};
|
||||||
use thiserror::Error;
|
|
||||||
use zip::result::ZipError;
|
|
||||||
use zip::ZipArchive;
|
|
||||||
|
|
||||||
use crate::download::BuiltWheel;
|
use crate::download::BuiltWheel;
|
||||||
use crate::vendor::{CloneableSeekableReader, HasLength};
|
|
||||||
use crate::{DiskWheel, InMemoryWheel, LocalWheel};
|
use crate::{DiskWheel, InMemoryWheel, LocalWheel};
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
|
||||||
pub enum UnzipError {
|
|
||||||
#[error(transparent)]
|
|
||||||
Zip(#[from] ZipError),
|
|
||||||
#[error(transparent)]
|
|
||||||
Io(#[from] io::Error),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait Unzip {
|
pub trait Unzip {
|
||||||
/// Unzip a wheel into the target directory.
|
/// Unzip a wheel into the target directory.
|
||||||
fn unzip(&self, target: &Path) -> Result<(), UnzipError>;
|
fn unzip(&self, target: &Path) -> Result<(), Error>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Unzip for InMemoryWheel {
|
impl Unzip for InMemoryWheel {
|
||||||
fn unzip(&self, target: &Path) -> Result<(), UnzipError> {
|
fn unzip(&self, target: &Path) -> Result<(), Error> {
|
||||||
unzip_archive(std::io::Cursor::new(&self.buffer), target)
|
unzip_archive(std::io::Cursor::new(&self.buffer), target)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Unzip for DiskWheel {
|
impl Unzip for DiskWheel {
|
||||||
fn unzip(&self, target: &Path) -> Result<(), UnzipError> {
|
fn unzip(&self, target: &Path) -> Result<(), Error> {
|
||||||
unzip_archive(fs_err::File::open(&self.path)?, target)
|
unzip_archive(fs_err::File::open(&self.path)?, target)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Unzip for BuiltWheel {
|
impl Unzip for BuiltWheel {
|
||||||
fn unzip(&self, target: &Path) -> Result<(), UnzipError> {
|
fn unzip(&self, target: &Path) -> Result<(), Error> {
|
||||||
unzip_archive(fs_err::File::open(&self.path)?, target)
|
unzip_archive(fs_err::File::open(&self.path)?, target)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Unzip for LocalWheel {
|
impl Unzip for LocalWheel {
|
||||||
fn unzip(&self, target: &Path) -> Result<(), UnzipError> {
|
fn unzip(&self, target: &Path) -> Result<(), Error> {
|
||||||
match self {
|
match self {
|
||||||
LocalWheel::InMemory(wheel) => wheel.unzip(target),
|
LocalWheel::InMemory(wheel) => wheel.unzip(target),
|
||||||
LocalWheel::Disk(wheel) => wheel.unzip(target),
|
LocalWheel::Disk(wheel) => wheel.unzip(target),
|
||||||
|
|
@ -51,52 +37,3 @@ impl Unzip for LocalWheel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Unzip a zip archive into the target directory.
|
|
||||||
fn unzip_archive<R: Send + Read + Seek + HasLength>(
|
|
||||||
reader: R,
|
|
||||||
target: &Path,
|
|
||||||
) -> Result<(), UnzipError> {
|
|
||||||
// Unzip in parallel.
|
|
||||||
let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
|
|
||||||
(0..archive.len())
|
|
||||||
.par_bridge()
|
|
||||||
.map(|file_number| {
|
|
||||||
let mut archive = archive.clone();
|
|
||||||
let mut file = archive.by_index(file_number)?;
|
|
||||||
|
|
||||||
// Determine the path of the file within the wheel.
|
|
||||||
let file_path = match file.enclosed_name() {
|
|
||||||
Some(path) => path.to_owned(),
|
|
||||||
None => return Ok(()),
|
|
||||||
};
|
|
||||||
|
|
||||||
// Create necessary parent directories.
|
|
||||||
let path = target.join(file_path);
|
|
||||||
if file.is_dir() {
|
|
||||||
fs_err::create_dir_all(path)?;
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
if let Some(parent) = path.parent() {
|
|
||||||
fs_err::create_dir_all(parent)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write the file.
|
|
||||||
let mut outfile = fs_err::File::create(&path)?;
|
|
||||||
std::io::copy(&mut file, &mut outfile)?;
|
|
||||||
|
|
||||||
// Set permissions.
|
|
||||||
#[cfg(unix)]
|
|
||||||
{
|
|
||||||
use std::fs::Permissions;
|
|
||||||
use std::os::unix::fs::PermissionsExt;
|
|
||||||
|
|
||||||
if let Some(mode) = file.unix_mode() {
|
|
||||||
std::fs::set_permissions(&path, Permissions::from_mode(mode))?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
})
|
|
||||||
.collect::<Result<_, UnzipError>>()
|
|
||||||
}
|
|
||||||
|
|
|
||||||
3
crates/puffin-distribution/src/vendor/mod.rs
vendored
3
crates/puffin-distribution/src/vendor/mod.rs
vendored
|
|
@ -1,3 +0,0 @@
|
||||||
pub(crate) use cloneable_seekable_reader::{CloneableSeekableReader, HasLength};
|
|
||||||
|
|
||||||
mod cloneable_seekable_reader;
|
|
||||||
22
crates/puffin-extract/Cargo.toml
Normal file
22
crates/puffin-extract/Cargo.toml
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
[package]
|
||||||
|
name = "puffin-extract"
|
||||||
|
version = "0.0.1"
|
||||||
|
edition = { workspace = true }
|
||||||
|
rust-version = { workspace = true }
|
||||||
|
homepage = { workspace = true }
|
||||||
|
documentation = { workspace = true }
|
||||||
|
repository = { workspace = true }
|
||||||
|
authors = { workspace = true }
|
||||||
|
license = { workspace = true }
|
||||||
|
|
||||||
|
[lints]
|
||||||
|
workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
flate2 = { workspace = true }
|
||||||
|
fs-err = { workspace = true }
|
||||||
|
rayon = { workspace = true }
|
||||||
|
tar = { workspace = true }
|
||||||
|
thiserror = { workspace = true }
|
||||||
|
tokio = { workspace = true }
|
||||||
|
zip = { workspace = true }
|
||||||
104
crates/puffin-extract/src/lib.rs
Normal file
104
crates/puffin-extract/src/lib.rs
Normal file
|
|
@ -0,0 +1,104 @@
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use rayon::prelude::*;
|
||||||
|
use zip::result::ZipError;
|
||||||
|
use zip::ZipArchive;
|
||||||
|
|
||||||
|
pub use crate::vendor::{CloneableSeekableReader, HasLength};
|
||||||
|
|
||||||
|
mod vendor;
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub enum Error {
|
||||||
|
#[error(transparent)]
|
||||||
|
Zip(#[from] ZipError),
|
||||||
|
#[error(transparent)]
|
||||||
|
Io(#[from] std::io::Error),
|
||||||
|
#[error("Unsupported archive type: {0}")]
|
||||||
|
UnsupportedArchive(PathBuf),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unzip a zip archive into the target directory.
|
||||||
|
pub fn unzip_archive<R: Send + std::io::Read + std::io::Seek + HasLength>(
|
||||||
|
reader: R,
|
||||||
|
target: &Path,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
// Unzip in parallel.
|
||||||
|
let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
|
||||||
|
(0..archive.len())
|
||||||
|
.par_bridge()
|
||||||
|
.map(|file_number| {
|
||||||
|
let mut archive = archive.clone();
|
||||||
|
let mut file = archive.by_index(file_number)?;
|
||||||
|
|
||||||
|
// Determine the path of the file within the wheel.
|
||||||
|
let file_path = match file.enclosed_name() {
|
||||||
|
Some(path) => path.to_owned(),
|
||||||
|
None => return Ok(()),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create necessary parent directories.
|
||||||
|
let path = target.join(file_path);
|
||||||
|
if file.is_dir() {
|
||||||
|
fs_err::create_dir_all(path)?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
if let Some(parent) = path.parent() {
|
||||||
|
fs_err::create_dir_all(parent)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the file.
|
||||||
|
let mut outfile = fs_err::File::create(&path)?;
|
||||||
|
std::io::copy(&mut file, &mut outfile)?;
|
||||||
|
|
||||||
|
// Set permissions.
|
||||||
|
#[cfg(unix)]
|
||||||
|
{
|
||||||
|
use std::fs::Permissions;
|
||||||
|
use std::os::unix::fs::PermissionsExt;
|
||||||
|
|
||||||
|
if let Some(mode) = file.unix_mode() {
|
||||||
|
std::fs::set_permissions(&path, Permissions::from_mode(mode))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
.collect::<Result<_, Error>>()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract a `.zip` or `.tar.gz` archive into the target directory.
|
||||||
|
pub fn extract_archive(source: impl AsRef<Path>, target: impl AsRef<Path>) -> Result<(), Error> {
|
||||||
|
// .zip
|
||||||
|
if source
|
||||||
|
.as_ref()
|
||||||
|
.extension()
|
||||||
|
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
|
||||||
|
{
|
||||||
|
unzip_archive(fs_err::File::open(source.as_ref())?, target.as_ref())?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// .tar.gz
|
||||||
|
if source
|
||||||
|
.as_ref()
|
||||||
|
.extension()
|
||||||
|
.is_some_and(|ext| ext.eq_ignore_ascii_case("gz"))
|
||||||
|
{
|
||||||
|
if source.as_ref().file_stem().is_some_and(|stem| {
|
||||||
|
Path::new(stem)
|
||||||
|
.extension()
|
||||||
|
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
|
||||||
|
}) {
|
||||||
|
let mut archive = tar::Archive::new(flate2::read::GzDecoder::new(fs_err::File::open(
|
||||||
|
source.as_ref(),
|
||||||
|
)?));
|
||||||
|
// https://github.com/alexcrichton/tar-rs/issues/349
|
||||||
|
archive.set_preserve_mtime(false);
|
||||||
|
archive.unpack(target)?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(Error::UnsupportedArchive(source.as_ref().to_path_buf()))
|
||||||
|
}
|
||||||
|
|
@ -16,7 +16,8 @@ use std::{
|
||||||
/// A trait to represent some reader which has a total length known in
|
/// A trait to represent some reader which has a total length known in
|
||||||
/// advance. This is roughly equivalent to the nightly
|
/// advance. This is roughly equivalent to the nightly
|
||||||
/// [`Seek::stream_len`] API.
|
/// [`Seek::stream_len`] API.
|
||||||
pub(crate) trait HasLength {
|
#[allow(clippy::len_without_is_empty)]
|
||||||
|
pub trait HasLength {
|
||||||
/// Return the current total length of this stream.
|
/// Return the current total length of this stream.
|
||||||
fn len(&self) -> u64;
|
fn len(&self) -> u64;
|
||||||
}
|
}
|
||||||
|
|
@ -25,7 +26,7 @@ pub(crate) trait HasLength {
|
||||||
/// and thus can be cloned cheaply. It supports seeking; each cloned instance
|
/// and thus can be cloned cheaply. It supports seeking; each cloned instance
|
||||||
/// maintains its own pointer into the file, and the underlying instance
|
/// maintains its own pointer into the file, and the underlying instance
|
||||||
/// is seeked prior to each read.
|
/// is seeked prior to each read.
|
||||||
pub(crate) struct CloneableSeekableReader<R: Read + Seek + HasLength> {
|
pub struct CloneableSeekableReader<R: Read + Seek + HasLength> {
|
||||||
file: Arc<Mutex<R>>,
|
file: Arc<Mutex<R>>,
|
||||||
pos: u64,
|
pos: u64,
|
||||||
// TODO determine and store this once instead of per cloneable file
|
// TODO determine and store this once instead of per cloneable file
|
||||||
3
crates/puffin-extract/src/vendor/mod.rs
vendored
Normal file
3
crates/puffin-extract/src/vendor/mod.rs
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
pub use cloneable_seekable_reader::{CloneableSeekableReader, HasLength};
|
||||||
|
|
||||||
|
mod cloneable_seekable_reader;
|
||||||
|
|
@ -14,14 +14,15 @@ workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
distribution-filename = { path = "../distribution-filename" }
|
distribution-filename = { path = "../distribution-filename" }
|
||||||
|
distribution-types = { path = "../distribution-types" }
|
||||||
install-wheel-rs = { path = "../install-wheel-rs", default-features = false }
|
install-wheel-rs = { path = "../install-wheel-rs", default-features = false }
|
||||||
pep440_rs = { path = "../pep440-rs" }
|
pep440_rs = { path = "../pep440-rs" }
|
||||||
pep508_rs = { path = "../pep508-rs" }
|
pep508_rs = { path = "../pep508-rs" }
|
||||||
|
platform-tags = { path = "../platform-tags" }
|
||||||
puffin-cache = { path = "../puffin-cache" }
|
puffin-cache = { path = "../puffin-cache" }
|
||||||
puffin-client = { path = "../puffin-client" }
|
puffin-client = { path = "../puffin-client" }
|
||||||
distribution-types = { path = "../distribution-types" }
|
|
||||||
platform-tags = { path = "../platform-tags" }
|
|
||||||
puffin-distribution = { path = "../puffin-distribution" }
|
puffin-distribution = { path = "../puffin-distribution" }
|
||||||
|
puffin-extract = { path = "../puffin-extract" }
|
||||||
puffin-fs = { path = "../puffin-fs" }
|
puffin-fs = { path = "../puffin-fs" }
|
||||||
puffin-git = { path = "../puffin-git" }
|
puffin-git = { path = "../puffin-git" }
|
||||||
puffin-interpreter = { path = "../puffin-interpreter" }
|
puffin-interpreter = { path = "../puffin-interpreter" }
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
use std::cmp::Reverse;
|
use std::cmp::Reverse;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use futures::{StreamExt, TryFutureExt};
|
use futures::{StreamExt, TryFutureExt};
|
||||||
|
|
@ -12,18 +11,16 @@ use distribution_types::{CachedDist, Dist, RemoteSource, SourceDist};
|
||||||
use platform_tags::Tags;
|
use platform_tags::Tags;
|
||||||
use puffin_cache::Cache;
|
use puffin_cache::Cache;
|
||||||
use puffin_client::RegistryClient;
|
use puffin_client::RegistryClient;
|
||||||
use puffin_distribution::{
|
use puffin_distribution::{DistributionDatabase, DistributionDatabaseError, LocalWheel, Unzip};
|
||||||
DistributionDatabase, DistributionDatabaseError, LocalWheel, Unzip, UnzipError,
|
|
||||||
};
|
|
||||||
use puffin_traits::{BuildContext, OnceMap};
|
use puffin_traits::{BuildContext, OnceMap};
|
||||||
|
|
||||||
#[derive(thiserror::Error, Debug)]
|
#[derive(thiserror::Error, Debug)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
#[error("Failed to unzip wheel: {0}")]
|
#[error("Failed to unzip wheel: {0}")]
|
||||||
Unzip(Dist, #[source] UnzipError),
|
Unzip(Dist, #[source] puffin_extract::Error),
|
||||||
#[error("Failed to fetch wheel: {0}")]
|
#[error("Failed to fetch wheel: {0}")]
|
||||||
Fetch(Dist, #[source] DistributionDatabaseError),
|
Fetch(Dist, #[source] DistributionDatabaseError),
|
||||||
/// Should not occur, i've only seen it when another task panicked
|
/// Should not occur; only seen when another task panicked.
|
||||||
#[error("The task executor is broken, did some other task panic?")]
|
#[error("The task executor is broken, did some other task panic?")]
|
||||||
Join(#[from] JoinError),
|
Join(#[from] JoinError),
|
||||||
#[error("Unzip failed in another thread: {0}")]
|
#[error("Unzip failed in another thread: {0}")]
|
||||||
|
|
@ -160,7 +157,7 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> {
|
||||||
|
|
||||||
// Unzip the wheel.
|
// Unzip the wheel.
|
||||||
let normalized_path = tokio::task::spawn_blocking({
|
let normalized_path = tokio::task::spawn_blocking({
|
||||||
move || -> Result<PathBuf, UnzipError> {
|
move || -> Result<PathBuf, puffin_extract::Error> {
|
||||||
// Unzip the wheel into a temporary directory.
|
// Unzip the wheel into a temporary directory.
|
||||||
let parent = download
|
let parent = download
|
||||||
.target()
|
.target()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue