Use async unzip for local source distributions (#1809)

## Summary

We currently maintain separate untar methods for sync and async, but we
only use the sync version when the user provides a local source
distribution. (Otherwise, we untar as we download the distribution.) In
my testing, this is actually slower anyway:

```
❯ python -m scripts.bench \
        --uv-path ./target/release/main \
        --uv-path ./target/release/uv \
        ./requirements.in --benchmark resolve-cold --min-runs 50
Benchmark 1: ./target/release/main (resolve-cold)
  Time (mean ± σ):     835.2 ms ± 107.4 ms    [User: 346.0 ms, System: 151.3 ms]
  Range (min … max):   639.2 ms … 1051.0 ms    50 runs

Benchmark 2: ./target/release/uv (resolve-cold)
  Time (mean ± σ):     750.7 ms ±  91.9 ms    [User: 345.7 ms, System: 149.4 ms]
  Range (min … max):   637.9 ms … 905.7 ms    50 runs

Summary
  './target/release/uv (resolve-cold)' ran
    1.11 ± 0.20 times faster than './target/release/main (resolve-cold)'
```
This commit is contained in:
Charlie Marsh 2024-02-21 09:11:37 -05:00 committed by GitHub
parent a2a1b2fb0f
commit 88a0c13865
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 3 additions and 51 deletions

12
Cargo.lock generated
View file

@ -3489,17 +3489,6 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "tar"
version = "0.4.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb"
dependencies = [
"filetime",
"libc",
"xattr",
]
[[package]]
name = "target-lexicon"
version = "0.12.13"
@ -4425,7 +4414,6 @@ dependencies = [
"futures",
"rayon",
"rustc-hash",
"tar",
"thiserror",
"tokio",
"tokio-tar",

View file

@ -86,7 +86,6 @@ serde = { version = "1.0.194" }
serde_json = { version = "1.0.111" }
sha1 = { version = "0.10.6" }
sha2 = { version = "0.10.8" }
tar = { version = "0.4.40" }
target-lexicon = { version = "0.12.13" }
task-local-extensions = { version = "0.1.4" }
tempfile = { version = "3.9.0" }

View file

@ -301,7 +301,9 @@ impl SourceBuild {
let extracted = temp_dir.path().join("extracted"); let extracted = temp_dir.path().join("extracted");
// Unzip the archive into the temporary directory. // Unzip the archive into the temporary directory.
uv_extract::archive(source, &extracted) let reader = fs_err::tokio::File::open(source).await?;
uv_extract::stream::archive(tokio::io::BufReader::new(reader), source, &extracted)
.await
.map_err(|err| Error::Extraction(extracted.clone(), err))?; .map_err(|err| Error::Extraction(extracted.clone(), err))?;
// Extract the top-level directory from the archive. // Extract the top-level directory from the archive.

View file

@ -20,7 +20,6 @@ fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true }
rayon = { workspace = true }
rustc-hash = { workspace = true }
tar = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["io-util"] }
tokio-tar = { workspace = true }

View file

@ -73,42 +73,6 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
.collect::<Result<_, Error>>() .collect::<Result<_, Error>>()
} }
/// Extract a `.zip` or `.tar.gz` archive into the target directory.
pub fn archive(source: impl AsRef<Path>, target: impl AsRef<Path>) -> Result<(), Error> {
// `.zip`
if source
.as_ref()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
{
unzip(fs_err::File::open(source.as_ref())?, target.as_ref())?;
return Ok(());
}
// `.tar.gz`
if source
.as_ref()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("gz"))
{
if source.as_ref().file_stem().is_some_and(|stem| {
Path::new(stem)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
}) {
let mut archive = tar::Archive::new(flate2::read::GzDecoder::new(fs_err::File::open(
source.as_ref(),
)?));
// https://github.com/alexcrichton/tar-rs/issues/349
archive.set_preserve_mtime(false);
archive.unpack(target)?;
return Ok(());
}
}
Err(Error::UnsupportedArchive(source.as_ref().to_path_buf()))
}
/// Extract the top-level directory from an unpacked archive. /// Extract the top-level directory from an unpacked archive.
/// ///
/// The specification says: /// The specification says: