diff --git a/Cargo.lock b/Cargo.lock index d9c52f8c2..0a8f9a19d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -178,6 +178,30 @@ dependencies = [ "tempfile", ] +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener 2.5.3", + "futures-core", +] + +[[package]] +name = "async-channel" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ca33f4bc4ed1babef42cad36cc1f51fa88be00420404e5b1e80ab1b18f7678c" +dependencies = [ + "concurrent-queue", + "event-listener 4.0.3", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + [[package]] name = "async-compression" version = "0.4.6" @@ -193,6 +217,176 @@ dependencies = [ "tokio", ] +[[package]] +name = "async-executor" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17ae5ebefcc48e7452b4987947920dac9450be1110cadf34d1b8c116bdbaf97c" +dependencies = [ + "async-lock 3.3.0", + "async-task", + "concurrent-queue", + "fastrand 2.0.1", + "futures-lite 2.2.0", + "slab", +] + +[[package]] +name = "async-global-executor" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b1b633a2115cd122d73b955eadd9916c18c8f510ec9cd1686404c60ad1c29c" +dependencies = [ + "async-channel 2.1.1", + "async-executor", + "async-io 2.3.0", + "async-lock 3.3.0", + "blocking", + "futures-lite 2.2.0", + "once_cell", +] + +[[package]] +name = "async-io" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af" +dependencies = [ + "async-lock 2.8.0", + "autocfg", + "cfg-if 1.0.0", + "concurrent-queue", + "futures-lite 1.13.0", + "log", + "parking", + "polling 2.8.0", + "rustix 0.37.27", + "slab", + "socket2 0.4.10", + "waker-fn", +] + +[[package]] +name = "async-io" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb41eb19024a91746eba0773aa5e16036045bbf45733766661099e182ea6a744" +dependencies = [ + "async-lock 3.3.0", + "cfg-if 1.0.0", + "concurrent-queue", + "futures-io", + "futures-lite 2.2.0", + "parking", + "polling 3.3.2", + "rustix 0.38.30", + "slab", + "tracing", + "windows-sys 0.52.0", +] + +[[package]] +name = "async-lock" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b" +dependencies = [ + "event-listener 2.5.3", +] + +[[package]] +name = "async-lock" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d034b430882f8381900d3fe6f0aaa3ad94f2cb4ac519b429692a1bc2dda4ae7b" +dependencies = [ + "event-listener 4.0.3", + "event-listener-strategy", + "pin-project-lite", +] + +[[package]] +name = "async-process" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea6438ba0a08d81529c69b36700fa2f95837bfe3e776ab39cde9c14d9149da88" +dependencies = [ + "async-io 1.13.0", + "async-lock 2.8.0", + "async-signal", + "blocking", + "cfg-if 1.0.0", + "event-listener 3.1.0", + "futures-lite 1.13.0", + "rustix 0.38.30", + "windows-sys 0.48.0", +] + +[[package]] +name = "async-signal" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1079d27511f6c038736279421774ef4ad4bdd2e300825f4a48c4cc463a57cedf" +dependencies = [ + "async-io 1.13.0", + "async-lock 2.8.0", + "atomic-waker", + "cfg-if 1.0.0", + "futures-core", + "futures-io", + "rustix 0.38.30", + "signal-hook-registry", + "slab", + "windows-sys 0.48.0", +] + +[[package]] +name = "async-std" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62565bb4402e926b29953c785397c6dc0391b7b446e45008b0049eb43cec6f5d" +dependencies = [ + "async-channel 1.9.0", + "async-global-executor", + "async-io 1.13.0", + "async-lock 2.8.0", + "async-process", + "crossbeam-utils", + "futures-channel", + "futures-core", + "futures-io", + "futures-lite 1.13.0", + "gloo-timers", + "kv-log-macro", + "log", + "memchr", + "once_cell", + "pin-project-lite", + "pin-utils", + "slab", + "wasm-bindgen-futures", +] + +[[package]] +name = "async-tar" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c49359998a76e32ef6e870dbc079ebad8f1e53e8441c5dd39d27b44493fe331" +dependencies = [ + "async-std", + "filetime", + "libc", + "pin-project", + "redox_syscall 0.2.16", + "xattr 0.2.3", +] + +[[package]] +name = "async-task" +version = "4.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbb36e985947064623dbd357f727af08ffd077f93d696782f3c56365fa2e2799" + [[package]] name = "async-trait" version = "0.1.77" @@ -229,13 +423,19 @@ source = "git+https://github.com/charliermarsh/rs-async-zip?rev=d76801da0943de98 dependencies = [ "async-compression", "crc32fast", - "futures-lite", + "futures-lite 2.2.0", "pin-project", "thiserror", "tokio", "tokio-util", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.1.0" @@ -326,6 +526,22 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blocking" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a37913e8dc4ddcc604f0c6d3bf2887c995153af3611de9e23c352b44c1b9118" +dependencies = [ + "async-channel 2.1.1", + "async-lock 3.3.0", + "async-task", + "fastrand 2.0.1", + "futures-io", + "futures-lite 2.2.0", + "piper", + "tracing", +] + [[package]] name = "brotli" version = "3.4.0" @@ -582,6 +798,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "concurrent-queue" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d16048cd947b08fa32c24458a22f5dc5e835264f689f4f5653210c69fd107363" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "configparser" version = "3.0.4" @@ -928,6 +1153,53 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + +[[package]] +name = "event-listener" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93877bcde0eb80ca09131a08d23f0a5c18a620b01db137dba666d18cd9b30c2" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener" +version = "4.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b215c49b2b248c855fb73579eb1f4f26c38ffdc12973e20e07b91d78d5646e" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3" +dependencies = [ + "event-listener 4.0.3", + "pin-project-lite", +] + +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "fastrand" version = "2.0.1" @@ -1061,13 +1333,28 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + [[package]] name = "futures-lite" version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "445ba825b27408685aaecefd65178908c36c6e96aaf6d8599419d46e624192ba" dependencies = [ - "fastrand", + "fastrand 2.0.1", "futures-core", "futures-io", "parking", @@ -1189,6 +1476,18 @@ dependencies = [ "walkdir", ] +[[package]] +name = "gloo-timers" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b995a66bb87bebce9a0f4a95aed01daca4872c050bfcb21653361c03bc35e5c" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "goblin" version = "0.8.0" @@ -1385,7 +1684,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "socket2 0.5.5", "tokio", "tower-service", "tracing", @@ -1574,6 +1873,17 @@ dependencies = [ "web-sys", ] +[[package]] +name = "io-lifetimes" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "ipnet" version = "2.9.0" @@ -1587,7 +1897,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" dependencies = [ "hermit-abi", - "rustix", + "rustix 0.38.30", "windows-sys 0.52.0", ] @@ -1658,6 +1968,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "kv-log-macro" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" +dependencies = [ + "log", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -1756,6 +2075,12 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +[[package]] +name = "linux-raw-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + [[package]] name = "linux-raw-sys" version = "0.4.13" @@ -1777,6 +2102,9 @@ name = "log" version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +dependencies = [ + "value-bag", +] [[package]] name = "mailparse" @@ -2199,6 +2527,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piper" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "668d31b1c4eba19242f2088b2bf3316b82ca31082a8335764db4e083db7485d4" +dependencies = [ + "atomic-waker", + "fastrand 2.0.1", + "futures-io", +] + [[package]] name = "pkg-config" version = "0.3.29" @@ -2260,6 +2599,36 @@ dependencies = [ "time", ] +[[package]] +name = "polling" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce" +dependencies = [ + "autocfg", + "bitflags 1.3.2", + "cfg-if 1.0.0", + "concurrent-queue", + "libc", + "log", + "pin-project-lite", + "windows-sys 0.48.0", +] + +[[package]] +name = "polling" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "545c980a3880efd47b2e262f6a4bb6daad6555cf3367aa9c4e52895f69537a41" +dependencies = [ + "cfg-if 1.0.0", + "concurrent-queue", + "pin-project-lite", + "rustix 0.38.30", + "tracing", + "windows-sys 0.52.0", +] + [[package]] name = "portable-atomic" version = "1.6.0" @@ -2632,6 +3001,9 @@ dependencies = [ name = "puffin-extract" version = "0.0.1" dependencies = [ + "async-compression", + "async-std", + "async-tar", "async_zip", "flate2", "fs-err", @@ -3069,7 +3441,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "767be24c0da52e7448d495b8d162506a9aa125426651d547d545d6c2b4b65b62" dependencies = [ "cfg-if 1.0.0", - "rustix", + "rustix 0.38.30", "windows", ] @@ -3322,6 +3694,20 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustix" +version = "0.37.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea8ca367a3a01fe35e6943c400addf443c0f57670e6ec51196f71a4b8762dd2" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.3.8", + "windows-sys 0.48.0", +] + [[package]] name = "rustix" version = "0.38.30" @@ -3331,7 +3717,7 @@ dependencies = [ "bitflags 2.4.2", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.13", "windows-sys 0.52.0", ] @@ -3560,6 +3946,16 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" +[[package]] +name = "socket2" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "socket2" version = "0.5.5" @@ -3679,7 +4075,7 @@ checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" dependencies = [ "filetime", "libc", - "xattr", + "xattr 1.3.1", ] [[package]] @@ -3704,9 +4100,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" dependencies = [ "cfg-if 1.0.0", - "fastrand", + "fastrand 2.0.1", "redox_syscall 0.4.1", - "rustix", + "rustix 0.38.30", "windows-sys 0.52.0", ] @@ -3911,7 +4307,7 @@ dependencies = [ "num_cpus", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.5.5", "tokio-macros", "windows-sys 0.48.0", ] @@ -4216,6 +4612,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "value-bag" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cdbaf5e132e593e9fc1de6a15bbec912395b11fb9719e061cf64f804524c503" + [[package]] name = "vcpkg" version = "0.2.15" @@ -4280,6 +4682,12 @@ dependencies = [ "smallvec", ] +[[package]] +name = "waker-fn" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3c4517f54858c779bbcbf228f4fca63d121bf85fbecb2dc578cdf4a39395690" + [[package]] name = "walkdir" version = "2.4.0" @@ -4424,7 +4832,7 @@ dependencies = [ "either", "home", "once_cell", - "rustix", + "rustix 0.38.30", "windows-sys 0.48.0", ] @@ -4638,6 +5046,15 @@ dependencies = [ "tap", ] +[[package]] +name = "xattr" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" +dependencies = [ + "libc", +] + [[package]] name = "xattr" version = "1.3.1" @@ -4645,8 +5062,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" dependencies = [ "libc", - "linux-raw-sys", - "rustix", + "linux-raw-sys 0.4.13", + "rustix 0.38.30", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index ec6573679..3eb72c42b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,9 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] anstream = { version = "0.6.5" } anyhow = { version = "1.0.79" } +async-compression = { version = "0.4.6" } +async-std = {version = "1.6.0" } +async-tar = { version = "0.4.2" } async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", rev = "8dab2c08ac864fec1df014465264f9a7c8eae905" } async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "d76801da0943de985254fc6255c0e476b57c5836", features = ["deflate"] } base64 = { version = "0.21.7" } diff --git a/crates/puffin-build/src/lib.rs b/crates/puffin-build/src/lib.rs index 5aea85f9d..9270ddbdd 100644 --- a/crates/puffin-build/src/lib.rs +++ b/crates/puffin-build/src/lib.rs @@ -27,7 +27,6 @@ use tracing::{debug, info_span, instrument, Instrument}; use distribution_types::Resolution; use pep508_rs::Requirement; -use puffin_extract::extract_source; use puffin_interpreter::{Interpreter, Virtualenv}; use puffin_traits::{BuildContext, BuildKind, SetupPyStrategy, SourceBuildTrait}; @@ -297,8 +296,15 @@ impl SourceBuild { source.to_path_buf() } else { debug!("Unpacking for build: {}", source.display()); + let extracted = temp_dir.path().join("extracted"); - extract_source(source, &extracted) + + // Unzip the archive into the temporary directory. + puffin_extract::archive(source, &extracted) + .map_err(|err| Error::Extraction(extracted.clone(), err))?; + + // Extract the top-level directory from the archive. + puffin_extract::strip_component(&extracted) .map_err(|err| Error::Extraction(extracted.clone(), err))? }; let source_tree = if let Some(subdir) = subdirectory { diff --git a/crates/puffin-distribution/src/distribution_database.rs b/crates/puffin-distribution/src/distribution_database.rs index 3e1e0169e..b9f0e75d9 100644 --- a/crates/puffin-distribution/src/distribution_database.rs +++ b/crates/puffin-distribution/src/distribution_database.rs @@ -14,7 +14,6 @@ use distribution_types::{ use platform_tags::Tags; use puffin_cache::{Cache, CacheBucket, Timestamp, WheelCache}; use puffin_client::{CacheControl, CachedClientError, RegistryClient}; -use puffin_extract::unzip_no_seek; use puffin_fs::metadata_if_exists; use puffin_git::GitSource; use puffin_traits::{BuildContext, NoBinary}; @@ -157,7 +156,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Download and unzip the wheel to a temporary directory. let temp_dir = tempfile::tempdir_in(self.cache.root()).map_err(Error::CacheWrite)?; - unzip_no_seek(reader.compat(), temp_dir.path()).await?; + puffin_extract::stream::unzip(reader.compat(), temp_dir.path()).await?; // Persist the temporary directory to the directory store. let archive = self @@ -215,7 +214,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Download and unzip the wheel to a temporary directory. let temp_dir = tempfile::tempdir_in(self.cache.root()).map_err(Error::CacheWrite)?; - unzip_no_seek(reader.compat(), temp_dir.path()).await?; + puffin_extract::stream::unzip(reader.compat(), temp_dir.path()).await?; // Persist the temporary directory to the directory store. let archive = self diff --git a/crates/puffin-distribution/src/source/mod.rs b/crates/puffin-distribution/src/source/mod.rs index d427db914..f7a661bfa 100644 --- a/crates/puffin-distribution/src/source/mod.rs +++ b/crates/puffin-distribution/src/source/mod.rs @@ -8,7 +8,6 @@ use anyhow::Result; use fs_err::tokio as fs; use futures::{FutureExt, TryStreamExt}; use reqwest::Response; -use tempfile::TempDir; use tokio_util::compat::FuturesAsyncReadCompatExt; use tracing::{debug, info_span, instrument, Instrument}; use url::Url; @@ -750,60 +749,30 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { return Ok(cache_path); } - // Download the source distribution to a temporary file. - // TODO(charlie): Unzip as we download, as with wheels. + // Download and unzip the source distribution into a temporary directory. let span = info_span!("download_source_dist", filename = filename, source_dist = %source_dist); - let download_dir = self.download_source_dist_url(response, filename).await?; - drop(span); - - // Unzip the source distribution to a temporary directory. - let span = - info_span!("extract_source_dist", filename = filename, source_dist = %source_dist); - let source_dist_dir = puffin_extract::extract_source( - download_dir.path().join(filename), - download_dir.path().join("extracted"), - )?; - drop(span); - - // Persist the unzipped distribution to the cache. - fs_err::tokio::create_dir_all(cache_path.parent().expect("Cache entry to have parent")) - .await - .map_err(Error::CacheWrite)?; - fs_err::tokio::rename(&source_dist_dir, &cache_path) - .await - .map_err(Error::CacheWrite)?; - - Ok(cache_path) - } - - /// Download a source distribution from a URL to a temporary file. - async fn download_source_dist_url( - &self, - response: Response, - source_dist_filename: &str, - ) -> Result { + let temp_dir = + tempfile::tempdir_in(self.build_context.cache().root()).map_err(Error::CacheWrite)?; let reader = response .bytes_stream() .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err)) .into_async_read(); - let mut reader = tokio::io::BufReader::new(reader.compat()); + puffin_extract::stream::archive(reader.compat(), filename, temp_dir.path()).await?; + drop(span); - // Create a temporary directory. - let temp_dir = tempfile::tempdir_in(self.build_context.cache().root()) - .map_err(puffin_client::ErrorKind::CacheWrite)?; + // Extract the top-level directory. + let extracted = puffin_extract::strip_component(temp_dir.path())?; - // Download the source distribution to a temporary file. - let mut writer = tokio::io::BufWriter::new( - fs_err::tokio::File::create(temp_dir.path().join(source_dist_filename)) - .await - .map_err(puffin_client::ErrorKind::CacheWrite)?, - ); - tokio::io::copy(&mut reader, &mut writer) + // Persist it to the cache. + fs_err::tokio::create_dir_all(cache_path.parent().expect("Cache entry to have parent")) .await - .map_err(puffin_client::ErrorKind::CacheWrite)?; + .map_err(Error::CacheWrite)?; + fs_err::tokio::rename(extracted, &cache_path) + .await + .map_err(Error::CacheWrite)?; - Ok(temp_dir) + Ok(cache_path) } /// Download a source distribution from a Git repository. diff --git a/crates/puffin-distribution/src/unzip.rs b/crates/puffin-distribution/src/unzip.rs index 37a8f6843..2cf933988 100644 --- a/crates/puffin-distribution/src/unzip.rs +++ b/crates/puffin-distribution/src/unzip.rs @@ -1,6 +1,6 @@ use std::path::Path; -use puffin_extract::{unzip_archive, Error}; +use puffin_extract::Error; use crate::download::BuiltWheel; use crate::{DiskWheel, LocalWheel}; @@ -12,13 +12,13 @@ pub trait Unzip { impl Unzip for DiskWheel { fn unzip(&self, target: &Path) -> Result<(), Error> { - unzip_archive(fs_err::File::open(&self.path)?, target) + puffin_extract::unzip(fs_err::File::open(&self.path)?, target) } } impl Unzip for BuiltWheel { fn unzip(&self, target: &Path) -> Result<(), Error> { - unzip_archive(fs_err::File::open(&self.path)?, target) + puffin_extract::unzip(fs_err::File::open(&self.path)?, target) } } diff --git a/crates/puffin-extract/Cargo.toml b/crates/puffin-extract/Cargo.toml index d034f102b..214ba2679 100644 --- a/crates/puffin-extract/Cargo.toml +++ b/crates/puffin-extract/Cargo.toml @@ -13,6 +13,10 @@ license = { workspace = true } workspace = true [dependencies] +async-compression = { workspace = true, features = ["gzip"] } +# See: https://github.com/dignifiedquire/async-tar/pull/35 +async-std = { workspace = true, features = ["unstable"] } +async-tar = { workspace = true } async_zip = { workspace = true, features = ["tokio"] } flate2 = { workspace = true } fs-err = { workspace = true, features = ["tokio"] } diff --git a/crates/puffin-extract/src/error.rs b/crates/puffin-extract/src/error.rs new file mode 100644 index 000000000..c1d2ce641 --- /dev/null +++ b/crates/puffin-extract/src/error.rs @@ -0,0 +1,19 @@ +use std::path::PathBuf; + +use zip::result::ZipError; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error(transparent)] + Zip(#[from] ZipError), + #[error(transparent)] + AsyncZip(#[from] async_zip::error::ZipError), + #[error(transparent)] + Io(#[from] std::io::Error), + #[error("Unsupported archive type: {0}")] + UnsupportedArchive(PathBuf), + #[error( + "The top level of the archive must only contain a list directory, but it contains: {0:?}" + )] + InvalidArchive(Vec), +} diff --git a/crates/puffin-extract/src/lib.rs b/crates/puffin-extract/src/lib.rs index b5ecaff23..d9b25aeb8 100644 --- a/crates/puffin-extract/src/lib.rs +++ b/crates/puffin-extract/src/lib.rs @@ -1,223 +1,7 @@ -use std::fs::OpenOptions; -use std::path::{Path, PathBuf}; -use std::sync::Mutex; - -use rayon::prelude::*; -use rustc_hash::FxHashSet; -use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; -use zip::result::ZipError; -use zip::ZipArchive; - -pub use crate::vendor::{CloneableSeekableReader, HasLength}; +pub use error::Error; +pub use sync::*; +mod error; +pub mod stream; +mod sync; mod vendor; - -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error(transparent)] - Zip(#[from] ZipError), - #[error(transparent)] - AsyncZip(#[from] async_zip::error::ZipError), - #[error(transparent)] - Io(#[from] std::io::Error), - #[error("Unsupported archive type: {0}")] - UnsupportedArchive(PathBuf), - #[error( - "The top level of the archive must only contain a list directory, but it contains: {0:?}" - )] - InvalidArchive(Vec), -} - -/// Unzip a `.zip` archive into the target directory without requiring Seek. -/// -/// This is useful for unzipping files as they're being downloaded. If the archive -/// is already fully on disk, consider using `unzip_archive`, which can use multiple -/// threads to work faster in that case. -pub async fn unzip_no_seek( - reader: R, - target: &Path, -) -> Result<(), Error> { - let mut reader = reader.compat(); - let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader); - - let mut directories = FxHashSet::default(); - - while let Some(mut entry) = zip.next_with_entry().await? { - // Construct the (expected) path to the file on-disk. - let path = entry.reader().entry().filename().as_str()?; - let path = target.join(path); - let is_dir = entry.reader().entry().dir()?; - - // Either create the directory or write the file to disk. - if is_dir { - if directories.insert(path.clone()) { - fs_err::tokio::create_dir_all(path).await?; - } - } else { - if let Some(parent) = path.parent() { - if directories.insert(parent.to_path_buf()) { - fs_err::tokio::create_dir_all(parent).await?; - } - } - - let file = fs_err::tokio::File::create(path).await?; - let mut writer = - if let Ok(size) = usize::try_from(entry.reader().entry().uncompressed_size()) { - tokio::io::BufWriter::with_capacity(size, file) - } else { - tokio::io::BufWriter::new(file) - }; - let mut reader = entry.reader_mut().compat(); - tokio::io::copy(&mut reader, &mut writer).await?; - } - - // Close current file to get access to the next one. See docs: - // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ - zip = entry.skip().await?; - } - - // On Unix, we need to set file permissions, which are stored in the central directory, at the - // end of the archive. The `ZipFileReader` reads until it sees a central directory signature, - // which indicates the first entry in the central directory. So we continue reading from there. - #[cfg(unix)] - { - use std::fs::Permissions; - use std::os::unix::fs::PermissionsExt; - - // To avoid lots of small reads to `reader` when parsing the central directory, wrap it in - // a buffer. - let mut buf = futures::io::BufReader::new(reader); - let mut directory = async_zip::base::read::cd::CentralDirectoryReader::new(&mut buf); - while let Some(entry) = directory.next().await? { - if entry.dir()? { - continue; - } - - // Construct the (expected) path to the file on-disk. - let path = entry.filename().as_str()?; - let path = target.join(path); - - if let Some(mode) = entry.unix_permissions() { - fs_err::set_permissions(&path, Permissions::from_mode(mode))?; - } - } - } - - Ok(()) -} - -/// Unzip a `.zip` archive into the target directory. -pub fn unzip_archive( - reader: R, - target: &Path, -) -> Result<(), Error> { - // Unzip in parallel. - let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?; - let directories = Mutex::new(FxHashSet::default()); - (0..archive.len()) - .par_bridge() - .map(|file_number| { - let mut archive = archive.clone(); - let mut file = archive.by_index(file_number)?; - - // Determine the path of the file within the wheel. - let Some(enclosed_name) = file.enclosed_name() else { - return Ok(()); - }; - - // Create necessary parent directories. - let path = target.join(enclosed_name); - if file.is_dir() { - let mut directories = directories.lock().unwrap(); - if directories.insert(path.clone()) { - fs_err::create_dir_all(path)?; - } - return Ok(()); - } - - if let Some(parent) = path.parent() { - let mut directories = directories.lock().unwrap(); - if directories.insert(parent.to_path_buf()) { - fs_err::create_dir_all(parent)?; - } - } - - // Create the file, with the correct permissions (on Unix). - let mut options = OpenOptions::new(); - options.write(true); - options.create_new(true); - - #[cfg(unix)] - { - use std::os::unix::fs::OpenOptionsExt; - - if let Some(mode) = file.unix_mode() { - options.mode(mode); - } - } - - // Copy the file contents. - let mut outfile = options.open(&path)?; - std::io::copy(&mut file, &mut outfile)?; - - Ok(()) - }) - .collect::>() -} - -/// Extract a `.zip` or `.tar.gz` archive into the target directory. -pub fn extract_archive(source: impl AsRef, target: impl AsRef) -> Result<(), Error> { - // .zip - if source - .as_ref() - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("zip")) - { - unzip_archive(fs_err::File::open(source.as_ref())?, target.as_ref())?; - return Ok(()); - } - - // .tar.gz - if source - .as_ref() - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("gz")) - { - if source.as_ref().file_stem().is_some_and(|stem| { - Path::new(stem) - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) - }) { - let mut archive = tar::Archive::new(flate2::read::GzDecoder::new(fs_err::File::open( - source.as_ref(), - )?)); - // https://github.com/alexcrichton/tar-rs/issues/349 - archive.set_preserve_mtime(false); - archive.unpack(target)?; - return Ok(()); - } - } - - Err(Error::UnsupportedArchive(source.as_ref().to_path_buf())) -} - -/// Extract a source distribution into the target directory. -/// -/// Returns the path to the top-level directory of the source distribution. -pub fn extract_source( - source: impl AsRef, - target: impl AsRef, -) -> Result { - extract_archive(&source, &target)?; - - // > A .tar.gz source distribution (sdist) contains a single top-level directory called - // > `{name}-{version}` (e.g. foo-1.0), containing the source files of the package. - // TODO(konstin): Verify the name of the directory. - let top_level = - fs_err::read_dir(target.as_ref())?.collect::>>()?; - let [root] = top_level.as_slice() else { - return Err(Error::InvalidArchive(top_level)); - }; - - Ok(root.path()) -} diff --git a/crates/puffin-extract/src/stream.rs b/crates/puffin-extract/src/stream.rs new file mode 100644 index 000000000..2a2c813ff --- /dev/null +++ b/crates/puffin-extract/src/stream.rs @@ -0,0 +1,133 @@ +use std::path::Path; + +use rustc_hash::FxHashSet; +use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; + +use crate::Error; + +/// Unzip a `.zip` archive into the target directory, without requiring `Seek`. +/// +/// This is useful for unzipping files as they're being downloaded. If the archive +/// is already fully on disk, consider using `unzip_archive`, which can use multiple +/// threads to work faster in that case. +pub async fn unzip( + reader: R, + target: impl AsRef, +) -> Result<(), Error> { + let mut reader = reader.compat(); + let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader); + + let mut directories = FxHashSet::default(); + + while let Some(mut entry) = zip.next_with_entry().await? { + // Construct the (expected) path to the file on-disk. + let path = entry.reader().entry().filename().as_str()?; + let path = target.as_ref().join(path); + let is_dir = entry.reader().entry().dir()?; + + // Either create the directory or write the file to disk. + if is_dir { + if directories.insert(path.clone()) { + fs_err::tokio::create_dir_all(path).await?; + } + } else { + if let Some(parent) = path.parent() { + if directories.insert(parent.to_path_buf()) { + fs_err::tokio::create_dir_all(parent).await?; + } + } + + let file = fs_err::tokio::File::create(path).await?; + let mut writer = + if let Ok(size) = usize::try_from(entry.reader().entry().uncompressed_size()) { + tokio::io::BufWriter::with_capacity(size, file) + } else { + tokio::io::BufWriter::new(file) + }; + let mut reader = entry.reader_mut().compat(); + tokio::io::copy(&mut reader, &mut writer).await?; + } + + // Close current file to get access to the next one. See docs: + // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ + zip = entry.skip().await?; + } + + // On Unix, we need to set file permissions, which are stored in the central directory, at the + // end of the archive. The `ZipFileReader` reads until it sees a central directory signature, + // which indicates the first entry in the central directory. So we continue reading from there. + #[cfg(unix)] + { + use std::fs::Permissions; + use std::os::unix::fs::PermissionsExt; + + // To avoid lots of small reads to `reader` when parsing the central directory, wrap it in + // a buffer. + let mut buf = futures::io::BufReader::new(reader); + let mut directory = async_zip::base::read::cd::CentralDirectoryReader::new(&mut buf); + while let Some(entry) = directory.next().await? { + if entry.dir()? { + continue; + } + + // Construct the (expected) path to the file on-disk. + let path = entry.filename().as_str()?; + let path = target.as_ref().join(path); + + if let Some(mode) = entry.unix_permissions() { + fs_err::set_permissions(&path, Permissions::from_mode(mode))?; + } + } + } + + Ok(()) +} + +/// Unzip a `.tar.gz` archive into the target directory, without requiring `Seek`. +/// +/// This is useful for unpacking files as they're being downloaded. +pub async fn untar( + reader: R, + target: impl AsRef, +) -> Result<(), Error> { + let decompressed_bytes = async_compression::futures::bufread::GzipDecoder::new(reader.compat()); + let archive = async_tar::ArchiveBuilder::new(decompressed_bytes) + .set_preserve_permissions(false) + .build(); + Ok(archive.unpack(target.as_ref()).await?) +} + +/// Unzip a `.zip` or `.tar.gz` archive into the target directory, without requiring `Seek`. +pub async fn archive( + reader: R, + source: impl AsRef, + target: impl AsRef, +) -> Result<(), Error> { + // `.zip` + if source + .as_ref() + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("zip")) + { + unzip(reader, target).await?; + return Ok(()); + } + + // `.tar.gz` + if source + .as_ref() + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("gz")) + { + if source.as_ref().file_stem().is_some_and(|stem| { + Path::new(stem) + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) + }) { + untar(reader, target).await?; + return Ok(()); + } + } + + Err(Error::UnsupportedArchive(source.as_ref().to_path_buf())) +} diff --git a/crates/puffin-extract/src/sync.rs b/crates/puffin-extract/src/sync.rs new file mode 100644 index 000000000..6c8a15a99 --- /dev/null +++ b/crates/puffin-extract/src/sync.rs @@ -0,0 +1,122 @@ +use std::fs::OpenOptions; +use std::path::{Path, PathBuf}; +use std::sync::Mutex; + +use rayon::prelude::*; +use rustc_hash::FxHashSet; +use zip::ZipArchive; + +use crate::vendor::{CloneableSeekableReader, HasLength}; +use crate::Error; + +/// Unzip a `.zip` archive into the target directory. +pub fn unzip( + reader: R, + target: &Path, +) -> Result<(), Error> { + // Unzip in parallel. + let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?; + let directories = Mutex::new(FxHashSet::default()); + (0..archive.len()) + .par_bridge() + .map(|file_number| { + let mut archive = archive.clone(); + let mut file = archive.by_index(file_number)?; + + // Determine the path of the file within the wheel. + let Some(enclosed_name) = file.enclosed_name() else { + return Ok(()); + }; + + // Create necessary parent directories. + let path = target.join(enclosed_name); + if file.is_dir() { + let mut directories = directories.lock().unwrap(); + if directories.insert(path.clone()) { + fs_err::create_dir_all(path)?; + } + return Ok(()); + } + + if let Some(parent) = path.parent() { + let mut directories = directories.lock().unwrap(); + if directories.insert(parent.to_path_buf()) { + fs_err::create_dir_all(parent)?; + } + } + + // Create the file, with the correct permissions (on Unix). + let mut options = OpenOptions::new(); + options.write(true); + options.create_new(true); + + #[cfg(unix)] + { + use std::os::unix::fs::OpenOptionsExt; + + if let Some(mode) = file.unix_mode() { + options.mode(mode); + } + } + + // Copy the file contents. + let mut outfile = options.open(&path)?; + std::io::copy(&mut file, &mut outfile)?; + + Ok(()) + }) + .collect::>() +} + +/// Extract a `.zip` or `.tar.gz` archive into the target directory. +pub fn archive(source: impl AsRef, target: impl AsRef) -> Result<(), Error> { + // `.zip` + if source + .as_ref() + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("zip")) + { + unzip(fs_err::File::open(source.as_ref())?, target.as_ref())?; + return Ok(()); + } + + // `.tar.gz` + if source + .as_ref() + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("gz")) + { + if source.as_ref().file_stem().is_some_and(|stem| { + Path::new(stem) + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) + }) { + let mut archive = tar::Archive::new(flate2::read::GzDecoder::new(fs_err::File::open( + source.as_ref(), + )?)); + // https://github.com/alexcrichton/tar-rs/issues/349 + archive.set_preserve_mtime(false); + archive.unpack(target)?; + return Ok(()); + } + } + + Err(Error::UnsupportedArchive(source.as_ref().to_path_buf())) +} + +/// Extract the top-level directory from an unpacked archive. +/// +/// The specification says: +/// > A .tar.gz source distribution (sdist) contains a single top-level directory called +/// > `{name}-{version}` (e.g. foo-1.0), containing the source files of the package. +/// +/// This function returns the path to that top-level directory. +pub fn strip_component(source: impl AsRef) -> Result { + // TODO(konstin): Verify the name of the directory. + let top_level = + fs_err::read_dir(source.as_ref())?.collect::>>()?; + let [root] = top_level.as_slice() else { + return Err(Error::InvalidArchive(top_level)); + }; + Ok(root.path()) +} diff --git a/crates/puffin-extract/src/vendor/cloneable_seekable_reader.rs b/crates/puffin-extract/src/vendor/cloneable_seekable_reader.rs index 9048ac21e..b2c58e07f 100644 --- a/crates/puffin-extract/src/vendor/cloneable_seekable_reader.rs +++ b/crates/puffin-extract/src/vendor/cloneable_seekable_reader.rs @@ -26,7 +26,7 @@ pub trait HasLength { /// and thus can be cloned cheaply. It supports seeking; each cloned instance /// maintains its own pointer into the file, and the underlying instance /// is seeked prior to each read. -pub struct CloneableSeekableReader { +pub(crate) struct CloneableSeekableReader { file: Arc>, pos: u64, // TODO determine and store this once instead of per cloneable file diff --git a/crates/puffin-extract/src/vendor/mod.rs b/crates/puffin-extract/src/vendor/mod.rs index 22f8fc671..3148e2edd 100644 --- a/crates/puffin-extract/src/vendor/mod.rs +++ b/crates/puffin-extract/src/vendor/mod.rs @@ -1,3 +1,3 @@ -pub use cloneable_seekable_reader::{CloneableSeekableReader, HasLength}; +pub(crate) use cloneable_seekable_reader::{CloneableSeekableReader, HasLength}; mod cloneable_seekable_reader; diff --git a/crates/puffin/tests/pip_compile.rs b/crates/puffin/tests/pip_compile.rs index cb032c0d6..75bbd2f74 100644 --- a/crates/puffin/tests/pip_compile.rs +++ b/crates/puffin/tests/pip_compile.rs @@ -1617,7 +1617,7 @@ fn disallowed_transitive_url_dependency() -> Result<()> { let venv = create_venv_py312(&temp_dir, &cache_dir); let requirements_in = temp_dir.child("requirements.in"); - requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip")?; + requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/14078476/transitive_url_dependency.zip")?; insta::with_settings!({ filters => INSTA_FILTERS.to_vec() @@ -1654,7 +1654,7 @@ fn allowed_transitive_url_dependency() -> Result<()> { let venv = create_venv_py312(&temp_dir, &cache_dir); let requirements_in = temp_dir.child("requirements.in"); - requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip")?; + requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/14078476/transitive_url_dependency.zip")?; let constraints_txt = temp_dir.child("constraints.txt"); constraints_txt.write_str("werkzeug @ git+https://github.com/pallets/werkzeug@2.0.0")?; @@ -1679,7 +1679,7 @@ fn allowed_transitive_url_dependency() -> Result<()> { ----- stdout ----- # This file was autogenerated by Puffin v[VERSION] via the following command: # puffin pip compile requirements.in --constraint constraints.txt --cache-dir [CACHE_DIR] - transitive-url-dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip + transitive-url-dependency @ https://github.com/astral-sh/ruff/files/14078476/transitive_url_dependency.zip werkzeug @ git+https://github.com/pallets/werkzeug@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74 # via transitive-url-dependency @@ -1702,7 +1702,7 @@ fn allowed_transitive_canonical_url_dependency() -> Result<()> { let venv = create_venv_py312(&temp_dir, &cache_dir); let requirements_in = temp_dir.child("requirements.in"); - requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip")?; + requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/14078476/transitive_url_dependency.zip")?; let constraints_txt = temp_dir.child("constraints.txt"); constraints_txt.write_str("werkzeug @ git+https://github.com/pallets/werkzeug.git@2.0.0")?; @@ -1727,7 +1727,7 @@ fn allowed_transitive_canonical_url_dependency() -> Result<()> { ----- stdout ----- # This file was autogenerated by Puffin v[VERSION] via the following command: # puffin pip compile requirements.in --constraint constraints.txt --cache-dir [CACHE_DIR] - transitive-url-dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip + transitive-url-dependency @ https://github.com/astral-sh/ruff/files/14078476/transitive_url_dependency.zip werkzeug @ git+https://github.com/pallets/werkzeug@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74 # via transitive-url-dependency