mirror of
https://github.com/astral-sh/uv.git
synced 2025-10-28 18:54:10 +00:00
Stream unpacking of source distribution downloads (#1157)
This PR migrates our source distribution downloads to unpack each archive
as it streams in, similar to our approach for wheels.
In my testing, this showed a consistent speedup (e.g., 6% here for a few
representative source distributions):
```text
❯ python -m scripts.bench --puffin-path ./target/release/main --puffin-path ./target/release/puffin --benchmark install-cold requirements.in
Benchmark 1: ./target/release/main (install-cold)
Time (mean ± σ): 1.503 s ± 0.039 s [User: 1.479 s, System: 0.537 s]
Range (min … max): 1.466 s … 1.605 s 10 runs
Benchmark 2: ./target/release/puffin (install-cold)
Time (mean ± σ): 1.421 s ± 0.024 s [User: 1.505 s, System: 0.593 s]
Range (min … max): 1.381 s … 1.454 s 10 runs
Summary
'./target/release/puffin (install-cold)' ran
1.06 ± 0.03 times faster than './target/release/main (install-cold)'
```
This commit is contained in:
parent
5219d37250
commit
d88ce76979
14 changed files with 750 additions and 294 deletions
443
Cargo.lock
generated
443
Cargo.lock
generated
|
|
@ -178,6 +178,30 @@ dependencies = [
|
|||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-channel"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35"
|
||||
dependencies = [
|
||||
"concurrent-queue",
|
||||
"event-listener 2.5.3",
|
||||
"futures-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-channel"
|
||||
version = "2.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ca33f4bc4ed1babef42cad36cc1f51fa88be00420404e5b1e80ab1b18f7678c"
|
||||
dependencies = [
|
||||
"concurrent-queue",
|
||||
"event-listener 4.0.3",
|
||||
"event-listener-strategy",
|
||||
"futures-core",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-compression"
|
||||
version = "0.4.6"
|
||||
|
|
@ -193,6 +217,176 @@ dependencies = [
|
|||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-executor"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17ae5ebefcc48e7452b4987947920dac9450be1110cadf34d1b8c116bdbaf97c"
|
||||
dependencies = [
|
||||
"async-lock 3.3.0",
|
||||
"async-task",
|
||||
"concurrent-queue",
|
||||
"fastrand 2.0.1",
|
||||
"futures-lite 2.2.0",
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-global-executor"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05b1b633a2115cd122d73b955eadd9916c18c8f510ec9cd1686404c60ad1c29c"
|
||||
dependencies = [
|
||||
"async-channel 2.1.1",
|
||||
"async-executor",
|
||||
"async-io 2.3.0",
|
||||
"async-lock 3.3.0",
|
||||
"blocking",
|
||||
"futures-lite 2.2.0",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-io"
|
||||
version = "1.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af"
|
||||
dependencies = [
|
||||
"async-lock 2.8.0",
|
||||
"autocfg",
|
||||
"cfg-if 1.0.0",
|
||||
"concurrent-queue",
|
||||
"futures-lite 1.13.0",
|
||||
"log",
|
||||
"parking",
|
||||
"polling 2.8.0",
|
||||
"rustix 0.37.27",
|
||||
"slab",
|
||||
"socket2 0.4.10",
|
||||
"waker-fn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-io"
|
||||
version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb41eb19024a91746eba0773aa5e16036045bbf45733766661099e182ea6a744"
|
||||
dependencies = [
|
||||
"async-lock 3.3.0",
|
||||
"cfg-if 1.0.0",
|
||||
"concurrent-queue",
|
||||
"futures-io",
|
||||
"futures-lite 2.2.0",
|
||||
"parking",
|
||||
"polling 3.3.2",
|
||||
"rustix 0.38.30",
|
||||
"slab",
|
||||
"tracing",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-lock"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b"
|
||||
dependencies = [
|
||||
"event-listener 2.5.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-lock"
|
||||
version = "3.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d034b430882f8381900d3fe6f0aaa3ad94f2cb4ac519b429692a1bc2dda4ae7b"
|
||||
dependencies = [
|
||||
"event-listener 4.0.3",
|
||||
"event-listener-strategy",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-process"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea6438ba0a08d81529c69b36700fa2f95837bfe3e776ab39cde9c14d9149da88"
|
||||
dependencies = [
|
||||
"async-io 1.13.0",
|
||||
"async-lock 2.8.0",
|
||||
"async-signal",
|
||||
"blocking",
|
||||
"cfg-if 1.0.0",
|
||||
"event-listener 3.1.0",
|
||||
"futures-lite 1.13.0",
|
||||
"rustix 0.38.30",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-signal"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1079d27511f6c038736279421774ef4ad4bdd2e300825f4a48c4cc463a57cedf"
|
||||
dependencies = [
|
||||
"async-io 1.13.0",
|
||||
"async-lock 2.8.0",
|
||||
"atomic-waker",
|
||||
"cfg-if 1.0.0",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"rustix 0.38.30",
|
||||
"signal-hook-registry",
|
||||
"slab",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-std"
|
||||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62565bb4402e926b29953c785397c6dc0391b7b446e45008b0049eb43cec6f5d"
|
||||
dependencies = [
|
||||
"async-channel 1.9.0",
|
||||
"async-global-executor",
|
||||
"async-io 1.13.0",
|
||||
"async-lock 2.8.0",
|
||||
"async-process",
|
||||
"crossbeam-utils",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"futures-lite 1.13.0",
|
||||
"gloo-timers",
|
||||
"kv-log-macro",
|
||||
"log",
|
||||
"memchr",
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
"pin-utils",
|
||||
"slab",
|
||||
"wasm-bindgen-futures",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-tar"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c49359998a76e32ef6e870dbc079ebad8f1e53e8441c5dd39d27b44493fe331"
|
||||
dependencies = [
|
||||
"async-std",
|
||||
"filetime",
|
||||
"libc",
|
||||
"pin-project",
|
||||
"redox_syscall 0.2.16",
|
||||
"xattr 0.2.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-task"
|
||||
version = "4.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fbb36e985947064623dbd357f727af08ffd077f93d696782f3c56365fa2e2799"
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.77"
|
||||
|
|
@ -229,13 +423,19 @@ source = "git+https://github.com/charliermarsh/rs-async-zip?rev=d76801da0943de98
|
|||
dependencies = [
|
||||
"async-compression",
|
||||
"crc32fast",
|
||||
"futures-lite",
|
||||
"futures-lite 2.2.0",
|
||||
"pin-project",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atomic-waker"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
|
|
@ -326,6 +526,22 @@ dependencies = [
|
|||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blocking"
|
||||
version = "1.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a37913e8dc4ddcc604f0c6d3bf2887c995153af3611de9e23c352b44c1b9118"
|
||||
dependencies = [
|
||||
"async-channel 2.1.1",
|
||||
"async-lock 3.3.0",
|
||||
"async-task",
|
||||
"fastrand 2.0.1",
|
||||
"futures-io",
|
||||
"futures-lite 2.2.0",
|
||||
"piper",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "3.4.0"
|
||||
|
|
@ -582,6 +798,15 @@ version = "1.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
|
||||
|
||||
[[package]]
|
||||
name = "concurrent-queue"
|
||||
version = "2.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d16048cd947b08fa32c24458a22f5dc5e835264f689f4f5653210c69fd107363"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "configparser"
|
||||
version = "3.0.4"
|
||||
|
|
@ -928,6 +1153,53 @@ dependencies = [
|
|||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "event-listener"
|
||||
version = "2.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
|
||||
|
||||
[[package]]
|
||||
name = "event-listener"
|
||||
version = "3.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d93877bcde0eb80ca09131a08d23f0a5c18a620b01db137dba666d18cd9b30c2"
|
||||
dependencies = [
|
||||
"concurrent-queue",
|
||||
"parking",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "event-listener"
|
||||
version = "4.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67b215c49b2b248c855fb73579eb1f4f26c38ffdc12973e20e07b91d78d5646e"
|
||||
dependencies = [
|
||||
"concurrent-queue",
|
||||
"parking",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "event-listener-strategy"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3"
|
||||
dependencies = [
|
||||
"event-listener 4.0.3",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be"
|
||||
dependencies = [
|
||||
"instant",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.0.1"
|
||||
|
|
@ -1061,13 +1333,28 @@ version = "0.3.30"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
|
||||
|
||||
[[package]]
|
||||
name = "futures-lite"
|
||||
version = "1.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce"
|
||||
dependencies = [
|
||||
"fastrand 1.9.0",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"memchr",
|
||||
"parking",
|
||||
"pin-project-lite",
|
||||
"waker-fn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-lite"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "445ba825b27408685aaecefd65178908c36c6e96aaf6d8599419d46e624192ba"
|
||||
dependencies = [
|
||||
"fastrand",
|
||||
"fastrand 2.0.1",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"parking",
|
||||
|
|
@ -1189,6 +1476,18 @@ dependencies = [
|
|||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gloo-timers"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b995a66bb87bebce9a0f4a95aed01daca4872c050bfcb21653361c03bc35e5c"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "goblin"
|
||||
version = "0.8.0"
|
||||
|
|
@ -1385,7 +1684,7 @@ dependencies = [
|
|||
"httpdate",
|
||||
"itoa",
|
||||
"pin-project-lite",
|
||||
"socket2",
|
||||
"socket2 0.5.5",
|
||||
"tokio",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
|
|
@ -1574,6 +1873,17 @@ dependencies = [
|
|||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "io-lifetimes"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.9.0"
|
||||
|
|
@ -1587,7 +1897,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"rustix",
|
||||
"rustix 0.38.30",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
|
|
@ -1658,6 +1968,15 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kv-log-macro"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f"
|
||||
dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
|
|
@ -1756,6 +2075,12 @@ version = "0.5.6"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.13"
|
||||
|
|
@ -1777,6 +2102,9 @@ name = "log"
|
|||
version = "0.4.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
||||
dependencies = [
|
||||
"value-bag",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mailparse"
|
||||
|
|
@ -2199,6 +2527,17 @@ version = "0.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "piper"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "668d31b1c4eba19242f2088b2bf3316b82ca31082a8335764db4e083db7485d4"
|
||||
dependencies = [
|
||||
"atomic-waker",
|
||||
"fastrand 2.0.1",
|
||||
"futures-io",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.29"
|
||||
|
|
@ -2260,6 +2599,36 @@ dependencies = [
|
|||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polling"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"bitflags 1.3.2",
|
||||
"cfg-if 1.0.0",
|
||||
"concurrent-queue",
|
||||
"libc",
|
||||
"log",
|
||||
"pin-project-lite",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polling"
|
||||
version = "3.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "545c980a3880efd47b2e262f6a4bb6daad6555cf3367aa9c4e52895f69537a41"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"concurrent-queue",
|
||||
"pin-project-lite",
|
||||
"rustix 0.38.30",
|
||||
"tracing",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.6.0"
|
||||
|
|
@ -2632,6 +3001,9 @@ dependencies = [
|
|||
name = "puffin-extract"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"async-compression",
|
||||
"async-std",
|
||||
"async-tar",
|
||||
"async_zip",
|
||||
"flate2",
|
||||
"fs-err",
|
||||
|
|
@ -3069,7 +3441,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "767be24c0da52e7448d495b8d162506a9aa125426651d547d545d6c2b4b65b62"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"rustix",
|
||||
"rustix 0.38.30",
|
||||
"windows",
|
||||
]
|
||||
|
||||
|
|
@ -3322,6 +3694,20 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.37.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fea8ca367a3a01fe35e6943c400addf443c0f57670e6ec51196f71a4b8762dd2"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"errno",
|
||||
"io-lifetimes",
|
||||
"libc",
|
||||
"linux-raw-sys 0.3.8",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.30"
|
||||
|
|
@ -3331,7 +3717,7 @@ dependencies = [
|
|||
"bitflags 2.4.2",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"linux-raw-sys 0.4.13",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
|
|
@ -3560,6 +3946,16 @@ version = "0.3.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.4.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.5.5"
|
||||
|
|
@ -3679,7 +4075,7 @@ checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb"
|
|||
dependencies = [
|
||||
"filetime",
|
||||
"libc",
|
||||
"xattr",
|
||||
"xattr 1.3.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -3704,9 +4100,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"fastrand",
|
||||
"fastrand 2.0.1",
|
||||
"redox_syscall 0.4.1",
|
||||
"rustix",
|
||||
"rustix 0.38.30",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
|
|
@ -3911,7 +4307,7 @@ dependencies = [
|
|||
"num_cpus",
|
||||
"pin-project-lite",
|
||||
"signal-hook-registry",
|
||||
"socket2",
|
||||
"socket2 0.5.5",
|
||||
"tokio-macros",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
|
@ -4216,6 +4612,12 @@ version = "0.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
||||
|
||||
[[package]]
|
||||
name = "value-bag"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7cdbaf5e132e593e9fc1de6a15bbec912395b11fb9719e061cf64f804524c503"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
|
|
@ -4280,6 +4682,12 @@ dependencies = [
|
|||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "waker-fn"
|
||||
version = "1.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3c4517f54858c779bbcbf228f4fca63d121bf85fbecb2dc578cdf4a39395690"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.4.0"
|
||||
|
|
@ -4424,7 +4832,7 @@ dependencies = [
|
|||
"either",
|
||||
"home",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
"rustix 0.38.30",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
|
|
@ -4638,6 +5046,15 @@ dependencies = [
|
|||
"tap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xattr"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xattr"
|
||||
version = "1.3.1"
|
||||
|
|
@ -4645,8 +5062,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"rustix",
|
||||
"linux-raw-sys 0.4.13",
|
||||
"rustix 0.38.30",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
|
|
@ -19,6 +19,9 @@ license = "MIT OR Apache-2.0"
|
|||
[workspace.dependencies]
|
||||
anstream = { version = "0.6.5" }
|
||||
anyhow = { version = "1.0.79" }
|
||||
async-compression = { version = "0.4.6" }
|
||||
async-std = {version = "1.6.0" }
|
||||
async-tar = { version = "0.4.2" }
|
||||
async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_range_reader", rev = "8dab2c08ac864fec1df014465264f9a7c8eae905" }
|
||||
async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "d76801da0943de985254fc6255c0e476b57c5836", features = ["deflate"] }
|
||||
base64 = { version = "0.21.7" }
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ use tracing::{debug, info_span, instrument, Instrument};
|
|||
|
||||
use distribution_types::Resolution;
|
||||
use pep508_rs::Requirement;
|
||||
use puffin_extract::extract_source;
|
||||
use puffin_interpreter::{Interpreter, Virtualenv};
|
||||
use puffin_traits::{BuildContext, BuildKind, SetupPyStrategy, SourceBuildTrait};
|
||||
|
||||
|
|
@ -297,8 +296,15 @@ impl SourceBuild {
|
|||
source.to_path_buf()
|
||||
} else {
|
||||
debug!("Unpacking for build: {}", source.display());
|
||||
|
||||
let extracted = temp_dir.path().join("extracted");
|
||||
extract_source(source, &extracted)
|
||||
|
||||
// Unzip the archive into the temporary directory.
|
||||
puffin_extract::archive(source, &extracted)
|
||||
.map_err(|err| Error::Extraction(extracted.clone(), err))?;
|
||||
|
||||
// Extract the top-level directory from the archive.
|
||||
puffin_extract::strip_component(&extracted)
|
||||
.map_err(|err| Error::Extraction(extracted.clone(), err))?
|
||||
};
|
||||
let source_tree = if let Some(subdir) = subdirectory {
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ use distribution_types::{
|
|||
use platform_tags::Tags;
|
||||
use puffin_cache::{Cache, CacheBucket, Timestamp, WheelCache};
|
||||
use puffin_client::{CacheControl, CachedClientError, RegistryClient};
|
||||
use puffin_extract::unzip_no_seek;
|
||||
use puffin_fs::metadata_if_exists;
|
||||
use puffin_git::GitSource;
|
||||
use puffin_traits::{BuildContext, NoBinary};
|
||||
|
|
@ -157,7 +156,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
// Download and unzip the wheel to a temporary directory.
|
||||
let temp_dir =
|
||||
tempfile::tempdir_in(self.cache.root()).map_err(Error::CacheWrite)?;
|
||||
unzip_no_seek(reader.compat(), temp_dir.path()).await?;
|
||||
puffin_extract::stream::unzip(reader.compat(), temp_dir.path()).await?;
|
||||
|
||||
// Persist the temporary directory to the directory store.
|
||||
let archive = self
|
||||
|
|
@ -215,7 +214,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
// Download and unzip the wheel to a temporary directory.
|
||||
let temp_dir =
|
||||
tempfile::tempdir_in(self.cache.root()).map_err(Error::CacheWrite)?;
|
||||
unzip_no_seek(reader.compat(), temp_dir.path()).await?;
|
||||
puffin_extract::stream::unzip(reader.compat(), temp_dir.path()).await?;
|
||||
|
||||
// Persist the temporary directory to the directory store.
|
||||
let archive = self
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ use anyhow::Result;
|
|||
use fs_err::tokio as fs;
|
||||
use futures::{FutureExt, TryStreamExt};
|
||||
use reqwest::Response;
|
||||
use tempfile::TempDir;
|
||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||
use tracing::{debug, info_span, instrument, Instrument};
|
||||
use url::Url;
|
||||
|
|
@ -750,60 +749,30 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
return Ok(cache_path);
|
||||
}
|
||||
|
||||
// Download the source distribution to a temporary file.
|
||||
// TODO(charlie): Unzip as we download, as with wheels.
|
||||
// Download and unzip the source distribution into a temporary directory.
|
||||
let span =
|
||||
info_span!("download_source_dist", filename = filename, source_dist = %source_dist);
|
||||
let download_dir = self.download_source_dist_url(response, filename).await?;
|
||||
drop(span);
|
||||
|
||||
// Unzip the source distribution to a temporary directory.
|
||||
let span =
|
||||
info_span!("extract_source_dist", filename = filename, source_dist = %source_dist);
|
||||
let source_dist_dir = puffin_extract::extract_source(
|
||||
download_dir.path().join(filename),
|
||||
download_dir.path().join("extracted"),
|
||||
)?;
|
||||
drop(span);
|
||||
|
||||
// Persist the unzipped distribution to the cache.
|
||||
fs_err::tokio::create_dir_all(cache_path.parent().expect("Cache entry to have parent"))
|
||||
.await
|
||||
.map_err(Error::CacheWrite)?;
|
||||
fs_err::tokio::rename(&source_dist_dir, &cache_path)
|
||||
.await
|
||||
.map_err(Error::CacheWrite)?;
|
||||
|
||||
Ok(cache_path)
|
||||
}
|
||||
|
||||
/// Download a source distribution from a URL to a temporary file.
|
||||
async fn download_source_dist_url(
|
||||
&self,
|
||||
response: Response,
|
||||
source_dist_filename: &str,
|
||||
) -> Result<TempDir, puffin_client::Error> {
|
||||
let temp_dir =
|
||||
tempfile::tempdir_in(self.build_context.cache().root()).map_err(Error::CacheWrite)?;
|
||||
let reader = response
|
||||
.bytes_stream()
|
||||
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))
|
||||
.into_async_read();
|
||||
let mut reader = tokio::io::BufReader::new(reader.compat());
|
||||
puffin_extract::stream::archive(reader.compat(), filename, temp_dir.path()).await?;
|
||||
drop(span);
|
||||
|
||||
// Create a temporary directory.
|
||||
let temp_dir = tempfile::tempdir_in(self.build_context.cache().root())
|
||||
.map_err(puffin_client::ErrorKind::CacheWrite)?;
|
||||
// Extract the top-level directory.
|
||||
let extracted = puffin_extract::strip_component(temp_dir.path())?;
|
||||
|
||||
// Download the source distribution to a temporary file.
|
||||
let mut writer = tokio::io::BufWriter::new(
|
||||
fs_err::tokio::File::create(temp_dir.path().join(source_dist_filename))
|
||||
.await
|
||||
.map_err(puffin_client::ErrorKind::CacheWrite)?,
|
||||
);
|
||||
tokio::io::copy(&mut reader, &mut writer)
|
||||
// Persist it to the cache.
|
||||
fs_err::tokio::create_dir_all(cache_path.parent().expect("Cache entry to have parent"))
|
||||
.await
|
||||
.map_err(puffin_client::ErrorKind::CacheWrite)?;
|
||||
.map_err(Error::CacheWrite)?;
|
||||
fs_err::tokio::rename(extracted, &cache_path)
|
||||
.await
|
||||
.map_err(Error::CacheWrite)?;
|
||||
|
||||
Ok(temp_dir)
|
||||
Ok(cache_path)
|
||||
}
|
||||
|
||||
/// Download a source distribution from a Git repository.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use std::path::Path;
|
||||
|
||||
use puffin_extract::{unzip_archive, Error};
|
||||
use puffin_extract::Error;
|
||||
|
||||
use crate::download::BuiltWheel;
|
||||
use crate::{DiskWheel, LocalWheel};
|
||||
|
|
@ -12,13 +12,13 @@ pub trait Unzip {
|
|||
|
||||
impl Unzip for DiskWheel {
|
||||
fn unzip(&self, target: &Path) -> Result<(), Error> {
|
||||
unzip_archive(fs_err::File::open(&self.path)?, target)
|
||||
puffin_extract::unzip(fs_err::File::open(&self.path)?, target)
|
||||
}
|
||||
}
|
||||
|
||||
impl Unzip for BuiltWheel {
|
||||
fn unzip(&self, target: &Path) -> Result<(), Error> {
|
||||
unzip_archive(fs_err::File::open(&self.path)?, target)
|
||||
puffin_extract::unzip(fs_err::File::open(&self.path)?, target)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,10 @@ license = { workspace = true }
|
|||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-compression = { workspace = true, features = ["gzip"] }
|
||||
# See: https://github.com/dignifiedquire/async-tar/pull/35
|
||||
async-std = { workspace = true, features = ["unstable"] }
|
||||
async-tar = { workspace = true }
|
||||
async_zip = { workspace = true, features = ["tokio"] }
|
||||
flate2 = { workspace = true }
|
||||
fs-err = { workspace = true, features = ["tokio"] }
|
||||
|
|
|
|||
19
crates/puffin-extract/src/error.rs
Normal file
19
crates/puffin-extract/src/error.rs
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
use std::path::PathBuf;
|
||||
|
||||
use zip::result::ZipError;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
#[error(transparent)]
|
||||
Zip(#[from] ZipError),
|
||||
#[error(transparent)]
|
||||
AsyncZip(#[from] async_zip::error::ZipError),
|
||||
#[error(transparent)]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("Unsupported archive type: {0}")]
|
||||
UnsupportedArchive(PathBuf),
|
||||
#[error(
|
||||
"The top level of the archive must only contain a list directory, but it contains: {0:?}"
|
||||
)]
|
||||
InvalidArchive(Vec<fs_err::DirEntry>),
|
||||
}
|
||||
|
|
@ -1,223 +1,7 @@
|
|||
use std::fs::OpenOptions;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Mutex;
|
||||
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashSet;
|
||||
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
|
||||
use zip::result::ZipError;
|
||||
use zip::ZipArchive;
|
||||
|
||||
pub use crate::vendor::{CloneableSeekableReader, HasLength};
|
||||
pub use error::Error;
|
||||
pub use sync::*;
|
||||
|
||||
mod error;
|
||||
pub mod stream;
|
||||
mod sync;
|
||||
mod vendor;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
#[error(transparent)]
|
||||
Zip(#[from] ZipError),
|
||||
#[error(transparent)]
|
||||
AsyncZip(#[from] async_zip::error::ZipError),
|
||||
#[error(transparent)]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("Unsupported archive type: {0}")]
|
||||
UnsupportedArchive(PathBuf),
|
||||
#[error(
|
||||
"The top level of the archive must only contain a list directory, but it contains: {0:?}"
|
||||
)]
|
||||
InvalidArchive(Vec<fs_err::DirEntry>),
|
||||
}
|
||||
|
||||
/// Unzip a `.zip` archive into the target directory without requiring Seek.
|
||||
///
|
||||
/// This is useful for unzipping files as they're being downloaded. If the archive
|
||||
/// is already fully on disk, consider using `unzip_archive`, which can use multiple
|
||||
/// threads to work faster in that case.
|
||||
pub async fn unzip_no_seek<R: tokio::io::AsyncRead + Unpin>(
|
||||
reader: R,
|
||||
target: &Path,
|
||||
) -> Result<(), Error> {
|
||||
let mut reader = reader.compat();
|
||||
let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader);
|
||||
|
||||
let mut directories = FxHashSet::default();
|
||||
|
||||
while let Some(mut entry) = zip.next_with_entry().await? {
|
||||
// Construct the (expected) path to the file on-disk.
|
||||
let path = entry.reader().entry().filename().as_str()?;
|
||||
let path = target.join(path);
|
||||
let is_dir = entry.reader().entry().dir()?;
|
||||
|
||||
// Either create the directory or write the file to disk.
|
||||
if is_dir {
|
||||
if directories.insert(path.clone()) {
|
||||
fs_err::tokio::create_dir_all(path).await?;
|
||||
}
|
||||
} else {
|
||||
if let Some(parent) = path.parent() {
|
||||
if directories.insert(parent.to_path_buf()) {
|
||||
fs_err::tokio::create_dir_all(parent).await?;
|
||||
}
|
||||
}
|
||||
|
||||
let file = fs_err::tokio::File::create(path).await?;
|
||||
let mut writer =
|
||||
if let Ok(size) = usize::try_from(entry.reader().entry().uncompressed_size()) {
|
||||
tokio::io::BufWriter::with_capacity(size, file)
|
||||
} else {
|
||||
tokio::io::BufWriter::new(file)
|
||||
};
|
||||
let mut reader = entry.reader_mut().compat();
|
||||
tokio::io::copy(&mut reader, &mut writer).await?;
|
||||
}
|
||||
|
||||
// Close current file to get access to the next one. See docs:
|
||||
// https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/
|
||||
zip = entry.skip().await?;
|
||||
}
|
||||
|
||||
// On Unix, we need to set file permissions, which are stored in the central directory, at the
|
||||
// end of the archive. The `ZipFileReader` reads until it sees a central directory signature,
|
||||
// which indicates the first entry in the central directory. So we continue reading from there.
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::fs::Permissions;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
// To avoid lots of small reads to `reader` when parsing the central directory, wrap it in
|
||||
// a buffer.
|
||||
let mut buf = futures::io::BufReader::new(reader);
|
||||
let mut directory = async_zip::base::read::cd::CentralDirectoryReader::new(&mut buf);
|
||||
while let Some(entry) = directory.next().await? {
|
||||
if entry.dir()? {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Construct the (expected) path to the file on-disk.
|
||||
let path = entry.filename().as_str()?;
|
||||
let path = target.join(path);
|
||||
|
||||
if let Some(mode) = entry.unix_permissions() {
|
||||
fs_err::set_permissions(&path, Permissions::from_mode(mode))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unzip a `.zip` archive into the target directory.
|
||||
pub fn unzip_archive<R: Send + std::io::Read + std::io::Seek + HasLength>(
|
||||
reader: R,
|
||||
target: &Path,
|
||||
) -> Result<(), Error> {
|
||||
// Unzip in parallel.
|
||||
let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
|
||||
let directories = Mutex::new(FxHashSet::default());
|
||||
(0..archive.len())
|
||||
.par_bridge()
|
||||
.map(|file_number| {
|
||||
let mut archive = archive.clone();
|
||||
let mut file = archive.by_index(file_number)?;
|
||||
|
||||
// Determine the path of the file within the wheel.
|
||||
let Some(enclosed_name) = file.enclosed_name() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
// Create necessary parent directories.
|
||||
let path = target.join(enclosed_name);
|
||||
if file.is_dir() {
|
||||
let mut directories = directories.lock().unwrap();
|
||||
if directories.insert(path.clone()) {
|
||||
fs_err::create_dir_all(path)?;
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let Some(parent) = path.parent() {
|
||||
let mut directories = directories.lock().unwrap();
|
||||
if directories.insert(parent.to_path_buf()) {
|
||||
fs_err::create_dir_all(parent)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Create the file, with the correct permissions (on Unix).
|
||||
let mut options = OpenOptions::new();
|
||||
options.write(true);
|
||||
options.create_new(true);
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::OpenOptionsExt;
|
||||
|
||||
if let Some(mode) = file.unix_mode() {
|
||||
options.mode(mode);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the file contents.
|
||||
let mut outfile = options.open(&path)?;
|
||||
std::io::copy(&mut file, &mut outfile)?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.collect::<Result<_, Error>>()
|
||||
}
|
||||
|
||||
/// Extract a `.zip` or `.tar.gz` archive into the target directory.
|
||||
pub fn extract_archive(source: impl AsRef<Path>, target: impl AsRef<Path>) -> Result<(), Error> {
|
||||
// .zip
|
||||
if source
|
||||
.as_ref()
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
|
||||
{
|
||||
unzip_archive(fs_err::File::open(source.as_ref())?, target.as_ref())?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// .tar.gz
|
||||
if source
|
||||
.as_ref()
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("gz"))
|
||||
{
|
||||
if source.as_ref().file_stem().is_some_and(|stem| {
|
||||
Path::new(stem)
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
|
||||
}) {
|
||||
let mut archive = tar::Archive::new(flate2::read::GzDecoder::new(fs_err::File::open(
|
||||
source.as_ref(),
|
||||
)?));
|
||||
// https://github.com/alexcrichton/tar-rs/issues/349
|
||||
archive.set_preserve_mtime(false);
|
||||
archive.unpack(target)?;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
Err(Error::UnsupportedArchive(source.as_ref().to_path_buf()))
|
||||
}
|
||||
|
||||
/// Extract a source distribution into the target directory.
|
||||
///
|
||||
/// Returns the path to the top-level directory of the source distribution.
|
||||
pub fn extract_source(
|
||||
source: impl AsRef<Path>,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<PathBuf, Error> {
|
||||
extract_archive(&source, &target)?;
|
||||
|
||||
// > A .tar.gz source distribution (sdist) contains a single top-level directory called
|
||||
// > `{name}-{version}` (e.g. foo-1.0), containing the source files of the package.
|
||||
// TODO(konstin): Verify the name of the directory.
|
||||
let top_level =
|
||||
fs_err::read_dir(target.as_ref())?.collect::<std::io::Result<Vec<fs_err::DirEntry>>>()?;
|
||||
let [root] = top_level.as_slice() else {
|
||||
return Err(Error::InvalidArchive(top_level));
|
||||
};
|
||||
|
||||
Ok(root.path())
|
||||
}
|
||||
|
|
|
|||
133
crates/puffin-extract/src/stream.rs
Normal file
133
crates/puffin-extract/src/stream.rs
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
use std::path::Path;
|
||||
|
||||
use rustc_hash::FxHashSet;
|
||||
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
|
||||
|
||||
use crate::Error;
|
||||
|
||||
/// Unzip a `.zip` archive into the target directory, without requiring `Seek`.
|
||||
///
|
||||
/// This is useful for unzipping files as they're being downloaded. If the archive
|
||||
/// is already fully on disk, consider using `unzip_archive`, which can use multiple
|
||||
/// threads to work faster in that case.
|
||||
pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
|
||||
reader: R,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
let mut reader = reader.compat();
|
||||
let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader);
|
||||
|
||||
let mut directories = FxHashSet::default();
|
||||
|
||||
while let Some(mut entry) = zip.next_with_entry().await? {
|
||||
// Construct the (expected) path to the file on-disk.
|
||||
let path = entry.reader().entry().filename().as_str()?;
|
||||
let path = target.as_ref().join(path);
|
||||
let is_dir = entry.reader().entry().dir()?;
|
||||
|
||||
// Either create the directory or write the file to disk.
|
||||
if is_dir {
|
||||
if directories.insert(path.clone()) {
|
||||
fs_err::tokio::create_dir_all(path).await?;
|
||||
}
|
||||
} else {
|
||||
if let Some(parent) = path.parent() {
|
||||
if directories.insert(parent.to_path_buf()) {
|
||||
fs_err::tokio::create_dir_all(parent).await?;
|
||||
}
|
||||
}
|
||||
|
||||
let file = fs_err::tokio::File::create(path).await?;
|
||||
let mut writer =
|
||||
if let Ok(size) = usize::try_from(entry.reader().entry().uncompressed_size()) {
|
||||
tokio::io::BufWriter::with_capacity(size, file)
|
||||
} else {
|
||||
tokio::io::BufWriter::new(file)
|
||||
};
|
||||
let mut reader = entry.reader_mut().compat();
|
||||
tokio::io::copy(&mut reader, &mut writer).await?;
|
||||
}
|
||||
|
||||
// Close current file to get access to the next one. See docs:
|
||||
// https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/
|
||||
zip = entry.skip().await?;
|
||||
}
|
||||
|
||||
// On Unix, we need to set file permissions, which are stored in the central directory, at the
|
||||
// end of the archive. The `ZipFileReader` reads until it sees a central directory signature,
|
||||
// which indicates the first entry in the central directory. So we continue reading from there.
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::fs::Permissions;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
// To avoid lots of small reads to `reader` when parsing the central directory, wrap it in
|
||||
// a buffer.
|
||||
let mut buf = futures::io::BufReader::new(reader);
|
||||
let mut directory = async_zip::base::read::cd::CentralDirectoryReader::new(&mut buf);
|
||||
while let Some(entry) = directory.next().await? {
|
||||
if entry.dir()? {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Construct the (expected) path to the file on-disk.
|
||||
let path = entry.filename().as_str()?;
|
||||
let path = target.as_ref().join(path);
|
||||
|
||||
if let Some(mode) = entry.unix_permissions() {
|
||||
fs_err::set_permissions(&path, Permissions::from_mode(mode))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unzip a `.tar.gz` archive into the target directory, without requiring `Seek`.
|
||||
///
|
||||
/// This is useful for unpacking files as they're being downloaded.
|
||||
pub async fn untar<R: tokio::io::AsyncBufRead + Unpin>(
|
||||
reader: R,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
let decompressed_bytes = async_compression::futures::bufread::GzipDecoder::new(reader.compat());
|
||||
let archive = async_tar::ArchiveBuilder::new(decompressed_bytes)
|
||||
.set_preserve_permissions(false)
|
||||
.build();
|
||||
Ok(archive.unpack(target.as_ref()).await?)
|
||||
}
|
||||
|
||||
/// Unzip a `.zip` or `.tar.gz` archive into the target directory, without requiring `Seek`.
|
||||
pub async fn archive<R: tokio::io::AsyncBufRead + Unpin>(
|
||||
reader: R,
|
||||
source: impl AsRef<Path>,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
// `.zip`
|
||||
if source
|
||||
.as_ref()
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
|
||||
{
|
||||
unzip(reader, target).await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// `.tar.gz`
|
||||
if source
|
||||
.as_ref()
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("gz"))
|
||||
{
|
||||
if source.as_ref().file_stem().is_some_and(|stem| {
|
||||
Path::new(stem)
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
|
||||
}) {
|
||||
untar(reader, target).await?;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
Err(Error::UnsupportedArchive(source.as_ref().to_path_buf()))
|
||||
}
|
||||
122
crates/puffin-extract/src/sync.rs
Normal file
122
crates/puffin-extract/src/sync.rs
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
use std::fs::OpenOptions;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Mutex;
|
||||
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashSet;
|
||||
use zip::ZipArchive;
|
||||
|
||||
use crate::vendor::{CloneableSeekableReader, HasLength};
|
||||
use crate::Error;
|
||||
|
||||
/// Unzip a `.zip` archive into the target directory.
|
||||
pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
|
||||
reader: R,
|
||||
target: &Path,
|
||||
) -> Result<(), Error> {
|
||||
// Unzip in parallel.
|
||||
let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
|
||||
let directories = Mutex::new(FxHashSet::default());
|
||||
(0..archive.len())
|
||||
.par_bridge()
|
||||
.map(|file_number| {
|
||||
let mut archive = archive.clone();
|
||||
let mut file = archive.by_index(file_number)?;
|
||||
|
||||
// Determine the path of the file within the wheel.
|
||||
let Some(enclosed_name) = file.enclosed_name() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
// Create necessary parent directories.
|
||||
let path = target.join(enclosed_name);
|
||||
if file.is_dir() {
|
||||
let mut directories = directories.lock().unwrap();
|
||||
if directories.insert(path.clone()) {
|
||||
fs_err::create_dir_all(path)?;
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let Some(parent) = path.parent() {
|
||||
let mut directories = directories.lock().unwrap();
|
||||
if directories.insert(parent.to_path_buf()) {
|
||||
fs_err::create_dir_all(parent)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Create the file, with the correct permissions (on Unix).
|
||||
let mut options = OpenOptions::new();
|
||||
options.write(true);
|
||||
options.create_new(true);
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::OpenOptionsExt;
|
||||
|
||||
if let Some(mode) = file.unix_mode() {
|
||||
options.mode(mode);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the file contents.
|
||||
let mut outfile = options.open(&path)?;
|
||||
std::io::copy(&mut file, &mut outfile)?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.collect::<Result<_, Error>>()
|
||||
}
|
||||
|
||||
/// Extract a `.zip` or `.tar.gz` archive into the target directory.
|
||||
pub fn archive(source: impl AsRef<Path>, target: impl AsRef<Path>) -> Result<(), Error> {
|
||||
// `.zip`
|
||||
if source
|
||||
.as_ref()
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
|
||||
{
|
||||
unzip(fs_err::File::open(source.as_ref())?, target.as_ref())?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// `.tar.gz`
|
||||
if source
|
||||
.as_ref()
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("gz"))
|
||||
{
|
||||
if source.as_ref().file_stem().is_some_and(|stem| {
|
||||
Path::new(stem)
|
||||
.extension()
|
||||
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
|
||||
}) {
|
||||
let mut archive = tar::Archive::new(flate2::read::GzDecoder::new(fs_err::File::open(
|
||||
source.as_ref(),
|
||||
)?));
|
||||
// https://github.com/alexcrichton/tar-rs/issues/349
|
||||
archive.set_preserve_mtime(false);
|
||||
archive.unpack(target)?;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
Err(Error::UnsupportedArchive(source.as_ref().to_path_buf()))
|
||||
}
|
||||
|
||||
/// Extract the top-level directory from an unpacked archive.
|
||||
///
|
||||
/// The specification says:
|
||||
/// > A .tar.gz source distribution (sdist) contains a single top-level directory called
|
||||
/// > `{name}-{version}` (e.g. foo-1.0), containing the source files of the package.
|
||||
///
|
||||
/// This function returns the path to that top-level directory.
|
||||
pub fn strip_component(source: impl AsRef<Path>) -> Result<PathBuf, Error> {
|
||||
// TODO(konstin): Verify the name of the directory.
|
||||
let top_level =
|
||||
fs_err::read_dir(source.as_ref())?.collect::<std::io::Result<Vec<fs_err::DirEntry>>>()?;
|
||||
let [root] = top_level.as_slice() else {
|
||||
return Err(Error::InvalidArchive(top_level));
|
||||
};
|
||||
Ok(root.path())
|
||||
}
|
||||
|
|
@ -26,7 +26,7 @@ pub trait HasLength {
|
|||
/// and thus can be cloned cheaply. It supports seeking; each cloned instance
|
||||
/// maintains its own pointer into the file, and the underlying instance
|
||||
/// is seeked prior to each read.
|
||||
pub struct CloneableSeekableReader<R: Read + Seek + HasLength> {
|
||||
pub(crate) struct CloneableSeekableReader<R: Read + Seek + HasLength> {
|
||||
file: Arc<Mutex<R>>,
|
||||
pos: u64,
|
||||
// TODO determine and store this once instead of per cloneable file
|
||||
|
|
|
|||
2
crates/puffin-extract/src/vendor/mod.rs
vendored
2
crates/puffin-extract/src/vendor/mod.rs
vendored
|
|
@ -1,3 +1,3 @@
|
|||
pub use cloneable_seekable_reader::{CloneableSeekableReader, HasLength};
|
||||
pub(crate) use cloneable_seekable_reader::{CloneableSeekableReader, HasLength};
|
||||
|
||||
mod cloneable_seekable_reader;
|
||||
|
|
|
|||
|
|
@ -1617,7 +1617,7 @@ fn disallowed_transitive_url_dependency() -> Result<()> {
|
|||
let venv = create_venv_py312(&temp_dir, &cache_dir);
|
||||
|
||||
let requirements_in = temp_dir.child("requirements.in");
|
||||
requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip")?;
|
||||
requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/14078476/transitive_url_dependency.zip")?;
|
||||
|
||||
insta::with_settings!({
|
||||
filters => INSTA_FILTERS.to_vec()
|
||||
|
|
@ -1654,7 +1654,7 @@ fn allowed_transitive_url_dependency() -> Result<()> {
|
|||
let venv = create_venv_py312(&temp_dir, &cache_dir);
|
||||
|
||||
let requirements_in = temp_dir.child("requirements.in");
|
||||
requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip")?;
|
||||
requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/14078476/transitive_url_dependency.zip")?;
|
||||
|
||||
let constraints_txt = temp_dir.child("constraints.txt");
|
||||
constraints_txt.write_str("werkzeug @ git+https://github.com/pallets/werkzeug@2.0.0")?;
|
||||
|
|
@ -1679,7 +1679,7 @@ fn allowed_transitive_url_dependency() -> Result<()> {
|
|||
----- stdout -----
|
||||
# This file was autogenerated by Puffin v[VERSION] via the following command:
|
||||
# puffin pip compile requirements.in --constraint constraints.txt --cache-dir [CACHE_DIR]
|
||||
transitive-url-dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip
|
||||
transitive-url-dependency @ https://github.com/astral-sh/ruff/files/14078476/transitive_url_dependency.zip
|
||||
werkzeug @ git+https://github.com/pallets/werkzeug@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74
|
||||
# via transitive-url-dependency
|
||||
|
||||
|
|
@ -1702,7 +1702,7 @@ fn allowed_transitive_canonical_url_dependency() -> Result<()> {
|
|||
let venv = create_venv_py312(&temp_dir, &cache_dir);
|
||||
|
||||
let requirements_in = temp_dir.child("requirements.in");
|
||||
requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip")?;
|
||||
requirements_in.write_str("transitive_url_dependency @ https://github.com/astral-sh/ruff/files/14078476/transitive_url_dependency.zip")?;
|
||||
|
||||
let constraints_txt = temp_dir.child("constraints.txt");
|
||||
constraints_txt.write_str("werkzeug @ git+https://github.com/pallets/werkzeug.git@2.0.0")?;
|
||||
|
|
@ -1727,7 +1727,7 @@ fn allowed_transitive_canonical_url_dependency() -> Result<()> {
|
|||
----- stdout -----
|
||||
# This file was autogenerated by Puffin v[VERSION] via the following command:
|
||||
# puffin pip compile requirements.in --constraint constraints.txt --cache-dir [CACHE_DIR]
|
||||
transitive-url-dependency @ https://github.com/astral-sh/ruff/files/13257454/transitive_url_dependency.zip
|
||||
transitive-url-dependency @ https://github.com/astral-sh/ruff/files/14078476/transitive_url_dependency.zip
|
||||
werkzeug @ git+https://github.com/pallets/werkzeug@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74
|
||||
# via transitive-url-dependency
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue