From da659fee4898a73dbc75070f3e82d49f745e4628 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Wed, 29 Oct 2025 11:11:06 -0400 Subject: [PATCH] Merge commit from fork * feat: reject ZIP archives with improbable filenames Signed-off-by: William Woodruff * use my PR for async_zip temporarily Signed-off-by: William Woodruff * update snapshot Signed-off-by: William Woodruff * two more tests Signed-off-by: William Woodruff * update rev Signed-off-by: William Woodruff --------- Signed-off-by: William Woodruff --- Cargo.lock | 3 +- Cargo.toml | 2 +- crates/uv-extract/Cargo.toml | 1 + crates/uv-extract/src/error.rs | 4 ++ crates/uv-extract/src/lib.rs | 93 +++++++++++++++++++++++++++++++ crates/uv-extract/src/stream.rs | 32 +++++------ crates/uv-extract/src/sync.rs | 9 ++- crates/uv/tests/it/pip_install.rs | 19 +++++++ 8 files changed, 143 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e6a3cbb5f..08a7bbcb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -299,7 +299,7 @@ dependencies = [ [[package]] name = "async_zip" version = "0.0.17" -source = "git+https://github.com/astral-sh/rs-async-zip?rev=285e48742b74ab109887d62e1ae79e7c15fd4878#285e48742b74ab109887d62e1ae79e7c15fd4878" +source = "git+https://github.com/astral-sh/rs-async-zip?rev=f6a41d32866003c868d03ed791a89c794f61b703#f6a41d32866003c868d03ed791a89c794f61b703" dependencies = [ "async-compression", "crc32fast", @@ -5867,6 +5867,7 @@ dependencies = [ "futures", "md-5", "rayon", + "regex", "reqwest", "rustc-hash", "sha2", diff --git a/Cargo.toml b/Cargo.toml index 46e39b582..f888438bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,7 +88,7 @@ async-channel = { version = "2.3.1" } async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] } async-trait = { version = "0.1.82" } async_http_range_reader = { version = "0.9.1" } -async_zip = { git = "https://github.com/astral-sh/rs-async-zip", rev = "285e48742b74ab109887d62e1ae79e7c15fd4878", features = ["bzip2", "deflate", "lzma", "tokio", "xz", "zstd"] } +async_zip = { git = "https://github.com/astral-sh/rs-async-zip", rev = "f6a41d32866003c868d03ed791a89c794f61b703", features = ["bzip2", "deflate", "lzma", "tokio", "xz", "zstd"] } axoupdater = { version = "0.9.0", default-features = false } backon = { version = "1.3.0" } base64 = { version = "0.22.1" } diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index acbd50927..c5f2b21ac 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -29,6 +29,7 @@ fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } md-5 = { workspace = true } rayon = { workspace = true } +regex = { workspace = true } reqwest = { workspace = true } rustc-hash = { workspace = true } sha2 = { workspace = true } diff --git a/crates/uv-extract/src/error.rs b/crates/uv-extract/src/error.rs index 96f4e5215..9a5719eb1 100644 --- a/crates/uv-extract/src/error.rs +++ b/crates/uv-extract/src/error.rs @@ -89,6 +89,10 @@ pub enum Error { ExtensibleData, #[error("ZIP file end-of-central-directory record contains multiple entries with the same path, but conflicting modes: {}", path.display())] DuplicateExecutableFileHeader { path: PathBuf }, + #[error("Archive contains a file with an empty filename")] + EmptyFilename, + #[error("Archive contains unacceptable filename: {filename}")] + UnacceptableFilename { filename: String }, } impl Error { diff --git a/crates/uv-extract/src/lib.rs b/crates/uv-extract/src/lib.rs index f87059b3a..931619583 100644 --- a/crates/uv-extract/src/lib.rs +++ b/crates/uv-extract/src/lib.rs @@ -1,8 +1,101 @@ +use std::sync::LazyLock; + pub use error::Error; +use regex::Regex; pub use sync::*; +use uv_static::EnvVars; mod error; pub mod hash; pub mod stream; mod sync; mod vendor; + +static CONTROL_CHARACTERS_RE: LazyLock = LazyLock::new(|| Regex::new(r"\p{C}").unwrap()); +static REPLACEMENT_CHARACTER: &'static str = "\u{FFFD}"; + +/// Validate that a given filename (e.g. reported by a ZIP archive's +/// local file entries or central directory entries) is "safe" to use. +/// +/// "Safe" in this context doesn't refer to directory traversal +/// risk, but whether we believe that other ZIP implementations +/// handle the name correctly and consistently. +/// +/// Specifically, we want to avoid names that: +/// +/// - Contain *any* non-printable characters +/// - Are empty +/// +/// In the future, we may also want to check for names that contain +/// leading/trailing whitespace, or names that are exceedingly long. +pub(crate) fn validate_archive_member_name(name: &str) -> Result<(), Error> { + if name.is_empty() { + return Err(Error::EmptyFilename); + } + + match CONTROL_CHARACTERS_RE.replace_all(name, REPLACEMENT_CHARACTER) { + // No replacements mean no control characters. + std::borrow::Cow::Borrowed(_) => Ok(()), + std::borrow::Cow::Owned(sanitized) => Err(Error::UnacceptableFilename { + filename: sanitized, + }), + } +} + +/// Returns `true` if ZIP validation is disabled. +pub(crate) fn insecure_no_validate() -> bool { + // TODO(charlie) Parse this in `EnvironmentOptions`. + let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else { + return false; + }; + let Some(value) = value.to_str() else { + return false; + }; + matches!( + value.to_lowercase().as_str(), + "y" | "yes" | "t" | "true" | "on" | "1" + ) +} + +#[cfg(test)] +mod tests { + #[test] + fn test_validate_archive_member_name() { + for (testcase, ok) in &[ + // Valid cases. + ("normal.txt", true), + ("__init__.py", true), + ("fine i guess.py", true), + ("🌈.py", true), + // Invalid cases. + ("", false), + ("new\nline.py", false), + ("carriage\rreturn.py", false), + ("tab\tcharacter.py", false), + ("null\0byte.py", false), + ("control\x01code.py", false), + ("control\x02code.py", false), + ("control\x03code.py", false), + ("control\x04code.py", false), + ("backspace\x08code.py", false), + ("delete\x7fcode.py", false), + ] { + assert_eq!( + super::validate_archive_member_name(testcase).is_ok(), + *ok, + "testcase: {testcase}" + ); + } + } + + #[test] + fn test_unacceptable_filename_error_replaces_control_characters() { + let err = super::validate_archive_member_name("bad\nname").unwrap_err(); + match err { + super::Error::UnacceptableFilename { filename } => { + assert_eq!(filename, "bad�name"); + } + _ => panic!("expected UnacceptableFilename error"), + } + } +} diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index 5ab7e3d3e..fec33efac 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -9,9 +9,8 @@ use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; use tracing::{debug, warn}; use uv_distribution_filename::SourceDistExtension; -use uv_static::EnvVars; -use crate::Error; +use crate::{Error, insecure_no_validate, validate_archive_member_name}; const DEFAULT_BUF_SIZE: usize = 128 * 1024; @@ -39,21 +38,6 @@ struct ComputedEntry { compressed_size: u64, } -/// Returns `true` if ZIP validation is disabled. -fn insecure_no_validate() -> bool { - // TODO(charlie) Parse this in `EnvironmentOptions`. - let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else { - return false; - }; - let Some(value) = value.to_str() else { - return false; - }; - matches!( - value.to_lowercase().as_str(), - "y" | "yes" | "t" | "true" | "on" | "1" - ) -} - /// Unpack a `.zip` archive into the target directory, without requiring `Seek`. /// /// This is useful for unzipping files as they're being downloaded. If the archive @@ -102,6 +86,13 @@ pub async fn unzip( Err(err) => return Err(err.into()), }; + // Apply sanity checks to the file names in local headers. + if let Err(e) = validate_archive_member_name(path) { + if !skip_validation { + return Err(e); + } + } + // Sanitize the file name to prevent directory traversal attacks. let Some(relpath) = enclosed_name(path) else { warn!("Skipping unsafe file name: {path}"); @@ -373,6 +364,13 @@ pub async fn unzip( Err(err) => return Err(err.into()), }; + // Apply sanity checks to the file names in CD headers. + if let Err(e) = validate_archive_member_name(path) { + if !skip_validation { + return Err(e); + } + } + // Sanitize the file name to prevent directory traversal attacks. let Some(relpath) = enclosed_name(path) else { continue; diff --git a/crates/uv-extract/src/sync.rs b/crates/uv-extract/src/sync.rs index 9674715b5..f60e416ae 100644 --- a/crates/uv-extract/src/sync.rs +++ b/crates/uv-extract/src/sync.rs @@ -1,8 +1,8 @@ use std::path::{Path, PathBuf}; use std::sync::{LazyLock, Mutex}; -use crate::Error; use crate::vendor::{CloneableSeekableReader, HasLength}; +use crate::{Error, insecure_no_validate, validate_archive_member_name}; use rayon::prelude::*; use rustc_hash::FxHashSet; use tracing::warn; @@ -18,6 +18,7 @@ pub fn unzip( let reader = std::io::BufReader::new(reader); let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?; let directories = Mutex::new(FxHashSet::default()); + let skip_validation = insecure_no_validate(); // Initialize the threadpool with the user settings. LazyLock::force(&RAYON_INITIALIZE); (0..archive.len()) @@ -26,6 +27,12 @@ pub fn unzip( let mut archive = archive.clone(); let mut file = archive.by_index(file_number)?; + if let Err(e) = validate_archive_member_name(file.name()) { + if !skip_validation { + return Err(e); + } + } + // Determine the path of the file within the wheel. let Some(enclosed_name) = file.enclosed_name() else { warn!("Skipping unsafe file name: {}", file.name()); diff --git a/crates/uv/tests/it/pip_install.rs b/crates/uv/tests/it/pip_install.rs index d2b3a2a49..825de07a0 100644 --- a/crates/uv/tests/it/pip_install.rs +++ b/crates/uv/tests/it/pip_install.rs @@ -12189,6 +12189,25 @@ fn config_settings_package() -> Result<()> { Ok(()) } +#[test] +fn reject_invalid_archive_member_names() { + let context = TestContext::new("3.12").with_exclude_newer("2025-10-07T00:00:00Z"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("cbwheeldiff2==0.0.1"), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + × Failed to download `cbwheeldiff2==0.0.1` + ├─▶ Failed to extract archive: cbwheeldiff2-0.0.1-py2.py3-none-any.whl + ╰─▶ Archive contains unacceptable filename: cbwheeldiff2-0.0.1.dist-info/RECORD� + " + ); +} + #[test] fn reject_invalid_streaming_zip() { let context = TestContext::new("3.12").with_exclude_newer("2025-07-10T00:00:00Z");