Merge commit from fork

* feat: reject ZIP archives with improbable filenames

Signed-off-by: William Woodruff <william@astral.sh>

* use my PR for async_zip temporarily

Signed-off-by: William Woodruff <william@astral.sh>

* update snapshot

Signed-off-by: William Woodruff <william@astral.sh>

* two more tests

Signed-off-by: William Woodruff <william@astral.sh>

* update rev

Signed-off-by: William Woodruff <william@astral.sh>

---------

Signed-off-by: William Woodruff <william@astral.sh>
This commit is contained in:
William Woodruff 2025-10-29 11:11:06 -04:00 committed by GitHub
parent 41cd3d1926
commit da659fee48
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 143 additions and 20 deletions

View file

@ -29,6 +29,7 @@ fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true }
md-5 = { workspace = true }
rayon = { workspace = true }
regex = { workspace = true }
reqwest = { workspace = true }
rustc-hash = { workspace = true }
sha2 = { workspace = true }

View file

@ -89,6 +89,10 @@ pub enum Error {
ExtensibleData,
#[error("ZIP file end-of-central-directory record contains multiple entries with the same path, but conflicting modes: {}", path.display())]
DuplicateExecutableFileHeader { path: PathBuf },
#[error("Archive contains a file with an empty filename")]
EmptyFilename,
#[error("Archive contains unacceptable filename: {filename}")]
UnacceptableFilename { filename: String },
}
impl Error {

View file

@ -1,8 +1,101 @@
use std::sync::LazyLock;
pub use error::Error;
use regex::Regex;
pub use sync::*;
use uv_static::EnvVars;
mod error;
pub mod hash;
pub mod stream;
mod sync;
mod vendor;
static CONTROL_CHARACTERS_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\p{C}").unwrap());
static REPLACEMENT_CHARACTER: &'static str = "\u{FFFD}";
/// Validate that a given filename (e.g. reported by a ZIP archive's
/// local file entries or central directory entries) is "safe" to use.
///
/// "Safe" in this context doesn't refer to directory traversal
/// risk, but whether we believe that other ZIP implementations
/// handle the name correctly and consistently.
///
/// Specifically, we want to avoid names that:
///
/// - Contain *any* non-printable characters
/// - Are empty
///
/// In the future, we may also want to check for names that contain
/// leading/trailing whitespace, or names that are exceedingly long.
pub(crate) fn validate_archive_member_name(name: &str) -> Result<(), Error> {
if name.is_empty() {
return Err(Error::EmptyFilename);
}
match CONTROL_CHARACTERS_RE.replace_all(name, REPLACEMENT_CHARACTER) {
// No replacements mean no control characters.
std::borrow::Cow::Borrowed(_) => Ok(()),
std::borrow::Cow::Owned(sanitized) => Err(Error::UnacceptableFilename {
filename: sanitized,
}),
}
}
/// Returns `true` if ZIP validation is disabled.
pub(crate) fn insecure_no_validate() -> bool {
// TODO(charlie) Parse this in `EnvironmentOptions`.
let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else {
return false;
};
let Some(value) = value.to_str() else {
return false;
};
matches!(
value.to_lowercase().as_str(),
"y" | "yes" | "t" | "true" | "on" | "1"
)
}
#[cfg(test)]
mod tests {
#[test]
fn test_validate_archive_member_name() {
for (testcase, ok) in &[
// Valid cases.
("normal.txt", true),
("__init__.py", true),
("fine i guess.py", true),
("🌈.py", true),
// Invalid cases.
("", false),
("new\nline.py", false),
("carriage\rreturn.py", false),
("tab\tcharacter.py", false),
("null\0byte.py", false),
("control\x01code.py", false),
("control\x02code.py", false),
("control\x03code.py", false),
("control\x04code.py", false),
("backspace\x08code.py", false),
("delete\x7fcode.py", false),
] {
assert_eq!(
super::validate_archive_member_name(testcase).is_ok(),
*ok,
"testcase: {testcase}"
);
}
}
#[test]
fn test_unacceptable_filename_error_replaces_control_characters() {
let err = super::validate_archive_member_name("bad\nname").unwrap_err();
match err {
super::Error::UnacceptableFilename { filename } => {
assert_eq!(filename, "bad<EFBFBD>name");
}
_ => panic!("expected UnacceptableFilename error"),
}
}
}

View file

@ -9,9 +9,8 @@ use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tracing::{debug, warn};
use uv_distribution_filename::SourceDistExtension;
use uv_static::EnvVars;
use crate::Error;
use crate::{Error, insecure_no_validate, validate_archive_member_name};
const DEFAULT_BUF_SIZE: usize = 128 * 1024;
@ -39,21 +38,6 @@ struct ComputedEntry {
compressed_size: u64,
}
/// Returns `true` if ZIP validation is disabled.
fn insecure_no_validate() -> bool {
// TODO(charlie) Parse this in `EnvironmentOptions`.
let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else {
return false;
};
let Some(value) = value.to_str() else {
return false;
};
matches!(
value.to_lowercase().as_str(),
"y" | "yes" | "t" | "true" | "on" | "1"
)
}
/// Unpack a `.zip` archive into the target directory, without requiring `Seek`.
///
/// This is useful for unzipping files as they're being downloaded. If the archive
@ -102,6 +86,13 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
Err(err) => return Err(err.into()),
};
// Apply sanity checks to the file names in local headers.
if let Err(e) = validate_archive_member_name(path) {
if !skip_validation {
return Err(e);
}
}
// Sanitize the file name to prevent directory traversal attacks.
let Some(relpath) = enclosed_name(path) else {
warn!("Skipping unsafe file name: {path}");
@ -373,6 +364,13 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
Err(err) => return Err(err.into()),
};
// Apply sanity checks to the file names in CD headers.
if let Err(e) = validate_archive_member_name(path) {
if !skip_validation {
return Err(e);
}
}
// Sanitize the file name to prevent directory traversal attacks.
let Some(relpath) = enclosed_name(path) else {
continue;

View file

@ -1,8 +1,8 @@
use std::path::{Path, PathBuf};
use std::sync::{LazyLock, Mutex};
use crate::Error;
use crate::vendor::{CloneableSeekableReader, HasLength};
use crate::{Error, insecure_no_validate, validate_archive_member_name};
use rayon::prelude::*;
use rustc_hash::FxHashSet;
use tracing::warn;
@ -18,6 +18,7 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
let reader = std::io::BufReader::new(reader);
let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
let directories = Mutex::new(FxHashSet::default());
let skip_validation = insecure_no_validate();
// Initialize the threadpool with the user settings.
LazyLock::force(&RAYON_INITIALIZE);
(0..archive.len())
@ -26,6 +27,12 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
let mut archive = archive.clone();
let mut file = archive.by_index(file_number)?;
if let Err(e) = validate_archive_member_name(file.name()) {
if !skip_validation {
return Err(e);
}
}
// Determine the path of the file within the wheel.
let Some(enclosed_name) = file.enclosed_name() else {
warn!("Skipping unsafe file name: {}", file.name());

View file

@ -12189,6 +12189,25 @@ fn config_settings_package() -> Result<()> {
Ok(())
}
#[test]
fn reject_invalid_archive_member_names() {
let context = TestContext::new("3.12").with_exclude_newer("2025-10-07T00:00:00Z");
uv_snapshot!(context.filters(), context.pip_install()
.arg("cbwheeldiff2==0.0.1"), @r"
success: false
exit_code: 1
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
× Failed to download `cbwheeldiff2==0.0.1`
Failed to extract archive: cbwheeldiff2-0.0.1-py2.py3-none-any.whl
Archive contains unacceptable filename: cbwheeldiff2-0.0.1.dist-info/RECORD<EFBFBD>
"
);
}
#[test]
fn reject_invalid_streaming_zip() {
let context = TestContext::new("3.12").with_exclude_newer("2025-07-10T00:00:00Z");