mirror of
https://github.com/astral-sh/uv.git
synced 2025-11-25 05:33:43 +00:00
Merge commit from fork
* feat: reject ZIP archives with improbable filenames Signed-off-by: William Woodruff <william@astral.sh> * use my PR for async_zip temporarily Signed-off-by: William Woodruff <william@astral.sh> * update snapshot Signed-off-by: William Woodruff <william@astral.sh> * two more tests Signed-off-by: William Woodruff <william@astral.sh> * update rev Signed-off-by: William Woodruff <william@astral.sh> --------- Signed-off-by: William Woodruff <william@astral.sh>
This commit is contained in:
parent
41cd3d1926
commit
da659fee48
8 changed files with 143 additions and 20 deletions
|
|
@ -29,6 +29,7 @@ fs-err = { workspace = true, features = ["tokio"] }
|
|||
futures = { workspace = true }
|
||||
md-5 = { workspace = true }
|
||||
rayon = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
rustc-hash = { workspace = true }
|
||||
sha2 = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -89,6 +89,10 @@ pub enum Error {
|
|||
ExtensibleData,
|
||||
#[error("ZIP file end-of-central-directory record contains multiple entries with the same path, but conflicting modes: {}", path.display())]
|
||||
DuplicateExecutableFileHeader { path: PathBuf },
|
||||
#[error("Archive contains a file with an empty filename")]
|
||||
EmptyFilename,
|
||||
#[error("Archive contains unacceptable filename: {filename}")]
|
||||
UnacceptableFilename { filename: String },
|
||||
}
|
||||
|
||||
impl Error {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,101 @@
|
|||
use std::sync::LazyLock;
|
||||
|
||||
pub use error::Error;
|
||||
use regex::Regex;
|
||||
pub use sync::*;
|
||||
use uv_static::EnvVars;
|
||||
|
||||
mod error;
|
||||
pub mod hash;
|
||||
pub mod stream;
|
||||
mod sync;
|
||||
mod vendor;
|
||||
|
||||
static CONTROL_CHARACTERS_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\p{C}").unwrap());
|
||||
static REPLACEMENT_CHARACTER: &'static str = "\u{FFFD}";
|
||||
|
||||
/// Validate that a given filename (e.g. reported by a ZIP archive's
|
||||
/// local file entries or central directory entries) is "safe" to use.
|
||||
///
|
||||
/// "Safe" in this context doesn't refer to directory traversal
|
||||
/// risk, but whether we believe that other ZIP implementations
|
||||
/// handle the name correctly and consistently.
|
||||
///
|
||||
/// Specifically, we want to avoid names that:
|
||||
///
|
||||
/// - Contain *any* non-printable characters
|
||||
/// - Are empty
|
||||
///
|
||||
/// In the future, we may also want to check for names that contain
|
||||
/// leading/trailing whitespace, or names that are exceedingly long.
|
||||
pub(crate) fn validate_archive_member_name(name: &str) -> Result<(), Error> {
|
||||
if name.is_empty() {
|
||||
return Err(Error::EmptyFilename);
|
||||
}
|
||||
|
||||
match CONTROL_CHARACTERS_RE.replace_all(name, REPLACEMENT_CHARACTER) {
|
||||
// No replacements mean no control characters.
|
||||
std::borrow::Cow::Borrowed(_) => Ok(()),
|
||||
std::borrow::Cow::Owned(sanitized) => Err(Error::UnacceptableFilename {
|
||||
filename: sanitized,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if ZIP validation is disabled.
|
||||
pub(crate) fn insecure_no_validate() -> bool {
|
||||
// TODO(charlie) Parse this in `EnvironmentOptions`.
|
||||
let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else {
|
||||
return false;
|
||||
};
|
||||
let Some(value) = value.to_str() else {
|
||||
return false;
|
||||
};
|
||||
matches!(
|
||||
value.to_lowercase().as_str(),
|
||||
"y" | "yes" | "t" | "true" | "on" | "1"
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    /// Ordinary names are accepted; empty names and names containing control
    /// characters are rejected.
    #[test]
    fn test_validate_archive_member_name() {
        for (testcase, ok) in &[
            // Valid cases.
            ("normal.txt", true),
            ("__init__.py", true),
            ("fine i guess.py", true),
            ("🌈.py", true),
            // Invalid cases.
            ("", false),
            ("new\nline.py", false),
            ("carriage\rreturn.py", false),
            ("tab\tcharacter.py", false),
            ("null\0byte.py", false),
            ("control\x01code.py", false),
            ("control\x02code.py", false),
            ("control\x03code.py", false),
            ("control\x04code.py", false),
            ("backspace\x08code.py", false),
            ("delete\x7fcode.py", false),
        ] {
            assert_eq!(
                super::validate_archive_member_name(testcase).is_ok(),
                *ok,
                "testcase: {testcase}"
            );
        }
    }

    /// The error for a rejected name carries the name with each control
    /// character replaced by U+FFFD.
    #[test]
    fn test_unacceptable_filename_error_replaces_control_characters() {
        let err = super::validate_archive_member_name("bad\nname").unwrap_err();
        match err {
            super::Error::UnacceptableFilename { filename } => {
                // Written as an escape so the expected value is unambiguous
                // and cannot be corrupted by tools that mangle U+FFFD.
                assert_eq!(filename, "bad\u{FFFD}name");
            }
            _ => panic!("expected UnacceptableFilename error"),
        }
    }
}
|
||||
|
|
|
|||
|
|
@ -9,9 +9,8 @@ use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
|
|||
use tracing::{debug, warn};
|
||||
|
||||
use uv_distribution_filename::SourceDistExtension;
|
||||
use uv_static::EnvVars;
|
||||
|
||||
use crate::Error;
|
||||
use crate::{Error, insecure_no_validate, validate_archive_member_name};
|
||||
|
||||
const DEFAULT_BUF_SIZE: usize = 128 * 1024;
|
||||
|
||||
|
|
@ -39,21 +38,6 @@ struct ComputedEntry {
|
|||
compressed_size: u64,
|
||||
}
|
||||
|
||||
/// Returns `true` if ZIP validation is disabled.
|
||||
fn insecure_no_validate() -> bool {
|
||||
// TODO(charlie) Parse this in `EnvironmentOptions`.
|
||||
let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else {
|
||||
return false;
|
||||
};
|
||||
let Some(value) = value.to_str() else {
|
||||
return false;
|
||||
};
|
||||
matches!(
|
||||
value.to_lowercase().as_str(),
|
||||
"y" | "yes" | "t" | "true" | "on" | "1"
|
||||
)
|
||||
}
|
||||
|
||||
/// Unpack a `.zip` archive into the target directory, without requiring `Seek`.
|
||||
///
|
||||
/// This is useful for unzipping files as they're being downloaded. If the archive
|
||||
|
|
@ -102,6 +86,13 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
|
|||
Err(err) => return Err(err.into()),
|
||||
};
|
||||
|
||||
// Apply sanity checks to the file names in local headers.
|
||||
if let Err(e) = validate_archive_member_name(path) {
|
||||
if !skip_validation {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
|
||||
// Sanitize the file name to prevent directory traversal attacks.
|
||||
let Some(relpath) = enclosed_name(path) else {
|
||||
warn!("Skipping unsafe file name: {path}");
|
||||
|
|
@ -373,6 +364,13 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
|
|||
Err(err) => return Err(err.into()),
|
||||
};
|
||||
|
||||
// Apply sanity checks to the file names in CD headers.
|
||||
if let Err(e) = validate_archive_member_name(path) {
|
||||
if !skip_validation {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
|
||||
// Sanitize the file name to prevent directory traversal attacks.
|
||||
let Some(relpath) = enclosed_name(path) else {
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{LazyLock, Mutex};
|
||||
|
||||
use crate::Error;
|
||||
use crate::vendor::{CloneableSeekableReader, HasLength};
|
||||
use crate::{Error, insecure_no_validate, validate_archive_member_name};
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashSet;
|
||||
use tracing::warn;
|
||||
|
|
@ -18,6 +18,7 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
|
|||
let reader = std::io::BufReader::new(reader);
|
||||
let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
|
||||
let directories = Mutex::new(FxHashSet::default());
|
||||
let skip_validation = insecure_no_validate();
|
||||
// Initialize the threadpool with the user settings.
|
||||
LazyLock::force(&RAYON_INITIALIZE);
|
||||
(0..archive.len())
|
||||
|
|
@ -26,6 +27,12 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
|
|||
let mut archive = archive.clone();
|
||||
let mut file = archive.by_index(file_number)?;
|
||||
|
||||
if let Err(e) = validate_archive_member_name(file.name()) {
|
||||
if !skip_validation {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
|
||||
// Determine the path of the file within the wheel.
|
||||
let Some(enclosed_name) = file.enclosed_name() else {
|
||||
warn!("Skipping unsafe file name: {}", file.name());
|
||||
|
|
|
|||
|
|
@ -12189,6 +12189,25 @@ fn config_settings_package() -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reject_invalid_archive_member_names() {
|
||||
let context = TestContext::new("3.12").with_exclude_newer("2025-10-07T00:00:00Z");
|
||||
|
||||
uv_snapshot!(context.filters(), context.pip_install()
|
||||
.arg("cbwheeldiff2==0.0.1"), @r"
|
||||
success: false
|
||||
exit_code: 1
|
||||
----- stdout -----
|
||||
|
||||
----- stderr -----
|
||||
Resolved 1 package in [TIME]
|
||||
× Failed to download `cbwheeldiff2==0.0.1`
|
||||
├─▶ Failed to extract archive: cbwheeldiff2-0.0.1-py2.py3-none-any.whl
|
||||
╰─▶ Archive contains unacceptable filename: cbwheeldiff2-0.0.1.dist-info/RECORD<EFBFBD>
|
||||
"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reject_invalid_streaming_zip() {
|
||||
let context = TestContext::new("3.12").with_exclude_newer("2025-07-10T00:00:00Z");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue