Enforce extension validity at parse time (#5888)

## Summary

This PR adds a `DistExtension` field to some of our distribution types,
which requires us to validate that the file type is known and
supported at parse time (rather than when attempting to unzip). It
also removes much of the ad hoc extension parsing from the code, in
favor of doing it once upfront.

Closes https://github.com/astral-sh/uv/issues/5858.
This commit is contained in:
Charlie Marsh 2024-08-08 21:39:47 -04:00 committed by GitHub
parent ba7c09edd0
commit 21408c1f35
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 803 additions and 480 deletions

View file

@ -5,15 +5,15 @@ use std::pin::Pin;
use std::str::FromStr;
use std::task::{Context, Poll};
use distribution_filename::{ExtensionError, SourceDistExtension};
use futures::TryStreamExt;
use owo_colors::OwoColorize;
use pypi_types::{HashAlgorithm, HashDigest};
use thiserror::Error;
use tokio::io::{AsyncRead, ReadBuf};
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::{debug, instrument};
use url::Url;
use pypi_types::{HashAlgorithm, HashDigest};
use uv_cache::Cache;
use uv_client::WrappedReqwestError;
use uv_extract::hash::Hasher;
@ -32,6 +32,8 @@ pub enum Error {
Io(#[from] io::Error),
#[error(transparent)]
ImplementationError(#[from] ImplementationError),
#[error("Expected download URL (`{0}`) to end in a supported file extension: {1}")]
MissingExtension(String, ExtensionError),
#[error("Invalid Python version: {0}")]
InvalidPythonVersion(String),
#[error("Invalid request key (too many parts): {0}")]
@ -423,6 +425,8 @@ impl ManagedPythonDownload {
}
let filename = url.path_segments().unwrap().last().unwrap();
let ext = SourceDistExtension::from_path(filename)
.map_err(|err| Error::MissingExtension(url.to_string(), err))?;
let response = client.get(url.clone()).send().await?;
// Ensure the request was successful.
@ -458,12 +462,12 @@ impl ManagedPythonDownload {
match progress {
Some((&reporter, progress)) => {
let mut reader = ProgressReader::new(&mut hasher, progress, reporter);
uv_extract::stream::archive(&mut reader, filename, temp_dir.path())
uv_extract::stream::archive(&mut reader, ext, temp_dir.path())
.await
.map_err(|err| Error::ExtractError(filename.to_string(), err))?;
}
None => {
uv_extract::stream::archive(&mut hasher, filename, temp_dir.path())
uv_extract::stream::archive(&mut hasher, ext, temp_dir.path())
.await
.map_err(|err| Error::ExtractError(filename.to_string(), err))?;
}