Enforce extension validity at parse time (#5888)

## Summary

This PR adds a `DistExtension` field to some of our distribution types,
which requires that we validate that the file type is known and
supported when parsing (rather than when attempting to unzip). It
removes a bunch of extension parsing from the code too, in favor of
doing it once upfront.

Closes https://github.com/astral-sh/uv/issues/5858.
This commit is contained in:
Charlie Marsh 2024-08-08 21:39:47 -04:00 committed by GitHub
parent ba7c09edd0
commit 21408c1f35
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 803 additions and 480 deletions

View file

@ -13,6 +13,7 @@ license = { workspace = true }
workspace = true
[dependencies]
distribution-filename = { workspace = true }
pep440_rs = { workspace = true }
pep508_rs = { workspace = true }
uv-fs = { workspace = true, features = ["serde"] }

View file

@ -1,9 +1,9 @@
use distribution_filename::{DistExtension, ExtensionError};
use pep508_rs::{Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError};
use std::fmt::{Display, Formatter};
use std::path::{Path, PathBuf};
use thiserror::Error;
use url::{ParseError, Url};
use pep508_rs::{Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError};
use uv_git::{GitReference, GitSha, GitUrl, OidParseError};
use crate::{ArchiveInfo, DirInfo, DirectUrl, VcsInfo, VcsKind};
@ -13,17 +13,21 @@ pub enum ParsedUrlError {
#[error("Unsupported URL prefix `{prefix}` in URL: `{url}` ({message})")]
UnsupportedUrlPrefix {
prefix: String,
url: Url,
url: String,
message: &'static str,
},
#[error("Invalid path in file URL: `{0}`")]
InvalidFileUrl(Url),
InvalidFileUrl(String),
#[error("Failed to parse Git reference from URL: `{0}`")]
GitShaParse(Url, #[source] OidParseError),
GitShaParse(String, #[source] OidParseError),
#[error("Not a valid URL: `{0}`")]
UrlParse(String, #[source] ParseError),
#[error(transparent)]
VerbatimUrl(#[from] VerbatimUrlError),
#[error("Expected direct URL (`{0}`) to end in a supported file extension: {1}")]
MissingExtensionUrl(String, ExtensionError),
#[error("Expected path (`{0}`) to end in a supported file extension: {1}")]
MissingExtensionPath(PathBuf, ExtensionError),
}
#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Eq, Ord)]
@ -75,6 +79,9 @@ impl UnnamedRequirementUrl for VerbatimParsedUrl {
url: verbatim.to_url(),
install_path: verbatim.as_path()?,
lock_path: path.as_ref().to_path_buf(),
ext: DistExtension::from_path(&path).map_err(|err| {
ParsedUrlError::MissingExtensionPath(path.as_ref().to_path_buf(), err)
})?,
})
};
Ok(Self {
@ -103,6 +110,9 @@ impl UnnamedRequirementUrl for VerbatimParsedUrl {
url: verbatim.to_url(),
install_path: verbatim.as_path()?,
lock_path: path.as_ref().to_path_buf(),
ext: DistExtension::from_path(&path).map_err(|err| {
ParsedUrlError::MissingExtensionPath(path.as_ref().to_path_buf(), err)
})?,
})
};
Ok(Self {
@ -181,15 +191,23 @@ pub struct ParsedPathUrl {
/// which we use for locking. Unlike `given` on the verbatim URL all environment variables
/// are resolved, and unlike the install path, we did not yet join it on the base directory.
pub lock_path: PathBuf,
/// The file extension, e.g. `tar.gz`, `zip`, etc.
pub ext: DistExtension,
}
impl ParsedPathUrl {
/// Construct a [`ParsedPathUrl`] from a path requirement source.
pub fn from_source(install_path: PathBuf, lock_path: PathBuf, url: Url) -> Self {
pub fn from_source(
install_path: PathBuf,
lock_path: PathBuf,
ext: DistExtension,
url: Url,
) -> Self {
Self {
url,
install_path,
lock_path,
ext,
}
}
}
@ -258,7 +276,7 @@ impl ParsedGitUrl {
impl TryFrom<Url> for ParsedGitUrl {
type Error = ParsedUrlError;
/// Supports URLS with and without the `git+` prefix.
/// Supports URLs with and without the `git+` prefix.
///
/// When the URL includes a prefix, it's presumed to come from a PEP 508 requirement; when it's
/// excluded, it's presumed to come from `tool.uv.sources`.
@ -271,7 +289,7 @@ impl TryFrom<Url> for ParsedGitUrl {
.unwrap_or(url_in.as_str());
let url = Url::parse(url).map_err(|err| ParsedUrlError::UrlParse(url.to_string(), err))?;
let url = GitUrl::try_from(url)
.map_err(|err| ParsedUrlError::GitShaParse(url_in.clone(), err))?;
.map_err(|err| ParsedUrlError::GitShaParse(url_in.to_string(), err))?;
Ok(Self { url, subdirectory })
}
}
@ -286,22 +304,32 @@ impl TryFrom<Url> for ParsedGitUrl {
pub struct ParsedArchiveUrl {
pub url: Url,
pub subdirectory: Option<PathBuf>,
pub ext: DistExtension,
}
impl ParsedArchiveUrl {
/// Construct a [`ParsedArchiveUrl`] from a URL requirement source.
pub fn from_source(location: Url, subdirectory: Option<PathBuf>) -> Self {
pub fn from_source(location: Url, subdirectory: Option<PathBuf>, ext: DistExtension) -> Self {
Self {
url: location,
subdirectory,
ext,
}
}
}
impl From<Url> for ParsedArchiveUrl {
fn from(url: Url) -> Self {
impl TryFrom<Url> for ParsedArchiveUrl {
type Error = ParsedUrlError;
fn try_from(url: Url) -> Result<Self, Self::Error> {
let subdirectory = get_subdirectory(&url);
Self { url, subdirectory }
let ext = DistExtension::from_path(url.path())
.map_err(|err| ParsedUrlError::MissingExtensionUrl(url.to_string(), err))?;
Ok(Self {
url,
subdirectory,
ext,
})
}
}
@ -328,22 +356,22 @@ impl TryFrom<Url> for ParsedUrl {
"git" => Ok(Self::Git(ParsedGitUrl::try_from(url)?)),
"bzr" => Err(ParsedUrlError::UnsupportedUrlPrefix {
prefix: prefix.to_string(),
url: url.clone(),
url: url.to_string(),
message: "Bazaar is not supported",
}),
"hg" => Err(ParsedUrlError::UnsupportedUrlPrefix {
prefix: prefix.to_string(),
url: url.clone(),
url: url.to_string(),
message: "Mercurial is not supported",
}),
"svn" => Err(ParsedUrlError::UnsupportedUrlPrefix {
prefix: prefix.to_string(),
url: url.clone(),
url: url.to_string(),
message: "Subversion is not supported",
}),
_ => Err(ParsedUrlError::UnsupportedUrlPrefix {
prefix: prefix.to_string(),
url: url.clone(),
url: url.to_string(),
message: "Unknown scheme",
}),
}
@ -355,7 +383,7 @@ impl TryFrom<Url> for ParsedUrl {
} else if url.scheme().eq_ignore_ascii_case("file") {
let path = url
.to_file_path()
.map_err(|()| ParsedUrlError::InvalidFileUrl(url.clone()))?;
.map_err(|()| ParsedUrlError::InvalidFileUrl(url.to_string()))?;
let is_dir = if let Ok(metadata) = path.metadata() {
metadata.is_dir()
} else {
@ -371,12 +399,14 @@ impl TryFrom<Url> for ParsedUrl {
} else {
Ok(Self::Path(ParsedPathUrl {
url,
ext: DistExtension::from_path(&path)
.map_err(|err| ParsedUrlError::MissingExtensionPath(path.clone(), err))?,
install_path: path.clone(),
lock_path: path,
}))
}
} else {
Ok(Self::Archive(ParsedArchiveUrl::from(url)))
Ok(Self::Archive(ParsedArchiveUrl::try_from(url)?))
}
}
}

View file

@ -2,17 +2,18 @@ use std::fmt::{Display, Formatter};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use thiserror::Error;
use url::Url;
use distribution_filename::DistExtension;
use pep440_rs::VersionSpecifiers;
use pep508_rs::{MarkerEnvironment, MarkerTree, RequirementOrigin, VerbatimUrl, VersionOrUrl};
use thiserror::Error;
use url::Url;
use uv_fs::PortablePathBuf;
use uv_git::{GitReference, GitSha, GitUrl};
use uv_normalize::{ExtraName, PackageName};
use crate::{
ParsedArchiveUrl, ParsedDirectoryUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, VerbatimParsedUrl,
ParsedArchiveUrl, ParsedDirectoryUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, ParsedUrlError,
VerbatimParsedUrl,
};
#[derive(Debug, Error)]
@ -20,6 +21,8 @@ pub enum RequirementError {
#[error(transparent)]
VerbatimUrlError(#[from] pep508_rs::VerbatimUrlError),
#[error(transparent)]
ParsedUrlError(#[from] ParsedUrlError),
#[error(transparent)]
UrlParseError(#[from] url::ParseError),
#[error(transparent)]
OidParseError(#[from] uv_git::OidParseError),
@ -95,13 +98,15 @@ impl From<Requirement> for pep508_rs::Requirement<VerbatimParsedUrl> {
Some(VersionOrUrl::VersionSpecifier(specifier))
}
RequirementSource::Url {
subdirectory,
location,
subdirectory,
ext,
url,
} => Some(VersionOrUrl::Url(VerbatimParsedUrl {
parsed_url: ParsedUrl::Archive(ParsedArchiveUrl {
url: location,
subdirectory,
ext,
}),
verbatim: url,
})),
@ -128,12 +133,14 @@ impl From<Requirement> for pep508_rs::Requirement<VerbatimParsedUrl> {
RequirementSource::Path {
install_path,
lock_path,
ext,
url,
} => Some(VersionOrUrl::Url(VerbatimParsedUrl {
parsed_url: ParsedUrl::Path(ParsedPathUrl {
url: url.to_url(),
install_path,
lock_path,
ext,
}),
verbatim: url,
})),
@ -259,11 +266,13 @@ pub enum RequirementSource {
/// e.g. `foo @ https://example.org/foo-1.0-py3-none-any.whl`, or a source distribution,
/// e.g.`foo @ https://example.org/foo-1.0.zip`.
Url {
/// The remote location of the archive file, without subdirectory fragment.
location: Url,
/// For source distributions, the path to the distribution if it is not in the archive
/// root.
subdirectory: Option<PathBuf>,
/// The remote location of the archive file, without subdirectory fragment.
location: Url,
/// The file extension, e.g. `tar.gz`, `zip`, etc.
ext: DistExtension,
/// The PEP 508 style URL in the format
/// `<scheme>://<domain>/<path>#subdirectory=<subdirectory>`.
url: VerbatimUrl,
@ -292,6 +301,8 @@ pub enum RequirementSource {
/// which we use for locking. Unlike `given` on the verbatim URL all environment variables
/// are resolved, and unlike the install path, we did not yet join it on the base directory.
lock_path: PathBuf,
/// The file extension, e.g. `tar.gz`, `zip`, etc.
ext: DistExtension,
/// The PEP 508 style URL in the format
/// `file:///<path>#subdirectory=<subdirectory>`.
url: VerbatimUrl,
@ -321,6 +332,7 @@ impl RequirementSource {
ParsedUrl::Path(local_file) => RequirementSource::Path {
install_path: local_file.install_path.clone(),
lock_path: local_file.lock_path.clone(),
ext: local_file.ext,
url,
},
ParsedUrl::Directory(directory) => RequirementSource::Directory {
@ -340,6 +352,7 @@ impl RequirementSource {
url,
location: archive.url,
subdirectory: archive.subdirectory,
ext: archive.ext,
},
}
}
@ -347,7 +360,7 @@ impl RequirementSource {
/// Construct a [`RequirementSource`] for a URL source, given a URL parsed into components.
pub fn from_verbatim_parsed_url(parsed_url: ParsedUrl) -> Self {
let verbatim_url = VerbatimUrl::from_url(Url::from(parsed_url.clone()));
RequirementSource::from_parsed_url(parsed_url, verbatim_url)
Self::from_parsed_url(parsed_url, verbatim_url)
}
/// Convert the source to a [`VerbatimParsedUrl`], if it's a URL source.
@ -355,24 +368,28 @@ impl RequirementSource {
match &self {
Self::Registry { .. } => None,
Self::Url {
subdirectory,
location,
subdirectory,
ext,
url,
} => Some(VerbatimParsedUrl {
parsed_url: ParsedUrl::Archive(ParsedArchiveUrl::from_source(
location.clone(),
subdirectory.clone(),
*ext,
)),
verbatim: url.clone(),
}),
Self::Path {
install_path,
lock_path,
ext,
url,
} => Some(VerbatimParsedUrl {
parsed_url: ParsedUrl::Path(ParsedPathUrl::from_source(
install_path.clone(),
lock_path.clone(),
*ext,
url.to_url(),
)),
verbatim: url.clone(),
@ -504,6 +521,7 @@ impl From<RequirementSource> for RequirementSourceWire {
RequirementSource::Url {
subdirectory,
location,
ext: _,
url: _,
} => Self::Direct {
url: location,
@ -564,6 +582,7 @@ impl From<RequirementSource> for RequirementSourceWire {
RequirementSource::Path {
install_path,
lock_path: _,
ext: _,
url: _,
} => Self::Path {
path: PortablePathBuf::from(install_path),
@ -626,13 +645,17 @@ impl TryFrom<RequirementSourceWire> for RequirementSource {
}
RequirementSourceWire::Direct { url, subdirectory } => Ok(Self::Url {
url: VerbatimUrl::from_url(url.clone()),
subdirectory: subdirectory.map(PathBuf::from),
location: url.clone(),
subdirectory: subdirectory.map(PathBuf::from),
ext: DistExtension::from_path(url.path())
.map_err(|err| ParsedUrlError::MissingExtensionUrl(url.to_string(), err))?,
}),
RequirementSourceWire::Path { path } => {
let path = PathBuf::from(path);
Ok(Self::Path {
url: VerbatimUrl::from_path(path.as_path())?,
ext: DistExtension::from_path(path.as_path())
.map_err(|err| ParsedUrlError::MissingExtensionPath(path.clone(), err))?,
install_path: path.clone(),
lock_path: path,
})