Use VerbatimParsedUrl in pep508_rs (#3758)

When parsing requirements from any source, directly parse the url parts
(and reject unsupported urls) instead of parsing url parts at a later
stage. This removes a bunch of error branches and concludes the work
parsing url parts once and passing them around everywhere.

Many usages of the assembled `VerbatimUrl` remain, but these can be
removed incrementally.

Please review commit-by-commit.
This commit is contained in:
konsti 2024-05-23 21:52:47 +02:00 committed by GitHub
parent 0d2f3fc4e4
commit 4db468e27f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
56 changed files with 877 additions and 656 deletions

View file

@ -25,7 +25,6 @@ uv-normalize = { workspace = true }
anyhow = { workspace = true }
fs-err = { workspace = true }
git2 = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true }
once_cell = { workspace = true }

View file

@ -105,9 +105,9 @@ impl std::fmt::Display for DirectSourceUrl<'_> {
pub struct GitSourceUrl<'a> {
/// The URL with the revision and subdirectory fragment.
pub url: &'a VerbatimUrl,
pub git: &'a GitUrl,
/// The URL without the revision and subdirectory fragment.
pub git: Cow<'a, GitUrl>,
pub subdirectory: Option<Cow<'a, Path>>,
pub subdirectory: Option<&'a Path>,
}
impl std::fmt::Display for GitSourceUrl<'_> {
@ -120,8 +120,8 @@ impl<'a> From<&'a GitSourceDist> for GitSourceUrl<'a> {
fn from(dist: &'a GitSourceDist) -> Self {
Self {
url: &dist.url,
git: Cow::Borrowed(&dist.git),
subdirectory: dist.subdirectory.as_deref().map(Cow::Borrowed),
git: &dist.git,
subdirectory: dist.subdirectory.as_deref(),
}
}
}

View file

@ -4,12 +4,12 @@ use anyhow::{anyhow, Result};
use distribution_filename::WheelFilename;
use pep508_rs::VerbatimUrl;
use pypi_types::HashDigest;
use pypi_types::{HashDigest, ParsedPathUrl};
use uv_normalize::PackageName;
use crate::{
BuiltDist, Dist, DistributionMetadata, Hashed, InstalledMetadata, InstalledVersion, Name,
ParsedPathUrl, ParsedUrl, SourceDist, VersionOrUrlRef,
ParsedUrl, SourceDist, VersionOrUrlRef,
};
/// A built distribution (wheel) that exists in the local cache.

View file

@ -42,6 +42,7 @@ use url::Url;
use distribution_filename::WheelFilename;
use pep440_rs::Version;
use pep508_rs::{Pep508Url, VerbatimUrl};
use pypi_types::{ParsedUrl, VerbatimParsedUrl};
use uv_git::GitUrl;
use uv_normalize::PackageName;
@ -57,7 +58,6 @@ pub use crate::hash::*;
pub use crate::id::*;
pub use crate::index_url::*;
pub use crate::installed::*;
pub use crate::parsed_url::*;
pub use crate::prioritized_distribution::*;
pub use crate::requirement::*;
pub use crate::resolution::*;
@ -77,7 +77,6 @@ mod hash;
mod id;
mod index_url;
mod installed;
mod parsed_url;
mod prioritized_distribution;
mod requirement;
mod resolution;

View file

@ -1,311 +0,0 @@
use std::path::PathBuf;
use anyhow::{Error, Result};
use thiserror::Error;
use url::Url;
use pep508_rs::VerbatimUrl;
use uv_git::{GitSha, GitUrl};
#[derive(Debug, Error)]
pub enum ParsedUrlError {
#[error("Unsupported URL prefix `{prefix}` in URL: `{url}` ({message})")]
UnsupportedUrlPrefix {
prefix: String,
url: Url,
message: &'static str,
},
#[error("Invalid path in file URL: `{0}`")]
InvalidFileUrl(Url),
#[error("Failed to parse Git reference from URL: `{0}`")]
GitShaParse(Url, #[source] git2::Error),
#[error("Not a valid URL: `{0}`")]
UrlParse(String, #[source] url::ParseError),
}
#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Eq, Ord)]
pub struct VerbatimParsedUrl {
pub parsed_url: ParsedUrl,
pub verbatim: VerbatimUrl,
}
/// We support three types of URLs for distributions:
/// * The path to a file or directory (`file://`)
/// * A Git repository (`git+https://` or `git+ssh://`), optionally with a subdirectory and/or
/// string to checkout.
/// * A remote archive (`https://`), optional with a subdirectory (source dist only).
///
/// A URL in a requirement `foo @ <url>` must be one of the above.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub enum ParsedUrl {
/// The direct URL is a path to a local directory or file.
Path(ParsedPathUrl),
/// The direct URL is path to a Git repository.
Git(ParsedGitUrl),
/// The direct URL is a URL to a source archive (e.g., a `.tar.gz` file) or built archive
/// (i.e., a `.whl` file).
Archive(ParsedArchiveUrl),
}
/// A local path url
///
/// Examples:
/// * `file:///home/ferris/my_project`
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub struct ParsedPathUrl {
pub url: Url,
pub path: PathBuf,
pub editable: bool,
}
/// A Git repository URL.
///
/// Examples:
/// * `git+https://git.example.com/MyProject.git`
/// * `git+https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir`
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub struct ParsedGitUrl {
pub url: GitUrl,
pub subdirectory: Option<PathBuf>,
}
impl TryFrom<Url> for ParsedGitUrl {
type Error = ParsedUrlError;
/// Supports URLS with and without the `git+` prefix.
///
/// When the URL includes a prefix, it's presumed to come from a PEP 508 requirement; when it's
/// excluded, it's presumed to come from `tool.uv.sources`.
fn try_from(url_in: Url) -> Result<Self, Self::Error> {
let subdirectory = get_subdirectory(&url_in);
let url = url_in
.as_str()
.strip_prefix("git+")
.unwrap_or(url_in.as_str());
let url = Url::parse(url).map_err(|err| ParsedUrlError::UrlParse(url.to_string(), err))?;
let url = GitUrl::try_from(url)
.map_err(|err| ParsedUrlError::GitShaParse(url_in.clone(), err))?;
Ok(Self { url, subdirectory })
}
}
/// A URL to a source or built archive.
///
/// Examples:
/// * A built distribution: `https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1-py2.py3-none-any.whl`
/// * A source distribution with a valid name: `https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1.tar.gz`
/// * A source dist with a recognizable extension but invalid name: `https://github.com/foo-labs/foo/archive/master.zip#egg=pkg&subdirectory=packages/bar`
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ParsedArchiveUrl {
pub url: Url,
pub subdirectory: Option<PathBuf>,
}
impl From<Url> for ParsedArchiveUrl {
fn from(url: Url) -> Self {
let subdirectory = get_subdirectory(&url);
Self { url, subdirectory }
}
}
/// If the URL points to a subdirectory, extract it, as in (git):
/// `git+https://git.example.com/MyProject.git@v1.0#subdirectory=pkg_dir`
/// `git+https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir`
/// or (direct archive url):
/// `https://github.com/foo-labs/foo/archive/master.zip#subdirectory=packages/bar`
/// `https://github.com/foo-labs/foo/archive/master.zip#egg=pkg&subdirectory=packages/bar`
fn get_subdirectory(url: &Url) -> Option<PathBuf> {
let fragment = url.fragment()?;
let subdirectory = fragment
.split('&')
.find_map(|fragment| fragment.strip_prefix("subdirectory="))?;
Some(PathBuf::from(subdirectory))
}
/// Return the Git reference of the given URL, if it exists.
pub fn git_reference(url: Url) -> Result<Option<GitSha>, Error> {
let ParsedGitUrl { url, .. } = ParsedGitUrl::try_from(url)?;
Ok(url.precise())
}
impl TryFrom<Url> for ParsedUrl {
type Error = ParsedUrlError;
fn try_from(url: Url) -> Result<Self, Self::Error> {
if let Some((prefix, ..)) = url.scheme().split_once('+') {
match prefix {
"git" => Ok(Self::Git(ParsedGitUrl::try_from(url)?)),
"bzr" => Err(ParsedUrlError::UnsupportedUrlPrefix {
prefix: prefix.to_string(),
url: url.clone(),
message: "Bazaar is not supported",
}),
"hg" => Err(ParsedUrlError::UnsupportedUrlPrefix {
prefix: prefix.to_string(),
url: url.clone(),
message: "Mercurial is not supported",
}),
"svn" => Err(ParsedUrlError::UnsupportedUrlPrefix {
prefix: prefix.to_string(),
url: url.clone(),
message: "Subversion is not supported",
}),
_ => Err(ParsedUrlError::UnsupportedUrlPrefix {
prefix: prefix.to_string(),
url: url.clone(),
message: "Unknown scheme",
}),
}
} else if url.scheme().eq_ignore_ascii_case("file") {
let path = url
.to_file_path()
.map_err(|()| ParsedUrlError::InvalidFileUrl(url.clone()))?;
Ok(Self::Path(ParsedPathUrl {
url,
path,
editable: false,
}))
} else {
Ok(Self::Archive(ParsedArchiveUrl::from(url)))
}
}
}
impl TryFrom<&ParsedUrl> for pypi_types::DirectUrl {
type Error = Error;
fn try_from(value: &ParsedUrl) -> std::result::Result<Self, Self::Error> {
match value {
ParsedUrl::Path(value) => Self::try_from(value),
ParsedUrl::Git(value) => Self::try_from(value),
ParsedUrl::Archive(value) => Self::try_from(value),
}
}
}
impl TryFrom<&ParsedPathUrl> for pypi_types::DirectUrl {
type Error = Error;
fn try_from(value: &ParsedPathUrl) -> Result<Self, Self::Error> {
Ok(Self::LocalDirectory {
url: value.url.to_string(),
dir_info: pypi_types::DirInfo {
editable: value.editable.then_some(true),
},
})
}
}
impl TryFrom<&ParsedArchiveUrl> for pypi_types::DirectUrl {
type Error = Error;
fn try_from(value: &ParsedArchiveUrl) -> Result<Self, Self::Error> {
Ok(Self::ArchiveUrl {
url: value.url.to_string(),
archive_info: pypi_types::ArchiveInfo {
hash: None,
hashes: None,
},
subdirectory: value.subdirectory.clone(),
})
}
}
impl TryFrom<&ParsedGitUrl> for pypi_types::DirectUrl {
type Error = Error;
fn try_from(value: &ParsedGitUrl) -> Result<Self, Self::Error> {
Ok(Self::VcsUrl {
url: value.url.repository().to_string(),
vcs_info: pypi_types::VcsInfo {
vcs: pypi_types::VcsKind::Git,
commit_id: value.url.precise().as_ref().map(ToString::to_string),
requested_revision: value.url.reference().as_str().map(ToString::to_string),
},
subdirectory: value.subdirectory.clone(),
})
}
}
impl From<ParsedUrl> for Url {
fn from(value: ParsedUrl) -> Self {
match value {
ParsedUrl::Path(value) => value.into(),
ParsedUrl::Git(value) => value.into(),
ParsedUrl::Archive(value) => value.into(),
}
}
}
impl From<ParsedPathUrl> for Url {
fn from(value: ParsedPathUrl) -> Self {
value.url
}
}
impl From<ParsedArchiveUrl> for Url {
fn from(value: ParsedArchiveUrl) -> Self {
let mut url = value.url;
if let Some(subdirectory) = value.subdirectory {
url.set_fragment(Some(&format!("subdirectory={}", subdirectory.display())));
}
url
}
}
impl From<ParsedGitUrl> for Url {
fn from(value: ParsedGitUrl) -> Self {
let mut url = Self::parse(&format!("{}{}", "git+", Self::from(value.url).as_str()))
.expect("Git URL is invalid");
if let Some(subdirectory) = value.subdirectory {
url.set_fragment(Some(&format!("subdirectory={}", subdirectory.display())));
}
url
}
}
#[cfg(test)]
mod tests {
use anyhow::Result;
use url::Url;
use crate::parsed_url::ParsedUrl;
#[test]
fn direct_url_from_url() -> Result<()> {
let expected = Url::parse("git+https://github.com/pallets/flask.git")?;
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
assert_eq!(expected, actual);
let expected = Url::parse("git+https://github.com/pallets/flask.git#subdirectory=pkg_dir")?;
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
assert_eq!(expected, actual);
let expected = Url::parse("git+https://github.com/pallets/flask.git@2.0.0")?;
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
assert_eq!(expected, actual);
let expected =
Url::parse("git+https://github.com/pallets/flask.git@2.0.0#subdirectory=pkg_dir")?;
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
assert_eq!(expected, actual);
// TODO(charlie): Preserve other fragments.
let expected =
Url::parse("git+https://github.com/pallets/flask.git#egg=flask&subdirectory=pkg_dir")?;
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
assert_ne!(expected, actual);
Ok(())
}
#[test]
#[cfg(unix)]
fn direct_url_from_url_absolute() -> Result<()> {
let expected = Url::parse("file:///path/to/directory")?;
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
assert_eq!(expected, actual);
Ok(())
}
}

View file

@ -9,7 +9,7 @@ use pep508_rs::{MarkerEnvironment, MarkerTree, RequirementOrigin, VerbatimUrl, V
use uv_git::{GitReference, GitSha};
use uv_normalize::{ExtraName, PackageName};
use crate::{ParsedUrl, ParsedUrlError};
use crate::{ParsedUrl, VerbatimParsedUrl};
/// The requirements of a distribution, an extension over PEP 508's requirements.
#[derive(Debug, Clone, Eq, PartialEq)]
@ -44,9 +44,11 @@ impl Requirement {
true
}
}
}
impl From<pep508_rs::Requirement<VerbatimParsedUrl>> for Requirement {
/// Convert a [`pep508_rs::Requirement`] to a [`Requirement`].
pub fn from_pep508(requirement: pep508_rs::Requirement) -> Result<Self, Box<ParsedUrlError>> {
fn from(requirement: pep508_rs::Requirement<VerbatimParsedUrl>) -> Self {
let source = match requirement.version_or_url {
None => RequirementSource::Registry {
specifier: VersionSpecifiers::empty(),
@ -58,17 +60,16 @@ impl Requirement {
index: None,
},
Some(VersionOrUrl::Url(url)) => {
let direct_url = ParsedUrl::try_from(url.to_url())?;
RequirementSource::from_parsed_url(direct_url, url)
RequirementSource::from_parsed_url(url.parsed_url, url.verbatim)
}
};
Ok(Requirement {
Requirement {
name: requirement.name,
extras: requirement.extras,
marker: requirement.marker,
source,
origin: requirement.origin,
})
}
}
}

View file

@ -4,7 +4,7 @@ use std::fmt::{Display, Formatter};
use pep508_rs::{MarkerEnvironment, UnnamedRequirement};
use uv_normalize::ExtraName;
use crate::{ParsedUrl, ParsedUrlError, Requirement, RequirementSource};
use crate::{Requirement, RequirementSource, VerbatimParsedUrl};
/// An [`UnresolvedRequirement`] with additional metadata from `requirements.txt`, currently only
/// hashes but in the future also editable and similar information.
@ -29,7 +29,7 @@ pub enum UnresolvedRequirement {
/// `tool.uv.sources`.
Named(Requirement),
/// A PEP 508-like, direct URL dependency specifier.
Unnamed(UnnamedRequirement),
Unnamed(UnnamedRequirement<VerbatimParsedUrl>),
}
impl Display for UnresolvedRequirement {
@ -64,17 +64,13 @@ impl UnresolvedRequirement {
}
/// Return the version specifier or URL for the requirement.
pub fn source(&self) -> Result<Cow<'_, RequirementSource>, Box<ParsedUrlError>> {
// TODO(konsti): This is a bad place to raise errors, we should have parsed the url earlier.
pub fn source(&self) -> Cow<'_, RequirementSource> {
match self {
Self::Named(requirement) => Ok(Cow::Borrowed(&requirement.source)),
Self::Unnamed(requirement) => {
let parsed_url = ParsedUrl::try_from(requirement.url.to_url())?;
Ok(Cow::Owned(RequirementSource::from_parsed_url(
parsed_url,
requirement.url.clone(),
)))
}
Self::Named(requirement) => Cow::Borrowed(&requirement.source),
Self::Unnamed(requirement) => Cow::Owned(RequirementSource::from_parsed_url(
requirement.url.parsed_url.clone(),
requirement.url.verbatim.clone(),
)),
}
}
}