mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 19:08:04 +00:00
Use VerbatimParsedUrl
in pep508_rs
(#3758)
When parsing requirements from any source, directly parse the url parts (and reject unsupported urls) instead of parsing url parts at a later stage. This removes a bunch of error branches and concludes the work parsing url parts once and passing them around everywhere. Many usages of the assembled `VerbatimUrl` remain, but these can be removed incrementally. Please review commit-by-commit.
This commit is contained in:
parent
0d2f3fc4e4
commit
4db468e27f
56 changed files with 877 additions and 656 deletions
|
@ -25,7 +25,6 @@ uv-normalize = { workspace = true }
|
|||
|
||||
anyhow = { workspace = true }
|
||||
fs-err = { workspace = true }
|
||||
git2 = { workspace = true }
|
||||
indexmap = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
|
|
|
@ -105,9 +105,9 @@ impl std::fmt::Display for DirectSourceUrl<'_> {
|
|||
pub struct GitSourceUrl<'a> {
|
||||
/// The URL with the revision and subdirectory fragment.
|
||||
pub url: &'a VerbatimUrl,
|
||||
pub git: &'a GitUrl,
|
||||
/// The URL without the revision and subdirectory fragment.
|
||||
pub git: Cow<'a, GitUrl>,
|
||||
pub subdirectory: Option<Cow<'a, Path>>,
|
||||
pub subdirectory: Option<&'a Path>,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for GitSourceUrl<'_> {
|
||||
|
@ -120,8 +120,8 @@ impl<'a> From<&'a GitSourceDist> for GitSourceUrl<'a> {
|
|||
fn from(dist: &'a GitSourceDist) -> Self {
|
||||
Self {
|
||||
url: &dist.url,
|
||||
git: Cow::Borrowed(&dist.git),
|
||||
subdirectory: dist.subdirectory.as_deref().map(Cow::Borrowed),
|
||||
git: &dist.git,
|
||||
subdirectory: dist.subdirectory.as_deref(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,12 +4,12 @@ use anyhow::{anyhow, Result};
|
|||
|
||||
use distribution_filename::WheelFilename;
|
||||
use pep508_rs::VerbatimUrl;
|
||||
use pypi_types::HashDigest;
|
||||
use pypi_types::{HashDigest, ParsedPathUrl};
|
||||
use uv_normalize::PackageName;
|
||||
|
||||
use crate::{
|
||||
BuiltDist, Dist, DistributionMetadata, Hashed, InstalledMetadata, InstalledVersion, Name,
|
||||
ParsedPathUrl, ParsedUrl, SourceDist, VersionOrUrlRef,
|
||||
ParsedUrl, SourceDist, VersionOrUrlRef,
|
||||
};
|
||||
|
||||
/// A built distribution (wheel) that exists in the local cache.
|
||||
|
|
|
@ -42,6 +42,7 @@ use url::Url;
|
|||
use distribution_filename::WheelFilename;
|
||||
use pep440_rs::Version;
|
||||
use pep508_rs::{Pep508Url, VerbatimUrl};
|
||||
use pypi_types::{ParsedUrl, VerbatimParsedUrl};
|
||||
use uv_git::GitUrl;
|
||||
use uv_normalize::PackageName;
|
||||
|
||||
|
@ -57,7 +58,6 @@ pub use crate::hash::*;
|
|||
pub use crate::id::*;
|
||||
pub use crate::index_url::*;
|
||||
pub use crate::installed::*;
|
||||
pub use crate::parsed_url::*;
|
||||
pub use crate::prioritized_distribution::*;
|
||||
pub use crate::requirement::*;
|
||||
pub use crate::resolution::*;
|
||||
|
@ -77,7 +77,6 @@ mod hash;
|
|||
mod id;
|
||||
mod index_url;
|
||||
mod installed;
|
||||
mod parsed_url;
|
||||
mod prioritized_distribution;
|
||||
mod requirement;
|
||||
mod resolution;
|
||||
|
|
|
@ -1,311 +0,0 @@
|
|||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::{Error, Result};
|
||||
use thiserror::Error;
|
||||
use url::Url;
|
||||
|
||||
use pep508_rs::VerbatimUrl;
|
||||
use uv_git::{GitSha, GitUrl};
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ParsedUrlError {
|
||||
#[error("Unsupported URL prefix `{prefix}` in URL: `{url}` ({message})")]
|
||||
UnsupportedUrlPrefix {
|
||||
prefix: String,
|
||||
url: Url,
|
||||
message: &'static str,
|
||||
},
|
||||
#[error("Invalid path in file URL: `{0}`")]
|
||||
InvalidFileUrl(Url),
|
||||
#[error("Failed to parse Git reference from URL: `{0}`")]
|
||||
GitShaParse(Url, #[source] git2::Error),
|
||||
#[error("Not a valid URL: `{0}`")]
|
||||
UrlParse(String, #[source] url::ParseError),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Eq, Ord)]
|
||||
pub struct VerbatimParsedUrl {
|
||||
pub parsed_url: ParsedUrl,
|
||||
pub verbatim: VerbatimUrl,
|
||||
}
|
||||
|
||||
/// We support three types of URLs for distributions:
|
||||
/// * The path to a file or directory (`file://`)
|
||||
/// * A Git repository (`git+https://` or `git+ssh://`), optionally with a subdirectory and/or
|
||||
/// string to checkout.
|
||||
/// * A remote archive (`https://`), optional with a subdirectory (source dist only).
|
||||
///
|
||||
/// A URL in a requirement `foo @ <url>` must be one of the above.
|
||||
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
|
||||
pub enum ParsedUrl {
|
||||
/// The direct URL is a path to a local directory or file.
|
||||
Path(ParsedPathUrl),
|
||||
/// The direct URL is path to a Git repository.
|
||||
Git(ParsedGitUrl),
|
||||
/// The direct URL is a URL to a source archive (e.g., a `.tar.gz` file) or built archive
|
||||
/// (i.e., a `.whl` file).
|
||||
Archive(ParsedArchiveUrl),
|
||||
}
|
||||
|
||||
/// A local path url
|
||||
///
|
||||
/// Examples:
|
||||
/// * `file:///home/ferris/my_project`
|
||||
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
|
||||
pub struct ParsedPathUrl {
|
||||
pub url: Url,
|
||||
pub path: PathBuf,
|
||||
pub editable: bool,
|
||||
}
|
||||
|
||||
/// A Git repository URL.
|
||||
///
|
||||
/// Examples:
|
||||
/// * `git+https://git.example.com/MyProject.git`
|
||||
/// * `git+https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir`
|
||||
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
|
||||
pub struct ParsedGitUrl {
|
||||
pub url: GitUrl,
|
||||
pub subdirectory: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl TryFrom<Url> for ParsedGitUrl {
|
||||
type Error = ParsedUrlError;
|
||||
|
||||
/// Supports URLS with and without the `git+` prefix.
|
||||
///
|
||||
/// When the URL includes a prefix, it's presumed to come from a PEP 508 requirement; when it's
|
||||
/// excluded, it's presumed to come from `tool.uv.sources`.
|
||||
fn try_from(url_in: Url) -> Result<Self, Self::Error> {
|
||||
let subdirectory = get_subdirectory(&url_in);
|
||||
|
||||
let url = url_in
|
||||
.as_str()
|
||||
.strip_prefix("git+")
|
||||
.unwrap_or(url_in.as_str());
|
||||
let url = Url::parse(url).map_err(|err| ParsedUrlError::UrlParse(url.to_string(), err))?;
|
||||
let url = GitUrl::try_from(url)
|
||||
.map_err(|err| ParsedUrlError::GitShaParse(url_in.clone(), err))?;
|
||||
Ok(Self { url, subdirectory })
|
||||
}
|
||||
}
|
||||
|
||||
/// A URL to a source or built archive.
|
||||
///
|
||||
/// Examples:
|
||||
/// * A built distribution: `https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1-py2.py3-none-any.whl`
|
||||
/// * A source distribution with a valid name: `https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1.tar.gz`
|
||||
/// * A source dist with a recognizable extension but invalid name: `https://github.com/foo-labs/foo/archive/master.zip#egg=pkg&subdirectory=packages/bar`
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
|
||||
pub struct ParsedArchiveUrl {
|
||||
pub url: Url,
|
||||
pub subdirectory: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl From<Url> for ParsedArchiveUrl {
|
||||
fn from(url: Url) -> Self {
|
||||
let subdirectory = get_subdirectory(&url);
|
||||
Self { url, subdirectory }
|
||||
}
|
||||
}
|
||||
|
||||
/// If the URL points to a subdirectory, extract it, as in (git):
|
||||
/// `git+https://git.example.com/MyProject.git@v1.0#subdirectory=pkg_dir`
|
||||
/// `git+https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir`
|
||||
/// or (direct archive url):
|
||||
/// `https://github.com/foo-labs/foo/archive/master.zip#subdirectory=packages/bar`
|
||||
/// `https://github.com/foo-labs/foo/archive/master.zip#egg=pkg&subdirectory=packages/bar`
|
||||
fn get_subdirectory(url: &Url) -> Option<PathBuf> {
|
||||
let fragment = url.fragment()?;
|
||||
let subdirectory = fragment
|
||||
.split('&')
|
||||
.find_map(|fragment| fragment.strip_prefix("subdirectory="))?;
|
||||
Some(PathBuf::from(subdirectory))
|
||||
}
|
||||
|
||||
/// Return the Git reference of the given URL, if it exists.
|
||||
pub fn git_reference(url: Url) -> Result<Option<GitSha>, Error> {
|
||||
let ParsedGitUrl { url, .. } = ParsedGitUrl::try_from(url)?;
|
||||
Ok(url.precise())
|
||||
}
|
||||
|
||||
impl TryFrom<Url> for ParsedUrl {
|
||||
type Error = ParsedUrlError;
|
||||
|
||||
fn try_from(url: Url) -> Result<Self, Self::Error> {
|
||||
if let Some((prefix, ..)) = url.scheme().split_once('+') {
|
||||
match prefix {
|
||||
"git" => Ok(Self::Git(ParsedGitUrl::try_from(url)?)),
|
||||
"bzr" => Err(ParsedUrlError::UnsupportedUrlPrefix {
|
||||
prefix: prefix.to_string(),
|
||||
url: url.clone(),
|
||||
message: "Bazaar is not supported",
|
||||
}),
|
||||
"hg" => Err(ParsedUrlError::UnsupportedUrlPrefix {
|
||||
prefix: prefix.to_string(),
|
||||
url: url.clone(),
|
||||
message: "Mercurial is not supported",
|
||||
}),
|
||||
"svn" => Err(ParsedUrlError::UnsupportedUrlPrefix {
|
||||
prefix: prefix.to_string(),
|
||||
url: url.clone(),
|
||||
message: "Subversion is not supported",
|
||||
}),
|
||||
_ => Err(ParsedUrlError::UnsupportedUrlPrefix {
|
||||
prefix: prefix.to_string(),
|
||||
url: url.clone(),
|
||||
message: "Unknown scheme",
|
||||
}),
|
||||
}
|
||||
} else if url.scheme().eq_ignore_ascii_case("file") {
|
||||
let path = url
|
||||
.to_file_path()
|
||||
.map_err(|()| ParsedUrlError::InvalidFileUrl(url.clone()))?;
|
||||
Ok(Self::Path(ParsedPathUrl {
|
||||
url,
|
||||
path,
|
||||
editable: false,
|
||||
}))
|
||||
} else {
|
||||
Ok(Self::Archive(ParsedArchiveUrl::from(url)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&ParsedUrl> for pypi_types::DirectUrl {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: &ParsedUrl) -> std::result::Result<Self, Self::Error> {
|
||||
match value {
|
||||
ParsedUrl::Path(value) => Self::try_from(value),
|
||||
ParsedUrl::Git(value) => Self::try_from(value),
|
||||
ParsedUrl::Archive(value) => Self::try_from(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&ParsedPathUrl> for pypi_types::DirectUrl {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: &ParsedPathUrl) -> Result<Self, Self::Error> {
|
||||
Ok(Self::LocalDirectory {
|
||||
url: value.url.to_string(),
|
||||
dir_info: pypi_types::DirInfo {
|
||||
editable: value.editable.then_some(true),
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&ParsedArchiveUrl> for pypi_types::DirectUrl {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: &ParsedArchiveUrl) -> Result<Self, Self::Error> {
|
||||
Ok(Self::ArchiveUrl {
|
||||
url: value.url.to_string(),
|
||||
archive_info: pypi_types::ArchiveInfo {
|
||||
hash: None,
|
||||
hashes: None,
|
||||
},
|
||||
subdirectory: value.subdirectory.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&ParsedGitUrl> for pypi_types::DirectUrl {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: &ParsedGitUrl) -> Result<Self, Self::Error> {
|
||||
Ok(Self::VcsUrl {
|
||||
url: value.url.repository().to_string(),
|
||||
vcs_info: pypi_types::VcsInfo {
|
||||
vcs: pypi_types::VcsKind::Git,
|
||||
commit_id: value.url.precise().as_ref().map(ToString::to_string),
|
||||
requested_revision: value.url.reference().as_str().map(ToString::to_string),
|
||||
},
|
||||
subdirectory: value.subdirectory.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParsedUrl> for Url {
|
||||
fn from(value: ParsedUrl) -> Self {
|
||||
match value {
|
||||
ParsedUrl::Path(value) => value.into(),
|
||||
ParsedUrl::Git(value) => value.into(),
|
||||
ParsedUrl::Archive(value) => value.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParsedPathUrl> for Url {
|
||||
fn from(value: ParsedPathUrl) -> Self {
|
||||
value.url
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParsedArchiveUrl> for Url {
|
||||
fn from(value: ParsedArchiveUrl) -> Self {
|
||||
let mut url = value.url;
|
||||
if let Some(subdirectory) = value.subdirectory {
|
||||
url.set_fragment(Some(&format!("subdirectory={}", subdirectory.display())));
|
||||
}
|
||||
url
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParsedGitUrl> for Url {
|
||||
fn from(value: ParsedGitUrl) -> Self {
|
||||
let mut url = Self::parse(&format!("{}{}", "git+", Self::from(value.url).as_str()))
|
||||
.expect("Git URL is invalid");
|
||||
if let Some(subdirectory) = value.subdirectory {
|
||||
url.set_fragment(Some(&format!("subdirectory={}", subdirectory.display())));
|
||||
}
|
||||
url
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use anyhow::Result;
|
||||
use url::Url;
|
||||
|
||||
use crate::parsed_url::ParsedUrl;
|
||||
|
||||
#[test]
|
||||
fn direct_url_from_url() -> Result<()> {
|
||||
let expected = Url::parse("git+https://github.com/pallets/flask.git")?;
|
||||
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
|
||||
assert_eq!(expected, actual);
|
||||
|
||||
let expected = Url::parse("git+https://github.com/pallets/flask.git#subdirectory=pkg_dir")?;
|
||||
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
|
||||
assert_eq!(expected, actual);
|
||||
|
||||
let expected = Url::parse("git+https://github.com/pallets/flask.git@2.0.0")?;
|
||||
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
|
||||
assert_eq!(expected, actual);
|
||||
|
||||
let expected =
|
||||
Url::parse("git+https://github.com/pallets/flask.git@2.0.0#subdirectory=pkg_dir")?;
|
||||
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
|
||||
assert_eq!(expected, actual);
|
||||
|
||||
// TODO(charlie): Preserve other fragments.
|
||||
let expected =
|
||||
Url::parse("git+https://github.com/pallets/flask.git#egg=flask&subdirectory=pkg_dir")?;
|
||||
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
|
||||
assert_ne!(expected, actual);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(unix)]
|
||||
fn direct_url_from_url_absolute() -> Result<()> {
|
||||
let expected = Url::parse("file:///path/to/directory")?;
|
||||
let actual = Url::from(ParsedUrl::try_from(expected.clone())?);
|
||||
assert_eq!(expected, actual);
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -9,7 +9,7 @@ use pep508_rs::{MarkerEnvironment, MarkerTree, RequirementOrigin, VerbatimUrl, V
|
|||
use uv_git::{GitReference, GitSha};
|
||||
use uv_normalize::{ExtraName, PackageName};
|
||||
|
||||
use crate::{ParsedUrl, ParsedUrlError};
|
||||
use crate::{ParsedUrl, VerbatimParsedUrl};
|
||||
|
||||
/// The requirements of a distribution, an extension over PEP 508's requirements.
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
|
@ -44,9 +44,11 @@ impl Requirement {
|
|||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<pep508_rs::Requirement<VerbatimParsedUrl>> for Requirement {
|
||||
/// Convert a [`pep508_rs::Requirement`] to a [`Requirement`].
|
||||
pub fn from_pep508(requirement: pep508_rs::Requirement) -> Result<Self, Box<ParsedUrlError>> {
|
||||
fn from(requirement: pep508_rs::Requirement<VerbatimParsedUrl>) -> Self {
|
||||
let source = match requirement.version_or_url {
|
||||
None => RequirementSource::Registry {
|
||||
specifier: VersionSpecifiers::empty(),
|
||||
|
@ -58,17 +60,16 @@ impl Requirement {
|
|||
index: None,
|
||||
},
|
||||
Some(VersionOrUrl::Url(url)) => {
|
||||
let direct_url = ParsedUrl::try_from(url.to_url())?;
|
||||
RequirementSource::from_parsed_url(direct_url, url)
|
||||
RequirementSource::from_parsed_url(url.parsed_url, url.verbatim)
|
||||
}
|
||||
};
|
||||
Ok(Requirement {
|
||||
Requirement {
|
||||
name: requirement.name,
|
||||
extras: requirement.extras,
|
||||
marker: requirement.marker,
|
||||
source,
|
||||
origin: requirement.origin,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ use std::fmt::{Display, Formatter};
|
|||
use pep508_rs::{MarkerEnvironment, UnnamedRequirement};
|
||||
use uv_normalize::ExtraName;
|
||||
|
||||
use crate::{ParsedUrl, ParsedUrlError, Requirement, RequirementSource};
|
||||
use crate::{Requirement, RequirementSource, VerbatimParsedUrl};
|
||||
|
||||
/// An [`UnresolvedRequirement`] with additional metadata from `requirements.txt`, currently only
|
||||
/// hashes but in the future also editable and similar information.
|
||||
|
@ -29,7 +29,7 @@ pub enum UnresolvedRequirement {
|
|||
/// `tool.uv.sources`.
|
||||
Named(Requirement),
|
||||
/// A PEP 508-like, direct URL dependency specifier.
|
||||
Unnamed(UnnamedRequirement),
|
||||
Unnamed(UnnamedRequirement<VerbatimParsedUrl>),
|
||||
}
|
||||
|
||||
impl Display for UnresolvedRequirement {
|
||||
|
@ -64,17 +64,13 @@ impl UnresolvedRequirement {
|
|||
}
|
||||
|
||||
/// Return the version specifier or URL for the requirement.
|
||||
pub fn source(&self) -> Result<Cow<'_, RequirementSource>, Box<ParsedUrlError>> {
|
||||
// TODO(konsti): This is a bad place to raise errors, we should have parsed the url earlier.
|
||||
pub fn source(&self) -> Cow<'_, RequirementSource> {
|
||||
match self {
|
||||
Self::Named(requirement) => Ok(Cow::Borrowed(&requirement.source)),
|
||||
Self::Unnamed(requirement) => {
|
||||
let parsed_url = ParsedUrl::try_from(requirement.url.to_url())?;
|
||||
Ok(Cow::Owned(RequirementSource::from_parsed_url(
|
||||
parsed_url,
|
||||
requirement.url.clone(),
|
||||
)))
|
||||
}
|
||||
Self::Named(requirement) => Cow::Borrowed(&requirement.source),
|
||||
Self::Unnamed(requirement) => Cow::Owned(RequirementSource::from_parsed_url(
|
||||
requirement.url.parsed_url.clone(),
|
||||
requirement.url.verbatim.clone(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue